19 lines
664 B
Python
19 lines
664 B
Python
import pyspark.sql.types as T
|
|
|
|
# Relative path to the rides CSV input. What it is resolved against depends
# on the code that opens it (not visible in this module).
INPUT_DATA_PATH = '../../resources/rides.csv'

# Host:port of the broker(s) to connect to.
# NOTE(review): port 9092 suggests a local Kafka broker -- confirm.
BOOTSTRAP_SERVERS = 'localhost:9092'

# Topic name used for the windowed per-vendor counts.
TOPIC_WINDOWED_VENDOR_ID_COUNT = 'vendor_counts_windowed'

# The producing and consuming sides share one topic, so both names are bound
# to the same string in a single chained assignment to keep them in sync.
PRODUCE_TOPIC_RIDES_CSV = CONSUME_TOPIC_RIDES_CSV = 'rides_csv'
|
|
|
|
# Spark SQL schema for one ride record: each entry pairs a column name with
# its Spark type. Nullability is not specified here, so every field uses
# StructField's default.
RIDE_SCHEMA = T.StructType([
    T.StructField("vendor_id", T.IntegerType()),
    T.StructField("tpep_pickup_datetime", T.TimestampType()),
    T.StructField("tpep_dropoff_datetime", T.TimestampType()),
    T.StructField("passenger_count", T.IntegerType()),
    T.StructField("trip_distance", T.FloatType()),
    T.StructField("payment_type", T.IntegerType()),
    T.StructField("total_amount", T.FloatType()),
])
|