Compare commits
1 Commits
homework-s
...
de-zoomcam
| Author | SHA1 | Date | |
|---|---|---|---|
| 87f33b1b85 |
@ -1,5 +0,0 @@
|
||||
|
||||
target/
|
||||
dbt_modules/
|
||||
logs/
|
||||
dbt_packages/
|
||||
@ -1,38 +0,0 @@
|
||||
Welcome to your new dbt project!
|
||||
|
||||
### How to run this project
|
||||
### About the project
|
||||
This project is based on the [dbt starter project](https://github.com/dbt-labs/dbt-starter-project) (generated by running `dbt init`)
|
||||
Try running the following commands:
|
||||
- dbt run
|
||||
- dbt test
|
||||
|
||||
A project includes the following files:
|
||||
- dbt_project.yml: file used to configure the dbt project. If you are using dbt locally, make sure the profile here matches the one set up during installation in ~/.dbt/profiles.yml
|
||||
- *.yml files under folders models, data, macros: documentation files
|
||||
- csv files in the data folder: these will be our sources, files described above
|
||||
- Files inside the models folder: the SQL files contain the scripts that run our models; they cover the staging, core, and data mart models. At the end, these models will follow this structure:
|
||||
|
||||

|
||||
|
||||
|
||||
#### Workflow
|
||||

|
||||
|
||||
#### Execution
|
||||
After installing the required tools and cloning this repo, execute the following commands:
|
||||
|
||||
1. Change into the project's directory from the command line: `$ cd [..]/taxi_rides_ny`
|
||||
2. Load the CSVs into the database. This materializes the CSVs as tables in your target schema: `$ dbt seed`
|
||||
3. Run the models: `$ dbt run`
|
||||
4. Test your data: `$ dbt test`
|
||||
_Alternative: use `$ dbt build` to execute the 3 steps above with a single command_
|
||||
5. Generate documentation for the project: `$ dbt docs generate`
|
||||
6. View the documentation for the project. This step should open the documentation page in a web browser; it can also be accessed at http://localhost:8080 : `$ dbt docs serve`
|
||||
|
||||
### dbt resources:
|
||||
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
|
||||
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
|
||||
- Join the [chat](http://slack.getdbt.com/) on Slack for live discussions and support
|
||||
- Find [dbt events](https://events.getdbt.com) near you
|
||||
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
|
||||
@ -1,46 +0,0 @@
|
||||
|
||||
-- Builds the raw trip tables in trips_data_all from the BigQuery public
-- NYC taxi dataset: the 2019 data creates each table and the 2020 data is
-- appended to it.
--
-- `create or replace` lets this script be re-run from the top; a plain
-- `create table` fails as soon as the target tables already exist.
--
-- NOTE: `select *` and `insert ... select *` match columns by position, so
-- this relies on the 2019 and 2020 public tables sharing the same column
-- layout.

create or replace table `taxi-rides-ny-339813-412521.trips_data_all.green_tripdata` as
select * from `bigquery-public-data.new_york_taxi_trips.tlc_green_trips_2019`;

create or replace table `taxi-rides-ny-339813-412521.trips_data_all.yellow_tripdata` as
select * from `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2019`;

insert into `taxi-rides-ny-339813-412521.trips_data_all.green_tripdata`
select * from `bigquery-public-data.new_york_taxi_trips.tlc_green_trips_2020`;

insert into `taxi-rides-ny-339813-412521.trips_data_all.yellow_tripdata`
select * from `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2020`;
|
||||
|
||||
-- Yellow table: rename the public-dataset column names to the names the
-- staging models read (VendorID, tpep_* timestamps, RatecodeID,
-- improvement_surcharge, PULocationID / DOLocationID).
alter table `taxi-rides-ny-339813-412521.trips_data_all.yellow_tripdata` rename column vendor_id to VendorID;
alter table `taxi-rides-ny-339813-412521.trips_data_all.yellow_tripdata` rename column pickup_datetime to tpep_pickup_datetime;
alter table `taxi-rides-ny-339813-412521.trips_data_all.yellow_tripdata` rename column dropoff_datetime to tpep_dropoff_datetime;
alter table `taxi-rides-ny-339813-412521.trips_data_all.yellow_tripdata` rename column rate_code to RatecodeID;
alter table `taxi-rides-ny-339813-412521.trips_data_all.yellow_tripdata` rename column imp_surcharge to improvement_surcharge;
alter table `taxi-rides-ny-339813-412521.trips_data_all.yellow_tripdata` rename column pickup_location_id to PULocationID;
alter table `taxi-rides-ny-339813-412521.trips_data_all.yellow_tripdata` rename column dropoff_location_id to DOLocationID;
|
||||
|
||||
-- Green table: rename the public-dataset column names to the names the
-- staging models read (VendorID, lpep_* timestamps, RatecodeID,
-- improvement_surcharge, PULocationID / DOLocationID).
alter table `taxi-rides-ny-339813-412521.trips_data_all.green_tripdata` rename column vendor_id to VendorID;
alter table `taxi-rides-ny-339813-412521.trips_data_all.green_tripdata` rename column pickup_datetime to lpep_pickup_datetime;
alter table `taxi-rides-ny-339813-412521.trips_data_all.green_tripdata` rename column dropoff_datetime to lpep_dropoff_datetime;
alter table `taxi-rides-ny-339813-412521.trips_data_all.green_tripdata` rename column rate_code to RatecodeID;
alter table `taxi-rides-ny-339813-412521.trips_data_all.green_tripdata` rename column imp_surcharge to improvement_surcharge;
alter table `taxi-rides-ny-339813-412521.trips_data_all.green_tripdata` rename column pickup_location_id to PULocationID;
alter table `taxi-rides-ny-339813-412521.trips_data_all.green_tripdata` rename column dropoff_location_id to DOLocationID;
|
||||
@ -1,10 +0,0 @@
|
||||
version: 2
|
||||
|
||||
seeds:
|
||||
- name: taxi_zone_lookup
|
||||
description: >
|
||||
Taxi Zones roughly based on NYC Department of City Planning's Neighborhood
|
||||
Tabulation Areas (NTAs) and are meant to approximate neighborhoods, so you can see which
|
||||
neighborhood a passenger was picked up in, and which neighborhood they were dropped off in.
|
||||
Includes associated service_zone (EWR, Boro Zone, Yellow Zone)
|
||||
|
||||
@ -1,266 +0,0 @@
|
||||
"locationid","borough","zone","service_zone"
|
||||
1,"EWR","Newark Airport","EWR"
|
||||
2,"Queens","Jamaica Bay","Boro Zone"
|
||||
3,"Bronx","Allerton/Pelham Gardens","Boro Zone"
|
||||
4,"Manhattan","Alphabet City","Yellow Zone"
|
||||
5,"Staten Island","Arden Heights","Boro Zone"
|
||||
6,"Staten Island","Arrochar/Fort Wadsworth","Boro Zone"
|
||||
7,"Queens","Astoria","Boro Zone"
|
||||
8,"Queens","Astoria Park","Boro Zone"
|
||||
9,"Queens","Auburndale","Boro Zone"
|
||||
10,"Queens","Baisley Park","Boro Zone"
|
||||
11,"Brooklyn","Bath Beach","Boro Zone"
|
||||
12,"Manhattan","Battery Park","Yellow Zone"
|
||||
13,"Manhattan","Battery Park City","Yellow Zone"
|
||||
14,"Brooklyn","Bay Ridge","Boro Zone"
|
||||
15,"Queens","Bay Terrace/Fort Totten","Boro Zone"
|
||||
16,"Queens","Bayside","Boro Zone"
|
||||
17,"Brooklyn","Bedford","Boro Zone"
|
||||
18,"Bronx","Bedford Park","Boro Zone"
|
||||
19,"Queens","Bellerose","Boro Zone"
|
||||
20,"Bronx","Belmont","Boro Zone"
|
||||
21,"Brooklyn","Bensonhurst East","Boro Zone"
|
||||
22,"Brooklyn","Bensonhurst West","Boro Zone"
|
||||
23,"Staten Island","Bloomfield/Emerson Hill","Boro Zone"
|
||||
24,"Manhattan","Bloomingdale","Yellow Zone"
|
||||
25,"Brooklyn","Boerum Hill","Boro Zone"
|
||||
26,"Brooklyn","Borough Park","Boro Zone"
|
||||
27,"Queens","Breezy Point/Fort Tilden/Riis Beach","Boro Zone"
|
||||
28,"Queens","Briarwood/Jamaica Hills","Boro Zone"
|
||||
29,"Brooklyn","Brighton Beach","Boro Zone"
|
||||
30,"Queens","Broad Channel","Boro Zone"
|
||||
31,"Bronx","Bronx Park","Boro Zone"
|
||||
32,"Bronx","Bronxdale","Boro Zone"
|
||||
33,"Brooklyn","Brooklyn Heights","Boro Zone"
|
||||
34,"Brooklyn","Brooklyn Navy Yard","Boro Zone"
|
||||
35,"Brooklyn","Brownsville","Boro Zone"
|
||||
36,"Brooklyn","Bushwick North","Boro Zone"
|
||||
37,"Brooklyn","Bushwick South","Boro Zone"
|
||||
38,"Queens","Cambria Heights","Boro Zone"
|
||||
39,"Brooklyn","Canarsie","Boro Zone"
|
||||
40,"Brooklyn","Carroll Gardens","Boro Zone"
|
||||
41,"Manhattan","Central Harlem","Boro Zone"
|
||||
42,"Manhattan","Central Harlem North","Boro Zone"
|
||||
43,"Manhattan","Central Park","Yellow Zone"
|
||||
44,"Staten Island","Charleston/Tottenville","Boro Zone"
|
||||
45,"Manhattan","Chinatown","Yellow Zone"
|
||||
46,"Bronx","City Island","Boro Zone"
|
||||
47,"Bronx","Claremont/Bathgate","Boro Zone"
|
||||
48,"Manhattan","Clinton East","Yellow Zone"
|
||||
49,"Brooklyn","Clinton Hill","Boro Zone"
|
||||
50,"Manhattan","Clinton West","Yellow Zone"
|
||||
51,"Bronx","Co-Op City","Boro Zone"
|
||||
52,"Brooklyn","Cobble Hill","Boro Zone"
|
||||
53,"Queens","College Point","Boro Zone"
|
||||
54,"Brooklyn","Columbia Street","Boro Zone"
|
||||
55,"Brooklyn","Coney Island","Boro Zone"
|
||||
56,"Queens","Corona","Boro Zone"
|
||||
57,"Queens","Corona","Boro Zone"
|
||||
58,"Bronx","Country Club","Boro Zone"
|
||||
59,"Bronx","Crotona Park","Boro Zone"
|
||||
60,"Bronx","Crotona Park East","Boro Zone"
|
||||
61,"Brooklyn","Crown Heights North","Boro Zone"
|
||||
62,"Brooklyn","Crown Heights South","Boro Zone"
|
||||
63,"Brooklyn","Cypress Hills","Boro Zone"
|
||||
64,"Queens","Douglaston","Boro Zone"
|
||||
65,"Brooklyn","Downtown Brooklyn/MetroTech","Boro Zone"
|
||||
66,"Brooklyn","DUMBO/Vinegar Hill","Boro Zone"
|
||||
67,"Brooklyn","Dyker Heights","Boro Zone"
|
||||
68,"Manhattan","East Chelsea","Yellow Zone"
|
||||
69,"Bronx","East Concourse/Concourse Village","Boro Zone"
|
||||
70,"Queens","East Elmhurst","Boro Zone"
|
||||
71,"Brooklyn","East Flatbush/Farragut","Boro Zone"
|
||||
72,"Brooklyn","East Flatbush/Remsen Village","Boro Zone"
|
||||
73,"Queens","East Flushing","Boro Zone"
|
||||
74,"Manhattan","East Harlem North","Boro Zone"
|
||||
75,"Manhattan","East Harlem South","Boro Zone"
|
||||
76,"Brooklyn","East New York","Boro Zone"
|
||||
77,"Brooklyn","East New York/Pennsylvania Avenue","Boro Zone"
|
||||
78,"Bronx","East Tremont","Boro Zone"
|
||||
79,"Manhattan","East Village","Yellow Zone"
|
||||
80,"Brooklyn","East Williamsburg","Boro Zone"
|
||||
81,"Bronx","Eastchester","Boro Zone"
|
||||
82,"Queens","Elmhurst","Boro Zone"
|
||||
83,"Queens","Elmhurst/Maspeth","Boro Zone"
|
||||
84,"Staten Island","Eltingville/Annadale/Prince's Bay","Boro Zone"
|
||||
85,"Brooklyn","Erasmus","Boro Zone"
|
||||
86,"Queens","Far Rockaway","Boro Zone"
|
||||
87,"Manhattan","Financial District North","Yellow Zone"
|
||||
88,"Manhattan","Financial District South","Yellow Zone"
|
||||
89,"Brooklyn","Flatbush/Ditmas Park","Boro Zone"
|
||||
90,"Manhattan","Flatiron","Yellow Zone"
|
||||
91,"Brooklyn","Flatlands","Boro Zone"
|
||||
92,"Queens","Flushing","Boro Zone"
|
||||
93,"Queens","Flushing Meadows-Corona Park","Boro Zone"
|
||||
94,"Bronx","Fordham South","Boro Zone"
|
||||
95,"Queens","Forest Hills","Boro Zone"
|
||||
96,"Queens","Forest Park/Highland Park","Boro Zone"
|
||||
97,"Brooklyn","Fort Greene","Boro Zone"
|
||||
98,"Queens","Fresh Meadows","Boro Zone"
|
||||
99,"Staten Island","Freshkills Park","Boro Zone"
|
||||
100,"Manhattan","Garment District","Yellow Zone"
|
||||
101,"Queens","Glen Oaks","Boro Zone"
|
||||
102,"Queens","Glendale","Boro Zone"
|
||||
103,"Manhattan","Governor's Island/Ellis Island/Liberty Island","Yellow Zone"
|
||||
104,"Manhattan","Governor's Island/Ellis Island/Liberty Island","Yellow Zone"
|
||||
105,"Manhattan","Governor's Island/Ellis Island/Liberty Island","Yellow Zone"
|
||||
106,"Brooklyn","Gowanus","Boro Zone"
|
||||
107,"Manhattan","Gramercy","Yellow Zone"
|
||||
108,"Brooklyn","Gravesend","Boro Zone"
|
||||
109,"Staten Island","Great Kills","Boro Zone"
|
||||
110,"Staten Island","Great Kills Park","Boro Zone"
|
||||
111,"Brooklyn","Green-Wood Cemetery","Boro Zone"
|
||||
112,"Brooklyn","Greenpoint","Boro Zone"
|
||||
113,"Manhattan","Greenwich Village North","Yellow Zone"
|
||||
114,"Manhattan","Greenwich Village South","Yellow Zone"
|
||||
115,"Staten Island","Grymes Hill/Clifton","Boro Zone"
|
||||
116,"Manhattan","Hamilton Heights","Boro Zone"
|
||||
117,"Queens","Hammels/Arverne","Boro Zone"
|
||||
118,"Staten Island","Heartland Village/Todt Hill","Boro Zone"
|
||||
119,"Bronx","Highbridge","Boro Zone"
|
||||
120,"Manhattan","Highbridge Park","Boro Zone"
|
||||
121,"Queens","Hillcrest/Pomonok","Boro Zone"
|
||||
122,"Queens","Hollis","Boro Zone"
|
||||
123,"Brooklyn","Homecrest","Boro Zone"
|
||||
124,"Queens","Howard Beach","Boro Zone"
|
||||
125,"Manhattan","Hudson Sq","Yellow Zone"
|
||||
126,"Bronx","Hunts Point","Boro Zone"
|
||||
127,"Manhattan","Inwood","Boro Zone"
|
||||
128,"Manhattan","Inwood Hill Park","Boro Zone"
|
||||
129,"Queens","Jackson Heights","Boro Zone"
|
||||
130,"Queens","Jamaica","Boro Zone"
|
||||
131,"Queens","Jamaica Estates","Boro Zone"
|
||||
132,"Queens","JFK Airport","Airports"
|
||||
133,"Brooklyn","Kensington","Boro Zone"
|
||||
134,"Queens","Kew Gardens","Boro Zone"
|
||||
135,"Queens","Kew Gardens Hills","Boro Zone"
|
||||
136,"Bronx","Kingsbridge Heights","Boro Zone"
|
||||
137,"Manhattan","Kips Bay","Yellow Zone"
|
||||
138,"Queens","LaGuardia Airport","Airports"
|
||||
139,"Queens","Laurelton","Boro Zone"
|
||||
140,"Manhattan","Lenox Hill East","Yellow Zone"
|
||||
141,"Manhattan","Lenox Hill West","Yellow Zone"
|
||||
142,"Manhattan","Lincoln Square East","Yellow Zone"
|
||||
143,"Manhattan","Lincoln Square West","Yellow Zone"
|
||||
144,"Manhattan","Little Italy/NoLiTa","Yellow Zone"
|
||||
145,"Queens","Long Island City/Hunters Point","Boro Zone"
|
||||
146,"Queens","Long Island City/Queens Plaza","Boro Zone"
|
||||
147,"Bronx","Longwood","Boro Zone"
|
||||
148,"Manhattan","Lower East Side","Yellow Zone"
|
||||
149,"Brooklyn","Madison","Boro Zone"
|
||||
150,"Brooklyn","Manhattan Beach","Boro Zone"
|
||||
151,"Manhattan","Manhattan Valley","Yellow Zone"
|
||||
152,"Manhattan","Manhattanville","Boro Zone"
|
||||
153,"Manhattan","Marble Hill","Boro Zone"
|
||||
154,"Brooklyn","Marine Park/Floyd Bennett Field","Boro Zone"
|
||||
155,"Brooklyn","Marine Park/Mill Basin","Boro Zone"
|
||||
156,"Staten Island","Mariners Harbor","Boro Zone"
|
||||
157,"Queens","Maspeth","Boro Zone"
|
||||
158,"Manhattan","Meatpacking/West Village West","Yellow Zone"
|
||||
159,"Bronx","Melrose South","Boro Zone"
|
||||
160,"Queens","Middle Village","Boro Zone"
|
||||
161,"Manhattan","Midtown Center","Yellow Zone"
|
||||
162,"Manhattan","Midtown East","Yellow Zone"
|
||||
163,"Manhattan","Midtown North","Yellow Zone"
|
||||
164,"Manhattan","Midtown South","Yellow Zone"
|
||||
165,"Brooklyn","Midwood","Boro Zone"
|
||||
166,"Manhattan","Morningside Heights","Boro Zone"
|
||||
167,"Bronx","Morrisania/Melrose","Boro Zone"
|
||||
168,"Bronx","Mott Haven/Port Morris","Boro Zone"
|
||||
169,"Bronx","Mount Hope","Boro Zone"
|
||||
170,"Manhattan","Murray Hill","Yellow Zone"
|
||||
171,"Queens","Murray Hill-Queens","Boro Zone"
|
||||
172,"Staten Island","New Dorp/Midland Beach","Boro Zone"
|
||||
173,"Queens","North Corona","Boro Zone"
|
||||
174,"Bronx","Norwood","Boro Zone"
|
||||
175,"Queens","Oakland Gardens","Boro Zone"
|
||||
176,"Staten Island","Oakwood","Boro Zone"
|
||||
177,"Brooklyn","Ocean Hill","Boro Zone"
|
||||
178,"Brooklyn","Ocean Parkway South","Boro Zone"
|
||||
179,"Queens","Old Astoria","Boro Zone"
|
||||
180,"Queens","Ozone Park","Boro Zone"
|
||||
181,"Brooklyn","Park Slope","Boro Zone"
|
||||
182,"Bronx","Parkchester","Boro Zone"
|
||||
183,"Bronx","Pelham Bay","Boro Zone"
|
||||
184,"Bronx","Pelham Bay Park","Boro Zone"
|
||||
185,"Bronx","Pelham Parkway","Boro Zone"
|
||||
186,"Manhattan","Penn Station/Madison Sq West","Yellow Zone"
|
||||
187,"Staten Island","Port Richmond","Boro Zone"
|
||||
188,"Brooklyn","Prospect-Lefferts Gardens","Boro Zone"
|
||||
189,"Brooklyn","Prospect Heights","Boro Zone"
|
||||
190,"Brooklyn","Prospect Park","Boro Zone"
|
||||
191,"Queens","Queens Village","Boro Zone"
|
||||
192,"Queens","Queensboro Hill","Boro Zone"
|
||||
193,"Queens","Queensbridge/Ravenswood","Boro Zone"
|
||||
194,"Manhattan","Randalls Island","Yellow Zone"
|
||||
195,"Brooklyn","Red Hook","Boro Zone"
|
||||
196,"Queens","Rego Park","Boro Zone"
|
||||
197,"Queens","Richmond Hill","Boro Zone"
|
||||
198,"Queens","Ridgewood","Boro Zone"
|
||||
199,"Bronx","Rikers Island","Boro Zone"
|
||||
200,"Bronx","Riverdale/North Riverdale/Fieldston","Boro Zone"
|
||||
201,"Queens","Rockaway Park","Boro Zone"
|
||||
202,"Manhattan","Roosevelt Island","Boro Zone"
|
||||
203,"Queens","Rosedale","Boro Zone"
|
||||
204,"Staten Island","Rossville/Woodrow","Boro Zone"
|
||||
205,"Queens","Saint Albans","Boro Zone"
|
||||
206,"Staten Island","Saint George/New Brighton","Boro Zone"
|
||||
207,"Queens","Saint Michaels Cemetery/Woodside","Boro Zone"
|
||||
208,"Bronx","Schuylerville/Edgewater Park","Boro Zone"
|
||||
209,"Manhattan","Seaport","Yellow Zone"
|
||||
210,"Brooklyn","Sheepshead Bay","Boro Zone"
|
||||
211,"Manhattan","SoHo","Yellow Zone"
|
||||
212,"Bronx","Soundview/Bruckner","Boro Zone"
|
||||
213,"Bronx","Soundview/Castle Hill","Boro Zone"
|
||||
214,"Staten Island","South Beach/Dongan Hills","Boro Zone"
|
||||
215,"Queens","South Jamaica","Boro Zone"
|
||||
216,"Queens","South Ozone Park","Boro Zone"
|
||||
217,"Brooklyn","South Williamsburg","Boro Zone"
|
||||
218,"Queens","Springfield Gardens North","Boro Zone"
|
||||
219,"Queens","Springfield Gardens South","Boro Zone"
|
||||
220,"Bronx","Spuyten Duyvil/Kingsbridge","Boro Zone"
|
||||
221,"Staten Island","Stapleton","Boro Zone"
|
||||
222,"Brooklyn","Starrett City","Boro Zone"
|
||||
223,"Queens","Steinway","Boro Zone"
|
||||
224,"Manhattan","Stuy Town/Peter Cooper Village","Yellow Zone"
|
||||
225,"Brooklyn","Stuyvesant Heights","Boro Zone"
|
||||
226,"Queens","Sunnyside","Boro Zone"
|
||||
227,"Brooklyn","Sunset Park East","Boro Zone"
|
||||
228,"Brooklyn","Sunset Park West","Boro Zone"
|
||||
229,"Manhattan","Sutton Place/Turtle Bay North","Yellow Zone"
|
||||
230,"Manhattan","Times Sq/Theatre District","Yellow Zone"
|
||||
231,"Manhattan","TriBeCa/Civic Center","Yellow Zone"
|
||||
232,"Manhattan","Two Bridges/Seward Park","Yellow Zone"
|
||||
233,"Manhattan","UN/Turtle Bay South","Yellow Zone"
|
||||
234,"Manhattan","Union Sq","Yellow Zone"
|
||||
235,"Bronx","University Heights/Morris Heights","Boro Zone"
|
||||
236,"Manhattan","Upper East Side North","Yellow Zone"
|
||||
237,"Manhattan","Upper East Side South","Yellow Zone"
|
||||
238,"Manhattan","Upper West Side North","Yellow Zone"
|
||||
239,"Manhattan","Upper West Side South","Yellow Zone"
|
||||
240,"Bronx","Van Cortlandt Park","Boro Zone"
|
||||
241,"Bronx","Van Cortlandt Village","Boro Zone"
|
||||
242,"Bronx","Van Nest/Morris Park","Boro Zone"
|
||||
243,"Manhattan","Washington Heights North","Boro Zone"
|
||||
244,"Manhattan","Washington Heights South","Boro Zone"
|
||||
245,"Staten Island","West Brighton","Boro Zone"
|
||||
246,"Manhattan","West Chelsea/Hudson Yards","Yellow Zone"
|
||||
247,"Bronx","West Concourse","Boro Zone"
|
||||
248,"Bronx","West Farms/Bronx River","Boro Zone"
|
||||
249,"Manhattan","West Village","Yellow Zone"
|
||||
250,"Bronx","Westchester Village/Unionport","Boro Zone"
|
||||
251,"Staten Island","Westerleigh","Boro Zone"
|
||||
252,"Queens","Whitestone","Boro Zone"
|
||||
253,"Queens","Willets Point","Boro Zone"
|
||||
254,"Bronx","Williamsbridge/Olinville","Boro Zone"
|
||||
255,"Brooklyn","Williamsburg (North Side)","Boro Zone"
|
||||
256,"Brooklyn","Williamsburg (South Side)","Boro Zone"
|
||||
257,"Brooklyn","Windsor Terrace","Boro Zone"
|
||||
258,"Queens","Woodhaven","Boro Zone"
|
||||
259,"Bronx","Woodlawn/Wakefield","Boro Zone"
|
||||
260,"Queens","Woodside","Boro Zone"
|
||||
261,"Manhattan","World Trade Center","Yellow Zone"
|
||||
262,"Manhattan","Yorkville East","Yellow Zone"
|
||||
263,"Manhattan","Yorkville West","Yellow Zone"
|
||||
264,"Unknown","NV","N/A"
|
||||
265,"Unknown","NA","N/A"
|
||||
|
@ -1,49 +0,0 @@
|
||||
|
||||
# Name your project! Project names should contain only lowercase characters
|
||||
# and underscores. A good package name should reflect your organization's
|
||||
# name or the intended use of these models
|
||||
name: 'taxi_rides_ny'
|
||||
version: '1.0.0'
|
||||
config-version: 2
|
||||
|
||||
# This setting configures which "profile" dbt uses for this project.
|
||||
profile: 'pg-dbt-workshop'
|
||||
|
||||
# These configurations specify where dbt should look for different types of files.
|
||||
# The `model-paths` config, for example, states that models in this project can be
|
||||
# found in the "models/" directory. You probably won't need to change these!
|
||||
model-paths: ["models"]
|
||||
analysis-paths: ["analysis"]
|
||||
test-paths: ["tests"]
|
||||
seed-paths: ["data"]
|
||||
macro-paths: ["macros"]
|
||||
snapshot-paths: ["snapshots"]
|
||||
|
||||
target-path: "target" # directory which will store compiled SQL files
|
||||
clean-targets: # directories to be removed by `dbt clean`
|
||||
- "target"
|
||||
- "dbt_packages"
|
||||
- "dbt_modules"
|
||||
|
||||
|
||||
# Configuring models
|
||||
# Full documentation: https://docs.getdbt.com/docs/configuring-models
|
||||
|
||||
# In this config, we tell dbt to build models in staging/ as views and models in core/
|
||||
# as tables. These settings can be overridden in the individual model files
|
||||
# using the `{{ config(...) }}` macro.
|
||||
models:
|
||||
taxi_rides_ny:
|
||||
# Applies to all files under models/.../
|
||||
staging:
|
||||
materialized: view
|
||||
core:
|
||||
materialized: table
|
||||
vars:
|
||||
rateID_values: [ 1,2,3,4,5,6]
|
||||
|
||||
seeds:
|
||||
taxi_rides_ny:
|
||||
taxi_zone_lookup:
|
||||
+column_types:
|
||||
locationid: numeric
|
||||
@ -1,19 +0,0 @@
|
||||
{#
    get_payment_type_description(payment_type)

    Renders a SQL `case` expression that maps a payment_type code to its
    text label. `payment_type` is the column name (or SQL expression) to
    inspect; it is cast to integer before being compared. Any value that is
    not one of the listed codes, including null, yields 'EMPTY'.
#}

{% macro get_payment_type_description(payment_type) -%}

    {% set payment_labels = [
        (1, 'Credit card'),
        (2, 'Cash'),
        (3, 'No charge'),
        (4, 'Dispute'),
        (5, 'Unknown'),
        (6, 'Voided trip')
    ] %}

    case cast({{ payment_type }} as integer)
    {% for code, label in payment_labels %}
        when {{ code }} then '{{ label }}'
    {% endfor %}
        else 'EMPTY'
    end

{%- endmacro %}
|
||||
|
||||
|
||||
@ -1,12 +0,0 @@
|
||||
version: 2
|
||||
|
||||
macros:
|
||||
- name: get_payment_type_description
|
||||
description: >
|
||||
This macro receives a payment_type and returns the corresponding description.
|
||||
arguments:
|
||||
- name: payment_type
|
||||
type: int
|
||||
description: >
|
||||
payment_type value.
|
||||
Must be one of the accepted values (1-6), otherwise the macro will return 'EMPTY'
|
||||
@ -1,9 +0,0 @@
|
||||
{{ config(materialized='table') }}

-- Zone dimension built from the taxi_zone_lookup seed.
-- The text 'Boro' inside service_zone is replaced with 'Green'
-- (e.g. 'Boro Zone' becomes 'Green Zone').
with zone_lookup as (
    select * from {{ ref('taxi_zone_lookup') }}
)

select
    locationid,
    borough,
    zone,
    replace(service_zone, 'Boro', 'Green') as service_zone
from zone_lookup
|
||||
@ -1,31 +0,0 @@
|
||||
{{ config(materialized='table') }}

-- Monthly revenue data mart: one row per pickup zone, calendar month and
-- service type, aggregated from fact_trips.
with trips_data as (
    select * from {{ ref('fact_trips') }}
)

select
    -- Revenue grouping
    pickup_zone as revenue_zone,
    date_trunc('month', pickup_datetime) as revenue_month,
    -- Note: For BQ use instead: date_trunc(pickup_datetime, month) as revenue_month,
    service_type,

    -- Revenue calculation
    sum(fare_amount) as revenue_monthly_fare,
    sum(extra) as revenue_monthly_extra,
    sum(mta_tax) as revenue_monthly_mta_tax,
    sum(tip_amount) as revenue_monthly_tip_amount,
    sum(tolls_amount) as revenue_monthly_tolls_amount,
    sum(ehail_fee) as revenue_monthly_ehail_fee,
    sum(improvement_surcharge) as revenue_monthly_improvement_surcharge,
    sum(total_amount) as revenue_monthly_total_amount,
    -- revenue_monthly_congestion_surcharge is intentionally absent:
    -- fact_trips does not select a congestion_surcharge column, so summing
    -- it here made this model fail to build. Re-add the aggregate once the
    -- staging models and fact_trips expose that column.

    -- Additional calculations
    -- count(tripid) counts rows with a non-null tripid.
    count(tripid) as total_monthly_trips,
    -- "montly" (sic): the misspelled column names are kept so that existing
    -- consumers of this table keep working.
    avg(passenger_count) as avg_montly_passenger_count,
    avg(trip_distance) as avg_montly_trip_distance

from trips_data
group by
    revenue_zone,
    revenue_month,
    service_type
|
||||
@ -1,56 +0,0 @@
|
||||
{{ config(materialized='table') }}

-- Fact table of taxi trips: the green and yellow staging models stacked
-- together, each row tagged with its service_type and enriched with the
-- borough and zone of its pickup and dropoff locations.
-- Zones whose borough is 'Unknown' are excluded and both joins are inner
-- joins, so a trip is kept only when both of its locations match a
-- remaining zone.
with green_trips as (
    select
        *,
        'Green' as service_type
    from {{ ref('stg_green_tripdata') }}
),

yellow_trips as (
    select
        *,
        'Yellow' as service_type
    from {{ ref('stg_yellow_tripdata') }}
),

-- Relies on both staging models exposing the same columns in the same order.
all_trips as (
    select * from green_trips
    union all
    select * from yellow_trips
),

known_zones as (
    select *
    from {{ ref('dim_zones') }}
    where borough != 'Unknown'
)

select
    all_trips.tripid,
    all_trips.vendorid,
    all_trips.service_type,
    all_trips.ratecodeid,
    all_trips.pickup_locationid,
    pu_zone.borough as pickup_borough,
    pu_zone.zone as pickup_zone,
    all_trips.dropoff_locationid,
    do_zone.borough as dropoff_borough,
    do_zone.zone as dropoff_zone,
    all_trips.pickup_datetime,
    all_trips.dropoff_datetime,
    all_trips.store_and_fwd_flag,
    all_trips.passenger_count,
    all_trips.trip_distance,
    all_trips.trip_type,
    all_trips.fare_amount,
    all_trips.extra,
    all_trips.mta_tax,
    all_trips.tip_amount,
    all_trips.tolls_amount,
    all_trips.ehail_fee,
    all_trips.improvement_surcharge,
    all_trips.total_amount,
    all_trips.payment_type,
    all_trips.payment_type_description
from all_trips
inner join known_zones as pu_zone
    on all_trips.pickup_locationid = pu_zone.locationid
inner join known_zones as do_zone
    on all_trips.dropoff_locationid = do_zone.locationid
|
||||
@ -1,26 +0,0 @@
|
||||
version: 2
|
||||
|
||||
models:
|
||||
- name: dim_zones
|
||||
description: >
|
||||
List of unique zones identified by locationid.
|
||||
Includes the service zone they correspond to (Green or yellow).
|
||||
- name: fact_trips
|
||||
description: >
|
||||
Taxi trips corresponding to both service zones (Green and yellow).
|
||||
The table contains records where both pickup and dropoff locations are valid and known zones.
|
||||
Each record corresponds to a trip uniquely identified by tripid.
|
||||
|
||||
- name: dm_monthly_zone_revenue
|
||||
description: >
|
||||
Aggregated table of all taxi trips corresponding to both service zones (Green and yellow) per pickup zone, month and service.
|
||||
The table contains monthly sums of the fare elements used to calculate the monthly revenue.
|
||||
The table also contains monthly indicators such as the number of trips and the average trip distance.
|
||||
columns:
|
||||
- name: revenue_monthly_total_amount
|
||||
description: Monthly sum of the total_amount of the fare charged for the trip per pickup zone, month and service.
|
||||
tests:
|
||||
- not_null:
|
||||
severity: error
|
||||
|
||||
|
||||
@ -1,193 +0,0 @@
|
||||
|
||||
version: 2
|
||||
|
||||
sources:
|
||||
- name: staging
|
||||
#For bigquery:
|
||||
database: taxi-rides-ny-339813-412521
|
||||
|
||||
# For postgres:
|
||||
#database: production
|
||||
|
||||
schema: trips_data_all
|
||||
|
||||
# loaded_at_field: record_loaded_at
|
||||
tables:
|
||||
- name: green_tripdata
|
||||
- name: yellow_tripdata
|
||||
# freshness:
|
||||
# error_after: {count: 6, period: hour}
|
||||
|
||||
models:
|
||||
- name: stg_green_tripdata
|
||||
description: >
|
||||
Trips made by green taxis, also known as boro taxis and street-hail liveries.
|
||||
Green taxis may respond to street hails, but only in the areas indicated in green on the
|
||||
map (i.e. above W 110 St/E 96th St in Manhattan and in the boroughs).
|
||||
The records were collected and provided to the NYC Taxi and Limousine Commission (TLC) by
|
||||
technology service providers.
|
||||
columns:
|
||||
- name: tripid
|
||||
description: Primary key for this table, generated with a concatenation of vendorid+pickup_datetime
|
||||
tests:
|
||||
- unique:
|
||||
severity: warn
|
||||
- not_null:
|
||||
severity: warn
|
||||
- name: VendorID
|
||||
description: >
|
||||
A code indicating the LPEP provider that provided the record.
|
||||
1= Creative Mobile Technologies, LLC;
|
||||
2= VeriFone Inc.
|
||||
- name: pickup_datetime
|
||||
description: The date and time when the meter was engaged.
|
||||
- name: dropoff_datetime
|
||||
description: The date and time when the meter was disengaged.
|
||||
- name: Passenger_count
|
||||
description: The number of passengers in the vehicle. This is a driver-entered value.
|
||||
- name: Trip_distance
|
||||
description: The elapsed trip distance in miles reported by the taximeter.
|
||||
- name: Pickup_locationid
|
||||
description: locationid where the meter was engaged.
|
||||
tests:
|
||||
- relationships:
|
||||
to: ref('taxi_zone_lookup')
|
||||
field: locationid
|
||||
severity: warn
|
||||
- name: dropoff_locationid
|
||||
description: locationid where the meter was disengaged.
|
||||
tests:
|
||||
- relationships:
|
||||
to: ref('taxi_zone_lookup')
|
||||
field: locationid
|
||||
- name: RateCodeID
|
||||
description: >
|
||||
The final rate code in effect at the end of the trip.
|
||||
1= Standard rate
|
||||
2=JFK
|
||||
3=Newark
|
||||
4=Nassau or Westchester
|
||||
5=Negotiated fare
|
||||
6=Group ride
|
||||
- name: Store_and_fwd_flag
|
||||
description: >
|
||||
This flag indicates whether the trip record was held in vehicle
|
||||
memory before sending to the vendor, aka “store and forward,”
|
||||
because the vehicle did not have a connection to the server.
|
||||
Y= store and forward trip
|
||||
N = not a store and forward trip
|
||||
- name: Dropoff_longitude
|
||||
description: Longitude where the meter was disengaged.
|
||||
- name: Dropoff_latitude
|
||||
description: Latitude where the meter was disengaged.
|
||||
- name: Payment_type
|
||||
description: >
|
||||
A numeric code signifying how the passenger paid for the trip.
|
||||
- name: payment_type_description
|
||||
description: Description of the payment_type code
|
||||
- name: Fare_amount
|
||||
description: >
|
||||
The time-and-distance fare calculated by the meter.
|
||||
Extra Miscellaneous extras and surcharges. Currently, this only includes
|
||||
the $0.50 and $1 rush hour and overnight charges.
|
||||
MTA_tax $0.50 MTA tax that is automatically triggered based on the metered
|
||||
rate in use.
|
||||
- name: Improvement_surcharge
|
||||
description: >
|
||||
$0.30 improvement surcharge assessed trips at the flag drop. The
|
||||
improvement surcharge began being levied in 2015.
|
||||
- name: Tip_amount
|
||||
description: >
|
||||
Tip amount. This field is automatically populated for credit card
|
||||
tips. Cash tips are not included.
|
||||
- name: Tolls_amount
|
||||
description: Total amount of all tolls paid in trip.
|
||||
- name: Total_amount
|
||||
description: The total amount charged to passengers. Does not include cash tips.
|
||||
|
||||
- name: stg_yellow_tripdata
|
||||
description: >
|
||||
Trips made by New York City's iconic yellow taxis.
|
||||
Yellow taxis are the only vehicles permitted to respond to a street hail from a passenger in all five
|
||||
boroughs. They may also be hailed using an e-hail app like Curb or Arro.
|
||||
The records were collected and provided to the NYC Taxi and Limousine Commission (TLC) by
|
||||
technology service providers.
|
||||
columns:
|
||||
- name: tripid
|
||||
description: Primary key for this table, generated with a concatenation of vendorid+pickup_datetime
|
||||
tests:
|
||||
- unique:
|
||||
severity: warn
|
||||
- not_null:
|
||||
severity: warn
|
||||
- name: VendorID
|
||||
description: >
|
||||
A code indicating the TPEP provider that provided the record.
|
||||
1= Creative Mobile Technologies, LLC;
|
||||
2= VeriFone Inc.
|
||||
- name: pickup_datetime
|
||||
description: The date and time when the meter was engaged.
|
||||
- name: dropoff_datetime
|
||||
description: The date and time when the meter was disengaged.
|
||||
- name: Passenger_count
|
||||
description: The number of passengers in the vehicle. This is a driver-entered value.
|
||||
- name: Trip_distance
|
||||
description: The elapsed trip distance in miles reported by the taximeter.
|
||||
- name: Pickup_locationid
|
||||
description: locationid where the meter was engaged.
|
||||
tests:
|
||||
- relationships:
|
||||
to: ref('taxi_zone_lookup')
|
||||
field: locationid
|
||||
severity: warn
|
||||
- name: dropoff_locationid
|
||||
description: locationid where the meter was disengaged.
|
||||
tests:
|
||||
- relationships:
|
||||
to: ref('taxi_zone_lookup')
|
||||
field: locationid
|
||||
severity: warn
|
||||
- name: RateCodeID
|
||||
description: >
|
||||
The final rate code in effect at the end of the trip.
|
||||
1= Standard rate
|
||||
2=JFK
|
||||
3=Newark
|
||||
4=Nassau or Westchester
|
||||
5=Negotiated fare
|
||||
6=Group ride
|
||||
- name: Store_and_fwd_flag
|
||||
description: >
|
||||
This flag indicates whether the trip record was held in vehicle
|
||||
memory before sending to the vendor, aka “store and forward,”
|
||||
because the vehicle did not have a connection to the server.
|
||||
Y= store and forward trip
|
||||
N= not a store and forward trip
|
||||
- name: Dropoff_longitude
|
||||
description: Longitude where the meter was disengaged.
|
||||
- name: Dropoff_latitude
|
||||
description: Latitude where the meter was disengaged.
|
||||
- name: Payment_type
|
||||
description: >
|
||||
A numeric code signifying how the passenger paid for the trip.
|
||||
- name: payment_type_description
|
||||
description: Description of the payment_type code
|
||||
- name: Fare_amount
|
||||
description: >
|
||||
The time-and-distance fare calculated by the meter.
|
||||
Extra Miscellaneous extras and surcharges. Currently, this only includes
|
||||
the $0.50 and $1 rush hour and overnight charges.
|
||||
MTA_tax $0.50 MTA tax that is automatically triggered based on the metered
|
||||
rate in use.
|
||||
- name: Improvement_surcharge
|
||||
description: >
|
||||
$0.30 improvement surcharge assessed trips at the flag drop. The
|
||||
improvement surcharge began being levied in 2015.
|
||||
- name: Tip_amount
|
||||
description: >
|
||||
Tip amount. This field is automatically populated for credit card
|
||||
tips. Cash tips are not included.
|
||||
- name: Tolls_amount
|
||||
description: Total amount of all tolls paid in trip.
|
||||
- name: Total_amount
|
||||
description: The total amount charged to passengers. Does not include cash tips.
|
||||
@ -1,48 +0,0 @@
|
||||
{{ config(materialized='view') }}

{#
    Staging view over the raw green taxi trip source.

    - Drops rows whose vendorid is null.
    - Keeps one row per (vendorid, lpep_pickup_datetime) pair.
    - Casts the raw columns to explicit types and renames the location and
      timestamp columns to the names used by downstream models.
    - When the is_test_run variable is true (the default) only 100 rows are
      returned, so development runs stay cheap.
#}

with tripdata as
(
  select *,
    -- rn numbers the rows that share a (vendorid, lpep_pickup_datetime) pair
    -- so the outer query can keep exactly one of them.
    -- NOTE(review): the window has no ORDER BY, so which duplicate receives
    -- rn = 1 is left to the engine; add an ordering if a specific row must win.
    row_number() over(partition by vendorid, lpep_pickup_datetime) as rn
  from {{ source('staging','green_tripdata') }}
  where vendorid is not null
)
select
    -- identifiers
    -- The project pins dbt_utils 1.1.1, where surrogate_key was replaced by
    -- generate_surrogate_key (the old name fails at compile time).
    -- NOTE(review): the new macro hashes nulls differently from empty strings;
    -- this only matters for rows with a null pickup timestamp. Confirm no
    -- persisted tripid values depend on the old hashing.
    {{ dbt_utils.generate_surrogate_key(['vendorid', 'lpep_pickup_datetime']) }} as tripid,
    cast(vendorid as integer) as vendorid,
    cast(ratecodeid as integer) as ratecodeid,
    cast(pulocationid as integer) as pickup_locationid,
    cast(dolocationid as integer) as dropoff_locationid,

    -- timestamps
    cast(lpep_pickup_datetime as timestamp) as pickup_datetime,
    cast(lpep_dropoff_datetime as timestamp) as dropoff_datetime,

    -- trip info
    store_and_fwd_flag,
    cast(passenger_count as integer) as passenger_count,
    cast(trip_distance as numeric) as trip_distance,
    cast(trip_type as integer) as trip_type,

    -- payment info
    cast(fare_amount as numeric) as fare_amount,
    cast(extra as numeric) as extra,
    cast(mta_tax as numeric) as mta_tax,
    cast(tip_amount as numeric) as tip_amount,
    cast(tolls_amount as numeric) as tolls_amount,
    cast(ehail_fee as numeric) as ehail_fee,
    cast(improvement_surcharge as numeric) as improvement_surcharge,
    cast(total_amount as numeric) as total_amount,
    -- a missing payment type is reported as code 0
    coalesce(cast(payment_type as integer),0) as payment_type,
    -- NOTE(review): the macro receives the raw payment_type column, not the
    -- coalesced value above; verify how it labels null codes.
    {{ get_payment_type_description('payment_type') }} as payment_type_description
from tripdata
where rn = 1


-- Full (non-sampled) build:
--   dbt build --select <model_name> --vars 'is_test_run: false'
{% if var('is_test_run', default=true) %}

  limit 100

{% endif %}
|
||||
@ -1,49 +0,0 @@
|
||||
{{ config(materialized='view') }}

{#
    Staging view over the raw yellow taxi trip source.

    - Drops rows whose vendorid is null.
    - Keeps one row per (vendorid, tpep_pickup_datetime) pair.
    - Casts the raw columns to explicit types and renames the location and
      timestamp columns to the names used by downstream models.
    - Emits constant trip_type and ehail_fee columns, which are not read from
      the source here.
    - When the is_test_run variable is true (the default) only 100 rows are
      returned, so development runs stay cheap.
#}

with tripdata as
(
  select *,
    -- rn numbers the rows that share a (vendorid, tpep_pickup_datetime) pair
    -- so the outer query can keep exactly one of them.
    -- NOTE(review): the window has no ORDER BY, so which duplicate receives
    -- rn = 1 is left to the engine; add an ordering if a specific row must win.
    row_number() over(partition by vendorid, tpep_pickup_datetime) as rn
  from {{ source('staging','yellow_tripdata') }}
  where vendorid is not null
)
select
    -- identifiers
    -- The project pins dbt_utils 1.1.1, where surrogate_key was replaced by
    -- generate_surrogate_key (the old name fails at compile time).
    -- NOTE(review): the new macro hashes nulls differently from empty strings;
    -- this only matters for rows with a null pickup timestamp. Confirm no
    -- persisted tripid values depend on the old hashing.
    {{ dbt_utils.generate_surrogate_key(['vendorid', 'tpep_pickup_datetime']) }} as tripid,
    cast(vendorid as integer) as vendorid,
    cast(ratecodeid as integer) as ratecodeid,
    cast(pulocationid as integer) as pickup_locationid,
    cast(dolocationid as integer) as dropoff_locationid,

    -- timestamps
    cast(tpep_pickup_datetime as timestamp) as pickup_datetime,
    cast(tpep_dropoff_datetime as timestamp) as dropoff_datetime,

    -- trip info
    store_and_fwd_flag,
    cast(passenger_count as integer) as passenger_count,
    cast(trip_distance as numeric) as trip_distance,
    -- yellow cabs are always street-hail
    1 as trip_type,

    -- payment info
    cast(fare_amount as numeric) as fare_amount,
    cast(extra as numeric) as extra,
    cast(mta_tax as numeric) as mta_tax,
    cast(tip_amount as numeric) as tip_amount,
    cast(tolls_amount as numeric) as tolls_amount,
    -- constant zero keeps the ehail_fee column present in this model's output
    cast(0 as numeric) as ehail_fee,
    cast(improvement_surcharge as numeric) as improvement_surcharge,
    cast(total_amount as numeric) as total_amount,
    -- a missing payment type is reported as code 0
    coalesce(cast(payment_type as integer),0) as payment_type,
    -- NOTE(review): the macro receives the raw payment_type column, not the
    -- coalesced value above; verify how it labels null codes.
    {{ get_payment_type_description('payment_type') }} as payment_type_description
from tripdata
where rn = 1

-- Full (non-sampled) build:
--   dbt build --select <model_name> --vars 'is_test_run: false'
{% if var('is_test_run', default=true) %}

  limit 100

{% endif %}
|
||||
|
||||
@ -1,4 +0,0 @@
|
||||
# Resolved dbt package set: dbt-labs/dbt_utils at version 1.1.1.
# NOTE(review): the sha1_hash field suggests this is the generated lock file
# rather than the hand-written manifest; if so, refresh it with `dbt deps`
# instead of editing it by hand. Confirm against the file name.
packages:
|
||||
- package: dbt-labs/dbt_utils
|
||||
version: 1.1.1
|
||||
sha1_hash: a158c48c59c2bb7d729d2a4e215aabe5bb4f3353
|
||||
@ -1,3 +0,0 @@
|
||||
# dbt package dependencies for this project; install them with `dbt deps`.
# dbt_utils is pinned to 1.1.1.
# NOTE(review): the 1.x line of dbt_utils replaced surrogate_key with
# generate_surrogate_key, so models must call the new name; verify.
packages:
|
||||
- package: dbt-labs/dbt_utils
|
||||
version: 1.1.1
|
||||
Reference in New Issue
Block a user