More Classes with the Flight Dataset
This example is from TDM 102 Project 12 Spring 2024.
This example depends on the following data set:
- /anvil/projects/tdm/data/flights/2014.csv
Learn more about the dataset here.
You need to use 2 cores for your Jupyter Lab session for this example.
You can use |
1a. In the previous project, you created a class named `Flight`, which contains attributes for the flight number, origin airport ID, destination airport ID, departure time, arrival time, departure delay, and arrival delay. Now let us use this class as a base class. Create a new subclass called `ScheduledFlight`. Add 2 more attributes to this new subclass: `CRSDepTime` and `CRSArrTime`.
class Flight:
    """One flight record: identifiers, actual times, and delays.

    Every constructor argument is stored unchanged on the instance under
    the attribute of the same name.
    """

    def __init__(self, flight_number, origin_airport_id, dest_airport_id,
                 dep_time, arr_time, dep_delay, arr_delay):
        """Store the flight's fields.

        Args:
            flight_number: Flight number.
            origin_airport_id: Identifier of the origin airport.
            dest_airport_id: Identifier of the destination airport.
            dep_time: Actual departure time.
            arr_time: Actual arrival time.
            dep_delay: Departure delay.
            arr_delay: Arrival delay.
        """
        self.flight_number = flight_number
        self.origin_airport_id = origin_airport_id
        self.dest_airport_id = dest_airport_id
        self.dep_time = dep_time
        self.arr_time = arr_time
        self.dep_delay = dep_delay
        self.arr_delay = arr_delay


class ScheduledFlight(Flight):
    """A Flight that also records its scheduled (CRS) times."""

    def __init__(self, flight_number, origin_airport_id, dest_airport_id,
                 dep_time, arr_time, dep_delay, arr_delay,
                 crs_dep_time, crs_arr_time):
        """Store the Flight fields plus the two scheduled times.

        Args:
            crs_dep_time: Scheduled departure time (CRSDepTime).
            crs_arr_time: Scheduled arrival time (CRSArrTime).

        The remaining arguments are passed straight to Flight.__init__.
        """
        super().__init__(flight_number, origin_airport_id, dest_airport_id,
                         dep_time, arr_time, dep_delay, arr_delay)
        self.crs_dep_time = crs_dep_time
        self.crs_arr_time = crs_arr_time
1b. Add a method called `is_ontime()` to the class, which returns a boolean value that indicates if the flight departs on time and arrives on time.
class Flight:
    """One flight record: identifiers, actual times, and delays.

    Every constructor argument is stored unchanged on the instance under
    the attribute of the same name.
    """

    def __init__(self, flight_number, origin_airport_id, dest_airport_id,
                 dep_time, arr_time, dep_delay, arr_delay):
        """Store the flight's fields.

        Args:
            flight_number: Flight number.
            origin_airport_id: Identifier of the origin airport.
            dest_airport_id: Identifier of the destination airport.
            dep_time: Actual departure time.
            arr_time: Actual arrival time.
            dep_delay: Departure delay; values <= 0 count as on time.
            arr_delay: Arrival delay; values <= 0 count as on time.
        """
        self.flight_number = flight_number
        self.origin_airport_id = origin_airport_id
        self.dest_airport_id = dest_airport_id
        self.dep_time = dep_time
        self.arr_time = arr_time
        self.dep_delay = dep_delay
        self.arr_delay = arr_delay


class ScheduledFlight(Flight):
    """A Flight that also records its scheduled (CRS) times."""

    def __init__(self, flight_number, origin_airport_id, dest_airport_id,
                 dep_time, arr_time, dep_delay, arr_delay,
                 crs_dep_time, crs_arr_time):
        """Store the Flight fields plus the two scheduled times.

        Args:
            crs_dep_time: Scheduled departure time (CRSDepTime).
            crs_arr_time: Scheduled arrival time (CRSArrTime).

        The remaining arguments are passed straight to Flight.__init__.
        """
        super().__init__(flight_number, origin_airport_id, dest_airport_id,
                         dep_time, arr_time, dep_delay, arr_delay)
        self.crs_dep_time = crs_dep_time
        self.crs_arr_time = crs_arr_time

    def is_ontime(self):
        """Return True if the flight neither departed nor arrived late.

        A flight is on time when both delays are <= 0. A missing (NaN)
        delay compares False against 0, so such a flight is reported as
        not on time.

        Returns:
            bool: always a plain Python bool, even when the stored delays
            are NumPy scalars (whose comparisons yield numpy.bool_).
        """
        return bool(self.dep_delay <= 0 and self.arr_delay <= 0)
2a. Create a DataFrame named `myDF` to store data from the `2014.csv` data set. It suffices to import (only) the columns listed below, and to read in (only) the first 100 rows. Although we provide `columns_to_read`, please make (and use) a dictionary of `col_types`, like we did in Question 1 of Project 10.
# Mapping of every column name in the flights CSV to the dtype to parse it as.
# Columns that can be empty are float64 (NaN-capable) rather than int64;
# text columns are "object".
col_types = {
    # Date of the flight.
    "Year": "int64",
    "Quarter": "int64",
    "Month": "int64",
    "DayofMonth": "int64",
    "DayOfWeek": "int64",
    "FlightDate": "object",
    # Airline and aircraft.
    "Reporting_Airline": "object",
    "DOT_ID_Reporting_Airline": "int64",
    "IATA_CODE_Reporting_Airline": "object",
    "Tail_Number": "object",
    "Flight_Number_Reporting_Airline": "int64",
    # Origin airport.
    "OriginAirportID": "int64",
    "OriginAirportSeqID": "int64",
    "OriginCityMarketID": "int64",
    "Origin": "object",
    "OriginCityName": "object",
    "OriginState": "object",
    "OriginStateFips": "int64",
    "OriginStateName": "object",
    "OriginWac": "int64",
    # Destination airport.
    "DestAirportID": "int64",
    "DestAirportSeqID": "int64",
    "DestCityMarketID": "int64",
    "Dest": "object",
    "DestCityName": "object",
    "DestState": "object",
    "DestStateFips": "int64",
    "DestStateName": "object",
    "DestWac": "int64",
    # Departure.
    "CRSDepTime": "int64",
    "DepTime": "float64",
    "DepDelay": "float64",
    "DepDelayMinutes": "float64",
    "DepDel15": "float64",
    "DepartureDelayGroups": "float64",
    "DepTimeBlk": "object",
    "TaxiOut": "float64",
    "WheelsOff": "float64",
    # Arrival.
    "WheelsOn": "float64",
    "TaxiIn": "float64",
    "CRSArrTime": "int64",
    "ArrTime": "float64",
    "ArrDelay": "float64",
    "ArrDelayMinutes": "float64",
    "ArrDel15": "float64",
    "ArrivalDelayGroups": "float64",
    "ArrTimeBlk": "object",
    # Cancellation / diversion flags.
    "Cancelled": "float64",
    "CancellationCode": "object",
    "Diverted": "float64",
    # Flight summary.
    "CRSElapsedTime": "float64",
    "ActualElapsedTime": "float64",
    "AirTime": "float64",
    "Flights": "float64",
    "Distance": "float64",
    "DistanceGroup": "int64",
    # Delay causes.
    "CarrierDelay": "float64",
    "WeatherDelay": "float64",
    "NASDelay": "float64",
    "SecurityDelay": "float64",
    "LateAircraftDelay": "float64",
    # Gate-return information.
    "FirstDepTime": "float64",
    "TotalAddGTime": "float64",
    "LongestAddGTime": "float64",
    # Diverted-flight summary.
    "DivAirportLandings": "int64",
    "DivReachedDest": "float64",
    "DivActualElapsedTime": "float64",
    "DivArrDelay": "float64",
    "DivDistance": "float64",
    # Per-diversion details. DivNAirport and DivNTailNum are text fields, so
    # all five groups use "object" for them (as Div1 does); parsing them as
    # float64 would fail on any row where the field is actually populated.
    "Div1Airport": "object",
    "Div1AirportID": "float64",
    "Div1AirportSeqID": "float64",
    "Div1WheelsOn": "float64",
    "Div1TotalGTime": "float64",
    "Div1LongestGTime": "float64",
    "Div1WheelsOff": "float64",
    "Div1TailNum": "object",
    "Div2Airport": "object",
    "Div2AirportID": "float64",
    "Div2AirportSeqID": "float64",
    "Div2WheelsOn": "float64",
    "Div2TotalGTime": "float64",
    "Div2LongestGTime": "float64",
    "Div2WheelsOff": "float64",
    "Div2TailNum": "object",
    "Div3Airport": "object",
    "Div3AirportID": "float64",
    "Div3AirportSeqID": "float64",
    "Div3WheelsOn": "float64",
    "Div3TotalGTime": "float64",
    "Div3LongestGTime": "float64",
    "Div3WheelsOff": "float64",
    "Div3TailNum": "object",
    "Div4Airport": "object",
    "Div4AirportID": "float64",
    "Div4AirportSeqID": "float64",
    "Div4WheelsOn": "float64",
    "Div4TotalGTime": "float64",
    "Div4LongestGTime": "float64",
    "Div4WheelsOff": "float64",
    "Div4TailNum": "object",
    "Div5Airport": "object",
    "Div5AirportID": "float64",
    "Div5AirportSeqID": "float64",
    "Div5WheelsOn": "float64",
    "Div5TotalGTime": "float64",
    "Div5LongestGTime": "float64",
    "Div5WheelsOff": "float64",
    "Div5TailNum": "object",
}
import pandas as pd

# Location of the 2014 flights CSV.
filepath = '/anvil/projects/tdm/data/flights/2014.csv'

# Only these columns are loaded from the CSV.
columns_to_read = [
    'DepDelay', 'ArrDelay', 'Flight_Number_Reporting_Airline', 'Distance',
    'CarrierDelay', 'WeatherDelay', 'CRSDepTime', 'CRSArrTime',
    'DepTime', 'ArrTime', 'Origin', 'Dest', 'AirTime',
]

# dtype for each column in columns_to_read (this rebinds col_types to a
# mapping that covers just those columns).
# DepTime and ArrTime are float64, matching the full per-column mapping:
# an int64 column cannot hold a missing value, so read_csv would raise as
# soon as a row has an empty DepTime/ArrTime.
col_types = {
    'DepDelay': 'float64', 'ArrDelay': 'float64',
    'Flight_Number_Reporting_Airline': 'int64',
    'Distance': 'float64', 'CarrierDelay': 'float64', 'WeatherDelay': 'float64',
    'CRSDepTime': 'int64', 'CRSArrTime': 'int64',
    'DepTime': 'float64', 'ArrTime': 'float64',
    'Origin': 'object', 'Dest': 'object', 'AirTime': 'float64',
}

# Read just the first 100 data rows of the selected columns.
myDF = pd.read_csv(filepath, usecols=columns_to_read, nrows=100, dtype=col_types)

# One ScheduledFlight per row. The Origin/Dest airport codes are what get
# stored in the origin_airport_id/dest_airport_id attributes.
scheduled_flights = [
    ScheduledFlight(
        row['Flight_Number_Reporting_Airline'], row['Origin'], row['Dest'],
        row['DepTime'], row['ArrTime'], row['DepDelay'], row['ArrDelay'],
        row['CRSDepTime'], row['CRSArrTime'],
    )
    for _, row in myDF.iterrows()
]
2b. Load the data from `myDF` into `ScheduledFlight` class instances. (When you are finished, you should have a list of 100 `ScheduledFlight` instances.)
# Turn every row of myDF into a ScheduledFlight. Arguments are passed by
# keyword so the column-to-attribute mapping is explicit; note that the
# Origin/Dest airport codes fill the *_airport_id attributes.
scheduled_flights = [
    ScheduledFlight(
        flight_number=record['Flight_Number_Reporting_Airline'],
        origin_airport_id=record['Origin'],
        dest_airport_id=record['Dest'],
        dep_time=record['DepTime'],
        arr_time=record['ArrTime'],
        dep_delay=record['DepDelay'],
        arr_delay=record['ArrDelay'],
        crs_dep_time=record['CRSDepTime'],
        crs_arr_time=record['CRSArrTime'],
    )
    for _, record in myDF.iterrows()
]
3a. Create an empty dictionary named `ontime_count`. Then use a for loop to assign values to `ontime_count` from the 100 `ScheduledFlight` objects.
# Tally of on-time flights keyed by destination (flight.dest_airport_id).
# A destination only gets a key once it has at least one on-time flight, so
# destinations with none are absent rather than mapped to 0.
ontime_count = {}
for flight in scheduled_flights:
    if flight.is_ontime():
        dest = flight.dest_airport_id
        # .get(dest, 0) starts the tally at 0 the first time dest is seen.
        ontime_count[dest] = ontime_count.get(dest, 0) + 1
3b. Calculate the total number of flights that were on time, for each destination airport.
# Display the on-time totals per destination collected in ontime_count.
print(ontime_count)
{'ICT': 1, 'DFW': 12, 'TPA': 11, 'OGG': 7, 'SJC': 3, 'KOA': 4, 'SMF': 1, 'SEA': 7, 'PDX': 3, 'OAK': 2, 'HNL': 6, 'ANC': 2, 'LIH': 2, 'SAN': 2, 'BLI': 1, 'DCA': 2}
4a. Add a method called `is_delayed()` to the class that indicates if the flight was delayed (either had a departure delay or an arrival delay).
class DelayedFlight(ScheduledFlight):
    """A ScheduledFlight that can report whether it was delayed.

    The constructor is inherited from ScheduledFlight unchanged (same
    arguments, same order); an override that only forwarded every argument
    to super().__init__() would add nothing.
    """

    def is_delayed(self):
        """Return True if the flight departed late or arrived late.

        A flight is delayed when either delay is > 0. A missing (NaN)
        delay compares False against 0 and so does not count as a delay.

        Returns:
            bool: always a plain Python bool, even when the stored delays
            are NumPy scalars (whose comparisons yield numpy.bool_).
        """
        return bool(self.dep_delay > 0 or self.arr_delay > 0)
# Turn every row of myDF into a DelayedFlight. Arguments are passed by
# keyword so the column-to-attribute mapping is explicit; note that the
# Origin/Dest airport codes fill the *_airport_id attributes.
delayed_flights = [
    DelayedFlight(
        flight_number=record['Flight_Number_Reporting_Airline'],
        origin_airport_id=record['Origin'],
        dest_airport_id=record['Dest'],
        dep_time=record['DepTime'],
        arr_time=record['ArrTime'],
        dep_delay=record['DepDelay'],
        arr_delay=record['ArrDelay'],
        crs_dep_time=record['CRSDepTime'],
        crs_arr_time=record['CRSArrTime'],
    )
    for _, record in myDF.iterrows()
]
4b. Calculate the total number of delayed flights, for each destination airport.
# Tally of delayed flights keyed by destination (flight.dest_airport_id).
# A destination only gets a key once it has at least one delayed flight.
delayed_count = {}
for flight in delayed_flights:
    dest = flight.dest_airport_id
    if flight.is_delayed():
        # .get(dest, 0) starts the tally at 0 the first time dest is seen.
        delayed_count[dest] = delayed_count.get(dest, 0) + 1

print(delayed_count)
{'ICT': 1, 'DFW': 19, 'TPA': 9, 'SAN': 1, 'HNL': 1, 'SEA': 2, 'OGG': 1}