More Classes with the Flight Dataset

This example is from TDM 102 Project 12 Spring 2024.

This example depends on the following data set:

  • /anvil/projects/tdm/data/flights/2014.csv

Learn more about the dataset here.

You need to use 2 cores for your Jupyter Lab session for this example.

You can use pd.set_option('display.max_columns', None) if you want to see all of the columns in a very wide data frame.

1a. In the previous project, you created a class named Flight, which contains attributes for the flight number, origin airport ID, destination airport ID, departure time, arrival time, departure delay, and arrival delay. Now let us use this class as a base class. Create a new subclass called ScheduledFlight. Add 2 more attributes to this new subclass: CRSDepTime and CRSArrTime.

class Flight:
    """Hold the basic facts about one flight.

    Each constructor argument is stored, unchanged and unvalidated, on the
    attribute that shares its name.
    """

    def __init__(
        self,
        flight_number,
        origin_airport_id,
        dest_airport_id,
        dep_time,
        arr_time,
        dep_delay,
        arr_delay,
    ):
        # Which flight this is.
        self.flight_number = flight_number
        # Where it starts and where it ends.
        self.origin_airport_id, self.dest_airport_id = origin_airport_id, dest_airport_id
        # Departure/arrival times, followed by the corresponding delays.
        self.dep_time, self.arr_time = dep_time, arr_time
        self.dep_delay, self.arr_delay = dep_delay, arr_delay

class ScheduledFlight(Flight):
    """Flight extended with the CRSDepTime and CRSArrTime values.

    The seven Flight fields are handled by the base constructor; the two
    extra values are kept as ``crs_dep_time`` and ``crs_arr_time``.
    """

    def __init__(
        self,
        flight_number,
        origin_airport_id,
        dest_airport_id,
        dep_time,
        arr_time,
        dep_delay,
        arr_delay,
        crs_dep_time,
        crs_arr_time,
    ):
        # Delegate the shared fields to Flight, in its positional order.
        super().__init__(
            flight_number,
            origin_airport_id,
            dest_airport_id,
            dep_time,
            arr_time,
            dep_delay,
            arr_delay,
        )
        # Fields introduced by this subclass.
        self.crs_dep_time, self.crs_arr_time = crs_dep_time, crs_arr_time

1b. Add a method called is_ontime() to the class, which returns a boolean value that indicates if the flight departs on time and arrives on time.

class Flight:
    """Base record for a flight: number, endpoints, times, and delays.

    The constructor copies every argument onto the same-named attribute
    without converting or checking it.
    """

    def __init__(self, flight_number, origin_airport_id, dest_airport_id,
                 dep_time, arr_time, dep_delay, arr_delay):
        # One unpacking assignment; targets are bound left to right, so the
        # attributes are created in the same order as the parameters.
        (
            self.flight_number,
            self.origin_airport_id,
            self.dest_airport_id,
            self.dep_time,
            self.arr_time,
            self.dep_delay,
            self.arr_delay,
        ) = (
            flight_number,
            origin_airport_id,
            dest_airport_id,
            dep_time,
            arr_time,
            dep_delay,
            arr_delay,
        )

class ScheduledFlight(Flight):
    """Flight extended with CRSDepTime/CRSArrTime and an on-time check."""

    def __init__(
        self,
        flight_number,
        origin_airport_id,
        dest_airport_id,
        dep_time,
        arr_time,
        dep_delay,
        arr_delay,
        crs_dep_time,
        crs_arr_time,
    ):
        # The base class stores the seven fields it already knows about.
        super().__init__(
            flight_number,
            origin_airport_id,
            dest_airport_id,
            dep_time,
            arr_time,
            dep_delay,
            arr_delay,
        )
        # The two values this subclass adds.
        self.crs_dep_time, self.crs_arr_time = crs_dep_time, crs_arr_time

    def is_ontime(self):
        """Tell whether both the departure and the arrival were on time.

        A delay of zero or less counts as on time.  A NaN delay fails the
        ``<= 0`` comparison, so such a flight is reported as not on time.
        """
        departed_on_time = self.dep_delay <= 0
        # `and` short-circuits: the arrival delay is only compared when the
        # departure check passed.
        return departed_on_time and self.arr_delay <= 0

2a. Create a DataFrame named myDF, to store data from the 2014.csv data set. It suffices to import (only) the columns listed below, and to (only) read in the first 100 rows. Although we provide the columns_to_read, please make (and use) a dictionary of col_types like we did in Question 1 of Project 10.

# dtype to use for every column of the flights CSV, keyed by column name.
# Columns typed float64 rather than int64 are ones that may contain blanks.
#
# Div2-Div5 "Airport" and "TailNum" are typed "object", matching Div1Airport
# and Div1TailNum: they hold the same kind of text value, so a float64 dtype
# would make read_csv fail on any row where one of them is filled in.
col_types = {
    "Year": "int64",
    "Quarter": "int64",
    "Month": "int64",
    "DayofMonth": "int64",
    "DayOfWeek": "int64",
    "FlightDate": "object",
    "Reporting_Airline": "object",
    "DOT_ID_Reporting_Airline": "int64",
    "IATA_CODE_Reporting_Airline": "object",
    "Tail_Number": "object",
    "Flight_Number_Reporting_Airline": "int64",
    "OriginAirportID": "int64",
    "OriginAirportSeqID": "int64",
    "OriginCityMarketID": "int64",
    "Origin": "object",
    "OriginCityName": "object",
    "OriginState": "object",
    "OriginStateFips": "int64",
    "OriginStateName": "object",
    "OriginWac": "int64",
    "DestAirportID": "int64",
    "DestAirportSeqID": "int64",
    "DestCityMarketID": "int64",
    "Dest": "object",
    "DestCityName": "object",
    "DestState": "object",
    "DestStateFips": "int64",
    "DestStateName": "object",
    "DestWac": "int64",
    "CRSDepTime": "int64",
    "DepTime": "float64",
    "DepDelay": "float64",
    "DepDelayMinutes": "float64",
    "DepDel15": "float64",
    "DepartureDelayGroups": "float64",
    "DepTimeBlk": "object",
    "TaxiOut": "float64",
    "WheelsOff": "float64",
    "WheelsOn": "float64",
    "TaxiIn": "float64",
    "CRSArrTime": "int64",
    "ArrTime": "float64",
    "ArrDelay": "float64",
    "ArrDelayMinutes": "float64",
    "ArrDel15": "float64",
    "ArrivalDelayGroups": "float64",
    "ArrTimeBlk": "object",
    "Cancelled": "float64",
    "CancellationCode": "object",
    "Diverted": "float64",
    "CRSElapsedTime": "float64",
    "ActualElapsedTime": "float64",
    "AirTime": "float64",
    "Flights": "float64",
    "Distance": "float64",
    "DistanceGroup": "int64",
    "CarrierDelay": "float64",
    "WeatherDelay": "float64",
    "NASDelay": "float64",
    "SecurityDelay": "float64",
    "LateAircraftDelay": "float64",
    "FirstDepTime": "float64",
    "TotalAddGTime": "float64",
    "LongestAddGTime": "float64",
    "DivAirportLandings": "int64",
    "DivReachedDest": "float64",
    "DivActualElapsedTime": "float64",
    "DivArrDelay": "float64",
    "DivDistance": "float64",
    "Div1Airport": "object",
    "Div1AirportID": "float64",
    "Div1AirportSeqID": "float64",
    "Div1WheelsOn": "float64",
    "Div1TotalGTime": "float64",
    "Div1LongestGTime": "float64",
    "Div1WheelsOff": "float64",
    "Div1TailNum": "object",
    "Div2Airport": "object",
    "Div2AirportID": "float64",
    "Div2AirportSeqID": "float64",
    "Div2WheelsOn": "float64",
    "Div2TotalGTime": "float64",
    "Div2LongestGTime": "float64",
    "Div2WheelsOff": "float64",
    "Div2TailNum": "object",
    "Div3Airport": "object",
    "Div3AirportID": "float64",
    "Div3AirportSeqID": "float64",
    "Div3WheelsOn": "float64",
    "Div3TotalGTime": "float64",
    "Div3LongestGTime": "float64",
    "Div3WheelsOff": "float64",
    "Div3TailNum": "object",
    "Div4Airport": "object",
    "Div4AirportID": "float64",
    "Div4AirportSeqID": "float64",
    "Div4WheelsOn": "float64",
    "Div4TotalGTime": "float64",
    "Div4LongestGTime": "float64",
    "Div4WheelsOff": "float64",
    "Div4TailNum": "object",
    "Div5Airport": "object",
    "Div5AirportID": "float64",
    "Div5AirportSeqID": "float64",
    "Div5WheelsOn": "float64",
    "Div5TotalGTime": "float64",
    "Div5LongestGTime": "float64",
    "Div5WheelsOff": "float64",
    "Div5TailNum": "object",
}
import pandas as pd


# Path to the 2014 flights CSV.
filepath = '/anvil/projects/tdm/data/flights/2014.csv'

# The only columns imported from the file.
columns_to_read = [
    'DepDelay', 'ArrDelay', 'Flight_Number_Reporting_Airline', 'Distance',
    'CarrierDelay', 'WeatherDelay', 'CRSDepTime', 'CRSArrTime',
    'DepTime', 'ArrTime', 'Origin', 'Dest', 'AirTime'
]

# dtype for each imported column.  This replaces any earlier `col_types`.
# DepTime and ArrTime are float64 rather than int64: an int64 column cannot
# hold a missing value, so read_csv raises ValueError as soon as one of these
# fields is blank in the rows being read.  float64 stores the blank as NaN.
col_types = {
    'DepDelay': 'float64', 'ArrDelay': 'float64', 'Flight_Number_Reporting_Airline': 'int64',
    'Distance': 'float64', 'CarrierDelay': 'float64', 'WeatherDelay': 'float64',
    'CRSDepTime': 'int64', 'CRSArrTime': 'int64', 'DepTime': 'float64', 'ArrTime': 'float64',
    'Origin': 'object', 'Dest': 'object', 'AirTime': 'float64'
}

# Load the selected columns for the first 100 data rows only.
myDF = pd.read_csv(filepath, usecols=columns_to_read, nrows=100, dtype=col_types)


# One ScheduledFlight per row of myDF.  Keyword arguments make the
# column -> attribute mapping explicit; note that Origin/Dest (airport codes
# such as 'DFW') are what end up on the *_airport_id attributes.
scheduled_flights = [
    ScheduledFlight(
        flight_number=record['Flight_Number_Reporting_Airline'],
        origin_airport_id=record['Origin'],
        dest_airport_id=record['Dest'],
        dep_time=record['DepTime'],
        arr_time=record['ArrTime'],
        dep_delay=record['DepDelay'],
        arr_delay=record['ArrDelay'],
        crs_dep_time=record['CRSDepTime'],
        crs_arr_time=record['CRSArrTime'],
    )
    for _label, record in myDF.iterrows()
]

2b. Load the data from myDF into the ScheduledFlight class instances. (When you are finished, you should have a list of 100 ScheduledFlight instances.)

# Turn every row of myDF into a ScheduledFlight instance.  Each column is
# passed by parameter name; the row label from iterrows() is not needed.
scheduled_flights = [
    ScheduledFlight(
        flight_number=entry['Flight_Number_Reporting_Airline'],
        origin_airport_id=entry['Origin'],
        dest_airport_id=entry['Dest'],
        dep_time=entry['DepTime'],
        arr_time=entry['ArrTime'],
        dep_delay=entry['DepDelay'],
        arr_delay=entry['ArrDelay'],
        crs_dep_time=entry['CRSDepTime'],
        crs_arr_time=entry['CRSArrTime'],
    )
    for _row_label, entry in myDF.iterrows()
]

3a. Create an empty dictionary named ontime_count. Then use a for loop to assign values to ontime_count from the 100 ScheduledFlight objects.

# Tally the on-time flights per destination.  A destination only gets a key
# once it has at least one on-time flight.
ontime_count = {}
for flight in scheduled_flights:
    if not flight.is_ontime():
        continue  # late flights contribute nothing to the tally
    dest = flight.dest_airport_id
    ontime_count[dest] = 1 + ontime_count.get(dest, 0)

3b. Calculate the total number of flights that were on time, for each destination airport.

# Show the number of on-time flights recorded for each destination.
print(ontime_count)
{'ICT': 1, 'DFW': 12, 'TPA': 11, 'OGG': 7, 'SJC': 3, 'KOA': 4, 'SMF': 1, 'SEA': 7, 'PDX': 3, 'OAK': 2, 'HNL': 6, 'ANC': 2, 'LIH': 2, 'SAN': 2, 'BLI': 1, 'DCA': 2}

4a. Add a method called is_delayed() to the class that indicates if the flight was delayed (either had a departure delay or an arrival delay).

class DelayedFlight(ScheduledFlight):
    """ScheduledFlight with an added is_delayed() check.

    The constructor adds no state of its own; it forwards all nine arguments
    to ScheduledFlight.
    """

    def __init__(
        self,
        flight_number,
        origin_airport_id,
        dest_airport_id,
        dep_time,
        arr_time,
        dep_delay,
        arr_delay,
        crs_dep_time,
        crs_arr_time,
    ):
        # Pure pass-through to the parent constructor.
        super().__init__(
            flight_number,
            origin_airport_id,
            dest_airport_id,
            dep_time,
            arr_time,
            dep_delay,
            arr_delay,
            crs_dep_time,
            crs_arr_time,
        )

    def is_delayed(self):
        """Tell whether the departure or the arrival had a positive delay.

        A NaN delay fails the ``> 0`` comparison, so it does not by itself
        mark the flight as delayed.
        """
        late_departure = self.dep_delay > 0
        # `or` short-circuits: the arrival delay is only compared when the
        # departure was not late.
        return late_departure or self.arr_delay > 0


# Rebuild the 100 rows of myDF as DelayedFlight instances so that
# is_delayed() is available on each of them.
delayed_flights = [
    DelayedFlight(
        flight_number=record['Flight_Number_Reporting_Airline'],
        origin_airport_id=record['Origin'],
        dest_airport_id=record['Dest'],
        dep_time=record['DepTime'],
        arr_time=record['ArrTime'],
        dep_delay=record['DepDelay'],
        arr_delay=record['ArrDelay'],
        crs_dep_time=record['CRSDepTime'],
        crs_arr_time=record['CRSArrTime'],
    )
    for _label, record in myDF.iterrows()
]

4b. Calculate the total number of delayed flights, for each destination airport.

# Tally the delayed flights per destination, then display the tally.
# Destinations with no delayed flight never get a key.
delayed_count = {}
for flight in delayed_flights:
    if not flight.is_delayed():
        continue  # flights without a positive delay are not counted
    dest = flight.dest_airport_id
    delayed_count[dest] = 1 + delayed_count.get(dest, 0)

print(delayed_count)
{'ICT': 1, 'DFW': 19, 'TPA': 9, 'SAN': 1, 'HNL': 1, 'SEA': 2, 'OGG': 1}