def ERA_pick_times_in_order(self, row, midnight, cutoff):
    """
    Pick a runway-arrival estimate for ``row`` from the first usable source.

    When the ERA is missing, the alternatives below are tried in order and
    the first one that is available is returned:

    1. ``ERA_most_recent_minutes_after_midnight``
    2. ``actual_runway_departure_minutes_after_midnight`` plus
       ``scheduled_air_time`` (both must be non-null)
    3. ``EGA_most_recent_minutes_after_midnight`` minus
       ``gate_delay_seconds`` divided by 60 (both must be non-null)
    4. ``scheduled_runway_arrival_minutes_after_midnight``
    5. ``published_arrival_minutes_after_midnight``
    6. ``dut.minutes_difference(cutoff, midnight)`` when ``cutoff`` is truthy

    ``self`` is not used. ``row`` is anything indexable by the column names
    above (e.g. a DataFrame row). If no source is usable, the row and a
    warning are printed and ``None`` is returned.
    """
    def present(column):
        # A value counts as available when pandas does not consider it null.
        return pd.notnull(row[column])

    # Preferred source: the ERA itself.
    if present('ERA_most_recent_minutes_after_midnight'):
        return row['ERA_most_recent_minutes_after_midnight']

    # Actual runway departure plus air time.
    # scheduled_air_time == estimated time from dep runway to arr runway
    if present('actual_runway_departure_minutes_after_midnight') and \
            present('scheduled_air_time'):
        departure = row['actual_runway_departure_minutes_after_midnight']
        return departure + row['scheduled_air_time']

    # Gate arrival time minus the average time to reach the gate for the
    # given airport and airline (stored in seconds, hence the / 60).
    if present('EGA_most_recent_minutes_after_midnight') and \
            present('gate_delay_seconds'):
        gate_arrival = row['EGA_most_recent_minutes_after_midnight']
        return gate_arrival - row['gate_delay_seconds'] / 60.0

    # Scheduled runway arrival.
    if present('scheduled_runway_arrival_minutes_after_midnight'):
        return row['scheduled_runway_arrival_minutes_after_midnight']

    # Published arrival.
    if present('published_arrival_minutes_after_midnight'):
        return row['published_arrival_minutes_after_midnight']

    # Last resort: use the cutoff time as the estimate.
    if cutoff:
        return dut.minutes_difference(cutoff, midnight)

    # Nothing usable: report the row; the caller receives None.
    print(row)
    print("NO TIME TO USE")
    return None
def create_data(day):
    """
    Join a day's flight history with its reduced event rows and write CSVs.

    Steps, in order:

    1. Run ``tfu.offset_func`` over both timezone-offset columns of
       ``day.flight_history`` (this modifies ``day.flight_history`` in place).
    2. Collapse ``day.flight_history_events`` to one row per
       ``flight_history_id`` via ``fhe.reduce_fhe_group_to_one_row``.
    3. Left-join the flight history with the reduced events and turn empty
       strings into NaN.
    4. Append the matching airport timezone offset to each "most recent"
       column, then pass each of those columns through ``dut.parse_to_utc``.
    5. For every column named by ``date_columns()``, add a
       ``<column>_minutes_after_midnight`` column measured against
       ``day.midnight_time``.
    6. Drop the columns named by ``unneces_cols()``.
    7. Write the full table, and the subset left-joined onto
       ``day.test_data``'s flight ids, under ``output_csv/``.

    Args:
        day: object exposing ``flight_history``, ``flight_history_events``,
            ``test_data``, ``midnight_time`` and ``folder_name``.

    Returns:
        None. The results are the two CSV files.
    """
    for offset_col in ('departure_airport_timezone_offset',
                       'arrival_airport_timezone_offset'):
        day.flight_history[offset_col] = \
            day.flight_history[offset_col].apply(tfu.offset_func)

    grouped = day.flight_history_events.groupby('flight_history_id')
    reduced_fhe = grouped.apply(fhe.reduce_fhe_group_to_one_row)
    reduced_fhe = reduced_fhe.reset_index()

    print("\tCreating file: all")

    joined = pd.merge(left=day.flight_history, right=reduced_fhe,
                      on='flight_history_id', how='left', sort=False)
    joined = joined.replace("", np.nan)

    # Arrival-side columns take the arrival airport's offset, departure-side
    # columns the departure airport's.
    arrival_cols = ['AGA_most_recent', 'ARA_most_recent',
                    'EGA_most_recent', 'ERA_most_recent']
    departure_cols = ['AGD_most_recent', 'ARD_most_recent']
    offset_for = (
        (arrival_cols, 'arrival_airport_timezone_offset'),
        (departure_cols, 'departure_airport_timezone_offset'),
    )
    for cols, offset_col in offset_for:
        for col in cols:
            joined[col] = joined[col] + joined[offset_col]

    # Derived from the two lists above so the set of parsed columns cannot
    # drift out of sync with the set that received an offset.
    for col in arrival_cols + departure_cols:
        joined[col] = joined[col].apply(dut.parse_to_utc)

    def minutes_after_midnight(timestamp):
        return float(dut.minutes_difference(timestamp, day.midnight_time))

    for col in date_columns():
        joined[col + '_minutes_after_midnight'] = \
            joined[col].apply(minutes_after_midnight)

    for col in unneces_cols():
        del joined[col]

    joined = joined.replace("", np.nan)

    def output_path(label):
        return ('output_csv/parsed_fhe_' + day.folder_name + '_' + label +
                '_filtered_with_dates.csv')

    joined.to_csv(output_path("all"), index=False, na_rep="MISSING")

    # Restrict to the flights listed in the test data, keeping their order.
    joined_test = pd.merge(left=day.test_data[['flight_history_id']],
                           right=joined, on='flight_history_id',
                           how='left', sort=False)
    joined_test.to_csv(output_path("test"), index=False, na_rep="MISSING")
def add_column_asdi_time_est(self, day, data):
    """
    Attach the ASDI flight-plan rows to ``data``.

    First adds an ``estimatedarrivalutc_minutes_after_midnight`` column to
    ``day.asdi_flight_plan`` (modifying it in place), computed with
    ``dut.minutes_difference`` against ``day.midnight_time``. Then left-joins
    ``data`` with the flight plan on ``flight_history_id`` and returns the
    merged frame. ``self`` is not used.
    """
    def minutes_after_midnight(timestamp):
        return float(dut.minutes_difference(timestamp, day.midnight_time))

    flight_plan = day.asdi_flight_plan
    source_col = 'estimatedarrivalutc'
    flight_plan[source_col + '_minutes_after_midnight'] = \
        flight_plan[source_col].apply(minutes_after_midnight)

    return pd.merge(left=data, right=day.asdi_flight_plan,
                    on='flight_history_id', how='left', sort=False)
def add_column_position_groundspeed_received(self, day, data):
    """
    Attach the ASDI position rows to ``data``.

    Modifies ``day.asdi_position`` in place: renames ``latitudedegrees`` /
    ``longitudedegrees`` to ``curr_latitude`` / ``curr_longitude`` and adds
    a ``received_minutes_after_midnight`` column computed with
    ``dut.minutes_difference`` against ``day.midnight_time``. Returns
    ``data`` left-joined with the positions on ``flight_history_id``.
    ``self`` is not used.

    NOTE(review): this file contains another definition with the same name
    and an equivalent body; if both live in the same scope the later one
    wins. Confirm and deduplicate.
    """
    positions = day.asdi_position

    renamed_columns = {
        "latitudedegrees": "curr_latitude",
        "longitudedegrees": "curr_longitude",
    }
    positions.rename(columns=renamed_columns, inplace=True)

    def minutes_after_midnight(timestamp):
        return float(dut.minutes_difference(timestamp, day.midnight_time))

    positions['received_minutes_after_midnight'] = \
        positions['received'].apply(minutes_after_midnight)

    return pd.merge(left=data, right=day.asdi_position,
                    on='flight_history_id', how='left', sort=False)
def add_column_position_groundspeed_received(self, day, data):
    """
    Attach the ASDI position rows to ``data``.

    Modifies ``day.asdi_position`` in place: renames ``latitudedegrees`` /
    ``longitudedegrees`` to ``curr_latitude`` / ``curr_longitude`` and adds
    a ``received_minutes_after_midnight`` column computed with
    ``dut.minutes_difference`` against ``day.midnight_time``. Returns
    ``data`` left-joined with the positions on ``flight_history_id``.
    ``self`` is not used.

    NOTE(review): this file contains another definition with the same name
    and an equivalent body; if both live in the same scope the later one
    wins. Confirm and deduplicate.
    """
    positions = day.asdi_position

    renamed_columns = {
        "latitudedegrees": "curr_latitude",
        "longitudedegrees": "curr_longitude",
    }
    positions.rename(columns=renamed_columns, inplace=True)

    def minutes_after_midnight(timestamp):
        return float(dut.minutes_difference(timestamp, day.midnight_time))

    positions['received_minutes_after_midnight'] = \
        positions['received'].apply(minutes_after_midnight)

    return pd.merge(left=data, right=day.asdi_position,
                    on='flight_history_id', how='left', sort=False)