# Example #1
0
# Clean both input frames: discard rows that contain any missing value,
# then discard rows that are exact duplicates of an earlier row.
gatwdata = gatwdata.dropna().drop_duplicates()
dubdata = dubdata.dropna().drop_duplicates()

import pandas as pd

# Keep only the gatwdata rows whose status text contains "LANDED" or
# "DEPARTED". The two subsets are stacked in that order, so a row whose
# status matches both patterns would appear twice.
landed_mask = gatwdata['status'].str.contains("LANDED")
departed_mask = gatwdata['status'].str.contains("DEPARTED")
pieces = [gatwdata[landed_mask], gatwdata[departed_mask]]
gatwdata = pd.concat(pieces)

# Same filtering for dubdata, whose status values use the mixed-case
# spellings "Departed" and "Arrived".
dub_departed_mask = dubdata['status'].str.contains("Departed")
dub_arrived_mask = dubdata['status'].str.contains("Arrived")
pieces = [dubdata[dub_departed_mask], dubdata[dub_arrived_mask]]
dubdata = pd.concat(pieces)


# Add a 'datescheduled' column to each frame holding only the calendar
# date of the corresponding 'scheduled' value (each value must provide a
# .date() method).
dubdata['datescheduled'] = dubdata['scheduled'].map(lambda ts: ts.date())
gatwdata['datescheduled'] = gatwdata['scheduled'].map(lambda ts: ts.date())

# Pair up rows from the two frames that share the same scheduled date and
# flight number. The inner join is run in both directions and the two
# results are stacked into a single table.
# NOTE(review): both joins match the same row pairs, only with the _x/_y
# column suffixes swapped, so every matched pair ends up in the result
# twice -- confirm this duplication is intended.
join_keys = ['datescheduled', 'flightno']
gatw_with_dub = pd.merge(gatwdata, dubdata, on=join_keys, how='inner')
dub_with_gatw = pd.merge(dubdata, gatwdata, on=join_keys, how='inner')
pieces = [gatw_with_dub, dub_with_gatw]
timekeepingres = pd.concat(pieces)


# Flight duration in minutes: the absolute gap between the two
# 'datetimestatus' timestamps of each merged row. The absolute value is
# taken because the merged table holds each pairing in both orders, so the
# raw difference can be negative.
# Dividing a timedelta Series by a one-minute Timedelta yields float
# minutes in a single vectorized step and relies only on pandas, so no
# numpy import is required.
status_gap = timekeepingres['datetimestatus_x'] - timekeepingres['datetimestatus_y']
timekeepingres['flightduration'] = status_gap.abs() / pd.Timedelta(minutes=1)

# Plot the distribution of the flight durations as an 18-bin histogram.
flight_minutes = timekeepingres['flightduration']
flight_minutes.hist(bins=18)

# Write the full merged table to CSV at a fixed, machine-specific path.
timekeepingres.to_csv("/Users/aidanoboyle/Documents/timekeeping.transformed.csv")