import pandas as pd

# Pair up the records of two flight-status feeds (`gatwdata` and `dubdata`,
# both DataFrames created earlier in the file) by scheduled date and flight
# number, then derive a per-flight duration from the two status timestamps.
# NOTE(review): the names suggest Gatwick and Dublin airport feeds -- confirm.

# Discard incomplete rows and exact duplicate rows from both feeds.
gatwdata = gatwdata.dropna().drop_duplicates()
dubdata = dubdata.dropna().drop_duplicates()

# Keep only rows whose status text contains one of the wanted keywords.
# The matches are stacked keyword by keyword, so the resulting row order is
# "all rows matching the first keyword, then all rows matching the second".
gatwdata = pd.concat(
    [
        gatwdata[gatwdata['status'].str.contains(keyword)]
        for keyword in ("LANDED", "DEPARTED")
    ]
)
dubdata = pd.concat(
    [
        dubdata[dubdata['status'].str.contains(keyword)]
        for keyword in ("Departed", "Arrived")
    ]
)

# Reduce the scheduled timestamp to its calendar date so that it can be used
# as a join key together with the flight number.
dubdata['datescheduled'] = dubdata['scheduled'].map(lambda stamp: stamp.date())
gatwdata['datescheduled'] = gatwdata['scheduled'].map(lambda stamp: stamp.date())

# Inner-join the feeds in both orders and stack the two results. Columns
# present in both frames get the default `_x` (left) / `_y` (right) suffixes.
# NOTE(review): both joins match the same row pairs, only with the `_x`/`_y`
# sides swapped, so every matched flight appears twice in `timekeepingres`
# (and is counted twice in the histogram and the CSV) -- confirm this is
# intended.
join_keys = ['datescheduled', 'flightno']
gatw_on_left = pd.merge(gatwdata, dubdata, on=join_keys, how='inner')
dub_on_left = pd.merge(dubdata, gatwdata, on=join_keys, how='inner')
timekeepingres = pd.concat([gatw_on_left, dub_on_left])

# Duration is the gap between the two status timestamps, expressed in
# minutes. The absolute value is taken because the sign of the difference
# depends on which feed ended up on the left-hand side of the join.
one_minute = np.timedelta64(1, 'm')
status_gap = timekeepingres.datetimestatus_x - timekeepingres.datetimestatus_y
timekeepingres['flightduration'] = status_gap.map(
    lambda gap: abs(gap / one_minute)
)

# Plot the distribution of durations and export the combined table.
timekeepingres['flightduration'].hist(bins=18)
timekeepingres.to_csv("/Users/aidanoboyle/Documents/timekeeping.transformed.csv")