def split_file_based_on_times_filter_on_ids_streaming(self):
    """Check the streaming per-day split of a CSV file filtered by id.

    Writes a 200010-row CSV with columns ``id1``, ``id2`` and ``name``, then
    calls ``utilities.split_file_based_on_times_filter_on_ids_streaming``
    with a set of wanted ``id1`` values for each of two cutoff times and
    ``id2`` as the column to track.  The test verifies:

    * the value returned for each cutoff time is the set of ``id2`` values
      of the rows that were kept, and
    * the files ``<output>/2012_10_25/MyTestFolder/test_output.csv`` and
      ``<output>/2012_10_26/MyTestFolder/test_output.csv`` contain the
      header plus exactly the rows that were kept.
    """
    # Local imports keep this block self-contained.
    import shutil
    import tempfile

    # mkdtemp creates the directory atomically.  os.tempnam only returned a
    # candidate name, leaving a window in which another process could claim
    # it, and it no longer exists on Python 3.
    input_dir = tempfile.mkdtemp()
    output_path = tempfile.mkdtemp()
    try:
        input_file = os.path.join(input_dir, "raw.csv")
        names = ["Andrew", "Ben", "Chris", "David", "Anthony"]
        # id2 and name both cycle with period 5, so id2 == id1 % 5.
        data = [
            (str(x), str(y), z)
            for x, y, z in zip(
                range(200010),
                itertools.cycle(range(5)),
                itertools.cycle(names))
        ]
        with open(input_file, "w") as f:
            w = csv.writer(f)
            w.writerow(["id1", "id2", "name"])
            w.writerows(data)

        ct1 = datetime(2012, 10, 25, 15, 0, tzinfo=tzutc())
        ct2 = datetime(2012, 10, 26, 15, 0, tzinfo=tzutc())
        ids_dict = {
            ct1: {"1", "3", "200005"},
            ct2: {"2", "100001", "200006"},
        }

        ids_back = utilities.split_file_based_on_times_filter_on_ids_streaming(
            input_file,
            output_path,
            "MyTestFolder",
            "test_output.csv",
            "id1",
            ids_dict,
            ids_to_track_column_name="id2",
            start_hours_offset=-9)

        self.assertEqual({"1", "3", "0"}, ids_back[ct1])
        # Rows 100001 and 200006 both carry id2 == "1", hence two elements.
        self.assertEqual({"2", "1"}, ids_back[ct2])

        f1path = os.path.join(
            output_path, "2012_10_25", "MyTestFolder", "test_output.csv")
        f2path = os.path.join(
            output_path, "2012_10_26", "MyTestFolder", "test_output.csv")
        with open(f1path) as f1:
            f1data = list(csv.reader(f1))
        with open(f2path) as f2:
            f2data = list(csv.reader(f2))

        self.assertEqual(
            [["id1", "id2", "name"],
             ["1", "1", "Ben"],
             ["3", "3", "David"],
             ["200005", "0", "Andrew"]],
            f1data)
        self.assertEqual(
            [["id1", "id2", "name"],
             ["2", "2", "Chris"],
             ["100001", "1", "Ben"],
             ["200006", "1", "Ben"]],
            f2data)
    finally:
        # Clean up even when an assertion above fails, so a red test does
        # not leave temporary directories behind.
        shutil.rmtree(input_dir, ignore_errors=True)
        shutil.rmtree(output_path, ignore_errors=True)
def raw_data_to_training_days(raw_data_path, training_days_path, cutoff_times):
    """Split the raw flight data files into per-day training files.

    The flight history file is processed first; its result
    (``days_flight_ids``) is then used to filter the flight history events,
    ASDI flight plan and ASDI position files.  The flight plan split also
    tracks the ``asdiflightplanid`` column, and its result is used to filter
    the remaining ASDI flight-plan files.

    Args:
        raw_data_path: Directory holding the ``FlightHistory`` and ``ASDI``
            sub-folders with the raw CSV files.
        training_days_path: Output path handed to every split call.
        cutoff_times: Cutoff times forwarded to
            ``flighthistory.process_flight_history_to_train_day_files``.
    """

    def raw_file(folder, file_name):
        # Location of one raw input file.
        return os.path.join(raw_data_path, folder, file_name)

    def split_by_day(label, folder, file_name, id_column, ids_by_day, **options):
        # Announce the file, then split it; the output keeps the same folder
        # and file name as the input.
        print(label)
        return utilities.split_file_based_on_times_filter_on_ids_streaming(
            raw_file(folder, file_name),
            training_days_path,
            folder,
            file_name,
            id_column,
            ids_by_day,
            **options)

    flight_history_csv = raw_file("FlightHistory", "flighthistory.csv")
    print(flight_history_csv)
    days_flight_ids = flighthistory.process_flight_history_to_train_day_files(
        input_path=flight_history_csv,
        output_path=training_days_path,
        output_folder_name="FlightHistory",
        output_file_name="flighthistory.csv",
        cutoff_times=cutoff_times,
        start_hours_offset=-9)

    split_by_day(
        "Flight History Events", "FlightHistory", "flighthistoryevents.csv",
        "flight_history_id", days_flight_ids)

    days_flight_plan_ids = split_by_day(
        "ASDI Flight Plan", "ASDI", "asdiflightplan.csv",
        "flighthistoryid", days_flight_ids,
        ids_to_track_column_name="asdiflightplanid")

    split_by_day(
        "ASDI Position", "ASDI", "asdiposition.csv",
        "flighthistoryid", days_flight_ids)

    # Every remaining ASDI file is keyed by flight plan id.
    flight_plan_files = (
        ("ASDI Airway", "asdiairway.csv"),
        ("ASDI FPFix", "asdifpfix.csv"),
        ("ASDI FPCenter", "asdifpcenter.csv"),
        ("ASDI FPSector", "asdifpsector.csv"),
        ("ASDI FPWaypoint", "asdifpwaypoint.csv"),
    )
    for label, file_name in flight_plan_files:
        split_by_day(
            label, "ASDI", file_name, "asdiflightplanid", days_flight_plan_ids)
def split_file_based_on_times_filter_on_ids_streaming(self):
    """Check the streaming per-day split of a CSV file filtered by id.

    Writes a 200010-row CSV with columns ``id1``, ``id2`` and ``name``, then
    calls ``utilities.split_file_based_on_times_filter_on_ids_streaming``
    with a set of wanted ``id1`` values for each of two cutoff times and
    ``id2`` as the column to track.  The test verifies:

    * the value returned for each cutoff time is the set of ``id2`` values
      of the rows that were kept, and
    * the files ``<output>/2012_10_25/MyTestFolder/test_output.csv`` and
      ``<output>/2012_10_26/MyTestFolder/test_output.csv`` contain the
      header plus exactly the rows that were kept.
    """
    # Local imports keep this block self-contained.
    import shutil
    import tempfile

    # mkdtemp creates the directory atomically.  os.tempnam only returned a
    # candidate name, leaving a window in which another process could claim
    # it, and it no longer exists on Python 3.
    input_dir = tempfile.mkdtemp()
    output_path = tempfile.mkdtemp()
    try:
        input_file = os.path.join(input_dir, "raw.csv")
        names = ["Andrew", "Ben", "Chris", "David", "Anthony"]
        # id2 and name both cycle with period 5, so id2 == id1 % 5.
        data = [
            (str(x), str(y), z)
            for x, y, z in zip(
                range(200010),
                itertools.cycle(range(5)),
                itertools.cycle(names))
        ]
        with open(input_file, "w") as f:
            w = csv.writer(f)
            w.writerow(["id1", "id2", "name"])
            w.writerows(data)

        ct1 = datetime(2012, 10, 25, 15, 0, tzinfo=tzutc())
        ct2 = datetime(2012, 10, 26, 15, 0, tzinfo=tzutc())
        ids_dict = {
            ct1: {"1", "3", "200005"},
            ct2: {"2", "100001", "200006"},
        }

        ids_back = utilities.split_file_based_on_times_filter_on_ids_streaming(
            input_file,
            output_path,
            "MyTestFolder",
            "test_output.csv",
            "id1",
            ids_dict,
            ids_to_track_column_name="id2",
            start_hours_offset=-9)

        self.assertEqual({"1", "3", "0"}, ids_back[ct1])
        # Rows 100001 and 200006 both carry id2 == "1", hence two elements.
        self.assertEqual({"2", "1"}, ids_back[ct2])

        f1path = os.path.join(
            output_path, "2012_10_25", "MyTestFolder", "test_output.csv")
        f2path = os.path.join(
            output_path, "2012_10_26", "MyTestFolder", "test_output.csv")
        with open(f1path) as f1:
            f1data = list(csv.reader(f1))
        with open(f2path) as f2:
            f2data = list(csv.reader(f2))

        self.assertEqual(
            [["id1", "id2", "name"],
             ["1", "1", "Ben"],
             ["3", "3", "David"],
             ["200005", "0", "Andrew"]],
            f1data)
        self.assertEqual(
            [["id1", "id2", "name"],
             ["2", "2", "Chris"],
             ["100001", "1", "Ben"],
             ["200006", "1", "Ben"]],
            f2data)
    finally:
        # Clean up even when an assertion above fails, so a red test does
        # not leave temporary directories behind.
        shutil.rmtree(input_dir, ignore_errors=True)
        shutil.rmtree(output_path, ignore_errors=True)
def raw_data_to_training_days(raw_data_path, training_days_path, cutoff_times):
    """Split the raw data set into per-day training folders.

    Steps, in order:

    1. Process the flight history file; the result (``days_flight_ids``) is
       used to filter the flight history events, ASDI flight plan and ASDI
       position files.
    2. The flight plan split also tracks the ``asdiflightplanid`` column;
       its result filters the remaining ASDI flight-plan files.
    3. For every cutoff time, make sure the day's output folder exists and
       run ``weather.process_one_day`` for that day in ``"train"`` mode.

    Args:
        raw_data_path: Directory holding the ``FlightHistory`` and ``ASDI``
            sub-folders with the raw CSV files; also passed to the weather
            processing.
        training_days_path: Output path handed to every split call; the
            per-day weather folders are created beneath it.
        cutoff_times: Cutoff times, one per training day.  Iterated again
            after being handed to the flight history processing, so it must
            be re-iterable (e.g. a list).
    """
    # Single definition of the offset that the original repeated as a
    # literal.  The streaming split calls do not receive it and keep using
    # their own default.
    start_hours_offset = -9

    def raw_file(folder, file_name):
        # Location of one raw input file.
        return os.path.join(raw_data_path, folder, file_name)

    def split_by_day(label, folder, file_name, id_column, ids_by_day, **options):
        # Announce the file, then split it; the output keeps the same folder
        # and file name as the input.
        print(label)
        return utilities.split_file_based_on_times_filter_on_ids_streaming(
            raw_file(folder, file_name),
            training_days_path,
            folder,
            file_name,
            id_column,
            ids_by_day,
            **options)

    flight_history_csv = raw_file("FlightHistory", "flighthistory.csv")
    print(flight_history_csv)
    days_flight_ids = flighthistory.process_flight_history_to_train_day_files(
        input_path=flight_history_csv,
        output_path=training_days_path,
        output_folder_name="FlightHistory",
        output_file_name="flighthistory.csv",
        cutoff_times=cutoff_times,
        start_hours_offset=start_hours_offset)

    split_by_day(
        "Flight History Events", "FlightHistory", "flighthistoryevents.csv",
        "flight_history_id", days_flight_ids)

    days_flight_plan_ids = split_by_day(
        "ASDI Flight Plan", "ASDI", "asdiflightplan.csv",
        "flighthistoryid", days_flight_ids,
        ids_to_track_column_name="asdiflightplanid")

    split_by_day(
        "ASDI Position", "ASDI", "asdiposition.csv",
        "flighthistoryid", days_flight_ids)

    # Every remaining ASDI file is keyed by flight plan id.
    flight_plan_files = (
        ("ASDI Airway", "asdiairway.csv"),
        ("ASDI FPFix", "asdifpfix.csv"),
        ("ASDI FPCenter", "asdifpcenter.csv"),
        ("ASDI FPSector", "asdifpsector.csv"),
        ("ASDI FPWaypoint", "asdifpwaypoint.csv"),
    )
    for label, file_name in flight_plan_files:
        split_by_day(
            label, "ASDI", file_name, "asdiflightplanid", days_flight_plan_ids)

    for ct in cutoff_times:
        # Call form works on Python 2 and 3; the bare ``print ct`` statement
        # was a SyntaxError on Python 3.
        print(ct)
        day_output_path = os.path.join(
            training_days_path, utilities.get_day_str(ct, start_hours_offset))
        day_beginning, day_end = utilities.get_day_boundaries(
            ct, start_hours_offset)
        if not os.path.exists(day_output_path):
            os.makedirs(day_output_path)
        weather.process_one_day(
            raw_data_path, day_output_path, day_beginning, day_end, "train")
def raw_data_to_training_days(raw_data_path, training_days_path, cutoff_times):
    """Split the raw data set into per-day training folders.

    Steps, in order:

    1. Process the flight history file; the result (``days_flight_ids``) is
       used to filter the flight history events, ASDI flight plan and ASDI
       position files.
    2. The flight plan split also tracks the ``asdiflightplanid`` column;
       its result filters the remaining ASDI flight-plan files.
    3. For every cutoff time, make sure the day's output folder exists and
       run ``weather.process_one_day`` for that day in ``"train"`` mode.

    Args:
        raw_data_path: Directory holding the ``FlightHistory`` and ``ASDI``
            sub-folders with the raw CSV files; also passed to the weather
            processing.
        training_days_path: Output path handed to every split call; the
            per-day weather folders are created beneath it.
        cutoff_times: Cutoff times, one per training day.  Iterated again
            after being handed to the flight history processing, so it must
            be re-iterable (e.g. a list).
    """
    # Single definition of the offset that the original repeated as a
    # literal.  The streaming split calls do not receive it and keep using
    # their own default.
    start_hours_offset = -9

    def raw_file(folder, file_name):
        # Location of one raw input file.
        return os.path.join(raw_data_path, folder, file_name)

    def split_by_day(label, folder, file_name, id_column, ids_by_day, **options):
        # Announce the file, then split it; the output keeps the same folder
        # and file name as the input.
        print(label)
        return utilities.split_file_based_on_times_filter_on_ids_streaming(
            raw_file(folder, file_name),
            training_days_path,
            folder,
            file_name,
            id_column,
            ids_by_day,
            **options)

    flight_history_csv = raw_file("FlightHistory", "flighthistory.csv")
    print(flight_history_csv)
    days_flight_ids = flighthistory.process_flight_history_to_train_day_files(
        input_path=flight_history_csv,
        output_path=training_days_path,
        output_folder_name="FlightHistory",
        output_file_name="flighthistory.csv",
        cutoff_times=cutoff_times,
        start_hours_offset=start_hours_offset)

    split_by_day(
        "Flight History Events", "FlightHistory", "flighthistoryevents.csv",
        "flight_history_id", days_flight_ids)

    days_flight_plan_ids = split_by_day(
        "ASDI Flight Plan", "ASDI", "asdiflightplan.csv",
        "flighthistoryid", days_flight_ids,
        ids_to_track_column_name="asdiflightplanid")

    split_by_day(
        "ASDI Position", "ASDI", "asdiposition.csv",
        "flighthistoryid", days_flight_ids)

    # Every remaining ASDI file is keyed by flight plan id.
    flight_plan_files = (
        ("ASDI Airway", "asdiairway.csv"),
        ("ASDI FPFix", "asdifpfix.csv"),
        ("ASDI FPCenter", "asdifpcenter.csv"),
        ("ASDI FPSector", "asdifpsector.csv"),
        ("ASDI FPWaypoint", "asdifpwaypoint.csv"),
    )
    for label, file_name in flight_plan_files:
        split_by_day(
            label, "ASDI", file_name, "asdiflightplanid", days_flight_plan_ids)

    for ct in cutoff_times:
        # Call form works on Python 2 and 3; the bare ``print ct`` statement
        # was a SyntaxError on Python 3.
        print(ct)
        day_output_path = os.path.join(
            training_days_path, utilities.get_day_str(ct, start_hours_offset))
        day_beginning, day_end = utilities.get_day_boundaries(
            ct, start_hours_offset)
        if not os.path.exists(day_output_path):
            os.makedirs(day_output_path)
        weather.process_one_day(
            raw_data_path, day_output_path, day_beginning, day_end, "train")