# Standard library and third-party imports used by the functions below.
# Module-level constants (e.g. BZ2_FILE_EXTENSION, BZ2_LENGTH, FLIGHT_FIELDS,
# POSITION_FIELDS, the ISO8601 date formats) and the logger `log` are assumed
# to be defined elsewhere in the package.
import bz2
import csv
import errno
import gc
import os
from bz2 import BZ2Compressor, BZ2Decompressor
from io import StringIO
from pathlib import Path
from uuid import UUID

import pandas as pd


def uncompress(file_path):
    """
    Uncompress a file in place.

    If the file does not exist, it returns False.
    If the file does not have a bz2 extension, it is not uncompressed.

    Returns a tuple of: success and the path string describing the
    uncompressed file.

    Parameters
    ----------
    file_path: a string
        A valid path to the file to be uncompressed.
    """
    if Path(file_path).exists():
        if has_bz2_extension(file_path):
            uncompressed_path = file_path[:-len(BZ2_FILE_EXTENSION)]
            uncompressor = BZ2Decompressor()
            with open(file_path, 'rb') as compressed:
                with open(uncompressed_path, 'wb') as uncompressed:
                    for data in iter(compressed.readline, b''):
                        uncompressed.write(uncompressor.decompress(data))

            return (True, uncompressed_path)
        else:
            return (True, file_path)
    else:
        return (False, "File does not exist: " + file_path)
def compress(file_path):
    """
    Compress a file in place.

    If the file does not exist, it returns False.
    If the file already has a bz2 extension, it is not compressed.

    Returns a tuple of: success and the path string describing the
    compressed file.

    Parameters
    ----------
    file_path: a string
        A valid path to the file to be compressed.
    """
    if Path(file_path).exists():
        if not has_bz2_extension(file_path):
            compressed_path = file_path + BZ2_FILE_EXTENSION
            compressor = BZ2Compressor()
            with open(file_path, 'rb') as uncompressed:
                with open(compressed_path, 'wb') as compressed:
                    for data in iter(uncompressed.readline, b''):
                        compressed.write(compressor.compress(data))
                    compressed.write(compressor.flush())

            return (True, compressed_path)
        else:
            return (True, file_path)
    else:
        return (False, "File does not exist: " + file_path)
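# A minimal usage sketch for compress/uncompress above, assuming the
# (hypothetical) file 'positions_2017-08-01.csv' exists: compress appends
# BZ2_FILE_EXTENSION to the filename and uncompress strips it again.
def _example_compress_round_trip():
    ok, bz2_path = compress('positions_2017-08-01.csv')
    if ok:
        ok, csv_path = uncompress(bz2_path)
    return ok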
def extract_next_day_items(filename, ids_df, date_fields=None):
    """
    Read filename into a pandas DataFrame and extract the items in ids_df.

    It reads items (flights, positions or events) from filename into a
    pandas DataFrame and merges the items with ids_df on FLIGHT_ID as a
    UUID. It writes a copy of the items WITHOUT matching ids into a file
    called 'new_' + filename.

    Returns a pandas DataFrame containing the items with matching ids in
    the FLIGHT_ID column, with the flight ids replaced by the ids from the
    NEW_FLIGHT_ID column.
    """
    # An empty DataFrame to return on error
    new_items_df = pd.DataFrame()

    next_df = pd.DataFrame()
    try:
        if date_fields:
            next_df = pd.read_csv(filename, parse_dates=date_fields,
                                  converters={'FLIGHT_ID': lambda x: UUID(x)},
                                  memory_map=True)
        else:
            next_df = pd.read_csv(filename,
                                  converters={'FLIGHT_ID': lambda x: UUID(x)},
                                  memory_map=True)

        log.info('%s read ok', filename)
    except EnvironmentError:
        log.error('could not read file: %s', filename)
        return new_items_df  # return empty DataFrame

    # Create a new DataFrame WITHOUT any items that are in ids_df
    new_next_df = next_df[(~next_df['FLIGHT_ID'].isin(ids_df.index))]

    # Output the new next day's items
    new_next_filename = 'new_' + filename
    try:
        is_bz2 = has_bz2_extension(filename)
        if is_bz2:  # strip the .bz2 extension, to_csv writes plain text
            new_next_filename = new_next_filename[:-BZ2_LENGTH]

        new_next_df.to_csv(new_next_filename, index=False,
                           date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', new_next_filename)
    except EnvironmentError:
        log.error('could not write file: %s', new_next_filename)
        return new_items_df  # return empty DataFrame

    # get the new items from the next day's DataFrame
    new_items_df = pd.merge(ids_df, next_df,
                            left_index=True, right_on='FLIGHT_ID')
    replace_old_flight_ids(new_items_df)

    return new_items_df  # return the new items
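# A minimal usage sketch for extract_next_day_items, assuming ids_df is
# indexed by the previous day's FLIGHT_ID UUIDs and carries a NEW_FLIGHT_ID
# column (as replace_old_flight_ids appears to expect); the filename and
# UUIDs are hypothetical, for illustration only.
def _example_extract_next_day_items():
    ids_df = pd.DataFrame(
        {'NEW_FLIGHT_ID': [UUID('12345678-1234-5678-1234-567812345678')]},
        index=pd.Index([UUID('87654321-4321-8765-4321-876543218765')],
                       name='FLIGHT_ID'))
    return extract_next_day_items('positions_2017-08-02.csv', ids_df,
                                  date_fields=['TIME'])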
def merge_next_day_items(prev_filename, next_filename, ids_df, log):
    """
    Get the next day's items (positions or events) that are the
    continuation of the previous day's items and merge them with the
    previous day's items.

    It writes the new next day's and previous day's items to files
    prepended with 'new_'.

    It returns True if successful, False otherwise.
    """
    new_items_df = extract_next_day_items(next_filename, ids_df,
                                          date_fields=['TIME'])

    # free memory used by extract_next_day_items
    gc.collect()

    prev_df = pd.DataFrame()
    try:
        prev_df = pd.read_csv(prev_filename, parse_dates=['TIME'],
                              converters={'FLIGHT_ID': lambda x: UUID(x)},
                              memory_map=True)
        log.info('%s read ok', prev_filename)
    except EnvironmentError:
        log.error('could not read file: %s', prev_filename)
        return False

    # merge the new items into the previous day's DataFrame
    new_prev_df = pd.concat([prev_df, new_items_df], ignore_index=True)
    new_prev_df.sort_values(by=['FLIGHT_ID', 'TIME'], inplace=True)

    # Output the new previous day's items
    new_prev_filename = 'new_' + prev_filename
    try:
        is_bz2 = has_bz2_extension(prev_filename)
        if is_bz2:  # strip the .bz2 extension, to_csv writes plain text
            new_prev_filename = new_prev_filename[:-BZ2_LENGTH]

        new_prev_df.to_csv(new_prev_filename, index=False,
                           date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', new_prev_filename)
    except EnvironmentError:
        log.error('could not write file: %s', new_prev_filename)
        return False

    return True
def merge_flights(prev_flights_filename, next_flights_filename, ids_df, log):
    """
    Get the next day's flights that are the continuation of the previous
    day's flights and merge them with the previous day's flights.

    It writes the new next day's and previous day's flights to files
    prepended with 'new_'.

    It returns True if successful, False otherwise.
    """
    new_items_df = extract_next_day_items(next_flights_filename, ids_df)

    # free memory used by extract_next_day_items
    gc.collect()

    prev_flights_df = pd.DataFrame()
    try:
        prev_flights_df = pd.read_csv(
            prev_flights_filename, index_col='FLIGHT_ID',
            converters={'FLIGHT_ID': lambda x: UUID(x)},
            memory_map=True)
        log.info('%s read ok', prev_flights_filename)
    except EnvironmentError:
        log.error('could not read file: %s', prev_flights_filename)
        return False

    # merge the next day's flight data with the previous day's flight data
    update_flight_data(prev_flights_df, new_items_df)

    # Output the new previous day's flights
    new_prev_flights_filename = 'new_' + prev_flights_filename
    try:
        is_bz2 = has_bz2_extension(prev_flights_filename)
        if is_bz2:  # strip the .bz2 extension, to_csv writes plain text
            new_prev_flights_filename = \
                new_prev_flights_filename[:-BZ2_LENGTH]

        prev_flights_df.to_csv(new_prev_flights_filename, index=True,
                               date_format=ISO8601_DATETIME_FORMAT)
        log.info('written file: %s', new_prev_flights_filename)
    except EnvironmentError:
        log.error('could not write file: %s', new_prev_flights_filename)
        return False

    return True
def generate_positions(filename):
    """
    Generate trajectory positions from a csv file.

    A python generator function to read a csv file containing positions.
    It reads positions one flight at a time to minimise memory use.
    """
    is_bz2 = has_bz2_extension(filename)
    with bz2.open(filename, 'rt', newline='') if is_bz2 else \
            open(filename, 'r') as file:
        line_buffer = []
        try:
            # Skip the header row
            line = next(file)

            # Get the first position line
            line = next(file)
            fields = line.split(',')
            flight_id = fields[0]
            line_buffer = [line]

            line = next(file)
            while line:
                # Determine whether the flight_id has changed
                fields = line.split(',')
                if flight_id != fields[0]:
                    flight_id = fields[0]
                    yield line_buffer
                    line_buffer = [line]
                else:  # just add the line to the end of the buffer
                    line_buffer.append(line)

                line = next(file)
        except StopIteration:
            # yield the positions of the last flight, if any
            if line_buffer:
                yield line_buffer
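# A minimal usage sketch for generate_positions: stream a (hypothetical)
# positions file one flight at a time; each yielded item is a list of raw
# csv lines sharing the same FLIGHT_ID.
def _example_generate_positions():
    for position_lines in generate_positions('positions_2017-08-01.csv'):
        flight_id = position_lines[0].split(',')[0]
        print(flight_id, len(position_lines))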
def find_sector_intersections(filename,
                              logging_msg_count=DEFAULT_LOGGING_COUNT):
    """
    Find intersections between trajectories and airspace sectors.

    Parameters
    ----------
    filename: a string
        The name of a trajectories file.

    logging_msg_count: int
        The number of trajectories between logging count messages,
        default DEFAULT_LOGGING_COUNT.

    Returns
    -------
    An errno error_code if an error occurred, zero otherwise.
    """
    log.info(f'trajectories file: {filename}')

    trajectories_filename = os.path.basename(filename)
    is_bz2 = has_bz2_extension(filename)
    if is_bz2:  # remove the .bz2 from the end of the filename
        trajectories_filename = \
            trajectories_filename[:-len(BZ2_FILE_EXTENSION)]

    # Write the sector intersections into a csv file with output_filename
    output_filename = trajectories_filename.replace(TRAJECTORIES,
                                                    SECTOR_INTERSECTIONS)
    output_filename = output_filename.replace(JSON_FILE_EXTENSION,
                                              CSV_FILE_EXTENSION)
    try:
        with open(output_filename, 'w') as file:
            file.write(AIRSPACE_INTERSECTION_FIELDS)

            flights_count = 0
            zeros_count = 0
            smoothed_trajectories = generate_SmoothedTrajectories(filename)
            for smooth_traj in smoothed_trajectories:
                try:
                    flight_id = smooth_traj.flight_id
                    sect_ints = find_trajectory_sector_intersections(
                        smooth_traj)
                    if not sect_ints.empty:
                        sect_ints.to_csv(
                            file, index=False, header=False, mode='a',
                            date_format=ISO8601_DATETIME_US_FORMAT)
                    else:
                        zeros_count += 1
                        # log.warn(f'no intersections found with flight: {flight_id}')

                    flights_count += 1
                    if not (flights_count % logging_msg_count):
                        log.info(f'{flights_count} trajectories processed')

                except ValueError:
                    log.exception(
                        f'find_trajectory_sector_intersections id: {flight_id}')
                except StopIteration:
                    pass

        log.info(f'find_sector_intersections finished for'
                 f' {flights_count} trajectories')
        log.info(f'{zeros_count} trajectories had no intersections')

    except EnvironmentError:
        log.error(f'could not write file: {output_filename}')
        return errno.EACCES

    return 0
def interpolate_trajectories(filename,
                             straight_interval=DEFAULT_STRAIGHT_INTERVAL,
                             turn_interval=DEFAULT_TURN_INTERVAL,
                             logging_msg_count=DEFAULT_LOGGING_COUNT):
    """
    Interpolates trajectory positions in a trajectories file.

    Outputs a positions file (in CSV format) with "trajectories" replaced
    by "ref_positions" in the filename.

    Parameters
    ----------
    filename: a string
        The name of the trajectories JSON file.

    straight_interval: float
        The time between positions along a straight leg [Seconds],
        default DEFAULT_STRAIGHT_INTERVAL.

    turn_interval: float
        The time between positions while turning [Seconds],
        default DEFAULT_TURN_INTERVAL.

    logging_msg_count: int
        The number of trajectories between logging count messages,
        default DEFAULT_LOGGING_COUNT.

    Returns
    -------
    An errno error_code if an error occurred, zero otherwise.
    """
    trajectories_filename = os.path.basename(filename)
    is_bz2 = has_bz2_extension(filename)
    if is_bz2:  # remove the .bz2 from the end of the filename
        trajectories_filename = \
            trajectories_filename[:-len(BZ2_FILE_EXTENSION)]

    if trajectories_filename[-len(JSON_FILE_EXTENSION):] != JSON_FILE_EXTENSION:
        log.error(f'Invalid file type: {trajectories_filename}'
                  ', must be a JSON file.')
        return errno.EINVAL

    log.info(f'trajectories file: {filename}')
    log.info(f'Straight interval: {straight_interval} seconds')
    log.info(f'Turn interval: {turn_interval} seconds')

    output_filename = trajectories_filename.replace(TRAJECTORIES,
                                                    SYNTH_POSITIONS)
    output_filename = output_filename.replace(JSON_FILE_EXTENSION,
                                              CSV_FILE_EXTENSION)
    try:
        with open(output_filename, 'w') as file:
            file.write(POSITION_FIELDS)

            # Interpolate the smoothed_trajectories into reference_positions
            flights_count = 0
            smoothed_trajectories = generate_SmoothedTrajectories(filename)
            for smooth_traj in smoothed_trajectories:
                try:
                    flight_id = smooth_traj.flight_id
                    ref_traj = interpolate_trajectory_positions(
                        smooth_traj, straight_interval, turn_interval)
                    ref_traj.to_csv(file, index=False, header=False,
                                    mode='a',
                                    date_format=ISO8601_DATETIME_US_FORMAT)

                    flights_count += 1
                    if not (flights_count % logging_msg_count):
                        log.info(f'{flights_count} trajectories interpolated')

                except ValueError:
                    log.exception(
                        f'interpolate_trajectory flight id: {flight_id}')
                except StopIteration:
                    pass

        log.info(f'Finished interpolating {flights_count} trajectories.')

    except EnvironmentError:
        log.error(f'could not write file: {output_filename}')
        return errno.EACCES

    return 0
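# A minimal invocation sketch for interpolate_trajectories, assuming a
# (hypothetical) trajectories file produced by analyse_position_data below;
# both intervals are in seconds and the values are illustrative.
def _example_interpolate_trajectories():
    error_code = interpolate_trajectories(
        'mas_025_trajectories_2017-08-01.json',
        straight_interval=60.0, turn_interval=10.0)
    return error_code == 0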
def clean_position_data(filename, max_speed=DEFAULT_MAX_SPEED,
                        distance_accuracy=DEFAULT_DISTANCE_ACCURACY):
    """
    Identify and remove invalid positions in a positions file.

    Outputs a positions file with "raw_" stripped from the start of the
    filename and an error metrics file.

    Parameters
    ----------
    filename: a string
        The name of the raw positions file.

    max_speed: float
        The maximum speed between valid positions [Knots],
        default DEFAULT_MAX_SPEED.

    distance_accuracy: float
        The accuracy of the positions [Nautical Miles],
        default DEFAULT_DISTANCE_ACCURACY.

    Returns
    -------
    An errno error_code if an error occurred, zero otherwise.
    """
    positions_filename = os.path.basename(filename)

    is_bz2 = has_bz2_extension(positions_filename)
    if is_bz2:  # remove the .bz2 from the end of the filename
        positions_filename = positions_filename[:-len(BZ2_FILE_EXTENSION)]

    if positions_filename[-len(CSV_FILE_EXTENSION):] != CSV_FILE_EXTENSION:
        log.error(f'Invalid file type: {positions_filename}'
                  ', must be a CSV file.')
        return errno.EINVAL

    log.info(f'positions file: {positions_filename}')
    log.info(f'Max speed: {max_speed} Knots')
    log.info(f'Distance accuracy: {distance_accuracy} NM')

    ##########################################################################
    # Create output filenames

    # strip raw_ from the start of the positions_filename
    output_filename = os.path.basename(positions_filename)[len(RAW) + 1:]
    error_metrics_filename = output_filename.replace(POSITIONS, ERROR_METRICS)

    ##########################################################################
    # Process the positions

    flights_count = 0
    with open(output_filename, 'w') as output_file, \
            open(error_metrics_filename, 'w') as error_file:
        output_file.write(POSITION_FIELDS)
        error_file.write(POSITION_ERROR_FIELDS)
        error_writer = csv.writer(error_file, lineterminator='\n')
        try:
            flight_positions = generate_positions(filename)
            for position_lines in flight_positions:
                fields = position_lines[0].split(',')
                flight_id = fields[0]
                try:
                    position_string = ''.join(position_lines)
                    positions = pd.read_csv(StringIO(position_string),
                                            header=None,
                                            names=POSITION_FIELD_NAMES,
                                            parse_dates=['TIME'])

                    invalid_positions, error_metrics = \
                        find_invalid_positions(
                            positions, max_speed=max_speed,
                            distance_accuracy=distance_accuracy)

                    valid_positions = positions[~invalid_positions]
                    valid_positions.to_csv(
                        output_file, index=False, header=False, mode='a',
                        date_format=ISO8601_DATETIME_FORMAT)

                    error_metrics.insert(0, flight_id)
                    error_writer.writerow(error_metrics)

                    flights_count += 1

                except (ValueError, TypeError):
                    log.exception(
                        f'find_invalid_positions flight id: {flight_id}')
                except StopIteration:
                    pass

            log.info(f'written file: {output_filename}')
            log.info(f'written file: {error_metrics_filename}')

        except EnvironmentError:
            log.error(f'could not read file: {filename}')
            return errno.ENOENT

    log.info(f'positions cleaned for {flights_count} flights')

    return 0
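# A minimal invocation sketch for clean_position_data, assuming a
# (hypothetical) raw positions file; with RAW == 'raw' it writes
# 'positions_2017-08-01.csv' plus the matching error metrics file.
# The speed and accuracy values are illustrative.
def _example_clean_position_data():
    error_code = clean_position_data('raw_positions_2017-08-01.csv',
                                     max_speed=750.0,
                                     distance_accuracy=0.25)
    return error_code == 0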
def analyse_position_data(filename,
                          across_track_tolerance=DEFAULT_ACROSS_TRACK_TOLERANCE,
                          time_method=MOVING_AVERAGE_SPEED,
                          N=DEFAULT_MOVING_MEDIAN_SAMPLES,
                          M=DEFAULT_MOVING_AVERAGE_SAMPLES,
                          max_duration=DEFAULT_SPEED_MAX_DURATION,
                          logging_msg_count=DEFAULT_LOGGING_COUNT):
    """
    Analyse trajectory positions in a positions file.

    Outputs a trajectory file (in JSON format) with "positions" replaced by
    "trajectories" and with the time_method and across_track_tolerance at
    the start of the filename. It also outputs a trajectories metrics file
    in csv format.

    Parameters
    ----------
    filename: a string
        The name of the positions file.

    across_track_tolerance: float
        The maximum across track distance [Nautical Miles],
        default DEFAULT_ACROSS_TRACK_TOLERANCE.

    time_method: string
        The smoothing method to use: 'mas', 'lm', 'trf', 'dogbox',
        default MOVING_AVERAGE_SPEED.

    N: integer
        The number of samples to consider for the speed moving median
        filter, default DEFAULT_MOVING_MEDIAN_SAMPLES.

    M: integer
        The number of samples to consider for the speed moving average
        filter, default DEFAULT_MOVING_AVERAGE_SAMPLES.

    max_duration: float
        The maximum time between points to smooth when calculating speed
        [Seconds], default DEFAULT_SPEED_MAX_DURATION.

    logging_msg_count: int
        The number of trajectories between logging count messages,
        default DEFAULT_LOGGING_COUNT.

    Returns
    -------
    An errno error_code if an error occurred, zero otherwise.
    """
    positions_filename = os.path.basename(filename)

    is_bz2 = has_bz2_extension(positions_filename)
    if is_bz2:  # remove the .bz2 from the end of the filename
        positions_filename = positions_filename[:-len(BZ2_FILE_EXTENSION)]

    if positions_filename[-len(CSV_FILE_EXTENSION):] != CSV_FILE_EXTENSION:
        log.error(f'Invalid file type: {positions_filename}'
                  ', must be a CSV file.')
        return errno.EINVAL

    log.info(f'positions file: {filename}')
    log.info(f'across track tolerance: {across_track_tolerance} NM')
    log.info(f'time analysis method: {time_method}')
    if time_method == MOVING_AVERAGE_SPEED:
        log.info(f'moving median samples: {N}')
        log.info(f'moving average samples: {M}')
        log.info(f'speed filter maximum duration: {max_duration}')

    ##########################################################################
    # Create output filenames

    # add the time_method and tolerance to the front of the filename
    tolerance_string = str(across_track_tolerance).replace('.', '')
    positions_filename = '_'.join(
        [time_method, tolerance_string, positions_filename])

    trajectory_filename = positions_filename.replace(POSITIONS, TRAJECTORIES)
    trajectory_filename = trajectory_filename.replace(CSV_FILE_EXTENSION,
                                                      JSON_FILE_EXTENSION)
    traj_metrics_filename = positions_filename.replace(POSITIONS,
                                                       TRAJ_METRICS)

    ##########################################################################
    # Process the positions

    flights_count = 0
    with open(trajectory_filename, 'w') as output_file, \
            open(traj_metrics_filename, 'w') as metrics_file:
        output_file.write(
            write_SmoothedTrajectories_json_header(
                time_method, across_track_tolerance, N, M, max_duration))
        metrics_file.write(POSITION_METRICS_FIELDS)
        metrics_writer = csv.writer(metrics_file, lineterminator='\n')
        try:
            flight_positions = generate_positions(filename)
            for position_lines in flight_positions:
                # Ignore single point trajectories
                if len(position_lines) < 2:
                    continue

                fields = position_lines[0].split(',')
                flight_id = fields[0]
                try:
                    position_string = ''.join(position_lines)
                    positions = pd.read_csv(StringIO(position_string),
                                            header=None,
                                            names=POSITION_FIELD_NAMES,
                                            parse_dates=['TIME'])

                    smoothed_traj, quality_metrics = \
                        analyse_trajectory(flight_id, positions,
                                           across_track_tolerance,
                                           time_method, N, M, max_duration)

                    string_list = smoothed_traj.dumps()
                    if flights_count:  # delimit with a comma between flights
                        string_list.insert(0, ', ')
                    output_file.write(''.join(string_list))

                    metrics_writer.writerow(quality_metrics)

                    flights_count += 1
                    if not (flights_count % logging_msg_count):
                        log.info(f'{flights_count} flights analysed')

                except (ValueError, IndexError, TypeError):
                    log.exception(
                        f'analyse_trajectory flight id: {flight_id}')
                except StopIteration:
                    pass

            output_file.write(SMOOTHED_TRAJECTORY_JSON_FOOTER)

            log.info(f'written file: {trajectory_filename}')
            log.info(f'written file: {traj_metrics_filename}')

        except EnvironmentError:
            log.error(f'could not read file: {filename}')
            return errno.ENOENT

    log.info(f'analyse_trajectory finished for {flights_count} flights')

    return 0
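# A minimal invocation sketch for analyse_position_data, assuming a
# (hypothetical) cleaned positions file; with the 'mas' time_method and a
# 0.25 NM tolerance the outputs are named like
# 'mas_025_trajectories_2017-08-01.json' plus the matching metrics csv.
def _example_analyse_position_data():
    error_code = analyse_position_data('positions_2017-08-01.csv',
                                       across_track_tolerance=0.25,
                                       time_method=MOVING_AVERAGE_SPEED)
    return error_code == 0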
def find_airport_intersections(flights_filename, trajectories_filename,
                               radius=DEFAULT_RADIUS,
                               airports_filename=DEFAULT_MOVEMENTS_AIRPORTS_FILENAME,
                               distance_tolerance=DEFAULT_DISTANCE_TOLERANCE):
    """
    Find intersections between trajectories and airport cylinders.

    Parameters
    ----------
    flights_filename: a string
        The name of a flights file.

    trajectories_filename: a string
        The name of a trajectories file.

    radius: float
        The radius of the cylinder around each airport [Nautical Miles],
        default DEFAULT_RADIUS.

    airports_filename: a string
        The name of the airports file,
        default DEFAULT_MOVEMENTS_AIRPORTS_FILENAME.

    distance_tolerance: float
        The tolerance for path and cylinder distances,
        default DEFAULT_DISTANCE_TOLERANCE.

    Returns
    -------
    An errno error_code if an error occurred, zero otherwise.
    """
    # Extract the date string from the filename and validate it
    flights_date = read_iso8601_date_string(flights_filename)
    if is_valid_iso8601_date(flights_date):
        log.info(f'flights file: {flights_filename}')
    else:
        log.error(f'flights file: {flights_filename}'
                  f', invalid date: {flights_date}')
        return errno.EINVAL

    trajectories_date = read_iso8601_date_string(trajectories_filename,
                                                 is_json=True)
    if is_valid_iso8601_date(trajectories_date):
        log.info(f'trajectories file: {trajectories_filename}')
    else:
        log.error(f'trajectories file, invalid date: {trajectories_date}')
        return errno.EINVAL

    if flights_date != trajectories_date:
        log.error(f'Files are not for the same date!'
                  f' Flights date: {flights_date}'
                  f', trajectories date: {trajectories_date}')
        return errno.EINVAL

    log.info(f'radius: {radius} NM')
    log.info(f'distance_tolerance: {distance_tolerance} NM')

    airports_df = pd.DataFrame()
    try:
        airports_df = pd.read_csv(airports_filename, index_col='AIRPORT',
                                  memory_map=True)
        log.info(f'{airports_filename} read ok')
    except EnvironmentError:
        log.error(f'could not read file: {airports_filename}')
        return errno.ENOENT

    flights_df = pd.DataFrame()
    try:
        flights_df = pd.read_csv(flights_filename,
                                 usecols=['FLIGHT_ID', 'ADEP', 'ADES'],
                                 index_col='FLIGHT_ID', memory_map=True)
        log.info(f'{flights_filename} read ok')
    except EnvironmentError:
        log.error(f'could not read file: {flights_filename}')
        return errno.ENOENT

    # Determine the departure and arrival flights
    departures_df = pd.merge(flights_df, airports_df,
                             left_on='ADEP', right_index=True)
    destinations_df = pd.merge(flights_df, airports_df,
                               left_on='ADES', right_index=True)

    trajectories_basename = os.path.basename(trajectories_filename)
    is_bz2 = has_bz2_extension(trajectories_basename)
    if is_bz2:  # remove the .bz2 from the end of the filename
        trajectories_basename = \
            trajectories_basename[:-len(BZ2_FILE_EXTENSION)]

    # Write the airport intersections into a csv file with output_filename
    output_filename = trajectories_basename.replace(TRAJECTORIES,
                                                    AIRPORT_INTERSECTIONS)
    output_filename = output_filename.replace(JSON_FILE_EXTENSION,
                                              CSV_FILE_EXTENSION)
    try:
        with open(output_filename, 'w') as file:
            file.write(AIRPORT_INTERSECTION_FIELDS)

            flights_count = 0
            smoothed_trajectories = generate_SmoothedTrajectories(
                trajectories_filename)
            for smooth_traj in smoothed_trajectories:
                try:
                    flight_id = smooth_traj.flight_id
                    is_departure = flight_id in departures_df.index
                    is_arrival = flight_id in destinations_df.index
                    if is_departure or is_arrival:
                        traj_path = smooth_traj.path.ecef_path()

                        if is_departure:
                            dep_row = departures_df.loc[flight_id]
                            departure = dep_row['ADEP']
                            if len(departure) == AIRPORT_NAME_LENGTH:
                                latitude = dep_row['LATITUDE']
                                longitude = dep_row['LONGITUDE']
                                ref_point = global_Point3d(latitude,
                                                           longitude)
                                dep_intersection = find_airport_intersection(
                                    smooth_traj, traj_path, departure,
                                    ref_point, radius, False,
                                    distance_tolerance)
                                if not dep_intersection.empty:
                                    dep_intersection.to_csv(
                                        file, index=False, header=False,
                                        mode='a',
                                        date_format=ISO8601_DATETIME_US_FORMAT)

                        if is_arrival:
                            dest_row = destinations_df.loc[flight_id]
                            destination = dest_row['ADES']
                            if len(destination) == AIRPORT_NAME_LENGTH:
                                latitude = dest_row['LATITUDE']
                                longitude = dest_row['LONGITUDE']
                                ref_point = global_Point3d(latitude,
                                                           longitude)
                                dest_intersection = find_airport_intersection(
                                    smooth_traj, traj_path, destination,
                                    ref_point, radius, True,
                                    distance_tolerance)
                                if not dest_intersection.empty:
                                    dest_intersection.to_csv(
                                        file, index=False, header=False,
                                        mode='a',
                                        date_format=ISO8601_DATETIME_US_FORMAT)

                    flights_count += 1

                except ValueError:
                    log.exception(
                        f'find_airport_intersections id: {flight_id}')
                except StopIteration:
                    pass

        log.info(f'find_airport_intersections finished for'
                 f' {flights_count} trajectories.')

    except EnvironmentError:
        log.error(f'could not write file: {output_filename}')
        return errno.EACCES

    return 0
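# A minimal invocation sketch for find_airport_intersections, assuming
# (hypothetical) same-date flights and trajectories files and the default
# airports file; the 40 NM radius is illustrative.
def _example_find_airport_intersections():
    error_code = find_airport_intersections(
        'flights_2017-08-01.csv',
        'mas_025_trajectories_2017-08-01.json',
        radius=40.0)
    return error_code == 0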
def convert_fr24_data(filenames):
    """
    Convert a pair of fr24 ADS-B files (a flights file and a points file)
    for the same date into flight and positions files.

    Returns an errno error_code if an error occurred, zero otherwise.
    """
    flights_filename = filenames[0]
    points_filename = filenames[1]

    if flights_filename == points_filename:
        log.error('Files are the same! Flights filename: %s'
                  ', points filename: %s',
                  flights_filename, points_filename)
        return errno.EINVAL

    # Extract the date string from the filename and validate it
    flights_date = read_iso8601_date_string(flights_filename)
    if is_valid_iso8601_date(flights_date):
        log.info('fr24 flights file: %s', flights_filename)
    else:
        log.error('fr24 flights file: %s, invalid date: %s',
                  flights_filename, flights_date)
        return errno.EINVAL

    # Extract the date string from the filename and validate it
    points_date = read_iso8601_date_string(points_filename)
    if is_valid_iso8601_date(points_date):
        log.info('fr24 points file: %s', points_filename)
    else:
        log.error('fr24 points file: %s, invalid date: %s',
                  points_filename, points_date)
        return errno.EINVAL

    if flights_date != points_date:
        log.error('Files are not for the same date!'
                  ' Flights date: %s, points date: %s',
                  flights_date, points_date)
        return errno.EINVAL

    # A dict to hold the ADS-B flights
    flights = {}

    # Read the ADS-B flights file into flights
    try:
        is_bz2 = has_bz2_extension(flights_filename)
        with bz2.open(flights_filename, 'rt', newline='') if is_bz2 else \
                open(flights_filename, 'r') as file:
            reader = csv.reader(file, delimiter=',')
            next(reader, None)  # skip the headers
            for row in reader:
                flights.setdefault(row[AdsbFlightField.FLIGHT_ID],
                                   AdsbFlight(row))
    except EnvironmentError:
        log.error('could not read file: %s', flights_filename)
        return errno.ENOENT

    log.info('fr24 flights read ok')

    # Read the ADS-B points file into flights
    try:
        is_bz2 = has_bz2_extension(points_filename)
        with bz2.open(points_filename, 'rt', newline='') if is_bz2 else \
                open(points_filename, 'r') as file:
            reader = csv.reader(file, delimiter=',')
            next(reader, None)  # skip the headers
            for row in reader:
                if row[AdsbPointField.FLIGHT_ID] in flights:
                    flights[row[AdsbPointField.FLIGHT_ID]].append(row)
    except EnvironmentError:
        log.error('could not read file: %s', points_filename)
        return errno.ENOENT

    log.info('fr24 points read ok')

    # sort each flight's positions into date time (of position) order
    for key, values in flights.items():
        values.sort()

    log.info('fr24 points sorted')

    valid_flights = 0

    # Output the ADS-B flight data for all flights
    output_files = create_convert_fr24_filenames(flights_date)
    flight_file = output_files[0]
    try:
        with open(flight_file, 'w') as file:
            file.write(FLIGHT_FIELDS)
            for key, values in sorted(flights.items()):
                if values.is_valid:
                    print(values, file=file)
                    valid_flights += 1

        log.info('written file: %s', flight_file)
    except EnvironmentError:
        log.error('could not write file: %s', flight_file)

    # Output the ADS-B position data for all flights
    positions_file = output_files[1]
    try:
        with open(positions_file, 'w') as file:
            file.write(POSITION_FIELDS)
            for key, values in sorted(flights.items()):
                if values.is_valid:
                    for pos in values.positions:
                        print(pos, file=file)

        log.info('written file: %s', positions_file)
    except EnvironmentError:
        log.error('could not write file: %s', positions_file)
        return errno.EACCES

    log.info('fr24 conversion complete for %s flights on %s',
             valid_flights, points_date)

    return 0
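# A minimal invocation sketch for convert_fr24_data with (hypothetical)
# same-date fr24 files; the flights file must be first in the list and the
# points file second.
def _example_convert_fr24_data():
    error_code = convert_fr24_data(['fr24_flights_2017-08-01.csv.bz2',
                                    'fr24_points_2017-08-01.csv.bz2'])
    return error_code == 0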
def convert_apds_data(filename, stands_filename):
    """
    Convert an APDS file into flight, positions and events files.

    Positions are only output if an airport stands file is provided.

    Returns an errno error_code if an error occurred, zero otherwise.
    """
    # Extract the start and finish date strings from the filename
    start_date, finish_date = split_dual_date(os.path.basename(filename))
    if not is_valid_iso8601_date(start_date):
        log.error('apds data file: %s, invalid start date: %s',
                  filename, start_date)
        return errno.EINVAL

    # validate the finish date string from the filename
    if not is_valid_iso8601_date(finish_date):
        log.error('apds data file: %s, invalid finish date: %s',
                  filename, finish_date)
        return errno.EINVAL

    log.info('apds data file: %s', filename)

    airport_stands_df = pd.DataFrame()
    if stands_filename:
        try:
            airport_stands_df = pd.read_csv(
                stands_filename, index_col=['ICAO_ID', 'STAND_ID'],
                memory_map=True)
            airport_stands_df.sort_index(inplace=True)
        except EnvironmentError:
            log.error('could not read file: %s', stands_filename)
            return errno.ENOENT

        log.info('airport stands file: %s', stands_filename)
    else:
        log.info('airport stands not provided')

    # A dict to hold the APDS flights
    flights = {}

    # Read the APDS flights file into flights
    try:
        is_bz2 = has_bz2_extension(filename)
        with bz2.open(filename, 'rt', newline='') if is_bz2 else \
                open(filename, 'r') as file:
            reader = csv.reader(file, delimiter=',')
            next(reader, None)  # skip the headers
            for row in reader:
                flights.setdefault(row[ApdsField.APDS_ID],
                                   ApdsFlight(row, airport_stands_df))
    except EnvironmentError:
        log.error('could not read file: %s', filename)
        return errno.ENOENT

    log.info('apds flights read ok')

    valid_flights = 0

    # Output the APDS flight data
    output_files = create_convert_apds_filenames(start_date, finish_date)
    flight_file = output_files[0]
    try:
        with open(flight_file, 'w') as file:
            file.write(FLIGHT_FIELDS)
            for key, value in sorted(flights.items()):
                print(value, file=file)
                valid_flights += 1

        log.info('written file: %s', flight_file)
    except EnvironmentError:
        log.error('could not write file: %s', flight_file)

    # Output the APDS position data, if airport stand data was provided
    if len(airport_stands_df):
        positions_file = output_files[1]
        try:
            with open(positions_file, 'w') as file:
                file.write(POSITION_FIELDS)
                for key, value in sorted(flights.items()):
                    for event in sorted(value.positions):
                        print(event, file=file)

            log.info('written file: %s', positions_file)
        except EnvironmentError:
            log.error('could not write file: %s', positions_file)

    # Output the APDS event data
    event_file = output_files[2]
    try:
        with open(event_file, 'w') as file:
            file.write(FLIGHT_EVENT_FIELDS)
            for key, value in sorted(flights.items()):
                for event in sorted(value.events):
                    print(event, file=file)

        log.info('written file: %s', event_file)
    except EnvironmentError:
        log.error('could not write file: %s', event_file)
        return errno.EACCES

    log.info('apds conversion complete for %s flights on %s',
             valid_flights, start_date)

    return 0