def test_write_segment__all_data(self):
    '''
    Writes the whole source file (``slice(None)``) and checks that every
    parameter's data, the DME mask, the parameter attributes and the file
    duration come out unchanged. The write is repeated with a boundary of
    4 and of 64.
    '''
    def check_output(path):
        # Re-open the written file and compare it against the fixture data.
        with h5py.File(path, 'r') as hdf_file:
            # 'IVV' - 1Hz parameter.
            ivv = hdf_file['series']['IVV']
            self.assertEqual(ivv.attrs['frequency'], self.ivv_frequency)
            self.assertEqual(ivv.attrs['supf_offset'],
                             self.ivv_supf_offset)
            ivv_written = ivv['data'][:]
            self.assertTrue(all(ivv_written == self.ivv_data))

            # 'WOW' - 4Hz parameter.
            wow = hdf_file['series']['WOW']
            self.assertEqual(wow.attrs['frequency'], self.wow_frequency)
            wow_written = wow['data'][:]
            self.assertTrue(all(wow_written == self.wow_data))

            # 'DME' - 0.25Hz parameter.
            dme = hdf_file['series']['DME']
            self.assertEqual(dme.attrs['frequency'], self.dme_frequency)
            dme_written = dme['data'][:]
            self.assertTrue(all(dme_written == self.dme_data))
            # Test mask is written.
            dme_mask_written = dme['mask'][:]
            self.assertTrue(all(dme_mask_written == self.dme_mask))

            self.assertEqual(hdf_file.attrs['duration'], self.data_secs)

    whole_file = slice(None)
    for boundary in (4, 64):
        written = write_segment(self.hdf_path, whole_file, self.out_path,
                                boundary=boundary)
        self.assertEqual(written, self.out_path)
        check_output(written)
def test_write_segment__all_data(self):
    '''
    Writes the whole source file (``slice(None)``) and checks that every
    parameter's data, the DME mask, the parameter attributes and the file
    duration come out unchanged. The write is repeated with
    ``supf_boundary`` False and True.
    '''
    def check_output(path):
        # Re-open the written file and compare it against the fixture data.
        with h5py.File(path, 'r') as hdf_file:
            # 'IVV' - 1Hz parameter.
            ivv = hdf_file['series']['IVV']
            self.assertEqual(ivv.attrs['frequency'], self.ivv_frequency)
            self.assertEqual(ivv.attrs['supf_offset'],
                             self.ivv_supf_offset)
            ivv_written = ivv['data'][:]
            self.assertTrue(all(ivv_written == self.ivv_data))

            # 'WOW' - 4Hz parameter.
            wow = hdf_file['series']['WOW']
            self.assertEqual(wow.attrs['frequency'], self.wow_frequency)
            wow_written = wow['data'][:]
            self.assertTrue(all(wow_written == self.wow_data))

            # 'DME' - 0.25Hz parameter.
            dme = hdf_file['series']['DME']
            self.assertEqual(dme.attrs['frequency'], self.dme_frequency)
            dme_written = dme['data'][:]
            self.assertTrue(all(dme_written == self.dme_data))
            # Test mask is written.
            dme_mask_written = dme['mask'][:]
            self.assertTrue(all(dme_mask_written == self.dme_mask))

            self.assertEqual(hdf_file.attrs['duration'], self.data_secs)

    whole_file = slice(None)
    for on_superframe in (False, True):
        written = write_segment(self.hdf_path, whole_file, self.out_path,
                                supf_boundary=on_superframe)
        self.assertEqual(written, self.out_path)
        check_output(written)
def test_write_segment__start_only(self):
    '''
    Tests that the correct segment of the dataset within the path matching
    'series/<Param Name>/data' defined by the slice has been written to the
    destination file while other datasets and attributes are unaffected.
    Slice has a start only.
    '''
    segment = slice(50, None)
    frame_start = 48  # 48 is nearest frame boundary rounded down
    dest = write_segment(self.hdf_path, segment, self.out_path, boundary=4)
    self.assertEqual(dest, self.out_path)

    with h5py.File(dest, 'r') as hdf_file:
        # 'IVV' - 1Hz parameter.
        ivv_group = hdf_file['series']['IVV']
        self.assertEqual(ivv_group.attrs['frequency'], self.ivv_frequency)
        self.assertEqual(ivv_group.attrs['supf_offset'],
                         self.ivv_supf_offset)
        ivv_result = np.ma.masked_array(ivv_group['data'][:],
                                        mask=ivv_group['mask'][:])
        ivv_expected_result = np.arange(
            frame_start * self.ivv_frequency,
            self.data_secs * self.ivv_frequency,
            dtype=float)
        # The two samples between the frame boundary (48) and the
        # requested start (50) are expected to be masked.
        ivv_expected_result = np.ma.masked_array(
            ivv_expected_result, mask=[True] * 2 + [False] * 78)
        self.assertEqual(ivv_result.tolist(), ivv_expected_result.tolist())

        # 'WOW' - 4Hz parameter.
        wow_group = hdf_file['series']['WOW']
        self.assertEqual(wow_group.attrs['frequency'], self.wow_frequency)
        wow_result = wow_group['data'][:]
        wow_expected_result = np.arange(
            frame_start * self.wow_frequency,
            self.data_secs * self.wow_frequency,
            dtype=float)
        self.assertEqual(wow_result.tolist(), wow_expected_result.tolist())

        # 'DME' - 0.25Hz parameter.
        dme_group = hdf_file['series']['DME']
        self.assertEqual(dme_group.attrs['frequency'], self.dme_frequency)
        dme_result = dme_group['data'][:]
        dme_expected_result = np.arange(12, 32, dtype=float)
        self.assertEqual(dme_result.tolist(), dme_expected_result.tolist())

        self.assertEqual(hdf_file.attrs['duration'], 80)
def test_write_segment__start_only(self):
    '''
    Tests that the correct segment of the dataset within the path matching
    'series/<Param Name>/data' defined by the slice has been written to the
    destination file while other datasets and attributes are unaffected.
    Slice has a start only.
    '''
    segment = slice(50, None)
    frame_start = 48  # 48 is nearest frame boundary rounded down
    dest = write_segment(self.hdf_path, segment, self.out_path, boundary=4)
    self.assertEqual(dest, self.out_path)

    with h5py.File(dest, 'r') as hdf_file:
        # 'IVV' - 1Hz parameter.
        ivv_group = hdf_file['series']['IVV']
        self.assertEqual(ivv_group.attrs['frequency'], self.ivv_frequency)
        self.assertEqual(ivv_group.attrs['supf_offset'],
                         self.ivv_supf_offset)
        ivv_result = np.ma.masked_array(ivv_group['data'][:],
                                        mask=ivv_group['mask'][:])
        # Builtin float is what the removed np.float alias pointed at.
        ivv_expected_result = np.arange(
            frame_start * self.ivv_frequency,
            self.data_secs * self.ivv_frequency,
            dtype=float)
        # The two samples between the frame boundary (48) and the
        # requested start (50) are expected to be masked.
        ivv_expected_result = np.ma.masked_array(
            ivv_expected_result, mask=[True] * 2 + [False] * 78)
        self.assertEqual(ivv_result.tolist(), ivv_expected_result.tolist())

        # 'WOW' - 4Hz parameter.
        wow_group = hdf_file['series']['WOW']
        self.assertEqual(wow_group.attrs['frequency'], self.wow_frequency)
        wow_result = wow_group['data'][:]
        wow_expected_result = np.arange(
            frame_start * self.wow_frequency,
            self.data_secs * self.wow_frequency,
            dtype=float)
        self.assertEqual(wow_result.tolist(), wow_expected_result.tolist())

        # 'DME' - 0.25Hz parameter.
        dme_group = hdf_file['series']['DME']
        self.assertEqual(dme_group.attrs['frequency'], self.dme_frequency)
        dme_result = dme_group['data'][:]
        dme_expected_result = np.arange(12, 32, dtype=float)
        self.assertEqual(dme_result.tolist(), dme_expected_result.tolist())

        self.assertEqual(hdf_file.attrs['duration'], 80)
def test_write_segment__start_only(self):
    '''
    Tests that the correct segment of the dataset within the path matching
    'series/<Param Name>/data' defined by the slice has been written to the
    destination file while other datasets and attributes are unaffected.
    Slice has a start only.
    '''
    segment = slice(50, None)
    dest = write_segment(self.hdf_path, segment, self.out_path,
                         supf_boundary=False)
    self.assertEqual(dest, self.out_path)

    # The test expects data from 48 onwards although 50 was requested.
    frame_boundary_segment = slice(48, None)

    with h5py.File(dest, 'r') as hdf_file:
        # 'IVV' - 1Hz parameter.
        ivv_group = hdf_file['series']['IVV']
        self.assertEqual(ivv_group.attrs['frequency'], self.ivv_frequency)
        self.assertEqual(ivv_group.attrs['supf_offset'],
                         self.ivv_supf_offset)
        ivv_result = ivv_group['data'][:]
        ivv_expected_result = np.arange(
            frame_boundary_segment.start * self.ivv_frequency,
            self.data_secs * self.ivv_frequency,
            dtype=float)
        # List equality also checks the lengths and reports a diff, which
        # assertTrue(all(a == b)) does not.
        self.assertEqual(ivv_result.tolist(), ivv_expected_result.tolist())

        # 'WOW' - 4Hz parameter.
        wow_group = hdf_file['series']['WOW']
        self.assertEqual(wow_group.attrs['frequency'], self.wow_frequency)
        wow_result = wow_group['data'][:]
        wow_expected_result = np.arange(
            frame_boundary_segment.start * self.wow_frequency,
            self.data_secs * self.wow_frequency,
            dtype=float)
        self.assertEqual(wow_result.tolist(), wow_expected_result.tolist())

        # 'DME' - 0.25Hz parameter.
        dme_group = hdf_file['series']['DME']
        self.assertEqual(dme_group.attrs['frequency'], self.dme_frequency)
        dme_result = dme_group['data'][:]
        dme_expected_result = np.arange(12, 32, dtype=float)
        self.assertEqual(dme_result.tolist(), dme_expected_result.tolist())

        self.assertEqual(hdf_file.attrs['duration'], 80)
def test_write_segment__stop_only(self):
    '''
    Tests that the correct segment of the dataset within the path matching
    'series/<Param Name>/data' defined by the slice has been written to the
    destination file while other datasets and attributes are unaffected.
    Slice has a stop only.
    '''
    segment = slice(None, 70)
    dest = write_segment(self.hdf_path, segment, self.out_path, boundary=4)
    self.assertEqual(dest, self.out_path)

    # The test expects data up to 72 although 70 was requested.
    frame_boundary_segment = slice(None, 72)

    with h5py.File(dest, 'r') as hdf_file:
        # 'IVV' - 1Hz parameter.
        ivv_group = hdf_file['series']['IVV']
        self.assertEqual(ivv_group.attrs['frequency'], self.ivv_frequency)
        self.assertEqual(ivv_group.attrs['supf_offset'],
                         self.ivv_supf_offset)
        ivv_result = ivv_group['data'][:]
        ivv_expected_result = np.arange(
            0, frame_boundary_segment.stop * self.ivv_frequency,
            dtype=float)
        self.assertEqual(list(ivv_result), list(ivv_expected_result))

        # 'WOW' - 4Hz parameter.
        wow_group = hdf_file['series']['WOW']
        self.assertEqual(wow_group.attrs['frequency'], self.wow_frequency)
        wow_result = wow_group['data'][:]
        wow_expected_result = np.arange(
            0, frame_boundary_segment.stop * self.wow_frequency,
            dtype=float)
        # Must be assertEqual: assertTrue(a, b) treats b as the failure
        # message and only checks that a is truthy.
        self.assertEqual(list(wow_result), list(wow_expected_result))

        # 'DME' - 0.25Hz parameter.
        dme_group = hdf_file['series']['DME']
        self.assertEqual(dme_group.attrs['frequency'], self.dme_frequency)
        dme_result = dme_group['data'][:]
        dme_expected_result = np.arange(0, 18, dtype=float)
        self.assertEqual(list(dme_result), list(dme_expected_result))

        self.assertEqual(hdf_file.attrs['duration'], 72)
def split_hdf_to_segments(hdf_path, aircraft_info, fallback_dt=None,
                          fallback_relative_to_start=True, draw=False,
                          dest_dir=None):
    """
    Main method - analyses an HDF file for flight segments and splits each
    flight into a new segment appropriately.

    :param hdf_path: path to HDF file
    :type hdf_path: string
    :param aircraft_info: Information which identify the aircraft,
        specifically with the keys 'Tail Number', 'MSN'...
    :type aircraft_info: Dict
    :param fallback_dt: A datetime which is as close to the end of the data
        file as possible. Used to replace elements of datetimes which are
        not available in the hdf file (e.g. YEAR not being recorded)
    :type fallback_dt: datetime
    :param fallback_relative_to_start: Passed straight through to
        calculate_fallback_dt together with fallback_dt.
    :type fallback_relative_to_start: Boolean
    :param draw: Whether to use matplotlib to plot the flight
    :type draw: Boolean
    :param dest_dir: Destination directory, if None, the source file
        directory is used
    :type dest_dir: str
    :returns: List of Segments
    :rtype: List of Segment recordtypes
        ('slice type part duration path hash')
    """
    logger.info("Processing file: %s", hdf_path)

    if dest_dir is None:
        dest_dir = os.path.dirname(hdf_path)

    if draw:
        # Imported lazily so plotting is only required when requested.
        from analysis_engine.plot_flight import plot_essential
        plot_essential(hdf_path)

    with hdf_file(hdf_path) as hdf:
        superframe_present = hdf.superframe_present

        # Confirm aircraft tail for the entire datafile
        logger.info("Validating aircraft matches that recorded in data")
        validate_aircraft(aircraft_info, hdf)

        # now we know the Aircraft is correct, go and do the PRE FILE ANALYSIS
        hook = hooks.PRE_FILE_ANALYSIS
        if hook:
            # __name__ exists on Python 2 and 3 functions (func_name is
            # Python 2 only); fall back to repr() for other callables so
            # that logging can never be the reason the hook fails.
            logger.info("Performing PRE_FILE_ANALYSIS analysis: %s",
                        getattr(hook, '__name__', repr(hook)))
            hook(hdf, aircraft_info)
        else:
            logger.info("No PRE_FILE_ANALYSIS actions to perform")

        fallback_dt = calculate_fallback_dt(hdf, fallback_dt,
                                            fallback_relative_to_start)
        segment_tuples = split_segments(hdf)

    # process each segment (into a new file) having closed original hdf_path
    # ARINC 717 data has frames or superframes. ARINC 767 will be split
    # on a minimum boundary of 4 seconds for the analyser.
    boundary = 64 if superframe_present else 4
    # The source name without extension is the same for every segment.
    dest_stem = os.path.splitext(os.path.basename(hdf_path))[0]

    segments = []
    previous_stop_dt = None
    for part, (segment_type, segment_slice) in enumerate(segment_tuples,
                                                         start=1):
        # write segment to new split file (.001)
        dest_basename = dest_stem + '.%03d.hdf5' % part
        dest_path = os.path.join(dest_dir, dest_basename)
        logger.debug("Writing segment %d: %s", part, dest_path)

        write_segment(hdf_path, segment_slice, dest_path, boundary=boundary)

        segment = append_segment_info(dest_path, segment_type,
                                      segment_slice, part,
                                      fallback_dt=fallback_dt)

        if previous_stop_dt and segment.start_dt < previous_stop_dt:
            # In theory, this should not happen - but be warned of superframe
            # padding?
            logger.warning(
                "Segment start_dt '%s' comes before the previous segment "
                "ended '%s'", segment.start_dt, previous_stop_dt)
        previous_stop_dt = segment.stop_dt

        if fallback_dt:
            # move the fallback_dt on to be relative to start of next segment
            # NOTE(review): the original comment mentioned "plus a small gap
            # between flights", but no gap is added here.
            fallback_dt += segment.stop_dt - segment.start_dt

        segments.append(segment)
        if draw:
            plot_essential(dest_path)

    if draw:
        # show all figures together
        from matplotlib.pyplot import show
        show()

    return segments
def split_hdf_to_segments(hdf_path, aircraft_info, fallback_dt=None,
                          validation_dt=None, fallback_relative_to_start=True,
                          draw=False, dest_dir=None, pre_file_kwargs={}):
    """
    Main method - analyses an HDF file for flight segments and splits each
    flight into a new segment appropriately.

    :param hdf_path: path to HDF file
    :type hdf_path: string
    :param aircraft_info: Information which identify the aircraft,
        specifically with the keys 'Tail Number', 'MSN'...
    :type aircraft_info: Dict
    :param fallback_dt: A datetime which is as close to the end of the data
        file as possible. Used to replace elements of datetimes which are
        not available in the hdf file (e.g. YEAR not being recorded)
    :type fallback_dt: datetime
    :param validation_dt: Passed through to calculate_fallback_dt and
        append_segment_info.
    :param fallback_relative_to_start: Passed through to
        calculate_fallback_dt.
    :type fallback_relative_to_start: Boolean
    :param draw: Whether to use matplotlib to plot the flight
    :type draw: Boolean
    :param dest_dir: Destination directory, if None, the source file
        directory is used
    :type dest_dir: str
    :param pre_file_kwargs: Pre-file analysis keyword arguments. Only
        logged and unpacked into the hook call, never mutated here.
    :type pre_file_kwargs: dict
    :returns: List of Segments
    :rtype: List of Segment recordtypes
        ('slice type part duration path hash')
    """
    logger.debug("Processing file: %s", hdf_path)

    if dest_dir is None:
        dest_dir = os.path.dirname(hdf_path)

    if draw:
        # Imported lazily so plotting is only required when requested.
        from analysis_engine.plot_flight import plot_essential
        plot_essential(hdf_path)

    with hdf_file(hdf_path) as hdf:
        # Confirm aircraft tail for the entire datafile
        logger.debug("Validating aircraft matches that recorded in data")
        validate_aircraft(aircraft_info, hdf)

        # now we know the Aircraft is correct, go and do the PRE FILE ANALYSIS
        hook = hooks.PRE_FILE_ANALYSIS
        if hook:
            # Look the name up with a safe default: the previous nested
            # getattr(hook, '__name__') had no default and could raise for
            # callables without a __name__, purely for the sake of logging.
            hook_name = getattr(hook, 'func_name',
                                getattr(hook, '__name__', repr(hook)))
            logger.debug(
                "Performing PRE_FILE_ANALYSIS action '%s' with options: %s",
                hook_name, pre_file_kwargs)
            hook(hdf, aircraft_info, **pre_file_kwargs)
        else:
            logger.info("No PRE_FILE_ANALYSIS actions to perform")

        # ARINC 717 data has frames or superframes. ARINC 767 will be split
        # on a minimum boundary of 4 seconds for the analyser.
        boundary = 64 if hdf.superframe_present else 4

        segment_tuples = split_segments(hdf, aircraft_info)

        frame_doubled = aircraft_info.get('Frame Doubled', False)
        fallback_dt = calculate_fallback_dt(hdf, fallback_dt, validation_dt,
                                            fallback_relative_to_start,
                                            frame_doubled)

    # process each segment (into a new file) having closed original hdf_path
    # The source name without extension is the same for every segment.
    dest_stem = os.path.splitext(os.path.basename(hdf_path))[0]

    segments = []
    previous_stop_dt = None
    for part, (segment_type, segment_slice, start_padding) in enumerate(
            segment_tuples, start=1):
        # write segment to new split file (.001)
        dest_basename = dest_stem + '.%03d.hdf5' % part
        dest_path = os.path.join(dest_dir, dest_basename)
        logger.debug("Writing segment %d: %s", part, dest_path)

        write_segment(hdf_path, segment_slice, dest_path, boundary,
                      submasks=('arinc', 'invalid_states', 'padding',
                                'saturation'))

        # adjust fallback time to account for any padding added at start of
        # segment. fallback_dt is treated as optional further down
        # (``if fallback_dt``), so do not subtract from a missing value.
        if fallback_dt:
            segment_start_dt = fallback_dt - timedelta(seconds=start_padding)
        else:
            segment_start_dt = None

        segment = append_segment_info(dest_path, segment_type,
                                      segment_slice, part,
                                      fallback_dt=segment_start_dt,
                                      validation_dt=validation_dt,
                                      aircraft_info=aircraft_info)

        # Starts up to 4 seconds before the previous stop are tolerated.
        if (previous_stop_dt and
                segment.start_dt < previous_stop_dt - timedelta(seconds=4)):
            # In theory, this should not happen - but be warned of superframe
            # padding?
            logger.warning(
                "Segment start_dt '%s' comes before the previous segment "
                "ended '%s'", segment.start_dt, previous_stop_dt)
        previous_stop_dt = segment.stop_dt

        if fallback_dt:
            # move the fallback_dt on to be relative to start of next segment
            # slice
            fallback_dt += timedelta(
                seconds=(segment_slice.stop - segment_slice.start))

        segments.append(segment)
        if draw:
            plot_essential(dest_path)

    if draw:
        # show all figures together
        from matplotlib.pyplot import show
        show()

    return segments
def test_write_segment__start_and_stop(self):
    '''
    Tests that the correct segment of the dataset within the path matching
    'series/<Param Name>/data' defined by the slice has been written to the
    destination file while other datasets and attributes are unaffected.
    Slice has a start and stop.
    '''
    segment = slice(10, 17)
    dest = write_segment(self.hdf_path, segment, self.out_path, boundary=4)
    self.assertEqual(dest, self.out_path)

    # The test expects data for 8 to 20 although 10 to 17 was requested.
    frame_boundary_segment = slice(8, 20)

    with h5py.File(dest, 'r') as hdf_file:
        # 'IVV' - 1Hz parameter.
        ivv_group = hdf_file['series']['IVV']
        self.assertEqual(ivv_group.attrs['frequency'], self.ivv_frequency)
        self.assertEqual(ivv_group.attrs['supf_offset'],
                         self.ivv_supf_offset)
        ivv_result = np.ma.masked_array(ivv_group['data'][:],
                                        mask=ivv_group['mask'][:])
        ivv_expected_result = np.arange(
            frame_boundary_segment.start * self.ivv_frequency,
            frame_boundary_segment.stop * self.ivv_frequency,
            dtype=float)
        # 2 samples before the requested start and 3 after the requested
        # stop are expected to be masked; the 7 requested samples are not.
        ivv_expected_result = np.ma.masked_array(
            ivv_expected_result,
            mask=[True] * 2 + [False] * 7 + [True] * 3)
        self.assertEqual(ivv_result.tolist(), ivv_expected_result.tolist())

        # 'WOW' - 4Hz parameter.
        wow_group = hdf_file['series']['WOW']
        self.assertEqual(wow_group.attrs['frequency'], self.wow_frequency)
        wow_result = wow_group['data'][:]
        wow_expected_result = np.arange(
            frame_boundary_segment.start * self.wow_frequency,
            frame_boundary_segment.stop * self.wow_frequency,
            dtype=float)
        self.assertEqual(list(wow_result), list(wow_expected_result))

        # 'DME' - 0.25Hz parameter.
        dme_group = hdf_file['series']['DME']
        self.assertEqual(dme_group.attrs['frequency'], self.dme_frequency)
        dme_result = dme_group['data'][:]
        # array([ 2., 3., 4.])
        dme_expected_result = np.arange(2, 5, dtype=float)
        self.assertEqual(list(dme_result), list(dme_expected_result))

        self.assertEqual(
            hdf_file.attrs['duration'],
            frame_boundary_segment.stop - frame_boundary_segment.start)

    # Write segment on superframe boundary.
    dest = write_segment(self.hdf_path, segment, self.out_path, boundary=64)
    self.assertEqual(dest, self.out_path)

    with h5py.File(dest, 'r') as hdf_file:
        # 'IVV' - 1Hz parameter.
        ivv_group = hdf_file['series']['IVV']
        self.assertEqual(ivv_group.attrs['frequency'], self.ivv_frequency)
        self.assertEqual(ivv_group.attrs['supf_offset'],
                         self.ivv_supf_offset)
        ivv_result = ivv_group['data'][:]
        ivv_expected_result = np.arange(64 * self.ivv_frequency,
                                        dtype=float)
        self.assertEqual(list(ivv_result), list(ivv_expected_result))

        # 'WOW' - 4Hz parameter.
        wow_group = hdf_file['series']['WOW']
        self.assertEqual(wow_group.attrs['frequency'], self.wow_frequency)
        wow_result = wow_group['data'][:]
        wow_expected_result = np.arange(64 * self.wow_frequency,
                                        dtype=float)
        self.assertEqual(list(wow_result), list(wow_expected_result))

        # 'DME' - 0.25Hz parameter.
        dme_group = hdf_file['series']['DME']
        self.assertEqual(dme_group.attrs['frequency'], self.dme_frequency)
        dme_result = dme_group['data'][:]
        dme_expected_result = np.arange(64 * self.dme_frequency,
                                        dtype=float)
        self.assertEqual(list(dme_result), list(dme_expected_result))

        self.assertEqual(hdf_file.attrs['duration'], 64)
def test_write_segment__start_and_stop(self):
    '''
    Tests that the correct segment of the dataset within the path matching
    'series/<Param Name>/data' defined by the slice has been written to the
    destination file while other datasets and attributes are unaffected.
    Slice has a start and stop.
    '''
    segment = slice(10, 20)
    dest = write_segment(self.hdf_path, segment, self.out_path,
                         supf_boundary=False)
    self.assertEqual(dest, self.out_path)

    # The test expects data for 8 to 20 although 10 to 20 was requested.
    frame_boundary_segment = slice(8, 20)

    with h5py.File(dest, 'r') as hdf_file:
        # 'IVV' - 1Hz parameter.
        ivv_group = hdf_file['series']['IVV']
        self.assertEqual(ivv_group.attrs['frequency'], self.ivv_frequency)
        self.assertEqual(ivv_group.attrs['supf_offset'],
                         self.ivv_supf_offset)
        ivv_result = ivv_group['data'][:]
        # Builtin float is what the removed np.float alias pointed at.
        ivv_expected_result = np.arange(
            frame_boundary_segment.start * self.ivv_frequency,
            frame_boundary_segment.stop * self.ivv_frequency,
            dtype=float)
        self.assertEqual(list(ivv_result), list(ivv_expected_result))

        # 'WOW' - 4Hz parameter.
        wow_group = hdf_file['series']['WOW']
        self.assertEqual(wow_group.attrs['frequency'], self.wow_frequency)
        wow_result = wow_group['data'][:]
        wow_expected_result = np.arange(
            frame_boundary_segment.start * self.wow_frequency,
            frame_boundary_segment.stop * self.wow_frequency,
            dtype=float)
        self.assertEqual(list(wow_result), list(wow_expected_result))

        # 'DME' - 0.25Hz parameter.
        dme_group = hdf_file['series']['DME']
        self.assertEqual(dme_group.attrs['frequency'], self.dme_frequency)
        dme_result = dme_group['data'][:]
        # array([ 2., 3., 4.])
        dme_expected_result = np.arange(2, 5, dtype=float)
        self.assertEqual(list(dme_result), list(dme_expected_result))

        self.assertEqual(
            hdf_file.attrs['duration'],
            frame_boundary_segment.stop - frame_boundary_segment.start)

    # Write segment on superframe boundary.
    dest = write_segment(self.hdf_path, segment, self.out_path,
                         supf_boundary=True)
    self.assertEqual(dest, self.out_path)

    with h5py.File(dest, 'r') as hdf_file:
        # 'IVV' - 1Hz parameter.
        ivv_group = hdf_file['series']['IVV']
        self.assertEqual(ivv_group.attrs['frequency'], self.ivv_frequency)
        self.assertEqual(ivv_group.attrs['supf_offset'],
                         self.ivv_supf_offset)
        ivv_result = ivv_group['data'][:]
        ivv_expected_result = np.arange(64 * self.ivv_frequency,
                                        dtype=float)
        self.assertEqual(list(ivv_result), list(ivv_expected_result))

        # 'WOW' - 4Hz parameter.
        wow_group = hdf_file['series']['WOW']
        self.assertEqual(wow_group.attrs['frequency'], self.wow_frequency)
        wow_result = wow_group['data'][:]
        wow_expected_result = np.arange(64 * self.wow_frequency,
                                        dtype=float)
        self.assertEqual(list(wow_result), list(wow_expected_result))

        # 'DME' - 0.25Hz parameter.
        dme_group = hdf_file['series']['DME']
        self.assertEqual(dme_group.attrs['frequency'], self.dme_frequency)
        dme_result = dme_group['data'][:]
        dme_expected_result = np.arange(64 * self.dme_frequency,
                                        dtype=float)
        self.assertEqual(list(dme_result), list(dme_expected_result))

        self.assertEqual(hdf_file.attrs['duration'], 64)
def split_hdf_to_segments(hdf_path, aircraft_info, fallback_dt=None,
                          draw=False, dest_dir=None):
    """
    Main method - analyses an HDF file for flight segments and splits each
    flight into a new segment appropriately.

    :param hdf_path: path to HDF file
    :type hdf_path: string
    :param aircraft_info: Information which identify the aircraft,
        specifically with the keys 'Tail Number', 'MSN'...
    :type aircraft_info: Dict
    :param fallback_dt: A datetime which is as close to the end of the data
        file as possible. Used to replace elements of datetimes which are
        not available in the hdf file (e.g. YEAR not being recorded)
    :type fallback_dt: datetime
    :param draw: Whether to use matplotlib to plot the flight
    :type draw: Boolean
    :param dest_dir: Destination directory, if None, the source file
        directory is used
    :type dest_dir: str
    :returns: List of Segments
    :rtype: List of Segment recordtypes
        ('slice type part duration path hash')
    """
    logger.info("Processing file: %s", hdf_path)

    if dest_dir is None:
        dest_dir = os.path.dirname(hdf_path)

    if draw:
        # Imported lazily so plotting is only required when requested.
        from analysis_engine.plot_flight import plot_essential
        plot_essential(hdf_path)

    with hdf_file(hdf_path) as hdf:
        superframe_present = hdf.superframe_present

        # Confirm aircraft tail for the entire datafile
        logger.info("Validating aircraft matches that recorded in data")
        validate_aircraft(aircraft_info, hdf)

        # now we know the Aircraft is correct, go and do the PRE FILE ANALYSIS
        hook = hooks.PRE_FILE_ANALYSIS
        if hook:
            # __name__ exists on Python 2 and 3 functions (func_name is
            # Python 2 only); fall back to repr() for other callables so
            # that logging can never be the reason the hook fails.
            logger.info("Performing PRE_FILE_ANALYSIS analysis: %s",
                        getattr(hook, '__name__', repr(hook)))
            hook(hdf, aircraft_info)
        else:
            logger.info("No PRE_FILE_ANALYSIS actions to perform")

        segment_tuples = split_segments(hdf)
        if fallback_dt:
            # fallback_dt is relative to the end of the data; remove the data
            # duration to make it relative to the start of the data
            secs = hdf.duration
            fallback_dt -= timedelta(seconds=secs)
            logger.info("Reduced fallback_dt by %ddays %dhr %dmin to %s",
                        secs // 86400, secs % 86400 // 3600,
                        secs % 86400 % 3600 // 60, fallback_dt)

    # process each segment (into a new file) having closed original hdf_path
    # The source name without extension is the same for every segment.
    dest_stem = os.path.splitext(os.path.basename(hdf_path))[0]

    segments = []
    previous_stop_dt = None
    for part, (segment_type, segment_slice) in enumerate(segment_tuples,
                                                         start=1):
        # write segment to new split file (.001)
        dest_basename = dest_stem + '.%03d.hdf5' % part
        dest_path = os.path.join(dest_dir, dest_basename)
        logger.debug("Writing segment %d: %s", part, dest_path)

        write_segment(hdf_path, segment_slice, dest_path,
                      supf_boundary=superframe_present)

        segment = append_segment_info(dest_path, segment_type,
                                      segment_slice, part,
                                      fallback_dt=fallback_dt)

        if previous_stop_dt and segment.start_dt < previous_stop_dt:
            # In theory, this should not happen - but be warned of superframe
            # padding?
            logger.warning(
                "Segment start_dt '%s' comes before the previous segment "
                "ended '%s'", segment.start_dt, previous_stop_dt)
        previous_stop_dt = segment.stop_dt

        if fallback_dt:
            # move the fallback_dt on to be relative to start of next segment
            fallback_dt += segment.stop_dt - segment.start_dt

        segments.append(segment)
        if draw:
            plot_essential(dest_path)

    if draw:
        # show all figures together
        from matplotlib.pyplot import show
        show()

    return segments