def setUp(self):
    """Build the fixtures shared by the tests in this case.

    Reads ``ctd_filepath`` through ``SlocumReader``, keeps the standardized
    frame on ``self.df`` and stores the result of ``assign_profiles`` (called
    with ``tsint=10``) on ``self.profiled_dataset``.
    """
    super(TestFindProfile, self).setUp()

    # Standardize the raw reader output before looking for profile breaks.
    self.df = SlocumReader(ctd_filepath).standardize()
    self.profiled_dataset = assign_profiles(self.df, tsint=10)
def process_dataset(
    file,
    reader_class,
    tsint=None,
    filter_z=None,
    filter_points=None,
    filter_time=None,
    filter_distance=None,
):
    """Read ``file``, split it into profiles and drop profiles failing the filters.

    Parameters
    ----------
    file
        Path handed to ``reader_class``. Must not be ``None``.
    reader_class
        Callable invoked as ``reader_class(file)``; the resulting object must
        provide ``standardize()`` and a ``mode`` attribute.
    tsint
        Forwarded unchanged to ``assign_profiles``.
    filter_z, filter_points, filter_time, filter_distance
        Forwarded to ``filter_profile_depth`` (``below``),
        ``filter_profile_number_of_points`` (``points_condition``),
        ``filter_profile_timeperiod`` (``timespan_condition``) and
        ``filter_profile_distance`` (``distance_condition``) respectively.
        NOTE(review): the log text suggests metres / count / seconds / metres;
        confirm against the filter implementations.

    Returns
    -------
    tuple
        ``(filtered, reader.mode)`` on success, or ``(None, None)`` when the
        standardized data has no ``z`` or ``t`` column, or when
        ``assign_profiles`` returns ``None``.

    Raises
    ------
    ValueError
        If ``file`` is ``None``. A ``ValueError`` raised while reading or
        filtering is logged through ``L.exception`` and re-raised.
    """
    # Check filename
    if file is None:
        raise ValueError('Must specify path to combined ASCII file')

    try:
        reader = reader_class(file)
        frame = reader.standardize()

        # Both a depth axis and a time axis are needed to detect profiles.
        available = frame.columns
        if 'z' not in available:
            L.warning("No Z axis found - Skipping {}".format(file))
            return (None, None)
        if 't' not in available:
            L.warning("No T axis found - Skipping {}".format(file))
            return (None, None)

        # Find profile breaks
        profiles = assign_profiles(frame, tsint=tsint)
        if profiles is None:
            # Shortcut for empty dataframes
            return (None, None)

        # Filter data. Only the last filter asks for a reindex.
        original_profiles = len(profiles.profile.unique())
        filtered, rm_depth = filter_profile_depth(
            profiles, below=filter_z, reindex=False
        )
        filtered, rm_points = filter_profile_number_of_points(
            filtered, points_condition=filter_points, reindex=False
        )
        filtered, rm_time = filter_profile_timeperiod(
            filtered, timespan_condition=filter_time, reindex=False
        )
        filtered, rm_distance = filter_profile_distance(
            filtered, distance_condition=filter_distance, reindex=True
        )
        total_filtered = sum((rm_depth, rm_points, rm_time, rm_distance))

        # The summary is handed to the logger as a single tuple object.
        summary = (
            'Filtered {}/{} profiles from {}'.format(total_filtered, original_profiles, file),
            'Depth ({}m): {}'.format(filter_z, rm_depth),
            'Points ({}): {}'.format(filter_points, rm_points),
            'Time ({}s): {}'.format(filter_time, rm_time),
            'Distance ({}m): {}'.format(filter_distance, rm_distance),
        )
        L.info(summary)

        # Downscale profile
        filtered['profile'] = filtered.profile.astype('int32')
        # Profiles are 1-indexed, so add one to each
        filtered['profile'] = filtered.profile.values + 1

        # TODO: Backfill U/V?
        # TODO: Backfill X/Y?
    except ValueError as e:
        L.exception('{} - Skipping'.format(e))
        raise
    else:
        return filtered, reader.mode
def process_dataset(file, reader_class, tsint=None, filter_z=None, filter_points=None, filter_time=None, filter_distance=None, z_axis_method=1):
    """Read ``file``, split it into profiles, filter them and tag the extras.

    Parameters
    ----------
    file
        Path handed to ``reader_class``. Must not be ``None``.
    reader_class
        Callable invoked as ``reader_class(file)``; the resulting object must
        provide ``standardize(z_axis_method=...)``, ``extras()`` and a
        ``mode`` attribute.
    tsint
        Forwarded unchanged to ``assign_profiles``.
    filter_z, filter_points, filter_time, filter_distance
        Forwarded to ``filter_profile_depth`` (``below``),
        ``filter_profile_number_of_points`` (``points_condition``),
        ``filter_profile_timeperiod`` (``timespan_condition``) and
        ``filter_profile_distance`` (``distance_condition``) respectively.
    z_axis_method
        Forwarded unchanged to ``reader.standardize``.

    Returns
    -------
    tuple
        ``(filtered, extras, reader.mode)`` on success, or
        ``(None, None, None)`` when the standardized data has no ``z`` or
        ``t`` column, or when ``assign_profiles`` returns ``None``.

    Raises
    ------
    ValueError
        If ``file`` is ``None``. A ``ValueError`` raised while reading or
        filtering is logged through ``L.exception`` and re-raised.
    """
    # Check filename
    if file is None:
        raise ValueError('Must specify path to combined ASCII file')

    try:
        reader = reader_class(file)
        data = reader.standardize(z_axis_method=z_axis_method)
        extras = reader.extras()

        if 'z' not in data.columns:
            L.warning("No Z axis found - Skipping {}".format(file))
            return None, None, None

        if 't' not in data.columns:
            L.warning("No T axis found - Skipping {}".format(file))
            return None, None, None

        # Find profile breaks
        profiles = assign_profiles(data, tsint=tsint)

        # Shortcut for empty dataframes
        if profiles is None:
            return None, None, None

        # Filter data. Each filter returns the surviving rows, the number of
        # profiles it removed and a third value that is reported in the log
        # (presumably the condition actually applied - confirm in the filters).
        # Only the last filter asks for a reindex.
        original_profiles = len(profiles.profile.unique())
        filtered, rm_depth, did_depth = filter_profile_depth(profiles, below=filter_z, reindex=False)
        filtered, rm_points, did_points = filter_profile_number_of_points(filtered, points_condition=filter_points, reindex=False)
        filtered, rm_time, did_time = filter_profile_timeperiod(filtered, timespan_condition=filter_time, reindex=False)
        filtered, rm_distance, did_distance = filter_profile_distance(filtered, distance_condition=filter_distance, reindex=True)
        total_filtered = rm_depth + rm_points + rm_time + rm_distance

        # The summary is handed to the logger as a single tuple object.
        L.info(
            (
                'Filtered {}/{} profiles from {}'.format(total_filtered, original_profiles, os.path.basename(file)),
                'Depth ({}m): {}'.format(did_depth, rm_depth),
                'Points ({}): {}'.format(did_points, rm_points),
                'Time ({}s): {}'.format(did_time, rm_time),
                'Distance ({}m): {}'.format(did_distance, rm_distance),
            )
        )

        # Downscale profile
        filtered['profile'] = filtered.profile.astype('int32')
        # Profiles are 1-indexed, so add one to each
        filtered['profile'] = filtered.profile.values + 1

        # TODO: Backfill U/V?
        # TODO: Backfill X/Y?

        # Combine extra data, which has a time index already.
        # This puts the profile information into the extras dataframe.
        if not extras.empty:
            try:
                # Match every extras row (by its index) to the nearest 't' in
                # the filtered data, no further than 10 minutes away.
                merge = pd.merge_asof(
                    extras,
                    filtered[['t', 'profile']],
                    left_index=True,
                    right_index=False,
                    right_on='t',
                    direction='nearest',
                    tolerance=pd.Timedelta(minutes=10)
                ).set_index(extras.index)
                extras['profile'] = merge.profile.ffill()
            except Exception as e:
                # Best effort: a failed merge must not discard the filtered
                # profiles. Only Exception is caught, so KeyboardInterrupt and
                # SystemExit still propagate instead of being swallowed.
                L.error(f"Could not merge 'extras' data, skipping: {e}")

    except ValueError as e:
        L.exception('{} - Skipping'.format(e))
        raise

    return filtered, extras, reader.mode