Пример #1
0
    def setUp(self):
        """Build the standardized frame and its profiled version for each test.

        Stores the output of ``SlocumReader(ctd_filepath).standardize()`` on
        ``self.df`` and the result of ``assign_profiles`` (called with
        ``tsint=10``) on ``self.profiled_dataset``.
        """
        super(TestFindProfile, self).setUp()

        # Read the fixture file and keep the standardized result around so
        # individual tests can compare against the un-profiled data.
        self.df = SlocumReader(ctd_filepath).standardize()

        self.profiled_dataset = assign_profiles(self.df, tsint=10)
Пример #2
0
def process_dataset(
    file,
    reader_class,
    tsint=None,
    filter_z=None,
    filter_points=None,
    filter_time=None,
    filter_distance=None,
):
    """Standardize a file, assign profiles and drop the filtered-out ones.

    Args:
        file: Path handed to ``reader_class``; must not be ``None``.
        reader_class: Callable invoked as ``reader_class(file)``. The object
            it returns must provide ``standardize()`` and a ``mode``
            attribute.
        tsint: Forwarded to ``assign_profiles`` as ``tsint``.
        filter_z: Forwarded to ``filter_profile_depth`` as ``below``.
        filter_points: Forwarded to ``filter_profile_number_of_points`` as
            ``points_condition``.
        filter_time: Forwarded to ``filter_profile_timeperiod`` as
            ``timespan_condition``.
        filter_distance: Forwarded to ``filter_profile_distance`` as
            ``distance_condition``.

    Returns:
        ``(filtered, reader.mode)`` on success. ``(None, None)`` when the
        standardized data has no ``z`` or ``t`` column, or when
        ``assign_profiles`` returns ``None``.

    Raises:
        ValueError: If ``file`` is ``None``. A ``ValueError`` raised while
            reading or filtering is logged and re-raised.
    """
    if file is None:
        raise ValueError('Must specify path to combined ASCII file')

    try:
        reader = reader_class(file)
        standardized = reader.standardize()

        # Both axes are required before profiles can be assigned.
        if 'z' not in standardized.columns:
            L.warning("No Z axis found - Skipping {}".format(file))
            return None, None

        if 't' not in standardized.columns:
            L.warning("No T axis found - Skipping {}".format(file))
            return None, None

        profiled = assign_profiles(standardized, tsint=tsint)

        # assign_profiles signals "nothing to do" with None.
        if profiled is None:
            return None, None

        original_profiles = len(profiled.profile.unique())

        # Run the filters as a chain; only the final stage reindexes.
        by_depth, rm_depth = filter_profile_depth(
            profiled, below=filter_z, reindex=False
        )
        by_points, rm_points = filter_profile_number_of_points(
            by_depth, points_condition=filter_points, reindex=False
        )
        by_time, rm_time = filter_profile_timeperiod(
            by_points, timespan_condition=filter_time, reindex=False
        )
        filtered, rm_distance = filter_profile_distance(
            by_time, distance_condition=filter_distance, reindex=True
        )

        total_filtered = rm_depth + rm_points + rm_time + rm_distance

        # The tuple itself is the log message, so it is rendered through its
        # string representation rather than as separate lines.
        summary = (
            'Filtered {}/{} profiles from {}'.format(total_filtered, original_profiles, file),
            'Depth ({}m): {}'.format(filter_z, rm_depth),
            'Points ({}): {}'.format(filter_points, rm_points),
            'Time ({}s): {}'.format(filter_time, rm_time),
            'Distance ({}m): {}'.format(filter_distance, rm_distance),
        )
        L.info(summary)

        # Downcast to int32 and shift by one so profile ids are 1-indexed.
        filtered['profile'] = filtered.profile.astype('int32').values + 1

        # TODO: Backfill U/V?
        # TODO: Backfill X/Y?

    except ValueError as err:
        L.exception('{} - Skipping'.format(err))
        raise

    return filtered, reader.mode
Пример #3
0
def process_dataset(file,
                    reader_class,
                    tsint=None,
                    filter_z=None,
                    filter_points=None,
                    filter_time=None,
                    filter_distance=None,
                    z_axis_method=1):
    """Standardize a file, assign and filter profiles, and tag the extras.

    Args:
        file: Path handed to ``reader_class``; must not be ``None``.
        reader_class: Callable invoked as ``reader_class(file)``. The object
            it returns must provide ``standardize(z_axis_method=...)``,
            ``extras()`` and a ``mode`` attribute.
        tsint: Forwarded to ``assign_profiles`` as ``tsint``.
        filter_z: Forwarded to ``filter_profile_depth`` as ``below``.
        filter_points: Forwarded to ``filter_profile_number_of_points`` as
            ``points_condition``.
        filter_time: Forwarded to ``filter_profile_timeperiod`` as
            ``timespan_condition``.
        filter_distance: Forwarded to ``filter_profile_distance`` as
            ``distance_condition``.
        z_axis_method: Forwarded to ``reader.standardize``.

    Returns:
        ``(filtered, extras, reader.mode)`` on success. ``(None, None, None)``
        when the standardized data has no ``z`` or ``t`` column, or when
        ``assign_profiles`` returns ``None``.

    Raises:
        ValueError: If ``file`` is ``None``. A ``ValueError`` raised while
            reading or filtering is logged and re-raised.
    """
    # Check filename
    if file is None:
        raise ValueError('Must specify path to combined ASCII file')

    try:
        reader = reader_class(file)
        data = reader.standardize(z_axis_method=z_axis_method)
        extras = reader.extras()

        if 'z' not in data.columns:
            L.warning("No Z axis found - Skipping {}".format(file))
            return None, None, None

        if 't' not in data.columns:
            L.warning("No T axis found - Skipping {}".format(file))
            return None, None, None

        # Find profile breaks
        profiles = assign_profiles(data, tsint=tsint)
        # Shortcut for empty dataframes
        if profiles is None:
            return None, None, None

        # Filter data. Each filter returns the surviving rows, the number of
        # profiles it removed and a third value that is reported in the log
        # below; only the last filter reindexes.
        original_profiles = len(profiles.profile.unique())
        filtered, rm_depth,    did_depth    = filter_profile_depth(profiles, below=filter_z, reindex=False)
        filtered, rm_points,   did_points   = filter_profile_number_of_points(filtered, points_condition=filter_points, reindex=False)
        filtered, rm_time,     did_time     = filter_profile_timeperiod(filtered, timespan_condition=filter_time, reindex=False)
        filtered, rm_distance, did_distance = filter_profile_distance(filtered, distance_condition=filter_distance, reindex=True)
        total_filtered = rm_depth + rm_points + rm_time + rm_distance
        # The tuple itself is the log message, so it is rendered through its
        # string representation.
        L.info(
            (
                'Filtered {}/{} profiles from {}'.format(total_filtered, original_profiles, os.path.basename(file)),
                'Depth ({}m): {}'.format(did_depth, rm_depth),
                'Points ({}): {}'.format(did_points, rm_points),
                'Time ({}s): {}'.format(did_time, rm_time),
                'Distance ({}m): {}'.format(did_distance, rm_distance),
            )
        )

        # Downscale profile
        filtered['profile'] = filtered.profile.astype('int32')
        # Profiles are 1-indexed, so add one to each
        filtered['profile'] = filtered.profile.values + 1

        # TODO: Backfill U/V?
        # TODO: Backfill X/Y?

        # Combine extra data, which has a time index already.
        # This puts the profile information into the extras dataframe by
        # matching each extras row to the nearest 't' within 10 minutes.
        if not extras.empty:
            try:
                merge = pd.merge_asof(
                    extras,
                    filtered[['t', 'profile']],
                    left_index=True,
                    right_index=False,
                    right_on='t',
                    direction='nearest',
                    tolerance=pd.Timedelta(minutes=10)
                ).set_index(extras.index)
                extras['profile'] = merge.profile.ffill()

            # Best effort: a failed merge must not abort processing. Catch
            # Exception rather than BaseException so KeyboardInterrupt and
            # SystemExit still propagate instead of being logged and ignored.
            except Exception as e:
                L.error(f"Could not merge 'extras' data, skipping: {e}")

    except ValueError as e:
        L.exception('{} - Skipping'.format(e))
        raise

    return filtered, extras, reader.mode