Exemplo n.º 1
0
    def calculated_metadata(self, df=None, geometries=True, clean_cols=True, clean_rows=True):
        """Summarise the z/t extents and the geometry of every trajectory.

        :param df: DataFrame exposing ``trajectory``, ``t``, ``x``, ``y`` and
            ``z`` columns. When ``None`` it is built with ``self.to_dataframe``.
        :param geometries: when true, derive a geometry for each trajectory from
            the (x, y) pairs yielded by ``unique_justseen``; when false every
            trajectory's ``geometry`` is ``None``.
        :param clean_cols: forwarded to ``self.to_dataframe`` when ``df`` is ``None``.
        :param clean_rows: forwarded to ``self.to_dataframe`` when ``df`` is ``None``.
        :return: ``Metadata`` namedtuple (``min_t``, ``max_t``, ``trajectories``),
            where ``trajectories`` maps each trajectory id to a ``Trajectory``
            namedtuple (``min_z``, ``max_z``, ``min_t``, ``max_t``, ``first_loc``,
            ``geometry``).
        """
        if df is None:
            df = self.to_dataframe(clean_cols=clean_cols, clean_rows=clean_rows)

        # Build the record types once rather than once per trajectory.
        trajectory = namedtuple('Trajectory', ['min_z', 'max_z', 'min_t', 'max_t', 'first_loc', 'geometry'])
        meta = namedtuple('Metadata', ['min_t', 'max_t', 'trajectories'])

        trajectories = {}
        for tid, tgroup in df.groupby('trajectory'):
            # Order by time so the first row is the earliest observation.
            tgroup = tgroup.sort_values('t')
            first_row = tgroup.iloc[0]
            first_loc = Point(first_row.x, first_row.y)

            geometry = None
            if geometries:
                coords = list(unique_justseen(zip(tgroup.x, tgroup.y)))
                if len(coords) > 1:
                    geometry = LineString(coords)
                elif len(coords) == 1:
                    # A trajectory that never moves is represented by its
                    # single location. The length must be compared here:
                    # comparing the list itself to 1 is always False.
                    geometry = first_loc

            trajectories[tid] = trajectory(
                min_z=tgroup.z.min(),
                max_z=tgroup.z.max(),
                min_t=tgroup.t.min(),
                max_t=tgroup.t.max(),
                first_loc=first_loc,
                geometry=geometry
            )

        return meta(
            min_t=df.t.min(),
            max_t=df.t.max(),
            trajectories=trajectories
        )
Exemplo n.º 2
0
def trajectory_calculated_metadata(df, axes, geometries=True):
    """Build per-trajectory and overall z/t summaries for a DataFrame.

    :param df: DataFrame holding the columns named by ``axes``.
    :param axes: object whose ``trajectory``, ``t``, ``x``, ``y`` and ``z``
        attributes give the corresponding column names in ``df``.
    :param geometries: when true, each trajectory's geometry is built from
        all of its non-null (x, y) pairs as filtered by ``unique_justseen``;
        when false, only the (x, y) of its first row (by time) is used.
    :return: ``trajectories_meta`` carrying the overall z/t minima and maxima
        plus a dict mapping trajectory id to a ``trajectory_meta``.
    """
    per_trajectory = {}

    for tid, rows in df.groupby(axes.trajectory):
        ordered = rows.sort_values(axes.t)

        if not geometries:
            # Cheap path: represent the trajectory by its first position only.
            start = ordered.iloc[0]
            points = [(start[axes.x], start[axes.y])]
        else:
            # Keep only rows where both coordinates are present.
            has_position = ordered[axes.x].notnull() & ordered[axes.y].notnull()
            xs = ordered.loc[has_position, axes.x].tolist()
            ys = ordered.loc[has_position, axes.y].tolist()
            points = list(unique_justseen(zip(xs, ys)))

        # Several points form a line, one point stays a point, none gives None.
        if len(points) > 1:
            shape = LineString(points)
        elif len(points) == 1:
            shape = Point(points[0])
        else:
            shape = None

        z_values = ordered[axes.z]
        t_values = ordered[axes.t]
        per_trajectory[tid] = trajectory_meta(min_z=z_values.min(),
                                              max_z=z_values.max(),
                                              min_t=t_values.min(),
                                              max_t=t_values.max(),
                                              geometry=shape)

    all_z = df[axes.z]
    all_t = df[axes.t]
    return trajectories_meta(min_z=all_z.min(),
                             max_z=all_z.max(),
                             min_t=all_t.min(),
                             max_t=all_t.max(),
                             trajectories=per_trajectory)
Exemplo n.º 3
0
    def calculated_metadata(self,
                            df=None,
                            geometries=True,
                            clean_cols=True,
                            clean_rows=True):
        """Summarise each profile and the overall extents of the dataset.

        :param df: DataFrame exposing ``profile``, ``t``, ``x``, ``y`` and
            ``z`` columns. When ``None`` it is built with ``self.to_dataframe``.
        :param geometries: when true, derive an overall geometry from the
            (x, y) pairs yielded by ``unique_justseen``; when false the
            returned ``geometry`` is ``None``.
        :param clean_cols: forwarded to ``self.to_dataframe`` when ``df`` is ``None``.
        :param clean_rows: forwarded to ``self.to_dataframe`` when ``df`` is ``None``.
        :return: ``Metadata`` namedtuple (``min_z``, ``max_z``, ``min_t``,
            ``max_t``, ``profiles``, ``first_loc``, ``geometry``), where
            ``profiles`` maps each profile id to a ``Profile`` namedtuple.
        """
        if df is None:
            df = self.to_dataframe(clean_cols=clean_cols,
                                   clean_rows=clean_rows)

        profile_record = namedtuple('Profile',
                                    ['min_z', 'max_z', 't', 'x', 'y', 'loc'])

        profiles = {}
        for profile_id, rows in df.groupby('profile'):
            ordered = rows.sort_values('t')
            # The earliest row supplies the profile's time and position.
            earliest = ordered.iloc[0]
            profiles[profile_id] = profile_record(
                min_z=ordered.z.min(),
                max_z=ordered.z.max(),
                t=earliest.t,
                x=earliest.x,
                y=earliest.y,
                loc=Point(earliest.x, earliest.y))

        # The first row of the frame as given (not re-sorted) is the start.
        start = df.iloc[0]
        first_loc = Point(start.x, start.y)

        geometry = None
        if geometries:
            track = list(unique_justseen(zip(df.x, df.y)))
            if len(track) == 1:
                geometry = first_loc
            elif len(track) > 1:
                geometry = LineString(track)

        metadata_record = namedtuple('Metadata', [
            'min_z', 'max_z', 'min_t', 'max_t', 'profiles', 'first_loc',
            'geometry'
        ])
        return metadata_record(min_z=df.z.min(),
                               max_z=df.z.max(),
                               min_t=df.t.min(),
                               max_t=df.t.max(),
                               profiles=profiles,
                               first_loc=first_loc,
                               geometry=geometry)
Exemplo n.º 4
0
def get_temporal_attributes(df, axes=None):
    """ Use values in a dataframe to set temporal attributes for the eventual netCDF file
    Attribute names come from https://www.nodc.noaa.gov/data/formats/netcdf/v2.0/

    The reported resolution is the most frequent spacing between the time
    values yielded by ``unique_justseen`` when that spacing accounts for more
    than 75% of all intervals; otherwise it is the total time span divided by
    the number of those time values.

    :param df: data (Pandas DataFrame)
    :param axes: axis-name mapping, resolved through ``get_default_axes``;
        only its ``t`` entry (the time column name) is used here.
    :return: nested dictionary of variable and global attributes
    """

    axes = get_default_axes(axes)
    mint = df[axes.t].min()
    maxt = df[axes.t].max()

    times = pd.DatetimeIndex(unique_justseen(df[axes.t]))
    dt_index_diff = times[1:] - times[:-1]
    # Sorted by frequency, so the first entry is the modal interval.
    dt_counts = dt_index_diff.value_counts(sort=True)

    if dt_counts.size > 0 and dt_counts.values[0] / (len(times) - 1) > 0.75:
        mode_value = dt_counts.index[0]
    else:
        # Calculate a static resolution
        mode_value = ((maxt - mint) / len(times))

    # Format each bound once; both attribute groups report the same strings.
    iso_format = '%Y-%m-%dT%H:%M:%SZ'
    start = mint.strftime(iso_format)
    end = maxt.strftime(iso_format)

    return {
        'variables': {
            axes.t: {
                'attributes': {
                    'actual_min': start,
                    'actual_max': end,
                }
            },
        },
        'attributes': {
            'time_coverage_start': start,
            'time_coverage_end': end,
            # Lowercase 's' is the supported seconds alias; the uppercase
            # 'S' spelling is deprecated in pandas >= 2.2.
            'time_coverage_duration': (maxt - mint).round('1s').isoformat(),
            'time_coverage_resolution': mode_value.round('1s').isoformat()
        }
    }
def profile_calculated_metadata(df, axes, geometries=True):
    """Build per-profile and overall z/t summaries for a DataFrame.

    :param df: DataFrame holding the columns named by ``axes``.
    :param axes: object whose ``profile``, ``t``, ``x``, ``y`` and ``z``
        attributes give the corresponding column names in ``df``.
    :param geometries: when true, the overall geometry is built from all
        non-null (x, y) pairs in ``df`` as filtered by ``unique_justseen``;
        when false, it is built from the per-profile point geometries.
    :return: ``profiles_meta`` carrying the overall z/t minima and maxima,
        a dict mapping profile id to a ``profile_meta``, and the geometry.
    """
    per_profile = {}
    for pid, rows in df.groupby(axes.profile):
        # The earliest row supplies the profile's time and position.
        earliest = rows.sort_values(axes.t).iloc[0]
        lon = earliest[axes.x]
        lat = earliest[axes.y]
        z_values = rows[axes.z]
        per_profile[pid] = profile_meta(min_z=z_values.min(),
                                        max_z=z_values.max(),
                                        t=earliest[axes.t],
                                        x=lon,
                                        y=lat,
                                        id=pid,
                                        geometry=Point(lon, lat))

    if not geometries:
        # Connect the profile locations themselves.
        points = [entry.geometry for entry in per_profile.values()]
    else:
        # Keep only rows where both coordinates are present.
        has_position = df[axes.x].notnull() & df[axes.y].notnull()
        xs = df.loc[has_position, axes.x].tolist()
        ys = df.loc[has_position, axes.y].tolist()
        points = list(unique_justseen(zip(xs, ys)))

    # Several points form a line, one point stays a point, none gives None.
    if len(points) > 1:
        shape = LineString(points)
    elif len(points) == 1:
        shape = Point(points[0])
    else:
        shape = None

    all_z = df[axes.z]
    all_t = df[axes.t]
    return profiles_meta(min_z=all_z.min(),
                         max_z=all_z.max(),
                         min_t=all_t.min(),
                         max_t=all_t.max(),
                         profiles=per_profile,
                         geometry=shape)