Example No. 1
    def test_timeseries_extra_values(self):
        """
        The 'times' passed to add_variable map directly onto the file's time
        variable; any time indexes not found there are ignored.  The 'times'
        parameter should be the same length as the 'values' parameter.
        """
        filename = 'test_timeseries_extra_values.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = None
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [20, 21, 22, 23, 24, 25, 26, 27, 28]
        value_times = [0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs, times=value_times)
        ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('temperature').size == len(times)
        assert (nc.variables.get('temperature')[:] == np.asarray(values[0:6])).all()
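
The assertions above imply that only the values whose timestamps already exist in the file's time coordinate survive; the trailing three samples are dropped. A standalone sketch of that alignment (illustrative only, not the library's internals):

import numpy as np

times = [0, 1000, 2000, 3000, 4000, 5000]
values = [20, 21, 22, 23, 24, 25, 26, 27, 28]
value_times = [0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]

# Keep only the samples whose timestamps are present in the time coordinate.
keep = np.isin(value_times, times)
aligned = np.asarray(values)[keep]
assert (aligned == np.asarray(values[0:6])).all()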
Example No. 2
    def test_timeseries_profile(self):
        filename = 'test_timeseries_profile.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 1, 2]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)
        ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('z').positive == 'down'
        assert nc.variables.get('temperature').size == len(times) * len(verticals)
        assert (nc.variables.get('temperature')[:] == values.reshape((len(times), len(verticals)))).all()
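
The profile values are passed in flattened, time-major order (all depths for the first time step, then the next), which is why the final assertion reshapes to (time, z). A quick standalone check of that layout:

import numpy as np

times = [0, 1000, 2000, 3000, 4000, 5000]
verticals = [0, 1, 2]
values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))

grid = values.reshape((len(times), len(verticals)))
assert grid.shape == (6, 3)
assert (grid[0] == [20, 20, 20]).all()                  # first time step, every depth
assert (grid[:, 0] == [20, 21, 22, 23, 24, 25]).all()   # first depth, every time step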
Example No. 3
    def test_timeseries_many_variables(self):
        filename = 'test_timeseries_many_variables.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 1, 2]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
        bottom_values = [30, 31, 32, 33, 34, 35]
        full_masked = values.view(np.ma.MaskedArray)
        full_masked.mask = True
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature',        values=values, attributes=attrs)
        ts.add_variable('salinity',           values=values.reshape((len(times), len(verticals))))
        ts.add_variable('dissolved_oxygen',   values=full_masked, fillvalue=full_masked.fill_value)
        ts.add_variable('bottom_temperature', values=bottom_values, verticals=[60], unlink_from_profile=True, attributes=attrs)
        ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)
        assert (nc.variables.get('temperature')[:] == values.reshape((len(times), len(verticals)))).all()
        assert (nc.variables.get('salinity')[:] == values.reshape((len(times), len(verticals)))).all()
        assert nc.variables.get('dissolved_oxygen')[:].mask.all()
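
A note on the dissolved_oxygen pattern above: viewing the ndarray as a MaskedArray and setting mask=True marks every sample as missing, so only the fill value is written to disk, which is what the final assertion checks. A standalone illustration:

import numpy as np

values = np.repeat([20, 21, 22, 23, 24, 25], 3)
full_masked = values.view(np.ma.MaskedArray)
full_masked.mask = True

assert full_masked.mask.all()          # every sample is flagged as missing
assert np.ma.count(full_masked) == 0   # zero unmasked values remain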
Example No. 4
    def test_timeseries_profile_with_bottom_temperature(self):
        filename = 'test_timeseries_profile_with_bottom_temperature.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 1, 2]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
        bottom_values = [30, 31, 32, 33, 34, 35]
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)
        ts.add_variable('bottom_temperature', values=bottom_values, verticals=[60], unlink_from_profile=True, attributes=attrs)
        ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)
        assert nc.variables.get('sensor_depth') is not None
        assert nc.variables.get('bottom_temperature').size == len(times)

        assert (nc.variables.get('temperature')[:] == values.reshape((len(times), len(verticals)))).all()
        assert (nc.variables.get('bottom_temperature')[:] == np.asarray(bottom_values)).all()
Example No. 5
    def test_from_variable(self):

        filename = 'test_urn_from_variable.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = None
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='lwe_thickness_of_precipitation_amount',
                     vertical_datum='NAVD88')
        ts.add_variable('temperature', values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables['temperature'])
        assert urn == 'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount#vertical_datum=navd88'

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='lwe_thickness_of_precipitation_amount',
                     cell_methods='time: variance (interval: PT1H comment: sampled instantaneously)')
        ts.add_variable('temperature2', values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables['temperature2'])
        assert urn == 'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount#cell_methods=time:variance;interval=pt1h'

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='lwe_thickness_of_precipitation_amount',
                     cell_methods='time: variance time: mean (interval: PT1H comment: sampled instantaneously)')
        ts.add_variable('temperature3', values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables['temperature3'])
        assert urn == 'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount#cell_methods=time:mean,time:variance;interval=pt1h'

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='lwe_thickness_of_precipitation_amount',
                     cell_methods='time: variance time: mean (interval: PT1H comment: sampled instantaneously)',
                     discriminant='2')
        ts.add_variable('temperature4', values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables['temperature4'])
        assert urn == 'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount-2#cell_methods=time:mean,time:variance;interval=pt1h'

        ts.close()
Example No. 6
    def test_timeseries_profile_fill_value_in_z(self):
        filename = 'test_timeseries_profile_fill_value_in_z.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        # Vertical fill values MUST come at the BEGINNING of the array!
        verticals = [self.fillvalue, 0]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [self.fillvalue, 20, self.fillvalue, 21, self.fillvalue, 22, self.fillvalue, 23, self.fillvalue, 24, self.fillvalue, 25]
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs, fillvalue=self.fillvalue)
        ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)

        assert nc.variables.get('temperature')[:][0][1] == 20
        assert nc.variables.get('temperature')[:].mask[0][0] == True

        assert nc.variables.get('temperature')[:][1][1] == 21
        assert nc.variables.get('temperature')[:].mask[1][0] == True

        assert nc.variables.get('temperature')[:][2][1] == 22
        assert nc.variables.get('temperature')[:].mask[2][0] == True

        assert nc.variables.get('temperature')[:][3][1] == 23
        assert nc.variables.get('temperature')[:].mask[3][0] == True

        assert nc.variables.get('temperature')[:][4][1] == 24
        assert nc.variables.get('temperature')[:].mask[4][0] == True

        assert nc.variables.get('temperature')[:][5][1] == 25
        assert nc.variables.get('temperature')[:].mask[5][0] == True

        assert (nc.variables.get('temperature')[:] == np.asarray(values).reshape((len(times), len(verticals)))).all()
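
The fill pattern above is easier to see laid out on the (time, z) grid: the first vertical is the fill value, so its whole column is masked, while the second column carries the data. A small reconstruction of what the assertions check, assuming self.fillvalue is a float sentinel such as -9999.9 (hypothetical value):

import numpy as np

FILL = -9999.9
times = [0, 1000, 2000, 3000, 4000, 5000]
verticals = [FILL, 0]  # fill verticals come first

values = [FILL, 20, FILL, 21, FILL, 22, FILL, 23, FILL, 24, FILL, 25]
grid = np.ma.masked_values(np.reshape(values, (len(times), len(verticals))), FILL)

assert grid.mask[:, 0].all()                            # the fill vertical is fully masked
assert (grid[:, 1] == [20, 21, 22, 23, 24, 25]).all()   # the real vertical holds the data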
Example No. 7
    def test_timeseries_profile_unsorted_time_and_z(self):
        filename = 'test_timeseries_profile_unsorted_time_and_z.nc'
        times = [5000, 1000, 2000, 3000, 4000, 0]
        verticals = [0, 50]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs, fillvalue=self.fillvalue)
        ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)

        assert nc.variables.get('temperature')[:][0][0] == 25
        assert nc.variables.get('temperature')[:][0][1] == 25
        assert nc.variables.get('temperature')[:][1][0] == 21
        assert nc.variables.get('temperature')[:][1][1] == 21
        assert nc.variables.get('temperature')[:][2][0] == 22
        assert nc.variables.get('temperature')[:][2][1] == 22
        assert nc.variables.get('temperature')[:][3][0] == 23
        assert nc.variables.get('temperature')[:][3][1] == 23
        assert nc.variables.get('temperature')[:][4][0] == 24
        assert nc.variables.get('temperature')[:][4][1] == 24
        assert nc.variables.get('temperature')[:][5][0] == 20
        assert nc.variables.get('temperature')[:][5][1] == 20
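
The asserted values fall out once the rows are reordered by time: the first input value (20) is paired with t=5000 and therefore lands in the last row, while the last input value (25) is paired with t=0 and lands in the first. A standalone check of that reordering (not the library's code):

import numpy as np

times = np.asarray([5000, 1000, 2000, 3000, 4000, 0])
verticals = [0, 50]
values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals)).reshape((len(times), len(verticals)))

order = np.argsort(times)   # [5, 1, 2, 3, 4, 0]
sorted_rows = values[order]
assert (sorted_rows[:, 0] == [25, 21, 22, 23, 24, 20]).all()
assert (sorted_rows[:, 1] == [25, 21, 22, 23, 24, 20]).all()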
Example No. 8
def parse_type_1(output_format, site_id, contents, output, csv_link):
    """
    # ---------------------------------- WARNING ----------------------------------------
    # The data you have obtained from this automated U.S. Geological Survey database
    # have not received Director's approval and as such are provisional and subject to
    # revision.  The data are released on the condition that neither the USGS nor the
    # United States Government may be held liable for any damages resulting from its use.
    # Additional info: http://waterdata.usgs.gov/ga/nwis/help/?provisional
    #
    # File-format description:  http://waterdata.usgs.gov/nwis/?tab_delimited_format_info
    # Automated-retrieval info: http://waterdata.usgs.gov/nwis/?automated_retrieval_info
    #
    # Contact:   [email protected]
    # retrieved: 2012-11-20 12:05:22 EST       (caww01)
    #
    # Data for the following 1 site(s) are contained in this file
    #    USGS 395740074482628 South Branch Rancocas Cr at S Main St nr Lumberton
    # -----------------------------------------------------------------------------------
    #
    # Data provided for site 395740074482628
    #    DD parameter   Description
    #    03   00035     Wind speed, miles per hour
    #    07   00025     Barometric pressure, millimeters of mercury
    #    09   00045     Precipitation, total, inches
    #    19   63160     Stream water level elevation above NAVD 1988, in feet
    #
    # Data-value qualification codes included in this output:
    #     P  Provisional data subject to revision.
    #
    agency_cd   site_no datetime    tz_cd   03_00035    03_00035_cd 07_00025    07_00025_cd 09_00045    09_00045_cd 19_63160    19_63160_cd
    5s  15s 20d 6s  14n 10s 14n 10s 14n 10s 14n 10s
    USGS    395740074482628 2012-10-28 13:00    EST 4.2 P   755 P           3.22    P
    USGS    395740074482628 2012-10-28 13:15    EST 6.4 P   754 P   0.00    P   3.36    P
    USGS    395740074482628 2012-10-28 13:30    EST 3.6 P   754 P   0.00    P   3.50    P
    USGS    395740074482628 2012-10-28 13:45    EST 3.2 P   754 P   0.00    P   3.63    P
    USGS    395740074482628 2012-10-28 14:00    EST 7.0 P   754 P   0.00    P   3.76    P
    USGS    395740074482628 2012-10-28 14:15    EST 4.0 P   754 P   0.00    P   3.87    P
    ...
    """
    # lat/lon point: http://waterservices.usgs.gov/nwis/site/?sites=395740074482628

    variable_map = {
        '01_00065' : {'long_name' : 'Gage height', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
        '03_00035' : {'long_name' : 'Wind Speed', 'standard_name' : 'wind_speed', 'units': 'mph'},
        '04_00035' : {'long_name' : 'Wind Gust', 'standard_name' : 'wind_speed_of_gust', 'units': 'mph'},
        '05_00035' : {'long_name' : 'Wind Speed', 'standard_name' : 'wind_speed', 'units': 'mph'},
        '06_00035' : {'long_name' : 'Wind Gust', 'standard_name' : 'wind_speed_of_gust', 'units': 'mph'},
        '04_00036' : {'long_name' : 'Wind Direction', 'standard_name' : 'wind_from_direction', 'units': 'degrees'},
        '02_00036' : {'long_name' : 'Wind Direction', 'standard_name' : 'wind_from_direction', 'units': 'degrees'},
        '05_00025' : {'long_name' : 'Air Pressure', 'standard_name' : 'air_pressure', 'units': 'mm of mercury'},
        '07_00025' : {'long_name' : 'Air Pressure', 'standard_name' : 'air_pressure', 'units': 'mm of mercury'},
        '09_00025' : {'long_name' : 'Air Pressure', 'standard_name' : 'air_pressure', 'units': 'mm of mercury'},
        '03_00045' : {'long_name' : 'Total Precipitation', 'standard_name' : 'lwe_thickness_of_precipitation_amount', 'units': 'inches'},
        '08_00045' : {'long_name' : 'Total Precipitation', 'standard_name' : 'lwe_thickness_of_precipitation_amount', 'units': 'inches'},
        '09_00045' : {'long_name' : 'Total Precipitation', 'standard_name' : 'lwe_thickness_of_precipitation_amount', 'units': 'inches'},
        '06_00052' : {'long_name' : 'Relative Humidity', 'standard_name' : 'relative_humidity', 'units': 'percent'},
        '07_00052' : {'long_name' : 'Relative Humidity', 'standard_name' : 'relative_humidity', 'units': 'percent'},
        '08_00052' : {'long_name' : 'Relative Humidity', 'standard_name' : 'relative_humidity', 'units': 'percent'},
        '05_00020' : {'long_name' : 'Air Temperature', 'standard_name' : 'air_temperature', 'units': 'degrees_Celsius'},
        '06_00020' : {'long_name' : 'Air Temperature', 'standard_name' : 'air_temperature', 'units': 'degrees_Celsius'},
        '07_00020' : {'long_name' : 'Air Temperature', 'standard_name' : 'air_temperature', 'units': 'degrees_Celsius'},
        '19_63160' : {'long_name' : 'Water Surface Height Above Reference Datum (NAVD88)', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
        '01_63160' : {'long_name' : 'Water Surface Height Above Reference Datum (NAVD88)', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
    }

    # Get metadata from a separate endpoint.
    d = requests.get("http://waterservices.usgs.gov/nwis/site/?sites={!s}".format(site_id))
    try:
        d.raise_for_status()
    except requests.exceptions.HTTPError:
        logger.error("Could not find lat/lon endpoint for station {!s}, skipping. Status code: {!s}".format(site_id, d.status_code))
        return
    _, hz, dz = split_file(d.text, "agency_cd")
    # Strip off the one line after the headers
    dz = dz[1:]
    dfz  = pd.DataFrame(dz, columns=hz)
    lat  = float(dfz["dec_lat_va"][0])
    lon  = float(dfz["dec_long_va"][0])
    sensor_vertical_datum = dfz["alt_datum_cd"][0] or "NAVD88"
    try:
        z = float(dfz["alt_va"][0])
    except ValueError:
        z = 0.
    loc  = "POINT({!s} {!s} {!s})".format(lon, lat, z)
    name = dfz["station_nm"][0]

    comments, headers, data = split_file(contents, "agency_cd")
    df = pd.DataFrame(data, columns=headers)

    fillvalue = -9999.9

    # Combine date columns
    dates = df["datetime"]
    tz = df["tz_cd"]
    new_dates = list()
    for i in range(len(dates)):
        try:
            new_dates.append(parse(dates[i] + " " + tz[i]).astimezone(pytz.utc))
        except BaseException:
            # Remove row.  Bad date.
            df.drop(i, axis=0, inplace=True)
            continue
    df['time'] = new_dates
    df['depth'] = [ z for x in range(len(df['time'])) ]

    # Strip out "_cd" columns (quality checks for USGS)
    for h in headers:
        if "_cd" in h:
            df.drop(h, axis=1, inplace=True)

    # Add global attributes to appear in the resulting NetCDF file
    global_attributes = dict(
        title=name,
        summary='USGS Hurricane Sandy Rapid Response Stations.  Data acquired from http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/.',
        keywords="usgs, waterdata, elevation, water, waterlevel, sandy, hurricane, rapid, response, %s" % site_id,
        keywords_vocabulary="None",
        naming_authority='gov.usgs',
        id=site_id,
        cdm_data_type="Station",
        history="NetCDF file generated from {!s}".format(csv_link),
        creator="USGS",
        creator_url="http://waterdata.usgs.gov",
        creator_institution="USGS",
        creator_urn="gov.usgs",
        publisher="Axiom Data Science",
        publisher_uri="http://axiomdatascience.com",
        processing_level="None",
        acknowledgement="None",
        geospatial_bounds=loc,
        geospatial_lat_min=lat,
        geospatial_lat_max=lat,
        geospatial_lon_min=lon,
        geospatial_lon_max=lon,
        license="Freely Distributed",
        date_created=datetime.utcnow().replace(second=0, microsecond=0).isoformat()
    )

    def to_floats(x):
        try:
            return float(x)
        except ValueError:
            return fillvalue

    min_time = df['time'].min()
    max_time = df['time'].max()

    full_station_urn = "urn:ioos:station:{!s}:{!s}".format(global_attributes["naming_authority"], site_id)
    if output_format == 'cf16':
        output_filename = '{}_{}-{}.nc'.format(site_id, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
        times = [ calendar.timegm(x.timetuple()) for x in df["time"] ]
        verticals = df['depth'].values
        ts = TimeSeries(output, latitude=lat, longitude=lon, station_name=full_station_urn, global_attributes=global_attributes, output_filename=output_filename, times=times, verticals=verticals, vertical_axis_name='z')

    for var in df.columns:
        if var in ['datetime', 'time', 'depth', 'tz_cd', 'site_no', 'agency_cd']:
            continue

        try:
            var_meta = variable_map[var]
        except KeyError:
            logger.error("Variable {!s} was not found in variable map!".format(var))
            continue

        # Convert to floats
        df[var] = df[var].map(to_floats)

        # Change feet to meters
        if var_meta["units"] in ["feet", "ft"]:
            df[var] = np.asarray([ v * 0.3048 if v != fillvalue else v for v in df[var] ])
            var_meta["units"] = "meters"

        if output_format == 'axiom':
            full_sensor_urn = "urn:ioos:sensor:{!s}:{!s}:{!s}".format(global_attributes["naming_authority"], site_id, var_meta["standard_name"])
            output_directory = os.path.join(output, full_sensor_urn)
            output_filename = '{}_{}-{}.nc'.format(var, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
            ts = TimeSeries.from_dataframe(df, output_directory, output_filename, lat, lon, full_station_urn, global_attributes, var_meta["standard_name"], var_meta, sensor_vertical_datum=sensor_vertical_datum, fillvalue=fillvalue, data_column=var, vertical_axis_name='height')
            ts.add_instrument_metadata(urn=full_sensor_urn)
            ts.close()
        elif output_format == 'cf16':
            # Variable names shouldn't start with a number
            try:
                int(var[0])
                variable_name = 'v_{}'.format(var)
            except ValueError:
                variable_name = var
            ts.add_variable(variable_name, values=df[var].values, attributes=var_meta, fillvalue=fillvalue, sensor_vertical_datum=sensor_vertical_datum)

    if output_format == 'cf16':
        ts.close()
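
split_file() is not shown in this snippet, but the call sites in parse_type_1 suggest it splits the raw USGS RDB text into (comment lines, header row, data rows) at the line whose first field matches the given token. A hypothetical sketch of that contract, assuming tab-delimited input as in the docstring (split_file_sketch is a made-up name, not the actual helper):

def split_file_sketch(text, header_token):
    lines = text.splitlines()
    comments = [ln for ln in lines if ln.startswith('#')]
    body = [ln.split('\t') for ln in lines if ln and not ln.startswith('#')]
    header_idx = next(i for i, row in enumerate(body) if row[0] == header_token)
    headers = body[header_idx]
    data = body[header_idx + 1:]   # still includes the "5s 15s 20d ..." format row,
                                   # which the caller above drops with dz = dz[1:]
    return comments, headers, data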
Example No. 9
def parse_type_2(output_format, site_id, contents, output, csv_link):
    """
    # These data are provisional and subject to revision.
    # Data processed as of 12/05/2012 11:54:29.
    # Data collected as part of Hurricane Sandy (2012) Storm Tide project.
    # Data are archived at http://water.usgs.gov/floods/events/2012/isaac/index.php
    # Elevation determined from GPS surveys (NAVD 88).
    # Time datum is GMT (Greenwich Mean Time).
    # Water density estimated on basis of sensor location
    #   where saltwater = 63.989 lb/ft3       (Saltwater = dissolved solids concentration greater than 20000 milligrams per liter)
    #   where brackish water = 63.052 lb/ft3  (Brackish water = dissolved solids concentration between 1000 and 20000 milligrams per liter)
    #   where freshwater = 62.428 lb/ft3      (Freshwater = dissolved solids concentration less than 1000 milligrams per liter)
    # The equation used to compute elevation from recorded pressure is
    #  (((sp-bp)*144)/d)+e
    # Where sp = surge pressure in psi; bp = barometric pressure in psi;
    #  d = water density in lb/ft3; and e = elevation of sensor in ft above NAVD 88.
    # Barometric data from nearest pressure sensor. Location for the barometric sensor is listed below.
    # Elevation is computer-rounded to two decimal places.
    #      Sensor information
    # Site id = SSS-NY-WES-001WL
    # Site type = water level
    # Horizontal datum used is NAD 83
    # Sensor location latitude 40.942755
    # Sensor location longitude -73.719828
    # Sensor elevation above NAVD 88 = -3.97 ft
    # Lowest recordable water elevation is -3.90 ft
    # Water density value used = 63.989 lb/ft3
    # Barometric sensor site (source of bp) = SSS-NY-WES-002BP
    # Barometric sensor location latitude 40.90754368
    # Barometric sensor location longitude -73.8692184

    date_time_GMT   elevation   nearest_barometric_sensor_psi
    10-28-2012 06:00:00 0.88    14.5145
    10-28-2012 06:00:30 0.86    14.5145
    10-28-2012 06:01:00 0.85    14.5170
    10-28-2012 06:01:30 0.85    14.5145
    10-28-2012 06:02:00 0.84    14.5170
    10-28-2012 06:02:30 0.81    14.5145
    10-28-2012 06:03:00 0.76    14.5145
    ...
    """

    variable_map = {
        'elevation' : {'long_name' : 'Water Level Elevation above Reference Datum (NAVD88)', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
    }

    def to_floats(x):
        try:
            return float(x)
        except ValueError:
            return fillvalue

    comments, headers, data = split_file(contents, "date_time_GMT")
    df = pd.DataFrame(data, columns=headers)
    fillvalue = -9999.9

    lat     = None
    lon     = None
    z       = 0
    name    = site_id
    sensor_vertical_datum = "NAVD88"

    for c in comments:
        if "Sensor location latitude" in c:
            lat = float(filter(None, map(lambda x: x.strip(), c.split(" ")))[-1])
        elif "Sensor location longitude" in c:
            lon = float(filter(None, map(lambda x: x.strip(), c.split(" ")))[-1])
        elif "Site id" in c:
            site_id = filter(None, map(lambda x: x.strip(), c.split(" ")))[-1]
            name = site_id
        elif "Sensor elevation" in c:
            sensor_vertical_datum = "".join(c.split("=")[0].split(" ")[4:6])
            l = filter(None, map(lambda x: x.strip(), c.split(" ")))
            z = float(l[-2])
            if l[-1] in ["feet", "ft"]:
                z *= 0.3048

    loc = "POINT({!s} {!s} {!s})".format(lon, lat, z)
    df['time'] = df["date_time_GMT"].map(lambda x: parse(x + " UTC"))
    df['depth'] = [ z for x in range(len(df['time'])) ]

    # Add global attributes to appear in the resulting NetCDF file
    global_attributes = dict(
        title=name,
        summary='USGS Hurricane Sandy Rapid Response Stations.  Data acquired from http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/.',
        keywords="usgs, waterdata, elevation, water, waterlevel, sandy, hurricane, rapid, response, %s" % site_id,
        keywords_vocabulary="None",
        naming_authority='gov.usgs',
        id=site_id,
        cdm_data_type="Station",
        history="NetCDF file generated from {!s}".format(csv_link),
        creator="USGS",
        creator_url="http://waterdata.usgs.gov",
        creator_institution="USGS",
        creator_urn="gov.usgs",
        publisher="Axiom Data Science",
        publisher_uri="http://axiomdatascience.com",
        processing_level="None",
        acknowledgement="None",
        geospatial_bounds=loc,
        geospatial_lat_min=lat,
        geospatial_lat_max=lat,
        geospatial_lon_min=lon,
        geospatial_lon_max=lon,
        license="Freely Distributed",
        date_created=datetime.utcnow().replace(second=0, microsecond=0).isoformat()
    )

    full_station_urn = "urn:ioos:station:{!s}:{!s}".format(global_attributes["naming_authority"], site_id)
    min_time = df["time"].min()
    max_time = df["time"].max()

    if output_format == 'cf16':
        times = [ calendar.timegm(x.timetuple()) for x in df['time'] ]
        verticals = df['depth'].values
        output_filename = '{}_{}-{}.nc'.format(site_id, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
        ts = TimeSeries(output, latitude=lat, longitude=lon, station_name=full_station_urn, global_attributes=global_attributes, output_filename=output_filename, times=times, verticals=verticals)

    for var in df.columns:
        if var in ['date_time_GMT', 'time', 'depth']:
            continue
        try:
            int(var[0])
            variable_name = 'v_{}'.format(var)
        except ValueError:
            variable_name = var

        try:
            var_meta = variable_map[var]
        except KeyError:
            logger.error("Variable {!s} was not found in variable map!".format(var))
            continue

        # Convert to floats
        df[var] = df[var].map(to_floats)
        if var_meta["units"] in ["feet", "ft"]:
            df[var] = [ v * 0.3048 if v != fillvalue else v for v in df[var] ]
            var_meta["units"] = "meters"

        if output_format == 'axiom':
            full_sensor_urn = "urn:ioos:sensor:{!s}:{!s}:{!s}".format(global_attributes["naming_authority"], site_id, var_meta["standard_name"])
            output_directory = os.path.join(output, full_sensor_urn)
            output_filename = '{}_{}-{}.nc'.format(var, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
            ts = TimeSeries.from_dataframe(df, output_directory, output_filename, lat, lon, full_station_urn, global_attributes, var_meta["standard_name"], var_meta, sensor_vertical_datum=sensor_vertical_datum, fillvalue=fillvalue, data_column=var)
            ts.add_instrument_metadata(urn=full_sensor_urn)
            ts.close()
        elif output_format == 'cf16':
            ts.add_variable(variable_name, values=df[var].values, attributes=var_meta, fillvalue=fillvalue, sensor_vertical_datum=sensor_vertical_datum)

    if output_format == 'cf16':
        ts.close()
Example No. 10
def main(output, download_folder, do_download, projects, csv_metadata_file, filesubset=None):
    project_metadata = dict()
    with open(csv_metadata_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            project_name = row['project_name']
            if isinstance(project_name, str) and project_name[0] == '#':
                continue
            if projects and project_name.lower() not in projects:
                # Skip projects if a subset was defined
                continue
            project_metadata[project_name] = dict()
            for k, v in row.items():
                project_metadata[project_name][k] = v

    if do_download:
        try:
            downloaded_files = download(download_folder, project_metadata, filesubset)
        except KeyboardInterrupt:
            logger.exception('Error downloading datasets from THREDDS')
            downloaded_files = []
    else:
        downloaded_files = glob(os.path.join(download_folder, "*"))

    for down_file in downloaded_files:

        if filesubset is not None:
            if os.path.basename(down_file).lower() not in filesubset:
                # aka "9631ecp-a.nc"
                # Skip this file!
                continue

        if projects:
            tmpnc = netCDF4.Dataset(down_file)
            project_name, _ = tmpnc.id.split("/")
            nc_close(tmpnc)
            if project_name.lower() not in projects:
                # Skip this project!
                continue

        _, temp_file = tempfile.mkstemp(prefix='cmg_collector', suffix='nc')
        shutil.copy(down_file, temp_file)

        nc = None
        try:
            # Cleanup to CF-1.6
            first_time = normalize_time(temp_file)
            normalize_epic_codes(temp_file)
            normalize_vectors(temp_file)
            normalize_units(temp_file)

            # Create list of variables that we want to save.
            mooring_id   = None
            latitude     = None
            longitude    = None

            nc = netCDF4.Dataset(temp_file)

            project_name, _ = nc.id.split("/")
            feature_name, _ = os.path.splitext(os.path.basename(down_file))

            fname = os.path.basename(down_file)
            try:
                if int(fname[0]) <= 9 and int(fname[0]) >= 2:
                    # 1.) everything with first char between 2-9 is 3-digit
                    mooring_id = int(fname[0:3])
                elif int(fname[0]) == 1:
                    # 2.) if MOORING starts with 1, and data is newer than 2014, it's 4 digit, otherwise 3 digit.
                    if first_time > datetime(2014, 1, 1, 0):
                        # 4 digit if after Jan 1, 2014
                        mooring_id = int(fname[0:4])
                    else:
                        # 3 digit if before
                        mooring_id = int(fname[0:3])
            except ValueError:
                logger.exception("Could not create a suitable station_id. Skipping {0}.".format(down_file))
                continue

            try:
                latitude  = nc.variables.get("lat")[0]
                longitude = nc.variables.get("lon")[0]
            except IndexError:
                latitude  = nc.variables.get("lat")[:]
                longitude = nc.variables.get("lon")[:]

            file_name = os.path.basename(down_file)
            output_directory = os.path.join(output, project_name)
            logger.info("Translating {0} into CF1.6 format: {1}".format(down_file, os.path.abspath(os.path.join(output_directory, file_name))))

            if not os.path.isdir(output_directory):
                os.makedirs(output_directory)

            file_global_attributes = { k : getattr(nc, k) for k in nc.ncattrs() }
            file_global_attributes.update(global_attributes)
            file_global_attributes['id'] = feature_name
            file_global_attributes['title'] = '{0} - {1}'.format(project_name, os.path.basename(down_file))
            file_global_attributes['MOORING'] = mooring_id
            file_global_attributes['original_filename'] = fname
            file_global_attributes['original_folder'] = project_name
            if project_name in project_metadata:
                for k, v in project_metadata[project_name].items():
                    if v and k.lower() not in ['id', 'title', 'catalog_xml', 'project_name']:
                        file_global_attributes[k] = v

            times  = nc.variables.get('time')[:]

            # Get all depth values
            depth_variables = []
            for dv in nc.variables:
                depth_variables += [ x for x in nc.variables.get(dv).dimensions if 'depth' in x ]
            depth_variables = sorted(list(set(depth_variables)))
            depth_values = np.asarray([ nc.variables.get(x)[:] for x in depth_variables ]).flatten()

            # Convert everything to positive up, unless it is specifically specified as "up" already
            depth_conversion = -1.0
            if depth_variables:
                pull_positive = nc.variables.get(depth_variables[0])
                if pull_positive and hasattr(pull_positive, 'positive') and pull_positive.positive.lower() == 'up':
                    depth_conversion = 1.0

            depth_values = depth_values * depth_conversion
            ts = TimeSeries(output_directory, latitude, longitude, feature_name, file_global_attributes, times=times, verticals=depth_values, output_filename=file_name, vertical_positive='up')

            v = []
            for other in sorted(nc.variables):  # Sorted for a reason... don't change!
                if other in coord_vars:
                    continue

                old_var = nc.variables.get(other)
                variable_attributes = { k : getattr(old_var, k) for k in old_var.ncattrs() }
                # Remove/rename some attributes
                # https://github.com/USGS-CMG/usgs-cmg-portal/issues/67
                if 'valid_range' in variable_attributes:
                    del variable_attributes['valid_range']
                if 'minimum' in variable_attributes:
                    variable_attributes['actual_min'] = variable_attributes['minimum']
                    del variable_attributes['minimum']
                if 'maximum' in variable_attributes:
                    variable_attributes['actual_max'] = variable_attributes['maximum']
                    del variable_attributes['maximum']
                if 'sensor_depth' in variable_attributes:
                    # Convert to the correct positive "up" or "down"
                    variable_attributes['sensor_depth'] = variable_attributes['sensor_depth'] * depth_conversion

                fillvalue = None
                if hasattr(old_var, "_FillValue"):
                    fillvalue = old_var._FillValue

                # Figure out if this is a variable that is repeated at different depths
                # as different variable names.   Assumes sorted.
                new_var_name = other.split('_')[0]
                if new_var_name in ts.ncd.variables:
                    # Already in new file (processed when the first was encountered in the loop below)
                    continue

                # Get the depth index
                depth_variable = [ x for x in old_var.dimensions if 'depth' in x ]
                if depth_variable and len(old_var.dimensions) > 1 and 'time' in old_var.dimensions:
                    depth_index = np.squeeze(np.where(depth_values == (nc.variables.get(depth_variable[0])[:] * depth_conversion)))

                    # Find other variable names like this one
                    depth_indexes = [(other, depth_index)]
                    for search_var in sorted(nc.variables):
                        # If they have different depth dimension names we need to combine them into one variable
                        if search_var != other and search_var.split('_')[0] == new_var_name and \
                           depth_variable[0] != [ x for x in nc.variables[search_var].dimensions if 'depth' in x ][0]:
                            # Found a match at a different depth
                            search_depth_variable = [ x for x in nc.variables.get(search_var).dimensions if 'depth' in x ]
                            depth_index = np.squeeze(np.where(depth_values == (nc.variables.get(search_depth_variable[0])[:] * depth_conversion)))
                            depth_indexes.append((search_var, depth_index))
                            logger.info("Combining '{}' with '{}' as '{}' (different variables at different depths but are the same parameter)".format(search_var, other, new_var_name))

                    values = np.ma.empty((times.size, len(depth_values)))
                    values.fill_value = fillvalue
                    values.mask = True
                    for nm, index in depth_indexes:
                        values[:, index] = np.squeeze(nc.variables.get(nm)[:])

                    # If we just have one index we want to use the original name
                    if len(depth_indexes) == 1:
                        # Just use the original variable name
                        new_var_name = other

                    # Create this one, should be the first we encounter for this type
                    ts.add_variable(new_var_name, values=values, times=times, fillvalue=fillvalue, attributes=variable_attributes)
                elif len(old_var.dimensions) == 1 and old_var.dimensions[0] == 'time':
                    # A single time dimensioned variable, like pitch, roll, record count, etc.
                    ts.add_variable(other, values=old_var[:], times=times, unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                elif depth_variable and 'time' not in old_var.dimensions:
                    # Metadata variable like bin distance
                    meta_var = ts.ncd.createVariable(other, old_var.dtype, ('z',), fill_value=fillvalue)
                    for k, v in variable_attributes.items():
                        if k != '_FillValue':
                            meta_var.setncattr(k, v)

                    meta_var[:] = old_var[:]
                elif depth_values.size == 1 and not depth_variable and 'time' in old_var.dimensions:
                    # There is a single depth_value for most variables, but this one does not have a depth dimension
                    # Instead, it has a sensor_depth attribute that defines the Z index.  These need to be put into
                    # a different file to remain CF compliant.
                    new_file_name = file_name.replace('.nc', '_{}.nc'.format(other))
                    new_ts = TimeSeries(output_directory, latitude, longitude, feature_name, file_global_attributes, times=times, verticals=[old_var.sensor_depth*depth_conversion], output_filename=new_file_name, vertical_positive='up')
                    new_ts.add_variable(other, values=old_var[:], times=times, verticals=[old_var.sensor_depth*depth_conversion], fillvalue=fillvalue, attributes=variable_attributes)
                    new_ts.close()
                elif depth_values.size > 1 and not depth_variable and 'time' in old_var.dimensions:
                    if hasattr(old_var, 'sensor_depth'):
                        # An ADCP or profiling dataset, but this variable is measured at a single depth.
                        # Example: Bottom Temperature on an ADCP
                        ts.add_variable(other, values=old_var[:], times=times, verticals=[old_var.sensor_depth*depth_conversion], unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                    else:
                        ts.add_variable(other, values=old_var[:], times=times, unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                else:
                    ts.add_variable(other, values=old_var[:], times=times, fillvalue=fillvalue, attributes=variable_attributes)

                ts.ncd.sync()
            ts.ncd.close()

        except BaseException:
            logger.exception("Error. Skipping {0}.".format(down_file))
            continue
        finally:
            nc_close(nc)
            if os.path.isfile(temp_file):
                os.remove(temp_file)
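
The mooring-id logic buried in the loop above can be restated on its own; this is just an illustrative restatement of the rule in main(), not library code (mooring_id_from_filename is a made-up name):

from datetime import datetime

def mooring_id_from_filename(fname, first_time):
    lead = int(fname[0])
    if 2 <= lead <= 9:
        return int(fname[0:3])                    # always a 3-digit mooring
    if lead == 1:
        if first_time > datetime(2014, 1, 1, 0):
            return int(fname[0:4])                # 4-digit if data is newer than Jan 1, 2014
        return int(fname[0:3])                    # 3-digit otherwise
    return None

assert mooring_id_from_filename("9631ecp-a.nc", datetime(2012, 1, 1)) == 963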
Example No. 11
class TestTimeseriesTimeBounds(unittest.TestCase):

    def setUp(self):
        self.output_directory = os.path.join(os.path.dirname(__file__), "output")
        self.latitude = 34
        self.longitude = -72
        self.station_name = "PytoolsTestStation"
        self.global_attributes = dict(id='this.is.the.id')

        self.filename = 'test_timeseries_bounds.nc'
        self.times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0]
        self.ts = TimeSeries(output_directory=self.output_directory,
                             latitude=self.latitude,
                             longitude=self.longitude,
                             station_name=self.station_name,
                             global_attributes=self.global_attributes,
                             output_filename=self.filename,
                             times=self.times,
                             verticals=verticals)

        self.values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='sea_water_temperature')
        self.ts.add_variable('temperature', values=self.values, attributes=attrs)

    def tearDown(self):
        self.ts.close()
        os.remove(os.path.join(self.output_directory, self.filename))

    def test_time_bounds_start(self):
        delta = timedelta(seconds=1000)
        self.ts.add_time_bounds(delta=delta, position='start')
        self.ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, self.filename))
        assert nc.variables.get('time_bounds').shape == (len(self.times), 2,)
        assert (nc.variables.get('time_bounds')[:] == np.asarray([
                                                                    [0,    1000],
                                                                    [1000, 2000],
                                                                    [2000, 3000],
                                                                    [3000, 4000],
                                                                    [4000, 5000],
                                                                    [5000, 6000]
                                                                ])).all()
        nc.close()

    def test_time_bounds_middle(self):
        delta = timedelta(seconds=1000)
        self.ts.add_time_bounds(delta=delta, position='middle')
        self.ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, self.filename))
        assert nc.variables.get('time_bounds').shape == (len(self.times), 2,)
        assert (nc.variables.get('time_bounds')[:] == np.asarray([
                                                                    [ -500,  500],
                                                                    [  500, 1500],
                                                                    [ 1500, 2500],
                                                                    [ 2500, 3500],
                                                                    [ 3500, 4500],
                                                                    [ 4500, 5500]
                                                                ])).all()
        nc.close()

    def test_time_bounds_end(self):
        delta = timedelta(seconds=1000)
        self.ts.add_time_bounds(delta=delta, position='end')
        self.ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, self.filename))
        assert nc.variables.get('time_bounds').shape == (len(self.times), 2,)
        assert (nc.variables.get('time_bounds')[:] == np.asarray([
                                                                    [-1000,    0],
                                                                    [    0, 1000],
                                                                    [ 1000, 2000],
                                                                    [ 2000, 3000],
                                                                    [ 3000, 4000],
                                                                    [ 4000, 5000]
                                                                ])).all()
        nc.close()
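
The three tests above differ only in where the measurement time sits inside its bounding interval. A standalone reconstruction of the expected bounds arrays (illustrative, not the add_time_bounds implementation):

import numpy as np

times = np.asarray([0, 1000, 2000, 3000, 4000, 5000])
delta = 1000

start_bounds  = np.column_stack((times, times + delta))                    # position='start'
middle_bounds = np.column_stack((times - delta // 2, times + delta // 2))  # position='middle'
end_bounds    = np.column_stack((times - delta, times))                    # position='end'

assert (start_bounds[0] == [0, 1000]).all()
assert (middle_bounds[0] == [-500, 500]).all()
assert (end_bounds[0] == [-1000, 0]).all()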