Example #1
    def test_timeseries_profile_duplicate_heights(self):
        filename = 'test_timeseries_profile_duplicate_heights.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 0, 0, 1, 1, 1]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], 2)
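        # The duplicate heights collapse to two unique z values (0 and 1), so
        # each of the six time steps supplies one value per unique height.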
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        self.assertEqual(nc.geospatial_vertical_resolution, '1')
        self.assertEqual(nc.geospatial_vertical_min, 0)
        self.assertEqual(nc.geospatial_vertical_max, 1)

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(list(set(verticals)))
        assert nc.variables.get('temperature').size == len(times) * len(list(set(verticals)))

        assert (nc.variables.get('temperature')[:] == values.reshape((len(times), 2))).all()
Example #2
    def test_timeseries_profile_with_shape(self):
        filename = 'test_timeseries_profile_with_shape.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 1, 2]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals)).reshape((len(times), len(verticals)))
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        self.assertEqual(nc.geospatial_vertical_resolution, '1 1')
        self.assertEqual(nc.geospatial_vertical_min, 0)
        self.assertEqual(nc.geospatial_vertical_max, 2)

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('time')[:].dtype == np.int32
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)
        assert (nc.variables.get('temperature')[:] == values.reshape((len(times), len(verticals)))).all()
Example #3
    def test_timeseries_extra_values(self):
        """
        This will map directly to the time variable and ignore any time indexes
        that are not found.  The 'times' parameter to add_variable should be
        the same length as the values parameter.
        """
        filename = 'test_timeseries_extra_values.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = None
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [20, 21, 22, 23, 24, 25, 26, 27, 28]
        value_times = [0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]
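        # The last three times (6000-8000) are not in the file's time axis, so
        # their values are dropped (only values[0:6] are asserted below).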
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs, times=value_times)
        ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('temperature').size == len(times)
        assert (nc.variables.get('temperature')[:] == np.asarray(values[0:6])).all()
Example #4
    def test_instrument_metadata_variable(self):
        filename = 'test_timeseries.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = None

        gats = copy(self.global_attributes)
        gats['naming_authority'] = 'pyaxiom'
        gats['geospatial_bounds_vertical_crs'] = 'NAVD88'

        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=gats,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs, create_instrument_variable=True, sensor_vertical_datum='bar')

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None
        assert nc.geospatial_bounds_vertical_crs == 'NAVD88'  # The first value set wins

        datavar = nc.variables.get('temperature')
        instrument_var_name = datavar.instrument
        instvar = nc.variables[instrument_var_name]
        assert instvar.short_name == 'sea_water_temperature'
        assert instvar.ioos_code == urnify(gats['naming_authority'], gats['id'], attrs)
Example #5
    def test_extracting_dataframe_some_masked_heights(self):
        filename = 'test_extracting_dataframe_some_masked_heights.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [-9999.9, 7.8, 7.9]
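        # The first height equals the vertical_fill below and should come back masked.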
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals,
                        vertical_fill=-9999.9)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('time')[:].dtype == np.int32
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('z')[:].dtype == np.float64
        assert np.allclose(nc.variables.get('z')[:], np.ma.array([np.nan, 7.8, 7.9], mask=[1, 0, 0]))
        assert nc.variables.get('temperature').size == len(times) * len(verticals)

        df = get_dataframe_from_variable(nc, nc.variables.get('temperature'))
        assert not df['depth'].dropna().empty
Example #6
    def test_history_append_to_list(self):
        filename = 'test_history_append.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = None
        gats = copy(self.global_attributes)

        gats['history'] = 'this is some history\nsome other history\nsome more'
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=gats,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        history = nc.history.split('\n')
        assert len(history) == 4
        assert history[0] == 'this is some history'
        assert history[1] == 'some other history'
        assert history[2] == 'some more'
        assert 'File created using pyaxiom' in history[3]
Example #7
    def test_station_name_as_urn_override_with_globals(self):
        filename = 'test_station_name_as_urn_override_with_globals.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = None
        gats = copy(self.global_attributes)
        gats['title'] = "My Title Override"
        gats['summary'] = "My Summary Override"

        urn = 'urn:ioos:station:myauthority:mylabel'

        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=urn,
                        global_attributes=gats,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None
        assert nc.variables['platform'].ioos_code == urn
        assert nc.variables['platform'].short_name == gats['title']
        assert nc.variables['platform'].long_name == gats['summary']
Example #8
    def test_timeseries_profile(self):
        filename = 'test_timeseries_profile.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 1, 2]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)
        ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('z').positive == 'down'
        assert nc.variables.get('temperature').size == len(times) * len(verticals)
        assert (nc.variables.get('temperature')[:] == values.reshape((len(times), len(verticals)))).all()
Example #9
    def test_timeseries_profile_with_bottom_temperature(self):
        filename = 'test_timeseries_profile_with_bottom_temperature.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 1, 2]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
        bottom_values = [30, 31, 32, 33, 34, 35]
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)
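        # unlink_from_profile stores this series against its own sensor depth
        # (see the sensor_depth variable asserted below) instead of the z axis.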
        ts.add_variable('bottom_temperature', values=bottom_values, verticals=[60], unlink_from_profile=True, attributes=attrs)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        self.assertEqual(nc.geospatial_vertical_resolution, '1 1')
        self.assertEqual(nc.geospatial_vertical_min, 0)
        self.assertEqual(nc.geospatial_vertical_max, 2)

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)
        assert nc.variables.get('sensor_depth') is not None
        assert nc.variables.get('bottom_temperature').size == len(times)

        assert (nc.variables.get('temperature')[:] == values.reshape((len(times), len(verticals)))).all()
        assert (nc.variables.get('bottom_temperature')[:] == np.asarray(bottom_values)).all()
Example #10
    def test_timeseries_profile_fill_value_in_z(self):
        filename = 'test_timeseries_profile_fill_value_in_z.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        # Vertical fills MUST be at the BEGINNING of the array!!!!
        verticals = [self.fillvalue, 0]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [
            self.fillvalue, 20, self.fillvalue, 21, self.fillvalue, 22,
            self.fillvalue, 23, self.fillvalue, 24, self.fillvalue, 25
        ]
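        # The fill values above sit in the first z column (the fill vertical),
        # so those cells are expected to come back masked.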
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature',
                        values=values,
                        attributes=attrs,
                        fillvalue=self.fillvalue)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        self.assertEqual(nc.geospatial_vertical_resolution, '0')
        self.assertEqual(nc.geospatial_vertical_min, 0)
        self.assertEqual(nc.geospatial_vertical_max, 0)

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('time')[:].dtype == np.int32
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('z')[:].dtype == np.float64
        assert nc.variables.get(
            'temperature').size == len(times) * len(verticals)

        assert nc.variables.get('temperature')[:][0][1] == 20
        assert nc.variables.get('temperature')[:].mask[0][0] == True  # noqa

        assert nc.variables.get('temperature')[:][1][1] == 21
        assert nc.variables.get('temperature')[:].mask[1][0] == True  # noqa

        assert nc.variables.get('temperature')[:][2][1] == 22
        assert nc.variables.get('temperature')[:].mask[2][0] == True  # noqa

        assert nc.variables.get('temperature')[:][3][1] == 23
        assert nc.variables.get('temperature')[:].mask[3][0] == True  # noqa

        assert nc.variables.get('temperature')[:][4][1] == 24
        assert nc.variables.get('temperature')[:].mask[4][0] == True  # noqa

        assert nc.variables.get('temperature')[:][5][1] == 25
        assert nc.variables.get('temperature')[:].mask[5][0] == True  # noqa

        assert (
            nc.variables.get('temperature')[:] == np.asarray(values).reshape(
                (len(times), len(verticals)))).all()
Example #11
    def test_timeseries_many_variables(self):
        filename = 'test_timeseries_many_variables.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 1, 2]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
        bottom_values = [30, 31, 32, 33, 34, 35]
        full_masked = values.view(np.ma.MaskedArray)
        full_masked.mask = True
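        # A fully masked array should round-trip as all-masked in the file.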
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature',        values=values, attributes=attrs)
        ts.add_variable('salinity',           values=values.reshape((len(times), len(verticals))))
        ts.add_variable('dissolved_oxygen',   values=full_masked, fillvalue=full_masked.fill_value)
        ts.add_variable('bottom_temperature', values=bottom_values, verticals=[60], unlink_from_profile=True, attributes=attrs)
        ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)
        assert (nc.variables.get('temperature')[:] == values.reshape((len(times), len(verticals)))).all()
        assert (nc.variables.get('salinity')[:] == values.reshape((len(times), len(verticals)))).all()
        assert nc.variables.get('dissolved_oxygen')[:].mask.all()
Example #12
    def test_timeseries_profile_fill_value_in_z(self):
        filename = 'test_timeseries_profile_fill_value_in_z.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        # Vertical fills MUST be at the BEGINNING of the array!!!!
        verticals = [self.fillvalue, 0]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [self.fillvalue, 20, self.fillvalue, 21, self.fillvalue, 22, self.fillvalue, 23, self.fillvalue, 24, self.fillvalue, 25]
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs, fillvalue=self.fillvalue)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        self.assertEqual(nc.geospatial_vertical_resolution, '0')
        self.assertEqual(nc.geospatial_vertical_min, 0)
        self.assertEqual(nc.geospatial_vertical_max, 0)

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)

        assert nc.variables.get('temperature')[:][0][1] == 20
        assert nc.variables.get('temperature')[:].mask[0][0] == True

        assert nc.variables.get('temperature')[:][1][1] == 21
        assert nc.variables.get('temperature')[:].mask[1][0] == True

        assert nc.variables.get('temperature')[:][2][1] == 22
        assert nc.variables.get('temperature')[:].mask[2][0] == True

        assert nc.variables.get('temperature')[:][3][1] == 23
        assert nc.variables.get('temperature')[:].mask[3][0] == True

        assert nc.variables.get('temperature')[:][4][1] == 24
        assert nc.variables.get('temperature')[:].mask[4][0] == True

        assert nc.variables.get('temperature')[:][5][1] == 25
        assert nc.variables.get('temperature')[:].mask[5][0] == True

        assert (nc.variables.get('temperature')[:] == np.asarray(values).reshape((len(times), len(verticals)))).all()
Example #13
    def test_timeseries_profile_unsorted_time_and_z(self):
        filename = 'test_timeseries_profile_unsorted_time_and_z.nc'
        times = [5000, 1000, 2000, 3000, 4000, 0]
        verticals = [0, 50]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
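        # Values pair with the original (unsorted) times; the file stores times
        # sorted ascending, so the rows asserted below come back reordered.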
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature',
                        values=values,
                        attributes=attrs,
                        fillvalue=self.fillvalue)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        self.assertEqual(nc.geospatial_vertical_resolution, '50')
        self.assertEqual(nc.geospatial_vertical_min, 0)
        self.assertEqual(nc.geospatial_vertical_max, 50)

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('time')[:].dtype == np.int32
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('z')[:].dtype == np.int32
        assert nc.variables.get(
            'temperature').size == len(times) * len(verticals)

        assert nc.variables.get('temperature')[:][0][0] == 25
        assert nc.variables.get('temperature')[:][0][1] == 25
        assert nc.variables.get('temperature')[:][1][0] == 21
        assert nc.variables.get('temperature')[:][1][1] == 21
        assert nc.variables.get('temperature')[:][2][0] == 22
        assert nc.variables.get('temperature')[:][2][1] == 22
        assert nc.variables.get('temperature')[:][3][0] == 23
        assert nc.variables.get('temperature')[:][3][1] == 23
        assert nc.variables.get('temperature')[:][4][0] == 24
        assert nc.variables.get('temperature')[:][4][1] == 24
        assert nc.variables.get('temperature')[:][5][0] == 20
        assert nc.variables.get('temperature')[:][5][1] == 20
Example #14
    def test_timeseries_profile_extra_values(self):
        """
        This will map directly to the time variable and ignore any time indexes
        that are not found.  The 'times' parameter to add_variable should be
        the same length as the values parameter.
        """
        filename = 'test_timeseries_profile_extra_values.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 1, 2]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25, 26, 27, 28],
                           len(verticals))
        new_times = [0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]
        values_times = np.repeat(new_times, len(verticals))
        values_verticals = np.repeat(verticals, len(new_times))
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature',
                        values=values,
                        attributes=attrs,
                        times=values_times,
                        verticals=values_verticals)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        self.assertEqual(nc.geospatial_vertical_resolution, '1 1')
        self.assertEqual(nc.geospatial_vertical_min, 0)
        self.assertEqual(nc.geospatial_vertical_max, 2)

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('time')[:].dtype == np.int32
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get(
            'temperature').size == len(times) * len(verticals)
        assert (nc.variables.get('temperature')[:] == np.repeat(
            [20, 21, 22, 23, 24, 25], len(verticals)).reshape(
                (len(times), len(verticals)))).all()
Example #15
    def test_extracting_dataframe_ordered_masked_heights(self):
        filename = 'test_extracting_dataframe_ordered_masked_heights.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [np.nan, 7.8]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals,
                        vertical_fill=np.nan)

        values = np.asarray([[20, 21], [22, 23], [24, 25], [30, 31], [32, 33],
                             [34, 35]])
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('time')[:].dtype == np.int32
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('z')[:].dtype == np.float64

        # The height order is sorted!
        assert np.allclose(
            nc.variables.get('z')[:], np.ma.array([7.8, np.nan], mask=[0, 1]))
        assert nc.variables.get(
            'temperature').size == len(times) * len(verticals)

        # Be sure the values are re-arranged because the height order is sorted!
        assert np.isclose(nc.variables.get('temperature')[:][0][0], 21)
        assert np.isclose(nc.variables.get('temperature')[:][1][0], 23)
        assert np.isclose(nc.variables.get('temperature')[:][2][0], 25)
        assert np.isclose(nc.variables.get('temperature')[:][3][0], 31)
        assert np.isclose(nc.variables.get('temperature')[:][4][0], 33)
        assert np.isclose(nc.variables.get('temperature')[:][5][0], 35)

        df = get_dataframe_from_variable(nc, nc.variables.get('temperature'))
        assert not df['depth'].dropna().empty
Example #16
    def test_from_variable(self):

        filename = 'test_urn_from_variable.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = None
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='lwe_thickness_of_precipitation_amount',
                     vertical_datum='NAVD88')
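        # Attributes beyond standard_name (here vertical_datum) are folded into
        # the sensor URN as a lowercased '#key=value' fragment (asserted below).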
        ts.add_variable('temperature', values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables['temperature'])
        assert urn == 'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount#vertical_datum=navd88'

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(
            standard_name='lwe_thickness_of_precipitation_amount',
            cell_methods='time: variance (interval: PT1H comment: sampled instantaneously)')
        ts.add_variable('temperature2', values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables['temperature2'])
        assert urn == 'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount#cell_methods=time:variance;interval=pt1h'

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(
            standard_name='lwe_thickness_of_precipitation_amount',
            cell_methods='time: variance time: mean (interval: PT1H comment: sampled instantaneously)')
        ts.add_variable('temperature3', values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables['temperature3'])
        assert urn == 'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount#cell_methods=time:mean,time:variance;interval=pt1h'

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(
            standard_name='lwe_thickness_of_precipitation_amount',
            cell_methods='time: variance time: mean (interval: PT1H comment: sampled instantaneously)',
            discriminant='2')
        ts.add_variable('temperature4', values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables['temperature4'])
        assert urn == 'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount-2#cell_methods=time:mean,time:variance;interval=pt1h'
Example #17
    def test_timeseries_extra_values(self):
        """
        This will map directly to the time variable and ignore any time indexes
        that are not found.  The 'times' parameter to add_variable should be
        the same length as the values parameter.
        """
        filename = 'test_timeseries_extra_values.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = None
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [20, 21, 22, 23, 24, 25, 26, 27, 28]
        value_times = [0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature',
                        values=values,
                        attributes=attrs,
                        times=value_times)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        self.assertEqual(nc.geospatial_vertical_resolution, '0')

        # No verticals, so these were not set
        with self.assertRaises(AttributeError):
            nc.geospatial_vertical_min
        with self.assertRaises(AttributeError):
            nc.geospatial_vertical_max

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('time')[:].dtype == np.int32
        assert nc.variables.get('temperature').size == len(times)
        assert (nc.variables.get('temperature')[:] == np.asarray(
            values[0:6])).all()
Example #18
    def test_timeseries_profile_unsorted_time_and_z(self):
        filename = 'test_timeseries_profile_unsorted_time_and_z.nc'
        times = [5000, 1000, 2000, 3000, 4000, 0]
        verticals = [0, 50]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs, fillvalue=self.fillvalue)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        self.assertEqual(nc.geospatial_vertical_resolution, '50')
        self.assertEqual(nc.geospatial_vertical_min, 0)
        self.assertEqual(nc.geospatial_vertical_max, 50)

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)

        assert nc.variables.get('temperature')[:][0][0] == 25
        assert nc.variables.get('temperature')[:][0][1] == 25
        assert nc.variables.get('temperature')[:][1][0] == 21
        assert nc.variables.get('temperature')[:][1][1] == 21
        assert nc.variables.get('temperature')[:][2][0] == 22
        assert nc.variables.get('temperature')[:][2][1] == 22
        assert nc.variables.get('temperature')[:][3][0] == 23
        assert nc.variables.get('temperature')[:][3][1] == 23
        assert nc.variables.get('temperature')[:][4][0] == 24
        assert nc.variables.get('temperature')[:][4][1] == 24
        assert nc.variables.get('temperature')[:][5][0] == 20
        assert nc.variables.get('temperature')[:][5][1] == 20
Example #19
    def test_timeseries_profile(self):
        filename = 'test_timeseries_profile.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 1, 2]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        # Basic metadata on all timeseries
        self.assertEqual(nc.cdm_data_type, 'Station')
        self.assertEqual(nc.geospatial_lat_units, 'degrees_north')
        self.assertEqual(nc.geospatial_lon_units, 'degrees_east')
        self.assertEqual(nc.geospatial_vertical_units, 'meters')
        self.assertEqual(nc.geospatial_vertical_positive, 'down')
        self.assertEqual(nc.featureType, 'timeSeriesProfile')
        self.assertEqual(nc.geospatial_vertical_resolution, '1 1')
        self.assertEqual(nc.geospatial_vertical_min, 0)
        self.assertEqual(nc.geospatial_vertical_max, 2)

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('time')[:].dtype == np.int32
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('z').positive == 'down'
        assert nc.variables.get('z')[:].dtype == np.int32
        assert nc.variables.get(
            'temperature').size == len(times) * len(verticals)
        assert (nc.variables.get('temperature')[:] == values.reshape(
            (len(times), len(verticals)))).all()
Example #20
    def test_timeseries(self):
        filename = 'test_timeseries.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = None
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        # Basic metadata on all timeseries
        self.assertEqual(nc.cdm_data_type, 'Station')
        self.assertEqual(nc.geospatial_lat_units, 'degrees_north')
        self.assertEqual(nc.geospatial_lon_units, 'degrees_east')
        self.assertEqual(nc.geospatial_vertical_units, 'meters')
        self.assertEqual(nc.geospatial_vertical_positive, 'down')
        self.assertEqual(nc.featureType, 'timeSeries')
        self.assertEqual(nc.geospatial_vertical_resolution, '0')

        # No verticals, so these were not set
        with self.assertRaises(AttributeError):
            nc.geospatial_vertical_min
        with self.assertRaises(AttributeError):
            nc.geospatial_vertical_max

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('time')[:].dtype == np.int32
        assert nc.variables.get('temperature').size == len(times)
        assert (nc.variables.get('temperature')[:] == np.asarray(values)).all()
Example #21
    def test_from_variable(self):

        filename = 'test_urn_from_variable.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = None
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='lwe_thickness_of_precipitation_amount',
                     vertical_datum='NAVD88')
        ts.add_variable('temperature', values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables['temperature'])
        assert urn == 'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount#vertical_datum=navd88'

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='lwe_thickness_of_precipitation_amount',
                     cell_methods='time: variance (interval: PT1H comment: sampled instantaneously)')
        ts.add_variable('temperature2', values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables['temperature2'])
        assert urn == 'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount#cell_methods=time:variance;interval=pt1h'

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='lwe_thickness_of_precipitation_amount',
                     cell_methods='time: variance time: mean (interval: PT1H comment: sampled instantaneously)')
        ts.add_variable('temperature3', values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables['temperature3'])
        assert urn == 'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount#cell_methods=time:mean,time:variance;interval=pt1h'

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='lwe_thickness_of_precipitation_amount',
                     cell_methods='time: variance time: mean (interval: PT1H comment: sampled instantaneously)',
                     discriminant='2')
        ts.add_variable('temperature4', values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables['temperature4'])
        assert urn == 'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount-2#cell_methods=time:mean,time:variance;interval=pt1h'

        ts.close()
Example #22
    def test_timeseries(self):
        filename = 'test_timeseries.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = None
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        # Basic metadata on all timeseries
        self.assertEqual(nc.cdm_data_type, 'Station')
        self.assertEqual(nc.geospatial_lat_units, 'degrees_north')
        self.assertEqual(nc.geospatial_lon_units, 'degrees_east')
        self.assertEqual(nc.geospatial_vertical_units, 'meters')
        self.assertEqual(nc.geospatial_vertical_positive, 'down')
        self.assertEqual(nc.featureType, 'timeSeries')
        self.assertEqual(nc.geospatial_vertical_resolution, '0')

        # No verticals, so these were not set
        with self.assertRaises(AttributeError):
            nc.geospatial_vertical_min
        with self.assertRaises(AttributeError):
            nc.geospatial_vertical_max

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('temperature').size == len(times)
        assert (nc.variables.get('temperature')[:] == np.asarray(values)).all()
Example #23
    def test_timeseries_many_variables(self):
        filename = 'test_timeseries_many_variables.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 1, 2]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
        bottom_values = [30, 31, 32, 33, 34, 35]
        full_masked = values.view(np.ma.MaskedArray)
        full_masked.mask = True
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)
        ts.add_variable('salinity',
                        values=values.reshape((len(times), len(verticals))))
        ts.add_variable('dissolved_oxygen',
                        values=full_masked,
                        fillvalue=full_masked.fill_value)
        ts.add_variable('bottom_temperature',
                        values=bottom_values,
                        verticals=[60],
                        unlink_from_profile=True,
                        attributes=attrs)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        self.assertEqual(nc.geospatial_vertical_resolution, '1 1')
        self.assertEqual(nc.geospatial_vertical_min, 0)
        self.assertEqual(nc.geospatial_vertical_max, 2)

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('time')[:].dtype == np.int32
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('z')[:].dtype == np.int32
        assert nc.variables.get(
            'temperature').size == len(times) * len(verticals)
        assert (nc.variables.get('temperature')[:] == values.reshape(
            (len(times), len(verticals)))).all()
        assert (nc.variables.get('salinity')[:] == values.reshape(
            (len(times), len(verticals)))).all()
        assert nc.variables.get('dissolved_oxygen')[:].mask.all()
Example #24
    def test_timeseries_profile_extra_values(self):
        """
        This will map directly to the time variable and ignore any time indexes
        that are not found.  The 'times' parameter to add_variable should be
        the same length as the values parameter.
        """
        filename = 'test_timeseries_profile_extra_values.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 1, 2]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25, 26, 27, 28], len(verticals))
        new_times = [0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]
        values_times = np.repeat(new_times, len(verticals))
        values_verticals = np.repeat(verticals, len(new_times))
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs, times=values_times, verticals=values_verticals)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        self.assertEqual(nc.geospatial_vertical_resolution, '1 1')
        self.assertEqual(nc.geospatial_vertical_min, 0)
        self.assertEqual(nc.geospatial_vertical_max, 2)

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)
        assert (nc.variables.get('temperature')[:] == np.repeat([20, 21, 22, 23, 24, 25], len(verticals)).reshape((len(times), len(verticals)))).all()
Example #25
    def test_timeseries_profile(self):
        filename = 'test_timeseries_profile.nc'
        times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0, 1, 2]
        ts = TimeSeries(output_directory=self.output_directory,
                        latitude=self.latitude,
                        longitude=self.longitude,
                        station_name=self.station_name,
                        global_attributes=self.global_attributes,
                        output_filename=filename,
                        times=times,
                        verticals=verticals)

        values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
        attrs = dict(standard_name='sea_water_temperature')
        ts.add_variable('temperature', values=values, attributes=attrs)

        nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
        assert nc is not None

        # Basic metadata on all timeseries
        self.assertEqual(nc.cdm_data_type, 'Station')
        self.assertEqual(nc.geospatial_lat_units, 'degrees_north')
        self.assertEqual(nc.geospatial_lon_units, 'degrees_east')
        self.assertEqual(nc.geospatial_vertical_units, 'meters')
        self.assertEqual(nc.geospatial_vertical_positive, 'down')
        self.assertEqual(nc.featureType, 'timeSeriesProfile')
        self.assertEqual(nc.geospatial_vertical_resolution, '1 1')
        self.assertEqual(nc.geospatial_vertical_min, 0)
        self.assertEqual(nc.geospatial_vertical_max, 2)

        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('z').positive == 'down'
        assert nc.variables.get('temperature').size == len(times) * len(verticals)
        assert (nc.variables.get('temperature')[:] == values.reshape((len(times), len(verticals)))).all()
Example #26
def main(output, download_folder, do_download, projects, csv_metadata_file, filesubset=None):
    project_metadata = dict()
    with open(csv_metadata_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            project_name = row['project_name']
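            # Rows whose project_name starts with '#' are treated as comments.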
            if isinstance(project_name, str) and project_name[0] == '#':
                continue
            if projects and project_name.lower() not in projects:
                # Skip projects if a subset was defined
                continue
            project_metadata[project_name] = dict()
            for k, v in row.items():
                project_metadata[project_name][k] = v

    if do_download:
        try:
            downloaded_files = download(download_folder, project_metadata, filesubset)
        except KeyboardInterrupt:
            logger.exception('Error downloading datasets from THREDDS')
            downloaded_files = []
    else:
        downloaded_files = glob(os.path.join(download_folder, "*"))

    for down_file in downloaded_files:

        if filesubset is not None:
            if os.path.basename(down_file).lower() not in filesubset:
                # e.g. "9631ecp-a.nc"
                # Skip this file!
                continue

        if projects:
            tmpnc = netCDF4.Dataset(down_file)
            project_name, _ = tmpnc.id.split("/")
            nc_close(tmpnc)
            if project_name.lower() not in projects:
                # Skip this project!
                continue

        _, temp_file = tempfile.mkstemp(prefix='cmg_collector', suffix='nc')
        shutil.copy(down_file, temp_file)

        nc = None
        try:
            # Cleanup to CF-1.6
            first_time = normalize_time(temp_file)
            normalize_epic_codes(temp_file)
            normalize_vectors(temp_file)
            normalize_units(temp_file)

            # Create list of variables that we want to save.
            mooring_id   = None
            latitude     = None
            longitude    = None

            nc = netCDF4.Dataset(temp_file)

            project_name, _ = nc.id.split("/")
            feature_name, _ = os.path.splitext(os.path.basename(down_file))

            fname = os.path.basename(down_file)
            try:
                if 2 <= int(fname[0]) <= 9:
                    # 1.) everything with first char between 2-9 is 3-digit
                    mooring_id = int(fname[0:3])
                elif int(fname[0]) == 1:
                    # 2.) if MOORING starts with 1, and data is newer than 2014, it's 4 digit, otherwise 3 digit.
                    if first_time > datetime(2014, 1, 1, 0):
                        # 4 digit if after Jan 1, 2014
                        mooring_id = int(fname[0:4])
                    else:
                        # 3 digit if before
                        mooring_id = int(fname[0:3])
            except ValueError:
                logger.exception("Could not create a suitable station_id. Skipping {0}.".format(down_file))
                continue

            try:
                latitude  = nc.variables.get("lat")[0]
                longitude = nc.variables.get("lon")[0]
            except IndexError:
                latitude  = nc.variables.get("lat")[:]
                longitude = nc.variables.get("lon")[:]

            file_name = os.path.basename(down_file)
            output_directory = os.path.join(output, project_name)
            logger.info("Translating {0} into CF1.6 format: {1}".format(down_file, os.path.abspath(os.path.join(output_directory, file_name))))

            if not os.path.isdir(output_directory):
                os.makedirs(output_directory)

            file_global_attributes = { k : getattr(nc, k) for k in nc.ncattrs() }
            file_global_attributes.update(global_attributes)
            file_global_attributes['id'] = feature_name
            file_global_attributes['title'] = '{0} - {1}'.format(project_name, os.path.basename(down_file))
            file_global_attributes['MOORING'] = mooring_id
            file_global_attributes['original_filename'] = fname
            file_global_attributes['original_folder'] = project_name
            if project_name in project_metadata:
                for k, v in project_metadata[project_name].items():
                    if v and k.lower() not in ['id', 'title', 'catalog_xml', 'project_name']:
                        file_global_attributes[k] = v

            times  = nc.variables.get('time')[:]

            # Get all depth values
            depth_variables = []
            for dv in nc.variables:
                depth_variables += [ x for x in nc.variables.get(dv).dimensions if 'depth' in x ]
            depth_variables = sorted(list(set(depth_variables)))
            depth_values = np.asarray([ nc.variables.get(x)[:] for x in depth_variables ]).flatten()

            # Convert everything to positive up, unless it is specifically specified as "up" already
            depth_conversion = -1.0
            if depth_variables:
                pull_positive = nc.variables.get(depth_variables[0])
                if pull_positive and hasattr(pull_positive, 'positive') and pull_positive.positive.lower() == 'up':
                    depth_conversion = 1.0

            depth_values = depth_values * depth_conversion
            ts = TimeSeries(output_directory, latitude, longitude, feature_name, file_global_attributes, times=times, verticals=depth_values, output_filename=file_name, vertical_positive='up')

            v = []
            for other in sorted(nc.variables):  # Sorted for a reason... don't change!
                if other in coord_vars:
                    continue

                old_var = nc.variables.get(other)
                variable_attributes = { k : getattr(old_var, k) for k in old_var.ncattrs() }
                # Remove/rename some attributes
                # https://github.com/USGS-CMG/usgs-cmg-portal/issues/67
                if 'valid_range' in variable_attributes:
                    del variable_attributes['valid_range']
                if 'minimum' in variable_attributes:
                    variable_attributes['actual_min'] = variable_attributes['minimum']
                    del variable_attributes['minimum']
                if 'maximum' in variable_attributes:
                    variable_attributes['actual_max'] = variable_attributes['maximum']
                    del variable_attributes['maximum']
                if 'sensor_depth' in variable_attributes:
                    # Convert to the correct positive "up" or "down"
                    variable_attributes['sensor_depth'] = variable_attributes['sensor_depth'] * depth_conversion

                fillvalue = None
                if hasattr(old_var, "_FillValue"):
                    fillvalue = old_var._FillValue

                # Figure out if this is a variable that is repeated at different depths
                # as different variable names.   Assumes sorted.
                new_var_name = other.split('_')[0]
                if new_var_name in ts.ncd.variables:
                    # Already in new file (processed when the first was encountered in the loop below)
                    continue

                # Get the depth index
                depth_variable = [ x for x in old_var.dimensions if 'depth' in x ]
                if depth_variable and len(old_var.dimensions) > 1 and 'time' in old_var.dimensions:
                    depth_index = np.squeeze(np.where(depth_values == (nc.variables.get(depth_variable[0])[:] * depth_conversion)))

                    # Find other variable names like this one
                    depth_indexes = [(other, depth_index)]
                    for search_var in sorted(nc.variables):
                        # If they have different depth dimension names we need to combine them into one variable
                        if search_var != other and search_var.split('_')[0] == new_var_name and \
                           depth_variable[0] != [ x for x in nc.variables[search_var].dimensions if 'depth' in x ][0]:
                            # Found a match at a different depth
                            search_depth_variable = [ x for x in nc.variables.get(search_var).dimensions if 'depth' in x ]
                            depth_index = np.squeeze(np.where(depth_values == (nc.variables.get(search_depth_variable[0])[:] * depth_conversion)))
                            depth_indexes.append((search_var, depth_index))
                            logger.info("Combining '{}' with '{}' as '{}' (different variables at different depths but are the same parameter)".format(search_var, other, new_var_name))

                    values = np.ma.empty((times.size, len(depth_values)))
                    values.fill_value = fillvalue
                    values.mask = True
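                    # Start fully masked; each depth column found above is
                    # filled in from its source variable below.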
                    for nm, index in depth_indexes:
                        values[:, index] = np.squeeze(nc.variables.get(nm)[:])

                    # If we just have one index we want to use the original name
                    if len(depth_indexes) == 1:
                        # Just use the original variable name
                        new_var_name = other

                    # Create this one, should be the first we encounter for this type
                    ts.add_variable(new_var_name, values=values, times=times, fillvalue=fillvalue, attributes=variable_attributes)
                elif len(old_var.dimensions) == 1 and old_var.dimensions[0] == 'time':
                    # A single time dimensioned variable, like pitch, roll, record count, etc.
                    ts.add_variable(other, values=old_var[:], times=times, unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                elif depth_variable and 'time' not in old_var.dimensions:
                    # Metadata variable like bin distance
                    meta_var = ts.ncd.createVariable(other, old_var.dtype, ('z',), fill_value=fillvalue)
                    for k, v in variable_attributes.items():
                        if k != '_FillValue':
                            meta_var.setncattr(k, v)

                    meta_var[:] = old_var[:]
                elif depth_values.size == 1 and not depth_variable and 'time' in old_var.dimensions:
                    # There is a single depth_value for most variables, but this one does not have a depth dimension
                    # Instead, it has a sensor_depth attribute that defines the Z index.  These need to be put into
                    # a different file to remain CF compliant.
                    new_file_name = file_name.replace('.nc', '_{}.nc'.format(other))
                    new_ts = TimeSeries(output_directory, latitude, longitude, feature_name, file_global_attributes, times=times, verticals=[old_var.sensor_depth*depth_conversion], output_filename=new_file_name, vertical_positive='up')
                    new_ts.add_variable(other, values=old_var[:], times=times, verticals=[old_var.sensor_depth*depth_conversion], fillvalue=fillvalue, attributes=variable_attributes)
                    new_ts.close()
                elif depth_values.size > 1 and not depth_variable and 'time' in old_var.dimensions:
                    if hasattr(old_var, 'sensor_depth'):
                        # An ADCP or profiling dataset, but this variable is measured at a single depth.
                        # Example: Bottom Temperature on an ADCP
                        ts.add_variable(other, values=old_var[:], times=times, verticals=[old_var.sensor_depth*depth_conversion], unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                    else:
                        ts.add_variable(other, values=old_var[:], times=times, unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                else:
                    ts.add_variable(other, values=old_var[:], times=times, fillvalue=fillvalue, attributes=variable_attributes)

                ts.ncd.sync()
            ts.ncd.close()

        except BaseException:
            logger.exception("Error. Skipping {0}.".format(down_file))
            continue
        finally:
            nc_close(nc)
            if os.path.isfile(temp_file):
                os.remove(temp_file)
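The depth-combining branch above hinges on one pattern: allocate a fully masked (time, z) array, then unmask exactly one depth column per source variable, so depths nobody sampled are written as the fill value. A minimal standalone sketch of that pattern, with made-up sizes and a hypothetical 'T_28' variable, assuming only numpy:

import numpy as np

times = np.arange(6)                        # 6 time steps
depth_values = np.array([2.0, 5.0, 10.0])   # combined, sorted depth axis
values = np.ma.empty((times.size, depth_values.size))
values.mask = True                          # start fully masked
# Hypothetical 'T_28' measured at 5.0 m fills column 1; columns 0 and 2
# stay masked and would be written out as the fill value.
values[:, 1] = np.linspace(20.0, 25.0, times.size)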
Example #27
def main(output, download_folder, do_download, projects, csv_metadata_file, filesubset=None):
    project_metadata = dict()
    with open(csv_metadata_file, "r") as f:
        reader = csv.DictReader(f)
        for row in reader:
            project_name = row["project_name"]
            if isinstance(project_name, str) and project_name[0] == "#":
                continue
            if projects and project_name.lower() not in projects:
                # Skip projects if a subset was defined
                continue
            project_metadata[project_name] = dict()
            for k, v in row.items():
                project_metadata[project_name][k] = v

    if do_download:
        try:
            downloaded_files = download(download_folder, project_metadata, filesubset)
        except KeyboardInterrupt:
            logger.exception("Error downloading datasets from THREDDS")
            downloaded_files = []
    else:
        downloaded_files = glob(os.path.join(download_folder, "**", "*"))

    for down_file in sorted(downloaded_files):

        temp_fd, temp_file = tempfile.mkstemp(prefix="cmg_collector", suffix="nc")
        try:

            if filesubset is not None:
                if os.path.basename(down_file).lower() not in filesubset:
                    # aka "9631ecp-a.nc"
                    # Skip this file!
                    continue

            project_name = os.path.basename(os.path.dirname(down_file))
            if projects:
                if project_name.lower() not in projects:
                    # Skip this project!
                    continue
            shutil.copy(down_file, temp_file)

            # Cleanup to CF-1.6
            try:
                first_time = normalize_time(temp_file)
            except (TypeError, ValueError, IndexError):
                logger.error("Could not normalize the time variable. Skipping {0}.".format(down_file))
                continue
            except OverflowError:
                logger.error("Dates out of range. Skipping {0}.".format(down_file))
                continue

            normalize_epic_codes(temp_file)
            normalize_vectors(temp_file)
            normalize_units(temp_file)

            # Create list of variables that we want to save.
            mooring_id = None
            latitude = None
            longitude = None

            fname = os.path.basename(down_file)
            feature_name, file_ext = os.path.splitext(os.path.basename(down_file))
            try:
                if 2 <= int(fname[0]) <= 9:
                    # 1.) everything with first char between 2-9 is 3-digit
                    mooring_id = int(fname[0:3])
                elif int(fname[0]) == 1:
                    # 2.) if MOORING starts with 1, and data is newer than 2014, it's 4 digit, otherwise 3 digit.
                    if first_time > datetime(2014, 1, 1, 0):
                        # 4 digit if after Jan 1, 2014
                        mooring_id = int(fname[0:4])
                    else:
                        # 3 digit if before
                        mooring_id = int(fname[0:3])
            except ValueError:
                logger.exception("Could not create a suitable station_id. Skipping {0}.".format(down_file))
                continue

            file_name = os.path.basename(down_file)
            output_directory = os.path.join(output, project_name)
            logger.info(
                "Translating {0} into CF1.6 format: {1}".format(
                    down_file, os.path.abspath(os.path.join(output_directory, file_name))
                )
            )

            with EnhancedDataset(temp_file) as nc:

                try:
                    latitude = nc.variables.get("lat")[0]
                    longitude = nc.variables.get("lon")[0]
                except IndexError:
                    latitude = nc.variables.get("lat")[:]
                    longitude = nc.variables.get("lon")[:]
                except TypeError:
                    logger.error("Could not find lat/lon variables. Skipping {0}.".format(down_file))
                    continue

                file_global_attributes = {k: getattr(nc, k) for k in nc.ncattrs()}
                file_global_attributes.update(global_attributes)
                file_global_attributes["id"] = feature_name
                file_global_attributes["title"] = os.path.basename(down_file)
                file_global_attributes["description"] = "{0} - {1}".format(project_name, os.path.basename(down_file))
                file_global_attributes["MOORING"] = mooring_id
                file_global_attributes["original_filename"] = fname
                file_global_attributes["original_folder"] = project_name
                if project_name in project_metadata:
                    for k, v in project_metadata[project_name].items():
                        if v and k.lower() not in ["id", "title", "catalog_xml", "project_name"]:
                            file_global_attributes[k] = v

                times = nc.variables.get("time")[:]

                # Get all depth values
                depth_variables = []
                for dv in nc.variables:
                    depth_variables += [x for x in nc.variables.get(dv).dimensions if "depth" in x]
                depth_variables = sorted(list(set(depth_variables)))

                try:
                    assert depth_variables
                    depth_values = np.asarray([nc.variables.get(x)[:] for x in depth_variables]).flatten()
                except (AssertionError, TypeError):
                    logger.warning("No depth variables found in {}, skipping.".format(down_file))
                    continue

                # Convert everything to positive up, unless it is specifically specified as "up" already
                depth_conversion = -1.0
                if depth_variables:
                    pull_positive = nc.variables.get(depth_variables[0])
                    if hasattr(pull_positive, "positive") and pull_positive.positive.lower() == "up":
                        depth_conversion = 1.0
                depth_values = depth_values * depth_conversion

                if not os.path.isdir(output_directory):
                    os.makedirs(output_directory)
                ts = TimeSeries(
                    output_directory,
                    latitude,
                    longitude,
                    feature_name,
                    file_global_attributes,
                    times=times,
                    verticals=depth_values,
                    output_filename=file_name,
                    vertical_positive="up",
                )

                # Set the platform type from the global attribute 'platform_type', defaulting to 'fixed'
                with EnhancedDataset(ts.out_file, "a") as onc:
                    platform_type = getattr(onc, "platform_type", "fixed").lower()
                    onc.variables["platform"].setncattr("type", platform_type)
                    onc.variables["platform"].setncattr("nodc_name", "FIXED PLATFORM, MOORINGS")

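                # Secondary single-depth files created for sensor variables that can't live in the main profile file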
                depth_files = []
                for other in sorted(nc.variables):  # Sorted for a reason... don't change!
                    try:
                        if other in coord_vars:
                            continue

                        old_var = nc.variables.get(other)
                        variable_attributes = {k: getattr(old_var, k) for k in old_var.ncattrs()}
                        # Remove/rename some attributes
                        # https://github.com/USGS-CMG/usgs-cmg-portal/issues/67
                        if "valid_range" in variable_attributes:
                            del variable_attributes["valid_range"]
                        if "minimum" in variable_attributes:
                            variable_attributes["actual_min"] = variable_attributes["minimum"]
                            del variable_attributes["minimum"]
                        if "maximum" in variable_attributes:
                            variable_attributes["actual_max"] = variable_attributes["maximum"]
                            del variable_attributes["maximum"]
                        if "sensor_depth" in variable_attributes:
                            # Convert to the correct positive "up" or "down"
                            variable_attributes["sensor_depth"] = variable_attributes["sensor_depth"] * depth_conversion

                        fillvalue = None
                        if hasattr(old_var, "_FillValue"):
                            fillvalue = old_var._FillValue

                        # Figure out if this is a variable that is repeated at different depths
                        # as different variable names.   Assumes sorted.
                        new_var_name = other.split("_")[0]
                        if new_var_name in ts.ncd.variables:
                            # Already in new file (processed when the first was encountered in the loop below)
                            continue

                        # Get the depth index
                        depth_variable = [x for x in old_var.dimensions if "depth" in x]
                        if depth_variable and len(old_var.dimensions) > 1 and "time" in old_var.dimensions:
                            depth_index = np.squeeze(
                                np.where(depth_values == (nc.variables.get(depth_variable[0])[:] * depth_conversion))
                            )

                            # Find other variable names like this one
                            depth_indexes = [(other, depth_index)]
                            for search_var in sorted(nc.variables):
                                # If they have different depth dimension names we need to combine them into one variable
                                if (
                                    search_var != other
                                    and search_var.split("_")[0] == new_var_name
                                    and depth_variable[0]
                                    != [x for x in nc.variables[search_var].dimensions if "depth" in x][0]
                                ):
                                    # Found a match at a different depth
                                    search_depth_variable = [
                                        x for x in nc.variables.get(search_var).dimensions if "depth" in x
                                    ]
                                    depth_index = np.squeeze(
                                        np.where(
                                            depth_values
                                            == (nc.variables.get(search_depth_variable[0])[:] * depth_conversion)
                                        )
                                    )
                                    depth_indexes.append((search_var, depth_index))
                                    logger.info(
                                        "Combining '{}' with '{}' as '{}' (different variables at different depths but are the same parameter)".format(
                                            search_var, other, new_var_name
                                        )
                                    )

                            values = np.ma.empty((times.size, len(depth_values)), dtype=old_var.dtype)
                            values.fill_value = fillvalue
                            values.mask = True
                            inconsistent = False
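                            # Copy each matched variable into its depth column; a shape mismatch marks the whole set inconsistent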
                            for nm, index in depth_indexes:
                                try:
                                    values[:, index] = np.squeeze(nc.variables.get(nm)[:])
                                except ValueError:
                                    inconsistent = True
                                    break

                            # If we just have one index we want to use the original name
                            if len(depth_indexes) == 1:
                                # Just use the original variable name
                                new_var_name = other

                            if inconsistent is True:
                                # Incorrect array size, most likely a strange variable
                                ts.add_variable_object(old_var, dimension_map=dict(depth="z"), reduce_dims=True)
                            else:
                                # Create this one, should be the first we encounter for this type
                                ts.add_variable(
                                    new_var_name,
                                    values=values,
                                    times=times,
                                    fillvalue=fillvalue,
                                    attributes=variable_attributes,
                                )

                        elif len(old_var.dimensions) == 1 and old_var.dimensions[0] == "time":
                            # A single time dimensioned variable, like pitch, roll, record count, etc.
                            ts.add_variable(
                                other,
                                values=old_var[:],
                                times=times,
                                unlink_from_profile=True,
                                fillvalue=fillvalue,
                                attributes=variable_attributes,
                            )
                        elif (
                            old_var.ndim <= 3
                            and hasattr(old_var, "sensor_depth")
                            and (
                                (depth_values.size == 1 and not depth_variable and "time" in old_var.dimensions)
                                or (
                                    depth_values.size > 1
                                    and not depth_variable
                                    and "time" in old_var.dimensions
                                    and "sensor_depth" in ts.ncd.variables
                                )
                            )
                        ):

                            if "sensor_depth" in ts.ncd.variables and np.isclose(
                                ts.ncd.variables["sensor_depth"][:], old_var.sensor_depth * depth_conversion
                            ):
                                ts.add_variable(
                                    other,
                                    values=old_var[:],
                                    times=times,
                                    unlink_from_profile=True,
                                    verticals=[old_var.sensor_depth * depth_conversion],
                                    fillvalue=fillvalue,
                                    attributes=variable_attributes,
                                )
                            else:
                                # Search through secondary files that have been created for detached variables at a certain depth and
                                # try to match this variable with one of the depths.
                                found_df = False
                                for dfts in depth_files:
                                    if isinstance(old_var.sensor_depth, np.ndarray):
                                        # Well, this is a bad file.
                                        raise ValueError(
                                            "The sensor_depth attribute has more than one value, please fix the source NetCDF: {}".format(
                                                down_file
                                            )
                                        )
                                    if np.isclose(
                                        dfts.ncd.variables[ts.vertical_axis_name][:],
                                        old_var.sensor_depth * depth_conversion,
                                    ):
                                        dfts.add_variable(
                                            other,
                                            values=old_var[:],
                                            times=times,
                                            unlink_from_profile=True,
                                            verticals=[old_var.sensor_depth * depth_conversion],
                                            fillvalue=fillvalue,
                                            attributes=variable_attributes,
                                        )
                                        found_df = True
                                        break

                                # If we couldn't match the current or one of the existing secondary depth files, create a new one.
                                if found_df is False:
                                    new_file_name = file_name.replace(
                                        file_ext, "_z{}{}".format(len(depth_files) + 1, file_ext)
                                    )
                                    fga = copy(file_global_attributes)
                                    fga["id"] = os.path.splitext(new_file_name)[0]
                                    fga["title"] = "{0} - {1}".format(os.path.basename(down_file), other)
                                    fga["description"] = "{0} - {1} - {2}".format(
                                        project_name, os.path.basename(down_file), other
                                    )
                                    new_ts = TimeSeries(
                                        output_directory,
                                        latitude,
                                        longitude,
                                        feature_name,
                                        fga,
                                        times=times,
                                        verticals=[old_var.sensor_depth * depth_conversion],
                                        output_filename=new_file_name,
                                        vertical_positive="up",
                                    )
                                    new_ts.add_variable(
                                        other,
                                        values=old_var[:],
                                        times=times,
                                        verticals=[old_var.sensor_depth * depth_conversion],
                                        fillvalue=fillvalue,
                                        attributes=variable_attributes,
                                    )
                                    depth_files.append(new_ts)
                        elif old_var.ndim <= 3 and (
                            depth_values.size > 1 and not depth_variable and "time" in old_var.dimensions
                        ):
                            if hasattr(old_var, "sensor_depth"):
                                # An ADCP or profiling dataset, but this variable is measured at a single depth.
                                # Example: Bottom Temperature on an ADCP
                                # Skip things with a dimension over 3 (some beam variables like `brange`)
                                ts.add_variable(
                                    other,
                                    values=old_var[:],
                                    times=times,
                                    unlink_from_profile=True,
                                    verticals=[old_var.sensor_depth * depth_conversion],
                                    fillvalue=fillvalue,
                                    attributes=variable_attributes,
                                )
                            else:
                                ts.add_variable(
                                    other,
                                    values=old_var[:],
                                    times=times,
                                    unlink_from_profile=True,
                                    fillvalue=fillvalue,
                                    attributes=variable_attributes,
                                )
                        else:
                            if "time" in old_var.dimensions and old_var.ndim <= 3:
                                ts.add_variable(
                                    other,
                                    values=old_var[:],
                                    times=times,
                                    fillvalue=fillvalue,
                                    attributes=variable_attributes,
                                )
                            else:
                                ts.add_variable_object(old_var, dimension_map=dict(depth="z"), reduce_dims=True)

                    except BaseException:
                        logger.exception("Error processing variable {0} in {1}. Skipping it.".format(other, down_file))
        except KeyboardInterrupt:
            logger.info("Breaking out of Translate loop!")
            break
        except BaseException:
            logger.exception("Error. Skipping {0}.".format(down_file))
            continue
        finally:
            os.close(temp_fd)
            if os.path.isfile(temp_file):
                os.remove(temp_file)

Example #28

# ### Add data variables

# In[10]:

df.columns.tolist()


# In[11]:

for c in df.columns:
    if c in ts._nc.variables:
        print("Skipping '{}' (already in file)".format(c))
        continue
    if c in ['time', 'lat', 'lon', 'depth', 'cpm_date_time_string']:
        print("Skipping axis '{}' (already in file)".format(c))
        continue
    print("Adding {}".format(c))
    try:
        ts.add_variable(c, df[c].values)
    except Exception:
        print("Skipping '{}' (could not convert object dtype)".format(c))
        


Example #29
def main(output, download_folder, do_download, projects, csv_metadata_file, filesubset=None, since=None):
    project_metadata = dict()
    with open(csv_metadata_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            project_name = row['project_name']
            if isinstance(project_name, str) and project_name[0] == '#':
                continue
            if projects and project_name.lower() not in projects:
                # Skip projects if a subset was defined
                continue
            project_metadata[project_name] = dict()
            for k, v in row.items():
                project_metadata[project_name][k] = v

    if do_download:
        try:
            downloaded_files = download(download_folder, project_metadata, filesubset, since)
        except KeyboardInterrupt:
            logger.exception('Error downloading datasets from THREDDS')
            downloaded_files = []
    else:
        downloaded_files = glob(os.path.join(download_folder, '**', '*'))
        if since is not None:
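            # Keep only files whose modification time (UTC) is at or after the 'since' cutoff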
            def should_keep(d):
                modt = datetime.utcfromtimestamp(os.path.getmtime(d)).replace(tzinfo=pytz.utc)
                return modt >= since
            downloaded_files = [ dl for dl in downloaded_files if should_keep(dl) ]

    for down_file in sorted(downloaded_files):

        temp_fd, temp_file = tempfile.mkstemp(prefix='cmg_collector', suffix='nc')
        try:

            if filesubset is not None:
                if os.path.basename(down_file).lower() not in filesubset:
                    # aka "9631ecp-a.nc"
                    # Skip this file!
                    continue

            project_name = os.path.basename(os.path.dirname(down_file))
            if projects:
                if project_name.lower() not in projects:
                    # Skip this project!
                    continue
            shutil.copy(down_file, temp_file)

            # Cleanup to CF-1.6
            try:
                first_time = normalize_time(temp_file)
            except (TypeError, ValueError, IndexError):
                logger.exception("Could not normalize the time variable. Skipping {0}.".format(down_file))
                continue
            except OverflowError:
                logger.error("Dates out of range. Skipping {0}.".format(down_file))
                continue

            normalize_variable_attribute_types(temp_file)
            normalize_epic_codes(temp_file, down_file)
            normalize_vectors(temp_file)
            normalize_units(temp_file)

            # Create list of variables that we want to save.
            mooring_id   = None
            latitude     = None
            longitude    = None

            fname = os.path.basename(down_file)
            feature_name, file_ext = os.path.splitext(os.path.basename(down_file))
            try:
                if 2 <= int(fname[0]) <= 9:
                    # 1.) everything with first char between 2-9 is 3-digit
                    mooring_id = int(fname[0:3])
                elif int(fname[0]) == 1:
                    # 2.) if MOORING starts with 1, and data is newer than 2014, it's 4 digit, otherwise 3 digit.
                    if first_time > datetime(2014, 1, 1, 0):
                        # 4 digit if after Jan 1, 2014
                        mooring_id = int(fname[0:4])
                    else:
                        # 3 digit if before
                        mooring_id = int(fname[0:3])
            except ValueError:
                logger.exception("Could not create a suitable station_id. Skipping {0}.".format(down_file))
                continue

            file_name = os.path.basename(down_file)
            output_directory = os.path.join(output, project_name)
            logger.info("Translating {0} into CF1.6 format: {1}".format(down_file, os.path.abspath(os.path.join(output_directory, file_name))))

            with EnhancedDataset(temp_file) as nc:

                try:
                    latitude  = nc.variables.get("lat")[0]
                    longitude = nc.variables.get("lon")[0]
                except IndexError:
                    latitude  = nc.variables.get("lat")[:]
                    longitude = nc.variables.get("lon")[:]
                except TypeError:
                    logger.error("Could not find lat/lon variables. Skipping {0}.".format(down_file))
                    continue

                file_global_attributes = { k : getattr(nc, k) for k in nc.ncattrs() }
                file_global_attributes.update(global_attributes)
                file_global_attributes['id'] = feature_name
                file_global_attributes['MOORING'] = mooring_id
                file_global_attributes['original_filename'] = fname
                file_global_attributes['original_folder'] = project_name

                no_override = ['id', 'MOORING', 'original_filename', 'original_folder', 'catalog_xml', 'project_name']
                if project_name in project_metadata:
                    for k, v in project_metadata[project_name].items():
                        if v and k.lower() not in no_override:
                            file_global_attributes[k] = v

                if 'summary' in file_global_attributes:
                    # Save the original summary
                    file_global_attributes['WHOI_Buoy_Group_summary'] = file_global_attributes['summary']

                # Better title/summary for discovery via catalogs
                project_title = file_global_attributes.get('project_title', project_name).strip()
                project_summary = file_global_attributes.get('project_summary', '').strip()
                file_global_attributes['title'] = 'USGS-CMG time-series data: {0} - {1} - {2}'.format(project_name, mooring_id, feature_name)
                file_global_attributes['summary'] = 'USGS-CMG time-series data from the {} project, mooring {} and package {}. {}'.format(project_title, mooring_id, feature_name, project_summary).strip()

                times  = nc.variables.get('time')[:]

                # Get all depth values
                depth_variables = []
                for dv in nc.variables:
                    depth_variables += [ x for x in nc.variables.get(dv).dimensions if 'depth' in x ]
                depth_variables = sorted(list(set(depth_variables)))

                try:
                    assert depth_variables
                    depth_values = np.asarray([ nc.variables.get(x)[:] for x in depth_variables ]).flatten()
                except (AssertionError, TypeError):
                    logger.warning("No depth variables found in {}, skipping.".format(down_file))
                    continue

                # Convert everything to positive up, unless it is specifically specified as "up" already
                depth_conversion = -1.0
                if depth_variables:
                    pull_positive = nc.variables.get(depth_variables[0])
                    if hasattr(pull_positive, 'positive') and pull_positive.positive.lower() == 'up':
                        depth_conversion = 1.0
                depth_values = depth_values * depth_conversion

                if not os.path.isdir(output_directory):
                    os.makedirs(output_directory)
                ts = TimeSeries(output_directory, latitude, longitude, feature_name, file_global_attributes, times=times, verticals=depth_values, output_filename=file_name, vertical_positive='up')

                # Set the platform type from the global attribute 'platform_type', defaulting to 'fixed'
                with EnhancedDataset(ts.out_file, 'a') as onc:
                    platform_type = getattr(onc, 'platform_type', 'fixed').lower()
                    onc.variables['platform'].setncattr('type', platform_type)
                    onc.variables['platform'].setncattr('nodc_name', "FIXED PLATFORM, MOORINGS")
                    # Add ERDDAP variables
                    onc.cdm_data_type = "TimeSeries"
                    onc.cdm_timeseries_variables = "latitude,longitude,z,feature_type_instance"

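                # Secondary single-depth files created for sensor variables that can't live in the main profile file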
                depth_files = []
                for other in sorted(nc.variables):  # Sorted for a reason... don't change!
                    try:
                        if other in coord_vars:
                            continue

                        ovsd = None  # old var sensor depth
                        old_var = nc.variables.get(other)
                        variable_attributes = { k : getattr(old_var, k) for k in old_var.ncattrs() }
                        # Remove/rename some attributes
                        # https://github.com/USGS-CMG/usgs-cmg-portal/issues/67
                        if 'valid_range' in variable_attributes:
                            del variable_attributes['valid_range']
                        if 'minimum' in variable_attributes:
                            variable_attributes['actual_min'] = variable_attributes['minimum']
                            del variable_attributes['minimum']
                        if 'maximum' in variable_attributes:
                            variable_attributes['actual_max'] = variable_attributes['maximum']
                            del variable_attributes['maximum']
                        if 'sensor_depth' in variable_attributes:
                            # sensor_depth is ALWAYS positive "down", so don't convert!
                            # This is contrary to the "positive" attribute on the Z axis.
                            # variable_attributes['sensor_depth'] = variable_attributes['sensor_depth'] * -1
                            # Round the sensor_depth attribute
                            variable_attributes['sensor_depth'] = np.around(variable_attributes['sensor_depth'], decimals=4)
                            ovsd = np.around(old_var.sensor_depth * depth_conversion, decimals=4)

                        fillvalue = None
                        if hasattr(old_var, "_FillValue"):
                            fillvalue = old_var._FillValue

                        # Figure out if this is a variable that is repeated at different depths
                        # as different variable names.   Assumes sorted.
                        new_var_name = other.split('_')[0]
                        if new_var_name in ts.ncd.variables:
                            # Already in new file (processed when the first was encountered in the loop below)
                            continue

                        # Get the depth index
                        depth_variable = [ x for x in old_var.dimensions if 'depth' in x ]
                        if depth_variable and len(old_var.dimensions) > 1 and 'time' in old_var.dimensions:
                            depth_index = np.squeeze(np.where(depth_values == (nc.variables.get(depth_variable[0])[:] * depth_conversion)))

                            # Find other variable names like this one
                            depth_indexes = [(other, depth_index)]
                            for search_var in sorted(nc.variables):
                                # If they have different depth dimension names we need to combine them into one variable
                                if search_var != other and search_var.split('_')[0] == new_var_name and \
                                   depth_variable[0] != [ x for x in nc.variables[search_var].dimensions if 'depth' in x ][0]:
                                    # Found a match at a different depth
                                    search_depth_variable = [ x for x in nc.variables.get(search_var).dimensions if 'depth' in x ]
                                    depth_index = np.squeeze(np.where(depth_values == (nc.variables.get(search_depth_variable[0])[:] * depth_conversion)))
                                    depth_indexes.append((search_var, depth_index))
                                    logger.info("Combining '{}' with '{}' as '{}' (different variables at different depths but are the same parameter)".format(search_var, other, new_var_name))

                            values = np.ma.empty((times.size, len(depth_values)), dtype=old_var.dtype)
                            if fillvalue is not None:
                                fillvalue = values.dtype.type(fillvalue)
                                values.fill_value = fillvalue
                            values.mask = True
                            inconsistent = False
                            for nm, index in depth_indexes:
                                try:
                                    values[:, index] = np.squeeze(nc.variables.get(nm)[:])
                                except ValueError:
                                    inconsistent = True
                                    break

                            # If we just have one index we want to use the original name
                            if len(depth_indexes) == 1:
                                # Just use the original variable name
                                new_var_name = other

                            if inconsistent is True:
                                # Incorrect array size, most likely a strange variable
                                ts.add_variable_object(old_var, dimension_map=dict(depth='z'), reduce_dims=True)
                            else:
                                # Create this one, should be the first we encounter for this type
                                ts.add_variable(new_var_name, values=values, times=times, fillvalue=fillvalue, attributes=variable_attributes)

                        elif len(old_var.dimensions) == 1 and old_var.dimensions[0] == 'time':
                            # A single time dimensioned variable, like pitch, roll, record count, etc.
                            ts.add_variable(other, values=old_var[:], times=times, unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                        elif old_var.ndim <= 3 and ovsd is not None and \
                                ((depth_values.size == 1 and not depth_variable and 'time' in old_var.dimensions) or
                                 (depth_values.size  > 1 and not depth_variable and 'time' in old_var.dimensions and 'sensor_depth' in ts.ncd.variables)):

                            if 'sensor_depth' in ts.ncd.variables and np.isclose(ts.ncd.variables['sensor_depth'][:], ovsd):
                                ts.add_variable(other, values=old_var[:], times=times, unlink_from_profile=True, verticals=[ovsd], fillvalue=fillvalue, attributes=variable_attributes)
                            else:
                                # Search through secondary files that have been created for detached variables at a certain depth and
                                # try to match this variable with one of the depths.
                                found_df = False
                                for dfts in depth_files:
                                    if isinstance(ovsd, np.ndarray):
                                        # Well, this is a bad file.
                                        raise ValueError("The sensor_depth attribute has more than one value, please fix the source NetCDF: {}".format(down_file))
                                    if np.isclose(dfts.ncd.variables[ts.vertical_axis_name][:], ovsd):
                                        dfts.add_variable(other, values=old_var[:], times=times, unlink_from_profile=True, verticals=[ovsd], fillvalue=fillvalue, attributes=variable_attributes)
                                        found_df = True
                                        break

                                # If we couldn't match the current or one of the existing secondary depth files, create a new one.
                                if found_df is False:
                                    new_file_name = file_name.replace(file_ext, '_z{}{}'.format(len(depth_files) + 1, file_ext))
                                    fga = copy(file_global_attributes)
                                    fga['id'] = os.path.splitext(new_file_name)[0]
                                    new_ts = TimeSeries(output_directory, latitude, longitude, feature_name, fga, times=times, verticals=[ovsd], output_filename=new_file_name, vertical_positive='up')
                                    new_ts.add_variable(other, values=old_var[:], times=times, verticals=[ovsd], fillvalue=fillvalue, attributes=variable_attributes)
                                    depth_files.append(new_ts)
                        elif old_var.ndim <= 3 and (depth_values.size > 1 and not depth_variable and 'time' in old_var.dimensions):
                            if ovsd is not None:
                                # An ADCP or profiling dataset, but this variable is measured at a single depth.
                                # Example: Bottom Temperature on an ADCP
                                # Skip things with a dimension over 3 (some beam variables like `brange`)
                                ts.add_variable(other, values=old_var[:], times=times, unlink_from_profile=True, verticals=[ovsd], fillvalue=fillvalue, attributes=variable_attributes)
                            else:
                                ts.add_variable(other, values=old_var[:], times=times, unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                        else:
                            if 'time' in old_var.dimensions and old_var.ndim <= 3:
                                ts.add_variable(other, values=old_var[:], times=times, fillvalue=fillvalue, attributes=variable_attributes)
                            else:
                                ts.add_variable_object(old_var, dimension_map=dict(depth='z'), reduce_dims=True)

                    except BaseException:
                        logger.exception("Error processing variable {0} in {1}. Skipping it.".format(other, down_file))
        except KeyboardInterrupt:
            logger.info("Breaking out of Translate loop!")
            break
        except BaseException:
            logger.exception("Error. Skipping {0}.".format(down_file))
            continue
        finally:
            try:
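                # Intended to release the TimeSeries objects so garbage collection closes their NetCDF handles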
                for df in depth_files:
                    del df
            except NameError:
                pass
            try:
                del ts
            except NameError:
                pass
            os.close(temp_fd)
            if os.path.isfile(temp_file):
                os.remove(temp_file)
Example #30
def parse_type_1(output_format, site_id, contents, output, csv_link):
    """
    # ---------------------------------- WARNING ----------------------------------------
    # The data you have obtained from this automated U.S. Geological Survey database
    # have not received Director's approval and as such are provisional and subject to
    # revision.  The data are released on the condition that neither the USGS nor the
    # United States Government may be held liable for any damages resulting from its use.
    # Additional info: http://waterdata.usgs.gov/ga/nwis/help/?provisional
    #
    # File-format description:  http://waterdata.usgs.gov/nwis/?tab_delimited_format_info
    # Automated-retrieval info: http://waterdata.usgs.gov/nwis/?automated_retrieval_info
    #
    # Contact:   [email protected]
    # retrieved: 2012-11-20 12:05:22 EST       (caww01)
    #
    # Data for the following 1 site(s) are contained in this file
    #    USGS 395740074482628 South Branch Rancocas Cr at S Main St nr Lumberton
    # -----------------------------------------------------------------------------------
    #
    # Data provided for site 395740074482628
    #    DD parameter   Description
    #    03   00035     Wind speed, miles per hour
    #    07   00025     Barometric pressure, millimeters of mercury
    #    09   00045     Precipitation, total, inches
    #    19   63160     Stream water level elevation above NAVD 1988, in feet
    #
    # Data-value qualification codes included in this output:
    #     P  Provisional data subject to revision.
    #
    agency_cd   site_no datetime    tz_cd   03_00035    03_00035_cd 07_00025    07_00025_cd 09_00045    09_00045_cd 19_63160    19_63160_cd
    5s  15s 20d 6s  14n 10s 14n 10s 14n 10s 14n 10s
    USGS    395740074482628 2012-10-28 13:00    EST 4.2 P   755 P           3.22    P
    USGS    395740074482628 2012-10-28 13:15    EST 6.4 P   754 P   0.00    P   3.36    P
    USGS    395740074482628 2012-10-28 13:30    EST 3.6 P   754 P   0.00    P   3.50    P
    USGS    395740074482628 2012-10-28 13:45    EST 3.2 P   754 P   0.00    P   3.63    P
    USGS    395740074482628 2012-10-28 14:00    EST 7.0 P   754 P   0.00    P   3.76    P
    USGS    395740074482628 2012-10-28 14:15    EST 4.0 P   754 P   0.00    P   3.87    P
    ...
    """
    # lat/lon point: http://waterservices.usgs.gov/nwis/site/?sites=395740074482628

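    # Maps NWIS "DD_parameter" column codes (see the header reproduced in the docstring) to CF attributes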
    variable_map = {
        '01_00065' : {'long_name' : 'Gage height', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
        '03_00035' : {'long_name' : 'Wind Speed', 'standard_name' : 'wind_speed', 'units': 'mph'},
        '04_00035' : {'long_name' : 'Wind Gust', 'standard_name' : 'wind_speed_of_gust', 'units': 'mph'},
        '05_00035' : {'long_name' : 'Wind Speed', 'standard_name' : 'wind_speed', 'units': 'mph'},
        '06_00035' : {'long_name' : 'Wind Gust', 'standard_name' : 'wind_speed_of_gust', 'units': 'mph'},
        '04_00036' : {'long_name' : 'Wind Direction', 'standard_name' : 'wind_from_direction', 'units': 'degrees'},
        '02_00036' : {'long_name' : 'Wind Direction', 'standard_name' : 'wind_from_direction', 'units': 'degrees'},
        '05_00025' : {'long_name' : 'Air Pressure', 'standard_name' : 'air_pressure', 'units': 'mm of mercury'},
        '07_00025' : {'long_name' : 'Air Pressure', 'standard_name' : 'air_pressure', 'units': 'mm of mercury'},
        '09_00025' : {'long_name' : 'Air Pressure', 'standard_name' : 'air_pressure', 'units': 'mm of mercury'},
        '03_00045' : {'long_name' : 'Total Precipitation', 'standard_name' : 'lwe_thickness_of_precipitation_amount', 'units': 'inches'},
        '08_00045' : {'long_name' : 'Total Precipitation', 'standard_name' : 'lwe_thickness_of_precipitation_amount', 'units': 'inches'},
        '09_00045' : {'long_name' : 'Total Precipitation', 'standard_name' : 'lwe_thickness_of_precipitation_amount', 'units': 'inches'},
        '06_00052' : {'long_name' : 'Relative Humidity', 'standard_name' : 'relative_humidity', 'units': 'percent'},
        '07_00052' : {'long_name' : 'Relative Humidity', 'standard_name' : 'relative_humidity', 'units': 'percent'},
        '08_00052' : {'long_name' : 'Relative Humidity', 'standard_name' : 'relative_humidity', 'units': 'percent'},
        '05_00020' : {'long_name' : 'Air Temperature', 'standard_name' : 'air_temperature', 'units': 'degrees_Celsius'},
        '06_00020' : {'long_name' : 'Air Temperature', 'standard_name' : 'air_temperature', 'units': 'degrees_Celsius'},
        '07_00020' : {'long_name' : 'Air Temperature', 'standard_name' : 'air_temperature', 'units': 'degrees_Celsius'},
        '19_63160' : {'long_name' : 'Water Surface Height Above Reference Datum (NAVD88)', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
        '01_63160' : {'long_name' : 'Water Surface Height Above Reference Datum (NAVD88)', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
    }

    # Get metadata from a separate endpoint.
    d = requests.get("http://waterservices.usgs.gov/nwis/site/?sites={!s}".format(site_id))
    try:
        d.raise_for_status()
    except requests.exceptions.HTTPError:
        logger.error("Could not find lat/lon endpoint for station {!s}, skipping. Status code: {!s}".format(site_id, d.status_code))
        return
    _, hz, dz = split_file(d.text, "agency_cd")
    # Strip off the one line after the headers
    dz = dz[1:]
    dfz  = pd.DataFrame(dz, columns=hz)
    lat  = float(dfz["dec_lat_va"][0])
    lon  = float(dfz["dec_long_va"][0])
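    # Fall back to NAVD88 when the site metadata has no altitude datum code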
    sensor_vertical_datum = dfz["alt_datum_cd"][0] or "NAVD88"
    try:
        z = float(dfz["alt_va"][0])
    except ValueError:
        z = 0.
    loc  = "POINT({!s} {!s} {!s})".format(lon, lat, z)
    name = dfz["station_nm"][0]

    comments, headers, data = split_file(contents, "agency_cd")
    df = pd.DataFrame(data, columns=headers)

    fillvalue = -9999.9

    # Combine date columns
    dates = df["datetime"]
    tz = df["tz_cd"]
    new_dates = list()
    for i in range(len(dates)):
        try:
            new_dates.append(parse(dates[i] + " " + tz[i]).astimezone(pytz.utc))
        except BaseException:
            # Remove row.  Bad date.
            df.drop(i, axis=0, inplace=True)
            continue
    df['time'] = new_dates
    df['depth'] = [ z for x in range(len(df['time'])) ]

    # Strip out "_cd" columns (quality checks for USGS)
    for h in headers:
        if "_cd" in h:
            df.drop(h, axis=1, inplace=True)

    # Add global attributes to appear in the resulting NetCDF file
    global_attributes = dict(
        title=name,
        summary='USGS Hurricane Sandy Rapid Response Stations.  Data acquired from http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/.',
        keywords="usgs, waterdata, elevation, water, waterlevel, sandy, hurricane, rapid, response, %s" % site_id,
        keywords_vocabulary="None",
        naming_authority='gov.usgs',
        id=site_id,
        cdm_data_type="Station",
        history="NetCDF file generated from {!s}".format(csv_link),
        creator="USGS",
        creator_url="http://waterdata.usgs.gov",
        creator_institution="USGS",
        creator_urn="gov.usgs",
        publisher="Axiom Data Science",
        publisher_uri="http://axiomdatascience.com",
        processing_level="None",
        acknowledgement="None",
        geospatial_bounds=loc,
        geospatial_lat_min=lat,
        geospatial_lat_max=lat,
        geospatial_lon_min=lon,
        geospatial_lon_max=lon,
        license="Freely Distributed",
        date_created=datetime.utcnow().replace(second=0, microsecond=0).isoformat()
    )

    def to_floats(x):
        try:
            return float(x)
        except ValueError:
            return fillvalue

    min_time = df['time'].min()
    max_time = df['time'].max()

    full_station_urn = "urn:ioos:station:{!s}:{!s}".format(global_attributes["naming_authority"], site_id)
    if output_format == 'cf16':
        output_filename = '{}_{}-{}.nc'.format(site_id, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
        times = [ calendar.timegm(x.timetuple()) for x in df["time"] ]
        verticals = df['depth'].values
        ts = TimeSeries(output, latitude=lat, longitude=lon, station_name=full_station_urn, global_attributes=global_attributes, output_filename=output_filename, times=times, verticals=verticals, vertical_axis_name='z')

    for var in df.columns:
        if var in ['datetime', 'time', 'depth', 'tz_cd', 'site_no', 'agency_cd']:
            continue

        try:
            var_meta = variable_map[var]
        except KeyError:
            logger.error("Variable {!s} was not found in variable map!".format(var))
            continue

        # Convert to floats
        df[var] = df[var].map(to_floats)

        # Change feet to meters
        if var_meta["units"] in ["feet", "ft"]:
            df[var] = np.asarray([ v * 0.3048 if v != fillvalue else v for v in df[var] ])
            var_meta["units"] = "meters"

        if output_format == 'axiom':
            full_sensor_urn = "urn:ioos:sensor:{!s}:{!s}:{!s}".format(global_attributes["naming_authority"], site_id, var_meta["standard_name"])
            output_directory = os.path.join(output, full_sensor_urn)
            output_filename = '{}_{}-{}.nc'.format(var, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
            ts = TimeSeries.from_dataframe(df, output_directory, output_filename, lat, lon, full_station_urn, global_attributes, var_meta["standard_name"], var_meta, sensor_vertical_datum=sensor_vertical_datum, fillvalue=fillvalue, data_column=var, vertical_axis_name='height')
            ts.add_instrument_metadata(urn=full_sensor_urn)
            ts.close()
        elif output_format == 'cf16':
            # Variable names shouldn't start with a number
            try:
                int(var[0])
                variable_name = 'v_{}'.format(var)
            except ValueError:
                variable_name = var
            ts.add_variable(variable_name, values=df[var].values, attributes=var_meta, fillvalue=fillvalue, sensor_vertical_datum=sensor_vertical_datum)

    if output_format == 'cf16':
        ts.close()
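Both USGS Sandy parsers (this one and parse_type_2 below) call a split_file helper that these examples never define. Judging only from the call sites, it takes the raw file text plus the name of the first header column and returns the '#' comment lines, the header row, and the data rows. A plausible reconstruction is sketched here; the actual helper in the source project may differ.

def split_file(contents, first_header_column):
    # Hypothetical reconstruction: separate leading '#' comments,
    # the tab-delimited header row (recognized by its first column name),
    # and the tab-delimited data rows that follow it.
    comments, headers, data = [], None, []
    for line in contents.splitlines():
        if line.startswith('#'):
            comments.append(line)
        elif headers is None and line.split('\t')[0] == first_header_column:
            headers = line.split('\t')
        elif headers is not None and line.strip():
            data.append(line.split('\t'))
    return comments, headers, data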
Example #31
def parse_type_2(output_format, site_id, contents, output, csv_link):
    """
    # These data are provisional and subject to revision.
    # Data processed as of 12/05/2012 11:54:29.
    # Data collected as part of Hurricane Sandy (2012) Storm Tide project.
    # Data are archived at http://water.usgs.gov/floods/events/2012/isaac/index.php
    # Elevation determined from GPS surveys (NAVD 88).
    # Time datum is GMT (Greenwich Mean Time).
    # Water density estimated on basis of sensor location
    #   where saltwater = 63.989 lb/ft3       (Saltwater = dissolved solids concentration greater than 20000 milligrams per liter)
    #   where brackish water = 63.052 lb/ft3  (Brackish water = dissolved solids concentration between 1000 and 20000 milligrams per liter)
    #   where freshwater = 62.428 lb/ft3      (Freshwater = dissolved solids concentration less than 1000 milligrams per liter)
    # The equation used to compute elevation from recorded pressure is
    #  (((sp-bp)*144)/d)+e
    # Where sp = surge pressure in psi; bp = barometric pressure in psi;
    #  d = water density in lb/ft3; and e = elevation of sensor in ft above NAVD 88.
    # Barometric data from nearest pressure sensor. Location for the barometric sensor is listed below.
    # Elevation is computer-rounded to two decimal places.
    #      Sensor information
    # Site id = SSS-NY-WES-001WL
    # Site type = water level
    # Horizontal datum used is NAD 83
    # Sensor location latitude 40.942755
    # Sensor location longitude -73.719828
    # Sensor elevation above NAVD 88 = -3.97 ft
    # Lowest recordable water elevation is -3.90 ft
    # Water density value used = 63.989 lb/ft3
    # Barometric sensor site (source of bp) = SSS-NY-WES-002BP
    # Barometric sensor location latitude 40.90754368
    # Barometric sensor location longitude -73.8692184

    date_time_GMT   elevation   nearest_barometric_sensor_psi
    10-28-2012 06:00:00 0.88    14.5145
    10-28-2012 06:00:30 0.86    14.5145
    10-28-2012 06:01:00 0.85    14.5170
    10-28-2012 06:01:30 0.85    14.5145
    10-28-2012 06:02:00 0.84    14.5170
    10-28-2012 06:02:30 0.81    14.5145
    10-28-2012 06:03:00 0.76    14.5145
    ...
    """

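    # Only the elevation column has CF metadata defined for this file type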
    variable_map = {
        'elevation' : {'long_name' : 'Water Level Elevation above Reference Datum (NAVD88)', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
    }

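    # Note: 'fillvalue' is read from the enclosing scope at call time (it is assigned just below)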
    def to_floats(x):
        try:
            return float(x)
        except ValueError:
            return fillvalue

    comments, headers, data = split_file(contents, "date_time_GMT")
    df = pd.DataFrame(data, columns=headers)
    fillvalue = -9999.9

    lat     = None
    lon     = None
    z       = 0
    name    = site_id
    sensor_vertical_datum = "NAVD88"

    for c in comments:
        if "Sensor location latitude" in c:
            lat = float(list(filter(None, map(lambda x: x.strip(), c.split(" "))))[-1])
        elif "Sensor location longitude" in c:
            lon = float(list(filter(None, map(lambda x: x.strip(), c.split(" "))))[-1])
        elif "Site id" in c:
            site_id = list(filter(None, map(lambda x: x.strip(), c.split(" "))))[-1]
            name = site_id
        elif "Sensor elevation" in c:
            sensor_vertical_datum = "".join(c.split("=")[0].split(" ")[4:6])
            parts = list(filter(None, map(lambda x: x.strip(), c.split(" "))))
            z = float(parts[-2])
            if parts[-1] in ["feet", "ft"]:
                z *= 0.3048

    loc = "POINT({!s} {!s} {!s})".format(lon, lat, z)
    df['time'] = df["date_time_GMT"].map(lambda x: parse(x + " UTC"))
    df['depth'] = [ z for x in range(len(df['time'])) ]

    # Add global attributes to appear in the resulting NetCDF file
    global_attributes = dict(
        title=name,
        summary='USGS Hurricane Sandy Rapid Response Stations.  Data acquired from http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/.',
        keywords="usgs, waterdata, elevation, water, waterlevel, sandy, hurricane, rapid, response, %s" % site_id,
        keywords_vocabulary="None",
        naming_authority='gov.usgs',
        id=site_id,
        cdm_data_type="Station",
        history="NetCDF file generated from {!s}".format(csv_link),
        creator="USGS",
        creator_url="http://waterdata.usgs.gov",
        creator_institution="USGS",
        creator_urn="gov.usgs",
        publisher="Axiom Data Science",
        publisher_uri="http://axiomdatascience.com",
        processing_level="None",
        acknowledgement="None",
        geospatial_bounds=loc,
        geospatial_lat_min=lat,
        geospatial_lat_max=lat,
        geospatial_lon_min=lon,
        geospatial_lon_max=lon,
        license="Freely Distributed",
        date_created=datetime.utcnow().replace(second=0, microsecond=0).isoformat()
    )

    full_station_urn = "urn:ioos:station:{!s}:{!s}".format(global_attributes["naming_authority"], site_id)
    min_time = df["time"].min()
    max_time = df["time"].max()

    if output_format == 'cf16':
        times = [ calendar.timegm(x.timetuple()) for x in df['time'] ]
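        # Note: calendar.timegm interprets the tuple as UTC (unlike
        # time.mktime, which assumes local time), so the GMT timestamps
        # parsed above convert cleanly to epoch seconds, e.g.
        # calendar.timegm(datetime(2012, 10, 28, 6, 0).timetuple()) == 1351404000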
        verticals = df['depth'].values
        output_filename = '{}_{}-{}.nc'.format(site_id, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
        ts = TimeSeries(output, latitude=lat, longitude=lon, station_name=full_station_urn, global_attributes=global_attributes, output_filename=output_filename, times=times, verticals=verticals)

    for var in df.columns:
        if var in ['date_time_GMT', 'time', 'depth']:
            continue
        try:
            int(var[0])
            variable_name = 'v_{}'.format(var)
        except ValueError:
            variable_name = var

        try:
            var_meta = variable_map[var]
        except KeyError:
            logger.error("Variable {!s} was not found in variable map!".format(var))
            continue

        # Convert to floats
        df[var] = df[var].map(to_floats)
        if var_meta["units"] in ["feet", "ft"]:
            df[var] = [ v * 0.3048 if v != fillvalue else v for v in df[var] ]
            var_meta["units"] = "meters"

        if output_format == 'axiom':
            full_sensor_urn = "urn:ioos:sensor:{!s}:{!s}:{!s}".format(global_attributes["naming_authority"], site_id, var_meta["standard_name"])
            output_directory = os.path.join(output, full_sensor_urn)
            output_filename = '{}_{}-{}.nc'.format(var, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
            ts = TimeSeries.from_dataframe(df, output_directory, output_filename, lat, lon, full_station_urn, global_attributes, var_meta["standard_name"], var_meta, sensor_vertical_datum=sensor_vertical_datum, fillvalue=fillvalue, data_column=var)
            ts.add_instrument_metadata(urn=full_sensor_urn)
            ts.close()
        elif output_format == 'cf16':
            ts.add_variable(variable_name, values=df[var].values, attributes=var_meta, fillvalue=fillvalue, sensor_vertical_datum=sensor_vertical_datum)

    if output_format == 'cf16':
        ts.close()
Example #32
class TestTimeseriesTimeBounds(unittest.TestCase):

    def setUp(self):
        self.output_directory = os.path.join(os.path.dirname(__file__), "output")
        self.latitude = 34
        self.longitude = -72
        self.station_name = "PytoolsTestStation"
        self.global_attributes = dict(id='this.is.the.id')

        self.filename = 'test_timeseries_bounds.nc'
        self.times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0]
        self.ts = TimeSeries(output_directory=self.output_directory,
                             latitude=self.latitude,
                             longitude=self.longitude,
                             station_name=self.station_name,
                             global_attributes=self.global_attributes,
                             output_filename=self.filename,
                             times=self.times,
                             verticals=verticals)

        self.values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='sea_water_temperature')
        self.ts.add_variable('temperature', values=self.values, attributes=attrs)

    def tearDown(self):
        os.remove(os.path.join(self.output_directory, self.filename))

    def test_time_bounds_start(self):
        delta = timedelta(seconds=1000)
        self.ts.add_time_bounds(delta=delta, position='start')

        nc = netCDF4.Dataset(os.path.join(self.output_directory, self.filename))
        assert nc.variables.get('time_bounds').shape == (len(self.times), 2,)
        assert (nc.variables.get('time_bounds')[:] == np.asarray([
                                                                    [0,    1000],
                                                                    [1000, 2000],
                                                                    [2000, 3000],
                                                                    [3000, 4000],
                                                                    [4000, 5000],
                                                                    [5000, 6000]
                                                                ])).all()
        nc.close()

    def test_time_bounds_middle(self):
        delta = timedelta(seconds=1000)
        self.ts.add_time_bounds(delta=delta, position='middle')

        nc = netCDF4.Dataset(os.path.join(self.output_directory, self.filename))
        assert nc.variables.get('time_bounds').shape == (len(self.times), 2,)
        assert (nc.variables.get('time_bounds')[:] == np.asarray([
                                                                    [ -500,  500],
                                                                    [  500, 1500],
                                                                    [ 1500, 2500],
                                                                    [ 2500, 3500],
                                                                    [ 3500, 4500],
                                                                    [ 4500, 5500]
                                                                ])).all()
        nc.close()

    def test_time_bounds_end(self):
        delta = timedelta(seconds=1000)
        self.ts.add_time_bounds(delta=delta, position='end')

        nc = netCDF4.Dataset(os.path.join(self.output_directory, self.filename))
        assert nc.variables.get('time_bounds').shape == (len(self.times), 2,)
        assert (nc.variables.get('time_bounds')[:] == np.asarray([
                                                                    [-1000,    0],
                                                                    [    0, 1000],
                                                                    [ 1000, 2000],
                                                                    [ 2000, 3000],
                                                                    [ 3000, 4000],
                                                                    [ 4000, 5000]
                                                                ])).all()
        nc.close()
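# The three 'position' options anchor each timestamp at the start, middle, or
# end of its bounding interval. A minimal sketch of the arithmetic the tests
# above assert (not the library implementation):
import numpy as np

times = np.array([0, 1000, 2000, 3000, 4000, 5000])
delta = 1000
start_bounds = np.column_stack((times, times + delta))                      # [t, t+delta]
middle_bounds = np.column_stack((times - delta // 2, times + delta // 2))   # [t-delta/2, t+delta/2]
end_bounds = np.column_stack((times - delta, times))                        # [t-delta, t]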
Example #33
import re

# Assumed header for this fragment: sanitize a column name for use as a
# netCDF variable name (non-string names are returned unchanged).
def cf_safe_name(name):
    if isinstance(name, str):
        return re.sub(r'[^_a-zA-Z0-9]', "_", name)
    return name
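# For example, cf_safe_name('Temp (degC)') returns 'Temp__degC_'.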

# <codecell>

import os
out_file = os.path.join(output_dir, output_file)
if os.path.isfile(out_file):
    os.remove(out_file)

from pyaxiom.netcdf.sensors import TimeSeries
ts = TimeSeries(output_dir,
                latitude=0.39,
                longitude=36.7,
                station_name='urn:ioos:station:edu.princeton.ecohydrolab:MainTower',
                global_attributes={},
                times=pd_to_secs(df),
                verticals=[10],
                output_filename=output_file)

# <codecell>

for c in df.columns[::-1]:
    # Add units based on column name?
    var_attributes = dict()
    ts.add_variable(cf_safe_name(c), df[c].values, attributes=var_attributes, fillvalue=-9999.9)

# <codecell>


Example #34
def main(output_format, output, do_download, download_folder, filesubset=None):

    if do_download is True:

        try:
            os.makedirs(download_folder)
        except OSError:
            pass

        waf = 'http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/'

        r = requests.get(waf)
        soup = BeautifulSoup(r.text, "lxml")

        for link in soup.find_all('a'):

            # Skip non .txt files
            site_id, ext = os.path.splitext(link['href'])
            if ext != ".txt":
                continue

            if filesubset and site_id.lower() not in filesubset:
                # Skip this file!
                continue

            csv_link = waf + link['href']
            logger.info("Downloading '{}'".format(csv_link))
            d = requests.get(csv_link)
            try:
                d.raise_for_status()
            except requests.exceptions.HTTPError:
                logger.error(
                    "Could not download: {!s}, skipping. Status code: {!s}".
                    format(csv_link, d.status_code))
                continue

            with open(
                    os.path.join(download_folder, os.path.basename(csv_link)),
                    'wt') as f:
                f.write(d.text)

    # Yes, this uses lots of RAM, but we need to match up lon/lat positions later on.
    results = []
    for datafile in os.listdir(download_folder):

        site_id = os.path.splitext(os.path.basename(datafile))[0]

        if filesubset and site_id.lower() not in filesubset:
            # Skip this file!
            continue

        with open(os.path.join(download_folder, datafile)) as d:
            contents = d.read()
            r = None
            for line in contents.split("\n"):
                if "agency_cd" in line:
                    r = parse_type_1(output_format, site_id, contents, output)
                    break
                elif "date_time_GMT" in line:
                    r = parse_type_2(output_format, site_id, contents, output)
                    break
                else:
                    continue

            if r is None:
                logger.error('Could not process file: {}'.format(datafile))
            else:
                logger.info("Processed {}".format(datafile))
                results.append(r)

    results = sorted(results, key=attrgetter('lon', 'lat'))
    gresults = groupby(results, attrgetter('lon', 'lat'))

    for (glon, glat), group in gresults:

        groups = [x for x in list(group) if x]

        # Strip off the variable type if need be
        gsite = groups[0].site
        if gsite[-2:] in ['WV', 'BP', 'WL']:
            gsite = gsite[:-2]

        for result in groups:

            gas = get_globals(glat, glon, result.z, result.name, gsite)
            station_urn = IoosUrn(asset_type='station',
                                  authority=gas['naming_authority'],
                                  label=gsite)

            if output_format == 'cf16':
                # If CF, a file for each result dataframe
                times = [
                    calendar.timegm(x.timetuple()) for x in result.df['time']
                ]
                verticals = result.df['depth'].values
                output_filename = '{}.nc'.format(result.site)
                ts = TimeSeries(output,
                                latitude=glat,
                                longitude=glon,
                                station_name=gsite,
                                global_attributes=gas,
                                output_filename=output_filename,
                                times=times,
                                verticals=verticals)

            for var in result.df.columns:
                if var in [
                        'date_time_GMT', 'datetime', 'time', 'depth', 'tz_cd',
                        'site_no', 'agency_cd'
                ]:
                    continue

                try:
                    var_meta = copy(variable_map[var])
                except KeyError:
                    logger.error(
                        "Variable {!s} was not found in variable map!".format(
                            var))
                    continue

                # Convert to floats
                result.df[var] = result.df[var].map(to_floats)
                if var_meta["units"].lower() in ["feet", "ft"]:
                    result.df[var] = result.df[var].apply(
                        lambda x: None if pd.isnull(x) else x * 0.3048)
                    var_meta["units"] = "meters"
                elif var_meta["units"].lower() in ["psi"]:
                    result.df[var] = result.df[var].apply(
                        lambda x: None if pd.isnull(x) else x * 68.9476)
                    var_meta["units"] = "mbar"
                elif var_meta["units"].lower() in ['millimeters of mercury']:
                    result.df[var] = result.df[var].apply(
                        lambda x: None if pd.isnull(x) else x * 1.33322)
                    var_meta["units"] = "mbar"

                # Now put the fillvalue we want to be interpreted
                result.df.fillna(fillvalue, inplace=True)

                if output_format == 'axiom':
                    # If Axiom, a file for each variable
                    output_directory = os.path.join(output, gsite)
                    output_filename = '{}_{}.nc'.format(
                        result.site, var_meta['standard_name'])
                    ts = TimeSeries.from_dataframe(
                        result.df,
                        output_directory,
                        output_filename,
                        glat,
                        glon,
                        station_urn.urn,
                        gas,
                        var_meta["standard_name"],
                        var_meta,
                        sensor_vertical_datum='NAVD88',
                        fillvalue=fillvalue,
                        data_column=var,
                        vertical_axis_name='height')
                    sensor_urn = urnify(station_urn.authority,
                                        station_urn.label, var_meta)
                    ts.add_instrument_metadata(urn=sensor_urn)
                elif output_format == 'cf16':
                    # If CF, add variable to existing TimeSeries
                    try:
                        int(var[0])
                        variable_name = 'v_{}'.format(var)
                    except ValueError:
                        variable_name = var
                    ts.add_variable(variable_name,
                                    values=result.df[var].values,
                                    attributes=var_meta,
                                    fillvalue=fillvalue,
                                    sensor_vertical_datum='NAVD88')
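# itertools.groupby only merges *adjacent* items, which is why the results are
# sorted on the same (lon, lat) key first. A minimal, self-contained
# illustration with a hypothetical Result tuple standing in for the parsed
# station records:
from collections import namedtuple
from itertools import groupby
from operator import attrgetter

Result = namedtuple('Result', ['lon', 'lat', 'site'])
records = [Result(-73.7, 40.9, 'SSS-NY-WES-001WL'),
           Result(-73.8, 40.9, 'SSS-NY-WES-002BP'),
           Result(-73.7, 40.9, 'SSS-NY-WES-001BP')]
records = sorted(records, key=attrgetter('lon', 'lat'))
for (lon, lat), group in groupby(records, attrgetter('lon', 'lat')):
    print(lon, lat, [r.site for r in group])
# -73.8 40.9 ['SSS-NY-WES-002BP']
# -73.7 40.9 ['SSS-NY-WES-001WL', 'SSS-NY-WES-001BP']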
Example #35
def main(output, download_folder, do_download, projects, csv_metadata_file):

    project_metadata = dict()
    with open(csv_metadata_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            project_name = row['project_name']
            if projects and project_name.lower() not in projects:
                # Skip projects if a subset was defined
                continue
            project_metadata[project_name] = dict()
            for k, v in row.items():
                project_metadata[project_name][k] = v

    if do_download:
        try:
            downloaded_files = download(download_folder, project_metadata)
        except KeyboardInterrupt:
            downloaded_files = []
    else:
        downloaded_files = glob(os.path.join(download_folder, "*"))

    temp_folder = os.path.abspath(os.path.join(".", "temp"))
    shutil.rmtree(temp_folder, ignore_errors=True)
    try:
        os.makedirs(temp_folder)
    except OSError:
        pass  # Exists

    for down_file in downloaded_files:

        # For debugging
        #if os.path.basename(down_file) != "8451met-a.nc":
        #    continue

        nc = None
        try:
            temp_file = os.path.join(temp_folder, os.path.basename(down_file))
            shutil.copy(down_file, temp_file)

            if projects:
                tmpnc = netCDF4.Dataset(temp_file)
                project_name, _ = tmpnc.id.split("/")
                nc_close(tmpnc)
                if project_name.lower() not in projects:
                    # Skip this project!
                    continue

            # Cleanup to CF-1.6
            normalize_time(temp_file)
            normalize_epic_codes(temp_file)
            normalize_vectors(temp_file)
            normalize_units(temp_file)

            # Create list of variables that we want to save.
            station_id   = None
            latitude     = None
            longitude    = None

            nc = netCDF4.Dataset(temp_file)

            # Default station_id
            project_name, _ = nc.id.split("/")
            # Now try to come up with a better one.
            if hasattr(nc, 'MOORING') and hasattr(nc, 'id'):
                mooring_id = str(nc.MOORING).replace(':', '').strip()
                station_id = "{0}_{1}".format(project_name, mooring_id[0:3]).lower()
            else:
                try:
                    # Mooring ID is the first three numbers of the file
                    station_id = int(os.path.basename(down_file)[0:3])
                    station_id = "{0}_mooring_{0}".format(project_name, station_id)
                except BaseException:
                    logger.error("Could not create a suitable station_id. Skipping {0}.".format(down_file))
                    continue

            try:
                latitude  = nc.variables.get("lat")[0]
                longitude = nc.variables.get("lon")[0]
            except IndexError:
                latitude  = nc.variables.get("lat")[:]
                longitude = nc.variables.get("lon")[:]

            file_name = os.path.basename(down_file)
            output_directory = os.path.join(output, project_name)
            logger.info("Translating {0} into CF1.6 format: {1}".format(down_file, os.path.abspath(os.path.join(output_directory, file_name))))

            if not os.path.isdir(output_directory):
                os.makedirs(output_directory)

            file_global_attributes = { k : getattr(nc, k) for k in nc.ncattrs() }
            file_global_attributes.update(global_attributes)
            file_global_attributes['id'] = station_id
            file_global_attributes['title'] = '{0} - {1}'.format(project_name, os.path.basename(down_file))
            if project_name in project_metadata:
                for k, v in project_metadata[project_name].items():
                    if v and k.lower() not in ['id', 'title', 'catalog_xml', 'project_name']:
                        file_global_attributes[k] = v

            times           = nc.variables.get('time')[:]
            feature_name, _ = os.path.splitext(os.path.basename(down_file))
            # Get all depth values
            depth_variables = []
            for dv in nc.variables:
                depth_variables += [ x for x in nc.variables.get(dv).dimensions if 'depth' in x ]
            depth_variables = sorted(list(set(depth_variables)))
            depth_values = np.asarray([ nc.variables.get(x)[:] for x in depth_variables ]).flatten()

            ts = TimeSeries(output_directory, latitude, longitude, feature_name, file_global_attributes, times=times, verticals=depth_values, output_filename=file_name)

            v = []
            for other in sorted(nc.variables):  # Sorted for a reason... don't change!
                if other in coord_vars:
                    continue

                old_var = nc.variables.get(other)
                variable_attributes = { k : getattr(old_var, k) for k in old_var.ncattrs() }

                fillvalue = None
                if hasattr(old_var, "_FillValue"):
                    fillvalue = old_var._FillValue

                # Figure out if this is a variable that is repeated at different depths
                # as different variable names.   Assumes sorted.
                new_var_name = other.split('_')[0]
                if new_var_name in ts.ncd.variables:
                    # Already in new file (processed when the first was encountered in the loop below)
                    continue

                # Get the depth index
                depth_variable = [ x for x in old_var.dimensions if 'depth' in x ]
                if depth_variable and len(old_var.dimensions) > 1 and 'time' in old_var.dimensions:
                    depth_index = np.squeeze(np.where(depth_values == nc.variables.get(depth_variable[0])[:]))

                    # Find other variable names like this one
                    depth_indexes = [(other, depth_index)]
                    for search_var in sorted(nc.variables):
                        # If they have different depth dimension names we need to combine them into one variable
                        if search_var != other and search_var.split('_')[0] == new_var_name and \
                           depth_variable[0] != [ x for x in nc.variables[search_var].dimensions if 'depth' in x ][0]:
                            # Found a match at a different depth
                            search_depth_variable = [ x for x in nc.variables.get(search_var).dimensions if 'depth' in x ]
                            depth_index = np.squeeze(np.where(depth_values == nc.variables.get(search_depth_variable[0])[:]))
                            depth_indexes.append((search_var, depth_index))
                            logger.info("Combining '{}' with '{}' as '{}' (different variables at different depths but are the same parameter)".format(search_var, other, new_var_name))

                    values = np.ma.empty((times.size, len(depth_values)))
                    values.fill_value = fillvalue
                    values.mask = True
                    for nm, index in depth_indexes:
                        values[:, index] = np.squeeze(nc.variables.get(nm)[:])

                    # If we just have one index we want to use the original name
                    if len(depth_indexes) == 1:
                        # Just use the original variable name
                        new_var_name = other

                    # Create this one, should be the first we encounter for this type
                    ts.add_variable(new_var_name, values=values, times=times, fillvalue=fillvalue, attributes=variable_attributes)
                elif depth_variable and 'time' not in old_var.dimensions:
                    # elif (depth_variable and len(old_var.dimensions) == 1 and 'depth' == old_var.dimensions[0]) or \
                    # Metadata variable like bin distance
                    meta_var = ts.ncd.createVariable(other, old_var.dtype, ('z',), fill_value=fillvalue)
                    for k, v in variable_attributes.items():
                        if k != '_FillValue':
                            setattr(meta_var, k, v)
                    meta_var[:] = old_var[:]
                else:
                    values = old_var[:]
                    if len(old_var.dimensions) == 1 and old_var.dimensions[0] == 'time':
                        # Metadata variables like pitch, roll, record count, etc.
                        ts.add_variable(other, values=values, times=times, unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                    elif depth_values.size > 1:
                        # No Z variables in a profile dataset, aka Bottom Temperature
                        ts.add_variable(other, values=values, times=times, verticals=[old_var.sensor_depth], unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                    else:
                        ts.add_variable(other, values=values, times=times, fillvalue=fillvalue, attributes=variable_attributes)

                ts.ncd.sync()
            ts.ncd.close()

        except BaseException:
            logger.exception("Error. Skipping {0}.".format(down_file))
            continue
        finally:
            nc_close(nc)
            os.remove(temp_file)

    shutil.rmtree(temp_folder, ignore_errors=True)
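# Sketch of the depth-combining step above: the same parameter recorded as
# separate variables (one per depth dimension) is merged into a single
# (time, z) masked array, with unmeasured cells left masked. Variable names
# and values here are invented for illustration:
import numpy as np

times = np.arange(4)
depth_values = np.array([5.0, 10.0])
temp_at_5m = np.array([20.1, 20.2, 20.3, 20.4])
temp_at_10m = np.array([18.0, 18.1, 18.2, 18.3])

values = np.ma.empty((times.size, depth_values.size))
values.mask = True               # start fully masked
values[:, 0] = temp_at_5m        # fill the column for each depth index
values[:, 1] = temp_at_10m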
Example #36
def parse_type_2(output_format, site_id, contents, output, csv_link):
    """
    # These data are provisional and subject to revision.
    # Data processed as of 12/05/2012 11:54:29.
    # Data collected as part of Hurricane Sandy (2012) Storm Tide project.
    # Data are archived at http://water.usgs.gov/floods/events/2012/isaac/index.php
    # Elevation determined from GPS surveys (NAVD 88).
    # Time datum is GMT (Greenwich Mean Time).
    # Water density estimated on basis of sensor location
    #   where saltwater = 63.989 lb/ft3       (Saltwater = dissolved solids concentration greater than 20000 milligrams per liter)
    #   where brackish water = 63.052 lb/ft3  (Brackish water = dissolved solids concentration between 1000 and 20000 milligrams per liter)
    #   where freshwater = 62.428 lb/ft3      (Freshwater = dissolved solids concentration less than 1000 milligrams per liter)
    # The equation used to compute elevation from recorded pressure is
    #  (((sp-bp)*144)/d)+e
    # Where sp = surge pressure in psi; bp = barometric pressure in psi;
    #  d = water density in lb/ft3; and e = elevation of sensor in ft above NAVD 88.
    # Barometric data from nearest pressure sensor. Location for the barometric sensor is listed below.
    # Elevation is computer-rounded to two decimal places.
    #      Sensor information
    # Site id = SSS-NY-WES-001WL
    # Site type = water level
    # Horizontal datum used is NAD 83
    # Sensor location latitude 40.942755
    # Sensor location longitude -73.719828
    # Sensor elevation above NAVD 88 = -3.97 ft
    # Lowest recordable water elevation is -3.90 ft
    # Water density value used = 63.989 lb/ft3
    # Barometric sensor site (source of bp) = SSS-NY-WES-002BP
    # Barometric sensor location latitude 40.90754368
    # Barometric sensor location longitude -73.8692184

    date_time_GMT   elevation   nearest_barometric_sensor_psi
    10-28-2012 06:00:00 0.88    14.5145
    10-28-2012 06:00:30 0.86    14.5145
    10-28-2012 06:01:00 0.85    14.5170
    10-28-2012 06:01:30 0.85    14.5145
    10-28-2012 06:02:00 0.84    14.5170
    10-28-2012 06:02:30 0.81    14.5145
    10-28-2012 06:03:00 0.76    14.5145
    ...
    """

    variable_map = {
        'elevation': {
            'long_name':
            'Water Level Elevation above Reference Datum (NAVD88)',
            'geoid_name': 'NAVD88',
            'vertical_datum': 'NAVD88',
            'water_surface_reference_datum': 'NAVD88',
            'standard_name': 'water_surface_height_above_reference_datum',
            'units': 'feet'
        },
    }

    def to_floats(x):
        try:
            return float(x)
        except ValueError:
            return fillvalue

    comments, headers, data = split_file(contents, "date_time_GMT")
    df = pd.DataFrame(data, columns=headers)
    fillvalue = -9999.9

    lat = None
    lon = None
    z = 0
    name = site_id
    sensor_vertical_datum = "NAVD88"

    for c in comments:
        if "Sensor location latitude" in c:
            lat = float(
                list(filter(None, map(lambda x: x.strip(), c.split(" "))))[-1])
        elif "Sensor location longitude" in c:
            lon = float(
                list(filter(None, map(lambda x: x.strip(), c.split(" "))))[-1])
        elif "Site id" in c:
            site_id = list(filter(None, map(lambda x: x.strip(),
                                            c.split(" "))))[-1]
            name = site_id
        elif "Sensor elevation" in c:
            sensor_vertical_datum = "".join(c.split("=")[0].split(" ")[4:6])
            parts = list(filter(None, map(lambda x: x.strip(), c.split(" "))))
            z = float(parts[-2])
            if parts[-1] in ["feet", "ft"]:
                z *= 0.3048

    loc = "POINT({!s} {!s} {!s})".format(lon, lat, z)
    df['time'] = df["date_time_GMT"].map(lambda x: parse(x + " UTC"))
    df['depth'] = [z for x in range(len(df['time']))]

    # Add global attributes to appear in the resulting NetCDF file
    global_attributes = dict(
        title=name,
        summary=
        'USGS Hurricane Sandy Rapid Response Stations.  Data acquired from http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/.',
        keywords=
        "usgs, waterdata, elevation, water, waterlevel, sandy, hurricane, rapid, response, %s"
        % site_id,
        keywords_vocabulary="None",
        naming_authority='gov.usgs',
        id=site_id,
        cdm_data_type="Station",
        history="NetCDF file generated from {!s}".format(csv_link),
        creator="USGS",
        creator_url="http://waterdata.usgs.gov",
        creator_institution="USGS",
        creator_urn="gov.usgs",
        publisher="Axiom Data Science",
        publisher_uri="http://axiomdatascience.com",
        processing_level="None",
        acknowledgement="None",
        geospatial_bounds=loc,
        geospatial_lat_min=lat,
        geospatial_lat_max=lat,
        geospatial_lon_min=lon,
        geospatial_lon_max=lon,
        license="Freely Distributed",
        date_created=datetime.utcnow().replace(second=0,
                                               microsecond=0).isoformat())

    full_station_urn = "urn:ioos:station:{!s}:{!s}".format(
        global_attributes["naming_authority"], site_id)
    min_time = df["time"].min()
    max_time = df["time"].max()

    if output_format == 'cf16':
        times = [calendar.timegm(x.timetuple()) for x in df['time']]
        verticals = df['depth'].values
        output_filename = '{}_{}-{}.nc'.format(
            site_id, min_time.strftime('%Y%m%dT%H%M%S'),
            max_time.strftime('%Y%m%dT%H%M%S'))
        ts = TimeSeries(output,
                        latitude=lat,
                        longitude=lon,
                        station_name=full_station_urn,
                        global_attributes=global_attributes,
                        output_filename=output_filename,
                        times=times,
                        verticals=verticals)

    for var in df.columns:
        if var in ['date_time_GMT', 'time', 'depth']:
            continue
        try:
            int(var[0])
            variable_name = 'v_{}'.format(var)
        except ValueError:
            variable_name = var

        try:
            var_meta = variable_map[var]
        except KeyError:
            logger.error(
                "Variable {!s} was not found in variable map!".format(var))
            continue

        # Convert to floats
        df[var] = df[var].map(to_floats)
        if var_meta["units"] in ["feet", "ft"]:
            df[var] = [v * 0.3048 if v != fillvalue else v for v in df[var]]
            var_meta["units"] = "meters"

        if output_format == 'axiom':
            full_sensor_urn = "urn:ioos:sensor:{!s}:{!s}:{!s}".format(
                global_attributes["naming_authority"], site_id,
                var_meta["standard_name"])
            output_directory = os.path.join(output, full_sensor_urn)
            output_filename = '{}_{}-{}.nc'.format(
                var, min_time.strftime('%Y%m%dT%H%M%S'),
                max_time.strftime('%Y%m%dT%H%M%S'))
            ts = TimeSeries.from_dataframe(
                df,
                output_directory,
                output_filename,
                lat,
                lon,
                full_station_urn,
                global_attributes,
                var_meta["standard_name"],
                var_meta,
                sensor_vertical_datum=sensor_vertical_datum,
                fillvalue=fillvalue,
                data_column=var)
            ts.add_instrument_metadata(urn=full_sensor_urn)
        elif output_format == 'cf16':
            ts.add_variable(variable_name,
                            values=df[var].values,
                            attributes=var_meta,
                            fillvalue=fillvalue,
                            sensor_vertical_datum=sensor_vertical_datum)
Example #37
def main(output,
         download_folder,
         do_download,
         projects,
         csv_metadata_file,
         filesubset=None,
         since=None):
    project_metadata = dict()
    with open(csv_metadata_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            project_name = row['project_name']
            if isinstance(project_name, str) and project_name[0] == '#':
                continue
            if projects and project_name.lower() not in projects:
                # Skip projects if a subset was defined
                continue
            project_metadata[project_name] = dict()
            for k, v in row.items():
                project_metadata[project_name][k] = v

    if do_download:
        try:
            downloaded_files = download(download_folder, project_metadata,
                                        filesubset, since)
        except KeyboardInterrupt:
            logger.exception('Error downloading datasets from THREDDS')
            downloaded_files = []
    else:
        downloaded_files = glob(os.path.join(download_folder, '**', '*'))
        if since is not None:

            def should_keep(d):
                modt = datetime.utcfromtimestamp(
                    os.path.getmtime(d)).replace(tzinfo=pytz.utc)
                return modt >= since

            downloaded_files = [
                dl for dl in downloaded_files if should_keep(dl)
            ]

    for down_file in sorted(downloaded_files):

        temp_fd, temp_file = tempfile.mkstemp(prefix='cmg_collector',
                                              suffix='nc')
        try:

            if filesubset is not None:
                if os.path.basename(down_file).lower() not in filesubset:
                    # aka "9631ecp-a.nc"
                    # Skip this file!
                    continue

            project_name = os.path.basename(os.path.dirname(down_file))
            if projects:
                if project_name.lower() not in projects:
                    # Skip this project!
                    continue
            shutil.copy(down_file, temp_file)

            # Cleanup to CF-1.6
            try:
                first_time = normalize_time(temp_file)
            except (TypeError, ValueError, IndexError):
                logger.exception(
                    "Could not normalize the time variable. Skipping {0}.".
                    format(down_file))
                continue
            except OverflowError:
                logger.error(
                    "Dates out of range. Skipping {0}.".format(down_file))
                continue

            normalize_epic_codes(temp_file, down_file)
            normalize_vectors(temp_file)
            normalize_units(temp_file)

            # Create list of variables that we want to save.
            mooring_id = None
            latitude = None
            longitude = None

            fname = os.path.basename(down_file)
            feature_name, file_ext = os.path.splitext(
                os.path.basename(down_file))
            try:
                mooring_id = int(9999)
            except ValueError:
                logger.exception(
                    "Could not create a suitable station_id. Skipping {0}.".
                    format(down_file))
                continue

            file_name = os.path.basename(down_file)
            output_directory = os.path.join(output, project_name)
            logger.info("Translating {0} into CF1.6 format: {1}".format(
                down_file,
                os.path.abspath(os.path.join(output_directory, file_name))))

            with EnhancedDataset(temp_file) as nc:

                try:
                    latitude = nc.variables.get("lat")[0]
                    longitude = nc.variables.get("lon")[0]
                except IndexError:
                    latitude = nc.variables.get("lat")[:]
                    longitude = nc.variables.get("lon")[:]
                except TypeError:
                    logger.error(
                        "Could not find lat/lon variables. Skipping {0}.".
                        format(down_file))
                    continue

                file_global_attributes = {
                    k: getattr(nc, k)
                    for k in nc.ncattrs()
                }
                file_global_attributes.update(global_attributes)
                file_global_attributes['id'] = feature_name
                file_global_attributes['MOORING'] = mooring_id
                file_global_attributes['original_filename'] = fname
                file_global_attributes['original_folder'] = project_name

                no_override = [
                    'id', 'MOORING', 'original_filename', 'original_folder',
                    'catalog_xml', 'project_name'
                ]
                if project_name in project_metadata:
                    for k, v in project_metadata[project_name].items():
                        if v and k.lower() not in no_override:
                            file_global_attributes[k] = v

                if 'summary' in file_global_attributes:
                    # Save the original summary
                    file_global_attributes[
                        'WHOI_Buoy_Group_summary'] = file_global_attributes[
                            'summary']

                # Better title/summary for discovery via catalogs
                project_title = file_global_attributes.get(
                    'project_title', project_name).strip()
                project_summary = file_global_attributes.get(
                    'project_summary', '').strip()
                file_global_attributes[
                    'title'] = 'USGS-CMG time-series data: {0} - {1} - {2}'.format(
                        project_name, mooring_id, feature_name)
                file_global_attributes[
                    'summary'] = 'USGS-CMG time-series data from the {} project, mooring {} and package {}. {}'.format(
                        project_title, mooring_id, feature_name,
                        project_summary).strip()

                times = nc.variables.get('time')[:]

                # Get all depth values
                depth_variables = []
                for dv in nc.variables:
                    depth_variables += [
                        x for x in nc.variables.get(dv).dimensions
                        if 'depth' in x
                    ]
                depth_variables = sorted(list(set(depth_variables)))

                try:
                    assert depth_variables
                    depth_values = np.asarray([
                        nc.variables.get(x)[:] for x in depth_variables
                    ]).flatten()
                except (AssertionError, TypeError):
                    logger.warning(
                        "No depth variables found in {}, skipping.".format(
                            down_file))
                    continue

                # Convert everything to positive up, unless it is specifically specified as "up" already
                depth_conversion = -1.0
                if depth_variables:
                    pull_positive = nc.variables.get(depth_variables[0])
                    if hasattr(pull_positive, 'positive'
                               ) and pull_positive.positive.lower() == 'up':
                        depth_conversion = 1.0
                depth_values = depth_values * depth_conversion

                if not os.path.isdir(output_directory):
                    os.makedirs(output_directory)
                ts = TimeSeries(output_directory,
                                latitude,
                                longitude,
                                feature_name,
                                file_global_attributes,
                                times=times,
                                verticals=depth_values,
                                output_filename=file_name,
                                vertical_positive='up')

                # Set the platform type from the global attribute 'platform_type', defaulting to 'fixed'
                with EnhancedDataset(ts.out_file, 'a') as onc:
                    platform_type = getattr(onc, 'platform_type',
                                            'fixed').lower()
                    onc.variables['platform'].setncattr('type', platform_type)
                    onc.variables['platform'].setncattr(
                        'nodc_name', "FIXED PLATFORM, MOORINGS")
                    # Add ERDDAP variables
                    onc.cdm_data_type = "TimeSeries"
                    onc.cdm_timeseries_variables = "latitude,longitude,z,feature_type_instance"

                v = []
                depth_files = []
                for other in sorted(
                        nc.variables):  # Sorted for a reason... don't change!
                    try:
                        if other in coord_vars:
                            continue

                        ovsd = None  # old var sensor depth
                        old_var = nc.variables.get(other)
                        variable_attributes = {
                            k: getattr(old_var, k)
                            for k in old_var.ncattrs()
                        }
                        # Remove/rename some attributes
                        # https://github.com/USGS-CMG/usgs-cmg-portal/issues/67
                        if 'valid_range' in variable_attributes:
                            del variable_attributes['valid_range']
                        if 'minimum' in variable_attributes:
                            variable_attributes[
                                'actual_min'] = variable_attributes['minimum']
                            del variable_attributes['minimum']
                        if 'maximum' in variable_attributes:
                            variable_attributes[
                                'actual_max'] = variable_attributes['maximum']
                            del variable_attributes['maximum']
                        if 'sensor_depth' in variable_attributes:
                            # sensor_depth is ALWAYS positive "down", so don't convert!
                            # This is contrary to the "positive" attribute on the Z axis.
                            # variable_attributes['sensor_depth'] = variable_attributes['sensor_depth'] * -1
                            # Round the sensor_depth attribute
                            variable_attributes['sensor_depth'] = np.around(
                                variable_attributes['sensor_depth'],
                                decimals=4)
                            ovsd = np.around(old_var.sensor_depth *
                                             depth_conversion,
                                             decimals=4)

                        fillvalue = None
                        if hasattr(old_var, "_FillValue"):
                            fillvalue = old_var._FillValue

                        # Figure out if this is a variable that is repeated at different depths
                        # as different variable names.   Assumes sorted.
                        new_var_name = other.split('_')[0]
                        if new_var_name in ts.ncd.variables:
                            # Already in new file (processed when the first was encountered in the loop below)
                            continue

                        # Get the depth index
                        depth_variable = [
                            x for x in old_var.dimensions if 'depth' in x
                        ]
                        if depth_variable and len(
                                old_var.dimensions
                        ) > 1 and 'time' in old_var.dimensions:
                            depth_index = np.squeeze(
                                np.where(depth_values == (
                                    nc.variables.get(depth_variable[0])[:] *
                                    depth_conversion)))

                            # Find other variable names like this one
                            depth_indexes = [(other, depth_index)]
                            for search_var in sorted(nc.variables):
                                # If they have different depth dimension names we need to combine them into one variable
                                if search_var != other and search_var.split('_')[0] == new_var_name and \
                                   depth_variable[0] != [ x for x in nc.variables[search_var].dimensions if 'depth' in x ][0]:
                                    # Found a match at a different depth
                                    search_depth_variable = [
                                        x for x in nc.variables.get(
                                            search_var).dimensions
                                        if 'depth' in x
                                    ]
                                    depth_index = np.squeeze(
                                        np.where(depth_values == (
                                            nc.variables.get(
                                                search_depth_variable[0])[:] *
                                            depth_conversion)))
                                    depth_indexes.append(
                                        (search_var, depth_index))
                                    logger.info(
                                        "Combining '{}' with '{}' as '{}' (different variables at different depths but are the same parameter)"
                                        .format(search_var, other,
                                                new_var_name))

                            values = np.ma.empty(
                                (times.size, len(depth_values)),
                                dtype=old_var.dtype)
                            values.fill_value = fillvalue
                            values.mask = True
                            inconsistent = False
                            for nm, index in depth_indexes:
                                try:
                                    values[:, index] = np.squeeze(
                                        nc.variables.get(nm)[:])
                                except ValueError:
                                    inconsistent = True
                                    break

                            # If we just have one index we want to use the original name
                            if len(depth_indexes) == 1:
                                # Just use the original variable name
                                new_var_name = other

                            if inconsistent is True:
                                # Incorrect array size, most likely a strange variable
                                ts.add_variable_object(
                                    old_var,
                                    dimension_map=dict(depth='z'),
                                    reduce_dims=True)
                            else:
                                # Create this one, should be the first we encounter for this type
                                ts.add_variable(new_var_name,
                                                values=values,
                                                times=times,
                                                fillvalue=fillvalue,
                                                attributes=variable_attributes)

                        elif len(old_var.dimensions
                                 ) == 1 and old_var.dimensions[0] == 'time':
                            # A single time dimensioned variable, like pitch, roll, record count, etc.
                            ts.add_variable(other,
                                            values=old_var[:],
                                            times=times,
                                            unlink_from_profile=True,
                                            fillvalue=fillvalue,
                                            attributes=variable_attributes)
                        elif old_var.ndim <= 3 and ovsd and \
                                ((depth_values.size == 1 and not depth_variable and 'time' in old_var.dimensions) or
                                 (depth_values.size  > 1 and not depth_variable and 'time' in old_var.dimensions and 'sensor_depth' in ts.ncd.variables)):

                            if 'sensor_depth' in ts.ncd.variables and np.isclose(
                                    ts.ncd.variables['sensor_depth'][:], ovsd):
                                ts.add_variable(other,
                                                values=old_var[:],
                                                times=times,
                                                unlink_from_profile=True,
                                                verticals=[ovsd],
                                                fillvalue=fillvalue,
                                                attributes=variable_attributes)
                            else:
                                # Search through secondary files that have been created for detached variables at a certain depth and
                                # try to match this variable with one of the depths.
                                found_df = False
                                for dfts in depth_files:
                                    if isinstance(ovsd, np.ndarray):
                                        # Well, this is a bad file.
                                        raise ValueError(
                                            "The sensor_depth attribute has more than one value, please fix the source NetCDF: {}"
                                            .format(down_file))
                                    if np.isclose(
                                            dfts.ncd.variables[
                                                ts.vertical_axis_name][:],
                                            ovsd):
                                        dfts.add_variable(
                                            other,
                                            values=old_var[:],
                                            times=times,
                                            unlink_from_profile=True,
                                            verticals=[ovsd],
                                            fillvalue=fillvalue,
                                            attributes=variable_attributes)
                                        found_df = True
                                        break

                                # If we couldn't match the current or one of the existing secondary depth files, create a new one.
                                if found_df is False:
                                    new_file_name = file_name.replace(
                                        file_ext, '_z{}{}'.format(
                                            len(depth_files) + 1, file_ext))
                                    fga = copy(file_global_attributes)
                                    fga['id'] = os.path.splitext(
                                        new_file_name)[0]
                                    new_ts = TimeSeries(
                                        output_directory,
                                        latitude,
                                        longitude,
                                        feature_name,
                                        fga,
                                        times=times,
                                        verticals=[ovsd],
                                        output_filename=new_file_name,
                                        vertical_positive='up')
                                    new_ts.add_variable(
                                        other,
                                        values=old_var[:],
                                        times=times,
                                        verticals=[ovsd],
                                        fillvalue=fillvalue,
                                        attributes=variable_attributes)
                                    depth_files.append(new_ts)
                        elif old_var.ndim <= 3 and (
                                depth_values.size > 1 and not depth_variable
                                and 'time' in old_var.dimensions):
                            if ovsd:
                                # An ADCP or profiling dataset, but this variable is measured at a single depth.
                                # Example: Bottom Temperature on an ADCP
                                # Skip things with a dimension over 3 (some beam variables like `brange`)
                                ts.add_variable(other,
                                                values=old_var[:],
                                                times=times,
                                                unlink_from_profile=True,
                                                verticals=[ovsd],
                                                fillvalue=fillvalue,
                                                attributes=variable_attributes)
                            else:
                                ts.add_variable(other,
                                                values=old_var[:],
                                                times=times,
                                                unlink_from_profile=True,
                                                fillvalue=fillvalue,
                                                attributes=variable_attributes)
                        else:
                            if 'time' in old_var.dimensions and old_var.ndim <= 3:
                                ts.add_variable(other,
                                                values=old_var[:],
                                                times=times,
                                                fillvalue=fillvalue,
                                                attributes=variable_attributes)
                            else:
                                ts.add_variable_object(
                                    old_var,
                                    dimension_map=dict(depth='z'),
                                    reduce_dims=True)

                    except BaseException:
                        logger.exception(
                            "Error processing variable {0} in {1}. Skipping it."
                            .format(other, down_file))
        except KeyboardInterrupt:
            logger.info("Breaking out of Translate loop!")
            break
        except BaseException:
            logger.exception("Error. Skipping {0}.".format(down_file))
            continue
        finally:
            try:
                for df in depth_files:
                    del df
            except NameError:
                pass
            try:
                del ts
            except NameError:
                pass
            os.close(temp_fd)
            if os.path.isfile(temp_file):
                os.remove(temp_file)
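# Sketch of the positive-up normalization above: stored depths are flipped to
# negative unless the source axis already declares positive='up'. Toy values:
import numpy as np

depth_values = np.array([5.0, 10.0])   # stored positive down
positive = 'down'                      # the depth variable's 'positive' attribute
depth_conversion = 1.0 if positive.lower() == 'up' else -1.0
depth_values = depth_values * depth_conversion   # -> [-5., -10.], positive up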
                                                                    [ 1000, 2000],
                                                                    [ 2000, 3000],
                                                                    [ 3000, 4000],
                                                                    [ 4000, 5000]
                                                                ])).all()
        nc.close()
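
    # The three tests above pin down how `add_time_bounds` places each
    # interval relative to its timestamp. A minimal sketch of the expected
    # bounds, assuming integer seconds as in these fixtures (`_expected_bounds`
    # is a helper invented here for illustration, not part of the library):
    @staticmethod
    def _expected_bounds(times, delta_seconds, position):
        offsets = {'start': (0, delta_seconds),
                   'middle': (-delta_seconds // 2, delta_seconds // 2),
                   'end': (-delta_seconds, 0)}
        low, high = offsets[position]
        return np.asarray([[t + low, t + high] for t in times])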
Example #39
def parse_type_1(output_format, site_id, contents, output, csv_link):
    """
    # ---------------------------------- WARNING ----------------------------------------
    # The data you have obtained from this automated U.S. Geological Survey database
    # have not received Director's approval and as such are provisional and subject to
    # revision.  The data are released on the condition that neither the USGS nor the
    # United States Government may be held liable for any damages resulting from its use.
    # Additional info: http://waterdata.usgs.gov/ga/nwis/help/?provisional
    #
    # File-format description:  http://waterdata.usgs.gov/nwis/?tab_delimited_format_info
    # Automated-retrieval info: http://waterdata.usgs.gov/nwis/?automated_retrieval_info
    #
    # Contact:   [email protected]
    # retrieved: 2012-11-20 12:05:22 EST       (caww01)
    #
    # Data for the following 1 site(s) are contained in this file
    #    USGS 395740074482628 South Branch Rancocas Cr at S Main St nr Lumberton
    # -----------------------------------------------------------------------------------
    #
    # Data provided for site 395740074482628
    #    DD parameter   Description
    #    03   00035     Wind speed, miles per hour
    #    07   00025     Barometric pressure, millimeters of mercury
    #    09   00045     Precipitation, total, inches
    #    19   63160     Stream water level elevation above NAVD 1988, in feet
    #
    # Data-value qualification codes included in this output:
    #     P  Provisional data subject to revision.
    #
    agency_cd   site_no datetime    tz_cd   03_00035    03_00035_cd 07_00025    07_00025_cd 09_00045    09_00045_cd 19_63160    19_63160_cd
    5s  15s 20d 6s  14n 10s 14n 10s 14n 10s 14n 10s
    USGS    395740074482628 2012-10-28 13:00    EST 4.2 P   755 P           3.22    P
    USGS    395740074482628 2012-10-28 13:15    EST 6.4 P   754 P   0.00    P   3.36    P
    USGS    395740074482628 2012-10-28 13:30    EST 3.6 P   754 P   0.00    P   3.50    P
    USGS    395740074482628 2012-10-28 13:45    EST 3.2 P   754 P   0.00    P   3.63    P
    USGS    395740074482628 2012-10-28 14:00    EST 7.0 P   754 P   0.00    P   3.76    P
    USGS    395740074482628 2012-10-28 14:15    EST 4.0 P   754 P   0.00    P   3.87    P
    ...
    """
    # lat/lon point: http://waterservices.usgs.gov/nwis/site/?sites=395740074482628

    variable_map = {
        '01_00065': {
            'long_name': 'Gage height',
            'geoid_name': 'NAVD88',
            'vertical_datum': 'NAVD88',
            'water_surface_reference_datum': 'NAVD88',
            'standard_name': 'water_surface_height_above_reference_datum',
            'units': 'feet'
        },
        '03_00035': {
            'long_name': 'Wind Speed',
            'standard_name': 'wind_speed',
            'units': 'mph'
        },
        '04_00035': {
            'long_name': 'Wind Gust',
            'standard_name': 'wind_speed_of_gust',
            'units': 'mph'
        },
        '05_00035': {
            'long_name': 'Wind Speed',
            'standard_name': 'wind_speed',
            'units': 'mph'
        },
        '06_00035': {
            'long_name': 'Wind Gust',
            'standard_name': 'wind_speed_of_gust',
            'units': 'mph'
        },
        '04_00036': {
            'long_name': 'Wind Direction',
            'standard_name': 'wind_from_direction',
            'units': 'degrees'
        },
        '02_00036': {
            'long_name': 'Wind Direction',
            'standard_name': 'wind_from_direction',
            'units': 'degrees'
        },
        '05_00025': {
            'long_name': 'Air Pressure',
            'standard_name': 'air_pressure',
            'units': 'mm of mercury'
        },
        '07_00025': {
            'long_name': 'Air Pressure',
            'standard_name': 'air_pressure',
            'units': 'mm of mercury'
        },
        '09_00025': {
            'long_name': 'Air Pressure',
            'standard_name': 'air_pressure',
            'units': 'mm of mercury'
        },
        '03_00045': {
            'long_name': 'Total Precipitation',
            'standard_name': 'lwe_thickness_of_precipitation_amount',
            'units': 'inches'
        },
        '08_00045': {
            'long_name': 'Total Precipitation',
            'standard_name': 'lwe_thickness_of_precipitation_amount',
            'units': 'inches'
        },
        '09_00045': {
            'long_name': 'Total Precipitation',
            'standard_name': 'lwe_thickness_of_precipitation_amount',
            'units': 'inches'
        },
        '06_00052': {
            'long_name': 'Relative Humidity',
            'standard_name': 'relative_humidity',
            'units': 'percent'
        },
        '07_00052': {
            'long_name': 'Relative Humidity',
            'standard_name': 'relative_humidity',
            'units': 'percent'
        },
        '08_00052': {
            'long_name': 'Relative Humidity',
            'standard_name': 'relative_humidity',
            'units': 'percent'
        },
        '05_00020': {
            'long_name': 'Air Temperature',
            'standard_name': 'air_temperature',
            'units': 'degrees_Celsius'
        },
        '06_00020': {
            'long_name': 'Air Temperature',
            'standard_name': 'air_temperature',
            'units': 'degrees_Celsius'
        },
        '07_00020': {
            'long_name': 'Air Temperature',
            'standard_name': 'air_temperature',
            'units': 'degrees_Celsius'
        },
        '19_63160': {
            'long_name': 'Water Surface Height Above Reference Datum (NAVD88)',
            'geoid_name': 'NAVD88',
            'vertical_datum': 'NAVD88',
            'water_surface_reference_datum': 'NAVD88',
            'standard_name': 'water_surface_height_above_reference_datum',
            'units': 'feet'
        },
        '01_63160': {
            'long_name': 'Water Surface Height Above Reference Datum (NAVD88)',
            'geoid_name': 'NAVD88',
            'vertical_datum': 'NAVD88',
            'water_surface_reference_datum': 'NAVD88',
            'standard_name': 'water_surface_height_above_reference_datum',
            'units': 'feet'
        },
    }

    # Get metadata from a separate endpoint.
    d = requests.get(
        "http://waterservices.usgs.gov/nwis/site/?sites={!s}".format(site_id))
    try:
        d.raise_for_status()
    except requests.exceptions.HTTPError:
        logger.error(
            "Could not find lat/lon endpoint for station {!s}, skipping. Status code: {!s}"
            .format(site_id, d.status_code))
        return
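
    # `split_file` (used below) is defined elsewhere in the source module; a
    # minimal sketch of its assumed behavior, splitting a USGS RDB response
    # into the "#" comment lines, the tab-delimited header row, and the data
    # rows (`_split_file_sketch` is illustrative only, not the real helper):
    def _split_file_sketch(text, header_key):
        comments, headers, data = [], [], []
        for line in text.splitlines():
            if line.startswith('#'):
                comments.append(line)          # leading metadata block
            elif line.startswith(header_key):
                headers = line.split('\t')     # column-name row
            elif headers and line.strip():
                data.append(line.split('\t'))  # format row + data rows
        return comments, headers, data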
    _, hz, dz = split_file(d.text, "agency_cd")
    # Strip off the one line after the headers
    dz = dz[1:]
    dfz = pd.DataFrame(dz, columns=hz)
    lat = float(dfz["dec_lat_va"][0])
    lon = float(dfz["dec_long_va"][0])
    sensor_vertical_datum = dfz["alt_datum_cd"][0] or "NAVD88"
    try:
        z = float(dfz["alt_va"][0])
    except ValueError:
        z = 0.
    loc = "POINT({!s} {!s} {!s})".format(lon, lat, z)
    name = dfz["station_nm"][0]

    comments, headers, data = split_file(contents, "agency_cd")
    df = pd.DataFrame(data, columns=headers)

    fillvalue = -9999.9

    # Combine date columns
    dates = df["datetime"]
    tz = df["tz_cd"]
    new_dates = list()
    for i in range(len(dates)):
        try:
            new_dates.append(
                parse(dates[i] + " " + tz[i]).astimezone(pytz.utc))
        except Exception:
            # Bad date; drop the row.
            df.drop(i, axis=0, inplace=True)
            continue
    df['time'] = new_dates
    df['depth'] = z  # pandas broadcasts the scalar station elevation

    # Strip out "_cd" columns (quality checks for USGS)
    for h in headers:
        if "_cd" in h:
            df.drop(h, axis=1, inplace=True)

    # Add global attributes to appear in the resulting NetCDF file
    global_attributes = dict(
        title=name,
        summary='USGS Hurricane Sandy Rapid Response Stations. Data acquired '
                'from http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/.',
        keywords="usgs, waterdata, elevation, water, waterlevel, sandy, "
                 "hurricane, rapid, response, %s" % site_id,
        keywords_vocabulary="None",
        naming_authority='gov.usgs',
        id=site_id,
        cdm_data_type="Station",
        history="NetCDF file generated from {!s}".format(csv_link),
        creator="USGS",
        creator_url="http://waterdata.usgs.gov",
        creator_institution="USGS",
        creator_urn="gov.usgs",
        publisher="Axiom Data Science",
        publisher_uri="http://axiomdatascience.com",
        processing_level="None",
        acknowledgement="None",
        geospatial_bounds=loc,
        geospatial_lat_min=lat,
        geospatial_lat_max=lat,
        geospatial_lon_min=lon,
        geospatial_lon_max=lon,
        license="Freely Distributed",
        date_created=datetime.utcnow().replace(second=0,
                                               microsecond=0).isoformat())

    def to_floats(x):
        try:
            return float(x)
        except ValueError:
            return fillvalue

    min_time = df['time'].min()
    max_time = df['time'].max()

    full_station_urn = "urn:ioos:station:{!s}:{!s}".format(
        global_attributes["naming_authority"], site_id)
    if output_format == 'cf16':
        output_filename = '{}_{}-{}.nc'.format(
            site_id, min_time.strftime('%Y%m%dT%H%M%S'),
            max_time.strftime('%Y%m%dT%H%M%S'))
        times = [calendar.timegm(x.timetuple()) for x in df["time"]]
        verticals = df['depth'].values
        ts = TimeSeries(output,
                        latitude=lat,
                        longitude=lon,
                        station_name=full_station_urn,
                        global_attributes=global_attributes,
                        output_filename=output_filename,
                        times=times,
                        verticals=verticals,
                        vertical_axis_name='height',
                        vertical_positive='down')

    for var in df.columns:
        if var in [
                'datetime', 'time', 'depth', 'tz_cd', 'site_no', 'agency_cd'
        ]:
            continue

        try:
            var_meta = variable_map[var]
        except KeyError:
            logger.error(
                "Variable {!s} was not found in variable map!".format(var))
            continue

        # Convert to floats
        df[var] = df[var].map(to_floats)

        # Change feet to meters
        if var_meta["units"] in ["feet", "ft"]:
            df[var] = np.asarray(
                [v * 0.3048 if v != fillvalue else v for v in df[var]])
            var_meta["units"] = "meters"

        if output_format == 'axiom':
            full_sensor_urn = "urn:ioos:sensor:{!s}:{!s}:{!s}".format(
                global_attributes["naming_authority"], site_id,
                var_meta["standard_name"])
            output_directory = os.path.join(output, full_sensor_urn)
            output_filename = '{}_{}-{}.nc'.format(
                var, min_time.strftime('%Y%m%dT%H%M%S'),
                max_time.strftime('%Y%m%dT%H%M%S'))
            ts = TimeSeries.from_dataframe(
                df,
                output_directory,
                output_filename,
                lat,
                lon,
                full_station_urn,
                global_attributes,
                var_meta["standard_name"],
                var_meta,
                sensor_vertical_datum=sensor_vertical_datum,
                fillvalue=fillvalue,
                data_column=var,
                vertical_axis_name='height',
                vertical_positive='down')
            ts.add_instrument_metadata(urn=full_sensor_urn)
        elif output_format == 'cf16':
            # Variable names shouldn't start with a number
            try:
                int(var[0])
                variable_name = 'v_{}'.format(var)
            except ValueError:
                variable_name = var
            ts.add_variable(variable_name,
                            values=df[var].values,
                            attributes=var_meta,
                            fillvalue=fillvalue,
                            sensor_vertical_datum=sensor_vertical_datum)
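
# A hypothetical invocation of the parser above; the local filename is an
# illustrative placeholder, while the site id and WAF link mirror the sample
# shown in the docstring:
if __name__ == '__main__':
    with open('395740074482628.txt') as f:
        parse_type_1('cf16', '395740074482628', f.read(), './output',
                     'http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/395740074482628.txt')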
Example #40
def main(output_format, output, do_download, download_folder, filesubset=None):

    if do_download is True:

        try:
            os.makedirs(download_folder)
        except OSError:
            pass

        waf = 'http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/'

        r = requests.get(waf)
        soup = BeautifulSoup(r.text, "lxml")

        for link in soup.find_all('a'):

            # Skip non .txt files
            site_id, ext = os.path.splitext(link['href'])
            if ext != ".txt":
                continue

            if filesubset and site_id.lower() not in filesubset:
                # Skip this file!
                continue

            csv_link = waf + link['href']
            logger.info("Downloading '{}'".format(csv_link))
            d = requests.get(csv_link)
            try:
                d.raise_for_status()
            except requests.exceptions.HTTPError:
                logger.error("Could not download: {!s}, skipping. Status code: {!s}".format(csv_link, d.status_code))
                continue

            with open(os.path.join(download_folder, os.path.basename(csv_link)), 'wt') as f:
                f.write(d.text)

    # Yes, this uses lots of RAM, but we need to match up lon/lat positions later on.
    results = []
    for datafile in os.listdir(download_folder):

        site_id = os.path.splitext(os.path.basename(datafile))[0]

        if filesubset and site_id.lower() not in filesubset:
            # Skip this file!
            continue

        with open(os.path.join(download_folder, datafile)) as d:
            contents = d.read()
            r = None
            for line in contents.split("\n"):
                if "agency_cd" in line:
                    r = parse_type_1(output_format, site_id, contents, output)
                    break
                elif "date_time_GMT" in line:
                    r = parse_type_2(output_format, site_id, contents, output)
                    break
                else:
                    continue

            if r is None:
                logger.error('Could not process file: {}'.format(datafile))
            else:
                logger.info("Processed {}".format(datafile))
                results.append(r)

    results = sorted(results, key=attrgetter('lon', 'lat'))
    gresults = groupby(results, attrgetter('lon', 'lat'))
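    # itertools.groupby only merges *adjacent* items, so results are first
    # sorted on the same (lon, lat) key to make equal stations contiguous.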

    for (glon, glat), group in gresults:

        groups = [x for x in group if x]

        # Strip off the variable type if need be
        gsite = groups[0].site
        if gsite[-2:] in ['WV', 'BP', 'WL']:
            gsite = gsite[:-2]

        for result in groups:

            gas = get_globals(glat, glon, result.z, result.name, gsite)
            station_urn = IoosUrn(asset_type='station',
                                  authority=gas['naming_authority'],
                                  label=gsite)

            if output_format == 'cf16':
                # If CF, a file for each result dataframe
                times = [ calendar.timegm(x.timetuple()) for x in result.df['time'] ]
                verticals = result.df['depth'].values
                output_filename = '{}.nc'.format(result.site)
                ts = TimeSeries(output,
                                latitude=glat,
                                longitude=glon,
                                station_name=gsite,
                                global_attributes=gas,
                                output_filename=output_filename,
                                times=times,
                                verticals=verticals)

            for var in result.df.columns:
                if var in ['date_time_GMT', 'datetime', 'time', 'depth', 'tz_cd', 'site_no', 'agency_cd']:
                    continue

                try:
                    var_meta = copy(variable_map[var])
                except KeyError:
                    logger.error("Variable {!s} was not found in variable map!".format(var))
                    continue

                # Convert to floats
                result.df[var] = result.df[var].map(to_floats)
                if var_meta["units"].lower() in ["feet", "ft"]:
                    result.df[var] = result.df[var].apply(lambda x: None if pd.isnull(x) else x * 0.3048)
                    var_meta["units"] = "meters"
                elif var_meta["units"].lower() in ["psi"]:
                    result.df[var] = result.df[var].apply(lambda x: None if pd.isnull(x) else x * 68.9476)
                    var_meta["units"] = "mbar"
                elif var_meta["units"].lower() in ['millimeters of mercury']:
                    result.df[var] = result.df[var].apply(lambda x: None if pd.isnull(x) else x * 1.33322)
                    var_meta["units"] = "mbar"

                # Replace NaNs with the fill value that consumers should interpret as missing data
                result.df.fillna(fillvalue, inplace=True)

                if output_format == 'axiom':
                    # If Axiom, a file for each variable
                    output_directory = os.path.join(output, gsite)
                    output_filename = '{}_{}.nc'.format(result.site, var_meta['standard_name'])
                    ts = TimeSeries.from_dataframe(
                        result.df,
                        output_directory,
                        output_filename,
                        glat,
                        glon,
                        station_urn.urn,
                        gas,
                        var_meta["standard_name"],
                        var_meta,
                        sensor_vertical_datum='NAVD88',
                        fillvalue=fillvalue,
                        data_column=var,
                        vertical_axis_name='height')
                    sensor_urn = urnify(station_urn.authority, station_urn.label, var_meta)
                    ts.add_instrument_metadata(urn=sensor_urn)
                elif output_format == 'cf16':
                    # If CF, add variable to existing TimeSeries
                    try:
                        int(var[0])
                        variable_name = 'v_{}'.format(var)
                    except BaseException:
                        variable_name = var
                    ts.add_variable(variable_name,
                                    values=result.df[var].values,
                                    attributes=var_meta,
                                    fillvalue=fillvalue,
                                    sensor_vertical_datum='NAVD88')
Example #41
# NOTE: the opening of this example (apparently a `ts = TimeSeries(...)`
# constructor call in a notebook) was lost; only its trailing arguments
# survive:
#                 verticals=df.depth.values,
#                 output_filename=ofile,
#                 vertical_positive='down')

# ### Add data variables

# In[9]:

df.columns.tolist()

# In[10]:

for c in df.columns:
    if c in ts._nc.variables:
        print("Skipping '{}' (already in file)".format(c))
        continue
    if c in ['time', 'lat', 'lon', 'depth', 'cpm_date_time_string']:
        print("Skipping axis '{}' (already in file)".format(c))
        continue
    print("Adding {}".format(c))
    try:
        ts.add_variable(c, df[c].values)
    except Exception:
        print("Skipping '{}' (could not add values, e.g. object dtype)".format(c))

# In[13]:

df['error_flag1'].dtype.name

# In[ ]:
Example #42
# In[11]:

for c in df.columns:
    if c in ts._nc.variables:
        print("Skipping '{}' (already in file)".format(c))
        continue
    if c in ['time', 'lat', 'lon', 'depth', 'cpm_date_time_string']:
        print("Skipping axis '{}' (already in file)".format(c))
        continue
    if 'object' in df[c].dtype.name:
        print("Skipping object {}".format(c))
        continue

    print("Adding {}".format(c))
    # add variable values and variable attributes here
    ts.add_variable(c, df[c].values, attributes=atts.get(c))

# In[12]:

df['error_flag3'][0]

# In[13]:

ts.ncd

# In[14]:

import netCDF4
nc = netCDF4.Dataset(outfile)

# In[15]: