def test_timeseries_extra_values(self):
    """
    Write more values than there are time steps and check the extras are ignored.

    This will map directly to the time variable and ignore any time
    indexes that are not found.  The 'times' parameter to add_variable
    should be the same length as the values parameter.

    The file's time axis has six entries while nine values (with nine
    matching timestamps) are supplied; the test expects only the first
    six values to end up in the file.
    """
    filename = 'test_timeseries_extra_values.nc'
    times = [0, 1000, 2000, 3000, 4000, 5000]
    verticals = None
    ts = TimeSeries(output_directory=self.output_directory,
                    latitude=self.latitude,
                    longitude=self.longitude,
                    station_name=self.station_name,
                    global_attributes=self.global_attributes,
                    output_filename=filename,
                    times=times,
                    verticals=verticals)

    values = [20, 21, 22, 23, 24, 25, 26, 27, 28]
    value_times = [0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000]
    attrs = dict(standard_name='sea_water_temperature')
    ts.add_variable('temperature', values=values, attributes=attrs, times=value_times)
    ts.close()

    nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
    try:
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('temperature').size == len(times)
        assert (nc.variables.get('temperature')[:] == np.asarray(values[0:6])).all()
    finally:
        # Release the file handle even when an assertion fails.
        nc.close()
def test_timeseries_profile(self):
    """
    Write a six-step, three-level profile and verify its layout in the file.

    Values are supplied as a flat array (each time step repeated once per
    vertical level) and are expected to come back shaped as
    (len(times), len(verticals)).  The 'z' variable is expected to carry
    positive == 'down'.
    """
    filename = 'test_timeseries_profile.nc'
    times = [0, 1000, 2000, 3000, 4000, 5000]
    verticals = [0, 1, 2]
    ts = TimeSeries(output_directory=self.output_directory,
                    latitude=self.latitude,
                    longitude=self.longitude,
                    station_name=self.station_name,
                    global_attributes=self.global_attributes,
                    output_filename=filename,
                    times=times,
                    verticals=verticals)

    values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
    attrs = dict(standard_name='sea_water_temperature')
    ts.add_variable('temperature', values=values, attributes=attrs)
    ts.close()

    nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
    try:
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('z').positive == 'down'
        assert nc.variables.get('temperature').size == len(times) * len(verticals)
        assert (nc.variables.get('temperature')[:] == values.reshape((len(times), len(verticals)))).all()
    finally:
        # Release the file handle even when an assertion fails.
        nc.close()
def test_timeseries_many_variables(self):
    """
    Add several differently-shaped variables to one profile file.

    Variables written:
      * 'temperature'        - flat values with attributes
      * 'salinity'           - values already reshaped to (time, z)
      * 'dissolved_oxygen'   - a fully masked array with its fill value
      * 'bottom_temperature' - one value per time at a single vertical,
                               added with unlink_from_profile=True

    Only the first three are checked after re-opening the file.
    """
    filename = 'test_timeseries_many_variables.nc'
    times = [0, 1000, 2000, 3000, 4000, 5000]
    verticals = [0, 1, 2]
    ts = TimeSeries(output_directory=self.output_directory,
                    latitude=self.latitude,
                    longitude=self.longitude,
                    station_name=self.station_name,
                    global_attributes=self.global_attributes,
                    output_filename=filename,
                    times=times,
                    verticals=verticals)

    values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
    bottom_values = [30, 31, 32, 33, 34, 35]
    # Masked view over the same data with every element masked out.
    full_masked = values.view(np.ma.MaskedArray)
    full_masked.mask = True
    attrs = dict(standard_name='sea_water_temperature')
    ts.add_variable('temperature', values=values, attributes=attrs)
    ts.add_variable('salinity', values=values.reshape((len(times), len(verticals))))
    ts.add_variable('dissolved_oxygen', values=full_masked, fillvalue=full_masked.fill_value)
    ts.add_variable('bottom_temperature', values=bottom_values, verticals=[60],
                    unlink_from_profile=True, attributes=attrs)
    ts.close()

    nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
    try:
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)
        assert (nc.variables.get('temperature')[:] == values.reshape((len(times), len(verticals)))).all()
        assert (nc.variables.get('salinity')[:] == values.reshape((len(times), len(verticals)))).all()
        assert nc.variables.get('dissolved_oxygen')[:].mask.all()
    finally:
        # Release the file handle even when an assertion fails.
        nc.close()
def test_timeseries_profile_with_bottom_temperature(self):
    """
    Combine a profile variable with a single-depth variable in one file.

    'temperature' spans every (time, z) cell, while 'bottom_temperature'
    has one value per time step at vertical 60 and is added with
    unlink_from_profile=True.  The test expects a 'sensor_depth' variable
    to exist in the resulting file.
    """
    filename = 'test_timeseries_profile_with_bottom_temperature.nc'
    times = [0, 1000, 2000, 3000, 4000, 5000]
    verticals = [0, 1, 2]
    ts = TimeSeries(output_directory=self.output_directory,
                    latitude=self.latitude,
                    longitude=self.longitude,
                    station_name=self.station_name,
                    global_attributes=self.global_attributes,
                    output_filename=filename,
                    times=times,
                    verticals=verticals)

    values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
    bottom_values = [30, 31, 32, 33, 34, 35]
    attrs = dict(standard_name='sea_water_temperature')
    ts.add_variable('temperature', values=values, attributes=attrs)
    ts.add_variable('bottom_temperature', values=bottom_values, verticals=[60],
                    unlink_from_profile=True, attributes=attrs)
    ts.close()

    nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
    try:
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)
        assert nc.variables.get('sensor_depth') is not None
        assert nc.variables.get('bottom_temperature').size == len(times)
        assert (nc.variables.get('temperature')[:] == values.reshape((len(times), len(verticals)))).all()
        assert (nc.variables.get('bottom_temperature')[:] == np.asarray(bottom_values)).all()
    finally:
        # Release the file handle even when an assertion fails.
        nc.close()
def test_from_variable(self):
    """
    Check the URN built by urnify() for variables with various attributes.

    Each case adds one variable to the same open file, syncs it, and
    compares urnify('axiom', 'foo', <variable>) against the expected
    string.  The cases cover: a vertical_datum attribute, a single
    cell_method with an interval, two cell_methods, and two cell_methods
    combined with a discriminant.
    """
    filename = 'test_urn_from_variable.nc'
    times = [0, 1000, 2000, 3000, 4000, 5000]
    verticals = None
    ts = TimeSeries(output_directory=self.output_directory,
                    latitude=self.latitude,
                    longitude=self.longitude,
                    station_name=self.station_name,
                    global_attributes=self.global_attributes,
                    output_filename=filename,
                    times=times,
                    verticals=verticals)

    # (variable name, attributes, expected URN) -- processed in this order.
    cases = [
        (
            'temperature',
            dict(standard_name='lwe_thickness_of_precipitation_amount',
                 vertical_datum='NAVD88'),
            'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount#vertical_datum=navd88',
        ),
        (
            'temperature2',
            dict(standard_name='lwe_thickness_of_precipitation_amount',
                 cell_methods='time: variance (interval: PT1H comment: sampled instantaneously)'),
            'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount#cell_methods=time:variance;interval=pt1h',
        ),
        (
            'temperature3',
            dict(standard_name='lwe_thickness_of_precipitation_amount',
                 cell_methods='time: variance time: mean (interval: PT1H comment: sampled instantaneously)'),
            'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount#cell_methods=time:mean,time:variance;interval=pt1h',
        ),
        (
            'temperature4',
            dict(standard_name='lwe_thickness_of_precipitation_amount',
                 cell_methods='time: variance time: mean (interval: PT1H comment: sampled instantaneously)',
                 discriminant='2'),
            'urn:ioos:sensor:axiom:foo:lwe_thickness_of_precipitation_amount-2#cell_methods=time:mean,time:variance;interval=pt1h',
        ),
    ]

    for variable_name, attrs, expected_urn in cases:
        values = [20, 21, 22, 23, 24, 25]
        ts.add_variable(variable_name, values=values, attributes=attrs)
        ts.ncd.sync()
        urn = urnify('axiom', 'foo', ts.ncd.variables[variable_name])
        assert urn == expected_urn

    ts.close()
def test_timeseries_profile_fill_value_in_z(self):
    """
    Write a profile whose vertical axis itself contains a fill value.

    The first vertical is the fill value and the second is 0, so for every
    time step the first cell is expected to be masked and the second to
    hold the real measurement (20..25).
    """
    filename = 'test_timeseries_profile_fill_value_in_z.nc'
    times = [0, 1000, 2000, 3000, 4000, 5000]
    # Vertical fills MUST be at the BEGINNING of the array!!!!
    verticals = [self.fillvalue, 0]
    ts = TimeSeries(output_directory=self.output_directory,
                    latitude=self.latitude,
                    longitude=self.longitude,
                    station_name=self.station_name,
                    global_attributes=self.global_attributes,
                    output_filename=filename,
                    times=times,
                    verticals=verticals)

    values = [self.fillvalue, 20,
              self.fillvalue, 21,
              self.fillvalue, 22,
              self.fillvalue, 23,
              self.fillvalue, 24,
              self.fillvalue, 25]
    attrs = dict(standard_name='sea_water_temperature')
    ts.add_variable('temperature', values=values, attributes=attrs, fillvalue=self.fillvalue)
    ts.close()

    nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
    try:
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)

        # Read the data once instead of re-reading it for every assertion.
        temperature = nc.variables.get('temperature')[:]
        for row, expected in enumerate([20, 21, 22, 23, 24, 25]):
            assert temperature[row][1] == expected
            assert temperature.mask[row][0]

        assert (temperature == np.asarray(values).reshape((len(times), len(verticals)))).all()
    finally:
        # Release the file handle even when an assertion fails.
        nc.close()
def test_timeseries_profile_unsorted_time_and_z(self):
    """
    Write a profile whose time axis is supplied out of order.

    Times are given as [5000, 1000, 2000, 3000, 4000, 0] with values
    20..25 (each repeated per vertical).  The assertions expect the rows
    in the file to follow ascending time, i.e. the value supplied for
    time 0 (25) comes first and the value supplied for time 5000 (20)
    comes last.
    """
    filename = 'test_timeseries_profile_unsorted_time_and_z.nc'
    times = [5000, 1000, 2000, 3000, 4000, 0]
    verticals = [0, 50]
    ts = TimeSeries(output_directory=self.output_directory,
                    latitude=self.latitude,
                    longitude=self.longitude,
                    station_name=self.station_name,
                    global_attributes=self.global_attributes,
                    output_filename=filename,
                    times=times,
                    verticals=verticals)

    values = np.repeat([20, 21, 22, 23, 24, 25], len(verticals))
    attrs = dict(standard_name='sea_water_temperature')
    ts.add_variable('temperature', values=values, attributes=attrs, fillvalue=self.fillvalue)
    ts.close()

    nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
    try:
        assert nc is not None
        assert nc.variables.get('time').size == len(times)
        assert nc.variables.get('z').size == len(verticals)
        assert nc.variables.get('temperature').size == len(times) * len(verticals)

        # Read the data once instead of re-reading it for every assertion.
        temperature = nc.variables.get('temperature')[:]
        for row, expected in enumerate([25, 21, 22, 23, 24, 20]):
            assert temperature[row][0] == expected
            assert temperature[row][1] == expected
    finally:
        # Release the file handle even when an assertion fails.
        nc.close()
def parse_type_1(output_format, site_id, contents, output, csv_link):
    """
    Convert a tab-delimited USGS NWIS data file into NetCDF output.

    Site metadata (latitude, longitude, altitude, datum, station name) is
    fetched from the USGS site service for ``site_id``; the measurements
    come from ``contents``.

    :param output_format: 'axiom' writes one file per variable via
        TimeSeries.from_dataframe; 'cf16' writes a single file holding
        every recognised variable.  Any other value produces no output.
    :param site_id: USGS site number; used for the metadata request, the
        station/sensor URNs and the 'cf16' output filename.
    :param contents: full text of the data file (format shown below).
    :param output: base output directory.
    :param csv_link: source link recorded in the 'history' global attribute.
    :returns: None.  Returns early (after logging an error) when the site
        metadata request fails.

    Columns not present in the internal variable map are logged and
    skipped.  Values that cannot be converted to float become the fill
    value (-9999.9), and variables whose units are feet are converted to
    meters.  Rows whose date cannot be parsed are dropped.

    Example input:

    # ---------------------------------- WARNING ----------------------------------------
    # The data you have obtained from this automated U.S. Geological Survey database
    # have not received Director's approval and as such are provisional and subject to
    # revision.  The data are released on the condition that neither the USGS nor the
    # United States Government may be held liable for any damages resulting from its use.
    # Additional info: http://waterdata.usgs.gov/ga/nwis/help/?provisional
    #
    # File-format description:  http://waterdata.usgs.gov/nwis/?tab_delimited_format_info
    # Automated-retrieval info: http://waterdata.usgs.gov/nwis/?automated_retrieval_info
    #
    # Contact:   [email protected]
    # retrieved: 2012-11-20 12:05:22 EST       (caww01)
    #
    # Data for the following 1 site(s) are contained in this file
    #    USGS 395740074482628 South Branch Rancocas Cr at S Main St nr Lumberton
    # -----------------------------------------------------------------------------------
    #
    # Data provided for site 395740074482628
    #    DD parameter   Description
    #    03   00035     Wind speed, miles per hour
    #    07   00025     Barometric pressure, millimeters of mercury
    #    09   00045     Precipitation, total, inches
    #    19   63160     Stream water level elevation above NAVD 1988, in feet
    #
    # Data-value qualification codes included in this output:
    #     P  Provisional data subject to revision.
    #
    agency_cd site_no datetime tz_cd 03_00035 03_00035_cd 07_00025 07_00025_cd 09_00045 09_00045_cd 19_63160 19_63160_cd
    5s 15s 20d 6s 14n 10s 14n 10s 14n 10s 14n 10s
    USGS 395740074482628 2012-10-28 13:00 EST 4.2 P 755 P 3.22 P
    USGS 395740074482628 2012-10-28 13:15 EST 6.4 P 754 P 0.00 P 3.36 P
    USGS 395740074482628 2012-10-28 13:30 EST 3.6 P 754 P 0.00 P 3.50 P
    USGS 395740074482628 2012-10-28 13:45 EST 3.2 P 754 P 0.00 P 3.63 P
    USGS 395740074482628 2012-10-28 14:00 EST 7.0 P 754 P 0.00 P 3.76 P
    USGS 395740074482628 2012-10-28 14:15 EST 4.0 P 754 P 0.00 P 3.87 P
    ...
    """
    # lat/lon point: http://waterservices.usgs.gov/nwis/site/?sites=395740074482628

    variable_map = {
        '01_00065' : {'long_name' : 'Gage height', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
        '03_00035' : {'long_name' : 'Wind Speed', 'standard_name' : 'wind_speed', 'units': 'mph'},
        '04_00035' : {'long_name' : 'Wind Gust', 'standard_name' : 'wind_speed_of_gust', 'units': 'mph'},
        '05_00035' : {'long_name' : 'Wind Speed', 'standard_name' : 'wind_speed', 'units': 'mph'},
        '06_00035' : {'long_name' : 'Wind Gust', 'standard_name' : 'wind_speed_of_gust', 'units': 'mph'},
        '04_00036' : {'long_name' : 'Wind Direction', 'standard_name' : 'wind_from_direction', 'units': 'degrees'},
        '02_00036' : {'long_name' : 'Wind Direction', 'standard_name' : 'wind_from_direction', 'units': 'degrees'},
        '05_00025' : {'long_name' : 'Air Pressure', 'standard_name' : 'air_pressure', 'units': 'mm of mercury'},
        '07_00025' : {'long_name' : 'Air Pressure', 'standard_name' : 'air_pressure', 'units': 'mm of mercury'},
        '09_00025' : {'long_name' : 'Air Pressure', 'standard_name' : 'air_pressure', 'units': 'mm of mercury'},
        '03_00045' : {'long_name' : 'Total Precipitation', 'standard_name' : 'lwe_thickness_of_precipitation_amount', 'units': 'inches'},
        '08_00045' : {'long_name' : 'Total Precipitation', 'standard_name' : 'lwe_thickness_of_precipitation_amount', 'units': 'inches'},
        '09_00045' : {'long_name' : 'Total Precipitation', 'standard_name' : 'lwe_thickness_of_precipitation_amount', 'units': 'inches'},
        '06_00052' : {'long_name' : 'Relative Humidity', 'standard_name' : 'relative_humidity', 'units': 'percent'},
        '07_00052' : {'long_name' : 'Relative Humidity', 'standard_name' : 'relative_humidity', 'units': 'percent'},
        '08_00052' : {'long_name' : 'Relative Humidity', 'standard_name' : 'relative_humidity', 'units': 'percent'},
        '05_00020' : {'long_name' : 'Air Temperature', 'standard_name' : 'air_temperature', 'units': 'degrees_Celsius'},
        '06_00020' : {'long_name' : 'Air Temperature', 'standard_name' : 'air_temperature', 'units': 'degrees_Celsius'},
        '07_00020' : {'long_name' : 'Air Temperature', 'standard_name' : 'air_temperature', 'units': 'degrees_Celsius'},
        '19_63160' : {'long_name' : 'Water Surface Height Above Reference Datum (NAVD88)', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
        '01_63160' : {'long_name' : 'Water Surface Height Above Reference Datum (NAVD88)', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
    }

    # Get metadata from a separate endpoint.
    d = requests.get("http://waterservices.usgs.gov/nwis/site/?sites={!s}".format(site_id))
    try:
        d.raise_for_status()
    except requests.exceptions.HTTPError:
        logger.error("Could not find lat/lon endpoint for station {!s}, skipping. Status code: {!s}".format(site_id, d.status_code))
        return
    _, hz, dz = split_file(d.text, "agency_cd")
    # Strip off the one line after the headers
    dz = dz[1:]
    dfz = pd.DataFrame(dz, columns=hz)
    lat = float(dfz["dec_lat_va"][0])
    lon = float(dfz["dec_long_va"][0])
    sensor_vertical_datum = dfz["alt_datum_cd"][0] or "NAVD88"
    try:
        z = float(dfz["alt_va"][0])
    except ValueError:
        z = 0.
    loc = "POINT({!s} {!s} {!s})".format(lon, lat, z)
    name = dfz["station_nm"][0]

    comments, headers, data = split_file(contents, "agency_cd")
    df = pd.DataFrame(data, columns=headers)

    fillvalue = -9999.9

    # Combine date columns
    dates = df["datetime"]
    tz = df["tz_cd"]
    new_dates = list()
    bad_rows = list()
    for i in range(len(dates)):
        try:
            new_dates.append(parse(dates[i] + " " + tz[i]).astimezone(pytz.utc))
        except Exception:
            # Bad date: remember the row so it can be removed below.
            # ``Exception`` (not ``BaseException``) so that Ctrl-C and
            # SystemExit are not silently swallowed here.
            bad_rows.append(i)
    if bad_rows:
        # Drop all unparseable rows in one go rather than mutating the
        # frame while walking over it.
        df.drop(bad_rows, axis=0, inplace=True)
    df['time'] = new_dates
    df['depth'] = [z] * len(df['time'])

    # Strip out "_cd" columns (quality checks for USGS)
    for h in headers:
        if "_cd" in h:
            df.drop(h, axis=1, inplace=True)

    # Add global attributes to appear in the resulting NetCDF file
    global_attributes = dict(
        title=name,
        summary='USGS Hurricane Sandy Rapid Response Stations.  Data acquired from http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/.',
        keywords="usgs, waterdata, elevation, water, waterlevel, sandy, hurricane, rapid, response, %s" % site_id,
        keywords_vocabulary="None",
        naming_authority='gov.usgs',
        id=site_id,
        cdm_data_type="Station",
        history="NetCDF file generated from {!s}".format(csv_link),
        creator="USGS",
        creator_url="http://waterdata.usgs.gov",
        creator_institution="USGS",
        creator_urn="gov.usgs",
        publisher="Axiom Data Science",
        publisher_uri="http://axiomdatascience.com",
        processing_level="None",
        acknowledgement="None",
        geospatial_bounds=loc,
        geospatial_lat_min=lat,
        geospatial_lat_max=lat,
        geospatial_lon_min=lon,
        geospatial_lon_max=lon,
        license="Freely Distributed",
        date_created=datetime.utcnow().replace(second=0, microsecond=0).isoformat()
    )

    def to_floats(x):
        """Return float(x), or the fill value when x is not numeric (or is None)."""
        try:
            return float(x)
        except (TypeError, ValueError):
            return fillvalue

    min_time = df['time'].min()
    max_time = df['time'].max()

    full_station_urn = "urn:ioos:station:{!s}:{!s}".format(global_attributes["naming_authority"], site_id)
    if output_format == 'cf16':
        # One file for the whole station; variables are added in the loop below.
        output_filename = '{}_{}-{}.nc'.format(site_id, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
        times = [ calendar.timegm(x.timetuple()) for x in df["time"] ]
        verticals = df['depth'].values
        ts = TimeSeries(output, latitude=lat, longitude=lon, station_name=full_station_urn, global_attributes=global_attributes, output_filename=output_filename, times=times, verticals=verticals, vertical_axis_name='z')

    for var in df.columns:
        if var in ['datetime', 'time', 'depth', 'tz_cd', 'site_no', 'agency_cd']:
            continue

        try:
            var_meta = variable_map[var]
        except KeyError:
            logger.error("Variable {!s} was not found in variable map!".format(var))
            continue

        # Convert to floats
        df[var] = df[var].map(to_floats)

        # Change feet to meters
        if var_meta["units"] in ["feet", "ft"]:
            df[var] = np.asarray([ v * 0.3048 if v != fillvalue else v for v in df[var] ])
            var_meta["units"] = "meters"

        if output_format == 'axiom':
            # One file per sensor, in a directory named after the sensor URN.
            full_sensor_urn = "urn:ioos:sensor:{!s}:{!s}:{!s}".format(global_attributes["naming_authority"], site_id, var_meta["standard_name"])
            output_directory = os.path.join(output, full_sensor_urn)
            output_filename = '{}_{}-{}.nc'.format(var, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
            ts = TimeSeries.from_dataframe(df, output_directory, output_filename, lat, lon, full_station_urn, global_attributes, var_meta["standard_name"], var_meta, sensor_vertical_datum=sensor_vertical_datum, fillvalue=fillvalue, data_column=var, vertical_axis_name='height')
            ts.add_instrument_metadata(urn=full_sensor_urn)
            ts.close()
        elif output_format == 'cf16':
            # Variable names shouldn't start with a number
            try:
                int(var[0])
                variable_name = 'v_{}'.format(var)
            except (IndexError, TypeError, ValueError):
                variable_name = var
            ts.add_variable(variable_name, values=df[var].values, attributes=var_meta, fillvalue=fillvalue, sensor_vertical_datum=sensor_vertical_datum)

    if output_format == 'cf16':
        ts.close()
def parse_type_2(output_format, site_id, contents, output, csv_link):
    """
    Convert a USGS storm-tide sensor text file into NetCDF output.

    All metadata is taken from the comment header of ``contents``: the
    sensor latitude/longitude, the site id (which replaces the ``site_id``
    argument when a "Site id" line is present) and the sensor elevation
    (converted from feet to meters when the unit is 'feet' or 'ft').

    :param output_format: 'axiom' writes one file per variable via
        TimeSeries.from_dataframe; 'cf16' writes a single file holding
        every recognised variable.  Any other value produces no output.
    :param site_id: fallback site identifier and station title.
    :param contents: full text of the data file (format shown below).
    :param output: base output directory.
    :param csv_link: source link recorded in the 'history' global attribute.
    :returns: None.

    Columns not present in the internal variable map are logged and
    skipped.  Values that cannot be converted to float become the fill
    value (-9999.9), and variables whose units are feet are converted to
    meters.

    Example input:

    # These data are provisional and subject to revision.
    # Data processed as of 12/05/2012 11:54:29.
    # Data collected as part of Hurricane Sandy (2012) Storm Tide project.
    # Data are archived at http://water.usgs.gov/floods/events/2012/isaac/index.php
    # Elevation determined from GPS surveys (NAVD 88).
    # Time datum is GMT (Greenwich Mean Time).
    # Water density estimated on basis of sensor location
    #   where saltwater = 63.989 lb/ft3       (Saltwater = dissolved solids concentration greater than 20000 milligrams per liter)
    #   where brackish water = 63.052 lb/ft3  (Brackish water = dissolved solids concentration between 1000 and 20000 milligrams per liter)
    #   where freshwater = 62.428 lb/ft3      (Freshwater = dissolved solids concentration less than 1000 milligrams per liter)
    # The equation used to compute elevation from recorded pressure is
    #  (((sp-bp)*144)/d)+e
    # Where sp = surge pressure in psi; bp = barometric pressure in psi;
    #  d = water density in lb/ft3; and e = elevation of sensor in ft above NAVD 88.
    # Barometric data from nearest pressure sensor. Location for the barometric sensor is listed below.
    # Elevation is computer-rounded to two decimal places.
    #      Sensor information
    # Site id = SSS-NY-WES-001WL
    # Site type = water level
    # Horizontal datum used is NAD 83
    # Sensor location latitude 40.942755
    # Sensor location longitude -73.719828
    # Sensor elevation above NAVD 88 = -3.97 ft
    # Lowest recordable water elevation is -3.90 ft
    # Water density value used = 63.989 lb/ft3
    # Barometric sensor site (source of bp) = SSS-NY-WES-002BP
    # Barometric sensor location latitude 40.90754368
    # Barometric sensor location longitude -73.8692184
    date_time_GMT elevation nearest_barometric_sensor_psi
    10-28-2012 06:00:00 0.88 14.5145
    10-28-2012 06:00:30 0.86 14.5145
    10-28-2012 06:01:00 0.85 14.5170
    10-28-2012 06:01:30 0.85 14.5145
    10-28-2012 06:02:00 0.84 14.5170
    10-28-2012 06:02:30 0.81 14.5145
    10-28-2012 06:03:00 0.76 14.5145
    ...
    """

    variable_map = {
        'elevation' : {'long_name' : 'Water Level Elevation above Reference Datum (NAVD88)', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
    }

    fillvalue = -9999.9

    def to_floats(x):
        """Return float(x), or the fill value when x is not numeric (or is None)."""
        try:
            return float(x)
        except (TypeError, ValueError):
            return fillvalue

    def tokens(line):
        """Split a comment line on spaces and return the non-empty, stripped pieces.

        Returns a real list so it can be indexed on both Python 2 and
        Python 3 (``filter`` returns an iterator on Python 3).
        """
        return [piece for piece in (raw.strip() for raw in line.split(" ")) if piece]

    comments, headers, data = split_file(contents, "date_time_GMT")
    df = pd.DataFrame(data, columns=headers)

    lat = None
    lon = None
    z = 0
    name = site_id
    sensor_vertical_datum = "NAVD88"

    for c in comments:
        if "Sensor location latitude" in c:
            lat = float(tokens(c)[-1])
        elif "Sensor location longitude" in c:
            lon = float(tokens(c)[-1])
        elif "Site id" in c:
            site_id = tokens(c)[-1]
            name = site_id
        elif "Sensor elevation" in c:
            # e.g. "# Sensor elevation above NAVD 88 = -3.97 ft":
            # words 4 and 5 left of the "=" form the datum ("NAVD" + "88"),
            # the last two tokens are the value and its unit.
            sensor_vertical_datum = "".join(c.split("=")[0].split(" ")[4:6])
            l = tokens(c)
            z = float(l[-2])
            if l[-1] in ["feet", "ft"]:
                z *= 0.3048

    loc = "POINT({!s} {!s} {!s})".format(lon, lat, z)
    df['time'] = df["date_time_GMT"].map(lambda x: parse(x + " UTC"))
    df['depth'] = [z] * len(df['time'])

    # Add global attributes to appear in the resulting NetCDF file
    global_attributes = dict(
        title=name,
        summary='USGS Hurricane Sandy Rapid Response Stations.  Data acquired from http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/.',
        keywords="usgs, waterdata, elevation, water, waterlevel, sandy, hurricane, rapid, response, %s" % site_id,
        keywords_vocabulary="None",
        naming_authority='gov.usgs',
        id=site_id,
        cdm_data_type="Station",
        history="NetCDF file generated from {!s}".format(csv_link),
        creator="USGS",
        creator_url="http://waterdata.usgs.gov",
        creator_institution="USGS",
        creator_urn="gov.usgs",
        publisher="Axiom Data Science",
        publisher_uri="http://axiomdatascience.com",
        processing_level="None",
        acknowledgement="None",
        geospatial_bounds=loc,
        geospatial_lat_min=lat,
        geospatial_lat_max=lat,
        geospatial_lon_min=lon,
        geospatial_lon_max=lon,
        license="Freely Distributed",
        date_created=datetime.utcnow().replace(second=0, microsecond=0).isoformat()
    )

    full_station_urn = "urn:ioos:station:{!s}:{!s}".format(global_attributes["naming_authority"], site_id)
    min_time = df["time"].min()
    max_time = df["time"].max()

    if output_format == 'cf16':
        # One file for the whole station; variables are added in the loop below.
        times = [ calendar.timegm(x.timetuple()) for x in df['time'] ]
        verticals = df['depth'].values
        output_filename = '{}_{}-{}.nc'.format(site_id, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
        ts = TimeSeries(output, latitude=lat, longitude=lon, station_name=full_station_urn, global_attributes=global_attributes, output_filename=output_filename, times=times, verticals=verticals)

    for var in df.columns:
        if var in ['date_time_GMT', 'time', 'depth']:
            continue

        # Variable names shouldn't start with a number
        try:
            int(var[0])
            variable_name = 'v_{}'.format(var)
        except (IndexError, TypeError, ValueError):
            variable_name = var

        try:
            var_meta = variable_map[var]
        except KeyError:
            logger.error("Variable {!s} was not found in variable map!".format(var))
            continue

        # Convert to floats
        df[var] = df[var].map(to_floats)
        # Change feet to meters
        if var_meta["units"] in ["feet", "ft"]:
            df[var] = [ v * 0.3048 if v != fillvalue else v for v in df[var] ]
            var_meta["units"] = "meters"

        if output_format == 'axiom':
            # One file per sensor, in a directory named after the sensor URN.
            full_sensor_urn = "urn:ioos:sensor:{!s}:{!s}:{!s}".format(global_attributes["naming_authority"], site_id, var_meta["standard_name"])
            output_directory = os.path.join(output, full_sensor_urn)
            output_filename = '{}_{}-{}.nc'.format(var, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
            ts = TimeSeries.from_dataframe(df, output_directory, output_filename, lat, lon, full_station_urn, global_attributes, var_meta["standard_name"], var_meta, sensor_vertical_datum=sensor_vertical_datum, fillvalue=fillvalue, data_column=var)
            ts.add_instrument_metadata(urn=full_sensor_urn)
            ts.close()
        elif output_format == 'cf16':
            ts.add_variable(variable_name, values=df[var].values, attributes=var_meta, fillvalue=fillvalue, sensor_vertical_datum=sensor_vertical_datum)

    if output_format == 'cf16':
        ts.close()
def main(output, download_folder, do_download, projects, csv_metadata_file, filesubset=None):
    """
    Translate downloaded NetCDF files into CF-1.6 timeseries files.

    :param output: base output directory; one sub-directory per project is
        created beneath it.
    :param download_folder: directory that downloaded files live in.
    :param do_download: when truthy, call ``download(...)`` to fetch the
        files; otherwise use whatever is already in ``download_folder``.
    :param projects: optional collection of project names to restrict
        processing to.  Names are compared after lower-casing, so the
        collection is expected to hold lower-case names.
    :param csv_metadata_file: CSV file with (at least) a 'project_name'
        column.  Rows whose project name starts with '#' are ignored.  The
        remaining non-empty columns become global attributes of every
        file in that project (except id, title, catalog_xml and
        project_name).
    :param filesubset: optional collection of lower-cased file basenames
        to restrict processing to.
    :returns: None.

    Each input file is copied to a temporary file, normalized in place,
    and rewritten through ``TimeSeries``.  Any error while processing one
    file is logged and that file is skipped.
    """
    project_metadata = dict()
    with open(csv_metadata_file, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            project_name = row['project_name']
            if isinstance(project_name, str) and project_name[0] == '#':
                continue
            if projects and project_name.lower() not in projects:
                # Skip projects if a subset was defined
                continue
            project_metadata[project_name] = dict()
            for k, v in row.items():
                project_metadata[project_name][k] = v

    if do_download:
        try:
            downloaded_files = download(download_folder, project_metadata, filesubset)
        except KeyboardInterrupt:
            # NOTE(review): only an interrupted download is handled here;
            # any other download error propagates to the caller.
            logger.exception('Error downloading datasets from THREDDS')
            downloaded_files = []
    else:
        downloaded_files = glob(os.path.join(download_folder, "*"))

    for down_file in downloaded_files:

        if filesubset is not None:
            if os.path.basename(down_file).lower() not in filesubset:
                # aka "9631ecp-a.nc"
                # Skip this file!
                continue

        if projects:
            tmpnc = netCDF4.Dataset(down_file)
            project_name, _ = tmpnc.id.split("/")
            nc_close(tmpnc)
            if project_name.lower() not in projects:
                # Skip this project!
                continue

        # mkstemp() returns an already-open OS-level file descriptor.  Only
        # the path is needed, so close the descriptor right away; otherwise
        # one descriptor is leaked for every file processed.
        temp_fd, temp_file = tempfile.mkstemp(prefix='cmg_collector', suffix='nc')
        os.close(temp_fd)
        shutil.copy(down_file, temp_file)

        nc = None
        try:
            # Cleanup to CF-1.6
            first_time = normalize_time(temp_file)
            normalize_epic_codes(temp_file)
            normalize_vectors(temp_file)
            normalize_units(temp_file)

            # Create list of variables that we want to save.
            mooring_id = None
            latitude = None
            longitude = None

            nc = netCDF4.Dataset(temp_file)

            project_name, _ = nc.id.split("/")
            feature_name, _ = os.path.splitext(os.path.basename(down_file))

            fname = os.path.basename(down_file)
            try:
                if int(fname[0]) <= 9 and int(fname[0]) >= 2:
                    # 1.) everything with first char between 2-9 is 3-digit
                    mooring_id = int(fname[0:3])
                elif int(fname[0]) == 1:
                    # 2.) if MOORING starts with 1, and data is newer than 2014, it's 4 digit, otherwise 3 digit.
                    if first_time > datetime(2014, 1, 1, 0):
                        # 4 digit if after Jan 1, 2014
                        mooring_id = int(fname[0:4])
                    else:
                        # 3 digit if before
                        mooring_id = int(fname[0:3])
            except ValueError:
                logger.exception("Could not create a suitable station_id. Skipping {0}.".format(down_file))
                continue

            try:
                latitude = nc.variables.get("lat")[0]
                longitude = nc.variables.get("lon")[0]
            except IndexError:
                latitude = nc.variables.get("lat")[:]
                longitude = nc.variables.get("lon")[:]

            file_name = os.path.basename(down_file)
            output_directory = os.path.join(output, project_name)
            logger.info("Translating {0} into CF1.6 format: {1}".format(down_file, os.path.abspath(os.path.join(output_directory, file_name))))

            if not os.path.isdir(output_directory):
                os.makedirs(output_directory)

            # Start from the source file's own attributes, then layer the
            # module-level defaults and the per-file values on top.
            file_global_attributes = { k : getattr(nc, k) for k in nc.ncattrs() }
            file_global_attributes.update(global_attributes)
            file_global_attributes['id'] = feature_name
            file_global_attributes['title'] = '{0} - {1}'.format(project_name, os.path.basename(down_file))
            file_global_attributes['MOORING'] = mooring_id
            file_global_attributes['original_filename'] = fname
            file_global_attributes['original_folder'] = project_name
            if project_name in project_metadata:
                for k, v in project_metadata[project_name].items():
                    if v and k.lower() not in ['id', 'title', 'catalog_xml', 'project_name']:
                        file_global_attributes[k] = v

            times = nc.variables.get('time')[:]

            # Get all depth values
            depth_variables = []
            for dv in nc.variables:
                depth_variables += [ x for x in nc.variables.get(dv).dimensions if 'depth' in x ]
            depth_variables = sorted(set(depth_variables))
            depth_values = np.asarray([ nc.variables.get(x)[:] for x in depth_variables ]).flatten()

            # Convert everything to positive up, unless it is specifically specified as "up" already
            depth_conversion = -1.0
            if depth_variables:
                pull_positive = nc.variables.get(depth_variables[0])
                if pull_positive and hasattr(pull_positive, 'positive') and pull_positive.positive.lower() == 'up':
                    depth_conversion = 1.0

            depth_values = depth_values * depth_conversion
            ts = TimeSeries(output_directory, latitude, longitude, feature_name, file_global_attributes, times=times, verticals=depth_values, output_filename=file_name, vertical_positive='up')

            for other in sorted(nc.variables):  # Sorted for a reason... don't change!
                if other in coord_vars:
                    continue

                old_var = nc.variables.get(other)
                variable_attributes = { k : getattr(old_var, k) for k in old_var.ncattrs() }
                # Remove/rename some attributes
                # https://github.com/USGS-CMG/usgs-cmg-portal/issues/67
                if 'valid_range' in variable_attributes:
                    del variable_attributes['valid_range']
                if 'minimum' in variable_attributes:
                    variable_attributes['actual_min'] = variable_attributes['minimum']
                    del variable_attributes['minimum']
                if 'maximum' in variable_attributes:
                    variable_attributes['actual_max'] = variable_attributes['maximum']
                    del variable_attributes['maximum']
                if 'sensor_depth' in variable_attributes:
                    # Convert to the correct positive "up" or "down"
                    variable_attributes['sensor_depth'] = variable_attributes['sensor_depth'] * depth_conversion

                fillvalue = None
                if hasattr(old_var, "_FillValue"):
                    fillvalue = old_var._FillValue

                # Figure out if this is a variable that is repeated at different depths
                # as different variable names.   Assumes sorted.
                new_var_name = other.split('_')[0]
                if new_var_name in ts.ncd.variables:
                    # Already in new file (processed when the first was encountered in the loop below)
                    continue

                # Get the depth index
                depth_variable = [ x for x in old_var.dimensions if 'depth' in x ]
                if depth_variable and len(old_var.dimensions) > 1 and 'time' in old_var.dimensions:
                    depth_index = np.squeeze(np.where(depth_values == (nc.variables.get(depth_variable[0])[:] * depth_conversion)))

                    # Find other variable names like this one
                    depth_indexes = [(other, depth_index)]
                    for search_var in sorted(nc.variables):
                        # If they have different depth dimension names we need to combine them into one variable
                        if search_var != other and search_var.split('_')[0] == new_var_name and \
                           depth_variable[0] != [ x for x in nc.variables[search_var].dimensions if 'depth' in x ][0]:
                            # Found a match at a different depth
                            search_depth_variable = [ x for x in nc.variables.get(search_var).dimensions if 'depth' in x ]
                            depth_index = np.squeeze(np.where(depth_values == (nc.variables.get(search_depth_variable[0])[:] * depth_conversion)))
                            depth_indexes.append((search_var, depth_index))
                            logger.info("Combining '{}' with '{}' as '{}' (different variables at different depths but are the same parameter)".format(search_var, other, new_var_name))

                    # Start fully masked and fill in only the depth columns
                    # for which a source variable exists.
                    values = np.ma.empty((times.size, len(depth_values)))
                    values.fill_value = fillvalue
                    values.mask = True
                    for nm, index in depth_indexes:
                        values[:, index] = np.squeeze(nc.variables.get(nm)[:])

                    # If we just have one index we want to use the original name
                    if len(depth_indexes) == 1:
                        # Just use the original variable name
                        new_var_name = other

                    # Create this one, should be the first we encounter for this type
                    ts.add_variable(new_var_name, values=values, times=times, fillvalue=fillvalue, attributes=variable_attributes)
                elif len(old_var.dimensions) == 1 and old_var.dimensions[0] == 'time':
                    # A single time dimensioned variable, like pitch, roll, record count, etc.
                    ts.add_variable(other, values=old_var[:], times=times, unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                elif depth_variable and 'time' not in old_var.dimensions:
                    # Metadata variable like bin distance
                    meta_var = ts.ncd.createVariable(other, old_var.dtype, ('z',), fill_value=fillvalue)
                    # .items() works on both Python 2 and Python 3.
                    for attr_name, attr_value in variable_attributes.items():
                        if attr_name != '_FillValue':
                            meta_var.setncattr(attr_name, attr_value)
                    meta_var[:] = old_var[:]
                elif depth_values.size == 1 and not depth_variable and 'time' in old_var.dimensions:
                    # There is a single depth_value for most variables, but this one does not have a depth dimension
                    # Instead, it has a sensor_depth attribute that defines the Z index.  These need to be put into
                    # a different file to remain CF compliant.
                    new_file_name = file_name.replace('.nc', '_{}.nc'.format(other))
                    new_ts = TimeSeries(output_directory, latitude, longitude, feature_name, file_global_attributes, times=times, verticals=[old_var.sensor_depth*depth_conversion], output_filename=new_file_name, vertical_positive='up')
                    new_ts.add_variable(other, values=old_var[:], times=times, verticals=[old_var.sensor_depth*depth_conversion], fillvalue=fillvalue, attributes=variable_attributes)
                    new_ts.close()
                elif depth_values.size > 1 and not depth_variable and 'time' in old_var.dimensions:
                    if hasattr(old_var, 'sensor_depth'):
                        # An ADCP or profiling dataset, but this variable is measured at a single depth.
                        # Example: Bottom Temperature on an ADCP
                        ts.add_variable(other, values=old_var[:], times=times, verticals=[old_var.sensor_depth*depth_conversion], unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                    else:
                        ts.add_variable(other, values=old_var[:], times=times, unlink_from_profile=True, fillvalue=fillvalue, attributes=variable_attributes)
                else:
                    ts.add_variable(other, values=old_var[:], times=times, fillvalue=fillvalue, attributes=variable_attributes)

                ts.ncd.sync()
            ts.ncd.close()

        except BaseException:
            logger.exception("Error. Skipping {0}.".format(down_file))
            continue
        finally:
            # Always release the source dataset and remove the scratch copy,
            # whether the file was translated, skipped or failed.
            nc_close(nc)
            if os.path.isfile(temp_file):
                os.remove(temp_file)
class TestTimeseriesTimeBounds(unittest.TestCase):
    """Check the ``time_bounds`` variable written by ``TimeSeries.add_time_bounds``.

    Every test starts from the same file: six time steps (0..5000 in steps
    of 1000), a single vertical level and one ``temperature`` variable. The
    tests differ only in the ``position`` argument, which decides where each
    time stamp sits inside its 1000 second wide bounds interval.
    """

    def setUp(self):
        """Create the TimeSeries file and add the temperature variable."""
        self.output_directory = os.path.join(os.path.dirname(__file__), "output")
        self.latitude = 34
        self.longitude = -72
        self.station_name = "PytoolsTestStation"
        self.global_attributes = dict(id='this.is.the.id')
        self.filename = 'test_timeseries_bounds.nc'
        self.times = [0, 1000, 2000, 3000, 4000, 5000]
        verticals = [0]
        self.ts = TimeSeries(output_directory=self.output_directory,
                             latitude=self.latitude,
                             longitude=self.longitude,
                             station_name=self.station_name,
                             global_attributes=self.global_attributes,
                             output_filename=self.filename,
                             times=self.times,
                             verticals=verticals)

        self.values = [20, 21, 22, 23, 24, 25]
        attrs = dict(standard_name='sea_water_temperature')
        self.ts.add_variable('temperature', values=self.values, attributes=attrs)

    def tearDown(self):
        """Close the TimeSeries and delete the file written by the test."""
        self.ts.close()
        os.remove(os.path.join(self.output_directory, self.filename))

    def _check_time_bounds(self, position, expected):
        """Add time bounds at ``position`` and compare them with ``expected``.

        :param position: value passed as ``position`` to ``add_time_bounds``
            ('start', 'middle' or 'end' in the tests below).
        :param expected: nested list with one ``[lower, upper]`` pair per
            time step.
        """
        delta = timedelta(seconds=1000)
        self.ts.add_time_bounds(delta=delta, position=position)
        self.ts.close()

        nc = netCDF4.Dataset(os.path.join(self.output_directory, self.filename))
        try:
            time_bounds = nc.variables.get('time_bounds')
            assert time_bounds.shape == (len(self.times), 2,)
            assert (time_bounds[:] == np.asarray(expected)).all()
        finally:
            # Close the read handle even when an assertion fails, so it is
            # not left open while tearDown removes the file.
            nc.close()

    def test_time_bounds_start(self):
        """position='start': each interval begins at its time stamp."""
        self._check_time_bounds('start', [
            [0,    1000],
            [1000, 2000],
            [2000, 3000],
            [3000, 4000],
            [4000, 5000],
            [5000, 6000],
        ])

    def test_time_bounds_middle(self):
        """position='middle': each interval is centred on its time stamp."""
        self._check_time_bounds('middle', [
            [-500,  500],
            [500,  1500],
            [1500, 2500],
            [2500, 3500],
            [3500, 4500],
            [4500, 5500],
        ])

    def test_time_bounds_end(self):
        """position='end': each interval finishes at its time stamp."""
        self._check_time_bounds('end', [
            [-1000,   0],
            [0,    1000],
            [1000, 2000],
            [2000, 3000],
            [3000, 4000],
            [4000, 5000],
        ])