def test_attempts_empty(self):
    """Omitting the `attempts` keyword entirely should still create the file."""
    filename = 'test_attempts_empty.nc'
    # From dataframe
    kwargs = dict(output_directory=self.output_directory,
                  output_filename=filename,
                  latitude=self.latitude,
                  longitude=self.longitude,
                  station_name=self.station_name,
                  global_attributes=self.global_attributes,
                  variable_name=self.vname,
                  variable_attributes=self.vatts)
    TimeSeries.from_dataframe(self.df, **kwargs)
def test_attempts_4(self):
    """Passing `attempts=4` for this dataframe must raise ValueError."""
    filename = 'test_attempts_4.nc'
    # From dataframe
    kwargs = dict(output_directory=self.output_directory,
                  output_filename=filename,
                  latitude=self.latitude,
                  longitude=self.longitude,
                  station_name=self.station_name,
                  global_attributes=self.global_attributes,
                  variable_name=self.vname,
                  variable_attributes=self.vatts,
                  attempts=4)
    with self.assertRaises(ValueError):
        TimeSeries.from_dataframe(self.df, **kwargs)
def test_timeseries_profile_from_dataframe(self):
    """Build a timeSeriesProfile file from a dataframe and verify its metadata."""
    filename = 'test_timeseries_profile_from_dataframe.nc'
    epoch_times = [0, 1000, 2000, 3000, 4000, 5000]
    depths = [0, 1, 2]
    temps = [20, 21, 22]
    attrs = dict(standard_name='sea_water_temperature')

    # From dataframe: each of the 6 time steps carries a full 3-level profile.
    df = pd.DataFrame({
        'depth': np.tile(depths, 6),
        'time': np.repeat([datetime.utcfromtimestamp(x) for x in epoch_times], 3),
        'value': np.tile(temps, 6),
    })
    ts = TimeSeries.from_dataframe(df,
                                   output_directory=self.output_directory,
                                   output_filename=filename,
                                   latitude=self.latitude,
                                   longitude=self.longitude,
                                   station_name=self.station_name,
                                   global_attributes=self.global_attributes,
                                   variable_name='temperature',
                                   variable_attributes=attrs,
                                   attempts=4)
    ts.add_instrument_variable('temperature')

    nc = netCDF4.Dataset(os.path.join(self.output_directory, filename))
    assert nc is not None

    # Basic metadata on all timeseries
    self.assertEqual(nc.cdm_data_type, 'Station')
    self.assertEqual(nc.geospatial_lat_units, 'degrees_north')
    self.assertEqual(nc.geospatial_lon_units, 'degrees_east')
    self.assertEqual(nc.geospatial_vertical_units, 'meters')
    self.assertEqual(nc.geospatial_vertical_positive, 'down')
    self.assertEqual(nc.featureType, 'timeSeriesProfile')
    self.assertEqual(nc.geospatial_vertical_resolution, '1 1')
    self.assertEqual(nc.geospatial_vertical_min, 0)
    self.assertEqual(nc.geospatial_vertical_max, 2)

    # Coordinate variables: one time per profile, one z per depth level.
    assert nc.variables.get('time').size == len(epoch_times)
    assert nc.variables.get('time')[:].dtype == np.int32
    assert nc.variables.get('z').size == len(depths)
    assert nc.variables.get('z').positive == 'down'
    assert nc.variables.get('z')[:].dtype == np.int32

    # Data variable: (time x depth) grid holding the tiled values.
    assert nc.variables.get('temperature').size == len(epoch_times) * len(depths)
    assert (nc.variables.get('temperature')[:] == np.tile(temps, 6).reshape(6, 3)).all()
def parse_type_1(output_format, site_id, contents, output, csv_link):
    """Parse a "type 1" USGS NWIS rapid-deployment file and write NetCDF.

    The input is the USGS tab-delimited format: '#'-prefixed comment lines,
    an ``agency_cd ...`` header row, one format row (``5s 15s 20d ...``),
    then the data rows, e.g.::

        agency_cd  site_no          datetime          tz_cd  03_00035  03_00035_cd ...
        USGS       395740074482628  2012-10-28 13:00  EST    4.2       P ...

    Column names such as ``03_00035`` are DD/parameter codes; the ones we
    know how to translate are listed in ``variable_map`` below.  Station
    position comes from a separate metadata endpoint, e.g.
    http://waterservices.usgs.gov/nwis/site/?sites=395740074482628

    :param output_format: 'axiom' (one NetCDF file per sensor) or
        'cf16' (a single CF-1.6 station file containing every variable).
    :param site_id: USGS site identifier.
    :param contents: raw text of the data file.
    :param output: output directory.
    :param csv_link: URL the data was fetched from (recorded in `history`).
    """
    variable_map = {
        '01_00065': {
            'long_name': 'Gage height',
            'geoid_name': 'NAVD88',
            'vertical_datum': 'NAVD88',
            'water_surface_reference_datum': 'NAVD88',
            'standard_name': 'water_surface_height_above_reference_datum',
            'units': 'feet'
        },
        '03_00035': {
            'long_name': 'Wind Speed',
            'standard_name': 'wind_speed',
            'units': 'mph'
        },
        '04_00035': {
            'long_name': 'Wind Gust',
            'standard_name': 'wind_speed_of_gust',
            'units': 'mph'
        },
        '05_00035': {
            'long_name': 'Wind Speed',
            'standard_name': 'wind_speed',
            'units': 'mph'
        },
        '06_00035': {
            'long_name': 'Wind Gust',
            'standard_name': 'wind_speed_of_gust',
            'units': 'mph'
        },
        '04_00036': {
            'long_name': 'Wind Direction',
            'standard_name': 'wind_from_direction',
            'units': 'degrees'
        },
        '02_00036': {
            'long_name': 'Wind Direction',
            'standard_name': 'wind_from_direction',
            'units': 'degrees'
        },
        '05_00025': {
            'long_name': 'Air Pressure',
            'standard_name': 'air_pressure',
            'units': 'mm of mercury'
        },
        '07_00025': {
            'long_name': 'Air Pressure',
            'standard_name': 'air_pressure',
            'units': 'mm of mercury'
        },
        '09_00025': {
            'long_name': 'Air Pressure',
            'standard_name': 'air_pressure',
            'units': 'mm of mercury'
        },
        '03_00045': {
            'long_name': 'Total Precipitation',
            'standard_name': 'lwe_thickness_of_precipitation_amount',
            'units': 'inches'
        },
        '08_00045': {
            'long_name': 'Total Precipitation',
            'standard_name': 'lwe_thickness_of_precipitation_amount',
            'units': 'inches'
        },
        '09_00045': {
            'long_name': 'Total Precipitation',
            'standard_name': 'lwe_thickness_of_precipitation_amount',
            'units': 'inches'
        },
        '06_00052': {
            'long_name': 'Relative Humidity',
            'standard_name': 'relative_humidity',
            'units': 'percent'
        },
        '07_00052': {
            'long_name': 'Relative Humidity',
            'standard_name': 'relative_humidity',
            'units': 'percent'
        },
        '08_00052': {
            'long_name': 'Relative Humidity',
            'standard_name': 'relative_humidity',
            'units': 'percent'
        },
        '05_00020': {
            'long_name': 'Air Temperature',
            'standard_name': 'air_temperature',
            'units': 'degrees_Celsius'
        },
        '06_00020': {
            'long_name': 'Air Temperature',
            'standard_name': 'air_temperature',
            'units': 'degrees_Celsius'
        },
        '07_00020': {
            'long_name': 'Air Temperature',
            'standard_name': 'air_temperature',
            'units': 'degrees_Celsius'
        },
        '19_63160': {
            'long_name': 'Water Surface Height Above Reference Datum (NAVD88)',
            'geoid_name': 'NAVD88',
            'vertical_datum': 'NAVD88',
            'water_surface_reference_datum': 'NAVD88',
            'standard_name': 'water_surface_height_above_reference_datum',
            'units': 'feet'
        },
        '01_63160': {
            'long_name': 'Water Surface Height Above Reference Datum (NAVD88)',
            'geoid_name': 'NAVD88',
            'vertical_datum': 'NAVD88',
            'water_surface_reference_datum': 'NAVD88',
            'standard_name': 'water_surface_height_above_reference_datum',
            'units': 'feet'
        },
    }

    # Get metadata (lat/lon/elevation/name) from a separate endpoint.
    d = requests.get(
        "http://waterservices.usgs.gov/nwis/site/?sites={!s}".format(site_id))
    try:
        d.raise_for_status()
    except requests.exceptions.HTTPError:
        logger.error(
            "Could not find lat/lon endpoint for station {!s}, skipping. Status code: {!s}"
            .format(site_id, d.status_code))
        return

    _, hz, dz = split_file(d.text, "agency_cd")
    # Strip off the one (column-format) line after the headers
    dz = dz[1:]
    dfz = pd.DataFrame(dz, columns=hz)
    lat = float(dfz["dec_lat_va"][0])
    lon = float(dfz["dec_long_va"][0])
    sensor_vertical_datum = dfz["alt_datum_cd"][0] or "NAVD88"
    try:
        z = float(dfz["alt_va"][0])
    except ValueError:
        z = 0.
    loc = "POINT({!s} {!s} {!s})".format(lon, lat, z)
    name = dfz["station_nm"][0]

    comments, headers, data = split_file(contents, "agency_cd")
    df = pd.DataFrame(data, columns=headers)

    fillvalue = -9999.9

    # Combine the date and timezone columns into aware UTC datetimes.
    dates = df["datetime"]
    tz = df["tz_cd"]
    new_dates = list()
    for i in range(len(dates)):
        try:
            new_dates.append(
                parse(dates[i] + " " + tz[i]).astimezone(pytz.utc))
        except Exception:
            # Remove row. Bad date.
            # (Was `except BaseException`, which would also have swallowed
            # KeyboardInterrupt/SystemExit.)
            df.drop(i, axis=0, inplace=True)
            continue
    df['time'] = new_dates
    df['depth'] = [z for x in range(len(df['time']))]

    # Strip out "_cd" columns (quality checks for USGS)
    for h in headers:
        if "_cd" in h:
            df.drop(h, axis=1, inplace=True)

    # Add global attributes to appear in the resulting NetCDF file
    global_attributes = dict(
        title=name,
        summary=
        'USGS Hurricane Sandy Rapid Response Stations. Data acquired from "http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/.',
        keywords=
        "usgs, waterdata, elevation, water, waterlevel, sandy, hurricane, rapid, response, %s" % site_id,
        # NOTE(review): attribute name is misspelled ("vocabulary") but is
        # kept as-is for output compatibility with existing files.
        keywords_vocaublary="None",
        naming_authority='gov.usgs',
        id=site_id,
        cdm_data_type="Station",
        history="NetCDF file generated from {!s}".format(csv_link),
        creator="USGS",
        creator_url="http://waterdata.usgs.gov",
        creator_institution="USGS",
        creator_urn="gov.usgs",
        publisher="Axiom Data Science",
        publisher_uri="http://axiomdatascience.com",
        processing_level="None",
        acknowledgement="None",
        geospatial_bounds=loc,
        geospatial_lat_min=lat,
        geospatial_lat_max=lat,
        geospatial_lon_min=lon,
        geospatial_lon_max=lon,
        license="Freely Distributed",
        date_created=datetime.utcnow().replace(second=0,
                                               microsecond=0).isoformat())

    def to_floats(x):
        # Column values arrive as strings; anything unparseable becomes the
        # fillvalue.
        try:
            return float(x)
        except ValueError:
            return fillvalue

    min_time = df['time'].min()
    max_time = df['time'].max()

    full_station_urn = "urn:ioos:station:{!s}:{!s}".format(
        global_attributes["naming_authority"], site_id)

    if output_format == 'cf16':
        # One file for the whole station; variables are appended in the loop.
        output_filename = '{}_{}-{}.nc'.format(
            site_id, min_time.strftime('%Y%m%dT%H%M%S'),
            max_time.strftime('%Y%m%dT%H%M%S'))
        times = [calendar.timegm(x.timetuple()) for x in df["time"]]
        verticals = df['depth'].values
        ts = TimeSeries(output,
                        latitude=lat,
                        longitude=lon,
                        station_name=full_station_urn,
                        global_attributes=global_attributes,
                        output_filename=output_filename,
                        times=times,
                        verticals=verticals,
                        vertical_axis_name='height',
                        vertical_positive='down')

    for var in df.columns:
        if var in [
                'datetime', 'time', 'depth', 'tz_cd', 'site_no', 'agency_cd'
        ]:
            continue
        try:
            var_meta = variable_map[var]
        except KeyError:
            logger.error(
                "Variable {!s} was not found in variable map!".format(var))
            continue
        # Work on a copy so the feet->meters unit rewrite below does not
        # mutate the shared `variable_map` entry and poison later calls.
        var_meta = dict(var_meta)

        # Convert to floats
        df[var] = df[var].map(to_floats)

        # Change feet to meters
        if var_meta["units"] in ["feet", "ft"]:
            df[var] = np.asarray(
                [v * 0.3048 if v != fillvalue else v for v in df[var]])
            var_meta["units"] = "meters"

        if output_format == 'axiom':
            # One file per sensor.
            full_sensor_urn = "urn:ioos:sensor:{!s}:{!s}:{!s}".format(
                global_attributes["naming_authority"], site_id,
                var_meta["standard_name"])
            output_directory = os.path.join(output, full_sensor_urn)
            output_filename = '{}_{}-{}.nc'.format(
                var, min_time.strftime('%Y%m%dT%H%M%S'),
                max_time.strftime('%Y%m%dT%H%M%S'))
            ts = TimeSeries.from_dataframe(
                df,
                output_directory,
                output_filename,
                lat,
                lon,
                full_station_urn,
                global_attributes,
                var_meta["standard_name"],
                var_meta,
                sensor_vertical_datum=sensor_vertical_datum,
                fillvalue=fillvalue,
                data_column=var,
                vertical_axis_name='height',
                vertical_positive='down')
            ts.add_instrument_metadata(urn=full_sensor_urn)
            # Release the per-sensor NetCDF handle (previously leaked).
            ts.close()
        elif output_format == 'cf16':
            # Variable names shouldn't start with a number
            try:
                int(var[0])
                variable_name = 'v_{}'.format(var)
            except ValueError:  # was a bare `except:`
                variable_name = var
            ts.add_variable(variable_name,
                            values=df[var].values,
                            attributes=var_meta,
                            fillvalue=fillvalue,
                            sensor_vertical_datum=sensor_vertical_datum)

    if output_format == 'cf16':
        # The station file collects every variable; close once at the end
        # (previously leaked).
        ts.close()
def parse_type_2(output_format, site_id, contents, output, csv_link):
    """Parse a "type 2" USGS storm-tide sensor file and write NetCDF.

    The input is a whitespace-delimited file whose '#'-prefixed header
    carries the station metadata, e.g.::

        # Site id = SSS-NY-WES-001WL
        # Sensor location latitude 40.942755
        # Sensor location longitude -73.719828
        # Sensor elevation above NAVD 88 = -3.97 ft
        date_time_GMT   elevation   nearest_barometric_sensor_psi
        10-28-2012 06:00:00   0.88   14.5145
        ...

    Timestamps are GMT.  Elevations are relative to NAVD 88.

    :param output_format: 'axiom' (one NetCDF file per sensor) or
        'cf16' (a single CF-1.6 station file containing every variable).
    :param site_id: fallback station id; overridden by the "Site id" header.
    :param contents: raw text of the data file.
    :param output: output directory.
    :param csv_link: URL the data was fetched from (recorded in `history`).
    """
    variable_map = {
        'elevation': {
            'long_name': 'Water Level Elevation above Reference Datum (NAVD88)',
            'geoid_name': 'NAVD88',
            'vertical_datum': 'NAVD88',
            'water_surface_reference_datum': 'NAVD88',
            'standard_name': 'water_surface_height_above_reference_datum',
            'units': 'feet'
        },
    }

    fillvalue = -9999.9

    def to_floats(x):
        # Column values arrive as strings; anything unparseable becomes the
        # fillvalue.
        try:
            return float(x)
        except ValueError:
            return fillvalue

    comments, headers, data = split_file(contents, "date_time_GMT")
    df = pd.DataFrame(data, columns=headers)

    lat = None
    lon = None
    z = 0
    name = site_id
    sensor_vertical_datum = "NAVD88"
    for c in comments:
        # Each metadata value is the last whitespace-separated token on its
        # comment line.
        if "Sensor location latitude" in c:
            lat = float(
                list(filter(None, map(lambda x: x.strip(), c.split(" "))))[-1])
        elif "Sensor location longitude" in c:
            lon = float(
                list(filter(None, map(lambda x: x.strip(), c.split(" "))))[-1])
        elif "Site id" in c:
            site_id = list(filter(None, map(lambda x: x.strip(),
                                            c.split(" "))))[-1]
            name = site_id
        elif "Sensor elevation" in c:
            # e.g. "# Sensor elevation above NAVD 88 = -3.97 ft"
            sensor_vertical_datum = "".join(c.split("=")[0].split(" ")[4:6])
            tokens = list(filter(None, map(lambda x: x.strip(), c.split(" "))))
            z = float(tokens[-2])
            if tokens[-1] in ["feet", "ft"]:
                z *= 0.3048

    loc = "POINT({!s} {!s} {!s})".format(lon, lat, z)

    df['time'] = df["date_time_GMT"].map(lambda x: parse(x + " UTC"))
    df['depth'] = [z for x in range(len(df['time']))]

    # Add global attributes to appear in the resulting NetCDF file
    global_attributes = dict(
        title=name,
        summary=
        'USGS Hurricane Sandy Rapid Response Stations. Data acquired from http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/.',
        keywords=
        "usgs, waterdata, elevation, water, waterlevel, sandy, hurricane, rapid, response, %s" % site_id,
        # NOTE(review): attribute name is misspelled ("vocabulary") but is
        # kept as-is for output compatibility with existing files.
        keywords_vocaublary="None",
        naming_authority='gov.usgs',
        id=site_id,
        cdm_data_type="Station",
        history="NetCDF file generated from {!s}".format(csv_link),
        creator="USGS",
        creator_url="http://waterdata.usgs.gov",
        creator_institution="USGS",
        creator_urn="gov.usgs",
        publisher="Axiom Data Science",
        publisher_uri="http://axiomdatascience.com",
        processing_level="None",
        acknowledgement="None",
        geospatial_bounds=loc,
        geospatial_lat_min=lat,
        geospatial_lat_max=lat,
        geospatial_lon_min=lon,
        geospatial_lon_max=lon,
        license="Freely Distributed",
        date_created=datetime.utcnow().replace(second=0,
                                               microsecond=0).isoformat())

    full_station_urn = "urn:ioos:station:{!s}:{!s}".format(
        global_attributes["naming_authority"], site_id)

    min_time = df["time"].min()
    max_time = df["time"].max()

    if output_format == 'cf16':
        # One file for the whole station; variables are appended in the loop.
        times = [calendar.timegm(x.timetuple()) for x in df['time']]
        verticals = df['depth'].values
        output_filename = '{}_{}-{}.nc'.format(
            site_id, min_time.strftime('%Y%m%dT%H%M%S'),
            max_time.strftime('%Y%m%dT%H%M%S'))
        ts = TimeSeries(output,
                        latitude=lat,
                        longitude=lon,
                        station_name=full_station_urn,
                        global_attributes=global_attributes,
                        output_filename=output_filename,
                        times=times,
                        verticals=verticals)

    for var in df.columns:
        if var in ['date_time_GMT', 'time', 'depth']:
            continue

        # Variable names shouldn't start with a number.
        try:
            int(var[0])
            variable_name = 'v_{}'.format(var)
        except ValueError:  # was a bare `except:`
            variable_name = var

        try:
            var_meta = variable_map[var]
        except KeyError:
            logger.error(
                "Variable {!s} was not found in variable map!".format(var))
            continue
        # Work on a copy so the feet->meters unit rewrite below does not
        # mutate the shared `variable_map` entry and poison later calls.
        var_meta = dict(var_meta)

        # Convert to floats
        df[var] = df[var].map(to_floats)

        # Change feet to meters
        if var_meta["units"] in ["feet", "ft"]:
            df[var] = [v * 0.3048 if v != fillvalue else v for v in df[var]]
            var_meta["units"] = "meters"

        if output_format == 'axiom':
            # One file per sensor.
            full_sensor_urn = "urn:ioos:sensor:{!s}:{!s}:{!s}".format(
                global_attributes["naming_authority"], site_id,
                var_meta["standard_name"])
            output_directory = os.path.join(output, full_sensor_urn)
            output_filename = '{}_{}-{}.nc'.format(
                var, min_time.strftime('%Y%m%dT%H%M%S'),
                max_time.strftime('%Y%m%dT%H%M%S'))
            ts = TimeSeries.from_dataframe(
                df,
                output_directory,
                output_filename,
                lat,
                lon,
                full_station_urn,
                global_attributes,
                var_meta["standard_name"],
                var_meta,
                sensor_vertical_datum=sensor_vertical_datum,
                fillvalue=fillvalue,
                data_column=var)
            ts.add_instrument_metadata(urn=full_sensor_urn)
            # Release the per-sensor NetCDF handle (previously leaked).
            ts.close()
        elif output_format == 'cf16':
            ts.add_variable(variable_name,
                            values=df[var].values,
                            attributes=var_meta,
                            fillvalue=fillvalue,
                            sensor_vertical_datum=sensor_vertical_datum)

    if output_format == 'cf16':
        # The station file collects every variable; close once at the end
        # (previously leaked).
        ts.close()
def parse_type_1(output_format, site_id, contents, output, csv_link):
    """Parse a "type 1" USGS NWIS rapid-deployment file and write NetCDF.

    NOTE(review): this re-definition shadows the ``parse_type_1`` defined
    earlier in this module (the later definition wins at import time); the
    two differ slightly (vertical axis naming) — confirm which is intended
    and delete the other.

    The input is the USGS tab-delimited format: '#'-prefixed comment lines,
    an ``agency_cd ...`` header row, one format row (``5s 15s 20d ...``),
    then the data rows, e.g.::

        agency_cd  site_no          datetime          tz_cd  03_00035  03_00035_cd ...
        USGS       395740074482628  2012-10-28 13:00  EST    4.2       P ...

    Column names such as ``03_00035`` are DD/parameter codes; the ones we
    know how to translate are listed in ``variable_map`` below.  Station
    position comes from a separate metadata endpoint, e.g.
    http://waterservices.usgs.gov/nwis/site/?sites=395740074482628

    :param output_format: 'axiom' (one NetCDF file per sensor) or
        'cf16' (a single CF-1.6 station file containing every variable).
    :param site_id: USGS site identifier.
    :param contents: raw text of the data file.
    :param output: output directory.
    :param csv_link: URL the data was fetched from (recorded in `history`).
    """
    variable_map = {
        '01_00065' : {'long_name' : 'Gage height', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
        '03_00035' : {'long_name' : 'Wind Speed', 'standard_name' : 'wind_speed', 'units': 'mph'},
        '04_00035' : {'long_name' : 'Wind Gust', 'standard_name' : 'wind_speed_of_gust', 'units': 'mph'},
        '05_00035' : {'long_name' : 'Wind Speed', 'standard_name' : 'wind_speed', 'units': 'mph'},
        '06_00035' : {'long_name' : 'Wind Gust', 'standard_name' : 'wind_speed_of_gust', 'units': 'mph'},
        '04_00036' : {'long_name' : 'Wind Direction', 'standard_name' : 'wind_from_direction', 'units': 'degrees'},
        '02_00036' : {'long_name' : 'Wind Direction', 'standard_name' : 'wind_from_direction', 'units': 'degrees'},
        '05_00025' : {'long_name' : 'Air Pressure', 'standard_name' : 'air_pressure', 'units': 'mm of mercury'},
        '07_00025' : {'long_name' : 'Air Pressure', 'standard_name' : 'air_pressure', 'units': 'mm of mercury'},
        '09_00025' : {'long_name' : 'Air Pressure', 'standard_name' : 'air_pressure', 'units': 'mm of mercury'},
        '03_00045' : {'long_name' : 'Total Precipitation', 'standard_name' : 'lwe_thickness_of_precipitation_amount', 'units': 'inches'},
        '08_00045' : {'long_name' : 'Total Precipitation', 'standard_name' : 'lwe_thickness_of_precipitation_amount', 'units': 'inches'},
        '09_00045' : {'long_name' : 'Total Precipitation', 'standard_name' : 'lwe_thickness_of_precipitation_amount', 'units': 'inches'},
        '06_00052' : {'long_name' : 'Relative Humidity', 'standard_name' : 'relative_humidity', 'units': 'percent'},
        '07_00052' : {'long_name' : 'Relative Humidity', 'standard_name' : 'relative_humidity', 'units': 'percent'},
        '08_00052' : {'long_name' : 'Relative Humidity', 'standard_name' : 'relative_humidity', 'units': 'percent'},
        '05_00020' : {'long_name' : 'Air Temperature', 'standard_name' : 'air_temperature', 'units': 'degrees_Celsius'},
        '06_00020' : {'long_name' : 'Air Temperature', 'standard_name' : 'air_temperature', 'units': 'degrees_Celsius'},
        '07_00020' : {'long_name' : 'Air Temperature', 'standard_name' : 'air_temperature', 'units': 'degrees_Celsius'},
        '19_63160' : {'long_name' : 'Water Surface Height Above Reference Datum (NAVD88)', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
        '01_63160' : {'long_name' : 'Water Surface Height Above Reference Datum (NAVD88)', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
    }

    # Get metadata (lat/lon/elevation/name) from a separate endpoint.
    d = requests.get("http://waterservices.usgs.gov/nwis/site/?sites={!s}".format(site_id))
    try:
        d.raise_for_status()
    except requests.exceptions.HTTPError:
        logger.error("Could not find lat/lon endpoint for station {!s}, skipping. Status code: {!s}".format(site_id, d.status_code))
        return

    _, hz, dz = split_file(d.text, "agency_cd")
    # Strip off the one (column-format) line after the headers
    dz = dz[1:]
    dfz = pd.DataFrame(dz, columns=hz)
    lat = float(dfz["dec_lat_va"][0])
    lon = float(dfz["dec_long_va"][0])
    sensor_vertical_datum = dfz["alt_datum_cd"][0] or "NAVD88"
    try:
        z = float(dfz["alt_va"][0])
    except ValueError:
        z = 0.
    loc = "POINT({!s} {!s} {!s})".format(lon, lat, z)
    name = dfz["station_nm"][0]

    comments, headers, data = split_file(contents, "agency_cd")
    df = pd.DataFrame(data, columns=headers)

    fillvalue = -9999.9

    # Combine the date and timezone columns into aware UTC datetimes.
    dates = df["datetime"]
    tz = df["tz_cd"]
    new_dates = list()
    for i in range(len(dates)):
        try:
            new_dates.append(parse(dates[i] + " " + tz[i]).astimezone(pytz.utc))
        except Exception:
            # Remove row. Bad date.
            # (Was `except BaseException`, which would also have swallowed
            # KeyboardInterrupt/SystemExit.)
            df.drop(i, axis=0, inplace=True)
            continue
    df['time'] = new_dates
    df['depth'] = [ z for x in range(len(df['time'])) ]

    # Strip out "_cd" columns (quality checks for USGS)
    for h in headers:
        if "_cd" in h:
            df.drop(h, axis=1, inplace=True)

    # Add global attributes to appear in the resulting NetCDF file
    global_attributes = dict(
        title=name,
        summary='USGS Hurricane Sandy Rapid Response Stations. Data acquired from "http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/.',
        keywords="usgs, waterdata, elevation, water, waterlevel, sandy, hurricane, rapid, response, %s" % site_id,
        # NOTE(review): attribute name is misspelled ("vocabulary") but is
        # kept as-is for output compatibility with existing files.
        keywords_vocaublary="None",
        naming_authority='gov.usgs',
        id=site_id,
        cdm_data_type="Station",
        history="NetCDF file generated from {!s}".format(csv_link),
        creator="USGS",
        creator_url="http://waterdata.usgs.gov",
        creator_institution="USGS",
        creator_urn="gov.usgs",
        publisher="Axiom Data Science",
        publisher_uri="http://axiomdatascience.com",
        processing_level="None",
        acknowledgement="None",
        geospatial_bounds=loc,
        geospatial_lat_min=lat,
        geospatial_lat_max=lat,
        geospatial_lon_min=lon,
        geospatial_lon_max=lon,
        license="Freely Distributed",
        date_created=datetime.utcnow().replace(second=0, microsecond=0).isoformat()
    )

    def to_floats(x):
        # Column values arrive as strings; anything unparseable becomes the
        # fillvalue.
        try:
            return float(x)
        except ValueError:
            return fillvalue

    min_time = df['time'].min()
    max_time = df['time'].max()

    full_station_urn = "urn:ioos:station:{!s}:{!s}".format(global_attributes["naming_authority"], site_id)

    if output_format == 'cf16':
        # One file for the whole station; variables are appended in the loop.
        output_filename = '{}_{}-{}.nc'.format(site_id, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
        times = [ calendar.timegm(x.timetuple()) for x in df["time"] ]
        verticals = df['depth'].values
        ts = TimeSeries(output, latitude=lat, longitude=lon, station_name=full_station_urn, global_attributes=global_attributes, output_filename=output_filename, times=times, verticals=verticals, vertical_axis_name='z')

    for var in df.columns:
        if var in ['datetime', 'time', 'depth', 'tz_cd', 'site_no', 'agency_cd']:
            continue
        try:
            var_meta = variable_map[var]
        except KeyError:
            logger.error("Variable {!s} was not found in variable map!".format(var))
            continue
        # Work on a copy so the feet->meters unit rewrite below does not
        # mutate the shared `variable_map` entry and poison later calls.
        var_meta = dict(var_meta)

        # Convert to floats
        df[var] = df[var].map(to_floats)

        # Change feet to meters
        if var_meta["units"] in ["feet", "ft"]:
            df[var] = np.asarray([ v * 0.3048 if v != fillvalue else v for v in df[var] ])
            var_meta["units"] = "meters"

        if output_format == 'axiom':
            # One file per sensor.
            full_sensor_urn = "urn:ioos:sensor:{!s}:{!s}:{!s}".format(global_attributes["naming_authority"], site_id, var_meta["standard_name"])
            output_directory = os.path.join(output, full_sensor_urn)
            output_filename = '{}_{}-{}.nc'.format(var, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
            ts = TimeSeries.from_dataframe(df, output_directory, output_filename, lat, lon, full_station_urn, global_attributes, var_meta["standard_name"], var_meta, sensor_vertical_datum=sensor_vertical_datum, fillvalue=fillvalue, data_column=var, vertical_axis_name='height')
            ts.add_instrument_metadata(urn=full_sensor_urn)
            ts.close()
        elif output_format == 'cf16':
            # Variable names shouldn't start with a number
            try:
                int(var[0])
                variable_name = 'v_{}'.format(var)
            except ValueError:  # was a bare `except:`
                variable_name = var
            ts.add_variable(variable_name, values=df[var].values, attributes=var_meta, fillvalue=fillvalue, sensor_vertical_datum=sensor_vertical_datum)

    if output_format == 'cf16':
        ts.close()
def parse_type_2(output_format, site_id, contents, output, csv_link):
    """Parse a "type 2" USGS storm-tide sensor file and write NetCDF.

    NOTE(review): this re-definition shadows the ``parse_type_2`` defined
    earlier in this module (the later definition wins at import time);
    confirm which is intended and delete the other.

    The input is a whitespace-delimited file whose '#'-prefixed header
    carries the station metadata, e.g.::

        # Site id = SSS-NY-WES-001WL
        # Sensor location latitude 40.942755
        # Sensor location longitude -73.719828
        # Sensor elevation above NAVD 88 = -3.97 ft
        date_time_GMT   elevation   nearest_barometric_sensor_psi
        10-28-2012 06:00:00   0.88   14.5145
        ...

    Timestamps are GMT.  Elevations are relative to NAVD 88.

    :param output_format: 'axiom' (one NetCDF file per sensor) or
        'cf16' (a single CF-1.6 station file containing every variable).
    :param site_id: fallback station id; overridden by the "Site id" header.
    :param contents: raw text of the data file.
    :param output: output directory.
    :param csv_link: URL the data was fetched from (recorded in `history`).
    """
    variable_map = {
        'elevation' : {'long_name' : 'Water Level Elevation above Reference Datum (NAVD88)', 'geoid_name' : 'NAVD88', 'vertical_datum' : 'NAVD88', 'water_surface_reference_datum' : 'NAVD88', 'standard_name' : 'water_surface_height_above_reference_datum', 'units': 'feet'},
    }

    fillvalue = -9999.9

    def to_floats(x):
        # Column values arrive as strings; anything unparseable becomes the
        # fillvalue.
        try:
            return float(x)
        except ValueError:
            return fillvalue

    comments, headers, data = split_file(contents, "date_time_GMT")
    df = pd.DataFrame(data, columns=headers)

    lat = None
    lon = None
    z = 0
    name = site_id
    sensor_vertical_datum = "NAVD88"
    for c in comments:
        # Each metadata value is the last whitespace-separated token on its
        # comment line.  `filter()` returns an iterator in Python 3, so it
        # must be materialized with list() before indexing (was
        # `filter(...)[-1]`, a TypeError under Python 3).
        if "Sensor location latitude" in c:
            lat = float(list(filter(None, map(lambda x: x.strip(), c.split(" "))))[-1])
        elif "Sensor location longitude" in c:
            lon = float(list(filter(None, map(lambda x: x.strip(), c.split(" "))))[-1])
        elif "Site id" in c:
            site_id = list(filter(None, map(lambda x: x.strip(), c.split(" "))))[-1]
            name = site_id
        elif "Sensor elevation" in c:
            # e.g. "# Sensor elevation above NAVD 88 = -3.97 ft"
            sensor_vertical_datum = "".join(c.split("=")[0].split(" ")[4:6])
            tokens = list(filter(None, map(lambda x: x.strip(), c.split(" "))))
            z = float(tokens[-2])
            if tokens[-1] in ["feet", "ft"]:
                z *= 0.3048

    loc = "POINT({!s} {!s} {!s})".format(lon, lat, z)

    df['time'] = df["date_time_GMT"].map(lambda x: parse(x + " UTC"))
    df['depth'] = [ z for x in range(len(df['time'])) ]

    # Add global attributes to appear in the resulting NetCDF file
    global_attributes = dict(
        title=name,
        summary='USGS Hurricane Sandy Rapid Response Stations. Data acquired from http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/.',
        keywords="usgs, waterdata, elevation, water, waterlevel, sandy, hurricane, rapid, response, %s" % site_id,
        # NOTE(review): attribute name is misspelled ("vocabulary") but is
        # kept as-is for output compatibility with existing files.
        keywords_vocaublary="None",
        naming_authority='gov.usgs',
        id=site_id,
        cdm_data_type="Station",
        history="NetCDF file generated from {!s}".format(csv_link),
        creator="USGS",
        creator_url="http://waterdata.usgs.gov",
        creator_institution="USGS",
        creator_urn="gov.usgs",
        publisher="Axiom Data Science",
        publisher_uri="http://axiomdatascience.com",
        processing_level="None",
        acknowledgement="None",
        geospatial_bounds=loc,
        geospatial_lat_min=lat,
        geospatial_lat_max=lat,
        geospatial_lon_min=lon,
        geospatial_lon_max=lon,
        license="Freely Distributed",
        date_created=datetime.utcnow().replace(second=0, microsecond=0).isoformat()
    )

    full_station_urn = "urn:ioos:station:{!s}:{!s}".format(global_attributes["naming_authority"], site_id)

    min_time = df["time"].min()
    max_time = df["time"].max()

    if output_format == 'cf16':
        # One file for the whole station; variables are appended in the loop.
        times = [ calendar.timegm(x.timetuple()) for x in df['time'] ]
        verticals = df['depth'].values
        output_filename = '{}_{}-{}.nc'.format(site_id, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
        ts = TimeSeries(output, latitude=lat, longitude=lon, station_name=full_station_urn, global_attributes=global_attributes, output_filename=output_filename, times=times, verticals=verticals)

    for var in df.columns:
        if var in ['date_time_GMT', 'time', 'depth']:
            continue

        # Variable names shouldn't start with a number.
        try:
            int(var[0])
            variable_name = 'v_{}'.format(var)
        except ValueError:  # was a bare `except:`
            variable_name = var

        try:
            var_meta = variable_map[var]
        except KeyError:
            logger.error("Variable {!s} was not found in variable map!".format(var))
            continue
        # Work on a copy so the feet->meters unit rewrite below does not
        # mutate the shared `variable_map` entry and poison later calls.
        var_meta = dict(var_meta)

        # Convert to floats
        df[var] = df[var].map(to_floats)

        # Change feet to meters
        if var_meta["units"] in ["feet", "ft"]:
            df[var] = [ v * 0.3048 if v != fillvalue else v for v in df[var] ]
            var_meta["units"] = "meters"

        if output_format == 'axiom':
            # One file per sensor.
            full_sensor_urn = "urn:ioos:sensor:{!s}:{!s}:{!s}".format(global_attributes["naming_authority"], site_id, var_meta["standard_name"])
            output_directory = os.path.join(output, full_sensor_urn)
            output_filename = '{}_{}-{}.nc'.format(var, min_time.strftime('%Y%m%dT%H%M%S'), max_time.strftime('%Y%m%dT%H%M%S'))
            ts = TimeSeries.from_dataframe(df, output_directory, output_filename, lat, lon, full_station_urn, global_attributes, var_meta["standard_name"], var_meta, sensor_vertical_datum=sensor_vertical_datum, fillvalue=fillvalue, data_column=var)
            ts.add_instrument_metadata(urn=full_sensor_urn)
            ts.close()
        elif output_format == 'cf16':
            ts.add_variable(variable_name, values=df[var].values, attributes=var_meta, fillvalue=fillvalue, sensor_vertical_datum=sensor_vertical_datum)

    if output_format == 'cf16':
        ts.close()
def main(output, station, datatype):
    """Convert one station's CSV data files of a given type to a NetCDF file per variable.

    Reads every CSV under ``data/<datatype>`` (relative to this script), concatenates
    them into one DataFrame, then writes a ``TimeSeries`` NetCDF file for each mapped
    data column.

    Parameters
    ----------
    output : str
        Directory the NetCDF files are written into (passed to ``TimeSeries.from_dataframe``).
    station : str
        Key into the module-level ``stations`` metadata dict.
    datatype : str
        One of ``'met'``, ``'waves'`` or ``'currents'``; selects the CSV header list
        and the variable-metadata mapping.

    Raises
    ------
    ValueError
        If ``datatype`` is not one of the three supported values.
    """
    if datatype == 'met':
        headers = met_header
        mapping = met_mapping
    elif datatype == 'waves':
        headers = waves_header
        mapping = waves_mapping
    elif datatype == 'currents':
        headers = currents_header
        mapping = currents_mapping
    else:
        # Previously an unknown datatype fell through and crashed later with a
        # NameError on `headers`; fail fast with a clear message instead.
        raise ValueError("Unknown datatype: {!s}".format(datatype))

    def dp(*args):
        # Parse the split date columns into a datetime; unparseable rows become
        # NaN so they can be dropped below.
        # NOTE(review): the parts are joined with "" but the format string has
        # spaces — presumably the CSV values carry their own separators; confirm
        # against the data files.
        datestr = "".join([str(x) for x in args])
        try:
            return datetime.strptime(datestr, '%Y %m %d %H %M %S')
        except ValueError:
            return np.nan

    datapath = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'data', datatype))

    # Accumulate per-file frames and concatenate once at the end.
    # (DataFrame.append was removed in pandas 2.0, and repeated appends were
    # quadratic anyway.)
    frames = []
    for csv_file in sorted(os.listdir(datapath)):
        f = os.path.join(datapath, csv_file)
        cf = pd.read_csv(
            f,
            header=None,
            names=headers,
            parse_dates={
                'time': ['year', 'month', 'day', 'hour', 'minute', 'second']
            },
            date_parser=dp)
        # Drop rows whose timestamp could not be parsed.
        cf.dropna(subset=['time'], inplace=True)
        frames.append(cf)
    df = pd.concat(frames) if frames else None

    fillvalue = -9999.9

    # Station metadata
    stat_meta = stations[station]

    station_urn = IoosUrn(asset_type='station',
                          authority=global_attributes['naming_authority'],
                          label=stat_meta['title'])

    for var in df.columns:
        try:
            var_meta = mapping[var]
        except KeyError:
            logger.error(
                "Variable {!s} was not found in variable map!".format(var))
            continue

        sensor_urn = urnify(station_urn.authority, station_urn.label, var_meta)

        # Per-variable copy of the global attributes so each file can carry its
        # own keywords/title/description.
        gas = copy(global_attributes)
        gas['keywords'] = var_meta['keywords']
        gas['title'] = stat_meta['title']
        gas['description'] = stat_meta['description']

        # These keys drive processing here and must not leak into the NetCDF
        # variable attributes.
        skip_variable_attributes = [
            'keywords', 'height_above_site', 'depth_below_surface', 'add_offset'
        ]
        vas = {
            k: v
            for k, v in var_meta.items() if k not in skip_variable_attributes
        }

        if var_meta.get('height_above_site') and stat_meta.get('site_height'):
            # Convert to positive down
            df['depth'] = -1 * (stat_meta['site_height'] +
                                var_meta['height_above_site'])
        else:
            df['depth'] = var_meta.get('depth_below_surface', np.nan)

        if 'add_offset' in var_meta:
            df[var] = df[var] + var_meta['add_offset']

        output_filename = '{}_{}_{}.nc'.format(station_urn.label, datatype,
                                               var_meta['standard_name'])

        ts = TimeSeries.from_dataframe(
            df,
            output,
            output_filename,
            stat_meta['latitude'],
            stat_meta['longitude'],
            station_urn.urn,
            gas,
            var_meta["standard_name"],
            vas,
            sensor_vertical_datum=var_meta.get('vertical_datum'),
            fillvalue=fillvalue,
            data_column=var,
            vertical_axis_name='height',
            vertical_positive='down')
        ts.add_instrument_metadata(urn=sensor_urn)
def main(output_format, output, do_download, download_folder, filesubset=None):
    """Download USGS Sandy rapid-response data files and convert them to NetCDF.

    Parameters
    ----------
    output_format : str
        ``'cf16'`` (one file per station result, variables appended) or
        ``'axiom'`` (one file per variable).
    output : str
        Root output directory for the NetCDF files.
    do_download : bool
        When True, scrape the USGS WAF and (re)download the ``.txt`` files into
        ``download_folder`` before processing.
    download_folder : str
        Local folder holding the downloaded data files.
    filesubset : container of str, optional
        Lower-cased site ids to restrict processing to; all sites when falsy.
    """
    if do_download is True:
        try:
            os.makedirs(download_folder)
        except OSError:
            # Folder already exists.
            pass

        waf = 'http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/'
        r = requests.get(waf)
        soup = BeautifulSoup(r.text, "lxml")
        for link in soup.find_all('a'):
            # Skip non .txt files
            site_id, ext = os.path.splitext(link['href'])
            if ext != ".txt":
                continue
            if filesubset and site_id.lower() not in filesubset:
                # Skip this file!
                continue
            csv_link = waf + link['href']
            logger.info("Downloading '{}'".format(csv_link))
            d = requests.get(csv_link)
            try:
                d.raise_for_status()
            except requests.exceptions.HTTPError:
                logger.error("Could not download: {!s}, skipping. Status code: {!s}".format(csv_link, d.status_code))
                continue
            with open(os.path.join(download_folder, os.path.basename(csv_link)), 'wt') as f:
                f.write(d.text)

    # Yes, this uses lots of RAM, but we need to match up lon/lat positions later on.
    results = []
    for datafile in os.listdir(download_folder):
        site_id = os.path.splitext(os.path.basename(datafile))[0]
        if filesubset and site_id.lower() not in filesubset:
            # Skip this file!
            continue
        with open(os.path.join(download_folder, datafile)) as d:
            contents = d.read()
        r = None
        # Sniff the file format from its header line to pick the right parser.
        for line in contents.split("\n"):
            if "agency_cd" in line:
                r = parse_type_1(output_format, site_id, contents, output)
                break
            elif "date_time_GMT" in line:
                r = parse_type_2(output_format, site_id, contents, output)
                break
            else:
                continue
        if r is None:
            logger.error('Could not process file: {}'.format(datafile))
        else:
            logger.info("Processed {}".format(datafile))
            results.append(r)

    # Group results that share a lon/lat position (groupby requires the sort).
    results = sorted(results, key=attrgetter('lon', 'lat'))
    gresults = groupby(results, attrgetter('lon', 'lat'))

    for (glon, glat), group in gresults:
        groups = [x for x in list(group) if x]
        # Strip off the variable type if need be
        gsite = groups[0].site
        if gsite[-2:] in ['WV', 'BP', 'WL']:
            gsite = gsite[:-2]

        for result in groups:
            gas = get_globals(glat, glon, result.z, result.name, gsite)
            station_urn = IoosUrn(asset_type='station',
                                  authority=gas['naming_authority'],
                                  label=gsite)

            if output_format == 'cf16':
                # If CF, a file for each result dataframe
                times = [calendar.timegm(x.timetuple()) for x in result.df['time']]
                verticals = result.df['depth'].values
                output_filename = '{}.nc'.format(result.site)
                ts = TimeSeries(output,
                                latitude=glat,
                                longitude=glon,
                                station_name=gsite,
                                global_attributes=gas,
                                output_filename=output_filename,
                                times=times,
                                verticals=verticals)

            for var in result.df.columns:
                # Coordinate/bookkeeping columns are not data variables.
                if var in ['date_time_GMT', 'datetime', 'time', 'depth', 'tz_cd', 'site_no', 'agency_cd']:
                    continue
                try:
                    var_meta = copy(variable_map[var])
                except KeyError:
                    logger.error("Variable {!s} was not found in variable map!".format(var))
                    continue

                # Convert to floats
                result.df[var] = result.df[var].map(to_floats)
                # Normalize units to metric, leaving nulls untouched.
                if var_meta["units"].lower() in ["feet", "ft"]:
                    result.df[var] = result.df[var].apply(lambda x: None if pd.isnull(x) else x * 0.3048)
                    var_meta["units"] = "meters"
                elif var_meta["units"].lower() in ["psi"]:
                    result.df[var] = result.df[var].apply(lambda x: None if pd.isnull(x) else x * 68.9476)
                    var_meta["units"] = "mbar"
                elif var_meta["units"].lower() in ['millimeters of mercury']:
                    result.df[var] = result.df[var].apply(lambda x: None if pd.isnull(x) else x * 1.33322)
                    var_meta["units"] = "mbar"

                # Now put the fillvalue we want to be interpreted
                result.df.fillna(fillvalue, inplace=True)

                if output_format == 'axiom':
                    # If Axiom, a file for each variable
                    output_directory = os.path.join(output, gsite)
                    output_filename = '{}_{}.nc'.format(result.site, var_meta['standard_name'])
                    ts = TimeSeries.from_dataframe(result.df,
                                                   output_directory,
                                                   output_filename,
                                                   glat,
                                                   glon,
                                                   station_urn.urn,
                                                   gas,
                                                   var_meta["standard_name"],
                                                   var_meta,
                                                   sensor_vertical_datum='NAVD88',
                                                   fillvalue=fillvalue,
                                                   data_column=var,
                                                   vertical_axis_name='height')
                    sensor_urn = urnify(station_urn.authority, station_urn.label, var_meta)
                    ts.add_instrument_metadata(urn=sensor_urn)
                elif output_format == 'cf16':
                    # If CF, add variable to existing TimeSeries.
                    # Column names that start with a digit are not valid NetCDF
                    # variable names, so prefix them.
                    try:
                        int(var[0])
                        variable_name = 'v_{}'.format(var)
                    except (ValueError, TypeError, IndexError):
                        # Narrowed from `except BaseException`, which also
                        # swallowed KeyboardInterrupt/SystemExit.
                        variable_name = var
                    ts.add_variable(variable_name,
                                    values=result.df[var].values,
                                    attributes=var_meta,
                                    fillvalue=fillvalue,
                                    sensor_vertical_datum='NAVD88')
def main(output_format, output, do_download, download_folder, filesubset=None):
    """Download USGS Sandy rapid-response data files and convert them to NetCDF.

    Parameters
    ----------
    output_format : str
        ``'cf16'`` (one file per station result, variables appended) or
        ``'axiom'`` (one file per variable).
    output : str
        Root output directory for the NetCDF files.
    do_download : bool
        When True, scrape the USGS WAF and (re)download the ``.txt`` files into
        ``download_folder`` before processing.
    download_folder : str
        Local folder holding the downloaded data files.
    filesubset : container of str, optional
        Lower-cased site ids to restrict processing to; all sites when falsy.
    """
    if do_download is True:
        try:
            os.makedirs(download_folder)
        except OSError:
            # Folder already exists.
            pass

        waf = 'http://ga.water.usgs.gov/flood/hurricane/sandy/datafiles/'
        r = requests.get(waf)
        soup = BeautifulSoup(r.text, "lxml")
        for link in soup.find_all('a'):
            # Skip non .txt files
            site_id, ext = os.path.splitext(link['href'])
            if ext != ".txt":
                continue
            if filesubset and site_id.lower() not in filesubset:
                # Skip this file!
                continue
            csv_link = waf + link['href']
            logger.info("Downloading '{}'".format(csv_link))
            d = requests.get(csv_link)
            try:
                d.raise_for_status()
            except requests.exceptions.HTTPError:
                logger.error(
                    "Could not download: {!s}, skipping. Status code: {!s}".
                    format(csv_link, d.status_code))
                continue
            with open(
                    os.path.join(download_folder, os.path.basename(csv_link)),
                    'wt') as f:
                f.write(d.text)

    # Yes, this uses lots of RAM, but we need to match up lon/lat positions later on.
    results = []
    for datafile in os.listdir(download_folder):
        site_id = os.path.splitext(os.path.basename(datafile))[0]
        if filesubset and site_id.lower() not in filesubset:
            # Skip this file!
            continue
        with open(os.path.join(download_folder, datafile)) as d:
            contents = d.read()
        r = None
        # Sniff the file format from its header line to pick the right parser.
        for line in contents.split("\n"):
            if "agency_cd" in line:
                r = parse_type_1(output_format, site_id, contents, output)
                break
            elif "date_time_GMT" in line:
                r = parse_type_2(output_format, site_id, contents, output)
                break
            else:
                continue
        if r is None:
            logger.error('Could not process file: {}'.format(datafile))
        else:
            logger.info("Processed {}".format(datafile))
            results.append(r)

    # Group results that share a lon/lat position (groupby requires the sort).
    results = sorted(results, key=attrgetter('lon', 'lat'))
    gresults = groupby(results, attrgetter('lon', 'lat'))

    for (glon, glat), group in gresults:
        groups = [x for x in list(group) if x]
        # Strip off the variable type if need be
        gsite = groups[0].site
        if gsite[-2:] in ['WV', 'BP', 'WL']:
            gsite = gsite[:-2]

        for result in groups:
            gas = get_globals(glat, glon, result.z, result.name, gsite)
            station_urn = IoosUrn(asset_type='station',
                                  authority=gas['naming_authority'],
                                  label=gsite)

            if output_format == 'cf16':
                # If CF, a file for each result dataframe
                times = [
                    calendar.timegm(x.timetuple()) for x in result.df['time']
                ]
                verticals = result.df['depth'].values
                output_filename = '{}.nc'.format(result.site)
                ts = TimeSeries(output,
                                latitude=glat,
                                longitude=glon,
                                station_name=gsite,
                                global_attributes=gas,
                                output_filename=output_filename,
                                times=times,
                                verticals=verticals)

            for var in result.df.columns:
                # Coordinate/bookkeeping columns are not data variables.
                if var in [
                        'date_time_GMT', 'datetime', 'time', 'depth', 'tz_cd',
                        'site_no', 'agency_cd'
                ]:
                    continue
                try:
                    var_meta = copy(variable_map[var])
                except KeyError:
                    logger.error(
                        "Variable {!s} was not found in variable map!".format(
                            var))
                    continue

                # Convert to floats
                result.df[var] = result.df[var].map(to_floats)
                # Normalize units to metric, leaving nulls untouched.
                if var_meta["units"].lower() in ["feet", "ft"]:
                    result.df[var] = result.df[var].apply(
                        lambda x: None if pd.isnull(x) else x * 0.3048)
                    var_meta["units"] = "meters"
                elif var_meta["units"].lower() in ["psi"]:
                    result.df[var] = result.df[var].apply(
                        lambda x: None if pd.isnull(x) else x * 68.9476)
                    var_meta["units"] = "mbar"
                elif var_meta["units"].lower() in ['millimeters of mercury']:
                    result.df[var] = result.df[var].apply(
                        lambda x: None if pd.isnull(x) else x * 1.33322)
                    var_meta["units"] = "mbar"

                # Now put the fillvalue we want to be interpreted
                result.df.fillna(fillvalue, inplace=True)

                if output_format == 'axiom':
                    # If Axiom, a file for each variable
                    output_directory = os.path.join(output, gsite)
                    output_filename = '{}_{}.nc'.format(
                        result.site, var_meta['standard_name'])
                    ts = TimeSeries.from_dataframe(
                        result.df,
                        output_directory,
                        output_filename,
                        glat,
                        glon,
                        station_urn.urn,
                        gas,
                        var_meta["standard_name"],
                        var_meta,
                        sensor_vertical_datum='NAVD88',
                        fillvalue=fillvalue,
                        data_column=var,
                        vertical_axis_name='height')
                    sensor_urn = urnify(station_urn.authority,
                                        station_urn.label, var_meta)
                    ts.add_instrument_metadata(urn=sensor_urn)
                elif output_format == 'cf16':
                    # If CF, add variable to existing TimeSeries.
                    # Column names that start with a digit are not valid NetCDF
                    # variable names, so prefix them.
                    try:
                        int(var[0])
                        variable_name = 'v_{}'.format(var)
                    except (ValueError, TypeError, IndexError):
                        # Narrowed from `except BaseException`, which also
                        # swallowed KeyboardInterrupt/SystemExit.
                        variable_name = var
                    ts.add_variable(variable_name,
                                    values=result.df[var].values,
                                    attributes=var_meta,
                                    fillvalue=fillvalue,
                                    sensor_vertical_datum='NAVD88')