def simple_grid_dataset():
    """Build a small DAP grid dataset using the pydap model directly.

    @brief Create a simple dap grid dataset.  Passing plain dicts does
    not make sense here, so the pydap types are used straight away.
    """
    dataset = DatasetType(name='SimpleGridData')
    grid = GridType(name='grid')

    values = numpy.arange(24.)
    values.shape = (4, 2, 3)

    # The dictionary key must match the name given to each BaseType.
    grid['a'] = BaseType(name='a', data=values, shape=values.shape,
                         type=Float32, dimensions=('time', 'x', 'y'))
    # Axis maps: one per dimension of the array above.
    for axis, length in (('time', 4), ('x', 2), ('y', 3)):
        grid[axis] = BaseType(name=axis,
                              data=numpy.arange(float(length)),
                              shape=(length, ), type=Float64)

    dataset[grid.name] = grid
    return dataset
def test_id(self):
    """Test that the dataset id is not propagated."""
    ds = DatasetType("dataset")
    leaf = BaseType("child")
    leaf.id = "error"  # wrong on purpose; the assignment below must reset it
    ds["child"] = leaf
    self.assertEqual(leaf.id, "child")
def mean(dataset, var, axis=0):
    """Calculate the mean of an array along a given axis.

    The input variable should be either a ``GridType`` or ``BaseType``.
    The function will return an object of the same type with the mean
    applied along ``axis``.
    """
    if not isinstance(var, (GridType, BaseType)):
        raise ConstraintExpressionError(
            'Function "mean" should be used on an array or grid.')

    axis = int(axis)
    # Dimensions that survive the reduction.
    dims = tuple(
        name for i, name in enumerate(var.dimensions) if i != axis)

    # Plain array: reduce the data directly.
    if isinstance(var, BaseType):
        return BaseType(name=var.name,
                        data=np.mean(var.data[:], axis=axis),
                        dimensions=dims, attributes=var.attributes)

    # Grid: reduce the array component, then copy the surviving maps.
    out = GridType(name=var.name, attributes=var.attributes)
    out[var.array.name] = BaseType(
        name=var.array.name,
        data=np.mean(var.array.data[:], axis=axis),
        dimensions=dims, attributes=var.array.attributes)
    for name in dims:
        out[name] = BaseType(name=name, data=var[name].data[:],
                             dimensions=(name, ),
                             attributes=var[name].attributes)
    return out
def test_DatasetType_id():
    """Test that the dataset id is not propagated."""
    ds = DatasetType("dataset")
    leaf = BaseType("child")
    leaf.id = "error"  # wrong on purpose; insertion below must reset it
    ds["child"] = leaf
    assert leaf.id == "child"
def gridtype_example():
    """Create a simple grid."""
    nx, ny = 30, 50
    grid = GridType("example")
    grid["a"] = BaseType("a", data=np.arange(nx * ny).reshape(nx, ny))
    grid["x"] = BaseType("x", data=np.arange(nx))
    grid["y"] = BaseType("y", data=np.arange(ny))
    return grid
def stream(self):
    """Publish one chunk of a random time series as a DAP message.

    Builds a grid of ``sz`` random samples whose time axis starts at
    ``self.index``, advances the index, and yields the deferred send
    of the encoded message.
    """
    sz = 10
    time = numpy.arange(float(self.index), float(self.index + sz))
    self.index += sz

    data = numpy.arange(float(sz))
    for ind in range(sz):
        data[ind] = numpy.random.random()

    ds = DatasetType(name='SimpleGridData')
    g = GridType(name='Time Series')
    # The name in the dictionary must match the name in the basetype.
    # FIX: ``dimensions`` must be a tuple -- ('time') is just the
    # string 'time', so it was silently iterated character-wise.
    g['timeseries'] = BaseType(name='timeseries', data=data,
                               shape=data.shape, type=Float32,
                               dimensions=('time',))
    g['time'] = BaseType(name='time', data=time, shape=(sz, ),
                         type=Float32)
    ds[g.name] = g

    msg = dap_tools.ds2dap_msg(ds)
    yield self.send(self.deliver, 'data', msg.encode())
def setUp(self):
    """Build a 12-point random time-series dataset and the consumer."""
    sz = 12
    time = numpy.arange(float(0), float(sz))
    data = numpy.arange(float(sz))
    for ind in range(sz):
        data[ind] = numpy.random.random()

    ds = DatasetType(name='SimpleGridData')
    g = GridType(name='TimeSeries')
    # The name in the dictionary must match the name in the basetype.
    # FIX: ``dimensions`` must be a tuple -- ('time') is just the
    # string 'time'.
    g['timeseries'] = BaseType(name='timeseries', data=data,
                               shape=data.shape, type=Float32,
                               dimensions=('time',))
    g['time'] = BaseType(name='time', data=time, shape=(sz, ),
                         type=Float32)
    ds[g.name] = g

    self.ds1 = ds
    self.tc = timeseries_consumer.TimeseriesConsumer()
    yield self.tc.plc_init()
def setUp(self):
    """Create a simple grid."""
    nx, ny = 30, 50
    grid = GridType("example")
    grid["a"] = BaseType("a", data=np.arange(nx * ny).reshape(nx, ny))
    grid["x"] = BaseType("x", data=np.arange(nx))
    grid["y"] = BaseType("y", data=np.arange(ny))
    self.example = grid
def test_set_data(self):
    """Test that data is propagated to children."""
    var = StructureType("var", value=42, one="1")
    for name in ("one", "two"):
        var[name] = BaseType(name)
    var.data = [10, 20]
    self.assertEqual(var["one"].data, 10)
    self.assertEqual(var["two"].data, 20)
def test_StructureType_set_data():
    """Test that data is propagated to children."""
    var = StructureType("var", value=42, one="1")
    for name in ("one", "two"):
        var[name] = BaseType(name)
    var.data = [10, 20]
    assert var["one"].data == 10
    assert var["two"].data == 20
def test_StructureType_repr():
    """Test ``__repr__`` method."""
    var = StructureType("var")
    # Empty structure first, then with two children added.
    assert repr(var) == "<StructureType with children >"
    for name in ("one", "two"):
        var[name] = BaseType(name)
    assert repr(var) == "<StructureType with children 'one', 'two'>"
def test_repr(self):
    """Test ``__repr__`` method."""
    var = StructureType("var")
    # Empty structure first, then with two children added.
    self.assertEqual(repr(var), "<StructureType with children >")
    for name in ("one", "two"):
        var[name] = BaseType(name)
    self.assertEqual(
        repr(var), "<StructureType with children 'one', 'two'>")
def test_conflict(self):
    """Test a dataset with conflicting short names."""
    root = DatasetType("a")
    # Two branches each containing a child named "c": the shorthand
    # "c" is ambiguous and must be rejected.
    for branch in ("b", "d"):
        root[branch] = StructureType(branch)
        root[branch]["c"] = BaseType("c")
    projection = [[("c", ())]]
    with self.assertRaises(ConstraintExpressionError):
        fix_shorthand(projection, root)
def sequence_example():
    """Create a standard sequence from the DAP spec."""
    example = SequenceType("example")
    for name in ("index", "temperature", "site"):
        example[name] = BaseType(name)
    records = [
        (10, 15.2, "Diamond_St"),
        (11, 13.1, "Blacktail_Loop"),
        (12, 13.3, "Platinum_St"),
        (13, 12.1, "Kodiak_Trail"),
    ]
    example.data = np.rec.fromrecords(
        records, names=list(example.keys()))
    return example
def nested_object(nested_data):
    """Wrap *nested_data* in an ``IterData`` over a nested sequence."""
    dataset = DatasetType('nameless')
    seq = dataset['nested'] = SequenceType('nested')
    # Flat children.
    for var in 'a', 'b', 'c':
        seq[var] = BaseType(var)
    # One nested child sequence with its own children.
    seq['d'] = SequenceType('d')
    for var in 'e', 'f', 'g':
        seq['d'][var] = BaseType(var)
    return IterData(nested_data, seq)
def sequence_type_data():
    """Simple sequence test data."""
    records = [
        (10, 15.2, 'Diamond_St'),
        (11, 13.1, 'Blacktail_Loop'),
        (12, 13.3, 'Platinum_St'),
        (13, 12.1, 'Kodiak_Trail'),
    ]
    dtype = [('index', '<i4'), ('temperature', '<f8'),
             ('station', 'S40')]
    seq = SequenceType('sequence')
    # One child per record field, in dtype order.
    for field, _ in dtype:
        seq[field] = BaseType(field)
    seq.data = np.array(records, dtype=dtype)
    return seq
def setUp(self):
    """Create a flat IterData."""
    rows = [(1, 2, 3), (4, 5, 6)]
    template = SequenceType("a")
    for name in ("b", "c", "d"):
        template[name] = BaseType(name)
    self.data = IterData(rows, template)
    self.array = np.array(
        np.rec.fromrecords(rows, names=["b", "c", "d"]))
def demo_dataset():
    """@Brief Example methods for creating a dataset.

    See http://pydap.org/developer.html#the-dap-data-model
    """
    # Create a dataset object.
    ds = DatasetType(name='Mine')

    # Add some global attributes.
    ds.attributes['history'] = 'David made a dataset'
    ds.attributes['conventions'] = 'OOIs special format'

    # Create some data and put it in a variable.
    varname = 'var1'
    data = (1, 2, 3, 4, 5, 8)
    # FIX: shape was hard-coded to (8, ) while ``data`` has 6 values.
    shape = (len(data), )
    var_type = Int32  # avoid shadowing the builtin ``type``
    # FIX: ``dims`` was commented out but still used below (NameError).
    dims = ('time', )
    attributes = {'long_name': 'long variable name one'}
    ds[varname] = BaseType(name=varname, data=data, shape=shape,
                           dimensions=dims, type=var_type,
                           attributes=attributes)

    # Now make a grid data object.
    g = GridType(name='g')
    data = numpy.arange(6.)
    data.shape = (2, 3)
    # The name in the dictionary must match the name in the basetype.
    g['a'] = BaseType(name='a', data=data, shape=data.shape,
                      type=Float32, dimensions=('x', 'y'))
    g['x'] = BaseType(name='x', data=numpy.arange(2.), shape=(2, ),
                      type=Float64)
    g['y'] = BaseType(name='y', data=numpy.arange(3.), shape=(3, ),
                      type=Float64)
    ds[g.name] = g

    return ds
def test_StructureType_getitem():
    """Test item retrieval."""
    var = StructureType("var")
    child = BaseType("child")
    child.data = np.array([[[0, 1]]])
    var["child"] = child

    # Direct and fully-qualified lookups all resolve to the same child.
    for key in ("child", "child.child", "parent.child",
                "grandparent.parent.child"):
        assert var[key] is child

    with pytest.raises(KeyError):
        var["unloved child"]
    with pytest.raises(KeyError):
        var[:]
def setUp(self):
    """Create a dataset with a bracket in a variable name, plus the app."""
    self.dataset = DatasetType("test")
    self.dataset["foo["] = BaseType("foo[", np.array(1))
    self.app = BaseHandler(self.dataset)
def make_series(self, response, name, data, attrs, ttype):
    """Wrap *data* in a ``BaseType`` carrying the given name, type and
    attributes."""
    return BaseType(name=name, data=data, type=ttype, attributes=attrs)
def handle_dds(self, coverage, dataset, fields):
    """Attach a 'data' sequence to *dataset*, one child per usable field."""
    cov = coverage
    seq = SequenceType('data')

    for name in fields:
        # Strip the leading 'data.' prefix from the field name.
        if name.startswith('data.'):
            name = name[5:]

        # Skip names carrying a 32-character hex suffix.
        if re.match(r'.*_[a-z0-9]{32}', name):
            continue

        try:
            context = coverage.get_parameter_context(name)
            attrs = self.get_attrs(cov, name)
            seq[name] = BaseType(name=name,
                                 type=self.dap_type(context),
                                 attributes=attrs,
                                 shape=(coverage.num_timesteps(), ))
        except Exception:
            # Best-effort: log the failing parameter and keep going.
            log.exception('Problem reading cov %s', str(cov))
            continue

    dataset['data'] = seq
    return dataset
def test_get_var(self):
    """Test that the id is returned properly."""
    root = DatasetType("a")
    root["b"] = StructureType("b")
    root["b"]["c"] = BaseType("c")
    self.assertEqual(get_var(root, 'b.c'), root['b']['c'])
def density(dataset, salinity, temperature, pressure):
    """Calculate in-situ density.

    This function calculates in-situ density from absolute salinity and
    conservative temperature, using the `gsw.rho` function.

    Returns a new sequence with the data.
    """
    # Find the first sequence in the dataset.
    for sequence in walk(dataset, SequenceType):
        break
    else:
        # FIX: the message used to name the wrong function ("bounds").
        raise ConstraintExpressionError(
            'Function "density" should be used on a Sequence.')

    selection = sequence[salinity.name, temperature.name, pressure.name]
    rows = [tuple(row) for row in selection]
    data = np.rec.fromrecords(
        rows, names=['salinity', 'temperature', 'pressure'])

    rho = gsw.rho(
        data['salinity'], data['temperature'], data['pressure'])

    out = SequenceType("result")
    out['rho'] = BaseType("rho", units="kg/m**3")
    out.data = np.rec.fromrecords(rho.reshape(-1, 1), names=['rho'])
    return out
def make_grid(self, response, name, data, time_data, attrs, time_attrs,
              dims, ttype):
    """Build a ``GridType`` holding one array and its time-axis map."""
    grid = GridType(name=name)
    grid[name] = BaseType(name=name, data=data, type=ttype,
                          attributes=attrs, dimensions=dims,
                          shape=data.shape)
    time_name = dims[0]
    grid[time_name] = BaseType(name=time_name, data=time_data,
                               type=time_data.dtype.char,
                               attributes=time_attrs,
                               dimensions=dims,
                               shape=time_data.shape)
    return grid
def setUp(self):
    """Create a dataset with a dotted variable name and its WSGI app."""
    ds = DatasetType("test")
    ds["a.b"] = BaseType("a.b", np.array(1))
    self.app = BaseHandler(ds)
def __init__(self, filepath):
    """Open *filepath* as a netCDF file and build the DAP dataset.

    Raises ``OpenFileError`` when the file cannot be read.
    """
    BaseHandler.__init__(self)
    self.filepath = filepath
    try:
        with netcdf_file(self.filepath, 'r') as source:
            self.additional_headers.append(
                ('Last-modified',
                 (formatdate(
                     time.mktime(
                         time.localtime(
                             os.stat(filepath)[ST_MTIME]))))))

            # shortcuts
            vars = source.variables
            dims = source.dimensions

            # build dataset
            name = os.path.split(filepath)[1]
            self.dataset = DatasetType(
                name, attributes=dict(NC_GLOBAL=attrs(source)))
            for dim in dims:
                if dims[dim] is None:
                    self.dataset.attributes['DODS_EXTRA'] = {
                        'Unlimited_Dimension': dim,
                    }
                    break

            # add grids
            grids = [var for var in vars if var not in dims]
            for grid in grids:
                self.dataset[grid] = GridType(grid, attrs(vars[grid]))
                # add array
                self.dataset[grid][grid] = BaseType(
                    grid,
                    LazyVariable(source, grid, grid, self.filepath),
                    vars[grid].dimensions,
                    attrs(vars[grid]))
                # add maps
                for dim in vars[grid].dimensions:
                    self.dataset[grid][dim] = BaseType(
                        dim, vars[dim][:], None, attrs(vars[dim]))

            # add dims
            for dim in dims:
                self.dataset[dim] = BaseType(
                    dim, vars[dim][:], None, attrs(vars[dim]))
    except Exception as exc:
        # FIX: a bare ``raise`` used to precede these lines, re-raising
        # the original exception and making the OpenFileError below
        # unreachable dead code.
        message = 'Unable to open file %s: %s' % (filepath, exc)
        raise OpenFileError(message)
def test_regexp(self):
    """A regex constraint (=~) filters sequence rows by pattern."""
    seq = SequenceType("sequence")
    seq["name"] = BaseType("name")
    seq.data = IterData([
        ("John", "Paul", "George", "Ringo"),
    ], seq)
    matched = seq[ConstraintExpression('sequence.name=~"J.*"')]
    self.assertEqual(list(matched.iterdata()), [("John", )])
def test_comparisons(self):
    """Test that comparisons are applied to data."""
    var = BaseType("var", np.array(1))
    # Each comparison operator must delegate to the wrapped value 1.
    for result in (var == 1, var != 2, var >= 0, var <= 2,
                   var > 0, var < 2):
        self.assertTrue(result)
def test_delitem(self):
    """Test item deletion."""
    struct = StructureType("var")
    struct["one"] = BaseType("one")
    self.assertEqual(struct.keys(), ['one'])
    del struct["one"]
    self.assertEqual(struct.keys(), [])
def buildParmDict(self):
    '''
    Build parameter dictionary akin to that returned by pydap. The
    parameters from the .btl file must match the parameters read from
    the .nc file. The mapping mirrors the variable creation done in
    pctdToNetCDF.py (see that file for the original netCDF attributes).
    '''
    # (parameter name, attributes) pairs; order matches the original
    # parmDict literal.  The colname attribute must be the keys that
    # DictReader returns - the keys of this dictionary will be the
    # Parameter names in stoqs.
    specs = [
        ('pressure', {'colname': 'PrDM', 'units': 'm',
                      'long_name': 'DEPTH',
                      'standard_name': 'depth'}),
        ('TEMP', {'colname': 'T190C', 'units': 'ITS-90, deg C',
                  'long_name': 'temperature',
                  'standard_name': 'sea_water_temperature'}),
        ('PSAL', {'colname': 'Sal00', 'units': '1',
                  'long_name': 'salinity',
                  'standard_name': 'sea_water_salinity'}),
        ('xmiss', {'colname': 'Xmiss', 'units': '%',
                   'long_name': 'Beam Transmission, Chelsea/Seatech'}),
        ('ecofl', {'colname': 'FlECO-AFL', 'units': 'mg/m^3',
                   'long_name': 'Fluorescence, WET Labs ECO-AFL/FL'}),
        ('oxygen', {'colname': 'Sbeox0ML/L', 'units': 'ml/l',
                    'long_name': 'Oxygen, SBE 43'}),
        ('wetstar', {'colname': 'WetStar', 'units': 'mg/m^3',
                     'long_name': 'Fluorescence, WET Labs WETstar'}),
    ]
    parmDict = {}
    for parm, attributes in specs:
        var = BaseType('nameless')
        var.attributes = attributes
        parmDict[parm] = var
    return parmDict
def buildParmDict(self):
    """
    Build parameter dictionary akin to that returned by pydap. The
    parameters from the .btl file must match the parameters read from
    the .nc file. The mapping mirrors the variable creation done in
    pctdToNetCDF.py (see that file for the original netCDF attributes).
    """
    # (parameter name, attributes) pairs; order matches the original
    # parmDict literal.  The colname attribute must be the keys that
    # DictReader returns - the keys of this dictionary will be the
    # Parameter names in stoqs.
    # FIX: the pressure 'units' value was the bare name ``m`` (an
    # undefined variable, a NameError at runtime); it is now the
    # string "m".
    specs = [
        ("pressure", {"colname": "PrDM", "units": "m",
                      "long_name": "DEPTH",
                      "standard_name": "depth"}),
        ("TEMP", {"colname": "T190C", "units": "ITS-90, deg C",
                  "long_name": "temperature",
                  "standard_name": "sea_water_temperature"}),
        ("PSAL", {"colname": "Sal00", "units": "1",
                  "long_name": "salinity",
                  "standard_name": "sea_water_salinity"}),
        ("xmiss", {"colname": "Xmiss", "units": "%",
                   "long_name": "Beam Transmission, Chelsea/Seatech"}),
        ("ecofl", {"colname": "FlECO-AFL", "units": "mg/m^3",
                   "long_name": "Fluorescence, WET Labs ECO-AFL/FL"}),
        ("oxygen", {"colname": "Sbeox0ML/L", "units": "ml/l",
                    "long_name": "Oxygen, SBE 43"}),
        ("wetstar", {"colname": "WetStar", "units": "mg/m^3",
                     "long_name": "Fluorescence, WET Labs WETstar"}),
    ]
    parmDict = {}
    for parm, attributes in specs:
        var = BaseType()
        var.attributes = attributes
        parmDict[parm] = var
    return parmDict
def process_csv_file(self, fh):
    '''
    Iterate through lines of iterator to csv file and pull out data for loading into STOQS
    '''
    # NOTE(review): this local dict is never used; ``self.ds`` is
    # assigned below instead.  Presumably leftover - confirm.
    ds = {}

    # Pydap-style BaseType stubs, one per CSV column of interest; each
    # carries the metadata attributes for that column.
    DA = BaseType('nameless')
    DA.attributes = {'units': 'ng ml-1 ',
                     'long_name': 'Domoic Acid',
                     'standard_name': 'domoic_acid',
                     'type': 'float',
                     'description': 'Domoic acid',
                     'origin': 'www.sccoos.org'}
    PD = BaseType('nameless')
    PD.attributes = {'units': 'cells l-1',
                     'long_name': 'Pseudo-nitzschia delicatissima group',
                     'standard_name': 'pseudo_nitzschia_delicatissima',
                     'name': 'pseudo_nitzschia_delicatissima',
                     'type': 'float',
                     'description': 'Pseudo-nitzschia delicatissima group (cells/L)',
                     'origin': 'www.sccoos.org'}
    PA = BaseType('nameless')
    PA.attributes = {'units': 'cells l-1',
                     'long_name': 'Pseudo-nitzschia seriata group',
                     'standard_name': 'pseudo_nitzschia_seriata',
                     'name': 'pseudo_nitzschia_seriata',
                     'type': 'float',
                     'description': 'Pseudo-nitzschia seriata group (cells/L)',
                     'origin': 'www.sccoos.org'}
    alexandrium = BaseType('nameless')
    alexandrium.attributes = {'units': 'cells l-1',
                              'long_name': 'Alexandrium',
                              'standard_name': 'alexandrium',
                              'name': 'alexandrium',
                              'type': 'float',
                              'description': 'Alexandrium spp. (cells/L)',
                              'origin': 'www.sccoos.org'}
    phosphate = BaseType('nameless')
    phosphate.attributes = {'units': 'm-3 mol l-1',
                            'long_name': 'Phosphate',
                            'standard_name': 'phosphate_dissolved_in_seawater',
                            'name': 'Phosphate',
                            'type': 'float',
                            'description': 'Phosphate (uM)',
                            'origin': 'www.sccoos.org'}
    ammonia = BaseType('nameless')
    ammonia.attributes = {'units': 'm-3 mol l-1',
                          'long_name': 'Ammonia',
                          'standard_name': 'ammonia_dissolved_in_seawater',
                          'name': 'ammonia_dissolved_in_sewater',
                          'type': 'float',
                          'description': 'Ammonia (uM)',
                          'origin': 'www.sccoos.org'}
    silicate = BaseType('nameless')
    silicate.attributes = {'units': 'm-3 mol l-1',
                           'long_name': 'Silicate',
                           'standard_name': 'silicate_dissolved_in_seawater',
                           'name': 'silicate_dissolved_in_seawater',
                           'type': 'float',
                           'description': 'Silicate (uM)',
                           'origin': 'www.sccoos.org'}
    chlorophyll = BaseType('nameless')
    chlorophyll.attributes = {'units': 'kg m-3',
                              'long_name': 'Chlorophyll',
                              'standard_name': 'mass_concentration_of_chlorophyll_in_sea_water',
                              'name': 'mass_concentration_of_chlorophyll_in_sea_water',
                              'type': 'float',
                              'description': 'Chlorophyll (kg/m3)',
                              'origin': 'www.sccoos.org'}
    prorocentrum = BaseType('nameless')
    prorocentrum.attributes = {'units': 'cells l-1',
                               'long_name': 'Prorocentrum',
                               'standard_name': 'mass_concentration_of_prorocentrum_in_sea_water',
                               'name': 'mass_concentration_of_prorocentrum_in_sea_water',
                               'type': 'float',
                               'description': 'Prorocentrum spp. (cells/L)',
                               'origin': 'www.sccoos.org'}

    # Keys are the literal CSV column headers; note that the three
    # Chlorophyll columns deliberately share one BaseType object.
    self.ds = {'Domoic Acid (ng/mL)': DA,
               'Pseudo-nitzschia seriata group (cells/L)': PA,
               'Pseudo-nitzschia delicatissima group (cells/L)': PD,
               'Phosphate (uM)': phosphate,
               'Silicate (uM)': silicate,
               'Ammonia (uM)': ammonia,
               'Chlorophyll (mg/m3)': chlorophyll,
               'Chlorophyll 1 (mg/m3)': chlorophyll,
               'Chlorophyll 2 (mg/m3)': chlorophyll,
               'Alexandrium spp. (cells/L)': alexandrium}
    self.include_names = ['Pseudo-nitzschia seriata group (cells/L)',
                          'Pseudo-nitzschia delicatissima group (cells/L)',
                          'Domoic Acid (ng/mL)',
                          'Chlorophyll (mg/m3)',
                          'Chlorophyll 1 (mg/m3)',
                          'Chlorophyll 2 (mg/m3)',
                          'Prorocentrum spp. (cells/L)',
                          'Silicate (uM)',
                          'Ammonia (uM)',
                          'Nitrate (uM)',
                          'Phosphate (uM)',
                          'Alexandrium spp. (cells/L)']

    self.initDB()

    # Per-parameter load counters, reset before the load.
    for pn in self.include_names:
        self.parmCount[pn] = 0

    reader = csv.reader(fh)
    for line in fh:
        # Skip all lines that don't begin with '"' nor ' ' then open that with csv.DictReader
        if not line.startswith('"') and not line.startswith(' '):
            # Header row found: rebind ``reader`` to a DictReader that
            # consumes the remainder of the file.
            titles = next(reader)
            reader = csv.DictReader(fh, titles)
            for r in reader:
                year = int(r['year'])
                month = int(r['month'])
                day = int(r['day'])
                time = r['time']
                lat = float(r['latitude'])
                lon = float(r['longitude'])
                depth = float(r['depth (m)'])
                location = r['location']
                hours = int(time.split(':')[0])
                mins = int(time.split(':')[1])
                secs = int(time.split(':')[2])
                parmNameValues = []
                for name in list(self.ds.keys()):
                    # Chlorophyll columns are scaled by 1e-5 --
                    # presumably a mg/m3 -> kg/m3 unit conversion;
                    # TODO confirm the factor.
                    if name.startswith('Chlorophyll'):
                        parmNameValues.append((name, 1e-5*float(r[name])))
                    else:
                        parmNameValues.append((name, float(r[name])))

                # Check to make sure all data from this file are from the same location.
                # The program could be modified to read data in one file from multiple locations by reading data into a hash keyed by location name
                # and then stepping through each key of the hash saving the data for each location into it's own activity. For now just require
                # each data file to have data from just one location.
                try:
                    if lat != lastlat or lon != lastlon:
                        logger.error("lat and lon are not the same for location = %s and lastlocation = %s. The input data should have just one location." % (location, lastlocation))
                        sys.exit(-1)
                except NameError as e:
                    # Expected first time through when lastlon & lastlat don't yet exist
                    pass

                # Load data
                dt = datetime(year, month, day, hours, mins, secs)
                self.load_measurement(lon, lat, depth, dt, parmNameValues)

                # Load sample
                bName = dt.isoformat()
                self.load_sample(lon, lat, depth, dt, bName)

                lastlat = lat
                lastlon = lon
                lastlocation = location

    logger.info("Data load complete, %d records loaded.", self.loaded)
    fh.close()

    # Update the Activity with information we now have following the load
    # Careful with the structure of this comment. It is parsed in views.py to give some useful links in showActivities()
    newComment = "%d MeasuredParameters loaded. Loaded on %sZ" % (self.loaded, datetime.utcnow())
    logger.info("runHABLoader(): Updating its comment with newComment = %s", newComment)

    # ``location`` / ``lon`` / ``lat`` hold the values from the last
    # row processed above.
    aName = location

    num_updated = m.Activity.objects.using(self.dbAlias).filter(id = self.activity.id).update(
        name = aName,
        comment = newComment,
        maptrack = None,
        mappoint = 'POINT(%s %s)' % (lon, lat),
        mindepth = self.mindepth,
        maxdepth = self.maxdepth,
        num_measuredparameters = self.loaded,
        loaded_date = datetime.utcnow())
    self.updateActivityParameterStats(self.parameterCount)
    self.updateCampaignStartEnd()
def process_csv_file(self, fh): ''' Iterate through lines of iterator to csv file and pull out data for loading into STOQS ''' ds = {} DA = BaseType() DA.attributes = {'units': 'ng ml-1 ' , 'long_name': 'Domoic Acid', 'standard_name': 'domoic_acid', 'type': 'float', 'description': 'Domoic acid' , 'origin': 'www.sccoos.org' } PD = BaseType() PD.attributes = {'units': 'cells l-1', 'long_name': 'Pseudo-nitzschia delicatissima group', 'standard_name': 'pseudo_nitzschia_delicatissima', 'name': 'pseudo_nitzschia_delicatissima' , 'type': 'float' , 'description': 'Pseudo-nitzschia delicatissima group (cells/L)' , 'origin': 'www.sccoos.org' } PA = BaseType() PA.attributes = {'units': 'cells l-1', 'long_name': 'Pseudo-nitzschia seriata group', 'standard_name': 'pseudo_nitzschia_seriata', 'name': 'pseudo_nitzschia_seriata' , 'type': 'float' , 'description': 'Pseudo-nitzschia seriata group (cells/L)' , 'origin': 'www.sccoos.org' } alexandrium = BaseType() alexandrium.attributes = {'units': 'cells l-1', 'long_name': 'Alexandrium', 'standard_name': 'alexandrium', 'name': 'alexandrium' , 'type': 'float' , 'description': 'Alexandrium spp. 
(cells/L)' , 'origin': 'www.sccoos.org' } phosphate = BaseType() phosphate.attributes = {'units': 'm-3 mol l-1', 'long_name': 'Phosphate', 'standard_name': 'phosphate_dissolved_in_seawater', 'name': 'Phosphate' , 'type': 'float' , 'description': 'Phosphate (uM)' , 'origin': 'www.sccoos.org' } ammonia = BaseType() ammonia.attributes = {'units': 'm-3 mol l-1', 'long_name': 'Ammonia', 'standard_name': 'ammonia_dissolved_in_seawater', 'name': 'ammonia_dissolved_in_sewater' , 'type': 'float' , 'description': 'Ammonia (uM)' , 'origin': 'www.sccoos.org' } silicate = BaseType() silicate.attributes = {'units': 'm-3 mol l-1', 'long_name': 'Silicate', 'standard_name': 'silicate_dissolved_in_seawater', 'name': 'silicate_dissolved_in_seawater' , 'type': 'float' , 'description': 'Silicate (uM)' , 'origin': 'www.sccoos.org' } chlorophyll = BaseType() chlorophyll.attributes = {'units': 'kg m-3', 'long_name': 'Chlorophyll', 'standard_name': 'mass_concentration_of_chlorophyll_in_sea_water', 'name': 'mass_concentration_of_chlorophyll_in_sea_water' , 'type': 'float' , 'description': 'Chlorophyll (kg/m3)' , 'origin': 'www.sccoos.org' } prorocentrum = BaseType() prorocentrum.attributes = {'units': 'cells l-1', 'long_name': 'Prorocentrum', 'standard_name': 'mass_concentration_of_prorocentrum_in_sea_water', 'name': 'mass_concentration_of_prorocentrum_in_sea_water' , 'type': 'float' , 'description': 'Prorocentrum spp. (cells/L)' , 'origin': 'www.sccoos.org' } self.ds = { 'Domoic Acid (ng/mL)': DA, 'Pseudo-nitzschia seriata group (cells/L)': PA, 'Pseudo-nitzschia delicatissima group (cells/L)': PD, 'Phosphate (uM)': phosphate, 'Silicate (uM)': silicate, 'Ammonia (uM)': ammonia, 'Chlorophyll (mg/m3)': chlorophyll, 'Chlorophyll 1 (mg/m3)': chlorophyll, 'Chlorophyll 2 (mg/m3)': chlorophyll , 'Alexandrium spp. 
(cells/L)': alexandrium } self.include_names = ['Pseudo-nitzschia seriata group (cells/L)', 'Pseudo-nitzschia delicatissima group (cells/L)', 'Domoic Acid (ng/mL)', 'Chlorophyll (mg/m3)', 'Chlorophyll 1 (mg/m3)', 'Chlorophyll 2 (mg/m3)', 'Prorocentrum spp. (cells/L)', 'Silicate (uM)', 'Ammonia (uM)', 'Nitrate (uM)', 'Phosphate (uM)', 'Alexandrium spp. (cells/L)'] self.initDB() for pn in self.include_names: self.parmCount[pn] = 0 reader = csv.reader(fh) for line in fh: # Skip all lines that don't begin with '"' nor ' ' then open that with csv.DictReader if not line.startswith('"') and not line.startswith(' '): titles = reader.next() reader = csv.DictReader(fh, titles) for r in reader: year = int(r['year']) month = int(r['month']) day = int(r['day']) time = r['time'] lat = float(r['latitude']) lon = float(r['longitude']) depth = float(r['depth (m)']) location = r['location'] hours = int(time.split(':')[0]) mins = int(time.split(':')[1]) secs = int(time.split(':')[2]) parmNameValues = [] for name in self.ds.keys(): if name.startswith('Chlorophyll'): parmNameValues.append((name, 1e-5*float(r[name]))) else: parmNameValues.append((name, float(r[name]))) # Check to make sure all data from this file are from the same location. # The program could be modified to read data in one file from multiple locations by reading data into a hash keyed by location name # and then stepping through each key of the hash saving the data for each location into it's own activity. For now just require # each data file to have data from just one location. try: if lat != lastlat or lon != lastlon: logger.error("lat and lon are not the same for location = %s and lastlocation = %s. The input data should have just one location." 
% (location, lastlocation)) sys.exit(-1) except NameError, e: # Expected first time through when lastlon & lastlat don't yet exist pass # Load data dt = datetime(year, month, day, hours, mins, secs) self.load_measurement(lon, lat, depth, dt, parmNameValues) # Load sample bName = dt.isoformat() self.load_sample(lon, lat, depth, dt, bName) lastlat = lat lastlon = lon lastlocation = location