def test_append_parameter(self):
    """Append a function-typed parameter to a populated dataset.

    Publishes 30 samples of time/temp/pressure to a CTD data product,
    registers the ``add_arrays`` parameter function, attaches an
    ``array_sum`` parameter (mapped to temp and pressure) to the dataset
    and asserts that the retrieved ``array_sum`` equals
    ``np.arange(0, 60, 2)``.
    """
    # Build the CTDBP data product and monitor its dataset
    product_id = self.make_ctd_data_product()
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
        product_id)
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    # Seed the product with 30 samples per field
    samples = self.ph.rdt_for_data_product(product_id)
    for field in ('time', 'temp', 'pressure'):
        samples[field] = np.arange(30)
    self.ph.publish_rdt_to_data_product(product_id, samples)
    self.assertTrue(monitor.wait())
    monitor.event.clear()

    # Download the egg and put it on the pkg_resources working set;
    # the downloaded file is removed again at cleanup time.
    egg_path = TransformWorker.download_egg(self.egg_url)
    import pkg_resources
    pkg_resources.working_set.add_entry(egg_path)
    self.addCleanup(os.remove, egg_path)

    # Register the parameter function
    add_arrays = ParameterFunction(name='add_arrays',
                                   function_type=PFT.PYTHON,
                                   owner='ion_example.add_arrays',
                                   function='add_arrays',
                                   args=['a', 'b'])
    pfunc_id = self.dataset_management.create_parameter_function(add_arrays)
    self.addCleanup(self.dataset_management.delete_parameter_function,
                    pfunc_id)

    # Create the context (an instance of the function) and attach it
    # to the dataset that already holds data.
    array_sum = ParameterContext(
        name='array_sum',
        units="1",
        fill_value="-9999",
        parameter_function_id=pfunc_id,
        parameter_type="function",
        value_encoding="float32",
        display_name="Array Summation",
        parameter_function_map={'a': 'temp', 'b': 'pressure'})
    ctxt_id = self.dataset_management.create_parameter(array_sum)
    self.dataset_management.add_parameter_to_dataset(ctxt_id, dataset_id)

    # The appended parameter must evaluate over the existing samples
    granule = self.data_retriever.retrieve(dataset_id)
    retrieved = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(retrieved['array_sum'],
                                  np.arange(0, 60, 2))
def load_parameter_function(self, row):
    """Create a parameter function and its data process definition from a row.

    Reads the 'Name', 'Function Type', 'Function', 'Owner', 'Args',
    'Description' and 'ID' entries of ``row``. 'Args' is parsed with
    ``ast.literal_eval``.

    Returns:
        The id returned by ``self.create_parameter_function``.

    Raises:
        Conflict: if 'Function Type' is neither 'PythonFunction' nor
            'NumexprFunction'.
    """
    name = row['Name']
    ftype = row['Function Type']
    expression = row['Function']
    owner = row['Owner']
    arg_names = ast.literal_eval(row['Args'])
    description = row['Description']

    dpms_client = DataProcessManagementServiceProcessClient(self)

    # Guard clause: only the two known function types are accepted
    if ftype not in ('PythonFunction', 'NumexprFunction'):
        raise Conflict('Unsupported Function Type: %s' % ftype)
    function_type = PFT.PYTHON if ftype == 'PythonFunction' else PFT.NUMEXPR

    resource = ParameterFunctionResource(name=name,
                                         function=expression,
                                         function_type=function_type,
                                         owner=owner,
                                         args=arg_names,
                                         description=description)
    # Tag the resource with its preload row id
    resource.alt_ids = ['PRE:' + row['ID']]
    parameter_function_id = self.create_parameter_function(resource)

    # Register a matching data process definition for the new function
    definition = DataProcessDefinition()
    definition.name = name
    definition.description = 'Parameter Function Definition for %s' % name
    definition.data_process_type = DataProcessTypeEnum.PARAMETER_FUNCTION
    definition.parameters = arg_names
    dpms_client.create_data_process_definition(definition,
                                               parameter_function_id)

    return parameter_function_id
def load_parameter_function(self, row):
    """Load one parameter-function row.

    Builds a ``ParameterFunctionResource`` from the row's 'Name',
    'Function', 'Function Type', 'Owner', 'Args' (parsed with
    ``ast.literal_eval``) and 'Description' entries, stores it through
    ``self.create_parameter_function`` and then creates a
    ``DataProcessDefinition`` of type PARAMETER_FUNCTION for it.

    Returns the new parameter function id; raises ``Conflict`` when the
    'Function Type' entry is not 'PythonFunction' or 'NumexprFunction'.
    """
    name = row['Name']
    ftype = row['Function Type']
    func_expr = row['Function']
    owner = row['Owner']
    args = ast.literal_eval(row['Args'])
    descr = row['Description']

    client = DataProcessManagementServiceProcessClient(self)

    # Translate the row's textual function type into the PFT enum
    if ftype == 'PythonFunction':
        function_type = PFT.PYTHON
    elif ftype == 'NumexprFunction':
        function_type = PFT.NUMEXPR
    else:
        raise Conflict('Unsupported Function Type: %s' % ftype)

    pfunc_fields = dict(name=name,
                        function=func_expr,
                        function_type=function_type,
                        owner=owner,
                        args=args,
                        description=descr)
    parameter_function = ParameterFunctionResource(**pfunc_fields)
    parameter_function.alt_ids = ['PRE:' + row['ID']]
    parameter_function_id = self.create_parameter_function(parameter_function)

    # Companion data process definition, linked to the function id
    dpd = DataProcessDefinition()
    dpd.name = name
    dpd.description = 'Parameter Function Definition for %s' % name
    dpd.data_process_type = DataProcessTypeEnum.PARAMETER_FUNCTION
    dpd.parameters = args
    client.create_data_process_definition(dpd, parameter_function_id)

    return parameter_function_id
def test_create_data_process_definition(self):
    """A data process definition is associated with its parameter function.

    Creates a parameter function and a data process definition that refers
    to it, then checks that exactly one object is found from the definition
    via the ``hasParameterFunction`` predicate and that it is a
    ``ParameterFunction``.
    """
    pfunc = ParameterFunction(name='test_func')
    func_id = self.dataset_management.create_parameter_function(pfunc)

    data_process_definition = DataProcessDefinition()
    data_process_definition.name = 'Simple'
    dpd_id = self.dataprocessclient.create_data_process_definition(
        data_process_definition, func_id)
    self.addCleanup(self.dataprocessclient.delete_data_process_definition,
                    dpd_id)
    self.addCleanup(self.dataset_management.delete_parameter_function,
                    func_id)

    objs, _ = self.rrclient.find_objects(dpd_id,
                                         PRED.hasParameterFunction,
                                         id_only=False)
    # assertEqual rather than the deprecated assertEquals alias
    self.assertEqual(len(objs), 1)
    self.assertIsInstance(objs[0], ParameterFunction)
def test_add_parameter_to_data_product(self):
    """Add a derived parameter to a parent data product and read it back.

    Runs ``test_add_parameter_function`` first to obtain a populated data
    product (``self.data_product_id``), creates a derived child product
    sharing the parent's parameter dictionary, adds a NUMEXPR
    ``temperature_corrected`` parameter (``a * x + b`` with a=1.03,
    b=0.25 over ``temp``) to the parent, and verifies the values retrieved
    from the parent's dataset.
    """
    #self.preload_ui()
    self.test_add_parameter_function()
    parent_product_id = self.data_product_id

    # Walk parent product -> stream definition -> parameter dictionary
    parent_stream_def_id = self.resource_registry.find_objects(
        parent_product_id, PRED.hasStreamDefinition, id_only=True)[0][0]
    pdict_id = self.resource_registry.find_objects(
        parent_stream_def_id, PRED.hasParameterDictionary,
        id_only=True)[0][0]

    # Create a new data product that represents the L1 temp from the
    # ctd simulator
    child_product = DataProduct(name='CTD Simulator TEMPWAT L1',
                                category=DataProductTypeEnum.DERIVED)
    child_stream_def_id = self.pubsub_management.create_stream_definition(
        name='tempwat_l1',
        parameter_dictionary_id=pdict_id,
        available_fields=['time', 'temp'])
    self.data_product_management.create_data_product(
        child_product,
        stream_definition_id=child_stream_def_id,
        parent_data_product_id=parent_product_id)

    # Linear correction function and its data process definition
    linear_corr = ParameterFunction(name='linear_corr',
                                    function_type=PFT.NUMEXPR,
                                    function='a * x + b',
                                    args=['x', 'a', 'b'])
    pf_id = self.dataset_management.create_parameter_function(linear_corr)

    definition = DataProcessDefinition(name='linear_corr',
                                       description='Linear Correction')
    self.data_process_management.create_data_process_definition(definition,
                                                                pf_id)

    corrected = ParameterContext(
        name='temperature_corrected',
        parameter_type='function',
        parameter_function_id=pf_id,
        parameter_function_map={'x': 'temp', 'a': 1.03, 'b': 0.25},
        value_encoding='float32',
        units='deg_C',
        display_name='Temperature Corrected')
    p_id = self.dataset_management.create_parameter(corrected)

    # Add it to the parent or parsed data product
    self.data_product_management.add_parameter_to_data_product(
        p_id, parent_product_id)

    # Then update the child's stream definition to include it
    child_stream_def = self.pubsub_management.read_stream_definition(
        child_stream_def_id)
    child_stream_def.available_fields.append('temperature_corrected')
    self.resource_registry.update(child_stream_def)

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
        parent_product_id)

    # For some reason, it takes numerous seconds of yielding with gevent
    # for the coverage to actually save...
    gevent.sleep(10)

    granule = self.data_retriever.retrieve(dataset_id)
    retrieved = RecordDictionaryTool.load_from_granule(granule)
    expected = np.arange(30, dtype=np.float32) * 1.03 + 0.25
    np.testing.assert_array_almost_equal(retrieved['temperature_corrected'],
                                         expected,
                                         decimal=5)
def test_add_parameter_function(self):
    """Create a data process from a Python parameter function and verify it.

    Publishes 30 samples to a CTD data product, registers the
    ``add_arrays`` parameter function (with its egg URI), wraps it in a
    data process definition, creates a data process producing
    ``array_sum`` from temp and pressure, and then checks the computed
    values, the inspected source code and the definition URLs.

    Stores the data product id on ``self.data_product_id`` so that
    ``test_add_parameter_to_data_product`` can reuse it.
    """
    # req-tag: NEW SA - 31
    # Make a CTDBP Data Product
    data_product_id = self.make_ctd_data_product()
    self.data_product_id = data_product_id
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
        data_product_id)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    # Throw some data in it
    rdt = self.ph.rdt_for_data_product(data_product_id)
    rdt['time'] = np.arange(30)
    rdt['temp'] = np.arange(30)
    rdt['pressure'] = np.arange(30)
    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dataset_monitor.wait())
    dataset_monitor.event.clear()

    #--------------------------------------------------------------------------------
    # This is what the user defines either via preload or through the UI
    #--------------------------------------------------------------------------------
    # Where the egg is
    egg_url = self.egg_url

    # Make a parameter function
    owner = 'ion_example.add_arrays'
    func = 'add_arrays'
    arglist = ['a', 'b']
    pf = ParameterFunction(name='add_arrays',
                           function_type=PFT.PYTHON,
                           owner=owner,
                           function=func,
                           args=arglist,
                           egg_uri=egg_url)
    pfunc_id = self.dataset_management.create_parameter_function(pf)
    #--------------------------------------------------------------------------------
    self.addCleanup(self.dataset_management.delete_parameter_function,
                    pfunc_id)

    # Make a data process definition
    dpd = DataProcessDefinition(name='add_arrays',
                                description='Sums two arrays')
    dpd_id = self.data_process_management.create_data_process_definition(
        dpd, pfunc_id)
    # TODO: assert assoc exists
    argmap = {'a': 'temp', 'b': 'pressure'}
    # The returned data process id is not needed by this test
    self.data_process_management.create_data_process(
        dpd_id, [data_product_id],
        argument_map=argmap,
        out_param_name='array_sum')

    # Verify that the function worked!
    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['array_sum'], np.arange(0, 60, 2))

    # Verify that we can inspect it as well
    source_code = self.data_process_management.inspect_data_process_definition(
        dpd_id)
    self.assertEqual(source_code, 'def add_arrays(a, b):\n return a+b\n')

    url = self.data_process_management.get_data_process_definition_url(
        dpd_id)
    self.assertEqual(
        url,
        'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
    )

    # Look up an existing definition by name and check its URL too
    dpd_ids, _ = self.resource_registry.find_resources(
        name='dataqc_spiketest',
        restype=RT.DataProcessDefinition,
        id_only=True)
    # Fail with a readable message instead of an IndexError when the
    # definition is not present in the registry.
    self.assertTrue(
        dpd_ids,
        "no DataProcessDefinition named 'dataqc_spiketest' was found")
    dpd_id = dpd_ids[0]
    url = self.data_process_management.get_data_process_definition_url(
        dpd_id)
    self.assertEqual(
        url,
        'https://github.com/ooici/ion-functions/blob/master/ion_functions/qc/qc_functions.py'
    )
def create_pfuncs(self):
    """Create the CTD parameter contexts and parameter functions.

    Creates independent parameters (TIME, LAT, LON, TEMPWAT_L0,
    CONDWAT_L0, PRESWAT_L0), the NUMEXPR-derived L1 parameters, the
    intermediate ``c10`` parameter, PRACSAL (``gsw.SP_from_C``) and
    DENSITY (nested ``gsw`` Python functions).

    Returns:
        tuple: ``(contexts, funcs)`` where ``contexts`` maps parameter
        name to the created parameter id and ``funcs`` maps
        'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1' and 'PRACSAL' to their
        parameter function ids.
    """
    contexts = {}
    funcs = {}
    dm = self.dataset_management

    t_ctxt = ParameterContext(name='TIME',
                              parameter_type='quantity',
                              value_encoding='float64',
                              units='seconds since 1900-01-01')
    contexts['TIME'] = dm.create_parameter(t_ctxt)

    lat_ctxt = ParameterContext(name='LAT',
                                parameter_type="sparse",
                                value_encoding='float32',
                                units='degrees_north')
    contexts['LAT'] = dm.create_parameter(lat_ctxt)

    lon_ctxt = ParameterContext(name='LON',
                                parameter_type='sparse',
                                value_encoding='float32',
                                units='degrees_east')
    contexts['LON'] = dm.create_parameter(lon_ctxt)

    # Independent Parameters

    # Temperature - values expected to be the decimal results of conversion from hex
    temp_ctxt = ParameterContext(name='TEMPWAT_L0',
                                 parameter_type='quantity',
                                 value_encoding='float32',
                                 units='deg_C')
    contexts['TEMPWAT_L0'] = dm.create_parameter(temp_ctxt)

    # Conductivity - values expected to be the decimal results of conversion from hex
    cond_ctxt = ParameterContext(name='CONDWAT_L0',
                                 parameter_type='quantity',
                                 value_encoding='float32',
                                 units='S m-1')
    contexts['CONDWAT_L0'] = dm.create_parameter(cond_ctxt)

    # Pressure - values expected to be the decimal results of conversion from hex
    press_ctxt = ParameterContext(name='PRESWAT_L0',
                                  parameter_type='quantity',
                                  value_encoding='float32',
                                  units='dbar')
    contexts['PRESWAT_L0'] = dm.create_parameter(press_ctxt)

    # Dependent Parameters

    # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
    tl1_func = '(T / 10000) - 10'
    tl1_expr = ParameterFunction(name='TEMPWAT_L1',
                                 function_type=PFT.NUMEXPR,
                                 function=tl1_func,
                                 args=['T'])
    tl1_func_id = dm.create_parameter_function(tl1_expr)
    funcs['TEMPWAT_L1'] = tl1_func_id

    tl1_pmap = {'T': 'TEMPWAT_L0'}
    tempL1_ctxt = ParameterContext(name='TEMPWAT_L1',
                                   parameter_type='function',
                                   parameter_function_id=tl1_func_id,
                                   parameter_function_map=tl1_pmap,
                                   value_encoding='float32',
                                   units='deg_C')
    contexts['TEMPWAT_L1'] = dm.create_parameter(tempL1_ctxt)

    # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
    cl1_func = '(C / 100000) - 0.5'
    cl1_expr = ParameterFunction(name='CONDWAT_L1',
                                 function_type=PFT.NUMEXPR,
                                 function=cl1_func,
                                 args=['C'])
    cl1_func_id = dm.create_parameter_function(cl1_expr)
    funcs['CONDWAT_L1'] = cl1_func_id

    cl1_pmap = {'C': 'CONDWAT_L0'}
    condL1_ctxt = ParameterContext(name='CONDWAT_L1',
                                   parameter_type='function',
                                   parameter_function_id=cl1_func_id,
                                   parameter_function_map=cl1_pmap,
                                   value_encoding='float32',
                                   units='S m-1')
    contexts['CONDWAT_L1'] = dm.create_parameter(condL1_ctxt)

    # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
    # PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
    pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
    pl1_expr = ParameterFunction(name='PRESWAT_L1',
                                 function=pl1_func,
                                 function_type=PFT.NUMEXPR,
                                 args=['P', 'p_range'])
    pl1_func_id = dm.create_parameter_function(pl1_expr)
    funcs['PRESWAT_L1'] = pl1_func_id

    pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
    # Pressure carries the same units as PRESWAT_L0 ('dbar'), not the
    # conductivity units used for CONDWAT_L1.
    presL1_ctxt = ParameterContext(name='PRESWAT_L1',
                                   parameter_type='function',
                                   parameter_function_id=pl1_func_id,
                                   parameter_function_map=pl1_pmap,
                                   value_encoding='float32',
                                   units='dbar')
    contexts['PRESWAT_L1'] = dm.create_parameter(presL1_ctxt)

    # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
    #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

    # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
    owner = 'gsw'
    sal_func = 'SP_from_C'
    sal_arglist = ['C', 't', 'p']
    sal_expr = ParameterFunction(name='PRACSAL',
                                 function_type=PFT.PYTHON,
                                 function=sal_func,
                                 owner=owner,
                                 args=sal_arglist)
    sal_func_id = dm.create_parameter_function(sal_expr)
    funcs['PRACSAL'] = sal_func_id

    # Intermediate parameter providing CONDWAT_L1 * 10 for SP_from_C.
    # It gets its own id variable so the PRACSAL function id above is
    # not overwritten before the PRACSAL context is built.
    c10_f = ParameterFunction(name='c10',
                              function_type=PFT.NUMEXPR,
                              function='C*10',
                              args=['C'])
    c10_func_id = dm.create_parameter_function(c10_f)
    c10 = ParameterContext(name='c10',
                           parameter_type='function',
                           parameter_function_id=c10_func_id,
                           parameter_function_map={'C': 'CONDWAT_L1'},
                           value_encoding='float32',
                           units='1')
    contexts['c10'] = dm.create_parameter(c10)

    # A magic function that may or may not exist actually forms the line below at runtime.
    sal_pmap = {'C': 'c10', 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'}
    sal_ctxt = ParameterContext(name='PRACSAL',
                                parameter_type='function',
                                parameter_function_id=sal_func_id,
                                parameter_function_map=sal_pmap,
                                value_encoding='float32',
                                units='g kg-1')
    contexts['PRACSAL'] = dm.create_parameter(sal_ctxt)

    # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
    # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
    # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
    owner = 'gsw'
    abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP',
                                  ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT'])
    cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t',
                                    [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
    dens_expr = PythonFunction('DENSITY', owner, 'rho',
                               [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
    dens_ctxt = CoverageParameterContext(
        'DENSITY',
        param_type=ParameterFunctionType(dens_expr),
        variability=VariabilityEnum.TEMPORAL)
    dens_ctxt.uom = 'kg m-3'
    # DENSITY is created through create_parameter_context from the dumped
    # coverage context, and is the only context registered for cleanup here.
    dens_ctxt_id = dm.create_parameter_context(
        name='DENSITY', parameter_context=dens_ctxt.dump())
    self.addCleanup(dm.delete_parameter_context, dens_ctxt_id)
    contexts['DENSITY'] = dens_ctxt_id

    return contexts, funcs
def test_rdt_param_funcs(self):
    """Evaluate chained CTD parameter functions through a RecordDictionaryTool.

    Registers six Python parameter functions, builds a parameter
    dictionary whose derived parameters (temperature, pressure,
    conductivity, salinity, density) are defined in terms of raw counts
    and lat/lon, fills in one sample of raw values and asserts that the
    derived ``density`` is close to 1001.00543606.
    """
    ctd_module = 'ion_functions.data.ctd_functions'
    param_funcs = {
        'identity': {
            'function_type': PFT.PYTHON,
            'owner': 'ion_functions.data.interpolation',
            'function': 'identity',
            'args': ['x'],
        },
        'ctd_tempwat': {
            'function_type': PFT.PYTHON,
            'owner': ctd_module,
            'function': 'ctd_sbe37im_tempwat',
            'args': ['t0'],
        },
        'ctd_preswat': {
            'function_type': PFT.PYTHON,
            'owner': ctd_module,
            'function': 'ctd_sbe37im_preswat',
            'args': ["p0", "p_range_psia"],
        },
        'ctd_condwat': {
            'function_type': PFT.PYTHON,
            'owner': ctd_module,
            'function': 'ctd_sbe37im_condwat',
            'args': ['c0'],
        },
        'ctd_pracsal': {
            'function_type': PFT.PYTHON,
            'owner': ctd_module,
            'function': 'ctd_pracsal',
            'args': ['c', 't', 'p'],
        },
        'ctd_density': {
            'function_type': PFT.PYTHON,
            'owner': ctd_module,
            'function': 'ctd_density',
            'args': ['SP', 't', 'p', 'lat', 'lon'],
        },
    }

    # .items() works on both Python 2 and 3 (iteritems is Python 2 only)
    pfunc_ids = {}
    for name, param_def in param_funcs.items():
        paramfunc = ParameterFunction(name, **param_def)
        pfunc_ids[name] = self.dataset_management.create_parameter_function(
            paramfunc)

    params = {
        'time': {
            'parameter_type': 'quantity',
            'value_encoding': 'float64',
            'units': 'seconds since 1900-01-01',
        },
        'temperature_counts': {
            'parameter_type': 'quantity',
            'value_encoding': 'float32',
            'units': '1',
        },
        'pressure_counts': {
            'parameter_type': 'quantity',
            'value_encoding': 'float32',
            'units': '1',
        },
        'conductivity_counts': {
            'parameter_type': 'quantity',
            'value_encoding': 'float32',
            'units': '1',
        },
        'temperature': {
            'parameter_type': 'function',
            'parameter_function_id': pfunc_ids['ctd_tempwat'],
            'parameter_function_map': {'t0': 'temperature_counts'},
            'value_encoding': 'float32',
            'units': 'deg_C',
        },
        'pressure': {
            'parameter_type': 'function',
            'parameter_function_id': pfunc_ids['ctd_preswat'],
            'parameter_function_map': {
                'p0': 'pressure_counts',
                'p_range_psia': 679.34040721,
            },
            'value_encoding': 'float32',
            'units': 'dbar',
        },
        'conductivity': {
            'parameter_type': 'function',
            'parameter_function_id': pfunc_ids['ctd_condwat'],
            'parameter_function_map': {'c0': 'conductivity_counts'},
            'value_encoding': 'float32',
            'units': 'S m-1',
        },
        'salinity': {
            'parameter_type': 'function',
            'parameter_function_id': pfunc_ids['ctd_pracsal'],
            'parameter_function_map': {
                'c': 'conductivity',
                't': 'temperature',
                'p': 'pressure',
            },
            'value_encoding': 'float32',
            'units': '1',
        },
        'density': {
            'parameter_type': 'function',
            'parameter_function_id': pfunc_ids['ctd_density'],
            'parameter_function_map': {
                'SP': 'salinity',
                't': 'temperature',
                'p': 'pressure',
                'lat': 'lat',
                'lon': 'lon',
            },
            'value_encoding': 'float32',
            # Density is mass per volume
            'units': 'kg m-3',
        },
        'lat': {
            'parameter_type': 'sparse',
            'value_encoding': 'float32',
            'units': 'degrees_north',
        },
        'lon': {
            'parameter_type': 'sparse',
            'value_encoding': 'float32',
            'units': 'degrees_east',
        },
    }

    param_dict = {}
    for name, param in params.items():
        pcontext = ParameterContext(name, **param)
        param_dict[name] = self.dataset_management.create_parameter(pcontext)

    # 'time' is passed as the third argument of the dictionary
    pdict_id = self.dataset_management.create_parameter_dictionary(
        'ctd_test', param_dict.values(), 'time')
    stream_def_id = self.pubsub_management.create_stream_definition(
        'ctd_test', parameter_dictionary_id=pdict_id)

    # One sample of raw inputs; the derived fields are read back from
    # the record dictionary.
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [0]
    rdt['temperature_counts'] = [280000]
    rdt['conductivity_counts'] = [100000]
    rdt['pressure_counts'] = [2789]
    rdt['lat'] = [45]
    rdt['lon'] = [-71]

    np.testing.assert_allclose(rdt['density'], np.array([1001.00543606]))