def test_array_flow_paths(self):
    data_product_id, stream_def_id = self.make_array_data_product()

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    dm = DatasetMonitor(dataset_id)
    self.addCleanup(dm.stop)

    # I need to make sure that we can fill the RDT with its values
    # Test for one timestep
    # Test for multiple timesteps
    # Publishes
    # Ingests correctly
    # Retrieves correctly

    #--------------------------------------------------------------------------------
    # Ensure that the RDT can be filled with ArrayType values
    #--------------------------------------------------------------------------------
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [0]
    rdt['temp_sample'] = [[0,1,2,3,4]]
    np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0,1,2,3,4]]))

    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dm.event.wait(10))
    dm.event.clear()

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0,1,2,3,4]]))

    #--------------------------------------------------------------------------------
    # Ensure that it deals with multiple values
    #--------------------------------------------------------------------------------
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [1,2,3]
    rdt['temp_sample'] = [[0,1,2,3,4],[1],[5,5,5,5,5]]

    m = rdt.fill_value('temp_sample') or np.finfo(np.float32).max
    np.testing.assert_equal(m, np.finfo(np.float32).max)
    np.testing.assert_array_equal(rdt['temp_sample'], [[0,1,2,3,4],[1,m,m,m,m],[5,5,5,5,5]])

    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dm.event.wait(10))
    dm.event.clear()

    #--------------------------------------------------------------------------------
    # Retrieve and Verify
    #--------------------------------------------------------------------------------
    retrieved_granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(retrieved_granule)
    np.testing.assert_array_equal(rdt['time'], np.array([0,1,2,3]))
    np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0,1,2,3,4],[0,1,2,3,4],[1,m,m,m,m],[5,5,5,5,5]]))
def test_append_parameter(self):
    # Make a CTDBP Data Product
    data_product_id = self.make_ctd_data_product()
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    # Throw some data in it
    rdt = self.ph.rdt_for_data_product(data_product_id)
    rdt['time'] = np.arange(30)
    rdt['temp'] = np.arange(30)
    rdt['pressure'] = np.arange(30)

    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dataset_monitor.wait())
    dataset_monitor.event.clear()

    # Grab the egg
    egg_url = self.egg_url
    egg_path = TransformWorker.download_egg(egg_url)
    import pkg_resources
    pkg_resources.working_set.add_entry(egg_path)
    self.addCleanup(os.remove, egg_path)

    # Make a parameter function
    owner = 'ion_example.add_arrays'
    func = 'add_arrays'
    arglist = ['a', 'b']
    pf = ParameterFunction(name='add_arrays', function_type=PFT.PYTHON, owner=owner, function=func, args=arglist)
    pfunc_id = self.dataset_management.create_parameter_function(pf)
    self.addCleanup(self.dataset_management.delete_parameter_function, pfunc_id)

    # Make a context (instance of the function)
    context = ParameterContext(name='array_sum',
                               units="1",
                               fill_value="-9999",
                               parameter_function_id=pfunc_id,
                               parameter_type="function",
                               value_encoding="float32",
                               display_name="Array Summation",
                               parameter_function_map={'a': 'temp', 'b': 'pressure'})
    #pfunc = DatasetManagementService.get_coverage_function(pf)
    #pfunc.param_map = {'a':'temp', 'b':'pressure'}
    #ctxt = ParameterContext('array_sum', param_type=ParameterFunctionType(pfunc))
    #ctxt_dump = ctxt.dump()
    #ctxt_id = self.dataset_management.create_parameter_context('array_sum', ctxt_dump)
    ctxt_id = self.dataset_management.create_parameter(context)
    self.dataset_management.add_parameter_to_dataset(ctxt_id, dataset_id)

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['array_sum'], np.arange(0, 60, 2))
def test_coefficient_compatibility(self):
    data_product_id = self.create_data_product(name='Calibration Coefficient Test Data product', stream_def_id=self.stream_def_id)
    self.data_product_management.activate_data_product_persistence(data_product_id)
    self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

    rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = [10] * 10
    rdt['cc_coefficient'] = [2] * 10

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dataset_monitor.wait())

    rdt2 = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))
    np.testing.assert_array_equal(rdt2['offset'], [12] * 10)
def test_instrument_simple(self):
    instrument_model_id = self.create_instrument_model()
    instrument_agent_id = self.create_instrument_agent(instrument_model_id)
    instrument_device_id = self.create_instrument_device(instrument_model_id)
    instrument_agent_instance_id = self.create_instrument_agent_instance(instrument_agent_id, instrument_device_id)

    raw_dp_id, parsed_dp_id = self.create_instrument_data_products(instrument_device_id)

    self.start_instrument_agent_instance(instrument_agent_instance_id)

    agent_process_id = self.poll_instrument_agent_instance(instrument_agent_instance_id, instrument_device_id)

    agent_client = ResourceAgentClient(instrument_device_id, to_name=agent_process_id, process=FakeProcess())

    self.agent_state_transition(agent_client, ResourceAgentEvent.INITIALIZE, ResourceAgentState.INACTIVE)
    self.agent_state_transition(agent_client, ResourceAgentEvent.GO_ACTIVE, ResourceAgentState.IDLE)
    self.agent_state_transition(agent_client, ResourceAgentEvent.RUN, ResourceAgentState.COMMAND)

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(parsed_dp_id)

    for i in xrange(10):
        monitor = DatasetMonitor(dataset_id=dataset_id)
        agent_client.execute_resource(AgentCommand(command=SBE37ProtocolEvent.ACQUIRE_SAMPLE))
        if not monitor.event.wait(30):
            raise AssertionError('Failed on the %ith granule' % i)
        monitor.stop()

    rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))
    self.assertEquals(len(rdt), 10)
def test_instrument_simple(self):
    instrument_model_id = self.create_instrument_model()
    instrument_agent_id = self.create_instrument_agent(instrument_model_id)
    instrument_device_id = self.create_instrument_device(instrument_model_id)
    instrument_agent_instance_id = self.create_instrument_agent_instance(instrument_agent_id, instrument_device_id)

    raw_dp_id, parsed_dp_id = self.create_instrument_data_products(instrument_device_id)

    self.start_instrument_agent_instance(instrument_agent_instance_id)

    agent_process_id = self.poll_instrument_agent_instance(instrument_agent_instance_id, instrument_device_id)

    agent_client = ResourceAgentClient(instrument_device_id, to_name=agent_process_id, process=FakeProcess())

    self.agent_state_transition(agent_client, ResourceAgentEvent.INITIALIZE, ResourceAgentState.INACTIVE)
    self.agent_state_transition(agent_client, ResourceAgentEvent.GO_ACTIVE, ResourceAgentState.IDLE)
    self.agent_state_transition(agent_client, ResourceAgentEvent.RUN, ResourceAgentState.COMMAND)

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(parsed_dp_id)

    for i in xrange(10):
        monitor = DatasetMonitor(dataset_id=dataset_id)
        agent_client.execute_resource(AgentCommand(command=SBE37ProtocolEvent.ACQUIRE_SAMPLE))
        if not monitor.wait():
            raise AssertionError('Failed on the %ith granule' % i)
        monitor.stop()

    rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))
    self.assertEquals(len(rdt), 10)
def cb(msg, sr, sid):
    self.assertEqual(sid, stream_id_out)
    rdt_out = RecordDictionaryTool.load_from_granule(msg)
    self.assertEquals(set([k for k,v in rdt_out.iteritems()]), set(available_fields_out))
    for k,v in rdt_out.iteritems():
        self.assertEquals(rdt_out[k], None)
    e.set()
def test_example_preload(self):
    print 'preloading...'
    self.preload_example1()

    data_product_ids, _ = self.container.resource_registry.find_resources_ext(alt_id='DPROD102', alt_id_ns='PRE')
    data_product_id = data_product_ids[0]
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)

    with DirectCoverageAccess() as dca:
        dca.upload_calibration_coefficients(dataset_id, 'test_data/sbe16coeffs.csv', 'test_data/sbe16coeffs.yml')

    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    rdt = ph.rdt_for_data_product(data_product_id)
    rdt['time'] = [time.time() + 2208988800]
    rdt['temperature'] = [248471]
    rdt['pressure'] = [528418]
    rdt['conductivity'] = [1673175]
    rdt['thermistor_temperature'] = [24303]

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ph.publish_rdt_to_data_product(data_product_id, rdt)
    dataset_monitor.event.wait(10)

    g = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(g)

    breakpoint(locals())
def verify_incoming(self, m, r, s):
    rdt = RecordDictionaryTool.load_from_granule(m)
    self.assertEquals(rdt, self.rdt)
    self.assertEquals(m.data_producer_id, self.data_producer_id)
    self.assertEquals(m.provider_metadata_update, self.provider_metadata_update)
    self.assertNotEqual(m.creation_timestamp, None)
    self.event.set()
def test_example2_preload(self):
    print 'preloading...'
    self.preload_example2()

    data_product_ids, _ = self.container.resource_registry.find_resources_ext(alt_id='DPROD104', alt_id_ns='PRE')
    data_product_id = data_product_ids[0]
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)

    with DirectCoverageAccess() as dca:
        dca.upload_calibration_coefficients(dataset_id, 'test_data/vel3d_coeff.csv', 'test_data/vel3d_coeff.yml')

    from ion_functions.data.test.test_vel_functions import TS, VE, VN, VU

    rdt = ParameterHelper.rdt_for_data_product(data_product_id)
    rdt['time'] = [time.time() + 2208988800]
    rdt['velocity_east'] = [VE[0]]
    rdt['velocity_north'] = [VN[0]]
    rdt['velocity_up'] = [VU[0]]

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    dataset_monitor.event.wait(10)

    g = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(g)

    breakpoint(locals())
def process(self, dataset_id, start_time=0, end_time=0):
    if not dataset_id:
        raise BadRequest('No dataset id specified.')
    now = time.time()
    start_time = start_time or (now - (3600*(self.run_interval+1)))  # Every N hours with 1 of overlap
    end_time = end_time or now

    qc_params = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes

    self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
    log.debug('Iterating over the data blocks')

    for st, et in self.chop(int(start_time), int(end_time)):
        log.debug('Chopping %s:%s', st, et)
        log.debug("Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s')", dataset_id, st, et)
        granule = self.data_retriever.retrieve(dataset_id, query={'start_time':st, 'end_time':et})
        log.debug('Retrieved Data')
        rdt = RecordDictionaryTool.load_from_granule(granule)
        qc_fields = [i for i in rdt.fields if any([i.endswith(j) for j in qc_params])]
        log.debug('QC Fields: %s', qc_fields)
        for field in qc_fields:
            val = rdt[field]
            if val is None:
                continue
            if not np.all(val):
                log.debug('Found QC Alerts')
                indexes = np.where(val == 0)
                timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                self.flag_qc_parameter(dataset_id, field, timestamps.tolist(), {})
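# A plausible sketch (not the actual implementation) of the chop() helper that the
# QC post-processing loop above iterates over: it splits [start_time, end_time) into
# fixed-size blocks so each data_retriever.retrieve() call stays bounded. The one-hour
# block size and the signature are assumptions for illustration only.
def chop(self, start_time, end_time, block_size=3600):
    # Yield (block_start, block_end) pairs covering the requested interval
    for st in xrange(start_time, end_time, block_size):
        yield st, min(st + block_size, end_time)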
def test_lctest_preload(self):
    self.preload_lctest()

    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sparse_dict', id_only=True)
    stream_def_id = self.create_stream_definition('sparse_example', parameter_dictionary_id=pdict_id)
    data_product_id = self.create_data_product('sparse_example', stream_def_id=stream_def_id)
    self.activate_data_product(data_product_id)

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)

    rdt = ParameterHelper.rdt_for_data_product(data_product_id)
    rdt['time'] = [time.time() + 2208988800]
    rdt['sparse_float'] = [3.14159265358979323]
    rdt['sparse_double'] = [2.7182818284590452353602874713526624977572470936999595]
    rdt['sparse_int'] = [131071]  # 6th mersenne prime

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    dataset_monitor.event.wait(10)

    for i in xrange(10):
        dataset_monitor.event.clear()
        rdt = ParameterHelper.rdt_for_data_product(data_product_id)
        rdt['time'] = [time.time() + 2208988800]
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        dataset_monitor.event.wait(10)

    g = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(g)

    breakpoint(locals())
def check_tempsf_instrument_data_product(self, reference_designator):
    passing = True
    info_list = []
    passing &= self.check_data_product_reference(reference_designator, info_list)
    if not passing:
        return passing
    data_product_id, stream_def_id, dataset_id = info_list.pop()

    now = time.time()
    ntp_now = now + 2208988800

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [ntp_now]
    rdt['temperature'] = [[25.3884, 26.9384, 24.3394, 23.3401, 22.9832,
                           29.4434, 26.9873, 15.2883, 16.3374, 14.5883,
                           15.7253, 18.4383, 15.3488, 17.2993, 10.2111,
                           11.5993, 10.9345, 9.4444, 9.9876, 10.9834,
                           11.0098, 5.3456, 4.2994, 4.3009]]

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    passing &= self.assertTrue(dataset_monitor.event.wait(20))
    if not passing:
        return passing

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
    passing &= self.assert_array_almost_equal(rdt['temperature'], [[25.3884, 26.9384, 24.3394, 23.3401, 22.9832,
                                                                    29.4434, 26.9873, 15.2883, 16.3374, 14.5883,
                                                                    15.7253, 18.4383, 15.3488, 17.2993, 10.2111,
                                                                    11.5993, 10.9345, 9.4444, 9.9876, 10.9834,
                                                                    11.0098, 5.3456, 4.2994, 4.3009]])
    return passing
def test_execute_transform(self):
    available_fields_in = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
    available_fields_out = ['time', 'lat', 'lon', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'PRACSAL', 'DENSITY']
    exchange_pt1 = 'xp1'
    exchange_pt2 = 'xp2'
    stream_id_in, stream_id_out, stream_route_in, stream_route_out, stream_def_in_id, stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

    rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
    dt = 20
    rdt_in['time'] = np.arange(dt)
    rdt_in['lat'] = [40.992469] * dt
    rdt_in['lon'] = [-71.727069] * dt
    rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
    rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
    rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
    msg = rdt_in.to_granule()

    #pid = self.container.spawn_process('transform_stream','ion.processes.data.transforms.transform_prime','TransformPrime',{'process':{'routes':{(stream_id_in, stream_id_out):None},'stream_id':stream_id_out}})
    config = {'process':{'routes':{(stream_id_in, stream_id_out):None}, 'queue_name':exchange_pt1, 'publish_streams':{str(stream_id_out):stream_id_out}, 'process_type':'stream_process'}}
    pid = self.container.spawn_process('transform_stream', 'ion.processes.data.transforms.transform_prime', 'TransformPrime', config)

    rdt_out = self.container.proc_manager.procs[pid]._execute_transform(msg, (stream_id_in, stream_id_out))

    #need below to wrap result in a param val object
    rdt_out = RecordDictionaryTool.load_from_granule(rdt_out.to_granule())
    for k, v in rdt_out.iteritems():
        self.assertEqual(len(v), dt)

    self._validate_transforms(rdt_in, rdt_out)
    self.container.proc_manager.terminate_process(pid)
def test_derived_data_product(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='ctd parsed', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

    tdom, sdom = time_series_domain()

    dp = DataProduct(name='Instrument DP', temporal_domain=tdom.dump(), spatial_domain=sdom.dump())

    dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
    self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

    self.dpsc_cli.activate_data_product_persistence(dp_id)
    self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

    dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
    if not dataset_ids:
        raise NotFound("Data Product %s dataset does not exist" % str(dp_id))
    dataset_id = dataset_ids[0]

    # Make the derived data product
    simple_stream_def_id = self.pubsubcli.create_stream_definition(name='TEMPWAT stream def', parameter_dictionary_id=pdict_id, available_fields=['time','temp'])
    tempwat_dp = DataProduct(name='TEMPWAT')
    tempwat_dp_id = self.dpsc_cli.create_data_product(tempwat_dp, stream_definition_id=simple_stream_def_id, parent_data_product_id=dp_id)
    self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
    self.dpsc_cli.activate_data_product_persistence(tempwat_dp_id)
    self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, tempwat_dp_id)

    # Check that the streams associated with the data product are persisted
    stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
    for stream_id in stream_ids:
        self.assertTrue(self.ingestclient.is_persisted(stream_id))
    stream_id = stream_ids[0]

    route = self.pubsubcli.read_stream_route(stream_id=stream_id)

    rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)
    rdt['pressure'] = np.arange(20)

    publisher = StandaloneStreamPublisher(stream_id, route)

    dataset_modified = Event()
    def cb(*args, **kwargs):
        dataset_modified.set()
    es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
    es.start()
    self.addCleanup(es.stop)

    publisher.publish(rdt.to_granule())

    self.assertTrue(dataset_modified.wait(30))

    tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id, PRED.hasDataset, id_only=True)
    tempwat_dataset_id = tempwat_dataset_ids[0]
    granule = self.data_retriever.retrieve(tempwat_dataset_id, delivery_format=simple_stream_def_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['time'], np.arange(20))
    self.assertEquals(set(rdt.fields), set(['time','temp']))
def verify_incoming(self, m, r, s):
    rdt = RecordDictionaryTool.load_from_granule(m)
    for k, v in rdt.iteritems():
        np.testing.assert_array_equal(v, self.rdt[k])
    self.assertEquals(m.data_producer_id, self.data_producer_id)
    self.assertEquals(m.provider_metadata_update, self.provider_metadata_update)
    self.assertNotEqual(m.creation_timestamp, None)
    self.event.set()
def verifier(msg, route, stream_id):
    for k, v in msg.record_dictionary.iteritems():
        if v is not None:
            self.assertIsInstance(v, np.ndarray)
    rdt = RecordDictionaryTool.load_from_granule(msg)
    for field in rdt.fields:
        self.assertIsInstance(rdt[field], np.ndarray)
    verified.set()
def test_granule(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
    pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    publisher = StandaloneStreamPublisher(stream_id, route)

    subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
    subscriber.start()
    self.addCleanup(subscriber.stop)

    subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
    self.pubsub_management.activate_subscription(subscription_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['pressure'] = [20] * 10

    self.assertEquals(set(pdict.keys()), set(rdt.fields))
    self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)
    self.assertEquals(rdt._stream_config['reference_designator'], "GA03FLMA-RI001-13-CTDMOG999")

    self.rdt = rdt
    self.data_producer_id = 'data_producer'
    self.provider_metadata_update = {1:1}

    publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

    self.assertTrue(self.event.wait(10))

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.array([None, None, None])
    self.assertTrue(rdt['time'] is None)

    rdt['time'] = np.array([None, 1, 2])
    self.assertEquals(rdt['time'][0], rdt.fill_value('time'))

    stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
    rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)

    granule = rdt.to_granule()
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['time'], np.arange(20))
    np.testing.assert_array_equal(rdt['temp'], np.arange(20))
def check_rsn_instrument_data_product(self):
    passing = True
    # for RS03AXBS-MJ03A-06-PRESTA301 (PREST-A) there are a few listed data products
    # Parsed, Engineering
    # SFLPRES-0 SFLPRES-1
    # Check for the two data products and make sure they have the proper parameters
    # SFLPRES-0 should
    data_products, _ = self.RR.find_resources_ext(alt_id_ns='PRE', alt_id='RS03AXBS-MJ03A-06-PRESTA301_SFLPRES_L0_DPID', id_only=True)
    passing &= self.assertTrue(len(data_products) == 1)
    if not data_products:
        return passing

    data_product_id = data_products[0]

    stream_defs, _ = self.RR.find_objects(data_product_id, PRED.hasStreamDefinition, id_only=False)
    passing &= self.assertTrue(len(stream_defs) == 1)
    if not stream_defs:
        return passing

    # Assert that the stream definition has the correct reference designator
    stream_def = stream_defs[0]
    passing &= self.assertEquals(stream_def.stream_configuration['reference_designator'], 'RS03AXBS-MJ03A-06-PRESTA301')

    # Get the pdict and make sure that the parameters corresponding to the available fields
    # begin with the appropriate data product identifier
    pdict_ids, _ = self.RR.find_objects(stream_def, PRED.hasParameterDictionary, id_only=True)
    passing &= self.assertEquals(len(pdict_ids), 1)
    if not pdict_ids:
        return passing

    pdict_id = pdict_ids[0]

    pdict = DatasetManagementService.get_parameter_dictionary(pdict_id)
    available_params = [pdict.get_context(i) for i in pdict.keys() if i in stream_def.available_fields]
    for p in available_params:
        if p.name == 'time':  # Ignore the domain parameter
            continue
        passing &= self.assertTrue(p.ooi_short_name.startswith('SFLPRES'))

    passing &= self.check_presta_instrument_data_products('RS01SLBS-MJ01A-06-PRESTA101')
    passing &= self.check_vel3d_instrument_data_products('RS01SLBS-MJ01A-12-VEL3DB101')
    passing &= self.check_presta_instrument_data_products('RS03AXBS-MJ03A-06-PRESTA301')
    passing &= self.check_vel3d_instrument_data_products('RS03AXBS-MJ03A-12-VEL3DB301')
    passing &= self.check_tempsf_instrument_data_product('RS03ASHS-MJ03B-07-TMPSFA301')
    passing &= self.check_vel3d_instrument_data_products('RS03INT2-MJ03D-12-VEL3DB304')
    passing &= self.check_trhph_instrument_data_products('RS03INT1-MJ03C-10-TRHPHA301')

    self.data_product_management.activate_data_product_persistence(data_product_id)
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
    self.assert_array_almost_equal(rdt['absolute_pressure'], [14.8670], 4)
    self.data_product_management.suspend_data_product_persistence(data_product_id)  # Should do nothing and not raise anything

    return passing
def validator(msg, route, stream_id):
    rdt = RecordDictionaryTool.load_from_granule(msg)
    if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
        return
    if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
        return
    if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
        return
    validation_event.set()
def test_add_parameter_function(self):
    # req-tag: NEW SA - 31
    # Make a CTDBP Data Product
    data_product_id = self.make_ctd_data_product()
    self.data_product_id = data_product_id
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    # Throw some data in it
    rdt = self.ph.rdt_for_data_product(data_product_id)
    rdt['time'] = np.arange(30)
    rdt['temp'] = np.arange(30)
    rdt['pressure'] = np.arange(30)

    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dataset_monitor.wait())
    dataset_monitor.event.clear()

    #--------------------------------------------------------------------------------
    # This is what the user defines either via preload or through the UI
    #--------------------------------------------------------------------------------
    # Where the egg is
    egg_url = self.egg_url

    # Make a parameter function
    owner = 'ion_example.add_arrays'
    func = 'add_arrays'
    arglist = ['a', 'b']
    pf = ParameterFunction(name='add_arrays', function_type=PFT.PYTHON, owner=owner, function=func, args=arglist, egg_uri=egg_url)
    pfunc_id = self.dataset_management.create_parameter_function(pf)
    #--------------------------------------------------------------------------------
    self.addCleanup(self.dataset_management.delete_parameter_function, pfunc_id)

    # Make a data process definition
    dpd = DataProcessDefinition(name='add_arrays', description='Sums two arrays')
    dpd_id = self.data_process_management.create_data_process_definition(dpd, pfunc_id)

    # TODO: assert assoc exists
    argmap = {'a':'temp', 'b':'pressure'}
    dp_id = self.data_process_management.create_data_process(dpd_id, [data_product_id], argument_map=argmap, out_param_name='array_sum')

    # Verify that the function worked!
    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['array_sum'], np.arange(0, 60, 2))

    # Verify that we can inspect it as well
    source_code = self.data_process_management.inspect_data_process_definition(dpd_id)
    self.assertEquals(source_code, 'def add_arrays(a, b):\n return a+b\n')

    url = self.data_process_management.get_data_process_definition_url(dpd_id)
    self.assertEquals(url, 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg')

    dpd_ids, _ = self.resource_registry.find_resources(name='dataqc_spiketest', restype=RT.DataProcessDefinition, id_only=True)
    dpd_id = dpd_ids[0]
    url = self.data_process_management.get_data_process_definition_url(dpd_id)
    self.assertEquals(url, 'https://github.com/ooici/ion-functions/blob/master/ion_functions/qc/qc_functions.py')
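# A minimal sketch of the 'add_arrays' parameter function that the ion_example egg
# presumably ships: the test above asserts that inspecting the data process
# definition returns this source, so the body below mirrors that assertion. The
# module layout of the egg itself is an assumption.
def add_arrays(a, b):
    # Element-wise sum of the two mapped input parameters (e.g. temp and pressure)
    return a+b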
def recv_packet(self, granule, stream_route, stream_id):
    tool = RecordDictionaryTool.load_from_granule(granule)
    msg = ''
    for (k, v) in tool.iteritems():
        msg += '\n\t' + repr(k) + " => " + repr(v)
    if msg:
        log.debug('have granule with payload:' + msg)
    else:
        log.info('have empty granule')
def validation(msg, route, stream_id):
    rdt = RecordDictionaryTool.load_from_granule(msg)
    np.testing.assert_array_almost_equal(rdt['conductivity_L1'], np.array([42.914]))
    np.testing.assert_array_almost_equal(rdt['temp_L1'], np.array([20.]))
    np.testing.assert_array_almost_equal(rdt['pressure_L1'], np.array([3.068]))
    np.testing.assert_array_almost_equal(rdt['density'], np.array([1021.7144739593881]))
    np.testing.assert_array_almost_equal(rdt['salinity'], np.array([30.935132729668283]))
    validated.set()
def process(self, dataset_id, start_time=0, end_time=0):
    if not dataset_id:
        raise BadRequest('No dataset id specified.')
    now = time.time()
    start_time = start_time or (now - (3600 * (self.run_interval + 1)))  # Every N hours with 1 of overlap
    end_time = end_time or now

    qc_params = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes

    self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
    log.debug('Iterating over the data blocks')

    for st, et in self.chop(int(start_time), int(end_time)):
        log.debug('Chopping %s:%s', st, et)
        log.debug("Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s')", dataset_id, st, et)
        try:
            granule = self.data_retriever.retrieve(dataset_id, query={'start_time': st, 'end_time': et})
        except BadRequest:
            data_products, _ = self.container.resource_registry.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct)
            for data_product in data_products:
                log.exception('Failed to perform QC Post Processing on %s', data_product.name)
                log.error('Calculated Start Time: %s', st)
                log.error('Calculated End Time: %s', et)
            raise
        log.debug('Retrieved Data')
        rdt = RecordDictionaryTool.load_from_granule(granule)
        qc_fields = [i for i in rdt.fields if any([i.endswith(j) for j in qc_params])]
        log.debug('QC Fields: %s', qc_fields)
        for field in qc_fields:
            val = rdt[field]
            if val is None:
                continue
            if not np.all(val):
                log.debug('Found QC Alerts')
                indexes = np.where(val == 0)
                timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                self.flag_qc_parameter(dataset_id, field, timestamps.tolist(), {})
def test_ccov_visualization(self):
    '''
    Tests Complex Coverage aggregation of array types and proper splitting of coverages
    tests pydap and the visualization
    '''
    if not CFG.get_safe('bootstrap.use_pydap', False):
        raise unittest.SkipTest('PyDAP is off (bootstrap.use_pydap)')

    data_product_id, stream_def_id = self.make_array_data_product()

    # Make a granule with an array type, give it a few values
    # Send it to google_dt transform, verify output
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(2208988800, 2208988810)
    rdt['temp_sample'] = np.arange(10*4).reshape(10,4)
    rdt['cond_sample'] = np.arange(10*4).reshape(10,4)

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    self.ph.publish_rdt_to_data_product(data_product_id, rdt, connection_id='abc1', connection_index='1')
    self.assertTrue(dataset_monitor.event.wait(10))
    dataset_monitor.event.clear()

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(2208988810, 2208988820)
    rdt['temp_sample'] = np.arange(10*4).reshape(10,4)
    rdt['cond_sample'] = np.arange(10*4).reshape(10,4)

    self.ph.publish_rdt_to_data_product(data_product_id, rdt, connection_id='abc2', connection_index='1')
    self.assertTrue(dataset_monitor.event.wait(10))
    dataset_monitor.event.clear()

    qstring = '{"stride_time": 1, "parameters": [], "query_type": "highcharts_data", "start_time": 0, "use_direct_access": 0, "end_time": 19}'
    graph = self.visualization.get_visualization_data(data_product_id, qstring)
    self.assertIn('temp_sample[3]', graph)

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['temp_sample'][0], np.arange(4))

    pydap_host = CFG.get_safe('server.pydap.host', 'localhost')
    pydap_port = CFG.get_safe('server.pydap.port', 8001)
    url = 'http://%s:%s/%s' % (pydap_host, pydap_port, dataset_id)

    ds = open_url(url)

    temp_sample, time = ds['temp_sample']
    temp_values, dim = temp_sample[0]
    np.testing.assert_array_equal(temp_values, np.array(['0.0,1.0,2.0,3.0']))
def recv_packet(self, msg, route, stream_id):
    rdt = RecordDictionaryTool.load_from_granule(msg)
    document = {}
    for k, v in rdt.iteritems():
        value_array = np.atleast_1d(v[:])
        if 'f' in value_array.dtype.str:
            document[k] = float(value_array[-1])
        elif 'i' in value_array.dtype.str:
            document[k] = int(value_array[-1])
    self.stored_value_manager.stored_value_cas(self.document_key, document)
def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
    '''
    Loops until there is a sufficient amount of data in the dataset
    '''
    done = False
    with gevent.Timeout(40):
        while not done:
            granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
            rdt = RecordDictionaryTool.load_from_granule(granule)
            extents = self.dataset_management.dataset_extents(dataset_id, rdt._pdict.temporal_parameter_name)[0]
            if rdt[rdt._pdict.temporal_parameter_name] and rdt[rdt._pdict.temporal_parameter_name][0] != rdt._pdict.get_context(rdt._pdict.temporal_parameter_name).fill_value and extents >= data_size:
                done = True
            else:
                gevent.sleep(0.2)
def write_to_data_product(self, data_product_id):
    dataset_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasDataset', id_only=True)
    dataset_id = dataset_ids.pop()

    stream_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
    stream_id = stream_ids.pop()

    stream_def_ids, _ = self.resource_registry.find_objects(stream_id, 'hasStreamDefinition', id_only=True)
    stream_def_id = stream_def_ids.pop()

    route = self.pubsub_management.read_stream_route(stream_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    time_param = rdt._pdict.temporal_parameter_name
    if time_param is None:
        print '%s has no temporal parameter' % self.resource_registry.read(data_product_id).name
        return
    rdt[time_param] = np.arange(40)

    for field in rdt.fields:
        if field == rdt._pdict.temporal_parameter_name:
            continue
        rdt[field] = self.fill_values(rdt._pdict.get_context(field).param_type, 40)

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish(rdt.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, 40)

    granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(granule)

    bad = []
    for field in rdt.fields:
        if not np.array_equal(rdt[field], rdt_out[field]):
            print '%s' % field
            print '%s != %s' % (rdt[field], rdt_out[field])
            bad.append(field)
    return bad
def test_array_visualization(self):
    data_product_id, stream_def_id = self.make_array_data_product()

    # Make a granule with an array type, give it a few values
    # Send it to google_dt transform, verify output
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(2208988800, 2208988810)
    rdt['temp_sample'] = np.arange(10*4).reshape(10,4)
    rdt['cond_sample'] = np.arange(10*4).reshape(10,4)
    granule = rdt.to_granule()

    dataset_monitor = DatasetMonitor(self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id))
    self.addCleanup(dataset_monitor.stop)
    self.ph.publish_rdt_to_data_product(data_product_id, rdt)
    dataset_monitor.event.wait(10)

    gdt_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('google_dt', id_only=True)
    gdt_stream_def = self.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)

    gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(granule, params=gdt_stream_def)

    rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
    testval = {'data_content': [
                   [0.0, 0.0, 1.0, 2.0, 3.0, 0.0, 2.0, 4.0, 6.0, 0.0, 1.0, 2.0, 3.0],
                   [1.0, 4.0, 5.0, 6.0, 7.0, 8.0, 10.0, 12.0, 14.0, 4.0, 5.0, 6.0, 7.0],
                   [2.0, 8.0, 9.0, 10.0, 11.0, 16.0, 18.0, 20.0, 22.0, 8.0, 9.0, 10.0, 11.0],
                   [3.0, 12.0, 13.0, 14.0, 15.0, 24.0, 26.0, 28.0, 30.0, 12.0, 13.0, 14.0, 15.0],
                   [4.0, 16.0, 17.0, 18.0, 19.0, 32.0, 34.0, 36.0, 38.0, 16.0, 17.0, 18.0, 19.0],
                   [5.0, 20.0, 21.0, 22.0, 23.0, 40.0, 42.0, 44.0, 46.0, 20.0, 21.0, 22.0, 23.0],
                   [6.0, 24.0, 25.0, 26.0, 27.0, 48.0, 50.0, 52.0, 54.0, 24.0, 25.0, 26.0, 27.0],
                   [7.0, 28.0, 29.0, 30.0, 31.0, 56.0, 58.0, 60.0, 62.0, 28.0, 29.0, 30.0, 31.0],
                   [8.0, 32.0, 33.0, 34.0, 35.0, 64.0, 66.0, 68.0, 70.0, 32.0, 33.0, 34.0, 35.0],
                   [9.0, 36.0, 37.0, 38.0, 39.0, 72.0, 74.0, 76.0, 78.0, 36.0, 37.0, 38.0, 39.0]],
               'data_description': [('time', 'number', 'time'),
                                    ('temp_sample[0]', 'number', 'temp_sample[0]', {'precision': '5'}),
                                    ('temp_sample[1]', 'number', 'temp_sample[1]', {'precision': '5'}),
                                    ('temp_sample[2]', 'number', 'temp_sample[2]', {'precision': '5'}),
                                    ('temp_sample[3]', 'number', 'temp_sample[3]', {'precision': '5'}),
                                    ('temp_offset[0]', 'number', 'temp_offset[0]', {'precision': '5'}),
                                    ('temp_offset[1]', 'number', 'temp_offset[1]', {'precision': '5'}),
                                    ('temp_offset[2]', 'number', 'temp_offset[2]', {'precision': '5'}),
                                    ('temp_offset[3]', 'number', 'temp_offset[3]', {'precision': '5'}),
                                    ('cond_sample[0]', 'number', 'cond_sample[0]', {'precision': '5'}),
                                    ('cond_sample[1]', 'number', 'cond_sample[1]', {'precision': '5'}),
                                    ('cond_sample[2]', 'number', 'cond_sample[2]', {'precision': '5'}),
                                    ('cond_sample[3]', 'number', 'cond_sample[3]', {'precision': '5'})],
               'viz_product_type': 'google_dt'}
    self.assertEquals(rdt['google_dt_components'][0], testval)
def test_add_parameter_to_data_product(self):
    #self.preload_ui()
    self.test_add_parameter_function()
    data_product_id = self.data_product_id
    stream_def_id = self.resource_registry.find_objects(data_product_id, PRED.hasStreamDefinition, id_only=True)[0][0]
    pdict_id = self.resource_registry.find_objects(stream_def_id, PRED.hasParameterDictionary, id_only=True)[0][0]

    # Create a new data product that represents the L1 temp from the ctd simulator
    dp = DataProduct(name='CTD Simulator TEMPWAT L1', category=DataProductTypeEnum.DERIVED)
    stream_def_id = self.pubsub_management.create_stream_definition(name='tempwat_l1', parameter_dictionary_id=pdict_id, available_fields=['time','temp'])
    dp_id = self.data_product_management.create_data_product(dp, stream_definition_id=stream_def_id, parent_data_product_id=data_product_id)

    parameter_function = ParameterFunction(name='linear_corr', function_type=PFT.NUMEXPR, function='a * x + b', args=['x','a','b'])
    pf_id = self.dataset_management.create_parameter_function(parameter_function)

    dpd = DataProcessDefinition(name='linear_corr', description='Linear Correction')
    self.data_process_management.create_data_process_definition(dpd, pf_id)

    parameter = ParameterContext(name='temperature_corrected',
                                 parameter_type='function',
                                 parameter_function_id=pf_id,
                                 parameter_function_map={'x':'temp', 'a':1.03, 'b':0.25},
                                 value_encoding='float32',
                                 units='deg_C',
                                 display_name='Temperature Corrected')
    p_id = self.dataset_management.create_parameter(parameter)

    # Add it to the parent or parsed data product
    self.data_product_management.add_parameter_to_data_product(p_id, data_product_id)

    # Then update the child's stream definition to include it
    stream_def = self.pubsub_management.read_stream_definition(stream_def_id)
    stream_def.available_fields.append('temperature_corrected')
    self.resource_registry.update(stream_def)

    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
    # For some reason, it takes numerous seconds of yielding with gevent for the coverage to actually save...
    gevent.sleep(10)

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)

    np.testing.assert_array_almost_equal(rdt['temperature_corrected'], np.arange(30, dtype=np.float32) * 1.03 + 0.25, decimal=5)
def test_filter(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    filtered_stream_def_id = self.pubsub_management.create_stream_definition('filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
    self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)

    rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
    self.assertEquals(rdt._available_fields, ['time','temp'])

    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)
    with self.assertRaises(KeyError):
        rdt['pressure'] = np.arange(20)

    granule = rdt.to_granule()
    rdt2 = RecordDictionaryTool.load_from_granule(granule)
    self.assertEquals(rdt._available_fields, rdt2._available_fields)
    self.assertEquals(rdt.fields, rdt2.fields)
    for k, v in rdt.iteritems():
        self.assertTrue(np.array_equal(rdt[k], rdt2[k]))
def rdt_to_granule(self, context, value_array, comp_val=None):
    pdict = ParameterDictionary()
    pdict.add_context(context)

    rdt = RecordDictionaryTool(param_dictionary=pdict)
    rdt['test'] = value_array

    granule = rdt.to_granule()
    rdt2 = RecordDictionaryTool.load_from_granule(granule)

    testval = comp_val if comp_val is not None else value_array
    actual = rdt2['test']

    if isinstance(testval, basestring):
        self.assertEquals(testval, actual)
    else:
        np.testing.assert_array_equal(testval, actual)
def check_presta_instrument_data_products(self, reference_designator):
    # Check the parsed data product make sure it's got everything it needs and can be published, persisted, etc.

    # Absolute Pressure (SFLPRES_L0) is what comes off the instrument, SFLPRES_L1 is a pfunc
    # Let's go ahead and publish some fake data!!!
    # According to https://alfresco.oceanobservatories.org/alfresco/d/d/workspace/SpacesStore/63e16865-9d9e-4b11-b0b3-d5658faa5080/1341-00230_Data_Product_Spec_SFLPRES_OOI.pdf
    # Appendix A. Example 1.
    # p_psia_tide = 14.8670
    # the tide should be 10.2504
    passing = True

    info_list = []
    passing &= self.check_data_product_reference(reference_designator, info_list)
    if not passing:
        return passing
    data_product_id, stream_def_id, dataset_id = info_list.pop()

    now = time.time()
    ntp_now = now + 2208988800.

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [ntp_now]
    rdt['absolute_pressure'] = [14.8670]
    passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    self.assertTrue(dataset_monitor.wait())  # Bumped to 20 to keep buildbot happy
    if not passing:
        return passing

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)

    passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
    passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
    passing &= self.assert_array_almost_equal(rdt['absolute_pressure'], [14.8670], 4)

    return passing
def rdt_to_granule(self, context, value_array, comp_val=None):
    time = ParameterContext(name='time', param_type=QuantityType(value_encoding=np.float64))

    pdict = ParameterDictionary()
    pdict.add_context(time, is_temporal=True)
    pdict.add_context(context)

    rdt = RecordDictionaryTool(param_dictionary=pdict)
    rdt['time'] = np.arange(len(value_array))
    rdt['test'] = value_array

    granule = rdt.to_granule()
    rdt2 = RecordDictionaryTool.load_from_granule(granule)

    testval = comp_val if comp_val is not None else value_array
    actual = rdt2['test']

    if isinstance(testval, basestring):
        self.assertEquals(testval, actual)
    else:
        np.testing.assert_array_equal(testval, actual)
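# A hypothetical call into the rdt_to_granule helper above, from inside a test method:
# build a simple QuantityType parameter context and confirm its values round-trip
# through a granule unchanged. The parameter name 'temp' and the sample values are
# illustrative only and do not come from the original tests.
context = ParameterContext(name='temp', param_type=QuantityType(value_encoding=np.float32))
self.rdt_to_granule(context, np.array([10.0, 11.5, 12.25], dtype=np.float32))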
def check_tempsf_instrument_data_product(self, reference_designator):
    passing = True
    info_list = []
    passing &= self.check_data_product_reference(reference_designator, info_list)
    if not passing:
        return passing
    data_product_id, stream_def_id, dataset_id = info_list.pop()

    now = time.time()
    ntp_now = now + 2208988800

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [ntp_now]
    rdt['temperature'] = [[25.3884, 26.9384, 24.3394, 23.3401, 22.9832,
                           29.4434, 26.9873, 15.2883, 16.3374, 14.5883,
                           15.7253, 18.4383, 15.3488, 17.2993, 10.2111,
                           11.5993, 10.9345, 9.4444, 9.9876, 10.9834,
                           11.0098, 5.3456, 4.2994, 4.3009]]

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    passing &= self.assertTrue(dataset_monitor.wait())
    if not passing:
        return passing

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
    passing &= self.assert_array_almost_equal(rdt['temperature'], [[25.3884, 26.9384, 24.3394, 23.3401, 22.9832,
                                                                    29.4434, 26.9873, 15.2883, 16.3374, 14.5883,
                                                                    15.7253, 18.4383, 15.3488, 17.2993, 10.2111,
                                                                    11.5993, 10.9345, 9.4444, 9.9876, 10.9834,
                                                                    11.0098, 5.3456, 4.2994, 4.3009]])
    return passing
def test_granule(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
        'ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition(
        'ctd', parameter_dictionary_id=pdict_id)
    pdict = DatasetManagementService.get_parameter_dictionary_by_name(
        'ctd_parsed_param_dict')
    self.addCleanup(self.pubsub_management.delete_stream_definition,
                    stream_def_id)

    stream_id, route = self.pubsub_management.create_stream(
        'ctd_stream', 'xp1', stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)
    self.xps.append('xp1')

    publisher = StandaloneStreamPublisher(stream_id, route)
    subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
    subscriber.start()

    subscription_id = self.pubsub_management.create_subscription(
        'sub', stream_ids=[stream_id])
    self.xns.append('sub')
    self.pubsub_management.activate_subscription(subscription_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['pressure'] = [20] * 10

    self.assertEquals(set(pdict.keys()), set(rdt.fields))
    self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

    self.rdt = rdt
    self.data_producer_id = 'data_producer'
    self.provider_metadata_update = {1: 1}

    publisher.publish(
        rdt.to_granule(data_producer_id='data_producer',
                       provider_metadata_update={1: 1}))

    self.assertTrue(self.event.wait(10))

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)

    filtered_stream_def_id = self.pubsub_management.create_stream_definition(
        'filtered',
        parameter_dictionary_id=pdict_id,
        available_fields=['time', 'temp'])
    self.addCleanup(self.pubsub_management.delete_stream_definition,
                    filtered_stream_def_id)
    rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
    self.assertEquals(rdt._available_fields, ['time', 'temp'])
    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)
    with self.assertRaises(KeyError):
        rdt['pressure'] = np.arange(20)

    granule = rdt.to_granule()
    rdt2 = RecordDictionaryTool.load_from_granule(granule)
    self.assertEquals(rdt._available_fields, rdt2._available_fields)
    self.assertEquals(rdt.fields, rdt2.fields)
    for k, v in rdt.iteritems():
        self.assertTrue(np.array_equal(rdt[k], rdt2[k]))

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.array([None, None, None])
    self.assertTrue(rdt['time'] is None)

    rdt['time'] = np.array([None, 1, 2])
    self.assertEquals(rdt['time'][0], rdt.fill_value('time'))
def validator(msg, route, stream_id):
    rdt = RecordDictionaryTool.load_from_granule(msg)
    if not np.allclose(rdt['rho'], np.array([1001.0055034])):
        return
    validation_event.set()
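# Editor's sketch (hypothetical wiring, not from the original suite): a
# callback like validator() above is handed to a StandaloneStreamSubscriber,
# the same way subscribers are built elsewhere in this file; 'validation_event'
# is assumed to be a gevent Event created by the enclosing test so the
# callback can signal it once the expected density value arrives.
def _sketch_validator_wiring(validator_callback):
    subscriber = StandaloneStreamSubscriber('validation_queue',
                                            validator_callback)
    subscriber.start()
    # The test then publishes a granule carrying 'rho' on the subscribed
    # stream and waits, e.g.: self.assertTrue(validation_event.wait(10))
    return subscriber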
def check_vel3d_instrument_data_products(self, reference_designator):
    passing = True
    info_list = []
    passing &= self.check_data_product_reference(reference_designator,
                                                 info_list)
    if not passing:
        return passing
    data_product_id, stream_def_id, dataset_id = info_list.pop()

    pdict = self.RR2.find_parameter_dictionary_of_stream_definition_using_has_parameter_dictionary(
        stream_def_id)
    self.assertEquals(pdict.name, 'vel3d_b_sample')

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    lat = 14.6846
    lon = -51.044
    ts = np.array([
        3319563600, 3319567200, 3319570800, 3319574400, 3319578000,
        3319581600, 3319585200, 3319588800, 3319592400, 3319596000
    ], dtype=np.float)

    ve = np.array([-3.2, 0.1, 0., 2.3, -0.1, 5.6, 5.1, 5.8, 8.8, 10.3])
    vn = np.array([18.2, 9.9, 12., 6.6, 7.4, 3.4, -2.6, 0.2, -1.5, 4.1])
    vu = np.array([-1.1, -0.6, -1.4, -2, -1.7, -2, 1.3, -1.6, -1.1, -4.5])

    ve_expected = np.array([
        -0.085136, -0.028752, -0.036007, 0.002136, -0.023158, 0.043218,
        0.056451, 0.054727, 0.088446, 0.085952
    ])
    vn_expected = np.array([
        0.164012, 0.094738, 0.114471, 0.06986, 0.07029, 0.049237, -0.009499,
        0.019311, 0.012096, 0.070017
    ])
    vu_expected = np.array([
        -0.011, -0.006, -0.014, -0.02, -0.017, -0.02, 0.013, -0.016, -0.011,
        -0.045
    ])

    rdt['time'] = ts
    rdt['lat'] = [lat] * 10
    rdt['lon'] = [lon] * 10
    rdt['turbulent_velocity_east'] = ve
    rdt['turbulent_velocity_north'] = vn
    rdt['turbulent_velocity_up'] = vu

    passing &= self.assert_array_almost_equal(
        rdt['eastward_turbulent_velocity'], ve_expected)
    passing &= self.assert_array_almost_equal(
        rdt['northward_turbulent_velocity'], vn_expected)
    passing &= self.assert_array_almost_equal(
        rdt['upward_turbulent_velocity'], vu_expected)

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    passing &= self.assertTrue(dataset_monitor.wait())
    if not passing:
        return passing

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    passing &= self.assert_array_almost_equal(
        rdt['eastward_turbulent_velocity'], ve_expected)
    passing &= self.assert_array_almost_equal(
        rdt['northward_turbulent_velocity'], vn_expected)
    passing &= self.assert_array_almost_equal(
        rdt['upward_turbulent_velocity'], vu_expected)
    return passing
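# Editor's note (assumption, not the library implementation): the expected
# upward values above are exactly the inputs scaled from cm/s to m/s
# (e.g. -1.1 -> -0.011), while the eastward/northward pair additionally
# appears to be rotated by the magnetic declination at (lat, lon). A generic
# sketch of such a correction, with theta as a placeholder declination angle
# in degrees rather than the value the derived parameters actually use:
def _sketch_magnetic_declination_correction(theta, ve_cm_s, vn_cm_s):
    import numpy as np
    t = np.radians(theta)
    ve = np.asarray(ve_cm_s) / 100.0
    vn = np.asarray(vn_cm_s) / 100.0
    east = ve * np.cos(t) + vn * np.sin(t)
    north = vn * np.cos(t) - ve * np.sin(t)
    return east, north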
def sync_rdt_with_granule(self, granule):
    rdt = RecordDictionaryTool.load_from_granule(granule)
    self.rdt = rdt
    return rdt
def check_rsn_instrument_data_product(self):
    passing = True
    # for RS03AXBS-MJ03A-06-PRESTA301 (PREST-A) there are a few listed data products
    # Parsed, Engineering
    # SFLPRES-0 SFLPRES-1
    # Check for the two data products and make sure they have the proper parameters
    # SFLPRES-0 should
    data_products, _ = self.RR.find_resources_ext(
        alt_id_ns='PRE',
        alt_id='RS03AXBS-MJ03A-06-PRESTA301_SFLPRES_L0_DPID',
        id_only=True)
    passing &= self.assertTrue(len(data_products) == 1)
    if not data_products:
        return passing

    data_product_id = data_products[0]

    stream_defs, _ = self.RR.find_objects(data_product_id,
                                          PRED.hasStreamDefinition,
                                          id_only=False)
    passing &= self.assertTrue(len(stream_defs) == 1)
    if not stream_defs:
        return passing

    # Assert that the stream definition has the correct reference designator
    stream_def = stream_defs[0]
    passing &= self.assertEquals(
        stream_def.stream_configuration['reference_designator'],
        'RS03AXBS-MJ03A-06-PRESTA301')

    # Get the pdict and make sure that the parameters corresponding to the
    # available fields begin with the appropriate data product identifier
    pdict_ids, _ = self.RR.find_objects(stream_def,
                                        PRED.hasParameterDictionary,
                                        id_only=True)
    passing &= self.assertEquals(len(pdict_ids), 1)
    if not pdict_ids:
        return passing

    pdict_id = pdict_ids[0]
    pdict = DatasetManagementService.get_parameter_dictionary(pdict_id)
    available_params = [
        pdict.get_context(i) for i in pdict.keys()
        if i in stream_def.available_fields
    ]
    for p in available_params:
        if p.name == 'time':  # Ignore the domain parameter
            continue
        passing &= self.assertTrue(p.ooi_short_name.startswith('SFLPRES'))

    passing &= self.check_presta_instrument_data_products(
        'RS01SLBS-MJ01A-06-PRESTA101')
    passing &= self.check_vel3d_instrument_data_products(
        'RS01SLBS-MJ01A-12-VEL3DB101')
    passing &= self.check_presta_instrument_data_products(
        'RS03AXBS-MJ03A-06-PRESTA301')
    passing &= self.check_vel3d_instrument_data_products(
        'RS03AXBS-MJ03A-12-VEL3DB301')
    passing &= self.check_tempsf_instrument_data_product(
        'RS03ASHS-MJ03B-07-TMPSFA301')
    passing &= self.check_vel3d_instrument_data_products(
        'RS03INT2-MJ03D-12-VEL3DB304')
    passing &= self.check_trhph_instrument_data_products(
        'RS03INT1-MJ03C-10-TRHPHA301')

    self.data_product_management.activate_data_product_persistence(
        data_product_id)
    dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
        data_product_id)
    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
    self.assert_array_almost_equal(rdt['absolute_pressure'], [14.8670], 4)
    # Should do nothing and not raise anything
    self.data_product_management.suspend_data_product_persistence(
        data_product_id)

    return passing
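# Editor's note (sanity check, assumption about the underlying parameter
# function): the two expected pressures above are consistent with a simple
# psi-to-dbar conversion of the absolute pressure, using the standard factor
# 1 psi ~= 0.689476 dbar: 14.8670 * 0.689476 ~= 10.2504.
def _sketch_psi_to_dbar(psi=14.8670):
    return psi * 0.689476  # ~= 10.2504 dbar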
def test_activate_suspend_data_product(self):
    #------------------------------------------------------------------------------------------------
    # create a stream definition for the data from the ctd simulator
    #------------------------------------------------------------------------------------------------
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
        'ctd_parsed_param_dict', id_only=True)
    ctd_stream_def_id = self.pubsubcli.create_stream_definition(
        name='Simulated CTD data', parameter_dictionary_id=pdict_id)
    log.debug("Created stream def id %s" % ctd_stream_def_id)

    #------------------------------------------------------------------------------------------------
    # test creating a new data product w/o a stream definition
    #------------------------------------------------------------------------------------------------
    # Construct temporal and spatial Coordinate Reference System objects
    dp_obj = IonObject(RT.DataProduct,
                       name='DP1',
                       description='some new dp')
    log.debug("Created an IonObject for a data product: %s" % dp_obj)

    #------------------------------------------------------------------------------------------------
    # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
    #------------------------------------------------------------------------------------------------
    dp_id = self.dpsc_cli.create_data_product(
        data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

    #------------------------------------------------------------------------------------------------
    # Subscribe to persist events
    #------------------------------------------------------------------------------------------------
    queue = gevent.queue.Queue()

    def info_event_received(message, headers):
        queue.put(message)

    es = EventSubscriber(event_type=OT.InformationContentStatusEvent,
                         callback=info_event_received,
                         origin=dp_id,
                         auto_delete=True)
    es.start()
    self.addCleanup(es.stop)

    #------------------------------------------------------------------------------------------------
    # test activate and suspend data product persistence
    #------------------------------------------------------------------------------------------------
    self.dpsc_cli.activate_data_product_persistence(dp_id)

    dp_obj = self.dpsc_cli.read_data_product(dp_id)
    self.assertIsNotNone(dp_obj)

    dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                predicate=PRED.hasDataset,
                                                id_only=True)
    if not dataset_ids:
        raise NotFound("Data Product %s dataset does not exist" % str(dp_id))
    dataset_id = dataset_ids[0]

    # Check that the streams associated with the data product are persisted
    stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                               RT.Stream, True)
    for stream_id in stream_ids:
        self.assertTrue(self.ingestclient.is_persisted(stream_id))

    stream_id = stream_ids[0]
    route = self.pubsubcli.read_stream_route(stream_id=stream_id)

    rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)

    publisher = StandaloneStreamPublisher(stream_id, route)

    dataset_modified = Event()

    def cb(*args, **kwargs):
        dataset_modified.set()

    es = EventSubscriber(event_type=OT.DatasetModified,
                         callback=cb,
                         origin=dataset_id,
                         auto_delete=True)
    es.start()
    self.addCleanup(es.stop)

    publisher.publish(rdt.to_granule())

    self.assertTrue(dataset_modified.wait(30))

    #--------------------------------------------------------------------------------
    # Now get the data in one chunk using an RPC call to start_retrieve
    #--------------------------------------------------------------------------------
    replay_data = self.data_retriever.retrieve(dataset_ids[0])
    self.assertIsInstance(replay_data, Granule)

    log.debug(
        "The data retriever was able to replay the dataset that was attached to the data product "
        "we wanted to be persisted. Therefore the data product was indeed persisted, "
        "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
        "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'"
    )

    data_product_object = self.rrclient.read(dp_id)
    self.assertEquals(data_product_object.name, 'DP1')
    self.assertEquals(data_product_object.description, 'some new dp')

    log.debug(
        "Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
        " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
        "resource registry, name='%s', desc='%s'" %
        (dp_obj.name, dp_obj.description, data_product_object.name,
         data_product_object.description))

    #------------------------------------------------------------------------------------------------
    # test suspend data product persistence
    #------------------------------------------------------------------------------------------------
    self.dpsc_cli.suspend_data_product_persistence(dp_id)

    dataset_modified.clear()

    rdt['time'] = np.arange(20, 40)

    publisher.publish(rdt.to_granule())
    self.assertFalse(dataset_modified.wait(2))

    self.dpsc_cli.activate_data_product_persistence(dp_id)
    dataset_modified.clear()

    publisher.publish(rdt.to_granule())
    self.assertTrue(dataset_modified.wait(30))

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))

    dataset_ids, _ = self.rrclient.find_objects(dp_id,
                                                PRED.hasDataset,
                                                id_only=True)
    self.assertEquals(len(dataset_ids), 1)

    self.dpsc_cli.suspend_data_product_persistence(dp_id)
    self.dpsc_cli.force_delete_data_product(dp_id)
    # now try to get the deleted dp object
    with self.assertRaises(NotFound):
        dp_obj = self.rrclient.read(dp_id)

    info_event_counter = 0
    runtime = 0
    starttime = time.time()
    caught_events = []

    # check that the four InformationContentStatusEvents were received
    while info_event_counter < 4 and runtime < 60:
        a = queue.get(timeout=60)
        caught_events.append(a)
        info_event_counter += 1
        runtime = time.time() - starttime

    self.assertEquals(info_event_counter, 4)
def test_single_device_single_site(self):
    from interface.objects import DataProductTypeEnum
    starting = str(calendar.timegm(datetime(2014, 1, 1, 0).timetuple()))

    site_1_id, device_1_id, dataset_1_id, deployment_1_id, param_dict_a, data_product_1_id = self.create_device_site_deployment(
        dep_name="Site 1 - Device 1", starting=starting)

    site = self.resource_registry.read(site_1_id)

    # Create SDPs
    # This logic is also in preload, but testing preload is painful.
    # Testing it manually here for now.
    for i, scfg in enumerate(site.stream_configurations):
        pdict = self.container.resource_registry.find_resources(
            name=scfg.parameter_dictionary_name,
            restype=RT.ParameterDictionary,
            id_only=False)[0][0]

        # Clone/Create the new ParameterDictionary
        del pdict._id
        del pdict._rev
        sdp_pdict_id, _ = self.container.resource_registry.create(pdict)

        stream_def_id = self.create_stream_definition(
            name='CTD 1 - SDP', parameter_dictionary_id=sdp_pdict_id)
        sdp_id = self.create_data_product(name="SDP_%d" % i,
                                          stream_def_id=stream_def_id,
                                          stream_configuration=scfg)
        self.activate_data_product(sdp_id)

        self.container.resource_registry.create_association(
            subject=site_1_id,
            predicate=PRED.hasOutputProduct,
            object=sdp_id,
            assoc_type=RT.DataProduct)

        sdp = self.resource_registry.read(sdp_id)
        sdp.category = DataProductTypeEnum.SITE
        self.resource_registry.update(sdp)

    self.observatory_management.activate_deployment(
        deployment_id=deployment_1_id)

    # Get Deployment start time
    deployment_obj = self.resource_registry.read(deployment_1_id)
    for constraint in deployment_obj.constraint_list:
        if constraint.type_ == OT.TemporalBounds:
            assert constraint.start_datetime == starting

    # Get information about the new SiteDataProduct that should have been created
    site_data_product_1_id = self.resource_registry.find_objects(
        site_1_id, PRED.hasOutputProduct, id_only=True)[0][0]
    stream_def_2_id = self.resource_registry.find_objects(
        site_data_product_1_id, PRED.hasStreamDefinition, id_only=True)[0][0]
    param_dict_b = self.resource_registry.find_objects(
        stream_def_2_id, PRED.hasParameterDictionary)[0][0]

    # Check associations
    self.assertEquals(
        self.resource_registry.find_objects(site_1_id,
                                            PRED.hasDevice,
                                            id_only=True)[0][0], device_1_id)
    self.assertEquals(
        self.resource_registry.find_objects(site_1_id,
                                            PRED.hasDeployment,
                                            id_only=True)[0][0],
        deployment_1_id)
    self.assertEquals(
        self.resource_registry.find_objects(device_1_id,
                                            PRED.hasDeployment,
                                            id_only=True)[0][0],
        deployment_1_id)
    self.assertEquals(
        self.resource_registry.find_objects(device_1_id,
                                            PRED.hasOutputProduct,
                                            id_only=True)[0][0],
        data_product_1_id)
    self.assertEquals(
        self.resource_registry.find_objects(site_1_id,
                                            PRED.hasOutputProduct,
                                            id_only=True)[0][0],
        site_data_product_1_id)

    site_data_product_1 = self.resource_registry.find_objects(
        site_1_id, PRED.hasOutputProduct)[0][0]
    self.assertEquals(site_data_product_1.category, DataProductTypeEnum.SITE)
    self.assertEquals(len(site_data_product_1.dataset_windows), 1)
    assert site_data_product_1.dataset_windows[0].dataset_id == dataset_1_id
    assert site_data_product_1.dataset_windows[0].bounds.start_datetime == starting
    assert site_data_product_1.dataset_windows[0].bounds.end_datetime == ''

    # Check that param dicts have equal members
    self.assertEquals(param_dict_a.name, param_dict_b.name)

    self.observatory_management.deactivate_deployment(
        deployment_id=deployment_1_id)

    # Verify the window has an ending time
    site_data_product_1 = self.resource_registry.find_objects(
        site_1_id, PRED.hasOutputProduct)[0][0]
    self.assertEquals(site_data_product_1.category, DataProductTypeEnum.SITE)
    self.assertEquals(len(site_data_product_1.dataset_windows), 1)
    assert site_data_product_1.dataset_windows[0].dataset_id == dataset_1_id
    assert site_data_product_1.dataset_windows[0].bounds.start_datetime == starting
    assert int(site_data_product_1.dataset_windows[0].bounds.end_datetime) \
        - calendar.timegm(datetime.utcnow().timetuple()) < 10

    # Verify that data is there
    granule = self.data_retriever.retrieve(dataset_1_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_allclose(rdt['time'], np.arange(4))
    np.testing.assert_allclose(rdt['temp'], np.arange(10, 14))
def check_trhph_instrument_data_products(self, reference_designator):
    passing = True
    info_list = []
    passing &= self.check_data_product_reference(reference_designator,
                                                 info_list)
    if not passing:
        return passing
    data_product_id, stream_def_id, dataset_id = info_list.pop()

    pdict = self.RR2.find_parameter_dictionary_of_stream_definition_using_has_parameter_dictionary(
        stream_def_id)
    passing &= self.assertEquals(pdict.name, 'trhph_sample')

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

    # calibration constants
    a = 1.98e-9
    b = -2.45e-6
    c = 9.28e-4
    d = -0.0888
    e = 0.731

    V_s = 1.506
    V_c = 0.
    T = 11.8

    r1 = 0.906
    r2 = 4.095
    r3 = 4.095

    ORP_V = 1.806
    Cl = np.nan

    offset = 2008
    gain = 4.0  # Normally this would be 50 per the DPS but the precision is %4.0f which truncates the values to the nearest 1...
    ORP = ((ORP_V * 1000.) - offset) / gain

    ntp_now = time.time() + 2208988800

    rdt['cc_a'] = [a]
    rdt['cc_b'] = [b]
    rdt['cc_c'] = [c]
    rdt['cc_d'] = [d]
    rdt['cc_e'] = [e]
    rdt['ref_temp_volts'] = [V_s]
    rdt['resistivity_temp_volts'] = [V_c]
    rdt['eh_sensor'] = [ORP_V]
    rdt['resistivity_5'] = [r1]
    rdt['resistivity_x1'] = [r2]
    rdt['resistivity_x5'] = [r3]
    rdt['cc_offset'] = [offset]
    rdt['cc_gain'] = [gain]
    rdt['time'] = [ntp_now]

    passing &= self.assert_array_almost_equal(rdt['vent_fluid_temperaure'],
                                              [T], 2)
    passing &= self.assert_array_almost_equal(rdt['vent_fluid_chloride_conc'],
                                              [Cl], 4)
    passing &= self.assert_array_almost_equal(rdt['vent_fluid_orp'], [ORP], 4)

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
    passing &= self.assertTrue(dataset_monitor.wait())
    if not passing:
        return passing

    granule = self.data_retriever.retrieve(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)

    passing &= self.assert_array_almost_equal(rdt['vent_fluid_temperaure'],
                                              [T], 2)
    passing &= self.assert_array_almost_equal(rdt['vent_fluid_chloride_conc'],
                                              [Cl], 4)
    passing &= self.assert_array_almost_equal(rdt['vent_fluid_orp'], [ORP], 4)
    return passing
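# Editor's note (worked check of the expectation above, using only the
# constants defined in the test): the oxidation-reduction potential reduces to
#   ORP = ((ORP_V * 1000.) - offset) / gain
#       = ((1.806 * 1000.) - 2008) / 4.0
#       = (1806 - 2008) / 4.0
#       = -50.5
def _sketch_trhph_orp(orp_v=1.806, offset=2008, gain=4.0):
    return ((orp_v * 1000.) - offset) / gain  # ~= -50.5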