Example #1
    def test_tmpsf_arrays(self):
        self.preload_tmpsf()
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('tmpsf_sample', id_only=True)
        stream_def_id = self.create_stream_definition('tmpsf', parameter_dictionary_id=pdict_id)
        data_product_id = self.create_data_product('tmpsf', stream_def_id=stream_def_id)
        self.activate_data_product(data_product_id)

        rdt = ParameterHelper.rdt_for_data_product(data_product_id)
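        # 'tomato' is a raw driver data particle (pkt_format_id 'JSON_Data');
        # populate_rdt below maps each value_id/value pair onto an rdt parameter.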
        tomato = {
            'quality_flag': 'ok',
            'preferred_timestamp': 'port_timestamp',
            'internal_timestamp': 3223662780.0,
            'stream_name': 'tmpsf_sample',
            'values': [
                {'value_id': 'timestamp', 'value': 3223662780.0},
                {'value_id': 'temperature',
                 'value': [21.4548, 21.0132, 20.9255, 21.1266, 21.1341,
                           21.5606, 21.2156, 21.4749, 21.3044, 21.132,
                           21.1798, 21.2352, 21.3488, 21.1214, 21.6426,
                           21.1479, 21.0069, 21.5426, 21.3204, 21.2402,
                           21.3968, 21.4371, 21.0411, 21.4361]},
                {'value_id': 'battery_voltage', 'value': 11.5916},
                {'value_id': 'serial_number', 'value': '021964'}],
            'port_timestamp': 1378230448.439269,
            'driver_timestamp': 3587219248.444593,
            'pkt_format_id': 'JSON_Data',
            'pkt_version': 1}
        from ion.agents.populate_rdt import populate_rdt
        rdt = populate_rdt(rdt, [tomato])
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)


        breakpoint(locals())
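For reference, the mapping that populate_rdt performs on a particle shaped like the one above can be sketched roughly as follows. This is a simplified, hypothetical illustration (a plain dict standing in for the RecordDictionaryTool), not the actual ion.agents.populate_rdt implementation:

    def map_particle_values(particles):
        """Collect each particle's value_id/value pairs into per-parameter lists."""
        records = {}
        for particle in particles:
            for item in particle.get('values', []):
                records.setdefault(item['value_id'], []).append(item['value'])
        return records

    # e.g. map_particle_values([tomato])['temperature'] yields the 24-point sample above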
Example #2
    def test_example_preload(self):
        print 'preloading...'
        self.preload_example1()

        data_product_ids, _ = self.container.resource_registry.find_resources_ext(alt_id='DPROD102', alt_id_ns='PRE')
        data_product_id = data_product_ids[0]
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)


        with DirectCoverageAccess() as dca:
            dca.upload_calibration_coefficients(dataset_id, 'test_data/sbe16coeffs.csv', 'test_data/sbe16coeffs.yml')

        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        rdt = ph.rdt_for_data_product(data_product_id)
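        # 2208988800 is the offset in seconds between the NTP epoch (1900-01-01)
        # and the Unix epoch (1970-01-01); rdt timestamps are NTP seconds.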
        rdt['time'] = [time.time() + 2208988800]
        rdt['temperature'] = [248471]
        rdt['pressure'] = [528418]
        rdt['conductivity'] = [1673175]
        rdt['thermistor_temperature']=[24303]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        dataset_monitor.event.wait(10)
        g = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(g)

        breakpoint(locals())
Example #3
    def test_serialize_compatability(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('ctd extended', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd1', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        verified = Event()
        def verifier(msg, route, stream_id):
            for k,v in msg.record_dictionary.iteritems():
                if v is not None:
                    self.assertIsInstance(v, np.ndarray)
            rdt = RecordDictionaryTool.load_from_granule(msg)
            for k,v in rdt.iteritems():
                self.assertIsInstance(rdt[k], np.ndarray)
                self.assertIsInstance(v, np.ndarray)
            verified.set()

        subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        publisher = StandaloneStreamPublisher(stream_id,route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        ph.fill_rdt(rdt,10)
        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(60))
Example #4
    def test_example2_preload(self):
        print 'preloading...'
        self.preload_example2()

        data_product_ids, _ = self.container.resource_registry.find_resources_ext(alt_id='DPROD104', alt_id_ns='PRE')
        data_product_id = data_product_ids[0]
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)


        with DirectCoverageAccess() as dca:
            dca.upload_calibration_coefficients(dataset_id, 'test_data/vel3d_coeff.csv', 'test_data/vel3d_coeff.yml')

        from ion_functions.data.test.test_vel_functions import TS, VE, VN, VU

        rdt = ParameterHelper.rdt_for_data_product(data_product_id)
        rdt['time'] = [time.time() + 2208988800]
        rdt['velocity_east'] = [VE[0]]
        rdt['velocity_north'] = [VN[0]]
        rdt['velocity_up'] = [VU[0]]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        dataset_monitor.event.wait(10)
        g = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(g)

        breakpoint(locals())
Example #5
    def check_tempsf_instrument_data_product(self, reference_designator):
        passing = True
        info_list = []
        passing &= self.check_data_product_reference(reference_designator, info_list)
        if not passing: return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        now = time.time()
        ntp_now = now + 2208988800

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [ntp_now]
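        # 'temperature' is an array-type parameter: each time step carries a
        # 24-element sample vector, hence the nested list.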
        rdt['temperature'] = [[ 25.3884, 26.9384, 24.3394, 23.3401, 22.9832,
            29.4434, 26.9873, 15.2883, 16.3374, 14.5883, 15.7253, 18.4383,
            15.3488, 17.2993, 10.2111, 11.5993, 10.9345, 9.4444, 9.9876,
            10.9834, 11.0098, 5.3456, 4.2994, 4.3009]]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.event.wait(20))
        if not passing: return passing

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
        passing &= self.assert_array_almost_equal(rdt['temperature'], [[
            25.3884, 26.9384, 24.3394, 23.3401, 22.9832, 29.4434, 26.9873,
            15.2883, 16.3374, 14.5883, 15.7253, 18.4383, 15.3488, 17.2993,
            10.2111, 11.5993, 10.9345, 9.4444, 9.9876, 10.9834, 11.0098,
            5.3456, 4.2994, 4.3009]])
        return passing
Example #6
    def test_qc_events(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_qc_pdict()
        stream_def_id = self.pubsub_management.create_stream_definition('qc stream def', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('qc stream', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        config = DotDict()

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id, config=config)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.arange(10) * 3

        verified = Event()
        def verification(event, *args, **kwargs):
            self.assertEquals(event.qc_parameter, 'temp_qc')
            self.assertEquals(event.temporal_value, 7)
            verified.set()

        es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=dataset_id, callback=verification, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(10))
Example #7
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        # Instantiate a process to represent the test
        process = TransformWorkerTestProcess()

        self.dataset_management_client = DatasetManagementServiceClient(
            node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(
            node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceProcessClient(
            node=self.container.node, process=process)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.ph = ParameterHelper(self.dataset_management_client,
                                  self.addCleanup)

        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)
Example #8
class TestDynamicParameters(DMTestCase):
    def setUp(self):
        DMTestCase.setUp(self)

        self.ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = self.ph.create_simple_cc_pdict()

        self.stream_def_id = self.pubsub_management.create_stream_definition('Calibration Coefficients', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, self.stream_def_id)

    def test_coefficient_compatibility(self):
        data_product_id = self.create_data_product(name='Calibration Coefficient Test Data product', stream_def_id=self.stream_def_id)

        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = [10] * 10
        rdt['cc_coefficient'] = [2] * 10
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dataset_monitor.wait())

        rdt2 = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))
        np.testing.assert_array_equal(rdt2['offset'],[12]*10)
Example #9
    def test_qc_interval_integration(self):

        # 1. Make a dataset that has only one discrete QC violation
        # 2. Launch the process
        # 3. Set up the scheduler to run it, say, three times
        # 4. Get the events and verify the data
    
        #-------------------------------------------------------------------------------- 
        # Make a dataset that has only one discrete qc violation
        #-------------------------------------------------------------------------------- 

        dp_id, dataset_id, stream_def_id = self.make_data_product()
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)
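        # The temp vector leads with one out-of-range value (41) followed by
        # in-range values (39), producing the single discrete global-range QC
        # violation described in the comments above.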
        for rdt in self.populate_vectors(stream_def_id, 1, lambda x : [41] + [39] * (x-1)):
            ph.publish_rdt_to_data_product(dp_id, rdt)
        self.assertTrue(monitor.event.wait(10)) # Give it 10 seconds to populate


        #--------------------------------------------------------------------------------
        # Launch the process
        #--------------------------------------------------------------------------------

        interval_key = uuid4().hex
        config = DotDict()
        config.process.interval_key = interval_key
        config.process.qc_params = ['glblrng_qc'] # The others are tested in other tests for completeness
        self.sync_launch(config)

        async_queue = Queue()
        def callback(event, *args, **kwargs):
            times = event.temporal_values
            self.assertEquals(len(times), 1)
            async_queue.put(1)
        es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=dp_id, callback=callback, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        #--------------------------------------------------------------------------------
        # Setup the scheduler
        #--------------------------------------------------------------------------------


        timer_id = self.scheduler_service.create_interval_timer(start_time=time.time(),
                end_time=time.time()+13,
                interval=5,
                event_origin=interval_key)


        #--------------------------------------------------------------------------------
        # Get the events and verify them
        #--------------------------------------------------------------------------------

        try:
            for i in xrange(2):
                async_queue.get(timeout=10)
        except Empty:
            raise AssertionError('QC Events not raised')
Example #10
    def create_lookup_rdt(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()

        stream_def_id = self.pubsub_management.create_stream_definition('lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt
Example #11
    def create_lookup_rdt(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()

        stream_def_id = self.pubsub_management.create_stream_definition('lookup', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt
Example #12
    def setUp(self):
        DMTestCase.setUp(self)

        self.ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = self.ph.create_simple_cc_pdict()

        self.stream_def_id = self.pubsub_management.create_stream_definition(
            'Calibration Coefficients', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        self.stream_def_id)
Example #13
    def test_hydrophone_simulator(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_simple_array_pdict()
        stream_def_id = self.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        data_product_id = self.create_data_product('ctd hydrophone', stream_def_id=stream_def_id)
        self.activate_data_product(data_product_id)

        s = HydrophoneSimulator(data_product_id, interval=4)
        breakpoint(locals())

        s.stop()
Example #14
    def make_data_product(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_simple_qc_pdict()

        stream_def_id = self.create_stream_definition('global range', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':'QCTEST'})

        self.populate_qc_tables()

        dp_id = self.create_data_product('qc data product', stream_def_id=stream_def_id)
        self.activate_data_product(dp_id)
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(dp_id)
        return dp_id, dataset_id, stream_def_id
Example #15
    def make_data_product(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_simple_qc_pdict()

        stream_def_id = self.create_stream_definition(
            'global range',
            parameter_dictionary_id=pdict_id,
            stream_configuration={'reference_designator': 'QCTEST'})

        self.populate_qc_tables()

        dp_id = self.create_data_product('qc data product',
                                         stream_def_id=stream_def_id)
        self.activate_data_product(dp_id)
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
            dp_id)
        return dp_id, dataset_id, stream_def_id
Example #16
 def run(self):
     while not self.finished.wait(self.interval):
         rdt = ParameterHelper.rdt_for_data_product(self.data_product_id)
         now = time.time()
         if self.simple_time:
             rdt['time'] = [self.i]
         else:
             rdt['time'] = np.array([now + 2208988800])
         rdt['temp'] = self.float_range(10,14,np.array([now]))
         rdt['pressure'] = self.float_range(11,12,np.array([now]))
         rdt['lat'] = [41.205]
         rdt['lon'] = [-71.74]
         rdt['conductivity'] = self.float_range(3.3,3.5,np.array([now]))
         rdt['driver_timestamp'] = np.array([now + 2208988800])
         rdt['preferred_timestamp'] = ['driver_timestamp']
         ParameterHelper.publish_rdt_to_data_product(self.data_product_id, rdt)
         self.i += 1
Example #17
    def setUp(self):
        DMTestCase.setUp(self)

        self.ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = self.ph.create_simple_cc_pdict()

        self.stream_def_id = self.pubsub_management.create_stream_definition('Calibration Coefficients', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, self.stream_def_id)
Example #18
    def test_create_dataset(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)


        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        # Yield to other greenlets, had an issue with connectivity
        gevent.sleep(1)

        log.debug("--------------------------------")
        log.debug(dataset_id)
        coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
        log.debug(coverage_path)
        log.debug("--------------------------------")

        breakpoint(locals(), globals())
Example #19
    def setup_resource(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='example')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)

        # Yield to other greenlets, had an issue with connectivity
        gevent.sleep(1)

        self.offering_id = dataset_id
Example #20
    def test_fdt_created_during(self):
        # generate a data product and check that the FDT exists
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)


        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(1) # Yield to other greenlets, had an issue with connectivity

        print "--------------------------------"
        print dataset_id
        coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
        print coverage_path
        print "--------------------------------"
Example #21
    def test_get_data_from_FDW(self):
        # generate a data product and check that the FDW can get data
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(
            1)  # Yield to other greenlets, had an issue with connectivity

        print "--------------------------------"
        print dataset_id
        coverage_path = DatasetManagementService()._get_coverage_path(
            dataset_id)
        print coverage_path
        print "--------------------------------"

        #verify table exists in the DB (similar to above)
        # ....code...

        # check that the geoserver layer exists as above
        # ... code ....

        # make a WMS/WFS request... something like this (or both)
        url = self.gs_host + '/geoserver/geonode/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=geonode:ooi_' + dataset_id + '_ooi&maxFeatures=1&outputFormat=csv'
        r = requests.get(url)
        self.assertTrue(r.status_code == 200)
Example #22
    def check_presta_instrument_data_products(self, reference_designator):
        # Check the parsed data product; make sure it's got everything it needs and can be published, persisted, etc.

        # Absolute Pressure (SFLPRES_L0) is what comes off the instrument, SFLPRES_L1 is a pfunc
        # Let's go ahead and publish some fake data!!!
        # According to https://alfresco.oceanobservatories.org/alfresco/d/d/workspace/SpacesStore/63e16865-9d9e-4b11-b0b3-d5658faa5080/1341-00230_Data_Product_Spec_SFLPRES_OOI.pdf
        # Appendix A. Example 1.
        # p_psia_tide = 14.8670
        # the tide should be 10.2504
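        # Sanity check on the expected value: assuming SFLPRES_L1 is a straight
        # unit conversion from psia to dbar, 14.8670 psia * 0.6894757 dbar/psia
        # ~= 10.2504 dbar, which matches the assertion below.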
        passing = True

        info_list = []
        passing &= self.check_data_product_reference(reference_designator,
                                                     info_list)
        if not passing:
            return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        now = time.time()
        ntp_now = now + 2208988800.

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['absolute_pressure'] = [14.8670]
        passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'],
                                                  [10.2504], 4)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(
            dataset_monitor.wait())  # Bumped to 20 to keep buildbot happy
        if not passing: return passing

        granule = self.data_retriever.retrieve(dataset_id)

        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
        passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'],
                                                  [10.2504], 4)
        passing &= self.assert_array_almost_equal(rdt['absolute_pressure'],
                                                  [14.8670], 4)

        return passing
Example #23
    def test_pydap(self):
        if not CFG.get_safe('bootstrap.use_pydap',False):
            raise unittest.SkipTest('PyDAP is off (bootstrap.use_pydap)')
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='example')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)
        
        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt,10)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))


        gevent.sleep(1) # Yield to other greenlets, had an issue with connectivity

        pydap_host = CFG.get_safe('server.pydap.host','localhost')
        pydap_port = CFG.get_safe('server.pydap.port',8001)
        url = 'http://%s:%s/%s' %(pydap_host, pydap_port, dataset_id)

        ds = open_url(url)
        np.testing.assert_array_equal(ds['time'][:], np.arange(10))
        untested = []
        for k,v in rdt.iteritems():
            if k==rdt.temporal_parameter:
                continue
            context = rdt.context(k)
            if isinstance(context.param_type, QuantityType):
                np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
            elif isinstance(context.param_type, ArrayType):
                values = np.empty(rdt[k].shape, dtype='O')
                for i,obj in enumerate(rdt[k]):
                    values[i] = str(obj)
                np.testing.assert_array_equal(ds[k][k][:][0], values)
            elif isinstance(context.param_type, ConstantType):
                np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
            elif isinstance(context.param_type, CategoryType):
                np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
            else:
                untested.append('%s (%s)' % (k,context.param_type))
        if untested:
            raise AssertionError('Untested parameters: %s' % untested)
Example #24
    def setUp(self):
        DMTestCase.setUp(self)
        self.ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = self.ph.create_simple_qc_pdict()

        self.stream_def_id = self.pubsub_management.create_stream_definition('global range', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':'QCTEST'})
        self.addCleanup(self.pubsub_management.delete_stream_definition, self.stream_def_id)

        self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        self.svm = StoredValueManager(self.container)
Example #25
    def test_create_dataset_verify_geoserver_layer(self):
        #generate layer and check that the service created it in geoserver
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(
            1)  # Yield to other greenlets, had an issue with connectivity

        log.debug("--------------------------------")
        log.debug(dataset_id)
        coverage_path = DatasetManagementService()._get_coverage_path(
            dataset_id)
        log.debug(coverage_path)
        log.debug("--------------------------------")

        # verify that the layer exists in geoserver
        try:
            r = requests.get(self.gs_rest_url + '/layers/ooi_' + dataset_id +
                             '_ooi.xml',
                             auth=(self.username, self.PASSWORD))
            self.assertTrue(r.status_code == 200)
        except Exception as e:
            log.error("check service and layer exist...%s", e)
            self.assertTrue(False)
Example #26
    def make_large_dataset(self, temp_vector):

        monitor_queue = Queue()
        # Make 27 hours of data
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        data_product_id, dataset_id, stream_def_id = self.make_data_product()
        es = EventSubscriber(event_type=OT.DatasetModified, origin=dataset_id, auto_delete=True, callback = lambda *args, **kwargs : monitor_queue.put(1))
        es.start()
        self.addCleanup(es.stop)
        for rdt in self.populate_vectors(stream_def_id, 3, temp_vector):
            ph.publish_rdt_to_data_product(data_product_id, rdt)

        try:
            for i in xrange(3):
                monitor_queue.get(timeout=10)
        except Empty:
            raise AssertionError('Failed to populate dataset in time')

            
        return data_product_id
Example #27
    def test_global_range_lookup(self):
        reference_designator = "CE01ISSM-MF005-01-CTDBPC999"
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_simple_qc_pdict()
        svm = StoredValueManager(self.container)
        doc_key = 'grt_%s_TEMPWAT' % reference_designator
        svm.stored_value_cas(doc_key, {'grt_min_value':-2, 'grt_max_value':40})
        
        stream_def_id = self.pubsub_management.create_stream_definition('qc parsed', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':reference_designator})
        self.addCleanup(self.pubsub_management.delete_stream_definition,stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20]
        rdt.fetch_lookup_values()
        min_field = [i for i in rdt.fields if 'grt_min_value' in i][0]
        max_field = [i for i in rdt.fields if 'grt_max_value' in i][0]

        np.testing.assert_array_almost_equal(rdt[min_field], [-2.])
        np.testing.assert_array_almost_equal(rdt[max_field], [40.])

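        # 20 degC lies inside the stored range [-2, 40], so the global range QC
        # flag is expected to be 1 (pass).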
        np.testing.assert_array_almost_equal(rdt['tempwat_glblrng_qc'],[1])
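For context, the global-range check that the stored grt_min_value/grt_max_value lookup feeds amounts to the minimal sketch below. This is a hypothetical helper for illustration only, not the ion_functions implementation used by the real QC pipeline:

    import numpy as np

    def global_range_qc(values, grt_min_value, grt_max_value):
        """Return 1 where a value lies inside the inclusive range, 0 otherwise."""
        values = np.asarray(values, dtype=float)
        return ((values >= grt_min_value) & (values <= grt_max_value)).astype(int)

    # With the lookup document stored above: 20 degC is inside [-2, 40], so the flag is 1.
    assert list(global_range_qc([20], -2, 40)) == [1]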
Example #28
    def test_lctest_preload(self):
        self.preload_lctest()


        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sparse_dict', id_only=True)
        stream_def_id = self.create_stream_definition('sparse_example', parameter_dictionary_id=pdict_id)
        data_product_id = self.create_data_product('sparse_example', stream_def_id=stream_def_id)
        self.activate_data_product(data_product_id)

        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)


        rdt = ParameterHelper.rdt_for_data_product(data_product_id)
        rdt['time'] = [time.time() + 2208988800]
        rdt['sparse_float'] = [3.14159265358979323]
        rdt['sparse_double'] = [2.7182818284590452353602874713526624977572470936999595]
        rdt['sparse_int'] = [131071] # 6th mersenne prime
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        dataset_monitor.event.wait(10)

        for i in xrange(10):
            dataset_monitor.event.clear()
            rdt = ParameterHelper.rdt_for_data_product(data_product_id)
            rdt['time'] = [time.time() + 2208988800]
            ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
            dataset_monitor.event.wait(10)


        g = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(g)
            
        breakpoint(locals())
Example #29
    def check_presta_instrument_data_products(self, reference_designator):
        # Check the parsed data product; make sure it's got everything it needs and can be published, persisted, etc.

        # Absolute Pressure (SFLPRES_L0) is what comes off the instrument, SFLPRES_L1 is a pfunc
        # Let's go ahead and publish some fake data!!!
        # According to https://alfresco.oceanobservatories.org/alfresco/d/d/workspace/SpacesStore/63e16865-9d9e-4b11-b0b3-d5658faa5080/1341-00230_Data_Product_Spec_SFLPRES_OOI.pdf
        # Appendix A. Example 1.
        # p_psia_tide = 14.8670
        # the tide should be 10.2504
        passing = True
        

        info_list = []
        passing &= self.check_data_product_reference(reference_designator, info_list)
        if not passing:
            return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        now = time.time()
        ntp_now = now + 2208988800.

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['absolute_pressure'] = [14.8670]
        passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dataset_monitor.event.wait(20)) # Bumped to 20 to keep buildbot happy
        if not passing: return passing

        granule = self.data_retriever.retrieve(dataset_id)

        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
        passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
        passing &= self.assert_array_almost_equal(rdt['absolute_pressure'], [14.8670], 4)

        return passing
Example #30
 def run(self):
     connection = uuid4().hex
     while not self.finished.wait(self.interval):
         rdt = ParameterHelper.rdt_for_data_product(self.data_product_id)
         now = time.time()
         if self.simple_time:
             rdt["time"] = [self.i]
         else:
             rdt["time"] = np.array([now + 2208988800])
         rdt["temp"] = self.float_range(10, 14, np.array([now]))
         rdt["pressure"] = self.float_range(11, 12, np.array([now]))
         rdt["lat"] = [41.205]
         rdt["lon"] = [-71.74]
         rdt["conductivity"] = self.float_range(3.3, 3.5, np.array([now]))
         rdt["driver_timestamp"] = np.array([now + 2208988800])
         rdt["preferred_timestamp"] = ["driver_timestamp"]
         if self.connection:
             ParameterHelper.publish_rdt_to_data_product(
                 self.data_product_id, rdt, connection_id=connection, connection_index=self.i
             )
         else:
             ParameterHelper.publish_rdt_to_data_product(self.data_product_id, rdt)
         self.i += 1
Example #31
    def make_large_dataset(self, temp_vector):

        monitor_queue = Queue()
        # Make 27 hours of data
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        data_product_id, dataset_id, stream_def_id = self.make_data_product()
        es = EventSubscriber(
            event_type=OT.DatasetModified,
            origin=dataset_id,
            auto_delete=True,
            callback=lambda *args, **kwargs: monitor_queue.put(1))
        es.start()
        self.addCleanup(es.stop)
        for rdt in self.populate_vectors(stream_def_id, 3, temp_vector):
            ph.publish_rdt_to_data_product(data_product_id, rdt)

        try:
            for i in xrange(3):
                monitor_queue.get(timeout=10)
        except Empty:
            raise AssertionError('Failed to populate dataset in time')

        return data_product_id
Example #32
class TestDynamicParameters(DMTestCase):
    def setUp(self):
        DMTestCase.setUp(self)

        self.ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = self.ph.create_simple_cc_pdict()

        self.stream_def_id = self.pubsub_management.create_stream_definition(
            'Calibration Coefficients', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        self.stream_def_id)

    def test_coefficient_compatibility(self):
        data_product_id = self.create_data_product(
            name='Calibration Coefficient Test Data product',
            stream_def_id=self.stream_def_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = [10] * 10
        rdt['cc_coefficient'] = [2] * 10
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
            data_product_id)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dataset_monitor.wait())

        rdt2 = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(dataset_id))
        np.testing.assert_array_equal(rdt2['offset'], [12] * 10)
Example #33
    def test_coverage_transform(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        
        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))
Example #34
    def check_tempsf_instrument_data_product(self, reference_designator):
        passing = True
        info_list = []
        passing &= self.check_data_product_reference(reference_designator,
                                                     info_list)
        if not passing: return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        now = time.time()
        ntp_now = now + 2208988800

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['temperature'] = [[
            25.3884, 26.9384, 24.3394, 23.3401, 22.9832, 29.4434, 26.9873,
            15.2883, 16.3374, 14.5883, 15.7253, 18.4383, 15.3488, 17.2993,
            10.2111, 11.5993, 10.9345, 9.4444, 9.9876, 10.9834, 11.0098,
            5.3456, 4.2994, 4.3009
        ]]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.wait())
        if not passing: return passing

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
        passing &= self.assert_array_almost_equal(rdt['temperature'], [[
            25.3884, 26.9384, 24.3394, 23.3401, 22.9832, 29.4434, 26.9873,
            15.2883, 16.3374, 14.5883, 15.7253, 18.4383, 15.3488, 17.2993,
            10.2111, 11.5993, 10.9345, 9.4444, 9.9876, 10.9834, 11.0098,
            5.3456, 4.2994, 4.3009
        ]])
        return passing
Example #35
    def test_create_dataset(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        # Yield to other greenlets, had an issue with connectivity
        gevent.sleep(1)

        log.debug("--------------------------------")
        log.debug(dataset_id)
        coverage_path = DatasetManagementService()._get_coverage_path(
            dataset_id)
        log.debug(coverage_path)
        log.debug("--------------------------------")

        breakpoint(locals(), globals())
Example #36
    def test_fdt_created_during(self):
        # generate a data product and check that the FDT exists
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(
            1)  # Yield to other greenlets, had an issue with connectivity

        print "--------------------------------"
        print dataset_id
        coverage_path = DatasetManagementService()._get_coverage_path(
            dataset_id)
        print coverage_path
        print "--------------------------------"
    def test_get_data_from_FDW(self):
        # generate a data product and check that the FDW can get data
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='example')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(1) # Yield to other greenlets, had an issue with connectivity

        print "--------------------------------"
        print dataset_id
        coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
        print coverage_path
        print "--------------------------------"

        #verify table exists in the DB (similar to above)
        # ....code...

        # check that the geoserver layer exists as above
        # ... code ....

        # make a WMS/WFS request... something like this (or both)
        url = self.gs_host+'/geoserver/geonode/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=geonode:ooi_' + dataset_id + '_ooi&maxFeatures=1&outputFormat=csv'
        r = requests.get(url)
        self.assertTrue(r.status_code == 200)
Example #38
    def test_coverage_transform(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream,
                        stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_array_almost_equal(rdt_out['conductivity_L1'],
                                             np.array([42.914]))
        np.testing.assert_array_almost_equal(rdt_out['temp_L1'],
                                             np.array([20.]))
        np.testing.assert_array_almost_equal(rdt_out['pressure_L1'],
                                             np.array([3.068]))
        np.testing.assert_array_almost_equal(
            rdt_out['density'], np.array([1021.7144739593881],
                                         dtype='float32'))
        np.testing.assert_array_almost_equal(
            rdt_out['salinity'], np.array([30.935132729668283],
                                          dtype='float32'))
Example #39
    def test_create_dataset_verify_geoserver_layer(self):
        #generate layer and check that the service created it in geoserver
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='example')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        self.data_product_management.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.data_product_management.suspend_data_product_persistence, data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id, PRED.hasDataset, id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(1) # Yield to other greenlets, had an issue with connectivity

        log.debug("--------------------------------")
        log.debug(dataset_id)
        coverage_path = DatasetManagementService()._get_coverage_path(dataset_id)
        log.debug(coverage_path)
        log.debug("--------------------------------")

        # verify that the layer exists in geoserver
        try:
            r = requests.get(self.gs_rest_url + '/layers/ooi_' + dataset_id + '_ooi.xml', auth=(self.username, self.PASSWORD))
            self.assertTrue(r.status_code == 200)
        except Exception as e:
            log.error("check service and layer exist...%s", e)
            self.assertTrue(False)
Example #40
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        # Instantiate a process to represent the test
        process=TransformWorkerTestProcess()

        self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceProcessClient(node=self.container.node, process = process)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.ph = ParameterHelper(self.dataset_management_client, self.addCleanup)

        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)
Example #41
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.resource_registry = self.container.resource_registry
        self.RR2 = EnhancedResourceRegistryClient(self.resource_registry)
        self.data_acquisition_management = DataAcquisitionManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.instrument_management = InstrumentManagementServiceClient()
        self.discovery = DiscoveryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.user_notification = UserNotificationServiceClient()
        self.observatory_management = ObservatoryManagementServiceClient()
        self.visualization = VisualizationServiceClient()
        self.ph = ParameterHelper(self.dataset_management, self.addCleanup)
        self.ctd_count = 0
Example #42
    def setup_resource(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        dp = DataProduct(name='example')

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 100)
        ph.publish_rdt_to_data_product(data_product_id, rdt)

        # Yield to other greenlets, had an issue with connectivity
        gevent.sleep(1)

        self.offering_id = dataset_id
Example #43
class TestQCFunctions(DMTestCase):
    def setUp(self):
        DMTestCase.setUp(self)
        self.ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = self.ph.create_simple_qc_pdict()

        self.stream_def_id = self.pubsub_management.create_stream_definition('global range', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':'QCTEST'})
        self.addCleanup(self.pubsub_management.delete_stream_definition, self.stream_def_id)

        self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        self.svm = StoredValueManager(self.container)

    def test_global_range_test(self):
        self.svm.stored_value_cas('grt_QCTEST_TEMPWAT', {'grt_min_value':10., 'grt_max_value':20.})

        self.rdt['time'] = np.arange(8)
        self.rdt['temp'] = [9, 10, 16, 17, 18, 19, 20, 25]
        self.rdt.fetch_lookup_values()
        np.testing.assert_array_almost_equal(self.rdt['tempwat_glblrng_qc'], [0, 1, 1, 1, 1, 1, 1, 0])

    def test_spike_test(self): # I know how redundant this sounds
        self.svm.stored_value_cas('spike_QCTEST_TEMPWAT', {'acc':0.1, 'spike_n':5., 'spike_l':5.})

        self.rdt['time'] = np.arange(8)
        self.rdt['temp'] = [-1, 3, 40, -1, 1, -6, -6, 1]
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_almost_equal(self.rdt['tempwat_spketst_qc'], [1, 1, 0, 1, 1, 1, 1, 1])

    def test_stuck_value_test(self):
        self.svm.stored_value_cas('svt_QCTEST_TEMPWAT', {'svt_resolution':0.001, 'svt_n': 4.})

        self.rdt['time'] = np.arange(10)
        self.rdt['temp'] = [4.83, 1.40, 3.33, 3.33, 3.33, 3.33, 4.09, 2.97, 2.85, 3.67]
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_almost_equal(self.rdt['tempwat_stuckvl_qc'], [1, 1, 0, 0, 0, 0, 1, 1, 1, 1])
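
For reference, the glblrng flag asserted above is just an inclusive bounds check against the stored grt_min_value/grt_max_value; a minimal numpy sketch of the expected behaviour (not the ion_functions implementation the parameter function actually calls):

import numpy as np

def global_range_qc(values, grt_min_value, grt_max_value):
    # flag is 1 where the value lies inside [grt_min_value, grt_max_value], 0 otherwise
    values = np.asarray(values, dtype=float)
    return ((values >= grt_min_value) & (values <= grt_max_value)).astype(np.int8)

print(global_range_qc([9, 10, 16, 17, 18, 19, 20, 25], 10., 20.))  # -> [0 1 1 1 1 1 1 0]
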
Example #44
    def setUp(self):
        DMTestCase.setUp(self)
        self.ph = ParameterHelper(self.dataset_management, self.addCleanup)

class TestTransformWorker(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        # Instantiate a process to represent the test
        process = TransformWorkerTestProcess()

        self.dataset_management_client = DatasetManagementServiceClient(
            node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(
            node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceProcessClient(
            node=self.container.node, process=process)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.ph = ParameterHelper(self.dataset_management_client,
                                  self.addCleanup)

        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)

    def push_granule(self, data_product_id):
        '''
        Publishes and monitors that the granule arrived
        '''
        datasets, _ = self.rrclient.find_objects(data_product_id,
                                                 PRED.hasDataset,
                                                 id_only=True)
        dataset_monitor = DatasetMonitor(datasets[0])

        rdt = self.ph.rdt_for_data_product(data_product_id)
        self.ph.fill_parsed_rdt(rdt)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)

        assert dataset_monitor.wait()
        dataset_monitor.stop()

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_transform_worker(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.dp_list = []
        self.data_process_objs = []
        self._output_stream_ids = []
        self.granule_verified = Event()
        self.worker_assigned_event_verified = Event()
        self.dp_created_event_verified = Event()
        self.heartbeat_event_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        self.start_event_listener()

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()
        self.dp_list.append(dataprocess_id)

        # validate that the repository for data product algorithms persists the new resources (NEW SA-1)
        # create_data_process call created one of each
        dpd_ids, _ = self.rrclient.find_resources(
            restype=OT.DataProcessDefinition, id_only=False)
        # there will be more than one because of the DPDs that represent the PFs in the data product above
        self.assertTrue(dpd_ids is not None)
        dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess,
                                                 id_only=False)
        # only one DataProcess because the PFs in the data product above are not activated yet.
        self.assertEquals(len(dp_ids), 1)

        # validate the name and version label  NEW SA - 2
        dataprocessdef_obj = self.dataprocessclient.read_data_process_definition(
            dataprocessdef_id)
        self.assertEqual(dataprocessdef_obj.version_label, '1.0a')
        self.assertEqual(dataprocessdef_obj.name, 'add_arrays')

        # validate that the DPD has an attachment  NEW SA - 21
        attachment_ids, assoc_ids = self.rrclient.find_objects(
            dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True)
        self.assertEqual(len(attachment_ids), 1)
        attachment_obj = self.rrclient.read_attachment(attachment_ids[0])
        log.debug('attachment: %s', attachment_obj)

        # validate that the data process resource has input and output data products associated
        # L4-CI-SA-RQ-364  and NEW SA-3
        outproduct_ids, assoc_ids = self.rrclient.find_objects(
            dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True)
        self.assertEqual(len(outproduct_ids), 1)
        inproduct_ids, assoc_ids = self.rrclient.find_objects(
            dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True)
        self.assertEqual(len(inproduct_ids), 1)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check that the provenance graph has 2 entries: the parent and child data
        # products, with the DataProcessDefinition that created the child recorded on the parent link.
        self.assertEqual(len(output_data_product_provenance), 2)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[
            output_data_product_id[0]]['parents'])
        self.assertTrue(output_data_product_provenance[
            output_data_product_id[0]]['parents'][self.input_dp_id]
                        ['data_process_definition_id'] == dataprocessdef_id)

        # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in
        # the metadata of each output data product created by the data product algorithm.
        output_data_product_obj, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=False)
        self.assertTrue(output_data_product_obj[0].name != None)
        self.assertTrue(output_data_product_obj[0]._rev != None)

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                                   stream_route=stream_route)
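
        # the loop below publishes 100 granules; the worker is expected to emit a heartbeat
        # status event for every hundred granules it processes (asserted further down)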

        for n in range(1, 101):
            rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
            rdt['time'] = [0]  # time should always come first
            rdt['conductivity'] = [1]
            rdt['pressure'] = [2]
            rdt['salinity'] = [8]

            self.publisher.publish(rdt.to_granule())

        # validate that the output granule is received and the updated value is correct
        self.assertTrue(self.granule_verified.wait(self.wait_time))

        # validate that the data process loaded into worker event is received    (L4-CI-SA-RQ-182)
        self.assertTrue(
            self.worker_assigned_event_verified.wait(self.wait_time))

        # validate that the data process create (with data product ids) event is received    (NEW SA -42)
        self.assertTrue(self.dp_created_event_verified.wait(self.wait_time))

        # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182)
        #this takes a while so set wait limit to large value
        self.assertTrue(self.heartbeat_event_verified.wait(200))

        # validate that the code from the transform function can be retrieve via inspect_data_process_definition
        src = self.dataprocessclient.inspect_data_process_definition(
            dataprocessdef_id)
        self.assertIn('def add_arrays(a, b)', src)
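        # A hedged sketch of what the inspected source presumably contains (the real
        # implementation ships inside the ion_example egg referenced above):
        #
        #     def add_arrays(a, b):
        #         return a + b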

        # now delete the DPD and DP then verify that the resources are retired so that information required for provenance are still available
        self.dataprocessclient.delete_data_process(dataprocess_id)
        self.dataprocessclient.delete_data_process_definition(
            dataprocessdef_id)

        in_dp_objs, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            predicate=PRED.hasInputProduct,
            object_type=RT.DataProduct,
            id_only=True)
        self.assertTrue(in_dp_objs is not None)

        dpd_objs, _ = self.rrclient.find_subjects(
            subject_type=RT.DataProcessDefinition,
            predicate=PRED.hasDataProcess,
            object=dataprocess_id,
            id_only=True)
        self.assertTrue(dpd_objs is not None)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_transform_worker_with_instrumentdevice(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # Create CTD Parsed as the initial data product
        # create a stream definition for the data from the ctd simulator
        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id)

        # only ever need one device for testing purposes.
        instDevice_obj, _ = self.rrclient.find_resources(
            restype=RT.InstrumentDevice, name='test_ctd_device')
        if instDevice_obj:
            instDevice_id = instDevice_obj[0]._id
        else:
            instDevice_obj = IonObject(RT.InstrumentDevice,
                                       name='test_ctd_device',
                                       description="test_ctd_device",
                                       serial_number="12345")
            instDevice_id = self.imsclient.create_instrument_device(
                instrument_device=instDevice_obj)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                            data_product_id=self.input_dp_id)

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()

        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dataprocess_id)
        self.addCleanup(self.dataprocessclient.delete_data_process_definition,
                        dataprocessdef_id)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the
        # DataProcessDefinition creating the child from the parent.
        self.assertTrue(len(output_data_product_provenance) == 3)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[
            output_data_product_id[0]]['parents'])
        self.assertTrue(instDevice_id in output_data_product_provenance[
            self.input_dp_id]['parents'])
        self.assertTrue(output_data_product_provenance[instDevice_id]['type']
                        == 'InstrumentDevice')

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_transform_worker_with_platformdevice(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # Create CTD Parsed as the initial data product
        # create a stream definition for the data from the ctd simulator
        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id)

        # only ever need one device for testing purposes.
        platform_device_obj, _ = self.rrclient.find_resources(
            restype=RT.PlatformDevice, name='TestPlatform')
        if platform_device_obj:
            platform_device_id = platform_device_obj[0]._id
        else:
            platform_device_obj = IonObject(RT.PlatformDevice,
                                            name='TestPlatform',
                                            description="TestPlatform",
                                            serial_number="12345")
            platform_device_id = self.imsclient.create_platform_device(
                platform_device=platform_device_obj)

        self.damsclient.assign_data_product(
            input_resource_id=platform_device_id,
            data_product_id=self.input_dp_id)

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dataprocess_id)
        self.addCleanup(self.dataprocessclient.delete_data_process_definition,
                        dataprocessdef_id)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the
        # DataProcessDefinition creating the child from the parent.
        self.assertTrue(len(output_data_product_provenance) == 3)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[
            output_data_product_id[0]]['parents'])
        self.assertTrue(platform_device_id in output_data_product_provenance[
            self.input_dp_id]['parents'])
        self.assertTrue(output_data_product_provenance[platform_device_id]
                        ['type'] == 'PlatformDevice')

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_event_transform_worker(self):
        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # test that a data process (type: data-product-in / event-out) can be defined and launched.
        # verify that event fields are correctly populated

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        # create the DPD and two DPs
        self.event_data_process_id = self.create_event_data_processes()

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=self.event_data_process_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_event_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                                   stream_route=stream_route)

        self.start_event_transform_listener()

        self.data_modified = Event()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher.publish(rdt.to_granule())

        self.assertTrue(self.event_verified.wait(self.wait_time))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_bad_argument_map(self):
        self._output_stream_ids = []

        # test that a data process (type: data-product-in / data-product-out) parameter mapping is validated during
        # data process creation and that the correct exception is raised for both input and output.

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # two data processes using one transform and one DPD

        dp1_func_output_dp_id = self.create_output_data_product()

        # Set up DPD and DP #2 - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri=
            'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        )
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS)
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        # create the data process with invalid argument map
        argument_map = {"arr1": "foo", "arr2": "bar"}
        output_param = "salinity"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(
                data_process_definition_id=add_array_dpd_id,
                inputs=[self.input_dp_id],
                outputs=[dp1_func_output_dp_id],
                argument_map=argument_map,
                out_param_name=output_param)

        ex = cm.exception
        log.debug(' exception raised: %s', cm)
        self.assertEqual(
            ex.message,
            "Input data product does not contain the parameters defined in argument map"
        )

        # create the data process with invalid output parameter name
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "foo"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(
                data_process_definition_id=add_array_dpd_id,
                inputs=[self.input_dp_id],
                outputs=[dp1_func_output_dp_id],
                argument_map=argument_map,
                out_param_name=output_param)

        ex = cm.exception
        log.debug(' exception raised: %s', cm)
        self.assertEqual(
            ex.message,
            "Output data product does not contain the output parameter name provided"
        )

    def create_event_data_processes(self):

        # two data processes using one transform and one DPD
        argument_map = {"a": "salinity"}

        # set up DPD and DP #2 - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='validate_salinity_array',
            description='validate_salinity_array',
            function='validate_salinity_array',
            module="ion.processes.data.transforms.test.test_transform_worker",
            arguments=['a'],
            function_type=TransformFunctionType.TRANSFORM)

        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='validate_salinity_array',
            description='validate_salinity_array',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
        )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id,
            add_array_dpd_id,
            binding='validate_salinity_array')

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=add_array_dpd_id,
            inputs=[self.input_dp_id],
            outputs=None,
            argument_map=argument_map)
        self.damsclient.register_process(dp1_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dp1_data_process_id)

        return dp1_data_process_id

    def create_data_process(self):

        # two data processes using one transform and one DPD

        dp1_func_output_dp_id = self.create_output_data_product()
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "salinity"

        # set up DPD and DP #2 - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri=
            'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        )
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
            version_label='1.0a')
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=add_array_dpd_id,
            inputs=[self.input_dp_id],
            outputs=[dp1_func_output_dp_id],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp1_data_process_id)
        #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id)

        # add an attachment object to this DPD to test new SA-21
        import msgpack
        attachment_content = 'foo bar'
        attachment_obj = IonObject(RT.Attachment,
                                   name='test_attachment',
                                   attachment_type=AttachmentType.ASCII,
                                   content_type='text/plain',
                                   content=msgpack.packb(attachment_content))
        att_id = self.rrclient.create_attachment(add_array_dpd_id,
                                                 attachment_obj)
        self.addCleanup(self.rrclient.delete_attachment, att_id)

        return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id

    def create_output_data_product(self):
        dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(
            name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp1_output_dp_obj = IonObject(RT.DataProduct,
                                      name='data_process1_data_product',
                                      description='output of add array func')

        dp1_func_output_dp_id = self.dataproductclient.create_data_product(
            dp1_output_dp_obj, dp1_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product,
                        dp1_func_output_dp_id)
        # retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger
        stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id,
                                                   PRED.hasStream, None, True)
        self._output_stream_ids.append(stream_ids[0])

        subscription_id = self.pubsub_client.create_subscription(
            'validator', data_product_ids=[dp1_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription,
                        subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s',
                      stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)
            self.granule_verified.set()

        validator = StandaloneStreamSubscriber('validator',
                                               callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        subscription_id)

        return dp1_func_output_dp_id

    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s",
                  str(data_process_event.__dict__))

        # if data process already created, check origin
        if self.dp_list:
            self.assertIn(data_process_event.origin, self.dp_list)

            # if this is a heartbeat event then 100 granules have been processed
            if 'data process status update.' in data_process_event.description:
                self.heartbeat_event_verified.set()

        else:
            # otherwise this should be either the worker-assignment or the data-process-created event

            if 'Data process assigned to transform worker' in data_process_event.description:
                self.worker_assigned_event_verified.set()
            elif 'Data process created for data product' in data_process_event.description:
                self.dp_created_event_verified.set()

    def validate_output_granule(self, msg, route, stream_id):
        self.assertIn(stream_id, self._output_stream_ids)

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  rdt: %s', rdt)
        sal_val = rdt['salinity']
        np.testing.assert_array_equal(sal_val, np.array([3]))
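        # with argument_map {"arr1": "conductivity", "arr2": "pressure"} and the published
        # values conductivity=[1] and pressure=[2], add_arrays is expected to place [3] in
        # the mapped output parameter 'salinity', which is what this check verifies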

    def start_event_listener(self):

        es = EventSubscriber(event_type=OT.DataProcessStatusEvent,
                             callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)

    def validate_transform_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DeviceStatusAlertEvent.
        """
        status_alert_event = args[0]

        np.testing.assert_array_equal(status_alert_event.origin,
                                      self.stream_id)
        np.testing.assert_array_equal(status_alert_event.values,
                                      np.array([self.event_data_process_id]))
        log.debug("DeviceStatusAlertEvent: %s",
                  str(status_alert_event.__dict__))
        self.event_verified.set()

    def start_event_transform_listener(self):
        es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent,
                             callback=self.validate_transform_event)
        es.start()

        self.addCleanup(es.stop)

    def test_download(self):
        egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        egg_path = TransformWorker.download_egg(egg_url)

        import pkg_resources
        pkg_resources.working_set.add_entry(egg_path)

        from ion_example.add_arrays import add_arrays

        a = add_arrays(1, 2)
        self.assertEquals(a, 3)

    def test_lookup_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition(
            'lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        data_product = DataProduct(name='lookup data product')
        tdom, sdom = time_series_domain()
        data_product.temporal_domain = tdom.dump()
        data_product.spatial_domain = sdom.dump()

        data_product_id = self.dpsc_cli.create_data_product(
            data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)
        data_producer = DataProducer(name='producer')
        data_producer.producer_context = DataProcessProducerContext()
        data_producer.producer_context.configuration['qc_keys'] = [
            'offset_document'
        ]
        data_producer_id, _ = self.rrclient.create(data_producer)
        self.addCleanup(self.rrclient.delete, data_producer_id)
        assoc, _ = self.rrclient.create_association(
            subject=data_product_id,
            object=data_producer_id,
            predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, assoc)

        document_keys = self.damsclient.list_qc_references(data_product_id)

        self.assertEquals(document_keys, ['offset_document'])
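        # list_qc_references surfaces the qc_keys configured on the associated
        # DataProducer's producer_context above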
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('offset_document', {'offset_a': 2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        dataset_id = dataset_ids[0]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()

        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                   predicate=PRED.hasStream,
                                                   id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(granule)

        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
        np.testing.assert_array_almost_equal(rdt2['calibrated'],
                                             np.array([22.0]))

        svm.stored_value_cas('updated_document', {'offset_a': 3.0})
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        ep.publish_event(origin=data_product_id,
                         reference_keys=['updated_document'])

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()
        gevent.sleep(2)  # Yield so that the event goes through
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt2['temp'], np.array([20., 20.]))
        np.testing.assert_array_almost_equal(rdt2['calibrated'],
                                             np.array([22.0, 23.0]))
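
The 'calibrated' values asserted above amount to an additive correction taken from whichever lookup document the data product currently references (an inference from the expected numbers, not the parameter-function source):

temp = 20.0
print(temp + 2.0)  # 22.0 -> 'calibrated' while 'offset_document' (offset_a=2.0) is referenced
print(temp + 3.0)  # 23.0 -> 'calibrated' after ExternalReferencesUpdatedEvent points at 'updated_document' (offset_a=3.0)
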
Example #47
    def setUp(self):
        DMTestCase.setUp(self)
        self.ph = ParameterHelper(self.dataset_management, self.addCleanup)
        self.pdict_id = self.ph.create_simple_qc_pdict()
        self.svm = StoredValueManager(self.container)

    def test_pydap(self):
        if not CFG.get_safe('bootstrap.use_pydap', False):
            raise unittest.SkipTest('PyDAP is off (bootstrap.use_pydap)')
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'example', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='example')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(
            dp, stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product,
                        data_product_id)

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        self.addCleanup(
            self.data_product_management.suspend_data_product_persistence,
            data_product_id)

        dataset_id = self.resource_registry.find_objects(data_product_id,
                                                         PRED.hasDataset,
                                                         id_only=True)[0][0]
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        rdt = ph.get_rdt(stream_def_id)
        ph.fill_rdt(rdt, 10)
        ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(monitor.event.wait(10))

        gevent.sleep(1)  # Yield to other greenlets, had an issue with connectivity

        pydap_host = CFG.get_safe('server.pydap.host', 'localhost')
        pydap_port = CFG.get_safe('server.pydap.port', 8001)
        url = 'http://%s:%s/%s' % (pydap_host, pydap_port, dataset_id)

        # Do it three times to test that the cache doesn't corrupt the requests/responses
        for i in xrange(3):
            ds = open_url(url)
            np.testing.assert_array_equal(ds['time'][:], np.arange(10))
            untested = []
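            # each parameter presumably comes back as a pydap Grid: ds[k] is the Grid,
            # ds[k][k] its data array, [:] fetches the values and [0] unwraps the single
            # row the handler returns -- hence the ds[k][k][:][0] pattern below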
            for k, v in rdt.iteritems():
                if k == rdt.temporal_parameter:
                    continue
                context = rdt.context(k)
                if isinstance(context.param_type, QuantityType):
                    np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
                elif isinstance(context.param_type, ArrayType):
                    if context.param_type.inner_encoding is None:
                        values = np.empty(rdt[k].shape, dtype='O')
                        for i, obj in enumerate(rdt[k]):
                            values[i] = str(obj)
                        np.testing.assert_array_equal(ds[k][k][:][0], values)
                    elif len(rdt[k].shape) > 1:
                        values = np.empty(rdt[k].shape[0], dtype='O')
                        for i in xrange(rdt[k].shape[0]):
                            values[i] = ','.join(
                                map(lambda x: str(x), rdt[k][i].tolist()))
                elif isinstance(context.param_type, ConstantType):
                    np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
                elif isinstance(context.param_type, CategoryType):
                    np.testing.assert_array_equal(ds[k][k][:][0], rdt[k])
                else:
                    untested.append('%s (%s)' % (k, context.param_type))
            if untested:
                raise AssertionError('Untested parameters: %s' % untested)
Example #49
    def test_lookup_values_ingest_replay(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsub_management.create_stream_definition(
            'lookups', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        config = DotDict()
        config.process.lookup_docs = ['test1', 'test2']
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id,
            config=config)
        self.addCleanup(self.ingestion_management.unpersist_data_stream,
                        stream_id, ingestion_config_id)

        stored_value_manager = StoredValueManager(self.container)
        stored_value_manager.stored_value_cas('test1', {
            'offset_a': 10.0,
            'offset_b': 13.1
        })

        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = [20.0] * 20

        granule = rdt.to_granule()

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(30))

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], np.arange(20))
        np.testing.assert_array_almost_equal(rdt_out['temp'],
                                             np.array([20.] * 20))
        np.testing.assert_array_almost_equal(rdt_out['calibrated'],
                                             np.array([30.] * 20))
        np.testing.assert_array_equal(
            rdt_out['offset_b'],
            np.array([rdt_out.fill_value('offset_b')] * 20))

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(20, 40)
        rdt['temp'] = [20.0] * 20
        granule = rdt.to_granule()

        dataset_monitor.event.clear()

        stored_value_manager.stored_value_cas('test1', {'offset_a': 20.0})
        stored_value_manager.stored_value_cas('coefficient_document',
                                              {'offset_b': 10.0})
        gevent.sleep(2)

        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(30))

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], np.arange(40))
        np.testing.assert_array_almost_equal(rdt_out['temp'],
                                             np.array([20.] * 20 + [20.] * 20))
        np.testing.assert_array_equal(rdt_out['offset_b'],
                                      np.array([10.] * 40))
        np.testing.assert_array_almost_equal(rdt_out['calibrated'],
                                             np.array([30.] * 20 + [40.] * 20))
        np.testing.assert_array_almost_equal(rdt_out['calibrated_b'],
                                             np.array([40.] * 20 + [50.] * 20))
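
The replayed arrays above follow from the lookup documents in play at each step, assuming the calibrated parameters are simple additive corrections (which is what the expected values imply): 'test1' supplies offset_a and 'coefficient_document' supplies offset_b.

# first 20 records : offset_a = 10. -> calibrated = 20. + 10. = 30.
# next 20 records  : offset_a = 20. -> calibrated = 20. + 20. = 40.
# at replay time 'coefficient_document' exists, so offset_b = 10. for all 40 records and
# calibrated_b = calibrated + offset_b -> 40. for the first half, 50. for the second;
# before that document existed, offset_b replayed as its fill value (first retrieve above)
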
Example #50
    def check_vel3d_instrument_data_products(self, reference_designator):
        passing = True
        info_list = []
        passing &= self.check_data_product_reference(reference_designator,
                                                     info_list)
        if not passing:
            return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        pdict = self.RR2.find_parameter_dictionary_of_stream_definition_using_has_parameter_dictionary(
            stream_def_id)
        self.assertEquals(pdict.name, 'vel3d_b_sample')

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        lat = 14.6846
        lon = -51.044
        ts = np.array([
            3319563600, 3319567200, 3319570800, 3319574400, 3319578000,
            3319581600, 3319585200, 3319588800, 3319592400, 3319596000
        ],
                      dtype=np.float)

        ve = np.array([-3.2, 0.1, 0., 2.3, -0.1, 5.6, 5.1, 5.8, 8.8, 10.3])

        vn = np.array([18.2, 9.9, 12., 6.6, 7.4, 3.4, -2.6, 0.2, -1.5, 4.1])
        vu = np.array([-1.1, -0.6, -1.4, -2, -1.7, -2, 1.3, -1.6, -1.1, -4.5])
        ve_expected = np.array([
            -0.085136, -0.028752, -0.036007, 0.002136, -0.023158, 0.043218,
            0.056451, 0.054727, 0.088446, 0.085952
        ])
        vn_expected = np.array([
            0.164012, 0.094738, 0.114471, 0.06986, 0.07029, 0.049237,
            -0.009499, 0.019311, 0.012096, 0.070017
        ])
        vu_expected = np.array([
            -0.011, -0.006, -0.014, -0.02, -0.017, -0.02, 0.013, -0.016,
            -0.011, -0.045
        ])

        rdt['time'] = ts
        rdt['lat'] = [lat] * 10
        rdt['lon'] = [lon] * 10
        rdt['turbulent_velocity_east'] = ve
        rdt['turbulent_velocity_north'] = vn
        rdt['turbulent_velocity_up'] = vu

        passing &= self.assert_array_almost_equal(
            rdt['eastward_turbulent_velocity'], ve_expected)
        passing &= self.assert_array_almost_equal(
            rdt['northward_turbulent_velocity'], vn_expected)
        passing &= self.assert_array_almost_equal(
            rdt['upward_turbulent_velocity'], vu_expected)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.wait())
        if not passing: return passing

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(
            rdt['eastward_turbulent_velocity'], ve_expected)
        passing &= self.assert_array_almost_equal(
            rdt['northward_turbulent_velocity'], vn_expected)
        passing &= self.assert_array_almost_equal(
            rdt['upward_turbulent_velocity'], vu_expected)
        return passing
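
The eastward/northward expectations above are consistent with rotating the magnetic east/north turbulent velocities through the local magnetic declination and converting cm/s to m/s (the upward component is only rescaled). A rough sketch of that correction follows; the declination is assumed to come from a geomagnetic model lookup for the given position and time (roughly -17 degrees here), which is an assumption of this sketch rather than something the test sets explicitly.

import numpy as np

def magnetic_to_true(ve_cm_s, vn_cm_s, declination_deg):
    # rotate magnetic east/north components into true east/north and convert cm/s -> m/s
    theta = np.radians(declination_deg)
    u = (ve_cm_s * np.cos(theta) + vn_cm_s * np.sin(theta)) / 100.0
    v = (-ve_cm_s * np.sin(theta) + vn_cm_s * np.cos(theta)) / 100.0
    return u, v

u, v = magnetic_to_true(np.array([-3.2]), np.array([18.2]), -17.4)  # roughly (-0.085, 0.164), matching the first sample
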
Example #51
    def check_trhph_instrument_data_products(self, reference_designator):
        passing = True
        info_list = []
        passing &= self.check_data_product_reference(reference_designator,
                                                     info_list)
        if not passing:
            return passing

        data_product_id, stream_def_id, dataset_id = info_list.pop()

        pdict = self.RR2.find_parameter_dictionary_of_stream_definition_using_has_parameter_dictionary(
            stream_def_id)
        passing &= self.assertEquals(pdict.name, 'trhph_sample')

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        # calibration constants
        a = 1.98e-9
        b = -2.45e-6
        c = 9.28e-4
        d = -0.0888
        e = 0.731

        V_s = 1.506
        V_c = 0.
        T = 11.8

        r1 = 0.906
        r2 = 4.095
        r3 = 4.095

        ORP_V = 1.806
        Cl = np.nan

        offset = 2008
        gain = 4.0
        # Normally this would be 50 per the DPS but the precision is %4.0f which truncates the values to the nearest 1...
        ORP = ((ORP_V * 1000.) - offset) / gain
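        # worked example: ORP = ((1.806 * 1000.) - 2008) / 4.0 = -50.5 counts, which is
        # the value the vent_fluid_orp assertions below check to 4 decimal places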

        ntp_now = time.time() + 2208988800

        rdt['cc_a'] = [a]
        rdt['cc_b'] = [b]
        rdt['cc_c'] = [c]
        rdt['cc_d'] = [d]
        rdt['cc_e'] = [e]
        rdt['ref_temp_volts'] = [V_s]
        rdt['resistivity_temp_volts'] = [V_c]
        rdt['eh_sensor'] = [ORP_V]
        rdt['resistivity_5'] = [r1]
        rdt['resistivity_x1'] = [r2]
        rdt['resistivity_x5'] = [r3]
        rdt['cc_offset'] = [offset]
        rdt['cc_gain'] = [gain]
        rdt['time'] = [ntp_now]

        passing &= self.assert_array_almost_equal(rdt['vent_fluid_temperaure'],
                                                  [T], 2)
        passing &= self.assert_array_almost_equal(
            rdt['vent_fluid_chloride_conc'], [Cl], 4)
        passing &= self.assert_array_almost_equal(rdt['vent_fluid_orp'], [ORP],
                                                  4)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.wait())
        if not passing: return passing

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)

        passing &= self.assert_array_almost_equal(rdt['vent_fluid_temperaure'],
                                                  [T], 2)
        passing &= self.assert_array_almost_equal(
            rdt['vent_fluid_chloride_conc'], [Cl], 4)
        passing &= self.assert_array_almost_equal(rdt['vent_fluid_orp'], [ORP],
                                                  4)

        return passing
Example #52
class TestQCFunctions(DMTestCase):
    def setUp(self):
        DMTestCase.setUp(self)
        self.ph = ParameterHelper(self.dataset_management, self.addCleanup)
        self.pdict_id = self.ph.create_simple_qc_pdict()
        self.svm = StoredValueManager(self.container)

    def new_rdt(self, ref='QCTEST'):
        self.stream_def_id = self.create_stream_definition(
            uuid4().hex,
            parameter_dictionary_id=self.pdict_id,
            stream_configuration={'reference_designator': ref})
        self.rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)

    def test_qc_functions(self):
        self.check_global_range()
        self.check_spike()
        self.check_stuck_value()
        self.check_trend()
        self.check_gradient()
        self.check_localrange()
        self.check_propagate()


    def check_global_range(self):
        log.info('check_global_range')
        self.new_rdt()
        self.svm.stored_value_cas('grt_QCTEST_TEMPWAT', {'grt_min_value':10., 'grt_max_value':20.})

        self.rdt['time'] = np.arange(8)
        self.rdt['temp'] = [9, 10, 16, 17, 18, 19, 20, 25]
        self.rdt.fetch_lookup_values()
        np.testing.assert_array_almost_equal(self.rdt['tempwat_glblrng_qc'], [0, 1, 1, 1, 1, 1, 1, 0])
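        # The expected flags follow directly from the stored bounds (a sketch;
        # the actual work is done by the bound QC parameter function):
        #   flags = ((temp >= grt_min_value) & (temp <= grt_max_value)).astype(int)
        # 9 is below 10 and 25 is above 20, so only the end points come back 0.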

    def check_spike(self): 
        log.info('check_spike')
        self.new_rdt()
        self.svm.stored_value_cas('spike_QCTEST_TEMPWAT', {'acc':0.1, 'spike_n':5., 'spike_l':5.})

        self.rdt['time'] = np.arange(8)
        self.rdt['temp'] = [-1, 3, 40, -1, 1, -6, -6, 1]
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_almost_equal(self.rdt['tempwat_spketst_qc'], [1, 1, 0, 1, 1, 1, 1, 1])

    def check_stuck_value(self):
        log.info('check_stuck_value')
        self.new_rdt()
        self.svm.stored_value_cas('svt_QCTEST_TEMPWAT', {'svt_resolution':0.001, 'svt_n': 4.})

        self.rdt['time'] = np.arange(10)
        self.rdt['temp'] = [4.83, 1.40, 3.33, 3.33, 3.33, 3.33, 4.09, 2.97, 2.85, 3.67]
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_almost_equal(self.rdt['tempwat_stuckvl_qc'], [1, 1, 0, 0, 0, 0, 1, 1, 1, 1])
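        # Sketch of the stuck-value logic behind the expected flags: any run of
        # svt_n (4) or more consecutive readings that agree to within
        # svt_resolution counts as stuck. The four 3.33 readings at indices 2-5
        # form such a run, so those points are flagged 0 and the rest stay 1.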

    def check_trend(self):
        log.info('check_trend')
        self.new_rdt()
        self.svm.stored_value_cas('trend_QCTEST_TEMPWAT', {'time_interval':0, 'polynomial_order': 1, 'standard_deviation': 3})
        self.rdt['time'] = np.arange(10)
        self.rdt['temp'] = [0.8147, 0.9058, 0.1270, 0.9134, 0.6324, 0.0975, 0.2785, 0.5469, 0.9575, 0.9649]

        self.rdt.fetch_lookup_values()

        np.testing.assert_array_equal(self.rdt['tempwat_trndtst_qc'], [1] * 10)


    def check_propagate(self):
        log.info('check_propagate')
        self.new_rdt()
        self.rdt['time'] = np.arange(8)
        self.rdt['temp'] = [9, 10, 16, 17, 18, 19, 20, 25]
        self.rdt['tempwat_glblrng_qc'] = [0, 1, 1, 1, 1, 1, 1, 0]
        self.rdt['tempwat_spketst_qc'] = [0, 1, 1, 1, 1, 1, 1, 0]
        self.rdt['tempwat_stuckvl_qc'] = [0, 1, 1, 1, 1, 1, 1, 0]
        self.rdt['tempwat_gradtst_qc'] = [0, 1, 1, 1, 1, 1, 1, 0]
        self.rdt['tempwat_trndtst_qc'] = [0, 1, 1, 1, 1, 1, 1, 0]
        self.rdt['tempwat_loclrng_qc'] = [0, 1, 1, 1, 1, 1, 1, 0]
        self.rdt['preswat_glblrng_qc'] = [0, 1, 1, 1, 1, 1, 1, 0]
        self.rdt['preswat_spketst_qc'] = [0, 1, 1, 1, 1, 1, 1, 0]
        self.rdt['preswat_stuckvl_qc'] = [0, 1, 1, 1, 1, 1, 1, 0]
        self.rdt['preswat_gradtst_qc'] = [0, 1, 1, 1, 1, 1, 1, 0]
        self.rdt['preswat_trndtst_qc'] = [0, 1, 1, 1, 1, 1, 1, 0]
        self.rdt['preswat_loclrng_qc'] = [0, 1, 1, 1, 1, 1, 1, 0]
        np.testing.assert_array_equal(self.rdt['cmbnflg_qc'], [0, 1, 1, 1, 1, 1, 1, 0])
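        # The combined flag behaves like an element-wise AND of the individual
        # QC flags, roughly np.logical_and.reduce(all_qc_arrays).astype(int);
        # since every input above is [0, 1, 1, 1, 1, 1, 1, 0], so is cmbnflg_qc.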
    
    def check_gradient(self):
        log.info('check_gradient')
        self.new_rdt()
        self.svm.stored_value_cas('grad_QCTEST_TEMPWAT_time', {'d_dat_dx': 50, 'min_dx': 0, 'start_dat': 0, 'tol_dat': 5})
        self.rdt['time'] = np.arange(5)
        self.rdt['temp'] = [3, 5, 98, 99, 4]
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_equal(self.rdt['tempwat_gradtst_qc'], [1, 1, 0, 0, 1])
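        # Reading of the expected flags (per my understanding of the gradient
        # test): 98 fails because the change from the last good value (5)
        # exceeds d_dat_dx; once a point fails, later points stay flagged 0
        # until a value returns to within tol_dat of the last good value, so 99
        # is also 0 and the final 4 (within tol_dat of 5) recovers to 1.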

    def check_localrange(self):
        log.info('check_localrange')
        self.new_rdt()
        t = np.array([3580144703.7555027, 3580144704.7555027, 3580144705.7555027, 3580144706.7555027, 3580144707.7555027, 3580144708.7555027, 3580144709.7555027, 3580144710.7555027, 3580144711.7555027, 3580144712.7555027])
        pressure = np.random.rand(10) * 2 + 33.0
        t_v = ntp_to_month(t)
        dat = t_v + pressure + np.arange(16,26)
        def lim1(p,m):
            return p+m+10
        def lim2(p,m):
            return p+m+20

        pressure_grid, month_grid = np.meshgrid(np.arange(0,150,10), np.arange(11))
        points = np.column_stack([pressure_grid.flatten(), month_grid.flatten()])
        datlim_0 = lim1(points[:,0], points[:,1])
        datlim_1 = lim2(points[:,0], points[:,1])
        datlim = np.column_stack([datlim_0, datlim_1])
        datlimz = points

        self.svm.stored_value_cas('lrt_QCTEST_TEMPWAT', {'datlim':datlim.tolist(), 'datlimz':datlimz.tolist(), 'dims':['pressure', 'month']})
        self.rdt['time'] = t
        self.rdt['temp'] = dat
        self.rdt['pressure'] = pressure
        
        self.rdt.fetch_lookup_values()

        np.testing.assert_array_equal(self.rdt['tempwat_loclrng_qc'], [1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
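        # Sketch of the local-range check these flags assume (the real work is
        # done by the QC parameter function): the datlim table is interpolated
        # at each record's (pressure, month) point and the datum must fall
        # between the two interpolated limits, e.g. with scipy (assumed
        # equivalent, not the actual implementation):
        #   lo = LinearNDInterpolator(datlimz, datlim[:, 0])
        #   hi = LinearNDInterpolator(datlimz, datlim[:, 1])
        #   z = np.column_stack([pressure, t_v])
        #   flags = ((dat >= lo(z)) & (dat <= hi(z))).astype(int)
        # With limits of p + m + 10 and p + m + 20 and data at p + m + 16..25,
        # the last five records exceed the upper limit and are flagged 0.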
Example #53
    def test_qc_interval_integration(self):

        # 1. Make a dataset that has only one discrete QC violation
        # 2. Launch the process
        # 3. Set up the scheduler to run it, say, three times
        # 4. Get the events and verify the data

        #--------------------------------------------------------------------------------
        # Make a dataset that has only one discrete qc violation
        #--------------------------------------------------------------------------------

        dp_id, dataset_id, stream_def_id = self.make_data_product()
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)
        for rdt in self.populate_vectors(stream_def_id, 1,
                                         lambda x: [41] + [39] * (x - 1)):
            ph.publish_rdt_to_data_product(dp_id, rdt)
        self.assertTrue(
            monitor.event.wait(10))  # Give it 10 seconds to populate

        #--------------------------------------------------------------------------------
        # Launch the process
        #--------------------------------------------------------------------------------

        interval_key = uuid4().hex
        config = DotDict()
        config.process.interval_key = interval_key
        config.process.qc_params = [
            'glblrng_qc'
        ]  # The others are tested in other tests for completeness
        self.sync_launch(config)

        async_queue = Queue()

        def callback(event, *args, **kwargs):
            times = event.temporal_values
            self.assertEquals(len(times), 1)
            async_queue.put(1)

        es = EventSubscriber(event_type=OT.ParameterQCEvent,
                             origin=dp_id,
                             callback=callback,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        #--------------------------------------------------------------------------------
        # Setup the scheduler
        #--------------------------------------------------------------------------------

        timer_id = self.scheduler_service.create_interval_timer(
            start_time=time.time(),
            end_time=time.time() + 13,
            interval=5,
            event_origin=interval_key)
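        # Timing assumption: with interval=5 over a roughly 13-second window
        # the timer should fire at least twice, which is why the loop below
        # waits on two QC events.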

        #--------------------------------------------------------------------------------
        # Get the events and verify them
        #--------------------------------------------------------------------------------

        try:
            for i in xrange(2):
                async_queue.get(timeout=10)
        except Empty:
            raise AssertionError('QC Events not raised')
Example #54
    def test_sparse_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_sparse()
        stream_def_id = self.pubsub_management.create_stream_definition(
            'sparse', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        stream_def_id)
        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        dataset_id = self.create_dataset(pdict_id)
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        ntp_now = time.time() + 2208988800
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))
        dataset_monitor.event.clear()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])
        np.testing.assert_array_almost_equal(rdt_out['lat'], np.array([45]))
        np.testing.assert_array_almost_equal(rdt_out['lon'], np.array([-71]))

        np.testing.assert_array_almost_equal(rdt_out['conductivity_L1'],
                                             np.array([42.914]))
        np.testing.assert_array_almost_equal(rdt_out['temp_L1'],
                                             np.array([20.]))
        np.testing.assert_array_almost_equal(rdt_out['pressure_L1'],
                                             np.array([3.068]))
        np.testing.assert_array_almost_equal(
            rdt_out['density'], np.array([1021.7144739593881],
                                         dtype='float32'))
        np.testing.assert_array_almost_equal(
            rdt_out['salinity'], np.array([30.935132729668283],
                                          dtype='float32'))
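        # The L1 values line up with simple linear calibrations of the raw
        # counts published above (presumably how the sparse parameter
        # dictionary defines them):
        #   temp_L1 = 300000 / 10000.  - 10. = 20.0
        #   cond_L1 = 4341400 / 100000. - 0.5 = 42.914
        #   pres_L1 = 256.8 / 100.      + 0.5 = 3.068
        # density and salinity are then derived from these L1 products.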

        rdt = ph.get_rdt(stream_def_id)
        rdt['lat'] = [46]
        rdt['lon'] = [-73]

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))
        dataset_monitor.event.clear()

        rdt = ph.get_rdt(stream_def_id)
        rdt['lat'] = [1000]
        rdt['lon'] = [3]

        publisher.publish(rdt.to_granule())

        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['pressure'] = [256.8]

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))
        dataset_monitor.event.clear()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)
        np.testing.assert_array_almost_equal(rdt_out['lat'], np.array([45,
                                                                       46]))
        np.testing.assert_array_almost_equal(rdt_out['lon'],
                                             np.array([-71, -73]))
Example #55
    def test_sparse_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_sparse()
        stream_def_id = self.pubsub_management.create_stream_definition('sparse', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        dataset_id = self.create_dataset(pdict_id)
        self.start_ingestion(stream_id,dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        # Publish the initial granule.
        # It carries the sparse values inline: lat is set to 45 and lon to -71.
        ntp_now = time.time() + 2208988800
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = ['']
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        # Check the values and make sure they're correct
        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        np.testing.assert_allclose(rdt_out['temp'], rdt['temp'])
        np.testing.assert_allclose(rdt_out['lat'], np.array([45]))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71]))

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))


        # We're going to change the lat/lon
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = time.time() + 2208988800
        rdt['lat'] = [46]
        rdt['lon'] = [-73]
        
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()


        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        
        for i in xrange(9):
            ntp_now = time.time() + 2208988800
            rdt['time'] = [ntp_now]
            rdt['internal_timestamp'] = [ntp_now]
            rdt['temp'] = [300000]
            rdt['preferred_timestamp'] = ['driver_timestamp']
            rdt['port_timestamp'] = [ntp_now]
            rdt['quality_flag'] = [None]
            rdt['conductivity'] = [4341400]
            rdt['driver_timestamp'] = [ntp_now]
            rdt['pressure'] = [256.8]

            publisher.publish(rdt.to_granule())
            self.assertTrue(dataset_monitor.wait())
            dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['pressure'], np.array([256.8] * 10))
        np.testing.assert_allclose(rdt_out['lat'], np.array([45] + [46] * 9))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71] + [-73] * 9))
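        # Sparse parameters keep their last published value until a new one is
        # published, which is why the first record still shows (45, -71) while
        # every later record carries the (46, -73) values published afterwards.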
Example #56
class TestDMExtended(DMTestCase):
    '''
    ion/services/dm/test/test_dm_extended.py:TestDMExtended
    '''
    def setUp(self):
        DMTestCase.setUp(self)
        self.ph = ParameterHelper(self.dataset_management, self.addCleanup)

    @attr('UTIL')
    def test_pydap_handlers(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict')
        stream_def_id = self.create_stream_definition('ctd', parameter_dictionary_id=pdict_id)
        data_product_id = self.create_data_product('ctd', stream_def_id=stream_def_id)
        self.activate_data_product(data_product_id)

        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)


        rdt = self.ph.get_rdt(stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        self.ph.publish_rdt_to_data_product(data_product_id,rdt)
        dataset_monitor.event.wait(10)

        from pydap.client import open_url
        pydap_host = CFG.get_safe('server.pydap.host','localhost')
        pydap_port = CFG.get_safe('server.pydap.port',8001)
        url = 'http://%s:%s/%s' %(pydap_host, pydap_port, dataset_id)
        ds = open_url(url)
        ds['temp']['temp'][:]
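        # Slicing ds['temp']['temp'][:] just pulls the sequence back through
        # pydap as a smoke test that the handler serves this dataset; nothing
        # is asserted about the returned values.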

    def make_array_data_product(self):
        pdict_id = self.ph.crete_simple_array_pdict()
        stream_def_id = self.create_stream_definition('test_array_flow_paths', parameter_dictionary_id=pdict_id)

        data_product_id = self.create_data_product('test_array_flow_paths', stream_def_id)
        self.activate_data_product(data_product_id)
        return data_product_id, stream_def_id

    def preload_beta(self):
        config = DotDict()
        config.op = 'load'
        config.loadui=True
        config.ui_path =  "https://userexperience.oceanobservatories.org/database-exports/Candidates"
        config.attachments = "res/preload/r2_ioc/attachments"
        config.scenario = 'BETA'
        config.categories='ParameterFunctions,ParameterDefs,ParameterDictionary'
        self.container.spawn_process('preloader', 'ion.processes.bootstrap.ion_loader', 'IONLoader', config)
    
    def create_google_dt_workflow_def(self):
        # Check to see if the workflow definition already exists
        workflow_def_ids,_ = self.resource_registry.find_resources(restype=RT.WorkflowDefinition, name='Realtime_Google_DT', id_only=True)

        if len(workflow_def_ids) > 0:
            workflow_def_id = workflow_def_ids[0]
        else:
            # Build the workflow definition
            workflow_def_obj = IonObject(RT.WorkflowDefinition, name='Realtime_Google_DT',description='Convert stream data to Google Datatable')

            #Add a transformation process definition
            google_dt_procdef_id = self.create_google_dt_data_process_definition()
            workflow_step_obj = IonObject('DataProcessWorkflowStep', data_process_definition_id=google_dt_procdef_id)
            workflow_def_obj.workflow_steps.append(workflow_step_obj)

            #Create it in the resource registry
            workflow_def_id = self.workflow_management.create_workflow_definition(workflow_def_obj)

        return workflow_def_id
   
    def create_google_dt_data_process_definition(self):

        #First look to see if it exists and if not, then create it
        dpd,_ = self.resource_registry.find_resources(restype=RT.DataProcessDefinition, name='google_dt_transform')
        if len(dpd) > 0:
            return dpd[0]

        # Data Process Definition
        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='google_dt_transform',
            description='Convert data streams to Google DataTables',
            module='ion.processes.data.transforms.viz.google_dt',
            class_name='VizTransformGoogleDT')
        try:
            procdef_id = self.data_process_management.create_data_process_definition(dpd_obj)
        except Exception as ex:
            self.fail("failed to create new VizTransformGoogleDT data process definition: %s" %ex)

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('google_dt', id_only=True)

        # Create a stream definition for the transform's google_dt output
        stream_def_id = self.pubsub_management.create_stream_definition(name='VizTransformGoogleDT', parameter_dictionary_id=pdict_id)
        self.data_process_management.assign_stream_definition_to_data_process_definition(stream_def_id, procdef_id, binding='google_dt' )

        return procdef_id

    @attr('UTIL')
    def test_dm_realtime_visualization(self):
        self.preload_beta()

        # Create the google_dt workflow definition since there is no preload for the test
        workflow_def_id = self.create_google_dt_workflow_def()

        #Create the input data product
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_simulator', id_only=True)
        stream_def_id = self.create_stream_definition('ctd sim L2', parameter_dictionary_id=pdict_id)
        data_product_id = self.create_data_product('ctd simulator', stream_def_id=stream_def_id)
        self.activate_data_product(data_product_id)

        #viz_token = self.visualization.initiate_realtime_visualization_data(data_product_id=data_product_id)

        streamer = Streamer(data_product_id)
        self.addCleanup(streamer.stop)

        breakpoint(locals())
#        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
#        ctd_sim_pid = self.start_sinusoidal_input_stream_process(ctd_stream_id)
#
#        vis_params ={}
#        vis_token_resp = self.vis_client.initiate_realtime_visualization_data(data_product_id=ctd_parsed_data_product_id, visualization_parameters=simplejson.dumps(vis_params))
#        print ">>>>>>>>>>>>>>>>>>> vis_token_resp : ", vis_token_resp
#
#        import ast
#        vis_token = ast.literal_eval(vis_token_resp)["rt_query_token"]
#
#        result = gevent.event.AsyncResult()
#
#        def get_vis_messages(get_data_count=7):  # Should be an odd number for round-robin processing by service workers
#
#
#            get_cnt = 0
#            while get_cnt < get_data_count:
#
#                vis_data = self.vis_client.get_realtime_visualization_data(vis_token)
#                if (vis_data):
#                    self.validate_google_dt_transform_results(vis_data)
#
#                get_cnt += 1
#                gevent.sleep(5) # simulates the polling from UI
#
#            result.set(get_cnt)
#
#        gevent.spawn(get_vis_messages)
#
#        result.get(timeout=90)
#
#        #Trying to continue to receive messages in the queue
#        gevent.sleep(2.0)  # Send some messages - don't care how many
#
#
#        # Cleanup
#        self.vis_client.terminate_realtime_visualization_data(vis_token)
#
#
#        #Turning off after everything - since it is more representative of an always on stream of data!
#        self.process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data

    
    @attr('INT',group='dm')
    def test_array_visualization(self):
        data_product_id, stream_def_id = self.make_array_data_product()

        # Make a granule with an array type, give it a few values
        # Send it to google_dt transform, verify output

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(2208988800, 2208988810)
        rdt['temp_sample'] = np.arange(10*4).reshape(10,4)
        rdt['cond_sample'] = np.arange(10*4).reshape(10,4)

        granule = rdt.to_granule()
        dataset_monitor = DatasetMonitor(self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id))
        self.addCleanup(dataset_monitor.stop)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        dataset_monitor.event.wait(10)

        gdt_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('google_dt',id_only=True)
        gdt_stream_def = self.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)

        gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(granule, params=gdt_stream_def)

        rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
        testval = {'data_content': [
            [0.0 , 0.0  , 1.0  , 2.0  , 3.0  , 0.0  , 2.0  , 4.0  , 6.0  , 0.0  , 1.0  , 2.0  , 3.0]   ,
            [1.0 , 4.0  , 5.0  , 6.0  , 7.0  , 8.0  , 10.0 , 12.0 , 14.0 , 4.0  , 5.0  , 6.0  , 7.0]   ,
            [2.0 , 8.0  , 9.0  , 10.0 , 11.0 , 16.0 , 18.0 , 20.0 , 22.0 , 8.0  , 9.0  , 10.0 , 11.0]  ,
            [3.0 , 12.0 , 13.0 , 14.0 , 15.0 , 24.0 , 26.0 , 28.0 , 30.0 , 12.0 , 13.0 , 14.0 , 15.0]  ,
            [4.0 , 16.0 , 17.0 , 18.0 , 19.0 , 32.0 , 34.0 , 36.0 , 38.0 , 16.0 , 17.0 , 18.0 , 19.0]  ,
            [5.0 , 20.0 , 21.0 , 22.0 , 23.0 , 40.0 , 42.0 , 44.0 , 46.0 , 20.0 , 21.0 , 22.0 , 23.0]  ,
            [6.0 , 24.0 , 25.0 , 26.0 , 27.0 , 48.0 , 50.0 , 52.0 , 54.0 , 24.0 , 25.0 , 26.0 , 27.0]  ,
            [7.0 , 28.0 , 29.0 , 30.0 , 31.0 , 56.0 , 58.0 , 60.0 , 62.0 , 28.0 , 29.0 , 30.0 , 31.0]  ,
            [8.0 , 32.0 , 33.0 , 34.0 , 35.0 , 64.0 , 66.0 , 68.0 , 70.0 , 32.0 , 33.0 , 34.0 , 35.0]  ,
            [9.0 , 36.0 , 37.0 , 38.0 , 39.0 , 72.0 , 74.0 , 76.0 , 78.0 , 36.0 , 37.0 , 38.0 , 39.0]] ,
                 'data_description': [('time', 'number', 'time'),
              ('temp_sample[0]', 'number', 'temp_sample[0]', {'precision': '5'}),
              ('temp_sample[1]', 'number', 'temp_sample[1]', {'precision': '5'}),
              ('temp_sample[2]', 'number', 'temp_sample[2]', {'precision': '5'}),
              ('temp_sample[3]', 'number', 'temp_sample[3]', {'precision': '5'}),
              ('temp_offset[0]', 'number', 'temp_offset[0]', {'precision': '5'}),
              ('temp_offset[1]', 'number', 'temp_offset[1]', {'precision': '5'}),
              ('temp_offset[2]', 'number', 'temp_offset[2]', {'precision': '5'}),
              ('temp_offset[3]', 'number', 'temp_offset[3]', {'precision': '5'}),
              ('cond_sample[0]', 'number', 'cond_sample[0]', {'precision': '5'}),
              ('cond_sample[1]', 'number', 'cond_sample[1]', {'precision': '5'}),
              ('cond_sample[2]', 'number', 'cond_sample[2]', {'precision': '5'}),
              ('cond_sample[3]', 'number', 'cond_sample[3]', {'precision': '5'})],
             'viz_product_type': 'google_dt'}
        self.assertEquals(rdt['google_dt_components'][0], testval)
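        # data_content layout: column 0 is time, then temp_sample[0..3],
        # temp_offset[0..3] and cond_sample[0..3] for each of the 10 records.
        # temp_sample and cond_sample are the published arange(40).reshape(10, 4)
        # values; temp_offset comes back as exactly twice temp_sample,
        # presumably a derived parameter defined in the array parameter dictionary.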

    @attr('INT',group='dm')
    def test_array_flow_paths(self):
        data_product_id, stream_def_id = self.make_array_data_product()

        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
        dm = DatasetMonitor(dataset_id)
        self.addCleanup(dm.stop)


        # I need to make sure that we can fill the RDT with its values
        # Test for one timestep
        # Test for multiple timesteps
        # Publishes 
        # Ingests correctly
        # Retrieves correctly

        #--------------------------------------------------------------------------------
        # Ensure that the RDT can be filled with ArrayType values
        #--------------------------------------------------------------------------------
        
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp_sample'] = [[0,1,2,3,4]]
        np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0,1,2,3,4]]))

        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dm.event.wait(10))
        dm.event.clear()

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0,1,2,3,4]]))

        #--------------------------------------------------------------------------------
        # Ensure that it deals with multiple values
        #--------------------------------------------------------------------------------

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1,2,3]
        rdt['temp_sample'] = [[0,1,2,3,4],[1],[5,5,5,5,5]]

        m = rdt.fill_value('temp_sample') or np.finfo(np.float32).max
        np.testing.assert_equal(m,np.finfo(np.float32).max)
        np.testing.assert_array_equal(rdt['temp_sample'], [[0,1,2,3,4],[1,m,m,m,m],[5,5,5,5,5]])
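        # Rows shorter than the array width are padded with the parameter's
        # fill value (float32 max here), so [1] round-trips as [1, m, m, m, m].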
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)
        self.assertTrue(dm.event.wait(10))
        dm.event.clear()


        #--------------------------------------------------------------------------------
        # Retrieve and Verify
        #--------------------------------------------------------------------------------

        retrieved_granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(retrieved_granule)
        np.testing.assert_array_equal(rdt['time'], np.array([0,1,2,3]))
        np.testing.assert_array_equal(rdt['temp_sample'], np.array([[0,1,2,3,4],[0,1,2,3,4],[1,m,m,m,m],[5,5,5,5,5]]))
        
    @attr('UTIL')
    def test_creation_args(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict')
        stream_def_id = self.create_stream_definition('ctd', parameter_dictionary_id=pdict_id)
        data_product_id = self.create_data_product('ctd', stream_def_id=stream_def_id)
        self.activate_data_product(data_product_id)

        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)

        breakpoint(locals())

        rdt = self.ph.get_rdt(stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        self.ph.publish_rdt_to_data_product(data_product_id,rdt)
        dataset_monitor.event.wait(10)

        breakpoint(locals())

        granule = self.data_retriever.retrieve(dataset_id)

        breakpoint(locals())