Пример #1
0
    def test_ingestion_pause(self):
        """
        Check that a paused data stream is only ingested once it is resumed.

        A first granule (times 0-9) is published and must trigger the dataset
        monitor.  The stream is then paused and a second granule (times 10-19)
        is published; ``monitor.event.wait(1)`` must come back false.  After
        resuming, the monitor must fire and a retrieve must yield times 0-19.
        """
        stream_id, stream_route, definition_id, ds_id = self.make_simple_dataset()
        config_id = self.get_ingestion_config()
        self.start_ingestion(stream_id, ds_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        record = RecordDictionaryTool(stream_definition_id=definition_id)
        record['time'] = np.arange(10)

        pub = StandaloneStreamPublisher(stream_id, stream_route)
        dataset_monitor = DatasetMonitor(ds_id)
        self.addCleanup(dataset_monitor.stop)

        # Ingestion is running here, so the monitor is expected to fire.
        pub.publish(record.to_granule())
        self.assertTrue(dataset_monitor.wait())
        # The granule returned by this retrieve is not inspected.
        self.data_retriever.retrieve(ds_id)

        self.ingestion_management.pause_data_stream(stream_id, config_id)

        # Published while paused: the monitor's event must stay unset.
        dataset_monitor.event.clear()
        record['time'] = np.arange(10, 20)
        pub.publish(record.to_granule())
        fired_while_paused = dataset_monitor.event.wait(1)
        self.assertFalse(fired_while_paused)

        self.ingestion_management.resume_data_stream(stream_id, config_id)
        self.assertTrue(dataset_monitor.wait())

        replayed = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(ds_id))
        np.testing.assert_array_almost_equal(replayed['time'], np.arange(20))
    def test_qc_events(self):
        """
        Publish a granule on a QC stream and expect a ParameterQCEvent for it.

        An event subscriber listens for OT.ParameterQCEvent with the dataset
        as origin.  The test passes only if, within the timeout of 10 given to
        ``verified.wait``, an event arrives whose ``qc_parameter`` is
        'temp_qc' and whose ``temporal_value`` is 7.
        """
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_qc_pdict()
        stream_def_id = self.pubsub_management.create_stream_definition('qc stream def', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('qc stream', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        config = DotDict()

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id, config=config)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.arange(10) * 3

        verified = Event()

        def verification(event, *args, **kwargs):
            # `verified` is only set after both checks pass, so a mismatching
            # event surfaces as a failure of the final assertTrue below.
            self.assertEqual(event.qc_parameter, 'temp_qc')
            self.assertEqual(event.temporal_value, 7)
            verified.set()

        es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=dataset_id, callback=verification, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(10))
Пример #3
0
    def create_test_granules(self, buffer_data=False):
        """
        Build granules from five hard-coded test particles.

        The particles are split into groups and one granule is produced per
        group.  A new group is opened whenever a particle's 'new_sequence'
        entry is truthy or ``buffer_data`` equals False, so without buffering
        each particle gets its own granule.  Every granule receives a fresh
        uuid4 connection id and a connection index counting up from 1.

        :param buffer_data: when true, consecutive particles share a granule
                            until a particle asks for a new sequence.
        :return: list of granules generated.
        """
        start = 3583861263.0

        # Only the fourth particle is created with the extra trailing True
        # argument (presumably its new-sequence flag -- see get_particle).
        particles = [
            self.get_particle(start, 10.5914, 161.06, 4.1870, 2693.0),
            self.get_particle(start + 1, 10.5915, 161.07, 4.1871, 2693.1),
            self.get_particle(start + 2, 10.5916, 161.08, 4.1872, 2693.2),
            self.get_particle(start + 3, 10.5917, 161.09, 4.1873, 2693.3, True),
            self.get_particle(start + 4, 10.5918, 161.10, 4.1874, 2693.4),
        ]

        groups = []
        for particle in particles:
            # The explicit comparison with False is kept on purpose: it is
            # not the same as ``not buffer_data`` for values such as None.
            wants_new_group = (particle.get('new_sequence', False)
                               or buffer_data == False)
            # A new group is only opened when the current one holds data.
            if not groups or (wants_new_group and groups[-1]):
                groups.append([])
            groups[-1].append(particle)

        log.debug("Granules to create: %s", len(groups))

        granules = []
        for index, group in enumerate(groups, 1):
            conn_id = uuid.uuid4()
            rdt = populate_rdt(
                RecordDictionaryTool(param_dictionary=self.get_param_dict()),
                group)
            granules.append(rdt.to_granule(data_producer_id='agent_res_id',
                                           connection_id=conn_id.hex,
                                           connection_index=str(index)))

        return granules
 def create_granule(self, stream_name, param_dict_name, particle_list):
     """
     Create a granule holding the given particles.

     Looks up the parameter dictionary by name, creates a new stream
     definition called ``stream_name`` for it and fills a
     RecordDictionaryTool built from that definition through populate_rdt.

     :param stream_name: name given to the newly created stream definition
     :param param_dict_name: name of the parameter dictionary to look up
     :param particle_list: particles handed to populate_rdt
     :return: granule produced with data_producer_id 'fake_agent_id'
     """
     param_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
         param_dict_name, id_only=True)
     definition_id = self.pubsub_client.create_stream_definition(
         name=stream_name, parameter_dictionary_id=param_dict_id)
     definition = self.pubsub_client.read_stream_definition(definition_id)

     record = populate_rdt(
         RecordDictionaryTool(stream_definition=definition), particle_list)
     log.trace("RDT: %s", str(record))
     return record.to_granule(data_producer_id='fake_agent_id')
Пример #5
0
    def publish_hifi(self, stream_id, stream_route, offset=0):
        """
        Publish one granule of ten deterministic 'time' and 'temp' samples.

        Both fields carry ``np.arange(10)`` shifted by ``offset * 10``.

        :param stream_id: stream to publish on; also used to look up the
                          stream definition
        :param stream_route: route handed to StandaloneStreamPublisher
        :param offset: multiplied by 10 and added to every sample
        """
        publisher = StandaloneStreamPublisher(stream_id, stream_route)

        definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        record = RecordDictionaryTool(stream_definition_id=definition._id)

        shift = offset * 10
        # Each field gets its own freshly built array.
        for field in ('time', 'temp'):
            record[field] = np.arange(10) + shift
        publisher.publish(record.to_granule())
 def create_granule(self, stream_name, param_dict_name, particle_list):
     """
     Turn a list of particles into a single granule.

     A stream definition named ``stream_name`` is created for the parameter
     dictionary found under ``param_dict_name``; the definition is read back
     and used to build the RecordDictionaryTool that populate_rdt fills.

     :param stream_name: name for the new stream definition
     :param param_dict_name: parameter dictionary name to resolve to an id
     :param particle_list: particles passed to populate_rdt
     :return: the granule, produced with data_producer_id 'fake_agent_id'
     """
     dictionary_id = self.dataset_management.read_parameter_dictionary_by_name(
         param_dict_name, id_only=True)
     new_def_id = self.pubsub_client.create_stream_definition(
         name=stream_name, parameter_dictionary_id=dictionary_id)
     new_def = self.pubsub_client.read_stream_definition(new_def_id)

     filled = populate_rdt(
         RecordDictionaryTool(stream_definition=new_def), particle_list)
     log.trace("RDT: %s", str(filled))
     return filled.to_granule(data_producer_id='fake_agent_id')
Пример #7
0
    def test_retrieve_and_transform(self):
        """
        Ingest two CTD granules and retrieve them through the salinity transform.

        Two granules (times 0-9 and 10-19) are published on a persisted CTD
        stream.  The dataset is then retrieved with the
        CTDL2SalinityTransformAlgorithm applied, and the test fails if any
        value of the resulting 'salinity' field equals 0.
        """

        # Stream definition for the CTD data
        pdict_id             = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id        = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)
        ctd_stream_id, route = self.pubsub_management.create_stream('ctd stream', 'xp1', stream_definition_id=stream_def_id)


        # Stream definition for the salinity data (built from the same
        # 'ctd_parsed_param_dict' parameter dictionary as the CTD one)
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id)

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        #--------------------------------------------------------------------------------
        # get_datastore is called before the stream is persisted.  The original
        # note only describes this as a recurring problem, so the exact reason
        # for the call is not documented here.
        #--------------------------------------------------------------------------------
        self.get_datastore(dataset_id)
        self.ingestion_management.persist_data_stream(stream_id=ctd_stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        # Second granule reuses the same temp/conductivity with later times.
        rdt['time'] = np.arange(10,20)

        publisher.publish(rdt.to_granule())


        self.wait_until_we_have_enough_granules(dataset_id, 2)

        granule = self.data_retriever.retrieve(dataset_id, 
                                             None,
                                             None, 
                                             'ion.processes.data.transforms.ctd.ctd_L2_salinity',
                                             'CTDL2SalinityTransformAlgorithm', 
                                             kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt['salinity']:
            # assertNotEqual replaces the deprecated assertNotEquals alias.
            self.assertNotEqual(i,0)
Пример #8
0
    def _publish_stream_buffer(self, stream_name):
        """
        Publish everything currently buffered for ``stream_name`` as one granule.

        Returns immediately when the buffer is empty.  Otherwise the buffer is
        drained, the values are loaded into a RecordDictionaryTool through
        populate_rdt, the granule is published on the stream's publisher and
        the stream's connection index is incremented.  Any exception raised
        along the way is logged and not re-raised.

        Field names noted by the original author (apparently for the raw and
        the parsed stream respectively):

        ['quality_flag', 'preferred_timestamp', 'port_timestamp', 'lon', 'raw',
         'internal_timestamp', 'time', 'lat', 'driver_timestamp']
        ['quality_flag', 'preferred_timestamp', 'temp', 'density',
         'port_timestamp', 'lon', 'salinity', 'pressure', 'internal_timestamp',
         'time', 'lat', 'driver_timestamp', 'conductivity']

        Example of populated parsed values from the same notes:

        {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'],
         'temp': [-4.9733], 'density': [None], 'port_timestamp': [None],
         'lon': [None], 'salinity': [None], 'pressure': [539.527],
         'internal_timestamp': [None], 'time': [3564867788.0627117],
         'lat': [None], 'driver_timestamp': [3564867788.0627117],
         'conductivity': [16.0239]}

        :param stream_name: key into the per-stream buffers, stream
                            definitions, publishers and connection indexes
        """

        try:
            pending = len(self._stream_buffers[stream_name])
            if not pending:
                return

            definition = self._stream_defs[stream_name]
            # A plain str is passed on as a stream definition id, anything
            # else as the stream definition itself.
            if isinstance(definition, str):
                rdt = RecordDictionaryTool(stream_definition_id=definition)
            else:
                rdt = RecordDictionaryTool(stream_definition=definition)

            publisher = self._publishers[stream_name]

            # Drain exactly as many entries as were counted above, taking
            # them from the end of the buffer.
            buffered = self._stream_buffers[stream_name]
            values = [buffered.pop() for _ in xrange(pending)]

            rdt = populate_rdt(rdt, values)

            log.info('Outgoing granule: %s',
                     ['%s: %s' % (k, v) for k, v in rdt.iteritems()])
            granule = rdt.to_granule(
                data_producer_id=self._agent.resource_id,
                connection_id=self._connection_ID.hex,
                connection_index=str(self._connection_index[stream_name]))

            publisher.publish(granule)
            log.info(
                'Instrument agent %s published data granule on stream %s.',
                self._agent._proc_name, stream_name)
            log.info('Connection id: %s, connection index: %i.',
                     self._connection_ID.hex,
                     self._connection_index[stream_name])
            self._connection_index[stream_name] += 1
        except:
            # Failures are logged only; nothing propagates to the caller.
            log.exception(
                'Instrument agent %s could not publish data on stream %s.',
                self._agent._proc_name, stream_name)
Пример #9
0
    def publish_loop(self):
        """
        Publish a ten-sample granule per iteration until ``finished`` is set.

        'time' advances by ten each iteration (0-9, 10-19, ...).  'temp' is
        drawn from numpy.random.random_sample and scaled by 30.  After each
        publish the loop sleeps for ``self.interval`` via gevent.
        """
        low, high = 0, 30
        iteration = 0
        while not self.finished.is_set():
            rdt = RecordDictionaryTool(stream_definition_id=self.stream_def._id)
            rdt['time'] = numpy.arange(10) + iteration * 10
            rdt['temp'] = numpy.random.random_sample(10) * (high - low) + low

            self.publish(rdt.to_granule())
            gevent.sleep(self.interval)
            iteration += 1
Пример #10
0
    def test_retrieve_and_transform(self):
        """
        Ingest two CTD granules and retrieve them through the salinity transform.

        Two granules (times 0-9 and 10-19) are published on a simple dataset
        with ingestion running.  The dataset is then retrieved with the
        CTDL2SalinityTransformAlgorithm applied, and the test fails if any
        value of the resulting "salinity" field equals 0.
        """
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(ctd_stream_id, dataset_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            "ctd_parsed_param_dict", id_only=True
        )
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            "sal data", parameter_dictionary_id=salinity_pdict_id
        )

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt["time"] = np.arange(10)
        rdt["temp"] = np.random.randn(10) * 10 + 30
        rdt["conductivity"] = np.random.randn(10) * 2 + 10
        rdt["pressure"] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        # Second granule reuses the same sensor values with later times.
        rdt["time"] = np.arange(10, 20)

        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        granule = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            "ion.processes.data.transforms.ctd.ctd_L2_salinity",
            "CTDL2SalinityTransformAlgorithm",
            kwargs=dict(params=sal_stream_def_id),
        )
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt["salinity"]:
            # assertNotEqual replaces the deprecated assertNotEquals alias.
            self.assertNotEqual(i, 0)
        self.streams.append(ctd_stream_id)
        self.stop_ingestion(ctd_stream_id)
Пример #11
0
    def publish_loop(self):
        """
        Publish granules of ten random samples until ``finished`` is set.

        Every iteration draws conductivity, temperature, pressure, latitude
        and longitude from the ``random`` module, advances the time values
        from ``self.last_time``, reads two raw byte fields from /dev/urandom
        and publishes the result if it is a Granule.  The loop then sleeps
        for ``self.interval`` via gevent.
        """
        sample_count = 10

        def draw(sampler, *args):
            # One array of ``sample_count`` successive calls to ``sampler``.
            return numpy.array([sampler(*args) for _ in xrange(sample_count)])

        #@todo - add lots of comments in here
        while not self.finished.is_set():

            # The draw order (c, t, p, lat, lon) is kept as it was.
            conductivity = draw(random.uniform, 0.0, 75.0)
            temperature = draw(random.uniform, -1.7, 21.0)
            pressure = draw(random.lognormvariate, 1, 2)
            latitude = draw(random.uniform, -90.0, 90.0)
            longitude = draw(random.uniform, 0.0, 360.0)

            # Times continue from where the previous iteration stopped.
            times = numpy.array(
                [self.last_time + step
                 for step in xrange(1, sample_count + 1)])
            self.last_time = max(times)

            rdt = RecordDictionaryTool(
                param_dictionary=self._create_parameter())

            # This is an example of using groups it is not a normative statement about how to use groups

            rdt['temp'] = temperature
            rdt['conductivity'] = conductivity
            rdt['pressure'] = pressure

            # Fixed-width raw bytes - the original author was unsure whether
            # the dtype below is correct.
            with open('/dev/urandom', 'r') as rand:
                fixed_chunks = [rand.read(32) for _ in xrange(sample_count)]
                rdt['raw_fixed'] = numpy.array(fixed_chunks, dtype='a32')

            # Variable-width raw bytes (1 to 40 per sample) - same doubt
            # about the dtype applies.
            with open('/dev/urandom', 'r') as rand:
                blob_chunks = [rand.read(random.randint(1, 40))
                               for _ in xrange(sample_count)]
                rdt['raw_blob'] = numpy.array(blob_chunks, dtype=object)

            rdt['time'] = times
            rdt['lat'] = latitude
            rdt['lon'] = longitude

            granule = rdt.to_granule()

            log.info('Sending %d values!' % sample_count)
            if isinstance(granule, Granule):
                self.publish(granule)

            gevent.sleep(self.interval)
    def _publish_stream_buffer(self, stream_name):
        """
        Publish everything currently buffered for ``stream_name`` as one granule.

        Returns immediately when the buffer is empty.  Otherwise the buffer is
        drained, the values are loaded into a RecordDictionaryTool through
        populate_rdt, the granule is published on the stream's publisher and
        the stream's connection index is incremented.  Any exception raised
        along the way is logged and not re-raised.

        Field names noted by the original author (apparently for the raw and
        the parsed stream respectively):

        ['quality_flag', 'preferred_timestamp', 'port_timestamp', 'lon', 'raw',
         'internal_timestamp', 'time', 'lat', 'driver_timestamp']
        ['quality_flag', 'preferred_timestamp', 'temp', 'density',
         'port_timestamp', 'lon', 'salinity', 'pressure', 'internal_timestamp',
         'time', 'lat', 'driver_timestamp', 'conductivity']

        Example of populated parsed values from the same notes:

        {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'],
         'temp': [-4.9733], 'density': [None], 'port_timestamp': [None],
         'lon': [None], 'salinity': [None], 'pressure': [539.527],
         'internal_timestamp': [None], 'time': [3564867788.0627117],
         'lat': [None], 'driver_timestamp': [3564867788.0627117],
         'conductivity': [16.0239]}

        :param stream_name: key into the per-stream buffers, stream
                            definitions, publishers and connection indexes
        """

        try:
            pending = len(self._stream_buffers[stream_name])
            if not pending:
                return

            definition = self._stream_defs[stream_name]
            # A plain str is passed on as a stream definition id, anything
            # else as the stream definition itself.
            if isinstance(definition, str):
                rdt = RecordDictionaryTool(stream_definition_id=definition)
            else:
                rdt = RecordDictionaryTool(stream_definition=definition)

            publisher = self._publishers[stream_name]

            # Drain exactly as many entries as were counted above, taking
            # them from the end of the buffer.
            buffered = self._stream_buffers[stream_name]
            values = [buffered.pop() for _ in xrange(pending)]

            rdt = populate_rdt(rdt, values)

            # Per-granule info logging is disabled in this version.
            granule = rdt.to_granule(
                data_producer_id=self._agent.resource_id,
                connection_id=self._connection_ID.hex,
                connection_index=str(self._connection_index[stream_name]))

            publisher.publish(granule)
            self._connection_index[stream_name] += 1
        except:
            # Failures are logged only; nothing propagates to the caller.
            log.exception('Instrument agent %s could not publish data on stream %s.',
                self._agent._proc_name, stream_name)
Пример #13
0
    def test_ingestion_gap_analysis(self):
        """
        Ingest six one-sample granules whose connection metadata has gaps.

        Two connection ids are used.  On the first connection index '2' is
        skipped (a missed message); the second connection starts at index '3'
        (a new connection).  After all six granules have been published, a
        retrieve must return 'time' and 'temp' equal to 0..5.

        :return: id of the dataset that was populated
        """
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        connection1 = uuid4().hex
        connection2 = uuid4().hex

        # (sample value, connection id, connection index).  The index is
        # always passed as a string so that all six granules carry the same
        # type for connection_index.
        schedule = (
            (0, connection1, '0'),
            (1, connection1, '1'),
            (2, connection1, '3'),  # Gap, missed message
            (3, connection2, '3'),  # Gap, new connection
            (4, connection2, '4'),
            (5, connection2, '5'),
        )

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        for value, connection_id, connection_index in schedule:
            rdt['time'] = [value]
            rdt['temp'] = [value]
            self.publish_and_wait(
                dataset_id,
                rdt.to_granule(connection_id=connection_id,
                               connection_index=connection_index))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(6))
        np.testing.assert_array_equal(rdt['temp'], np.arange(6))
        return dataset_id
Пример #14
0
    def publish_hifi(self, stream_id, stream_route, offset=0):
        '''
        Publish deterministic data.

        One granule is sent in which both 'time' and 'temp' hold
        ``np.arange(10)`` shifted by ``offset * 10``.

        :param stream_id: stream to publish on; also used to look up the
                          stream definition
        :param stream_route: route handed to StandaloneStreamPublisher
        :param offset: multiplied by 10 and added to every sample
        '''

        publisher = StandaloneStreamPublisher(stream_id, stream_route)

        definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        record = RecordDictionaryTool(stream_definition_id=definition._id)

        shift = offset * 10
        # Each field gets its own freshly built array.
        for field in ('time', 'temp'):
            record[field] = np.arange(10) + shift
        publisher.publish(record.to_granule())
Пример #15
0
    def test_retrieve_and_transform(self):
        """
        Ingest two CTD granules and retrieve them through the salinity transform.

        Two granules (times 0-9 and 10-19) are published on a simple dataset
        with ingestion running.  The dataset is then retrieved with the
        CTDL2SalinityTransformAlgorithm applied, and the test fails if any
        value of the resulting 'salinity' field equals 0.
        """
        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset(
        )
        self.start_ingestion(ctd_stream_id, dataset_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            'sal data', parameter_dictionary_id=salinity_pdict_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10
        rdt['pressure'] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        # Second granule reuses the same sensor values with later times.
        rdt['time'] = np.arange(10, 20)

        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        granule = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'CTDL2SalinityTransformAlgorithm',
            kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for i in rdt['salinity']:
            # assertNotEqual replaces the deprecated assertNotEquals alias.
            self.assertNotEqual(i, 0)
        self.streams.append(ctd_stream_id)
        self.stop_ingestion(ctd_stream_id)
Пример #16
0
 def publish_loop(self):
     """
     Publish a ten-sample granule per iteration until ``finished`` is set.

     'time' advances by ten each iteration; 'temp' and 'conductivity' are
     numpy.random.random draws scaled by 10; 'lat' and 'lon' are all zeros
     and 'binary' holds the string 'hi' ten times as an object array.  After
     each publish the loop sleeps for ``self.interval`` via gevent.
     """
     samples = 10
     iteration = 0
     while not self.finished.is_set():
         rdt = RecordDictionaryTool(stream_definition_id=self.stream_def._id)
         rdt['time'] = numpy.arange(samples) + iteration * 10
         rdt['temp'] = numpy.random.random(samples) * 10
         rdt['lat'] = numpy.array([0] * samples)
         rdt['lon'] = numpy.array([0] * samples)
         rdt['conductivity'] = numpy.random.random(samples) * 10
         rdt['binary'] = numpy.array(['hi'] * samples, dtype='object')

         self.publish(rdt.to_granule())
         gevent.sleep(self.interval)
         iteration += 1
Пример #17
0
    def publish_loop(self):
        """
        Publish granules of ten random samples until ``finished`` is set.

        Every iteration draws conductivity, temperature, pressure, latitude
        and longitude from the ``random`` module, advances the time values
        from ``self.last_time``, reads two raw byte fields from /dev/urandom
        and publishes the result if it is a Granule.  The loop then sleeps
        for ``self.interval`` via gevent.
        """
        count = 10

        def sample(fn, *args):
            # Array built from ``count`` successive calls to ``fn``.
            return numpy.array([fn(*args) for _ in xrange(count)])

        #@todo - add lots of comments in here
        while not self.finished.is_set():

            # The draw order (c, t, p, lat, lon) is kept as it was.
            cond = sample(random.uniform, 0.0, 75.0)
            temp = sample(random.uniform, -1.7, 21.0)
            pres = sample(random.lognormvariate, 1, 2)
            lats = sample(random.uniform, -90.0, 90.0)
            lons = sample(random.uniform, 0.0, 360.0)

            # Times continue from where the previous iteration stopped.
            tvals = numpy.array(
                [self.last_time + k for k in xrange(1, count + 1)])
            self.last_time = max(tvals)

            rdt = RecordDictionaryTool(
                param_dictionary=self._create_parameter())

            # This is an example of using groups it is not a normative statement about how to use groups

            rdt['temp'] = temp
            rdt['conductivity'] = cond
            rdt['pressure'] = pres

            # Fixed-width raw bytes - the original author was unsure whether
            # the dtype below is correct.
            with open('/dev/urandom','r') as rand:
                fixed = [rand.read(32) for _ in xrange(count)]
                rdt['raw_fixed'] = numpy.array(fixed, dtype='a32')

            # Variable-width raw bytes (1 to 40 per sample) - same doubt
            # about the dtype applies.
            with open('/dev/urandom','r') as rand:
                blobs = [rand.read(random.randint(1,40))
                         for _ in xrange(count)]
                rdt['raw_blob'] = numpy.array(blobs, dtype=object)

            rdt['time'] = tvals
            rdt['lat'] = lats
            rdt['lon'] = lons

            outgoing = rdt.to_granule()

            log.info('Sending %d values!' % count)
            if isinstance(outgoing, Granule):
                self.publish(outgoing)

            gevent.sleep(self.interval)
Пример #18
0
    def create_test_granules(self, buffer_data=False):
        """
        Generate test granules from five hard-coded particles.

        Particles are collected into groups, one granule per group.  A
        particle opens a new group when its 'new_sequence' entry is truthy or
        when ``buffer_data`` equals False, provided the current group already
        holds data.  Each granule gets its own uuid4 connection id and a
        connection index that starts at 1 and increases by one per granule.

        :param buffer_data: when true, try to buffer consecutive particles
                            into one granule.
        :return: list of granules generated.
        """
        t0 = 3583861263.0

        # Only the fourth particle is created with the extra trailing True
        # argument (presumably its new-sequence flag -- see get_particle).
        particles = [
            self.get_particle(t0, 10.5914, 161.06, 4.1870, 2693.0),
            self.get_particle(t0 + 1, 10.5915, 161.07, 4.1871, 2693.1),
            self.get_particle(t0 + 2, 10.5916, 161.08, 4.1872, 2693.2),
            self.get_particle(t0 + 3, 10.5917, 161.09, 4.1873, 2693.3, True),
            self.get_particle(t0 + 4, 10.5918, 161.10, 4.1874, 2693.4),
        ]

        batches = []
        for particle in particles:
            # The explicit comparison with False is kept on purpose: it is
            # not the same as ``not buffer_data`` for values such as None.
            split_here = (particle.get('new_sequence', False)
                          or buffer_data == False)
            # A new batch is only opened when the current one holds data.
            if not batches or (split_here and batches[-1]):
                batches.append([])
            batches[-1].append(particle)

        log.debug("Granules to create: %s", len(batches))

        produced = []
        for position, batch in enumerate(batches, 1):
            conn = uuid.uuid4()
            rdt = populate_rdt(
                RecordDictionaryTool(param_dictionary=self.get_param_dict()),
                batch)
            produced.append(rdt.to_granule(data_producer_id='agent_res_id',
                                           connection_id=conn.hex,
                                           connection_index=str(position)))

        return produced
Пример #19
0
    def test_ingestion_gap_analysis(self):
        """
        Ingest six one-sample granules whose connection metadata has gaps.

        Two connection ids are used.  On the first connection index '2' is
        skipped (a missed message); the second connection starts at index '3'
        (a new connection).  After all six granules have been published, a
        retrieve must return 'time' and 'temp' equal to 0..5.

        :return: id of the dataset that was populated
        """
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        first_conn = uuid4().hex
        second_conn = uuid4().hex

        # One row per granule: (sample value, connection id, connection index)
        plan = [
            (0, first_conn, '0'),
            (1, first_conn, '1'),
            (2, first_conn, '3'),   # Gap, missed message
            (3, second_conn, '3'),  # Gap, new connection
            (4, second_conn, '4'),
            (5, second_conn, '5'),
        ]

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        for sample, conn, index in plan:
            rdt['time'] = [sample]
            rdt['temp'] = [sample]
            outgoing = rdt.to_granule(connection_id=conn,
                                      connection_index=index)
            self.publish_and_wait(dataset_id, outgoing)

        replay = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(dataset_id))
        np.testing.assert_array_equal(replay['time'], np.arange(6))
        np.testing.assert_array_equal(replay['temp'], np.arange(6))
        return dataset_id
Пример #20
0
    def test_lookup_values_ingest_replay(self):
        """
        Ingest two granules around a stored-value update and verify the replay.

        A stream and dataset are created from the ``create_lookups`` parameter
        dictionary and persisted with lookup documents 'test1' and 'test2'.
        Twenty samples are published and replayed, the stored values for
        'test1' and 'coefficient_document' are then written, and twenty more
        samples are published.  The second replay must contain all forty
        samples with the expected 'offset_b', 'calibrated' and 'calibrated_b'
        values.
        """
        param_helper = ParameterHelper(self.dataset_management,
                                       self.addCleanup)
        pdict_id = param_helper.create_lookups()

        # Stream definition and stream, each removed again on cleanup.
        stream_def_id = self.pubsub_management.create_stream_definition(
            'lookups', parameter_dictionary_id=pdict_id)
        self.addCleanup(
            self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        # Persist the stream into a fresh dataset with the lookup documents
        # configured on the ingestion process.
        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        ingest_config = DotDict()
        ingest_config.process.lookup_docs = ['test1', 'test2']
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id,
            config=ingest_config)
        self.addCleanup(
            self.ingestion_management.unpersist_data_stream,
            stream_id,
            ingestion_config_id)

        initial_offsets = {'offset_a': 10.0, 'offset_b': 13.1}
        stored_values = StoredValueManager(self.container)
        stored_values.stored_value_cas('test1', initial_offsets)

        publisher = StandaloneStreamPublisher(stream_id, route)

        record = RecordDictionaryTool(stream_definition_id=stream_def_id)
        record['time'] = np.arange(20)
        record['temp'] = [20.0] * 20
        outgoing = record.to_granule()

        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        def publish_and_replay(granule_to_send):
            # Publish, wait on the monitor event (timeout 30), then load the
            # whole dataset back into a RecordDictionaryTool.
            publisher.publish(granule_to_send)
            self.assertTrue(monitor.event.wait(30))
            retrieved = self.data_retriever.retrieve(dataset_id)
            return RecordDictionaryTool.load_from_granule(retrieved)

        # --- first granule: samples 0..19 ---------------------------------
        replayed = publish_and_replay(outgoing)

        first_expectations = (
            ('time', np.arange(20)),
            ('temp', np.array([20.] * 20)),
            ('calibrated', np.array([30.] * 20)),
        )
        for field, expected in first_expectations:
            np.testing.assert_array_almost_equal(replayed[field], expected)
        # 'offset_b' is expected to still be all fill values at this point.
        np.testing.assert_array_equal(
            replayed['offset_b'],
            np.array([replayed.fill_value('offset_b')] * 20))

        # --- second granule: samples 20..39 -------------------------------
        record = RecordDictionaryTool(stream_definition_id=stream_def_id)
        record['time'] = np.arange(20, 40)
        record['temp'] = [20.0] * 20
        outgoing = record.to_granule()

        monitor.event.clear()

        stored_values.stored_value_cas('test1', {'offset_a': 20.0})
        stored_values.stored_value_cas('coefficient_document',
                                       {'offset_b': 10.0})
        # NOTE(review): presumably gives ingestion time to see the updated
        # stored values before the next granule arrives -- confirm.
        gevent.sleep(2)

        replayed = publish_and_replay(outgoing)

        almost_equal = np.testing.assert_array_almost_equal
        second_expectations = (
            (almost_equal, 'time', np.arange(40)),
            (almost_equal, 'temp', np.array([20.] * 20 + [20.] * 20)),
            (np.testing.assert_array_equal, 'offset_b',
             np.array([10.] * 40)),
            (almost_equal, 'calibrated', np.array([30.] * 20 + [40.] * 20)),
            (almost_equal, 'calibrated_b',
             np.array([40.] * 20 + [50.] * 20)),
        )
        for check, field, expected in second_expectations:
            check(replayed[field], expected)
    def _publish_stream_buffer(self, stream_name):
        """
        Drain the buffer for ``stream_name`` and publish its samples as one granule.

        If ``self._flush_on_publish`` is set, the agent state is flushed first
        (this happens even when the buffer turns out to be empty).  An empty
        buffer returns immediately.  Otherwise every buffered sample is popped,
        loaded into a RecordDictionaryTool through ``populate_rdt`` and
        published with the current connection id and the stream's connection
        index (as a string).  The connection index is incremented only after
        ``publish`` has returned.

        Errors are logged with a traceback and not re-raised.  Samples that
        were already popped from the buffer are not put back if a later step
        fails.

        stream_name: key into the per-stream buffer, stream definition,
                     publisher and connection-index dictionaries.

        Reference data kept from the original author (field lists, sample
        driver packets and resulting record dictionaries; labels inferred
        from the content):

        ['quality_flag', 'preferred_timestamp', 'port_timestamp', 'lon', 'raw', 'internal_timestamp', 'time', 'lat', 'driver_timestamp']
        ['quality_flag', 'preferred_timestamp', 'temp', 'density', 'port_timestamp', 'lon', 'salinity', 'pressure', 'internal_timestamp', 'time', 'lat', 'driver_timestamp', 'conductivity']
        
        {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "raw",
        "values": [{"binary": true, "value": "MzIuMzkxOSw5MS4wOTUxMiwgNzg0Ljg1MywgICA2LjE5OTQsIDE1MDUuMTc5LCAxOSBEZWMgMjAxMiwgMDA6NTI6Mjc=", "value_id": "raw"}]}', 'time': 1355878347.744123}
        
        {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed",
        "values": [{"value": 32.3919, "value_id": "temp"}, {"value": 91.09512, "value_id": "conductivity"}, {"value": 784.853, "value_id": "pressure"}]}', 'time': 1355878347.744127}
        
        {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'port_timestamp': [None], 'lon': [None], 'raw': ['-4.9733,16.02390, 539.527,   34.2719, 1506.862, 19 Dec 2012, 01:03:07'],
        'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117]}
        
        {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'temp': [-4.9733], 'density': [None], 'port_timestamp': [None], 'lon': [None], 'salinity': [None], 'pressure': [539.527],
        'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117], 'conductivity': [16.0239]}
        """

        try:
            # Flush the agent state to the object store.  This was added for
            # the dataset agent publishers, which keep their driver state in
            # the object store.  Flushing after publication (grab the state
            # here, do the work, then save that state) was discussed, but
            # flush_state does not accept parameters, so flushing up front is
            # the simpler option.  Downside: if publishing fails, the stored
            # state will be slightly out of sync.
            if self._flush_on_publish:
                log.debug("ASP Flush Agent State")
                self._agent._flush_state()

            buf_len = len(self._stream_buffers[stream_name])
            if buf_len == 0:
                return

            # The stream definition is stored either as an id string or as a
            # stream definition object; pick the matching constructor keyword.
            stream_def = self._stream_defs[stream_name]
            if isinstance(stream_def, str):
                rdt = RecordDictionaryTool(stream_definition_id=stream_def)
            else:
                rdt = RecordDictionaryTool(stream_definition=stream_def)

            publisher = self._publishers[stream_name]

            # Take exactly the samples that were buffered when the length was
            # read, popping from the end of the buffer.
            vals = [self._stream_buffers[stream_name].pop()
                    for _ in range(buf_len)]

            rdt = populate_rdt(rdt, vals)

            g = rdt.to_granule(
                data_producer_id=self._agent.resource_id,
                connection_id=self._connection_ID.hex,
                connection_index=str(self._connection_index[stream_name]))

            publisher.publish(g)

            # Reached only when publish() did not raise.
            self._connection_index[stream_name] += 1
        except Exception:
            # Best-effort publishing: log and carry on.  Catching Exception
            # rather than using a bare ``except`` lets BaseException-derived
            # signals (KeyboardInterrupt, SystemExit, greenlet kill/timeout
            # exceptions) propagate instead of being silently swallowed.
            log.exception('Instrument agent %s could not publish data on stream %s.',
                self._agent._proc_name, stream_name)
Пример #22
0
    def test_lookup_values_ingest_replay(self):
        """
        Check that replayed data reflects stored lookup values across an update.

        Sets up a persisted stream/dataset using the ``create_lookups``
        parameter dictionary with lookup documents 'test1' and 'test2',
        publishes samples 0..19, verifies the replay, writes new stored values
        for 'test1' and 'coefficient_document', publishes samples 20..39 and
        verifies the replay of all forty samples.
        """
        lookups_helper = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = lookups_helper.create_lookups()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'lookups',
            parameter_dictionary_id=pdict_id,
        )
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id,
        )
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        # The ingestion process is told which lookup documents to use.
        persist_config = DotDict()
        persist_config.process.lookup_docs = ['test1', 'test2']
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id,
            config=persist_config,
        )
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        value_store = StoredValueManager(self.container)
        value_store.stored_value_cas('test1', {'offset_a': 10.0, 'offset_b': 13.1})

        publisher = StandaloneStreamPublisher(stream_id, route)

        def build_granule(time_values):
            # Twenty samples with a constant temperature of 20.0.
            record = RecordDictionaryTool(stream_definition_id=stream_def_id)
            record['time'] = time_values
            record['temp'] = [20.0] * 20
            return record.to_granule()

        first_granule = build_granule(np.arange(20))

        ingest_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(ingest_monitor.stop)

        # First round: publish, wait on the monitor event, replay, verify.
        publisher.publish(first_granule)
        ingested = ingest_monitor.event.wait(30)
        self.assertTrue(ingested)

        replayed = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(dataset_id))

        np.testing.assert_array_almost_equal(replayed['time'], np.arange(20))
        np.testing.assert_array_almost_equal(replayed['temp'], np.array([20.] * 20))
        np.testing.assert_array_almost_equal(replayed['calibrated'], np.array([30.] * 20))
        # 'offset_b' is expected to hold only fill values after the first round.
        np.testing.assert_array_equal(
            replayed['offset_b'],
            np.array([replayed.fill_value('offset_b')] * 20),
        )

        # Second round: new stored values, then samples 20..39.
        second_granule = build_granule(np.arange(20, 40))

        ingest_monitor.event.clear()

        value_store.stored_value_cas('test1', {'offset_a': 20.0})
        value_store.stored_value_cas('coefficient_document', {'offset_b': 10.0})
        # NOTE(review): the pause presumably lets ingestion pick up the new
        # stored values before the granule is published -- confirm.
        gevent.sleep(2)

        publisher.publish(second_granule)
        ingested = ingest_monitor.event.wait(30)
        self.assertTrue(ingested)

        replayed = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(dataset_id))

        np.testing.assert_array_almost_equal(replayed['time'], np.arange(40))
        np.testing.assert_array_almost_equal(
            replayed['temp'], np.array([20.] * 20 + [20.] * 20))
        np.testing.assert_array_equal(replayed['offset_b'], np.array([10.] * 40))
        np.testing.assert_array_almost_equal(
            replayed['calibrated'], np.array([30.] * 20 + [40.] * 20))
        np.testing.assert_array_almost_equal(
            replayed['calibrated_b'], np.array([40.] * 20 + [50.] * 20))
    def _publish_stream_buffer(self, stream_name):
        """
        Drain the buffer for ``stream_name`` and publish its samples as one granule.

        An empty buffer returns immediately.  Otherwise every buffered sample
        is popped and transposed into one column per record-dictionary field
        (missing values stay ``None``):

        * entries of a sample's 'values' list are stored under their
          'value_id' when that id is in the record dictionary; entries flagged
          'binary' are base64-decoded first;
        * any other sample key that is in the record dictionary is copied
          as-is, and 'driver_timestamp' is additionally copied into 'time'.

        The columns are loaded into the RecordDictionaryTool as numpy arrays,
        converted to a granule, tagged with the connection id and connection
        index, and published.  The stream's connection index is incremented
        only when no exception occurred.

        Errors are logged with a traceback and not re-raised.  Samples that
        were already popped from the buffer are not put back if a later step
        fails.

        stream_name: key into the per-stream buffer, stream definition,
                     publisher and connection-index dictionaries.

        Reference data kept from the original author (field lists, sample
        driver packets and resulting record dictionaries; labels inferred
        from the content):

        ['quality_flag', 'preferred_timestamp', 'port_timestamp', 'lon', 'raw', 'internal_timestamp', 'time', 'lat', 'driver_timestamp']
        ['quality_flag', 'preferred_timestamp', 'temp', 'density', 'port_timestamp', 'lon', 'salinity', 'pressure', 'internal_timestamp', 'time', 'lat', 'driver_timestamp', 'conductivity']
        
        {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "raw",
        "values": [{"binary": true, "value": "MzIuMzkxOSw5MS4wOTUxMiwgNzg0Ljg1MywgICA2LjE5OTQsIDE1MDUuMTc5LCAxOSBEZWMgMjAxMiwgMDA6NTI6Mjc=", "value_id": "raw"}]}', 'time': 1355878347.744123}
        
        {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed",
        "values": [{"value": 32.3919, "value_id": "temp"}, {"value": 91.09512, "value_id": "conductivity"}, {"value": 784.853, "value_id": "pressure"}]}', 'time': 1355878347.744127}
        
        {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'port_timestamp': [None], 'lon': [None], 'raw': ['-4.9733,16.02390, 539.527,   34.2719, 1506.862, 19 Dec 2012, 01:03:07'],
        'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117]}
        
        {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'temp': [-4.9733], 'density': [None], 'port_timestamp': [None], 'lon': [None], 'salinity': [None], 'pressure': [539.527],
        'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117], 'conductivity': [16.0239]}
        """

        try:
            buf_len = len(self._stream_buffers[stream_name])
            if buf_len == 0:
                return

            stream_def = self._stream_defs[stream_name]
            rdt = RecordDictionaryTool(stream_definition_id=stream_def)
            publisher = self._publishers[stream_name]

            # Take exactly the samples that were buffered when the length was
            # read, popping from the end of the buffer.
            vals = [self._stream_buffers[stream_name].pop()
                    for _ in range(buf_len)]

            # One column per record-dictionary field, pre-filled with None so
            # that fields absent from a sample stay empty for that row.
            data_arrays = {}
            for field in rdt.fields:
                data_arrays[field] = [None] * buf_len

            for row, sample in enumerate(vals):
                for (key, value) in sample.iteritems():
                    if key == 'values':
                        # Per-parameter entries: {'value_id': ..., 'value': ...}
                        for entry in value:
                            value_id = entry['value_id']
                            if value_id in rdt:
                                item = entry['value']
                                if entry.get('binary', None):
                                    item = base64.b64decode(item)
                                data_arrays[value_id][row] = item

                    elif key in rdt:
                        data_arrays[key][row] = value
                        # The driver timestamp doubles as the record time.
                        if key == 'driver_timestamp':
                            data_arrays['time'][row] = value

            for (field, column) in data_arrays.iteritems():
                rdt[field] = numpy.array(column)

            log.info('Outgoing granule: %s',
                     ['%s: %s'%(k,v) for k,v in rdt.iteritems()])
            g = rdt.to_granule(data_producer_id=self._agent.resource_id)
            g.connection_id = self._connection_ID.hex
            # Sent as a string, matching the other publisher implementation
            # and the gap-detection test in this file, which both pass
            # connection_index as a string.
            g.connection_index = str(self._connection_index[stream_name])

            publisher.publish(g)
            log.info('Instrument agent %s published data granule on stream %s.',
                self._agent._proc_name, stream_name)
            log.info('Connection id: %s, connection index: %i.',
                     self._connection_ID.hex, self._connection_index[stream_name])

        except Exception:
            # Best-effort publishing: log and carry on.  Catching Exception
            # rather than using a bare ``except`` lets BaseException-derived
            # signals (KeyboardInterrupt, SystemExit, greenlet kill/timeout
            # exceptions) propagate instead of being silently swallowed.
            log.exception('Instrument agent %s could not publish data on stream %s.',
                self._agent._proc_name, stream_name)

        else:
            # Not reached on the early return for an empty buffer, nor after
            # a logged failure.
            self._connection_index[stream_name] += 1
            
    def _publish_stream_buffer(self, stream_name):
        """
        Drain the buffer for ``stream_name`` and publish its samples as one granule.

        If ``self._flush_on_publish`` is set, the agent state is flushed first
        (this happens even when the buffer turns out to be empty).  An empty
        buffer returns immediately.  Otherwise every buffered sample is popped,
        loaded into a RecordDictionaryTool through ``populate_rdt`` and
        published with the current connection id and the stream's connection
        index (as a string).  The connection index is incremented only after
        ``publish`` has returned.

        Errors are logged with a traceback and not re-raised.  Samples that
        were already popped from the buffer are not put back if a later step
        fails.

        stream_name: key into the per-stream buffer, stream definition,
                     publisher and connection-index dictionaries.

        Reference data kept from the original author (field lists, sample
        driver packets and resulting record dictionaries; labels inferred
        from the content):

        ['quality_flag', 'preferred_timestamp', 'port_timestamp', 'lon', 'raw', 'internal_timestamp', 'time', 'lat', 'driver_timestamp']
        ['quality_flag', 'preferred_timestamp', 'temp', 'density', 'port_timestamp', 'lon', 'salinity', 'pressure', 'internal_timestamp', 'time', 'lat', 'driver_timestamp', 'conductivity']
        
        {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "raw",
        "values": [{"binary": true, "value": "MzIuMzkxOSw5MS4wOTUxMiwgNzg0Ljg1MywgICA2LjE5OTQsIDE1MDUuMTc5LCAxOSBEZWMgMjAxMiwgMDA6NTI6Mjc=", "value_id": "raw"}]}', 'time': 1355878347.744123}
        
        {"driver_timestamp": 3564867147.743795, "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed",
        "values": [{"value": 32.3919, "value_id": "temp"}, {"value": 91.09512, "value_id": "conductivity"}, {"value": 784.853, "value_id": "pressure"}]}', 'time': 1355878347.744127}
        
        {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'port_timestamp': [None], 'lon': [None], 'raw': ['-4.9733,16.02390, 539.527,   34.2719, 1506.862, 19 Dec 2012, 01:03:07'],
        'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117]}
        
        {'quality_flag': [u'ok'], 'preferred_timestamp': [u'driver_timestamp'], 'temp': [-4.9733], 'density': [None], 'port_timestamp': [None], 'lon': [None], 'salinity': [None], 'pressure': [539.527],
        'internal_timestamp': [None], 'time': [3564867788.0627117], 'lat': [None], 'driver_timestamp': [3564867788.0627117], 'conductivity': [16.0239]}
        """

        try:
            # Flush the agent state to the object store.  This was added for
            # the dataset agent publishers, which keep their driver state in
            # the object store.  Flushing after publication (grab the state
            # here, do the work, then save that state) was discussed, but
            # flush_state does not accept parameters, so flushing up front is
            # the simpler option.  Downside: if publishing fails, the stored
            # state will be slightly out of sync.
            if self._flush_on_publish:
                log.debug("ASP Flush Agent State")
                self._agent._flush_state()

            buf_len = len(self._stream_buffers[stream_name])
            if buf_len == 0:
                return

            # The stream definition is stored either as an id string or as a
            # stream definition object; pick the matching constructor keyword.
            stream_def = self._stream_defs[stream_name]
            if isinstance(stream_def, str):
                rdt = RecordDictionaryTool(stream_definition_id=stream_def)
            else:
                rdt = RecordDictionaryTool(stream_definition=stream_def)

            publisher = self._publishers[stream_name]

            # Take exactly the samples that were buffered when the length was
            # read, popping from the end of the buffer.
            vals = [self._stream_buffers[stream_name].pop()
                    for _ in range(buf_len)]

            rdt = populate_rdt(rdt, vals)

            g = rdt.to_granule(
                data_producer_id=self._agent.resource_id,
                connection_id=self._connection_ID.hex,
                connection_index=str(self._connection_index[stream_name]))

            publisher.publish(g)

            # Reached only when publish() did not raise.
            self._connection_index[stream_name] += 1
        except Exception:
            # Best-effort publishing: log and carry on.  Catching Exception
            # rather than using a bare ``except`` lets BaseException-derived
            # signals (KeyboardInterrupt, SystemExit, greenlet kill/timeout
            # exceptions) propagate instead of being silently swallowed.
            log.exception('Instrument agent %s could not publish data on stream %s.',
                self._agent._proc_name, stream_name)