Example no. 1
    def _start_data_subscribers(self, count, raw_count):
        """
        """
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        # Create streams and subscriptions for each stream named in driver.
        self._data_subscribers = []
        self._samples_received = []
        self._raw_samples_received = []
        self._async_sample_result = AsyncResult()
        self._async_raw_sample_result = AsyncResult()

        # A callback for processing subscribed-to data.
        def recv_data(message, stream_route, stream_id):
            log.info('Received parsed data on %s (%s,%s)', stream_id,
                     stream_route.exchange_point, stream_route.routing_key)
            self._samples_received.append(message)
            if len(self._samples_received) == count:
                self._async_sample_result.set()

        def recv_raw_data(message, stream_route, stream_id):
            log.info('Received raw data on %s (%s,%s)', stream_id,
                     stream_route.exchange_point, stream_route.routing_key)
            self._raw_samples_received.append(message)
            if len(self._raw_samples_received) == raw_count:
                self._async_raw_sample_result.set()

        from pyon.util.containers import create_unique_identifier

        stream_name = 'parsed'
        parsed_config = self._stream_config[stream_name]
        stream_id = parsed_config['stream_id']
        exchange_name = create_unique_identifier("%s_queue" % stream_name)
        self._purge_queue(exchange_name)
        sub = StandaloneStreamSubscriber(exchange_name, recv_data)
        sub.start()
        self._data_subscribers.append(sub)
        sub_id = pubsub_client.create_subscription(name=exchange_name,
                                                   stream_ids=[stream_id],
                                                   timeout=120.2)
        pubsub_client.activate_subscription(sub_id, timeout=120.3)
        sub.subscription_id = sub_id  # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)

        stream_name = 'raw'
        parsed_config = self._stream_config[stream_name]
        stream_id = parsed_config['stream_id']
        exchange_name = create_unique_identifier("%s_queue" % stream_name)
        self._purge_queue(exchange_name)
        sub = StandaloneStreamSubscriber(exchange_name, recv_raw_data)
        sub.start()
        self._data_subscribers.append(sub)
        sub_id = pubsub_client.create_subscription(name=exchange_name,
                                                   stream_ids=[stream_id],
                                                   timeout=120.4)
        pubsub_client.activate_subscription(sub_id, timeout=120.5)
        sub.subscription_id = sub_id  # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
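
The subscription id is attached to each subscriber above purely to simplify teardown. A matching cleanup helper could look roughly like the sketch below; the method name _stop_data_subscribers is assumed, and it only uses pubsub calls (deactivate_subscription, delete_subscription) and sub.stop(), which appear in other examples on this page.

    def _stop_data_subscribers(self):
        """
        Sketch of a teardown for the subscribers started above.
        """
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        for sub in self._data_subscribers:
            if hasattr(sub, 'subscription_id'):
                pubsub_client.deactivate_subscription(sub.subscription_id)
                pubsub_client.delete_subscription(sub.subscription_id)
            sub.stop()
        self._data_subscribers = []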
Example no. 2
    def _start_data_subscribers(self, count, raw_count):
        """
        """        
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)
                
        # Create streams and subscriptions for each stream named in driver.
        self._data_subscribers = []
        self._samples_received = []
        self._raw_samples_received = []
        self._async_sample_result = AsyncResult()
        self._async_raw_sample_result = AsyncResult()

        # A callback for processing subscribed-to data.
        def recv_data(message, stream_route, stream_id):
            log.info('Received parsed data on %s (%s,%s)', stream_id, stream_route.exchange_point, stream_route.routing_key)
            self._samples_received.append(message)
            if len(self._samples_received) == count:
                self._async_sample_result.set()

        def recv_raw_data(message, stream_route, stream_id):
            log.info('Received raw data on %s (%s,%s)', stream_id, stream_route.exchange_point, stream_route.routing_key)
            self._raw_samples_received.append(message)
            if len(self._raw_samples_received) == raw_count:
                self._async_raw_sample_result.set()

        from pyon.util.containers import create_unique_identifier

        stream_name = 'parsed'
        parsed_config = self._stream_config[stream_name]
        stream_id = parsed_config['stream_id']
        exchange_name = create_unique_identifier("%s_queue" %
                    stream_name)
        self._purge_queue(exchange_name)
        sub = StandaloneStreamSubscriber(exchange_name, recv_data)
        sub.start()
        self._data_subscribers.append(sub)
        sub_id = pubsub_client.create_subscription(name=exchange_name, stream_ids=[stream_id])
        pubsub_client.activate_subscription(sub_id)
        sub.subscription_id = sub_id # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
        
        stream_name = 'raw'
        parsed_config = self._stream_config[stream_name]
        stream_id = parsed_config['stream_id']
        exchange_name = create_unique_identifier("%s_queue" %
                    stream_name)
        self._purge_queue(exchange_name)
        sub = StandaloneStreamSubscriber(exchange_name, recv_raw_data)
        sub.start()
        self._data_subscribers.append(sub)
        sub_id = pubsub_client.create_subscription(name=exchange_name, stream_ids=[stream_id])
        pubsub_client.activate_subscription(sub_id)
        sub.subscription_id = sub_id # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
    def _start_data_subscribers(self):
        """
        """
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        # A callback for processing subscribed-to data.
        def consume_data(message, headers):
            log.info("Subscriber received data message: %s.", str(message))
            self._samples_received.append(message)
            if self._no_samples and self._no_samples == len(self._samples_received):
                self._async_data_result.set()

        # Create a stream subscriber registrar to create subscribers.
        subscriber_registrar = StreamSubscriberRegistrar(process=self.container, node=self.container.node)

        # Create streams and subscriptions for each stream named in driver.
        self._stream_config = {}
        self._data_subscribers = []
        for (stream_name, val) in PACKET_CONFIG.iteritems():
            stream_def = ctd_stream_definition(stream_id=None)
            stream_def_id = pubsub_client.create_stream_definition(container=stream_def)
            stream_id = pubsub_client.create_stream(
                name=stream_name, stream_definition_id=stream_def_id, original=True, encoding="ION R2"
            )
            self._stream_config[stream_name] = stream_id

            # Create subscriptions for each stream.
            exchange_name = "%s_queue" % stream_name
            sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name, callback=consume_data)
            self._listen(sub)
            self._data_subscribers.append(sub)
            query = StreamQuery(stream_ids=[stream_id])
            sub_id = pubsub_client.create_subscription(query=query, exchange_name=exchange_name)
            pubsub_client.activate_subscription(sub_id)
Example no. 4
    def _start_data_subscribers(self, count):
        """
        """
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        # Create streams and subscriptions for each stream named in driver.
        self._data_subscribers = []
        self._samples_received = []
        #self._async_data_result = AsyncResult()

        strXterm = "xterm -T InstrumentScienceData -sb -rightbar "
        pOpenString = strXterm + " -e tail -f " + PIPE_PATH
        subprocess.Popen([
            'xterm', '-T', 'InstrumentScienceData', '-e', 'tail', '-f',
            PIPE_PATH
        ])

        #subprocess.Popen(pOpenString)

        #self.pipeData = open(PIPE_PATH, "w", 1)

        # A callback for processing subscribed-to data.
        def recv_data(message, stream_route, stream_id):
            print 'Received message on ' + str(stream_id) + ' (' + str(
                stream_route.exchange_point) + ',' + str(
                    stream_route.routing_key) + ')'
            log.info('Received message on %s (%s,%s)', stream_id,
                     stream_route.exchange_point, stream_route.routing_key)

            self.pipeData = open(PIPE_PATH, "w", 1)
            self.pipeData.write(str(message))
            self.pipeData.flush()
            self.pipeData.close()

            self._samples_received.append(message)
            #if len(self._samples_received) == count:
            #self._async_data_result.set()

        for (stream_name, stream_config) in self._stream_config.iteritems():

            stream_id = stream_config['stream_id']

            # Create subscriptions for each stream.

            exchange_name = '%s_queue' % stream_name
            self._purge_queue(exchange_name)
            sub = StandaloneStreamSubscriber(exchange_name, recv_data)
            sub.start()
            self._data_subscribers.append(sub)
            print 'stream_id: %s' % stream_id
            sub_id = pubsub_client.create_subscription(name=exchange_name,
                                                       stream_ids=[stream_id])
            pubsub_client.activate_subscription(sub_id)
            sub.subscription_id = sub_id  # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
    def _start_data_subscribers(self):
        """
        """
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        # A callback for processing subscribed-to data.
        def consume_data(message, headers):
            log.info('Subscriber received data message: %s.', str(message))
            self._samples_received.append(message)
            if self._no_samples and self._no_samples == len(self._samples_received):
                self._async_data_result.set()
                
        # Create a stream subscriber registrar to create subscribers.
        subscriber_registrar = StreamSubscriberRegistrar(process=self.container,
                                                container=self.container)

        # Create streams and subscriptions for each stream named in driver.
        self._stream_config = {}
        self._data_subscribers = []
        # TODO the following is a minimal adjustment to at least let the test
        # continue:
#        for (stream_name, val) in PACKET_CONFIG.iteritems():
        for stream_name in PACKET_CONFIG:
            stream_def = ctd_stream_definition(stream_id=None)
            stream_def_id = pubsub_client.create_stream_definition(
                                                    container=stream_def)        
            stream_id = pubsub_client.create_stream(
                        name=stream_name,
                        stream_definition_id=stream_def_id,
                        original=True,
                        encoding='ION R2')

            taxy = get_taxonomy(stream_name)
            stream_config = dict(
                id=stream_id,
                taxonomy=taxy.dump()
            )
            self._stream_config[stream_name] = stream_config
#            self._stream_config[stream_name] = stream_id

            # Create subscriptions for each stream.
            exchange_name = '%s_queue' % stream_name
            sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name,
                                                         callback=consume_data)
            self._listen(sub)
            self._data_subscribers.append(sub)
            query = StreamQuery(stream_ids=[stream_id])
            sub_id = pubsub_client.create_subscription(
                                query=query, exchange_name=exchange_name, exchange_point='science_data')
            pubsub_client.activate_subscription(sub_id)
Example no. 6
    def run_external_transform(self):
        '''
        This example script illustrates, in a very basic way, how a transform can interact with an outside process.
        It launches an external_transform example which uses the operating system command 'bc' to add 1 to the input.

        Producer -> A -> 'FS.TEMP/transform_output'
        A is an external transform that spawns an OS process to increment the input by 1
        '''
        pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
        tms_cli = TransformManagementServiceClient(node=self.container.node)
        procd_cli = ProcessDispatcherServiceClient(node=self.container.node)
        
        #-------------------------------
        # Process Definition
        #-------------------------------
        process_definition = ProcessDefinition(name='external_transform_definition')
        process_definition.executable['module'] = 'ion.processes.data.transforms.transform_example'
        process_definition.executable['class'] = 'ExternalTransform'
        process_definition_id = procd_cli.create_process_definition(process_definition=process_definition)

        #-------------------------------
        # Streams
        #-------------------------------

        input_stream_id = pubsub_cli.create_stream(name='input_stream', original=True)
        
        #-------------------------------
        # Subscription
        #-------------------------------

        query = StreamQuery(stream_ids=[input_stream_id])
        input_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='input_queue')

        #-------------------------------
        # Launch Transform
        #-------------------------------

        transform_id = tms_cli.create_transform(name='external_transform', 
              in_subscription_id=input_subscription_id,
              process_definition_id=process_definition_id,
              configuration={})
        tms_cli.activate_transform(transform_id)

        #-------------------------------
        # Launch Producer
        #-------------------------------

        id_p = self.container.spawn_process('myproducer', 'ion.processes.data.transforms.transform_example', 'TransformExampleProducer', {'process':{'type':'stream_process','publish_streams':{'out_stream':input_stream_id}},'stream_producer':{'interval':4000}})
        self.container.proc_manager.procs[id_p].start()
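
For intuition about the 'bc' step described in the docstring, the OS-level operation the ExternalTransform performs is roughly the following standalone sketch (not the transform's actual code):

import subprocess

# Pipe '1 + <input>' through the 'bc' command-line calculator and read the result.
proc = subprocess.Popen(['bc'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
out, _ = proc.communicate('1 + 41\n')
print out.strip()  # prints '42'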
Example no. 7
    def _start_data_subscriber(self, config, callback):
        """
        Set up and start a data subscriber
        """
        exchange_point = config['exchange_point']
        stream_id = config['stream_id']

        sub = StandaloneStreamSubscriber(exchange_point, callback)
        sub.start()
        self._data_subscribers.append(sub)

        pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        sub_id = pubsub_client.create_subscription(name=exchange_point, stream_ids=[stream_id])
        pubsub_client.activate_subscription(sub_id)
        sub.subscription_id = sub_id # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
    def __init__(self, packet_config = {}, stream_definition = None, original = True, encoding = "ION R2"):
        log.info("Start data subscribers")

        self.no_samples = None
        self.async_data_result = AsyncResult()

        self.data_greenlets = []
        self.stream_config = {}
        self.samples_received = []
        self.data_subscribers = []
        self.container = Container.instance
        if not self.container:
            raise NoContainer()

        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        # A callback for processing subscribed-to data.
        def consume_data(message, headers):
            log.info('Subscriber received data message: %s.', str(message))

            self.samples_received.append(message)
            if self.no_samples and self.no_samples == len(self.samples_received):
                self.async_data_result.set()

        # Create a stream subscriber registrar to create subscribers.
        subscriber_registrar = StreamSubscriberRegistrar(process=self.container, node=self.container.node)

        # Create streams and subscriptions for each stream named in driver.
        self.stream_config = {}
        self.data_subscribers = []
        for (stream_name, val) in packet_config.iteritems():
            stream_def_id = pubsub_client.create_stream_definition(container=stream_definition)
            stream_id = pubsub_client.create_stream(
                name=stream_name,
                stream_definition_id=stream_def_id,
                original=original,
                encoding=encoding)
            self.stream_config[stream_name] = stream_id

            # Create subscriptions for each stream.
            exchange_name = '%s_queue' % stream_name
            sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name, callback=consume_data)
            self._listen(sub)
            self.data_subscribers.append(sub)
            query = StreamQuery(stream_ids=[stream_id])
            sub_id = pubsub_client.create_subscription(query=query, exchange_name=exchange_name)
            pubsub_client.activate_subscription(sub_id)
Example no. 9
    def _start_data_subscriber(self, config, callback):
        """
        Set up and start a data subscriber
        """
        exchange_point = config['exchange_point']
        stream_id = config['stream_id']

        sub = StandaloneStreamSubscriber(exchange_point, callback)
        sub.start()
        self._data_subscribers.append(sub)

        pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        sub_id = pubsub_client.create_subscription(name=exchange_point,
                                                   stream_ids=[stream_id])
        pubsub_client.activate_subscription(sub_id)
        sub.subscription_id = sub_id  # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
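
For orientation, this helper only needs a two-key config dict and a callback. The sketch below shows the shape of such a call; the wrapper method name, queue name and callback body are illustrative, not from the original source.

    def _example_subscribe_to_stream(self, stream_id):
        # Sketch only: wire a throwaway callback into _start_data_subscriber.
        def consume(message, stream_route, stream_id):
            log.info('Got a message on stream %s', stream_id)

        config = {'exchange_point': 'science_data_queue',  # assumed queue name
                  'stream_id': stream_id}
        self._start_data_subscriber(config, consume)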
Example no. 10
    def on_start(self):
        super(CacheLauncher, self).on_start()
        tms_cli = TransformManagementServiceClient()
        pubsub_cli = PubsubManagementServiceClient()
        pd_cli = ProcessDispatcherServiceClient()
        dname = CACHE_DATASTORE_NAME
        number_of_workers = self.CFG.get_safe('process.number_of_workers', 2)

        proc_def = ProcessDefinition(
            name='last_update_worker_process',
            description=
            'Worker process for caching the last update from a stream')
        proc_def.executable['module'] = 'ion.processes.data.last_update_cache'
        proc_def.executable['class'] = 'LastUpdateCache'
        proc_def_id = pd_cli.create_process_definition(
            process_definition=proc_def)

        xs_dot_xp = CFG.core_xps.science_data
        try:
            self.XS, xp_base = xs_dot_xp.split('.')
            self.XP = '.'.join([get_sys_name(), xp_base])
        except ValueError:
            raise StandardError(
                'Invalid CFG for core_xps.science_data: "%s"; must have "xs.xp" structure'
                % xs_dot_xp)

        subscription_id = pubsub_cli.create_subscription(
            query=ExchangeQuery(), exchange_name='last_update_cache')

        config = {
            'couch_storage': {
                'datastore_name': dname,
                'datastore_profile': 'SCIDATA'
            }
        }

        for i in xrange(number_of_workers):

            transform_id = tms_cli.create_transform(
                name='last_update_cache%d' % i,
                description=
                'last_update that compiles an aggregate of metadata',
                in_subscription_id=subscription_id,
                process_definition_id=proc_def_id,
                configuration=config)

        tms_cli.activate_transform(transform_id=transform_id)
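
The try/except above expects CFG.core_xps.science_data to have an 'xs.xp' shape. A quick sketch of that parsing, with assumed values:

    # Assumed values, for illustration only.
    xs_dot_xp = 'system.science_data'
    xs, xp_base = xs_dot_xp.split('.')     # xs == 'system', xp_base == 'science_data'
    xp = '.'.join(['ion_test', xp_base])   # 'ion_test.science_data', if get_sys_name() returned 'ion_test'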
Example no. 11
    def _start_data_subscribers(self, count):
        """
        """        
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)
                
        # Create streams and subscriptions for each stream named in driver.
        self._data_subscribers = []
        self._samples_received = []
        #self._async_data_result = AsyncResult()

        strXterm = "xterm -T InstrumentScienceData -sb -rightbar "
        pOpenString = strXterm + " -e tail -f " + PIPE_PATH
        subprocess.Popen(['xterm', '-T', 'InstrumentScienceData', '-e', 'tail', '-f', PIPE_PATH])        
        #subprocess.Popen(pOpenString)
                
        #self.pipeData = open(PIPE_PATH, "w", 1) 

        # A callback for processing subscribed-to data.
        def recv_data(message, stream_route, stream_id):
            print 'Received message on ' + str(stream_id) + ' (' + str(stream_route.exchange_point) + ',' + str(stream_route.routing_key) + ')'
            log.info('Received message on %s (%s,%s)', stream_id, stream_route.exchange_point, stream_route.routing_key)
             
            self.pipeData = open(PIPE_PATH, "w", 1) 
            self.pipeData.write(str(message))
            self.pipeData.flush()
            self.pipeData.close()      

            self._samples_received.append(message)
            #if len(self._samples_received) == count:
                #self._async_data_result.set()

        for (stream_name, stream_config) in self._stream_config.iteritems():
            
            stream_id = stream_config['stream_id']
            
            # Create subscriptions for each stream.

            exchange_name = '%s_queue' % stream_name
            self._purge_queue(exchange_name)
            sub = StandaloneStreamSubscriber(exchange_name, recv_data)
            sub.start()
            self._data_subscribers.append(sub)
            print 'stream_id: %s' % stream_id
            sub_id = pubsub_client.create_subscription(name=exchange_name, stream_ids=[stream_id])
            pubsub_client.activate_subscription(sub_id)
            sub.subscription_id = sub_id # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
    def _start_data_subscribers(self):
        """
        """
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        # A callback for processing subscribed-to data.
        def consume_data(message, headers):
            log.info('Subscriber received data message: %s.', str(message))
            self._samples_received.append(message)
            if self._no_samples and self._no_samples == len(
                    self._samples_received):
                self._async_data_result.set()

        # Create a stream subscriber registrar to create subscribers.
        subscriber_registrar = StreamSubscriberRegistrar(
            process=self.container, container=self.container)

        # Create streams and subscriptions for each stream named in driver.
        self._stream_config = {}
        self._data_subscribers = []
        for stream_name in PACKET_CONFIG:
            stream_def = ctd_stream_definition(stream_id=None)
            stream_def_id = pubsub_client.create_stream_definition(
                container=stream_def)
            stream_id = pubsub_client.create_stream(
                name=stream_name,
                stream_definition_id=stream_def_id,
                original=True,
                encoding='ION R2')

            taxy = get_taxonomy(stream_name)
            stream_config = dict(id=stream_id, taxonomy=taxy.dump())
            self._stream_config[stream_name] = stream_config

            # Create subscriptions for each stream.
            exchange_name = '%s_queue' % stream_name
            sub = subscriber_registrar.create_subscriber(
                exchange_name=exchange_name, callback=consume_data)
            self._listen(sub)
            self._data_subscribers.append(sub)
            query = StreamQuery(stream_ids=[stream_id])
            sub_id = pubsub_client.create_subscription(
                query=query,
                exchange_name=exchange_name,
                exchange_point='science_data')
            pubsub_client.activate_subscription(sub_id)
    def start_data_subscribers(self):
        """
        """
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        # Create streams and subscriptions for each stream named in driver.
        self.data_subscribers = {}
        self.samples_received = {}

        # A callback for processing subscribed-to data.
        def recv_data(message, stream_route, stream_id):
            log.info('Received message on %s (%s,%s)', stream_id,
                     stream_route.exchange_point, stream_route.routing_key)

            name = None

            for (stream_name, stream_config) in self.stream_config.iteritems():
                if stream_route.routing_key == stream_config['routing_key']:
                    log.debug("NAME: %s %s == %s" %
                              (stream_name, stream_route.routing_key,
                               stream_config['routing_key']))
                    name = stream_name
                    if (not self.samples_received.get(stream_name)):
                        self.samples_received[stream_name] = []
                    self.samples_received[stream_name].append(message)
                    log.debug("Add message to stream '%s' value: %s" %
                              (stream_name, message))

        for (stream_name, stream_config) in self.stream_config.iteritems():
            stream_id = stream_config['stream_id']

            # Create subscriptions for each stream.
            exchange_name = '%s_queue' % stream_name
            self._purge_queue(exchange_name)
            sub = StandaloneStreamSubscriber(exchange_name, recv_data)
            sub.start()
            self.data_subscribers[stream_name] = sub
            sub_id = pubsub_client.create_subscription(name=exchange_name,
                                                       stream_ids=[stream_id])
            pubsub_client.activate_subscription(sub_id)
            sub.subscription_id = sub_id  # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
    def start_data_subscribers(self):
        """
        """
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        # Create streams and subscriptions for each stream named in driver.
        self.data_subscribers = {}
        self.samples_received = {}

        # A callback for processing subscribed-to data.
        def recv_data(message, stream_route, stream_id):
            log.info("Received message on %s (%s,%s)", stream_id, stream_route.exchange_point, stream_route.routing_key)

            name = None

            for (stream_name, stream_config) in self.stream_config.iteritems():
                if stream_route.routing_key == stream_config["routing_key"]:
                    log.debug(
                        "NAME: %s %s == %s" % (stream_name, stream_route.routing_key, stream_config["routing_key"])
                    )
                    name = stream_name
                    if not self.samples_received.get(stream_name):
                        self.samples_received[stream_name] = []
                    self.samples_received[stream_name].append(message)
                    log.debug("Add message to stream '%s' value: %s" % (stream_name, message))

        for (stream_name, stream_config) in self.stream_config.iteritems():
            stream_id = stream_config["stream_id"]

            # Create subscriptions for each stream.
            exchange_name = "%s_queue" % stream_name
            self._purge_queue(exchange_name)
            sub = StandaloneStreamSubscriber(exchange_name, recv_data)
            sub.start()
            self.data_subscribers[stream_name] = sub
            sub_id = pubsub_client.create_subscription(name=exchange_name, stream_ids=[stream_id])
            pubsub_client.activate_subscription(sub_id)
            sub.subscription_id = (
                sub_id
            )  # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
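
Unlike the earlier examples, these dictionary-keyed subscribers do not signal an AsyncResult; callers typically poll samples_received instead. A rough sketch of such a wait helper is shown below (the method name is assumed, and it relies on the time and gevent modules being importable in the test environment):

    def wait_for_samples(self, stream_name, count, timeout=60):
        # Sketch only: poll the per-stream list populated by recv_data above.
        end_time = time.time() + timeout
        while time.time() < end_time:
            if len(self.samples_received.get(stream_name, [])) >= count:
                return self.samples_received[stream_name]
            gevent.sleep(1)
        raise Exception('Timed out waiting for %d samples on %s' % (count, stream_name))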
Example no. 15
    def on_start(self):
        super(CacheLauncher,self).on_start()
        tms_cli = TransformManagementServiceClient()
        pubsub_cli = PubsubManagementServiceClient()
        pd_cli = ProcessDispatcherServiceClient()
        dname = CACHE_DATASTORE_NAME
        number_of_workers = self.CFG.get_safe('process.number_of_workers', 2)

        proc_def = ProcessDefinition(name='last_update_worker_process',description='Worker process for caching the last update from a stream')
        proc_def.executable['module'] = 'ion.processes.data.last_update_cache'
        proc_def.executable['class'] = 'LastUpdateCache'
        proc_def_id = pd_cli.create_process_definition(process_definition=proc_def)

        xs_dot_xp = CFG.core_xps.science_data
        try:
            self.XS, xp_base = xs_dot_xp.split('.')
            self.XP = '.'.join([get_sys_name(), xp_base])
        except ValueError:
            raise StandardError('Invalid CFG for core_xps.science_data: "%s"; must have "xs.xp" structure' % xs_dot_xp)

        subscription_id = pubsub_cli.create_subscription(query=ExchangeQuery(), exchange_name='last_update_cache')

        config = {
            'couch_storage' : {
                'datastore_name' : dname,
                'datastore_profile' : 'SCIDATA'
            }
        }

        for i in xrange(number_of_workers):

            transform_id = tms_cli.create_transform(
                name='last_update_cache%d' % i,
                description='last_update that compiles an aggregate of metadata',
                in_subscription_id=subscription_id,
                process_definition_id=proc_def_id,
                configuration=config
            )


        tms_cli.activate_transform(transform_id=transform_id)
Example no. 16
    def _start_data_subscribers(self, count):
        """
        """        
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)
                
        # Create streams and subscriptions for each stream named in driver.
        self._data_subscribers = []
        self._samples_received = []
        self._async_sample_result = AsyncResult()

        # A callback for processing subscribed-to data.
        def recv_data(message, stream_route, stream_id):
            log.info('Received message on %s (%s,%s)', stream_id, stream_route.exchange_point, stream_route.routing_key)
            #print '######################## stream message:'
            #print str(message)
            self._samples_received.append(message)
            if len(self._samples_received) == count:
                self._async_sample_result.set()

        for (stream_name, stream_config) in self._stream_config.iteritems():
            
            if stream_name == 'parsed':
                
                # Create subscription for parsed stream only.

                stream_id = stream_config['stream_id']
                
                from pyon.util.containers import create_unique_identifier
                # exchange_name = '%s_queue' % stream_name
                exchange_name = create_unique_identifier("%s_queue" %
                        stream_name)
                self._purge_queue(exchange_name)
                sub = StandaloneStreamSubscriber(exchange_name, recv_data)
                sub.start()
                self._data_subscribers.append(sub)
                print 'stream_id: %s' % stream_id
                sub_id = pubsub_client.create_subscription(name=exchange_name, stream_ids=[stream_id])
                pubsub_client.activate_subscription(sub_id)
                sub.subscription_id = sub_id # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
Example no. 17
    def _start_data_subscribers(self, count):
        """
        """
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)

        # Create streams and subscriptions for each stream named in driver.
        self._data_subscribers = []
        self._samples_received = []
        self._async_sample_result = AsyncResult()

        # A callback for processing subscribed-to data.
        def recv_data(message, stream_route, stream_id):
            log.info("Received message on %s (%s,%s)", stream_id, stream_route.exchange_point, stream_route.routing_key)
            self._samples_received.append(message)
            if len(self._samples_received) == count:
                self._async_sample_result.set()

        for (stream_name, stream_config) in self._stream_config.iteritems():

            stream_id = stream_config["stream_id"]

            # Create subscriptions for each stream.

            from pyon.util.containers import create_unique_identifier

            # exchange_name = '%s_queue' % stream_name
            exchange_name = create_unique_identifier("%s_queue" % stream_name)
            self._purge_queue(exchange_name)
            sub = StandaloneStreamSubscriber(exchange_name, recv_data)
            sub.start()
            self._data_subscribers.append(sub)
            print "stream_id: %s" % stream_id
            sub_id = pubsub_client.create_subscription(name=exchange_name, stream_ids=[stream_id])
            pubsub_client.activate_subscription(sub_id)
            sub.subscription_id = (
                sub_id
            )  # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
Example no. 18
    def run_external_transform(self):
        '''
        This example script illustrates, in a very basic way, how a transform can interact with an outside process.
        It launches an external_transform example which uses the operating system command 'bc' to add 1 to the input.

        Producer -> A -> 'FS.TEMP/transform_output'
        A is an external transform that spawns an OS process to increment the input by 1
        '''
        pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
        tms_cli = TransformManagementServiceClient(node=self.container.node)
        procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        #-------------------------------
        # Process Definition
        #-------------------------------
        process_definition = ProcessDefinition(
            name='external_transform_definition')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.transform_example'
        process_definition.executable['class'] = 'ExternalTransform'
        process_definition_id = procd_cli.create_process_definition(
            process_definition=process_definition)

        #-------------------------------
        # Streams
        #-------------------------------

        input_stream_id = pubsub_cli.create_stream(name='input_stream',
                                                   original=True)

        #-------------------------------
        # Subscription
        #-------------------------------

        query = StreamQuery(stream_ids=[input_stream_id])
        input_subscription_id = pubsub_cli.create_subscription(
            query=query, exchange_name='input_queue')

        #-------------------------------
        # Launch Transform
        #-------------------------------

        transform_id = tms_cli.create_transform(
            name='external_transform',
            in_subscription_id=input_subscription_id,
            process_definition_id=process_definition_id,
            configuration={})
        tms_cli.activate_transform(transform_id)

        #-------------------------------
        # Launch Producer
        #-------------------------------

        id_p = self.container.spawn_process(
            'myproducer', 'ion.processes.data.transforms.transform_example',
            'TransformExampleProducer', {
                'process': {
                    'type': 'stream_process',
                    'publish_streams': {
                        'out_stream': input_stream_id
                    }
                },
                'stream_producer': {
                    'interval': 4000
                }
            })
        self.container.proc_manager.procs[id_p].start()
Example no. 19
class TestCTDPChain(IonIntegrationTestCase):
    def setUp(self):
        super(TestCTDPChain, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub            = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0
        self.cnt = 0

        # Cleanup of queue created by the subscriber
        self.queue_cleanup = []
        self.data_process_cleanup = []

    def _get_new_ctd_L0_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i+length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])

        g = rdt.to_granule()
        self.i+=length

        return g
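
    # Example usage (sketch): this mirrors the call made later in
    # _publish_to_transform, which builds a 5-sample granule:
    #     granule = self._get_new_ctd_L0_packet(
    #         stream_definition_id=self.in_stream_def_id_for_L0, length=5)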

    def clean_queues(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()

    def cleaning_operations(self):
        for dproc_id in self.data_process_cleanup:
            self.data_process_management.delete_data_process(dproc_id)

    def test_ctdp_chain(self):
        """
        Test that packets are processed by a chain of CTDP transforms: L0, L1 and L2
        """

        #-------------------------------------------------------------------------------------
        # Prepare the stream def to be used for transform chain
        #-------------------------------------------------------------------------------------

        #todo Check whether the right parameter dictionary is being used
        self._prepare_stream_def_for_transform_chain()

        #-------------------------------------------------------------------------------------
        # Prepare the data proc defs and in and out data products for the transforms
        #-------------------------------------------------------------------------------------

        # list_args_L0 = [data_proc_def_id, input_dpod_id, output_dpod_id]
        list_args_L0 = self._prepare_things_you_need_to_launch_transform(name_of_transform='L0')

        list_args_L1 = self._prepare_things_you_need_to_launch_transform(name_of_transform='L1')

        list_args_L2_density = self._prepare_things_you_need_to_launch_transform(name_of_transform='L2_density')

        list_args_L2_salinity = self._prepare_things_you_need_to_launch_transform(name_of_transform='L2_salinity')

        log.debug("Got the following args: L0 = %s, L1 = %s, L2 density = %s, L2 salinity = %s", list_args_L0, list_args_L1, list_args_L2_density, list_args_L2_salinity )

        #-------------------------------------------------------------------------------------
        # Launch the CTDP transforms
        #-------------------------------------------------------------------------------------

        L0_data_proc_id = self._launch_transform('L0', *list_args_L0)

        L1_data_proc_id = self._launch_transform('L1', *list_args_L1)

        L2_density_data_proc_id = self._launch_transform('L2_density', *list_args_L2_density)

        L2_salinity_data_proc_id = self._launch_transform('L2_salinity', *list_args_L2_salinity)

        log.debug("Launched the transforms: L0 = %s, L1 = %s", L0_data_proc_id, L1_data_proc_id)

        #-------------------------------------------------------------------------
        # Start a subscriber listening to the output of each of the transforms
        #-------------------------------------------------------------------------

        ar_L0 = self.start_subscriber_listening_to_L0_transform(out_data_prod_id=list_args_L0[2])
        ar_L1 = self.start_subscriber_listening_to_L1_transform(out_data_prod_id=list_args_L1[2])

        ar_L2_density = self.start_subscriber_listening_to_L2_density_transform(out_data_prod_id=list_args_L2_density[2])
        ar_L2_salinity = self.start_subscriber_listening_to_L2_salinity_transform(out_data_prod_id=list_args_L2_salinity[2])


        #-------------------------------------------------------------------
        # Publish the parsed packets that the L0 transform is listening for
        #-------------------------------------------------------------------

        stream_id, stream_route = self.get_stream_and_route_for_data_prod(data_prod_id= list_args_L0[1])
        self._publish_for_L0_transform(stream_id, stream_route)

        #-------------------------------------------------------------------
        # Check the granules being outputted by the transforms
        #-------------------------------------------------------------------
        self._check_granule_from_L0_transform(ar_L0)
        self._check_granule_from_L1_transform(ar_L1)
        self._check_granule_from_L2_density_transform(ar_L2_density)
        self._check_granule_from_L2_salinity_transform(ar_L2_salinity)


    def _prepare_stream_def_for_transform_chain(self):

        # Get the stream definition for the stream using the parameter dictionary
#        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(parameter_dict_name, id_only=True)
        pdict_id = self._create_input_param_dict_for_test(parameter_dict_name = 'input_param_for_L0')
        self.in_stream_def_id_for_L0 = self.pubsub.create_stream_definition(name='stream_def_for_L0', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, self.in_stream_def_id_for_L0)

        pdict_id = self._create_input_param_dict_for_test(parameter_dict_name = 'params_for_other_transforms')
        self.stream_def_id = self.pubsub.create_stream_definition(name='stream_def_for_CTDBP_transforms', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, self.stream_def_id)

        log.debug("Got the parsed parameter dictionary: id: %s", pdict_id)
        log.debug("Got the stream def for parsed input to L0: %s", self.in_stream_def_id_for_L0)
        log.debug("Got the stream def for other other streams: %s", self.stream_def_id)

    def _prepare_things_you_need_to_launch_transform(self, name_of_transform = ''):

        module, class_name = self._get_class_module(name_of_transform)

        #-------------------------------------------------------------------------
        # Data Process Definition
        #-------------------------------------------------------------------------

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name= 'CTDBP_%s_Transform' % name_of_transform,
            description= 'Data Process Definition for the CTDBP %s transform.' % name_of_transform,
            module= module,
            class_name=class_name)

        data_proc_def_id = self.data_process_management.create_data_process_definition(dpd_obj)
        self.addCleanup(self.data_process_management.delete_data_process_definition, data_proc_def_id)
        log.debug("created data process definition: id = %s", data_proc_def_id)

        #-------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects for the data product objects
        #-------------------------------------------------------------------------

        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        #-------------------------------------------------------------------------
        # Get the names of the input and output data products
        #-------------------------------------------------------------------------

        input_dpod_id = ''
        output_dpod_id = ''

        if name_of_transform == 'L0':

            input_dpod_id = self._create_input_data_product('parsed', tdom, sdom)
            output_dpod_id = self._create_output_data_product('L0', tdom, sdom)

            self.in_prod_for_L1 = output_dpod_id

        elif name_of_transform == 'L1':

            input_dpod_id = self.in_prod_for_L1
            output_dpod_id = self._create_output_data_product('L1', tdom, sdom)

            self.in_prod_for_L2 = output_dpod_id

        elif name_of_transform == 'L2_density':

            input_dpod_id = self.in_prod_for_L2
            output_dpod_id = self._create_output_data_product('L2_density', tdom, sdom)

        elif name_of_transform == 'L2_salinity':

            input_dpod_id = self.in_prod_for_L2
            output_dpod_id = self._create_output_data_product('L2_salinity', tdom, sdom)

        else:
            self.fail("something bad happened")

        return [data_proc_def_id, input_dpod_id, output_dpod_id]

    def _get_class_module(self, name_of_transform):

        options = {'L0' : self._class_module_L0,
                   'L1' : self._class_module_L1,
                   'L2_density' : self._class_module_L2_density,
                   'L2_salinity' : self._class_module_L2_salinity}

        return options[name_of_transform]()

    def _class_module_L0(self):
        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L0'
        class_name = 'CTDBP_L0_all'
        return module, class_name

    def _class_module_L1(self):

        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L1'
        class_name = 'CTDBP_L1_Transform'
        return module, class_name

    def _class_module_L2_density(self):

        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L2_density'
        class_name = 'CTDBP_DensityTransform'

        return module, class_name

    def _class_module_L2_salinity(self):

        module = 'ion.processes.data.transforms.ctdbp.ctdbp_L2_salinity'
        class_name = 'CTDBP_SalinityTransform'

        return module, class_name

    def _create_input_data_product(self, name_of_transform = '', tdom = None, sdom = None):

        dpod_obj = IonObject(RT.DataProduct,
            name='dprod_%s' % name_of_transform,
            description='for_%s' % name_of_transform,
            temporal_domain = tdom,
            spatial_domain = sdom)

        log.debug("the stream def id: %s", self.stream_def_id)

        if name_of_transform == 'L0':
            stream_def_id = self.in_stream_def_id_for_L0
        else:
            stream_def_id = self.stream_def_id


        dpod_id = self.dataproduct_management.create_data_product(data_product=dpod_obj,
            stream_definition_id= stream_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, dpod_id)

        log.debug("got the data product out. id: %s", dpod_id)

        return dpod_id

    def _create_output_data_product(self, name_of_transform = '', tdom = None, sdom = None):

        dpod_obj = IonObject(RT.DataProduct,
            name='dprod_%s' % name_of_transform,
            description='for_%s' % name_of_transform,
            temporal_domain = tdom,
            spatial_domain = sdom)

        if name_of_transform == 'L0':
            stream_def_id = self.in_stream_def_id_for_L0
        else:
            stream_def_id = self.stream_def_id

        dpod_id = self.dataproduct_management.create_data_product(data_product=dpod_obj,
            stream_definition_id=stream_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, dpod_id)

        return dpod_id

    def _launch_transform(self, name_of_transform = '', data_proc_def_id = None, input_dpod_id = None, output_dpod_id = None):

        # The binding chosen here must match the name of the published stream, since it goes into the launch
        # config as config.process.publish_streams.<binding> when the data process is started.

        if name_of_transform in ['L0', 'L1']:
            binding = '%s_stream' % name_of_transform
        elif name_of_transform == 'L2_salinity':
            binding = 'salinity'
        elif name_of_transform == 'L2_density':
            binding = 'density'

        config = None
        if name_of_transform == 'L1':
            config = self._create_calibration_coefficients_dict()
        elif name_of_transform == 'L2_density':
            config = DotDict()
            config.process = {'lat' : 32.7153, 'lon' : 117.1564}

        log.debug("launching transform for name: %s",name_of_transform )
        log.debug("launching transform for data_proc_def_id: %s\ninput_dpod_id: %s\noutput_dpod_id: %s", data_proc_def_id, input_dpod_id, output_dpod_id )

        data_proc_id = self.data_process_management.create_data_process(
                                                        data_process_definition_id = data_proc_def_id,
                                                        in_data_product_ids= [input_dpod_id],
                                                        out_data_product_ids = [output_dpod_id],
                                                        configuration = config)

        self.addCleanup(self.data_process_management.delete_data_process, data_proc_id)

        self.data_process_management.activate_data_process(data_proc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, data_proc_id)

        log.debug("Created a data process for ctdbp %s transform: id = %s", name_of_transform, data_proc_id)

        return data_proc_id

    def get_stream_and_route_for_data_prod(self, data_prod_id = ''):

        stream_ids, _ = self.resource_registry.find_objects(data_prod_id, PRED.hasStream, RT.Stream, True)
        stream_id = stream_ids[0]
        input_stream = self.resource_registry.read(stream_id)
        stream_route = input_stream.stream_route

        return stream_id, stream_route

    def start_subscriber_listening_to_L0_transform(self, out_data_prod_id = ''):

        #----------- Create subscribers to listen to the two transforms --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id, PRED.hasStream, RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]

        ar_L0 = self._start_subscriber_to_transform( name_of_transform = 'L0',stream_id=output_stream_id_of_transform)

        return ar_L0


    def start_subscriber_listening_to_L1_transform(self, out_data_prod_id = ''):

        #----------- Create subscribers to listen to the two transforms --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id, PRED.hasStream, RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]

        ar_L1 = self._start_subscriber_to_transform( name_of_transform = 'L1',stream_id=output_stream_id_of_transform)

        return ar_L1


    def start_subscriber_listening_to_L2_density_transform(self, out_data_prod_id = ''):

        #----------- Create subscribers to listen to the two transforms --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id, PRED.hasStream, RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]

        ar_L2_density = self._start_subscriber_to_transform( name_of_transform = 'L2_density', stream_id=output_stream_id_of_transform)

        return ar_L2_density

    def start_subscriber_listening_to_L2_salinity_transform(self, out_data_prod_id = ''):

        #----------- Create subscribers to listen to the two transforms --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(out_data_prod_id, PRED.hasStream, RT.Stream, True)
        output_stream_id_of_transform = stream_ids[0]

        ar_L2_salinity = self._start_subscriber_to_transform( name_of_transform = 'L2_salinity',stream_id=output_stream_id_of_transform)

        return ar_L2_salinity

    def _start_subscriber_to_transform(self,  name_of_transform = '', stream_id = ''):

        ar = gevent.event.AsyncResult()
        def subscriber(m,r,s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub_%s' % name_of_transform, callback=subscriber)

        # Note that running the line below creates an exchange, since none with that name exists yet
        sub_id = self.pubsub.create_subscription('subscriber_to_transform_%s' % name_of_transform,
            stream_ids=[stream_id],
            exchange_name='sub_%s' % name_of_transform)
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        sub.start()
        self.addCleanup(sub.stop)

        return ar

    def _check_granule_from_L0_transform(self, ar = None):

        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L0 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L0_algorithm(granule_from_transform)


    def _check_granule_from_L1_transform(self, ar = None):

        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L1 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L1_algorithm(granule_from_transform)

    def _check_granule_from_L2_density_transform(self, ar = None):

        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L2 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L2_density_algorithm(granule_from_transform)

    def _check_granule_from_L2_salinity_transform(self, ar = None):

        granule_from_transform = ar.get(timeout=20)
        log.debug("Got the following granule from the L2 transform: %s", granule_from_transform)

        # Check the algorithm being applied
        self._check_application_of_L2_salinity_algorithm(granule_from_transform)


    def _check_application_of_L0_algorithm(self, granule = None):
        """ Check the algorithm applied by the L0 transform """

        rdt = RecordDictionaryTool.load_from_granule(granule)

        list_of_expected_keys = ['time', 'pressure', 'conductivity', 'temperature']

        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _check_application_of_L1_algorithm(self, granule = None):
        """ Check the algorithm applied by the L1 transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)

        list_of_expected_keys = [ 'time', 'pressure', 'conductivity', 'temp']

        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _check_application_of_L2_density_algorithm(self, granule = None):
        """ Check the algorithm applied by the L2 transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)

        list_of_expected_keys = ['time', 'density']

        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _check_application_of_L2_salinity_algorithm(self, granule = None):
        """ Check the algorithm applied by the L2 transform """
        rdt = RecordDictionaryTool.load_from_granule(granule)

        list_of_expected_keys = ['time', 'salinity']

        for key in list_of_expected_keys:
            self.assertIn(key, rdt)

    def _publish_for_L0_transform(self, input_stream_id = None, stream_route = None):

        #----------- Publish on that stream so that the transform can receive it --------------------------------
        self._publish_to_transform(input_stream_id, stream_route )

    def _publish_to_transform(self, stream_id = '', stream_route = None):

        pub = StandaloneStreamPublisher(stream_id, stream_route)
        publish_granule = self._get_new_ctd_L0_packet(stream_definition_id=self.in_stream_def_id_for_L0, length = 5)
        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)


    def _create_input_param_dict_for_test(self, parameter_dict_name = ''):

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        cond_ctxt.uom = 'Siemens_per_meter'
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        pres_ctxt.uom = 'Pascal'
        pdict.add_context(pres_ctxt)

        if parameter_dict_name == 'input_param_for_L0':
            temp_ctxt = ParameterContext('temperature', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        else:
            temp_ctxt = ParameterContext('temp', param_type=QuantityType(value_encoding=numpy.dtype('float32')))

        temp_ctxt.uom = 'degree_kelvin'
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        dens_ctxt.uom = 'g/m'
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        sal_ctxt.uom = 'PSU'
        pdict.add_context(sal_ctxt)

        # Register the parameter contexts and build the parameter dictionary so the data product can later create its stream definition and stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            if parameter_dict_name == 'input_param_for_L0':
                self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id)
            elif pc[1].name == 'temp':
                self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id
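
    # Sketch of typical use of the returned parameter dictionary id (assumed
    # from the analogous CtdTransformsIntTest example later in this file):
    #
    #   pdict_id = self._create_input_param_dict_for_test('input_param_for_L0')
    #   stream_def_id = self.pubsub.create_stream_definition(
    #       name='parsed', parameter_dictionary_id=pdict_id)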



    def _create_calibration_coefficients_dict(self):
        config = DotDict()
        config.process.calibration_coeffs = {
                  'temp_calibration_coeffs': {
                      'TA0' : 1.561342e-03,
                      'TA1' : 2.561486e-04,
                      'TA2' : 1.896537e-07,
                      'TA3' : 1.301189e-07,
                      'TOFFSET' : 0.000000e+00
                  },

                  'cond_calibration_coeffs':  {
                      'G' : -9.896568e-01,
                      'H' : 1.316599e-01,
                      'I' : -2.213854e-04,
                      'J' : 3.292199e-05,
                      'CPCOR' : -9.570000e-08,
                      'CTCOR' : 3.250000e-06,
                      'CSLOPE' : 1.000000e+00
                  },

                  'pres_calibration_coeffs' : {
                      'PA0' : 4.960417e-02,
                      'PA1' : 4.883682e-04,
                      'PA2' : -5.687309e-12,
                      'PTCA0' : 5.249802e+05,
                      'PTCA1' : 7.595719e+00,
                      'PTCA2' : -1.322776e-01,
                      'PTCB0' : 2.503125e+01,
                      'PTCB1' : 5.000000e-05,
                      'PTCB2' : 0.000000e+00,
                      'PTEMPA0' : -6.431504e+01,
                      'PTEMPA1' : 5.168177e+01,
                      'PTEMPA2' : -2.847757e-01,
                      'POFFSET' : 0.000000e+00
                  }

              }


        return config
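
    # Sketch of how this calibration config is typically consumed (assumed from
    # the CtdTransformsIntTest example later in this file; not part of this class):
    #
    #   config = self._create_calibration_coefficients_dict()
    #   dproc_id = self.data_process_management.create_data_process(
    #       dprocdef_id, [input_dp_id], self.output_products, config)
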
class TestInstrumentAgent(IonIntegrationTestCase):
    """
    Test cases for instrument agent class. Functions in this class provide
    instrument agent integration tests and serve as a tutorial on use of
    the agent setup and interface.
    """
 
    def customCleanUp(self):
        log.info('CUSTOM CLEAN UP ******************************************************************************')

    def setUp(self):
        """
        Set up the test environment to exercise use of the instrument agent, including:
        * define driver_config parameters.
        * create container with required services and container client.
        * create publication stream ids for each driver data stream.
        * create stream_config parameters.
        * create and activate subscriptions for agent data streams.
        * spawn instrument agent process and create agent client.
        * add cleanup functions so that subscribers are stopped.
        """


 #       params = { ('CTD', 'TA2'): -1.9434316e-05,
 #       ('CTD', 'PTCA1'): 1.3206866,
 #       ('CTD', 'TCALDATE'): [8, 11, 2006] }

 #       for tup in params:
 #           print tup




        self.addCleanup(self.customCleanUp)
        # Names of agent data streams to be configured.
        parsed_stream_name = 'ctd_parsed'        
        raw_stream_name = 'ctd_raw'        

        # Driver configuration.
        #Simulator

        self.driver_config = {
            'svr_addr': 'localhost',
            'cmd_port': 5556,
            'evt_port': 5557,
            'dvr_mod': 'ion.services.mi.drivers.sbe37.sbe37_driver',
            'dvr_cls': 'SBE37Driver',
            'comms_config': {
                SBE37Channel.CTD: {
                    'method':'ethernet',
                    'device_addr': CFG.device.sbe37.host,
                    'device_port': CFG.device.sbe37.port,
                    'server_addr': 'localhost',
                    'server_port': 8888
                }                
            }
        }

        #Hardware

        '''
        self.driver_config = {
            'svr_addr': 'localhost',
            'cmd_port': 5556,
            'evt_port': 5557,
            'dvr_mod': 'ion.services.mi.drivers.sbe37.sbe37_driver',
            'dvr_cls': 'SBE37Driver',
            'comms_config': {
                SBE37Channel.CTD: {
                    'method':'ethernet',
                    'device_addr': '137.110.112.119',
                    'device_port': 4001,
                    'server_addr': 'localhost',
                    'server_port': 8888
                }
            }
        }
        '''

        # Start container.
        self._start_container()

        # Establish endpoint with container (used in tests below)
        self._container_client = ContainerAgentClient(node=self.container.node,
                                                      name=self.container.name)
        
        # Bring up services in a deploy file (no need to message)
        self.container.start_rel_from_url('res/deploy/r2dm.yml')

        # Create a pubsub client to create streams.
        self._pubsub_client = PubsubManagementServiceClient(
                                                    node=self.container.node)

        # A callback for processing subscribed-to data.
        def consume(message, headers):
            log.info('Subscriber received message: %s', str(message))

        # Create a stream subscriber registrar to create subscribers.
        subscriber_registrar = StreamSubscriberRegistrar(process=self.container,
                                                         container=self.container)

        self.subs = []

        # Create streams for each stream named in driver.
        self.stream_config = {}
        for (stream_name, val) in PACKET_CONFIG.iteritems():
            stream_def = ctd_stream_definition(stream_id=None)
            stream_def_id = self._pubsub_client.create_stream_definition(
                                                    container=stream_def)        
            stream_id = self._pubsub_client.create_stream(
                        name=stream_name,
                        stream_definition_id=stream_def_id,
                        original=True,
                        encoding='ION R2')
            self.stream_config[stream_name] = stream_id
            
            # Create subscriptions for each stream.
            exchange_name = '%s_queue' % stream_name
            sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name, callback=consume)
            sub.start()
            query = StreamQuery(stream_ids=[stream_id])
            sub_id = self._pubsub_client.create_subscription(
                                query=query, exchange_name=exchange_name)
            self._pubsub_client.activate_subscription(sub_id)
            self.subs.append(sub)
            
        # Add cleanup function to stop subscribers.        
        def stop_subscriber(sub_list):
            for sub in sub_list:
                sub.stop()            
        self.addCleanup(stop_subscriber, self.subs)            
            

        # Create agent config.

        self.agent_resource_id = '123xyz'

        self.agent_config = {
            'driver_config' : self.driver_config,
            'stream_config' : self.stream_config,
            'agent'         : {'resource_id': self.agent_resource_id}
        }

        # Launch an instrument agent process.
        self._ia_name = 'agent007'
        self._ia_mod = 'ion.services.mi.instrument_agent'
        self._ia_class = 'InstrumentAgent'
        self._ia_pid = self._container_client.spawn_process(name=self._ia_name,
                                       module=self._ia_mod, cls=self._ia_class,
                                       config=self.agent_config)


        log.info('got pid=%s', str(self._ia_pid))


        self._ia_client = None
        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = ResourceAgentClient(self.agent_resource_id, process=FakeProcess())
        log.info('got ia client %s', str(self._ia_client))



    def test_initialize(self):
        """
        Test agent initialize command. This causes creation of
        driver process and transition to inactive.
        """


        cmd = AgentCommand(command='initialize')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        log.info(reply)

        time.sleep(2)

        caps = gw_agent_get_capabilities(self.agent_resource_id)
        log.info('Capabilities: %s',str(caps))

        time.sleep(2)

        cmd = AgentCommand(command='reset')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        log.info(reply)


    def test_direct_access(self):
        """
        Test agent direct_access command. This causes creation of
        driver process and transition to direct access.
        """
        print("test initing")
        cmd = AgentCommand(command='initialize')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test go_active")
        cmd = AgentCommand(command='go_active')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test run")
        cmd = AgentCommand(command='run')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test go_da")
        cmd = AgentCommand(command='go_direct_access')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        print("reply=" + str(reply))
        time.sleep(2)

        print("test go_ob")
        cmd = AgentCommand(command='go_observatory')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test go_inactive")
        cmd = AgentCommand(command='go_inactive')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test reset")
        cmd = AgentCommand(command='reset')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

    def test_go_active(self):
        """
        Test agent go_active command. This causes a driver process to
        launch a connection broker, connect to device hardware, determine
        entry state of driver and initialize driver parameters.
        """
        cmd = AgentCommand(command='initialize')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        cmd = AgentCommand(command='go_active')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        cmd = AgentCommand(command='go_inactive')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        cmd = AgentCommand(command='reset')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)
Example No. 21
    def run_basic_transform(self):
        ''' Runs a basic example of a transform. It chains two transforms together, each adding 1 to its input

        Producer -> A -> B
        Producer generates a number every four seconds and publishes it on the 'ctd_output_stream'
          the producer is acting as a CTD or instrument in this example.
        A is a basic transform that increments its input and publishes it on the 'transform_output' stream.
        B is a basic transform that receives input.
        All transforms write logging data to 'FS.TEMP/transform_output' so you can visually inspect the transforms' activity
        '''

        pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
        tms_cli = TransformManagementServiceClient(node=self.container.node)
        procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        #-------------------------------
        # Process Definition
        #-------------------------------

        process_definition = IonObject(RT.ProcessDefinition,
                                       name='transform_process_definition')
        process_definition.executable = {
            'module': 'ion.processes.data.transforms.transform_example',
            'class': 'TransformExample'
        }
        process_definition_id = procd_cli.create_process_definition(
            process_definition)

        #-------------------------------
        # First Transform
        #-------------------------------

        # Create a dummy output stream from a 'ctd' instrument
        ctd_output_stream_id = pubsub_cli.create_stream(
            name='ctd_output_stream', original=True)

        # Create the subscription to the ctd_output_stream
        query = StreamQuery(stream_ids=[ctd_output_stream_id])
        ctd_subscription_id = pubsub_cli.create_subscription(
            query=query, exchange_name='ctd_output')

        # Create an output stream for the transform
        transform_output_stream_id = pubsub_cli.create_stream(
            name='transform_output', original=True)

        configuration = {}

        # Launch the first transform process
        transform_id = tms_cli.create_transform(
            name='basic_transform',
            in_subscription_id=ctd_subscription_id,
            out_streams={'output': transform_output_stream_id},
            process_definition_id=process_definition_id,
            configuration=configuration)
        tms_cli.activate_transform(transform_id)

        #-------------------------------
        # Second Transform
        #-------------------------------

        # Create a SUBSCRIPTION to this output stream for the second transform
        query = StreamQuery(stream_ids=[transform_output_stream_id])
        second_subscription_id = pubsub_cli.create_subscription(
            query=query, exchange_name='final_output')

        # Create a final output stream
        final_output_id = pubsub_cli.create_stream(name='final_output',
                                                   original=True)

        configuration = {}

        second_transform_id = tms_cli.create_transform(
            name='second_transform',
            in_subscription_id=second_subscription_id,
            out_streams={'output': final_output_id},
            process_definition_id=process_definition_id,
            configuration=configuration)
        tms_cli.activate_transform(second_transform_id)

        #-------------------------------
        # Producer (Sample Input)
        #-------------------------------

        # Create a producing example process
        id_p = self.container.spawn_process(
            'myproducer', 'ion.processes.data.transforms.transform_example',
            'TransformExampleProducer', {
                'process': {
                    'type': 'stream_process',
                    'publish_streams': {
                        'out_stream': ctd_output_stream_id
                    }
                },
                'stream_producer': {
                    'interval': 4000
                }
            })
        self.container.proc_manager.procs[id_p].start()
Example No. 22
class LastUpdateCacheTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.datastore_name = CACHE_DATASTORE_NAME
        self.container.start_rel_from_url('res/deploy/r2dm.yml')
        self.db = self.container.datastore_manager.get_datastore(self.datastore_name,DataStore.DS_PROFILE.SCIDATA)
        self.tms_cli = TransformManagementServiceClient()
        self.pubsub_cli = PubsubManagementServiceClient()
        self.pd_cli = ProcessDispatcherServiceClient()
        self.rr_cli = ResourceRegistryServiceClient()
        xs_dot_xp = CFG.core_xps.science_data
        try:
            self.XS, xp_base = xs_dot_xp.split('.')
            self.XP = '.'.join([bootstrap.get_sys_name(), xp_base])
        except ValueError:
            raise StandardError('Invalid CFG for core_xps.science_data: "%s"; must have "xs.xp" structure' % xs_dot_xp)


    def make_points(self,definition,stream_id='I am very special', N=100):
        from prototype.sci_data.constructor_apis import PointSupplementConstructor
        import numpy as np
        import random


        definition.stream_resource_id = stream_id


        total = N
        n = 10 # at most n records per granule
        i = 0

        while i < total:
            r = random.randint(1,n)

            psc = PointSupplementConstructor(point_definition=definition, stream_id=stream_id)
            for x in xrange(r):
                i+=1
                point_id = psc.add_point(time=i, location=(0,0,0))
                psc.add_scalar_point_coverage(point_id=point_id, coverage_id='temperature', value=np.random.normal(loc=48.0,scale=4.0, size=1)[0])
                psc.add_scalar_point_coverage(point_id=point_id, coverage_id='pressure', value=np.float32(1.0))
                psc.add_scalar_point_coverage(point_id=point_id, coverage_id='conductivity', value=np.float32(2.0))
            granule = psc.close_stream_granule()
            hdf_string = granule.identifiables[definition.data_stream_id].values
            granule.identifiables[definition.data_stream_id].values = hdf_string
            yield granule
        return

    def start_worker(self):


        proc_def = ProcessDefinition()
        proc_def.executable['module'] = 'ion.processes.data.last_update_cache'
        proc_def.executable['class'] = 'LastUpdateCache'
        proc_def_id = self.pd_cli.create_process_definition(process_definition=proc_def)


        subscription_id = self.pubsub_cli.create_subscription(query=ExchangeQuery(), exchange_name='ingestion_aggregate')

        config = {
            'couch_storage' : {
                'datastore_name' :self.datastore_name,
                'datastore_profile' : 'SCIDATA'
            }
        }

        transform_id = self.tms_cli.create_transform(
            name='last_update_cache',
            description='LastUpdate that compiles an aggregate of metadata',
            in_subscription_id=subscription_id,
            process_definition_id=proc_def_id,
            configuration=config
        )


        self.tms_cli.activate_transform(transform_id=transform_id)
        transform = self.rr_cli.read(transform_id)
        pid = transform.process_id
        handle = self.container.proc_manager.procs[pid]
        return handle

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_last_update_cache(self):
        handle = self.start_worker()
        queue = Queue()
        o_process = handle.process
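        # Wrap the worker's process() so the test is signalled through the
        # queue every time a message has been handled.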
        def new_process(msg):
            o_process(msg)
            queue.put(True)
        handle.process = new_process



        definition = SBE37_CDM_stream_definition()
        publisher = Publisher()

        stream_def_id = self.pubsub_cli.create_stream_definition(container=definition)
        stream_id = self.pubsub_cli.create_stream(stream_definition_id=stream_def_id)

        time = float(0.0)

        for granule in self.make_points(definition=definition, stream_id=stream_id, N=10):

            publisher.publish(granule, to_name=(self.XP, stream_id+'.data'))
            # Message handling is asynchronous; wait for the worker to process the granule
            try:
                queue.get(timeout=5)
            except Empty:
                self.fail('Process never received the message.')

            doc = self.db.read(stream_id)
            ntime = doc.variables['time'].value
            self.assertTrue(ntime >= time, 'The documents did not sequentially get updated correctly.')
            time = ntime

class TestDataProcessManagementPrime(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dataset_management      = DatasetManagementServiceClient()
        self.resource_registry       = self.container.resource_registry
        self.pubsub_management       = PubsubManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

        self.validators = 0


    def lc_preload(self):
        config = DotDict()
        config.op = 'load'
        config.scenario = 'BASE,LC_TEST'
        config.categories = 'ParameterFunctions,ParameterDefs,ParameterDictionary'
        config.path = 'res/preload/r2_ioc'
        
        self.container.spawn_process('preload','ion.processes.bootstrap.ion_loader','IONLoader', config)

    def ctd_plain_input_data_product(self):
        available_fields = [
                'internal_timestamp', 
                'temp', 
                'preferred_timestamp', 
                'time', 
                'port_timestamp', 
                'quality_flag', 
                'lat', 
                'conductivity', 
                'driver_timestamp', 
                'lon', 
                'pressure']
        return self.make_data_product('ctd_parsed_param_dict', 'ctd plain test', available_fields)


    def ctd_plain_salinity(self):
        available_fields = [
                'internal_timestamp', 
                'preferred_timestamp', 
                'time', 
                'port_timestamp', 
                'quality_flag', 
                'lat', 
                'driver_timestamp', 
                'lon', 
                'salinity']
        return self.make_data_product('ctd_parsed_param_dict', 'salinity', available_fields)

    def ctd_plain_density(self):
        available_fields = [
                'internal_timestamp', 
                'preferred_timestamp', 
                'time', 
                'port_timestamp', 
                'quality_flag', 
                'lat', 
                'driver_timestamp', 
                'lon', 
                'density']
        return self.make_data_product('ctd_parsed_param_dict', 'density', available_fields)

    def ctd_instrument_data_product(self):
        available_fields = [
                'internal_timestamp', 
                'temp', 
                'preferred_timestamp', 
                'time', 
                'port_timestamp', 
                'quality_flag', 
                'lat', 
                'conductivity', 
                'driver_timestamp', 
                'lon', 
                'pressure']
        return self.make_data_product('ctd_LC_TEST', 'ctd instrument', available_fields)

    def make_data_product(self, pdict_name, dp_name, available_fields=[]):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(pdict_name, id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('%s stream_def' % dp_name, parameter_dictionary_id=pdict_id, available_fields=available_fields or None)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        tdom, sdom = time_series_domain()
        tdom = tdom.dump()
        sdom = sdom.dump()
        dp_obj = DataProduct(name=dp_name)
        dp_obj.temporal_domain = tdom
        dp_obj.spatial_domain = sdom
        data_product_id = self.data_product_management.create_data_product(dp_obj, stream_definition_id=stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)
        return data_product_id

    def google_dt_data_product(self):
        return self.make_data_product('google_dt', 'visual')

    def ctd_derived_data_product(self):
        return self.make_data_product('ctd_LC_TEST', 'ctd derived products')
        
    def publish_to_plain_data_product(self, data_product_id):
        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        self.assertTrue(len(stream_ids))
        stream_id = stream_ids.pop()
        route = self.pubsub_management.read_stream_route(stream_id)
        stream_definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_definition._id
        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        now = time.time()
        ntp_now = now + 2208988800 # Do not use in production, this is a loose translation
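        # 2208988800 is the offset in seconds between the NTP epoch (1900-01-01)
        # and the Unix epoch (1970-01-01).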

        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [20.0]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['time'] = [ntp_now]
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['lat'] = [45]
        rdt['conductivity'] = [4.2914]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [3.068]

        granule = rdt.to_granule()
        publisher.publish(granule)

    def publish_to_data_product(self, data_product_id):
        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        self.assertTrue(len(stream_ids))
        stream_id = stream_ids.pop()
        route = self.pubsub_management.read_stream_route(stream_id)
        stream_definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_definition._id
        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        now = time.time()
        ntp_now = now + 2208988800 # Do not use in production, this is a loose translation

        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['time'] = [ntp_now]
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        granule = rdt.to_granule()
        publisher.publish(granule)

    def setup_subscriber(self, data_product_id, callback):
        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        self.assertTrue(len(stream_ids))
        stream_id = stream_ids.pop()

        sub_id = self.pubsub_management.create_subscription('validator_%s'%self.validators, stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)


        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        subscriber = StandaloneStreamSubscriber('validator_%s' % self.validators, callback=callback)
        subscriber.start()
        self.addCleanup(subscriber.stop)
        self.validators+=1

        return subscriber

    def create_density_transform_function(self):
        tf = TransformFunction(name='ctdbp_l2_density', module='ion.processes.data.transforms.ctdbp.ctdbp_L2_density', cls='CTDBP_DensityTransformAlgorithm')
        tf_id = self.data_process_management.create_transform_function(tf)
        self.addCleanup(self.data_process_management.delete_transform_function, tf_id)
        return tf_id

    def create_salinity_transform_function(self):
        tf = TransformFunction(name='ctdbp_l2_salinity', module='ion.processes.data.transforms.ctdbp.ctdbp_L2_salinity', cls='CTDBP_SalinityTransformAlgorithm')
        tf_id = self.data_process_management.create_transform_function(tf)
        self.addCleanup(self.data_process_management.delete_transform_function, tf_id)
        return tf_id

   
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_data_process_prime(self):
        self.lc_preload()
        instrument_data_product_id = self.ctd_instrument_data_product()
        derived_data_product_id = self.ctd_derived_data_product()

        data_process_id = self.data_process_management.create_data_process2(in_data_product_ids=[instrument_data_product_id], out_data_product_ids=[derived_data_product_id])
        self.addCleanup(self.data_process_management.delete_data_process2, data_process_id)

        self.data_process_management.activate_data_process2(data_process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id)
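
        # The validator below checks that the derived product carries L1-scaled
        # conductivity/temp/pressure plus L2 density and salinity, all computed
        # from the raw counts published by publish_to_data_product().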
    

        validated = Event()

        def validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)

            np.testing.assert_array_almost_equal(rdt['conductivity_L1'], np.array([42.914]))
            np.testing.assert_array_almost_equal(rdt['temp_L1'], np.array([20.]))
            np.testing.assert_array_almost_equal(rdt['pressure_L1'], np.array([3.068]))
            np.testing.assert_array_almost_equal(rdt['density'], np.array([1021.7144739593881]))
            np.testing.assert_array_almost_equal(rdt['salinity'], np.array([30.935132729668283]))

            validated.set()

        self.setup_subscriber(derived_data_product_id, callback=validation)
        self.publish_to_data_product(instrument_data_product_id)
        
        self.assertTrue(validated.wait(10))
        
    def test_older_transform(self):
        input_data_product_id = self.ctd_plain_input_data_product()

        conductivity_data_product_id = self.make_data_product('ctd_parsed_param_dict', 'conductivity_product', ['time', 'conductivity'])
        conductivity_stream_def_id = self.get_named_stream_def('conductivity_product stream_def')
        temperature_data_product_id = self.make_data_product('ctd_parsed_param_dict', 'temperature_product', ['time', 'temp'])
        temperature_stream_def_id = self.get_named_stream_def('temperature_product stream_def')
        pressure_data_product_id = self.make_data_product('ctd_parsed_param_dict', 'pressure_product', ['time', 'pressure'])
        pressure_stream_def_id = self.get_named_stream_def('pressure_product stream_def')

        dpd = DataProcessDefinition(name='ctdL0')
        dpd.data_process_type = DataProcessTypeEnum.TRANSFORM
        dpd.module = 'ion.processes.data.transforms.ctd.ctd_L0_all'
        dpd.class_name = 'ctd_L0_all'

        data_process_definition_id = self.data_process_management.create_data_process_definition(dpd)
        self.addCleanup(self.data_process_management.delete_data_process_definition, data_process_definition_id)

        self.data_process_management.assign_stream_definition_to_data_process_definition(conductivity_stream_def_id, data_process_definition_id, binding='conductivity')
        self.data_process_management.assign_stream_definition_to_data_process_definition(temperature_stream_def_id, data_process_definition_id, binding='temperature')
        self.data_process_management.assign_stream_definition_to_data_process_definition(pressure_stream_def_id, data_process_definition_id, binding='pressure')

        data_process_id = self.data_process_management.create_data_process2(data_process_definition_id=data_process_definition_id, in_data_product_ids=[input_data_product_id], out_data_product_ids=[conductivity_data_product_id, temperature_data_product_id, pressure_data_product_id])
        self.addCleanup(self.data_process_management.delete_data_process2, data_process_id)

        self.data_process_management.activate_data_process2(data_process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id)

        conductivity_validated = Event()
        def validate_conductivity(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            np.testing.assert_array_almost_equal(rdt['conductivity'], np.array([4.2914]))
            conductivity_validated.set()

        self.setup_subscriber(conductivity_data_product_id, callback=validate_conductivity)
        temperature_validated = Event()
        def validate_temperature(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            np.testing.assert_array_almost_equal(rdt['temp'], np.array([20.0]))
            temperature_validated.set()
        self.setup_subscriber(temperature_data_product_id, callback=validate_temperature)
        pressure_validated = Event()
        def validate_pressure(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            np.testing.assert_array_almost_equal(rdt['pressure'], np.array([3.068]))
            pressure_validated.set()
        self.setup_subscriber(pressure_data_product_id, callback=validate_pressure)
        self.publish_to_plain_data_product(input_data_product_id)
        self.assertTrue(conductivity_validated.wait(10))
        self.assertTrue(temperature_validated.wait(10))
        self.assertTrue(pressure_validated.wait(10))



    def get_named_stream_def(self, name):
        stream_def_ids, _ = self.resource_registry.find_resources(name=name, restype=RT.StreamDefinition, id_only=True)
        return stream_def_ids[0]

    def test_actors(self):
        input_data_product_id = self.ctd_plain_input_data_product()
        output_data_product_id = self.ctd_plain_density()
        actor = self.create_density_transform_function()
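        # route maps each input data product to the output data products it
        # feeds and the transform function (actor) applied along that edge.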
        route = {input_data_product_id: {output_data_product_id: actor}}
        config = DotDict()
        config.process.routes = route
        config.process.params.lat = 45.
        config.process.params.lon = -71.

        data_process_id = self.data_process_management.create_data_process2(in_data_product_ids=[input_data_product_id], out_data_product_ids=[output_data_product_id], configuration=config)
        self.addCleanup(self.data_process_management.delete_data_process2, data_process_id)

        self.data_process_management.activate_data_process2(data_process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id)

        validated = Event()
        def validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            # The published value is a double but the value coming back is only a float32, so there is some precision loss; it should still agree to 4 decimal places
            np.testing.assert_array_almost_equal(rdt['density'], np.array([1021.6839775385847]), decimal=4) 
            validated.set()

        self.setup_subscriber(output_data_product_id, callback=validation)

        self.publish_to_plain_data_product(input_data_product_id)
        self.assertTrue(validated.wait(10))

    def test_multi_in_out(self):
        input1 = self.ctd_plain_input_data_product()
        input2 = self.make_data_product('ctd_parsed_param_dict', 'input2')

        density_dp_id = self.ctd_plain_density()
        salinity_dp_id = self.ctd_plain_salinity()

        density_actor = self.create_density_transform_function()
        salinity_actor = self.create_salinity_transform_function()

        routes = {
            input1 : {
                density_dp_id : density_actor,
                salinity_dp_id : salinity_actor
                },
            input2 : {
                density_dp_id : density_actor
                }
            }
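
        # input1 feeds both the density and salinity products; input2 feeds only
        # density. The assertions at the end of this test rely on that asymmetry.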

        config = DotDict()
        config.process.routes = routes
        config.process.params.lat = 45.
        config.process.params.lon = -71.


        data_process_id = self.data_process_management.create_data_process2(in_data_product_ids=[input1, input2], out_data_product_ids=[density_dp_id, salinity_dp_id], configuration=config)
        self.addCleanup(self.data_process_management.delete_data_process2, data_process_id)

        self.data_process_management.activate_data_process2(data_process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id)

        density_validated = Event()
        salinity_validated = Event()

        def density_validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            np.testing.assert_array_almost_equal(rdt['density'], np.array([1021.6839775385847]), decimal=4) 
            density_validated.set()

        def salinity_validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            np.testing.assert_array_almost_equal(rdt['salinity'], np.array([30.93513240786831]), decimal=4) 
            salinity_validated.set()

        self.setup_subscriber(density_dp_id, callback=density_validation)
        self.setup_subscriber(salinity_dp_id, callback=salinity_validation)
        
        self.publish_to_plain_data_product(input1)

        self.assertTrue(density_validated.wait(10))
        self.assertTrue(salinity_validated.wait(10))
        density_validated.clear()
        salinity_validated.clear()


        self.publish_to_plain_data_product(input2)
        self.assertTrue(density_validated.wait(10))
        self.assertFalse(salinity_validated.wait(0.75))
        density_validated.clear()
        salinity_validated.clear()



    def test_visual_transform(self):
        input_data_product_id = self.ctd_plain_input_data_product()
        output_data_product_id = self.google_dt_data_product()
        dpd = DataProcessDefinition(name='visual transform')
        dpd.data_process_type = DataProcessTypeEnum.TRANSFORM
        dpd.module = 'ion.processes.data.transforms.viz.google_dt'
        dpd.class_name = 'VizTransformGoogleDT'

        #--------------------------------------------------------------------------------
        # Walk before we base jump
        #--------------------------------------------------------------------------------

        data_process_definition_id = self.data_process_management.create_data_process_definition(dpd)
        self.addCleanup(self.data_process_management.delete_data_process_definition, data_process_definition_id)
    
        data_process_id = self.data_process_management.create_data_process2(data_process_definition_id=data_process_definition_id, in_data_product_ids=[input_data_product_id], out_data_product_ids=[output_data_product_id])
        self.addCleanup(self.data_process_management.delete_data_process2,data_process_id)


        self.data_process_management.activate_data_process2(data_process_id)
        self.addCleanup(self.data_process_management.deactivate_data_process2, data_process_id)

        validated = Event()
        def validation(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            self.assertTrue(rdt['google_dt_components'] is not None)
            validated.set()

        self.setup_subscriber(output_data_product_id, callback=validation)

        self.publish_to_plain_data_product(input_data_product_id)
        self.assertTrue(validated.wait(10))
Example No. 24
class CtdTransformsIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(CtdTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub            = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0

        # Cleanup of queue created by the subscriber
        self.queue_cleanup = []
        self.data_process_cleanup = []

    def _create_input_param_dict_for_test(self, parameter_dict_name = ''):

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        cond_ctxt.uom = ''
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        pres_ctxt.uom = ''
        pdict.add_context(pres_ctxt)

        if parameter_dict_name == 'input_param_dict':
            temp_ctxt = ParameterContext('temperature', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        else:
            temp_ctxt = ParameterContext('temp', param_type=QuantityType(value_encoding=numpy.dtype('float32')))

        temp_ctxt.uom = ''
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        dens_ctxt.uom = ''
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        sal_ctxt.uom = ''
        pdict.add_context(sal_ctxt)

        # Register the parameter contexts and build the parameter dictionary so the data product can later create its stream definition and stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            if parameter_dict_name == 'input_param_dict':
                self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id)
            elif  parameter_dict_name == 'output_param_dict' and pc[1].name == 'temp':
                self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id

    def _get_new_ctd_L0_packet(self, stream_definition_id, length):
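        """Build a granule containing `length` samples, filling each QuantityType
        parameter in the given stream definition with random values."""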

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i+length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])

        g = rdt.to_granule()
        self.i+=length

        return g

    def _create_calibration_coefficients_dict(self):
        config = DotDict()
        config.process.calibration_coeffs =  {
              'temp_calibration_coeffs': {
                  'TA0' : 1.561342e-03,
                  'TA1' : 2.561486e-04,
                  'TA2' : 1.896537e-07,
                  'TA3' : 1.301189e-07,
                  'TOFFSET' : 0.000000e+00
              },

              'cond_calibration_coeffs':  {
                  'G' : -9.896568e-01,
                  'H' : 1.316599e-01,
                  'I' : -2.213854e-04,
                  'J' : 3.292199e-05,
                  'CPCOR' : -9.570000e-08,
                  'CTCOR' : 3.250000e-06,
                  'CSLOPE' : 1.000000e+00
              },

              'pres_calibration_coeffs' : {
                  'PA0' : 4.960417e-02,
                  'PA1' : 4.883682e-04,
                  'PA2' : -5.687309e-12,
                  'PTCA0' : 5.249802e+05,
                  'PTCA1' : 7.595719e+00,
                  'PTCA2' : -1.322776e-01,
                  'PTCB0' : 2.503125e+01,
                  'PTCB1' : 5.000000e-05,
                  'PTCB2' : 0.000000e+00,
                  'PTEMPA0' : -6.431504e+01,
                  'PTEMPA1' : 5.168177e+01,
                  'PTEMPA2' : -2.847757e-01,
                  'POFFSET' : 0.000000e+00
              }

          }

        return config

    def clean_queues(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()

    def cleaning_operations(self):
        for dproc_id in self.data_process_cleanup:
            self.data_process_management.delete_data_process(dproc_id)

    def test_ctd_L1_all(self):
        """
        Test that packets are processed by the ctd_L1_all transform
        """

        #----------- Data Process Definition --------------------------------

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='CTDBP_L1_Transform',
            description='Take granules on the L0 stream carrying the C, T and P data, apply the algorithms separately, and output the results on the L1 stream.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L1',
            class_name='CTDBP_L1_Transform')

        dprocdef_id = self.data_process_management.create_data_process_definition(dpd_obj)
        self.addCleanup(self.data_process_management.delete_data_process_definition, dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        #----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        # Get the stream definition for the stream using the parameter dictionary
        L0_pdict_id = self._create_input_param_dict_for_test(parameter_dict_name = 'input_param_dict')

        L0_stream_def_id = self.pubsub.create_stream_definition(name='parsed', parameter_dictionary_id=L0_pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, L0_stream_def_id)

        L1_pdict_id = self._create_input_param_dict_for_test(parameter_dict_name = 'output_param_dict')
        L1_stream_def_id = self.pubsub.create_stream_definition(name='L1_out', parameter_dictionary_id=L1_pdict_id)
        self.addCleanup(self.pubsub.delete_stream_definition, L1_stream_def_id)


        log.debug("Got the parsed parameter dictionary: id: %s", L0_pdict_id)
        log.debug("Got the stream def for parsed input: %s", L0_stream_def_id)

        log.debug("got the stream def for the output: %s", L1_stream_def_id)

        # Input data product
        L0_stream_dp_obj = IonObject(RT.DataProduct,
            name='L0_stream',
            description='L0 stream input to CTDBP L1 transform',
            temporal_domain = tdom,
            spatial_domain = sdom)

        input_dp_id = self.dataproduct_management.create_data_product(data_product=L0_stream_dp_obj,
            stream_definition_id=L0_stream_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id)

        # output data product
        L1_stream_dp_obj = IonObject(RT.DataProduct,
            name='L1_stream',
            description='L1_stream output of CTDBP L1 transform',
            temporal_domain = tdom,
            spatial_domain = sdom)

        L1_stream_dp_id = self.dataproduct_management.create_data_product(data_product=L1_stream_dp_obj,
            stream_definition_id=L1_stream_def_id
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, L1_stream_dp_id)

        # The key name here must be "L1_stream": when the data process is launched,
        # this name becomes config.process.publish_streams.L1_stream in the process configuration
        self.output_products = {'L1_stream' : L1_stream_dp_id}
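        # i.e. the launched transform process roughly receives (illustrative):
        #   config.process.publish_streams = {'L1_stream': <L1 stream id>}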
        out_stream_ids, _ = self.resource_registry.find_objects(L1_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        config = self._create_calibration_coefficients_dict()
        dproc_id = self.data_process_management.create_data_process( dprocdef_id, [input_dp_id], self.output_products, config)
        self.addCleanup(self.data_process_management.delete_data_process, dproc_id)
        log.debug("Created a data process for ctdbp_L1. id: %s", dproc_id)

        # Activate the data process
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(input_dp_id, PRED.hasStream, RT.Stream, True)

        input_stream_id = stream_ids[0]
        input_stream = self.resource_registry.read(input_stream_id)
        stream_route = input_stream.stream_route

        log.debug("The input stream for the L1 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------

        ar = gevent.event.AsyncResult()
        def subscriber(m,r,s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub', callback=subscriber)

        sub_id = self.pubsub.create_subscription('subscriber_to_transform',
            stream_ids=[output_stream_id],
            exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)


        sub.start()
        self.addCleanup(sub.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------

        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_L0_packet(stream_definition_id=L0_stream_def_id, length = 5)

        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = ar.get(timeout=20)

        log.debug("Got the following granule from the transform: %s", granule_from_transform)

        # Check that the granule published by the L1 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)


    def _check_granule_from_transform(self, granule):
        """
        An internal method to check if a granule has the right properties
        """

        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertIn('pressure', rdt)
        self.assertIn('temp', rdt)
        self.assertIn('conductivity', rdt)
        self.assertIn('time', rdt)
Example No. 25
    def test_dm_integration(self):
        '''
        test_dm_integration
        Test full DM Services Integration
        '''
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        #-----------------------------

        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(
            node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(
            node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(
            node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = 'test_dm_integration'

        ###
        ### In the beginning there were two stream definitions...
        ###
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = pubsub_management_service.create_stream_definition(
            container=ctd_stream_def, name='Simulated CTD data')

        # create a stream definition for the data from the salinity Transform
        sal_stream_def_id = pubsub_management_service.create_stream_definition(
            container=SalinityTransform.outgoing_stream_def,
            name='Scalar Salinity data stream')

        ###
        ### And two process definitions...
        ###
        # one for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.ctd_stream_publisher',
            'class': 'SimpleCtdPublisher'
        }

        ctd_sim_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        # one for the salinity transform
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'class': 'SalinityTransform'
        }

        salinity_transform_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        #---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        #---------------------------
        # Configure ingestion using eight workers, ingesting to test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,
                                       datastore_profile='SCIDATA'),
            number_of_workers=1)
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        #---------------------------
        # Set up the producer (CTD Simulator)
        #---------------------------

        # Create the stream
        ctd_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=ctd_stream_def_id)

        # Set up the datasets
        ctd_dataset_id = dataset_management_service.create_dataset(
            stream_id=ctd_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of this dataset
        ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=ctd_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=
            ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        #---------------------------
        # Set up the salinity transform
        #---------------------------

        # Create the stream
        sal_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=sal_stream_def_id)

        # Set up the datasets
        sal_dataset_id = dataset_management_service.create_dataset(
            stream_id=sal_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of the salinity as a dataset
        sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=sal_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=
            ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        # Create a subscription as input to the transform
        sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery(stream_ids=[
                ctd_stream_id,
            ]),
            exchange_name='salinity_transform_input'
        )  # how do we make these names??? i.e. Should they be anonymous?

        # create the salinity transform
        sal_transform_id = transform_management_service.create_transform(
            name='example salinity transform',
            in_subscription_id=sal_transform_input_subscription_id,
            out_streams={
                'output': sal_stream_id,
            },
            process_definition_id=salinity_transform_procdef_id,
            # no configuration needed at this time...
        )
        # start the transform - for a test case it makes sense to do it before starting the producer but it is not required
        transform_management_service.activate_transform(
            transform_id=sal_transform_id)

        # Start the ctd simulator to produce some data
        configuration = {
            'process': {
                'stream_id': ctd_stream_id,
            }
        }
        ctd_sim_pid = process_dispatcher.schedule_process(
            process_definition_id=ctd_sim_procdef_id,
            configuration=configuration)

        ###
        ### Make a subscriber in the test to listen for salinity data
        ###
        salinity_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery([
                sal_stream_id,
            ]),
            exchange_name='salinity_test',
            name="test salinity subscription",
        )

        pid = cc.spawn_process(name='dummy_process_for_test',
                               module='pyon.ion.process',
                               cls='SimpleProcess',
                               config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process,
                                                         node=cc.node)

        result = gevent.event.AsyncResult()
        results = []

        def message_received(message, headers):
            # count incoming salinity granules; signal once more than three have arrived
            log.warn('Salinity data received!')
            results.append(message)
            if len(results) > 3:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(
            exchange_name='salinity_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        pubsub_management_service.activate_subscription(
            subscription_id=salinity_subscription_id)

        # Assert that we have received data
        assertions(result.get(timeout=10))

        # stop the flow and parse the messages...
        process_dispatcher.cancel_process(
            ctd_sim_pid
        )  # kill the ctd simulator process - that is enough data

        for message in results:

            psd = PointSupplementStreamParser(
                stream_definition=SalinityTransform.outgoing_stream_def,
                stream_granule=message)

            # Test the handy info method for the names of fields in the stream def
            assertions('salinity' in psd.list_field_names())

            # you have to know the name of the coverage in stream def
            salinity = psd.get_values('salinity')

            import numpy

            assertions(isinstance(salinity, numpy.ndarray))

            assertions(numpy.nanmin(salinity) >
                       0.0)  # salinity should always be greater than 0
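
The pattern above is easy to lift into other tests: a subscriber callback accumulates granules and trips a gevent AsyncResult once enough have arrived, so the test can block with a timeout instead of sleeping. A minimal sketch of just that pattern (the names here are illustrative, not taken from the example above):

import gevent.event

results = []
done = gevent.event.AsyncResult()

def message_received(message, headers):
    # collect every granule; signal once more than three have been seen
    results.append(message)
    if len(results) > 3:
        done.set(True)

# wire message_received into a subscriber, start it, then activate the subscription;
# done.get(timeout=10) raises gevent.Timeout if no data shows up in time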
Exemplo n.º 26
0
class TestTransformWorkerSubscriptions(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management_client = DatasetManagementServiceClient(
            node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(
            node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)

        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_multi_subscriptions(self):
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_one',
                                 description='input test stream one')
        self.input_dp_one_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_two',
                                 description='input test stream two')
        self.input_dp_two_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        #retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
        self.stream_one_id = stream_ids[0]

        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_two_id, PRED.hasStream, RT.Stream, True)
        self.stream_two_id = stream_ids[0]

        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products(
        )
        first_dp_id = self.create_data_process_one(dpd_id,
                                                   dp1_func_output_dp_id)

        second_dp_id = self.create_data_process_two(dpd_id,
                                                    self.input_dp_two_id,
                                                    dp2_func_output_dp_id)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=first_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #create a subscription to stream ONE on the first data process's queue, then publish a granule on stream ONE

        #create a queue to catch the published granules of stream ONE
        self.subscription_one_id = self.pubsub_client.create_subscription(
            name='parsed_subscription_one',
            stream_ids=[self.stream_one_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_one_id)

        self.pubsub_client.activate_subscription(self.subscription_one_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_one_id)

        stream_route_one = self.pubsub_client.read_stream_route(
            self.stream_one_id)
        self.publisher_one = StandaloneStreamPublisher(
            stream_id=self.stream_one_id, stream_route=stream_route_one)

        self.start_event_listener()

        #data process 1 adds conductivity + pressure and puts the result in salinity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=second_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #create a subscription to stream TWO on the second data process's queue, then publish granules on streams ONE and TWO

        #create a queue to catch the published granules of stream TWO
        self.subscription_two_id = self.pubsub_client.create_subscription(
            name='parsed_subscription_one_two',
            stream_ids=[self.stream_two_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_two_id)

        self.pubsub_client.activate_subscription(self.subscription_two_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_two_id)

        stream_route_two = self.pubsub_client.read_stream_route(
            self.stream_two_id)
        self.publisher_two = StandaloneStreamPublisher(
            stream_id=self.stream_two_id, stream_route=stream_route_two)

        #data process 1 adds conductivity + pressure and puts the result in salinity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        #data process 2 adds salinity + pressure and puts the result in conductivity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [22]
        rdt['pressure'] = [4]
        rdt['salinity'] = [1]

        self.publisher_two.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_two_id)

        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_two_transforms_inline(self):
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_one',
                                 description='input test stream one')
        self.input_dp_one_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products(
        )

        first_dp_id = self.create_data_process_one(dpd_id,
                                                   dp1_func_output_dp_id)
        second_dp_id = self.create_data_process_two(dpd_id,
                                                    dp1_func_output_dp_id,
                                                    dp2_func_output_dp_id)

        #retrieve subscription from data process one
        subscription_objs, _ = self.rrclient.find_objects(
            subject=first_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #retrieve the Streams for these data products
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
        self.stream_one_id = stream_ids[0]
        #the input to data process two is the output from data process one
        stream_ids, assoc_ids = self.rrclient.find_objects(
            dp1_func_output_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_two_id = stream_ids[0]

        # Run provenance on the output dataproduct of the second data process to see all the links
        # are as expected
        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            dp2_func_output_dp_id)

        # Do a basic check that there are 3 entries in the provenance graph: the output, its parent, and the original input.
        self.assertTrue(len(output_data_product_provenance) == 3)
        # confirm that the linking from the output dataproduct to input dataproduct is correct
        self.assertTrue(
            dp1_func_output_dp_id in
            output_data_product_provenance[dp2_func_output_dp_id]['parents'])
        self.assertTrue(
            self.input_dp_one_id in
            output_data_product_provenance[dp1_func_output_dp_id]['parents'])

        #create a subscription to streams ONE and TWO on the first data process's queue, then publish a granule on stream ONE

        #the queue catches the published granules of both streams
        subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_one_id, self.stream_two_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        subscription_id)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        subscription_id)

        stream_route_one = self.pubsub_client.read_stream_route(
            self.stream_one_id)
        self.publisher_one = StandaloneStreamPublisher(
            stream_id=self.stream_one_id, stream_route=stream_route_one)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=second_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #data process 1 adds conductivity + pressure and puts the result in salinity
        #data process 2 adds salinity + pressure and puts the result in conductivity

        self.start_event_listener()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))

    def create_data_process_definition(self):

        #two data processes share one transform function and one DPD

        # Set up the DPD - an array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri=
            'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        )
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
        )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        return add_array_dpd_id

    def create_data_process_one(self, data_process_definition_id,
                                output_dataproduct):

        # Create the data process
        #data process 1 adds conductivity + pressure and puts the result in salinity
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "salinity"
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=data_process_definition_id,
            inputs=[self.input_dp_one_id],
            outputs=[output_dataproduct],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp1_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dp1_data_process_id)
        self.dp_list.append(dp1_data_process_id)

        return dp1_data_process_id

    def create_data_process_two(self, data_process_definition_id,
                                input_dataproduct, output_dataproduct):

        # Create the data process
        #data process 2 adds salinity + pressure and puts the result in conductivity
        argument_map = {'arr1': 'salinity', 'arr2': 'pressure'}
        output_param = 'conductivity'
        dp2_func_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=data_process_definition_id,
            inputs=[input_dataproduct],
            outputs=[output_dataproduct],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp2_func_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dp2_func_data_process_id)
        self.dp_list.append(dp2_func_data_process_id)

        return dp2_func_data_process_id

    def create_output_data_products(self):

        dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(
            name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp1_output_dp_obj = IonObject(RT.DataProduct,
                                      name='data_process1_data_product',
                                      description='output of add array func')

        dp1_func_output_dp_id = self.dataproductclient.create_data_product(
            dp1_output_dp_obj, dp1_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product,
                        dp1_func_output_dp_id)
        # Retrieve the id of the OUTPUT stream from the output Data Product
        stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id,
                                                   PRED.hasStream, None, True)
        self._output_stream_one_id = stream_ids[0]

        dp2_func_outgoing_stream_id = self.pubsub_client.create_stream_definition(
            name='dp2_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp2_func_output_dp_obj = IonObject(
            RT.DataProduct,
            name='data_process2_data_product',
            description='output of add array func')

        dp2_func_output_dp_id = self.dataproductclient.create_data_product(
            dp2_func_output_dp_obj, dp2_func_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product,
                        dp2_func_output_dp_id)
        # Retrieve the id of the OUTPUT stream from the output Data Product
        stream_ids, _ = self.rrclient.find_objects(dp2_func_output_dp_id,
                                                   PRED.hasStream, None, True)
        self._output_stream_two_id = stream_ids[0]

        subscription_id = self.pubsub_client.create_subscription(
            'validator',
            data_product_ids=[dp1_func_output_dp_id, dp2_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription,
                        subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s',
                      stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)

        validator = StandaloneStreamSubscriber('validator',
                                               callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        subscription_id)

        return dp1_func_output_dp_id, dp2_func_output_dp_id

    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s",
                  str(data_process_event.__dict__))

        #skip the initial 'data process assigned to transform worker' notification; for all other events check the origin
        if 'data process assigned to transform worker' not in data_process_event.description:
            self.assertIn(data_process_event.origin, self.dp_list)

    def validate_output_granule(self, msg, route, stream_id):
        self.assertTrue(
            stream_id in
            [self._output_stream_one_id, self._output_stream_two_id])

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  stream_id: %s', stream_id)

        if stream_id == self._output_stream_one_id:
            sal_val = rdt['salinity']
            log.debug('validate_output_granule  sal_val: %s', sal_val)
            np.testing.assert_array_equal(sal_val, np.array([3]))
            self.event1_verified.set()
        else:
            cond_val = rdt['conductivity']
            log.debug('validate_output_granule  cond_val: %s', cond_val)
            np.testing.assert_array_equal(cond_val, np.array([5]))
            self.event2_verified.set()

    def start_event_listener(self):

        es = EventSubscriber(event_type=OT.DataProcessStatusEvent,
                             callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)
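
The data process definition above points at a transform function shipped in an external egg (module "ion_example.add_arrays", function "add_arrays", arguments 'arr1' and 'arr2'). The egg itself is not part of this example, so the following is only a guess at a minimal implementation consistent with the assertions (e.g. conductivity [1] + pressure [2] is expected to come back as salinity [3]):

import numpy as np

def add_arrays(arr1, arr2):
    # element-wise sum; the transform worker maps the result onto out_param_name
    return np.asarray(arr1) + np.asarray(arr2)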
Exemplo n.º 27
0
class RecordDictionaryIntegrationTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management  = PubsubManagementServiceClient()

        self.rdt                      = None
        self.data_producer_id         = None
        self.provider_metadata_update = None
        self.event                    = Event()

    def verify_incoming(self, m,r,s):
        rdt = RecordDictionaryTool.load_from_granule(m)
        for k,v in rdt.iteritems():
            np.testing.assert_array_equal(v, self.rdt[k])
        self.assertEquals(m.data_producer_id, self.data_producer_id)
        self.assertEquals(m.provider_metadata_update, self.provider_metadata_update)
        self.assertNotEqual(m.creation_timestamp, None)
        self.event.set()


    def test_serialize_compatability(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_extended_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition('ctd extended', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd1', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        verified = Event()
        def verifier(msg, route, stream_id):
            for k,v in msg.record_dictionary.iteritems():
                if v is not None:
                    self.assertIsInstance(v, np.ndarray)
            rdt = RecordDictionaryTool.load_from_granule(msg)
            for k,v in rdt.iteritems():
                self.assertIsInstance(rdt[k], np.ndarray)
                self.assertIsInstance(v, np.ndarray)
            verified.set()

        subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        publisher = StandaloneStreamPublisher(stream_id,route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        ph.fill_rdt(rdt,10)
        publisher.publish(rdt.to_granule())
        self.assertTrue(verified.wait(60))


    def test_granule(self):
        
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        self.addCleanup(self.pubsub_management.delete_stream_definition,stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream,stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['pressure'] = [20] * 10

        self.assertEquals(set(pdict.keys()), set(rdt.fields))
        self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

        self.assertEquals(rdt._stream_config['reference_designator'],"GA03FLMA-RI001-13-CTDMOG999")

        self.rdt = rdt
        self.data_producer_id = 'data_producer'
        self.provider_metadata_update = {1:1}

        publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1:1}))

        self.assertTrue(self.event.wait(10))
        
        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.array([None,None,None])
        self.assertTrue(rdt['time'] is None)
        
        rdt['time'] = np.array([None, 1, 2])
        self.assertEquals(rdt['time'][0], rdt.fill_value('time'))


        stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
        rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)


        granule = rdt.to_granule()
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        np.testing.assert_array_equal(rdt['temp'], np.arange(20))

        
    def test_filter(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        filtered_stream_def_id = self.pubsub_management.create_stream_definition('filtered', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, filtered_stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=filtered_stream_def_id)
        self.assertEquals(rdt._available_fields,['time','temp'])
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        with self.assertRaises(KeyError):
            rdt['pressure'] = np.arange(20)

        granule = rdt.to_granule()
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        self.assertEquals(rdt._available_fields, rdt2._available_fields)
        self.assertEquals(rdt.fields, rdt2.fields)
        for k,v in rdt.iteritems():
            self.assertTrue(np.array_equal(rdt[k], rdt2[k]))
        


    def test_rdt_param_funcs(self):
        param_funcs = {
            'identity' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.interpolation',
                'function' : 'identity',
                'args':['x']
            },
            'ctd_tempwat' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_sbe37im_tempwat',
                'args' : ['t0']
            },
            'ctd_preswat' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_sbe37im_preswat',
                'args' : ["p0", "p_range_psia"]
            },
            'ctd_condwat' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_sbe37im_condwat',
                'args' : ['c0']
            },
            'ctd_pracsal' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_pracsal',
                'args' : ['c', 't', 'p']
            },
            'ctd_density' : {
                'function_type' : PFT.PYTHON,
                'owner' : 'ion_functions.data.ctd_functions',
                'function' : 'ctd_density',
                'args' : ['SP','t','p','lat','lon']
            }
        }

        pfunc_ids = {}
        for name, param_def in param_funcs.iteritems():
            paramfunc = ParameterFunction(name, **param_def)
            pf_id = self.dataset_management.create_parameter_function(paramfunc)
            pfunc_ids[name] = pf_id


        params = {
            'time' : {
                'parameter_type' : 'quantity',
                'value_encoding' : 'float64',
                'units' : 'seconds since 1900-01-01'
            },
            'temperature_counts' : {
                'parameter_type' : 'quantity',
                'value_encoding' : 'float32',
                'units' : '1'
            },
            'pressure_counts' : {
                'parameter_type' : 'quantity',
                'value_encoding' : 'float32',
                'units' : '1'
            },
            'conductivity_counts' : {
                'parameter_type' : 'quantity',
                'value_encoding' : 'float32',
                'units' : '1'
            },
            'temperature' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_tempwat'],
                'parameter_function_map' : { 't0' : 'temperature_counts'},
                'value_encoding' : 'float32',
                'units' : 'deg_C'
            },
            'pressure' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_preswat'],
                'parameter_function_map' : {'p0' : 'pressure_counts', 'p_range_psia' : 679.34040721},
                'value_encoding' : 'float32',
                'units' : 'dbar'
            },
            'conductivity' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_condwat'],
                'parameter_function_map' : {'c0' : 'conductivity_counts'},
                'value_encoding' : 'float32',
                'units' : 'S m-1'
            },
            'salinity' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_pracsal'],
                'parameter_function_map' : {'c' : 'conductivity', 't' : 'temperature', 'p' : 'pressure'},
                'value_encoding' : 'float32',
                'units' : '1'
            },
            'density' : {
                'parameter_type' : 'function',
                'parameter_function_id' : pfunc_ids['ctd_density'],
                'parameter_function_map' : {
                    'SP' : 'salinity',
                    't' : 'temperature',
                    'p' : 'pressure',
                    'lat' : 'lat',
                    'lon' : 'lon'
                },
                'value_encoding' : 'float32',
                'units' : 'kg m-3'
            },
            'lat' : {
                'parameter_type' : 'sparse',
                'value_encoding' : 'float32',
                'units' : 'degrees_north'
            },
            'lon' : {
                'parameter_type' : 'sparse',
                'value_encoding' : 'float32',
                'units' : 'degrees_east'
            }
        }
        param_dict = {}
        for name, param in params.iteritems():
            pcontext = ParameterContext(name, **param)
            param_id = self.dataset_management.create_parameter(pcontext)
            param_dict[name] = param_id
            
        pdict_id = self.dataset_management.create_parameter_dictionary('ctd_test', param_dict.values(), 'time')
        stream_def_id = self.pubsub_management.create_stream_definition('ctd_test', parameter_dictionary_id=pdict_id)


        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temperature_counts'] = [280000]
        rdt['conductivity_counts'] = [100000]
        rdt['pressure_counts'] = [2789]

        rdt['lat'] = [45]
        rdt['lon'] = [-71]

        np.testing.assert_allclose(rdt['density'], np.array([1001.00543606]))

    def test_rdt_lookup(self):
        rdt = self.create_lookup_rdt()

        self.assertTrue('offset_a' in rdt.lookup_values())
        self.assertFalse('offset_b' in rdt.lookup_values())

        rdt['time'] = [0]
        rdt['temp'] = [10.0]
        rdt['offset_a'] = [2.0]
        self.assertEquals(rdt['offset_b'], None)
        self.assertEquals(rdt.lookup_values(), ['offset_a'])
        np.testing.assert_array_almost_equal(rdt['calibrated'], np.array([12.0]))

        svm = StoredValueManager(self.container)
        svm.stored_value_cas('coefficient_document', {'offset_b':2.0})
        svm.stored_value_cas("GA03FLMA-RI001-13-CTDMOG999_OFFSETC", {'offset_c':3.0})
        rdt.fetch_lookup_values()
        np.testing.assert_array_equal(rdt['offset_b'], np.array([2.0]))
        np.testing.assert_array_equal(rdt['calibrated_b'], np.array([14.0]))
        np.testing.assert_array_equal(rdt['offset_c'], np.array([3.0]))


    def create_rdt(self):
        contexts, pfuncs = self.create_pfuncs()
        context_ids = list(contexts.itervalues())

        pdict_id = self.dataset_management.create_parameter_dictionary(name='functional_pdict', parameter_context_ids=context_ids, temporal_context='test_TIME')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)
        stream_def_id = self.pubsub_management.create_stream_definition('functional', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt

    def create_lookup_rdt(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()

        stream_def_id = self.pubsub_management.create_stream_definition('lookup', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator':"GA03FLMA-RI001-13-CTDMOG999"})
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        return rdt


    def create_pfuncs(self):
        
        contexts = {}
        funcs = {}

        t_ctxt = ParameterContext(name='TIME', 
                                  parameter_type='quantity',
                                  value_encoding='float64',
                                  units='seconds since 1900-01-01')
        t_ctxt_id = self.dataset_management.create_parameter(t_ctxt)
        contexts['TIME'] = t_ctxt_id

        lat_ctxt = ParameterContext(name='LAT', 
                                    parameter_type="sparse",
                                    value_encoding='float32',
                                    units='degrees_north')
        lat_ctxt_id = self.dataset_management.create_parameter(lat_ctxt)
        contexts['LAT'] = lat_ctxt_id

        lon_ctxt = ParameterContext(name='LON', 
                                    parameter_type='sparse',
                                    value_encoding='float32',
                                    units='degrees_east')
        lon_ctxt_id = self.dataset_management.create_parameter(lon_ctxt)
        contexts['LON'] = lon_ctxt_id

        # Independent Parameters

        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext(name='TEMPWAT_L0', 
                parameter_type='quantity',
                value_encoding='float32',
                units='deg_C')
        temp_ctxt_id = self.dataset_management.create_parameter(temp_ctxt)
        contexts['TEMPWAT_L0'] = temp_ctxt_id

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext(name='CONDWAT_L0', 
                parameter_type='quantity',
                value_encoding='float32',
                units='S m-1')
        cond_ctxt_id = self.dataset_management.create_parameter(cond_ctxt)
        contexts['CONDWAT_L0'] = cond_ctxt_id

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext(name='PRESWAT_L0', 
                parameter_type='quantity',
                value_encoding='float32',
                units='dbar')
        press_ctxt_id = self.dataset_management.create_parameter(press_ctxt)
        contexts['PRESWAT_L0'] = press_ctxt_id


        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        expr = ParameterFunction(name='TEMPWAT_L1',
                function_type=PFT.NUMEXPR,
                function=tl1_func,
                args=['T'])
        expr_id = self.dataset_management.create_parameter_function(expr)
        funcs['TEMPWAT_L1'] = expr_id

        tl1_pmap = {'T': 'TEMPWAT_L0'}
        tempL1_ctxt = ParameterContext(name='TEMPWAT_L1', 
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map=tl1_pmap,
                value_encoding='float32',
                units='deg_C')
        tempL1_ctxt_id = self.dataset_management.create_parameter(tempL1_ctxt)
        contexts['TEMPWAT_L1'] = tempL1_ctxt_id

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        expr = ParameterFunction(name='CONDWAT_L1',
                function_type=PFT.NUMEXPR,
                function=cl1_func,
                args=['C'])
        expr_id = self.dataset_management.create_parameter_function(expr)
        funcs['CONDWAT_L1'] = expr_id

        cl1_pmap = {'C': 'CONDWAT_L0'}
        condL1_ctxt = ParameterContext(name='CONDWAT_L1', 
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map=cl1_pmap,
                value_encoding='float32',
                units='S m-1')
        condL1_ctxt_id = self.dataset_management.create_parameter(condL1_ctxt)
        contexts['CONDWAT_L1'] = condL1_ctxt_id

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        expr = ParameterFunction(name='PRESWAT_L1',function=pl1_func,function_type=PFT.NUMEXPR,args=['P','p_range'])
        expr_id = self.dataset_management.create_parameter_function(expr)
        funcs['PRESWAT_L1'] = expr_id
        
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        presL1_ctxt = ParameterContext(name='PRESWAT_L1',
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map=pl1_pmap,
                value_encoding='float32',
                units='dbar')
        presL1_ctxt_id = self.dataset_management.create_parameter(presL1_ctxt)
        contexts['PRESWAT_L1'] = presL1_ctxt_id

        # Density & practical salinity calculated using the Gibbs Seawater library - available via the python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        expr = ParameterFunction(name='PRACSAL',function_type=PFT.PYTHON,function=sal_func,owner=owner,args=sal_arglist)
        expr_id = self.dataset_management.create_parameter_function(expr)
        funcs['PRACSAL'] = expr_id
        
        c10_f = ParameterFunction(name='c10', function_type=PFT.NUMEXPR, function='C*10', args=['C'])
        expr_id = self.dataset_management.create_parameter_function(c10_f)
        c10 = ParameterContext(name='c10', 
                parameter_type='function',
                parameter_function_id=expr_id,
                parameter_function_map={'C':'CONDWAT_L1'},
                value_encoding='float32',
                units='1')
        c10_id = self.dataset_management.create_parameter(c10)
        contexts['c10'] = c10_id

        # PRACSAL is derived from the scaled conductivity (c10), temperature and pressure
        sal_pmap = {'C': 'c10', 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'}
        sal_ctxt = ParameterContext(name='PRACSAL',
                parameter_type='function',
                parameter_function_id=funcs['PRACSAL'],
                parameter_function_map=sal_pmap,
                value_encoding='float32',
                units='g kg-1')

        sal_ctxt_id = self.dataset_management.create_parameter(sal_ctxt)
        contexts['PRACSAL'] = sal_ctxt_id

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON','LAT'])
        cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = CoverageParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(name='DENSITY', parameter_context=dens_ctxt.dump())
        self.addCleanup(self.dataset_management.delete_parameter_context, dens_ctxt_id)
        contexts['DENSITY'] = dens_ctxt_id
        return contexts, funcs
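
For reference, the NUMEXPR parameter functions registered in create_pfuncs can also be evaluated outside the coverage machinery. A standalone sketch, assuming numexpr and numpy are installed (input counts chosen to match the values used elsewhere in these examples):

import numexpr
import numpy as np

T = np.array([280000.0], dtype='float32')   # TEMPWAT_L0 counts
C = np.array([100000.0], dtype='float32')   # CONDWAT_L0 counts
P = np.array([2789.0], dtype='float32')     # PRESWAT_L0 counts
p_range = 679.34040721                      # fixed calibration coefficient

tempwat_l1 = numexpr.evaluate('(T / 10000) - 10')      # ~18.0 deg_C
condwat_l1 = numexpr.evaluate('(C / 100000) - 0.5')    # ~0.5 S m-1
preswat_l1 = numexpr.evaluate('(P * p_range / (0.85 * 65536)) - (0.05 * p_range)')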
Exemplo n.º 28
0
    def test_raw_stream_integration(self):
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        #-----------------------------

        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(
            node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(
            node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = 'test_dm_integration'

        ###
        ### In the beginning there was one stream definition...
        ###
        # create a stream definition for the data from the ctd simulator
        raw_ctd_stream_def = SBE37_RAW_stream_definition()
        raw_ctd_stream_def_id = pubsub_management_service.create_stream_definition(
            container=raw_ctd_stream_def, name='Simulated RAW CTD data')

        ###
        ### And two process definitions...
        ###
        # one for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': 'ion.processes.data.raw_stream_publisher',
            'class': 'RawStreamPublisher'
        }

        raw_ctd_sim_procdef_id = process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        #---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        #---------------------------
        # Configure ingestion with one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,
                                       datastore_profile='SCIDATA'),
            number_of_workers=1)
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        #---------------------------
        # Set up the producer (CTD Simulator)
        #---------------------------

        # Create the stream
        raw_ctd_stream_id = pubsub_management_service.create_stream(
            stream_definition_id=raw_ctd_stream_def_id)

        # Set up the datasets
        raw_ctd_dataset_id = dataset_management_service.create_dataset(
            stream_id=raw_ctd_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule')

        # Configure ingestion of this dataset
        raw_ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id=raw_ctd_dataset_id,
            archive_data=True,
            archive_metadata=True,
            ingestion_configuration_id=
            ingestion_configuration_id,  # you need to know the ingestion configuration id!
        )
        # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        # Start the ctd simulator to produce some data
        configuration = {
            'process': {
                'stream_id': raw_ctd_stream_id,
            }
        }
        raw_sim_pid = process_dispatcher.schedule_process(
            process_definition_id=raw_ctd_sim_procdef_id,
            configuration=configuration)

        ###
        ### Make a subscriber in the test to listen for raw data
        ###
        raw_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery([
                raw_ctd_stream_id,
            ]),
            exchange_name='raw_test',
            name="test raw subscription",
        )

        # this is okay - even in cei mode!
        pid = cc.spawn_process(name='dummy_process_for_test',
                               module='pyon.ion.process',
                               cls='SimpleProcess',
                               config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process,
                                                         node=cc.node)

        result = gevent.event.AsyncResult()
        results = []

        def message_received(message, headers):
            # count incoming raw granules; signal once more than three have arrived
            log.warn('Raw data received!')
            results.append(message)
            if len(results) > 3:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(
            exchange_name='raw_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        pubsub_management_service.activate_subscription(
            subscription_id=raw_subscription_id)

        # Assert that we have received data
        assertions(result.get(timeout=10))

        # stop the flow and parse the messages...
        process_dispatcher.cancel_process(
            raw_sim_pid
        )  # kill the ctd simulator process - that is enough data

        gevent.sleep(1)

        for message in results:

            sha1 = message.identifiables['stream_encoding'].sha1

            data = message.identifiables['data_stream'].values

            filename = FileSystem.get_hierarchical_url(FS.CACHE, sha1, ".raw")

            with open(filename, 'r') as f:

                assertions(data == f.read())
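
The comparison above relies on the ingestion workers writing each raw packet into the file cache under a name derived from the packet's SHA-1 digest. A hedged sketch of that check, assuming the digest is the plain hex form (the framework may case-fold or decorate it differently):

import hashlib

def expected_sha1(data):
    # hex digest of the raw packet bytes; used to locate the cached file
    return hashlib.sha1(data).hexdigest()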
Exemplo n.º 29
0
class PubsubManagementIntTest(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

        self.pdicts = {}
        self.queue_cleanup = list()
        self.exchange_cleanup = list()

    def tearDown(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()

    def test_stream_def_crud(self):

        # Test Creation
        pdict = DatasetManagementService.get_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        stream_definition_id = self.pubsub_management.create_stream_definition(
            'ctd parsed', parameter_dictionary_id=pdict.identifier)

        # Make sure there is an assoc
        self.assertTrue(
            self.resource_registry.find_associations(
                subject=stream_definition_id,
                predicate=PRED.hasParameterDictionary,
                object=pdict.identifier,
                id_only=True))

        # Test Reading
        stream_definition = self.pubsub_management.read_stream_definition(
            stream_definition_id)
        self.assertTrue(
            PubsubManagementService._compare_pdicts(
                pdict.dump(), stream_definition.parameter_dictionary))

        # Test Deleting
        self.pubsub_management.delete_stream_definition(stream_definition_id)
        self.assertFalse(
            self.resource_registry.find_associations(
                subject=stream_definition_id,
                predicate=PRED.hasParameterDictionary,
                object=pdict.identifier,
                id_only=True))

        # Test comparisons
        in_stream_definition_id = self.pubsub_management.create_stream_definition(
            'L0 products',
            parameter_dictionary_id=pdict.identifier,
            available_fields=['time', 'temp', 'conductivity', 'pressure'])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        in_stream_definition_id)

        out_stream_definition_id = in_stream_definition_id
        self.assertTrue(
            self.pubsub_management.compare_stream_definition(
                in_stream_definition_id, out_stream_definition_id))
        self.assertTrue(
            self.pubsub_management.compatible_stream_definitions(
                in_stream_definition_id, out_stream_definition_id))

        out_stream_definition_id = self.pubsub_management.create_stream_definition(
            'L2 Products',
            parameter_dictionary_id=pdict.identifier,
            available_fields=['time', 'salinity', 'density'])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        out_stream_definition_id)
        self.assertFalse(
            self.pubsub_management.compare_stream_definition(
                in_stream_definition_id, out_stream_definition_id))

        self.assertTrue(
            self.pubsub_management.compatible_stream_definitions(
                in_stream_definition_id, out_stream_definition_id))

    def test_validate_stream_defs(self):
        #test no input
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(
            ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = []
        available_fields_out = []
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_0',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_0',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        #test input with no output
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(
            ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'
        ]
        available_fields_out = []
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_1',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_1',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test available field missing parameter context definition -- missing PRESWAT_L0
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0'])
        outgoing_pdict_id = self._get_pdict(
            ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'
        ]
        available_fields_out = ['DENSITY']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_2',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_2',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        #test L1 from L0
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(
            ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'
        ]
        available_fields_out = ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_3',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_3',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test L2 from L0
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(
            ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'DENSITY', 'PRACSAL'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'
        ]
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_4',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_4',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test Ln from L0
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(
            ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'
        ]
        available_fields_out = [
            'DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'
        ]
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_5',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_5',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test L2 from L1
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        outgoing_pdict_id = self._get_pdict(
            ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'
        ]
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_6',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_6',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test L1 from L0 missing L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON'])
        outgoing_pdict_id = self._get_pdict(
            ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_7',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_7',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        #test L2 from L0 missing L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON'])
        outgoing_pdict_id = self._get_pdict(
            ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_8',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_8',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        #test L2 from L0 missing L1
        incoming_pdict_id = self._get_pdict(
            ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL'])
        available_fields_in = [
            'TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'
        ]
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition(
            'in_sd_9',
            parameter_dictionary_id=incoming_pdict_id,
            available_fields=available_fields_in)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition(
            'out_sd_9',
            parameter_dictionary_id=outgoing_pdict_id,
            available_fields=available_fields_out)
        result = self.pubsub_management.validate_stream_defs(
            incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

    def publish_on_stream(self, stream_id, msg):
        stream = self.pubsub_management.read_stream(stream_id)
        stream_route = stream.stream_route
        publisher = StandaloneStreamPublisher(stream_id=stream_id,
                                              stream_route=stream_route)
        publisher.publish(msg)

    def test_stream_crud(self):
        stream_def_id = self.pubsub_management.create_stream_definition(
            'test_definition', stream_type='stream')
        topic_id = self.pubsub_management.create_topic(
            name='test_topic', exchange_point='test_exchange')
        self.exchange_cleanup.append('test_exchange')
        topic2_id = self.pubsub_management.create_topic(
            name='another_topic', exchange_point='outside')
        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream',
            topic_ids=[topic_id, topic2_id],
            exchange_point='test_exchange',
            stream_definition_id=stream_def_id)

        topics, assocs = self.resource_registry.find_objects(
            subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertEquals(topics, [topic_id])

        defs, assocs = self.resource_registry.find_objects(
            subject=stream_id,
            predicate=PRED.hasStreamDefinition,
            id_only=True)
        self.assertTrue(len(defs))

        stream = self.pubsub_management.read_stream(stream_id)
        self.assertEquals(stream.name, 'test_stream')
        self.pubsub_management.delete_stream(stream_id)

        with self.assertRaises(NotFound):
            self.pubsub_management.read_stream(stream_id)

        defs, assocs = self.resource_registry.find_objects(
            subject=stream_id,
            predicate=PRED.hasStreamDefinition,
            id_only=True)
        self.assertFalse(len(defs))

        topics, assocs = self.resource_registry.find_objects(
            subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertFalse(len(topics))

        self.pubsub_management.delete_topic(topic_id)
        self.pubsub_management.delete_topic(topic2_id)
        self.pubsub_management.delete_stream_definition(stream_def_id)

    def test_subscription_crud(self):
        stream_def_id = self.pubsub_management.create_stream_definition(
            'test_definition', stream_type='stream')
        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream',
            exchange_point='test_exchange',
            stream_definition_id=stream_def_id)
        subscription_id = self.pubsub_management.create_subscription(
            name='test subscription',
            stream_ids=[stream_id],
            exchange_name='test_queue')
        self.exchange_cleanup.append('test_exchange')

        subs, assocs = self.resource_registry.find_objects(
            subject=subscription_id, predicate=PRED.hasStream, id_only=True)
        self.assertEquals(subs, [stream_id])

        res, _ = self.resource_registry.find_resources(restype=RT.ExchangeName,
                                                       name='test_queue',
                                                       id_only=True)
        self.assertEquals(len(res), 1)

        subs, assocs = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)
        self.assertEquals(subs[0], res[0])

        subscription = self.pubsub_management.read_subscription(
            subscription_id)
        self.assertEquals(subscription.exchange_name, 'test_queue')

        self.pubsub_management.delete_subscription(subscription_id)

        subs, assocs = self.resource_registry.find_objects(
            subject=subscription_id, predicate=PRED.hasStream, id_only=True)
        self.assertFalse(len(subs))

        subs, assocs = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)
        self.assertFalse(len(subs))

        self.pubsub_management.delete_stream(stream_id)
        self.pubsub_management.delete_stream_definition(stream_def_id)

    def test_move_before_activate(self):
        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream', exchange_point='test_xp')

        #--------------------------------------------------------------------------------
        # Test moving before activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription(
            'first_queue', stream_ids=[stream_id])

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)
        self.assertEquals(xn_ids[0], subjects[0])

        self.pubsub_management.move_subscription(subscription_id,
                                                 exchange_name='second_queue')

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)

        self.assertEquals(len(subjects), 1)
        self.assertEquals(subjects[0], xn_ids[0])

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_move_activated_subscription(self):

        stream_id, route = self.pubsub_management.create_stream(
            name='test_stream', exchange_point='test_xp')
        #--------------------------------------------------------------------------------
        # Test moving after activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription(
            'first_queue', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)
        self.assertEquals(xn_ids[0], subjects[0])

        self.verified = Event()

        def verify(m, r, s):
            self.assertEquals(m, 'verified')
            self.verified.set()

        subscriber = StandaloneStreamSubscriber('second_queue', verify)
        subscriber.start()

        self.pubsub_management.move_subscription(subscription_id,
                                                 exchange_name='second_queue')

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(
            object=subscription_id,
            predicate=PRED.hasSubscription,
            id_only=True)

        self.assertEquals(len(subjects), 1)
        self.assertEquals(subjects[0], xn_ids[0])

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('verified')

        self.assertTrue(self.verified.wait(2))

        self.pubsub_management.deactivate_subscription(subscription_id)

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_queue_cleanup(self):
        stream_id, route = self.pubsub_management.create_stream(
            'test_stream', 'xp1')
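        # Delete any leftover 'queue1' broker queues from earlier runs so the
        # cleanup checks below start from a clean slate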
        xn_objs, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='queue1')
        for xn_obj in xn_objs:
            xn = self.container.ex_manager.create_xn_queue(xn_obj.name)
            xn.delete()
        subscription_id = self.pubsub_management.create_subscription(
            'queue1', stream_ids=[stream_id])
        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='queue1')
        self.assertEquals(len(xn_ids), 1)

        self.pubsub_management.delete_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(
            restype=RT.ExchangeName, name='queue1')
        self.assertEquals(len(xn_ids), 0)

    def test_activation_and_deactivation(self):
        stream_id, route = self.pubsub_management.create_stream(
            'stream1', 'xp1')
        subscription_id = self.pubsub_management.create_subscription(
            'sub1', stream_ids=[stream_id])

        self.check1 = Event()

        def verifier(m, r, s):
            self.check1.set()

        subscriber = StandaloneStreamSubscriber('sub1', verifier)
        subscriber.start()

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('should not receive')

        self.assertFalse(self.check1.wait(0.25))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        self.check1.clear()
        self.assertFalse(self.check1.is_set())

        self.pubsub_management.deactivate_subscription(subscription_id)

        publisher.publish('should not receive')
        self.assertFalse(self.check1.wait(0.5))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        subscriber.stop()

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_topic_crud(self):

        topic_id = self.pubsub_management.create_topic(
            name='test_topic', exchange_point='test_xp')
        self.exchange_cleanup.append('test_xp')

        topic = self.pubsub_management.read_topic(topic_id)

        self.assertEquals(topic.name, 'test_topic')
        self.assertEquals(topic.exchange_point, 'test_xp')

        self.pubsub_management.delete_topic(topic_id)
        with self.assertRaises(NotFound):
            self.pubsub_management.read_topic(topic_id)

    def test_full_pubsub(self):

        self.sub1_sat = Event()
        self.sub2_sat = Event()

        def subscriber1(m, r, s):
            self.sub1_sat.set()

        def subscriber2(m, r, s):
            self.sub2_sat.set()

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        self.queue_cleanup.append(sub1.xn.queue)
        sub1.start()

        sub2 = StandaloneStreamSubscriber('sub2', subscriber2)
        self.queue_cleanup.append(sub2.xn.queue)
        sub2.start()

        log_topic = self.pubsub_management.create_topic(
            'instrument_logs', exchange_point='instruments')
        science_topic = self.pubsub_management.create_topic(
            'science_data', exchange_point='instruments')
        events_topic = self.pubsub_management.create_topic(
            'notifications', exchange_point='events')

        log_stream, route = self.pubsub_management.create_stream(
            'instrument1-logs',
            topic_ids=[log_topic],
            exchange_point='instruments')
        ctd_stream, route = self.pubsub_management.create_stream(
            'instrument1-ctd',
            topic_ids=[science_topic],
            exchange_point='instruments')
        event_stream, route = self.pubsub_management.create_stream(
            'notifications', topic_ids=[events_topic], exchange_point='events')
        raw_stream, route = self.pubsub_management.create_stream(
            'temp', exchange_point='global.data')
        self.exchange_cleanup.extend(['instruments', 'events', 'global.data'])

        subscription1 = self.pubsub_management.create_subscription(
            'subscription1',
            stream_ids=[log_stream, event_stream],
            exchange_name='sub1')
        subscription2 = self.pubsub_management.create_subscription(
            'subscription2',
            exchange_points=['global.data'],
            stream_ids=[ctd_stream],
            exchange_name='sub2')
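        # subscription2 listens both to the ctd stream directly and to everything
        # published on the 'global.data' exchange point (which carries raw_stream)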

        self.pubsub_management.activate_subscription(subscription1)
        self.pubsub_management.activate_subscription(subscription2)

        self.publish_on_stream(log_stream, 1)
        self.assertTrue(self.sub1_sat.wait(4))
        self.assertFalse(self.sub2_sat.is_set())

        self.publish_on_stream(raw_stream, 1)
        self.assertTrue(self.sub2_sat.wait(4))

        sub1.stop()
        sub2.stop()

    def test_topic_craziness(self):

        self.msg_queue = Queue()

        def subscriber1(m, r, s):
            self.msg_queue.put(m)

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        self.queue_cleanup.append(sub1.xn.queue)
        sub1.start()

        topic1 = self.pubsub_management.create_topic('topic1',
                                                     exchange_point='xp1')
        topic2 = self.pubsub_management.create_topic('topic2',
                                                     exchange_point='xp1',
                                                     parent_topic_id=topic1)
        topic3 = self.pubsub_management.create_topic('topic3',
                                                     exchange_point='xp1',
                                                     parent_topic_id=topic1)
        topic4 = self.pubsub_management.create_topic('topic4',
                                                     exchange_point='xp1',
                                                     parent_topic_id=topic2)
        topic5 = self.pubsub_management.create_topic('topic5',
                                                     exchange_point='xp1',
                                                     parent_topic_id=topic2)
        topic6 = self.pubsub_management.create_topic('topic6',
                                                     exchange_point='xp1',
                                                     parent_topic_id=topic3)
        topic7 = self.pubsub_management.create_topic('topic7',
                                                     exchange_point='xp1',
                                                     parent_topic_id=topic3)

        # Tree 2
        topic8 = self.pubsub_management.create_topic('topic8',
                                                     exchange_point='xp2')
        topic9 = self.pubsub_management.create_topic('topic9',
                                                     exchange_point='xp2',
                                                     parent_topic_id=topic8)
        topic10 = self.pubsub_management.create_topic('topic10',
                                                      exchange_point='xp2',
                                                      parent_topic_id=topic9)
        topic11 = self.pubsub_management.create_topic('topic11',
                                                      exchange_point='xp2',
                                                      parent_topic_id=topic9)
        topic12 = self.pubsub_management.create_topic('topic12',
                                                      exchange_point='xp2',
                                                      parent_topic_id=topic11)
        topic13 = self.pubsub_management.create_topic('topic13',
                                                      exchange_point='xp2',
                                                      parent_topic_id=topic11)
        self.exchange_cleanup.extend(['xp1', 'xp2'])
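        # Tree 1 (xp1): topic1 -> (topic2 -> topic4, topic5) and (topic3 -> topic6, topic7)
        # Tree 2 (xp2): topic8 -> topic9 -> (topic10) and (topic11 -> topic12, topic13)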

        stream1_id, route = self.pubsub_management.create_stream(
            'stream1',
            topic_ids=[topic7, topic4, topic5],
            exchange_point='xp1')
        stream2_id, route = self.pubsub_management.create_stream(
            'stream2', topic_ids=[topic8], exchange_point='xp2')
        stream3_id, route = self.pubsub_management.create_stream(
            'stream3', topic_ids=[topic10, topic13], exchange_point='xp2')
        stream4_id, route = self.pubsub_management.create_stream(
            'stream4', topic_ids=[topic9], exchange_point='xp2')
        stream5_id, route = self.pubsub_management.create_stream(
            'stream5', topic_ids=[topic11], exchange_point='xp2')

        subscription1 = self.pubsub_management.create_subscription(
            'sub1', topic_ids=[topic1])
        subscription2 = self.pubsub_management.create_subscription(
            'sub2', topic_ids=[topic8], exchange_name='sub1')
        subscription3 = self.pubsub_management.create_subscription(
            'sub3', topic_ids=[topic9], exchange_name='sub1')
        subscription4 = self.pubsub_management.create_subscription(
            'sub4',
            topic_ids=[topic10, topic13, topic11],
            exchange_name='sub1')
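        # As exercised below, a topic subscription receives messages on streams
        # attached to that topic or any of its descendant topics, but not to its
        # ancestors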
        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription1)

        self.publish_on_stream(stream1_id, 1)

        self.assertEquals(self.msg_queue.get(timeout=10), 1)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)

        self.pubsub_management.deactivate_subscription(subscription1)
        self.pubsub_management.delete_subscription(subscription1)
        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription2)

        self.publish_on_stream(stream2_id, 2)
        self.assertEquals(self.msg_queue.get(timeout=10), 2)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)

        self.pubsub_management.deactivate_subscription(subscription2)
        self.pubsub_management.delete_subscription(subscription2)

        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription3)

        self.publish_on_stream(stream2_id, 3)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream3_id, 4)
        self.assertEquals(self.msg_queue.get(timeout=10), 4)

        self.pubsub_management.deactivate_subscription(subscription3)
        self.pubsub_management.delete_subscription(subscription3)

        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription4)

        self.publish_on_stream(stream4_id, 5)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream5_id, 6)
        self.assertEquals(self.msg_queue.get(timeout=10), 6)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.pubsub_management.deactivate_subscription(subscription4)
        self.pubsub_management.delete_subscription(subscription4)

        #--------------------------------------------------------------------------------
        sub1.stop()

        self.pubsub_management.delete_topic(topic13)
        self.pubsub_management.delete_topic(topic12)
        self.pubsub_management.delete_topic(topic11)
        self.pubsub_management.delete_topic(topic10)
        self.pubsub_management.delete_topic(topic9)
        self.pubsub_management.delete_topic(topic8)
        self.pubsub_management.delete_topic(topic7)
        self.pubsub_management.delete_topic(topic6)
        self.pubsub_management.delete_topic(topic5)
        self.pubsub_management.delete_topic(topic4)
        self.pubsub_management.delete_topic(topic3)
        self.pubsub_management.delete_topic(topic2)
        self.pubsub_management.delete_topic(topic1)

        self.pubsub_management.delete_stream(stream1_id)
        self.pubsub_management.delete_stream(stream2_id)
        self.pubsub_management.delete_stream(stream3_id)
        self.pubsub_management.delete_stream(stream4_id)
        self.pubsub_management.delete_stream(stream5_id)

    def _get_pdict(self, filter_values):
        t_ctxt = ParameterContext(
            'TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt_id = self.dataset_management.create_parameter_context(
            name='TIME',
            parameter_context=t_ctxt.dump(),
            parameter_type='quantity<int64>',
            unit_of_measure=t_ctxt.uom)

        lat_ctxt = ParameterContext(
            'LAT',
            param_type=ConstantType(
                QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt_id = self.dataset_management.create_parameter_context(
            name='LAT',
            parameter_context=lat_ctxt.dump(),
            parameter_type='quantity<float32>',
            unit_of_measure=lat_ctxt.uom)

        lon_ctxt = ParameterContext(
            'LON',
            param_type=ConstantType(
                QuantityType(value_encoding=np.dtype('float32'))),
            fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt_id = self.dataset_management.create_parameter_context(
            name='LON',
            parameter_context=lon_ctxt.dump(),
            parameter_type='quantity<float32>',
            unit_of_measure=lon_ctxt.uom)

        # Independent Parameters
        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext(
            'TEMPWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        temp_ctxt_id = self.dataset_management.create_parameter_context(
            name='TEMPWAT_L0',
            parameter_context=temp_ctxt.dump(),
            parameter_type='quantity<float32>',
            unit_of_measure=temp_ctxt.uom)

        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext(
            'CONDWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        cond_ctxt_id = self.dataset_management.create_parameter_context(
            name='CONDWAT_L0',
            parameter_context=cond_ctxt.dump(),
            parameter_type='quantity<float32>',
            unit_of_measure=cond_ctxt.uom)

        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext(
            'PRESWAT_L0',
            param_type=QuantityType(value_encoding=np.dtype('float32')),
            fill_value=-9999)
        press_ctxt.uom = 'dbar'
        press_ctxt_id = self.dataset_management.create_parameter_context(
            name='PRESWAT_L0',
            parameter_context=press_ctxt.dump(),
            parameter_type='quantity<float32>',
            unit_of_measure=press_ctxt.uom)

        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        tl1_pmap = {'T': 'TEMPWAT_L0'}
        expr = NumexprFunction('TEMPWAT_L1',
                               tl1_func, ['T'],
                               param_map=tl1_pmap)
        tempL1_ctxt = ParameterContext(
            'TEMPWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(
            name=tempL1_ctxt.name,
            parameter_context=tempL1_ctxt.dump(),
            parameter_type='pfunc',
            unit_of_measure=tempL1_ctxt.uom)

        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr = NumexprFunction('CONDWAT_L1',
                               cl1_func, ['C'],
                               param_map=cl1_pmap)
        condL1_ctxt = ParameterContext(
            'CONDWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(
            name=condL1_ctxt.name,
            parameter_context=condL1_ctxt.dump(),
            parameter_type='pfunc',
            unit_of_measure=condL1_ctxt.uom)

        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
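        # For example (illustrative value, not from the source data): with the fixed
        # p_range above, a raw count of PRESWAT_L0 = 55705.6 gives
        # (55705.6 * 679.34040721 / 55705.6) - (0.05 * 679.34040721) ~= 645.37 dbar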
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr = NumexprFunction('PRESWAT_L1',
                               pl1_func, ['P', 'p_range'],
                               param_map=pl1_pmap)
        presL1_ctxt = ParameterContext(
            'PRESWAT_L1',
            param_type=ParameterFunctionType(function=expr),
            variability=VariabilityEnum.TEMPORAL)
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(
            name=presL1_ctxt.name,
            parameter_context=presL1_ctxt.dump(),
            parameter_type='pfunc',
            unit_of_measure=presL1_ctxt.uom)

        # Density & practical salinity calculated using the Gibbs Seawater library - available via python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        sal_pmap = {
            'C':
            NumexprFunction('CONDWAT_L1*10',
                            'C*10', ['C'],
                            param_map={'C': 'CONDWAT_L1'}),
            't':
            'TEMPWAT_L1',
            'p':
            'PRESWAT_L1'
        }
        sal_kwargmap = None
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist,
                              sal_kwargmap, sal_pmap)
        sal_ctxt = ParameterContext('PRACSAL',
                                    param_type=ParameterFunctionType(expr),
                                    variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(
            name=sal_ctxt.name,
            parameter_context=sal_ctxt.dump(),
            parameter_type='pfunc',
            unit_of_measure=sal_ctxt.uom)

        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
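        # A minimal stand-alone sketch of the same call chain (assuming the python-gsw
        # package is importable; the lower-case variable names are illustrative, not
        # from the source):
        #   import gsw
        #   sp  = gsw.SP_from_C(condwat_l1 * 10, tempwat_l1, preswat_l1)
        #   sa  = gsw.SA_from_SP(sp, preswat_l1, lon, lat)
        #   ct  = gsw.CT_from_t(sa, tempwat_l1, preswat_l1)
        #   rho = gsw.rho(sa, ct, preswat_l1)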
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP',
                                      ['PRACSAL', 'PRESWAT_L1', 'LON', 'LAT'])
        cons_temp_expr = PythonFunction(
            'cons_temp', owner, 'CT_from_t',
            [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction(
            'DENSITY', owner, 'rho',
            [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext(
            'DENSITY',
            param_type=ParameterFunctionType(dens_expr),
            variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(
            name=dens_ctxt.name,
            parameter_context=dens_ctxt.dump(),
            parameter_type='pfunc',
            unit_of_measure=dens_ctxt.uom)

        ids = [
            t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id,
            press_ctxt_id, tempL1_ctxt_id, condL1_ctxt_id, presL1_ctxt_id,
            sal_ctxt_id, dens_ctxt_id
        ]
        contexts = [
            t_ctxt, lat_ctxt, lon_ctxt, temp_ctxt, cond_ctxt, press_ctxt,
            tempL1_ctxt, condL1_ctxt, presL1_ctxt, sal_ctxt, dens_ctxt
        ]
        context_ids = [
            ids[i] for i, ctxt in enumerate(contexts)
            if ctxt.name in filter_values
        ]
        pdict_name = '_'.join(
            [ctxt.name for ctxt in contexts if ctxt.name in filter_values])

        # Cache parameter dictionaries by name so repeated calls reuse the same one
        if pdict_name in self.pdicts:
            return self.pdicts[pdict_name]

        pdict_id = self.dataset_management.create_parameter_dictionary(
            pdict_name,
            parameter_context_ids=context_ids,
            temporal_context='time')
        self.pdicts[pdict_name] = pdict_id
        return pdict_id
Exemplo n.º 30
0
class TestWorkflowManagementIntegration(IonIntegrationTestCase):
    def setUp(self):
        # Start container

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.pubsubclient = PubsubManagementServiceClient(
            node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(
            node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(
            node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.datasetclient = DatasetManagementServiceClient(
            node=self.container.node)
        self.workflowclient = WorkflowManagementServiceClient(
            node=self.container.node)
        self.process_dispatcher = ProcessDispatcherServiceClient(
            node=self.container.node)

        self.ctd_stream_def = SBE37_CDM_stream_definition()

    def _create_ctd_input_stream_and_data_product(
            self, data_product_name='ctd_parsed'):

        cc = self.container
        assertions = self.assertTrue

        #-------------------------------
        # Create CTD Parsed as the initial data product
        #-------------------------------
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = self.pubsubclient.create_stream_definition(
            container=self.ctd_stream_def, name='Simulated CTD data')

        log.debug('Creating new CDM data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
                           name=data_product_name,
                           description='ctd stream test')
        try:
            ctd_parsed_data_product_id = self.dataproductclient.create_data_product(
                dp_obj, ctd_stream_def_id)
        except Exception as ex:
            self.fail("failed to create new data product: %s" % ex)

        log.debug('new ctd_parsed_data_product_id = %s',
                  ctd_parsed_data_product_id)

        #Only ever need one device for testing purposes.
        instDevice_obj, _ = self.rrclient.find_resources(
            restype=RT.InstrumentDevice, name='SBE37IMDevice')
        if instDevice_obj:
            instDevice_id = instDevice_obj[0]._id
        else:
            instDevice_obj = IonObject(RT.InstrumentDevice,
                                       name='SBE37IMDevice',
                                       description="SBE37IMDevice",
                                       serial_number="12345")
            instDevice_id = self.imsclient.create_instrument_device(
                instrument_device=instDevice_obj)

        self.damsclient.assign_data_product(
            input_resource_id=instDevice_id,
            data_product_id=ctd_parsed_data_product_id)

        self.dataproductclient.activate_data_product_persistence(
            data_product_id=ctd_parsed_data_product_id,
            persist_data=False,
            persist_metadata=False)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product_id,
                                                   PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0)
        ctd_stream_id = stream_ids[0]

        return ctd_stream_id, ctd_parsed_data_product_id

    def _start_simple_input_stream_process(self, ctd_stream_id):
        return self._start_input_stream_process(ctd_stream_id)

    def _start_sinusoidal_input_stream_process(self, ctd_stream_id):
        return self._start_input_stream_process(
            ctd_stream_id, 'ion.processes.data.sinusoidal_stream_publisher',
            'SinusoidalCtdPublisher')

    def _start_input_stream_process(
            self,
            ctd_stream_id,
            module='ion.processes.data.ctd_stream_publisher',
            class_name='SimpleCtdPublisher'):

        ###
        ### Start the process for producing the CTD data
        ###
        # process definition for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module': module,
            'class': class_name
        }

        ctd_sim_procdef_id = self.process_dispatcher.create_process_definition(
            process_definition=producer_definition)

        # Start the ctd simulator to produce some data
        configuration = {
            'process': {
                'stream_id': ctd_stream_id,
            }
        }
        ctd_sim_pid = self.process_dispatcher.schedule_process(
            process_definition_id=ctd_sim_procdef_id,
            configuration=configuration)

        return ctd_sim_pid

    def _start_output_stream_listener(self,
                                      data_product_stream_ids,
                                      message_count_per_stream=10):

        cc = self.container
        assertions = self.assertTrue

        ###
        ### Make a subscriber in the test to listen for transformed data
        ###
        salinity_subscription_id = self.pubsubclient.create_subscription(
            query=StreamQuery(data_product_stream_ids),
            exchange_name='workflow_test',
            name="test workflow transformations",
        )

        pid = cc.spawn_process(name='dummy_process_for_test',
                               module='pyon.ion.process',
                               cls='SimpleProcess',
                               config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process,
                                                         node=cc.node)

        result = gevent.event.AsyncResult()
        results = []

        def message_received(message, headers):
            log.warn(' data received!')
            results.append(message)
            # Only wait for so many messages per stream
            if len(results) >= len(data_product_stream_ids) * message_count_per_stream:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(
            exchange_name='workflow_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        self.pubsubclient.activate_subscription(
            subscription_id=salinity_subscription_id)

        # Assert that we have received data
        assertions(result.get(timeout=30))

        self.pubsubclient.deactivate_subscription(
            subscription_id=salinity_subscription_id)

        subscriber.stop()

        return results

    def _validate_messages(self, results):

        cc = self.container
        assertions = self.assertTrue

        first_salinity_values = None

        for message in results:

            try:
                psd = PointSupplementStreamParser(
                    stream_definition=self.ctd_stream_def,
                    stream_granule=message)
                temp = psd.get_values('temperature')
                log.info(psd.list_field_names())
            except KeyError as ke:
                temp = None

            if temp is not None:
                assertions(isinstance(temp, numpy.ndarray))

                log.info('temperature=' + str(numpy.nanmin(temp)))

                first_salinity_values = None

            else:
                psd = PointSupplementStreamParser(
                    stream_definition=SalinityTransform.outgoing_stream_def,
                    stream_granule=message)
                log.info(psd.list_field_names())

                # Test the handy info method for the names of fields in the stream def
                assertions('salinity' in psd.list_field_names())

                # you have to know the name of the coverage in stream def
                salinity = psd.get_values('salinity')
                log.info('salinity=' + str(numpy.nanmin(salinity)))

                assertions(isinstance(salinity, numpy.ndarray))

                assertions(numpy.nanmin(salinity) >
                           0.0)  # salinity should always be greater than 0

                if first_salinity_values is None:
                    first_salinity_values = salinity.tolist()
                else:
                    second_salinity_values = salinity.tolist()
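                    # The doubled stream's values should be exactly twice the
                    # previous salinity values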
                    assertions(
                        len(first_salinity_values) == len(
                            second_salinity_values))
                    for idx in range(0, len(first_salinity_values)):
                        assertions(first_salinity_values[idx] *
                                   2.0 == second_salinity_values[idx])

    def _create_salinity_data_process_definition(self):

        # Salinity: Data Process Definition

        #First look to see if it exists and if not, then create it
        dpd, _ = self.rrclient.find_resources(restype=RT.DataProcessDefinition,
                                              name='ctd_salinity')
        if len(dpd) > 0:
            return dpd[0]

        log.debug("Create data process definition SalinityTransform")
        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='ctd_salinity',
            description='create a salinity data product',
            module='ion.processes.data.transforms.ctd.ctd_L2_salinity',
            class_name='SalinityTransform',
            process_source='SalinityTransform source code here...')
        try:
            ctd_L2_salinity_dprocdef_id = self.dataprocessclient.create_data_process_definition(
                dpd_obj)
        except Exception as ex:
            self.fail(
                "failed to create new SalinityTransform data process definition: %s"
                % ex)

        # create a stream definition for the data from the salinity Transform
        sal_stream_def_id = self.pubsubclient.create_stream_definition(
            container=SalinityTransform.outgoing_stream_def, name='Salinity')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            sal_stream_def_id, ctd_L2_salinity_dprocdef_id)

        return ctd_L2_salinity_dprocdef_id

    def _create_salinity_doubler_data_process_definition(self):

        #First look to see if it exists and if not, then create it
        dpd, _ = self.rrclient.find_resources(restype=RT.DataProcessDefinition,
                                              name='salinity_doubler')
        if len(dpd) > 0:
            return dpd[0]

        # Salinity Doubler: Data Process Definition
        log.debug("Create data process definition SalinityDoublerTransform")
        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='salinity_doubler',
            description='create a salinity doubler data product',
            module='ion.processes.data.transforms.example_double_salinity',
            class_name='SalinityDoubler',
            process_source='SalinityDoubler source code here...')
        try:
            salinity_doubler_dprocdef_id = self.dataprocessclient.create_data_process_definition(
                dpd_obj)
        except Exception as ex:
            self.fail(
                "failed to create new SalinityDoubler data process definition: %s"
                % ex)

        # create a stream definition for the data from the salinity Transform
        salinity_double_stream_def_id = self.pubsubclient.create_stream_definition(
            container=SalinityDoubler.outgoing_stream_def,
            name='SalinityDoubler')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            salinity_double_stream_def_id, salinity_doubler_dprocdef_id)

        return salinity_doubler_dprocdef_id

    def create_transform_process(self, data_process_definition_id,
                                 data_process_input_dp_id):

        data_process_definition = self.rrclient.read(
            data_process_definition_id)

        # Find the link between the output Stream Definition resource and the Data Process Definition resource
        stream_ids, _ = self.rrclient.find_objects(data_process_definition._id,
                                                   PRED.hasStreamDefinition,
                                                   RT.StreamDefinition,
                                                   id_only=True)
        if not stream_ids:
            raise Inconsistent(
                "The data process definition %s is missing an association to an output stream definition"
                % data_process_definition._id)
        process_output_stream_def_id = stream_ids[0]

        # Use the data process definition name as the name of the output data product
        data_process_name = data_process_definition.name

        # Create the output data product of the transform
        transform_dp_obj = IonObject(
            RT.DataProduct,
            name=data_process_name,
            description=data_process_definition.description)
        transform_dp_id = self.dataproductclient.create_data_product(
            transform_dp_obj, process_output_stream_def_id)
        self.dataproductclient.activate_data_product_persistence(
            data_product_id=transform_dp_id,
            persist_data=True,
            persist_metadata=True)

        # The transform's data product is the output product of this process
        output_data_product_id = transform_dp_id

        # Create the  transform data process
        log.debug("create data_process and start it")
        data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition._id, [data_process_input_dp_id],
            {'output': transform_dp_id})
        self.dataprocessclient.activate_data_process(data_process_id)

        #Find the id of the output data stream
        stream_ids, _ = self.rrclient.find_objects(transform_dp_id,
                                                   PRED.hasStream, None, True)
        if not stream_ids:
            raise Inconsistent(
                "The data process %s is missing an association to an output stream"
                % data_process_id)

        return data_process_id, output_data_product_id

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    #@unittest.skip("Skipping for debugging ")
    def test_SA_transform_components(self):

        assertions = self.assertTrue

        #The list of data product streams to monitor
        data_product_stream_ids = list()

        #Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self._create_ctd_input_stream_and_data_product(
        )
        data_product_stream_ids.append(ctd_stream_id)

        ###
        ###  Setup the first transformation
        ###

        # Salinity: Data Process Definition
        ctd_L2_salinity_dprocdef_id = self._create_salinity_data_process_definition(
        )

        l2_salinity_all_data_process_id, ctd_l2_salinity_output_dp_id = self.create_transform_process(
            ctd_L2_salinity_dprocdef_id, ctd_parsed_data_product_id)

        ## get the stream id for the transform outputs
        stream_ids, _ = self.rrclient.find_objects(
            ctd_l2_salinity_output_dp_id, PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0)
        sal_stream_id = stream_ids[0]
        data_product_stream_ids.append(sal_stream_id)

        ###
        ###  Setup the second transformation
        ###

        # Salinity Doubler: Data Process Definition
        salinity_doubler_dprocdef_id = self._create_salinity_doubler_data_process_definition(
        )

        salinity_double_data_process_id, salinity_doubler_output_dp_id = self.create_transform_process(
            salinity_doubler_dprocdef_id, ctd_l2_salinity_output_dp_id)

        stream_ids, _ = self.rrclient.find_objects(
            salinity_doubler_output_dp_id, PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0)
        sal_dbl_stream_id = stream_ids[0]
        data_product_stream_ids.append(sal_dbl_stream_id)

        #Start the input stream process
        ctd_sim_pid = self._start_simple_input_stream_process(ctd_stream_id)

        #Start the output stream listener to monitor and verify messages
        results = self._start_output_stream_listener(data_product_stream_ids)

        #Stop the transform processes
        self.dataprocessclient.deactivate_data_process(
            salinity_double_data_process_id)
        self.dataprocessclient.deactivate_data_process(
            l2_salinity_all_data_process_id)

        # stop the flow and parse the messages...
        self.process_dispatcher.cancel_process(
            ctd_sim_pid
        )  # kill the ctd simulator process - that is enough data

        #Validate the data from each of the messages along the way
        self._validate_messages(results)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    #@unittest.skip("Skipping for debugging ")
    def test_transform_workflow(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(
            RT.WorkflowDefinition,
            name='Salinity_Test_Workflow',
            description='tests a workflow of multiple transform data processes'
        )

        workflow_data_product_name = 'TEST-Workflow_Output_Product'  #Set a specific output product name
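        # The two workflow steps below chain the salinity transform into the salinity
        # doubler, mirroring the manual pipeline in the previous test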

        #Add a transformation process definition
        ctd_L2_salinity_dprocdef_id = self._create_salinity_data_process_definition(
        )
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=ctd_L2_salinity_dprocdef_id,
            persist_process_output_data=False
        )  #Don't persist the intermediate data product
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        #Add a transformation process definition
        salinity_doubler_dprocdef_id = self._create_salinity_doubler_data_process_definition(
        )
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=salinity_doubler_dprocdef_id,
            output_data_product_name=workflow_data_product_name)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        #Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(
            workflow_def_obj)

        aids = self.rrclient.find_associations(workflow_def_id,
                                               PRED.hasDataProcessDefinition)
        assertions(len(aids) == 2)

        #The list of data product streams to monitor
        data_product_stream_ids = list()

        #Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self._create_ctd_input_stream_and_data_product(
        )
        data_product_stream_ids.append(ctd_stream_id)

        #Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id, timeout=30)

        workflow_output_ids, _ = self.rrclient.find_subjects(
            RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        assertions(len(workflow_output_ids) == 1)

        #Verify the output data product name matches what was specified in the workflow definition
        workflow_product = self.rrclient.read(workflow_product_id)
        assertions(workflow_product.name == workflow_data_product_name)

        #Walk the associations to find the appropriate output data streams to validate the messages
        workflow_dp_ids, _ = self.rrclient.find_objects(
            workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        assertions(len(workflow_dp_ids) == 2)

        for dp_id in workflow_dp_ids:
            stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                       None, True)
            assertions(len(stream_ids) == 1)
            data_product_stream_ids.append(stream_ids[0])

        #Start the input stream process
        ctd_sim_pid = self._start_simple_input_stream_process(ctd_stream_id)

        #Start the output stream listener to monitor and verify messages
        results = self._start_output_stream_listener(data_product_stream_ids)

        #Stop the workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id, False, timeout=15)  # Should test true at some point

        #Make sure the Workflow object was removed
        objs, _ = self.rrclient.find_resources(restype=RT.Workflow)
        assertions(len(objs) == 0)

        # stop the flow and parse the messages...
        self.process_dispatcher.cancel_process(
            ctd_sim_pid
        )  # kill the ctd simulator process - that is enough data

        #Validate the data from each of the messages along the way
        self._validate_messages(results)

        #Cleanup to make sure delete is correct.
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        workflow_def_ids, _ = self.rrclient.find_resources(
            restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0)

    def _create_google_dt_data_process_definition(self):

        #First look to see if it exists and if not, then create it
        dpd, _ = self.rrclient.find_resources(restype=RT.DataProcessDefinition,
                                              name='google_dt_transform')
        if len(dpd) > 0:
            return dpd[0]

        # Data Process Definition
        log.debug("Create data process definition GoogleDtTransform")
        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='google_dt_transform',
            description='Convert data streams to Google DataTables',
            module='ion.processes.data.transforms.viz.google_dt',
            class_name='VizTransformGoogleDT',
            process_source='VizTransformGoogleDT source code here...')
        try:
            procdef_id = self.dataprocessclient.create_data_process_definition(
                dpd_obj)
        except Exception as ex:
            self.fail(
                "failed to create new VizTransformGoogleDT data process definition: %s"
                % ex)

        # create a stream definition for the data coming out of the transform
        stream_def_id = self.pubsubclient.create_stream_definition(
            container=VizTransformGoogleDT.outgoing_stream_def,
            name='VizTransformGoogleDT')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            stream_def_id, procdef_id)

        return procdef_id

    def _validate_google_dt_results(self, results_stream_def, results):
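        # For each Granule received, confirm it carries a 'google_dt_components' record
        # of type 'google_realtime_dt' whose data table description begins with a 'time' column.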

        cc = self.container
        assertions = self.assertTrue

        for g in results:

            if isinstance(g, Granule):

                tx = TaxyTool.load_from_granule(g)
                rdt = RecordDictionaryTool.load_from_granule(g)
                #log.warn(tx.pretty_print())
                #log.warn(rdt.pretty_print())

                gdt_component = rdt['google_dt_components'][0]

                assertions(
                    gdt_component['viz_product_type'] == 'google_realtime_dt')
                gdt_description = gdt_component['data_table_description']
                gdt_content = gdt_component['data_table_content']

                assertions(gdt_description[0][0] == 'time')
                assertions(len(gdt_description) > 1)
                assertions(len(gdt_content) >= 0)

        return

    def _create_mpl_graphs_data_process_definition(self):

        #First look to see if it exists and if not, then create it
        dpd, _ = self.rrclient.find_resources(restype=RT.DataProcessDefinition,
                                              name='mpl_graphs_transform')
        if len(dpd) > 0:
            return dpd[0]

        #Data Process Definition
        log.debug("Create data process definition MatplotlibGraphsTransform")
        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='mpl_graphs_transform',
            description='Convert data streams to Matplotlib graphs',
            module='ion.processes.data.transforms.viz.matplotlib_graphs',
            class_name='VizTransformMatplotlibGraphs',
            process_source='VizTransformMatplotlibGraphs source code here...')
        try:
            procdef_id = self.dataprocessclient.create_data_process_definition(
                dpd_obj)
        except Exception as ex:
            self.fail(
                "failed to create new VizTransformMatplotlibGraphs data process definition: %s"
                % ex)

        # create a stream definition for the data
        stream_def_id = self.pubsubclient.create_stream_definition(
            container=VizTransformMatplotlibGraphs.outgoing_stream_def,
            name='VizTransformMatplotlibGraphs')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            stream_def_id, procdef_id)

        return procdef_id

    def _validate_mpl_graphs_results(self, results_stream_def, results):
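        # For each Granule received, check that every entry in 'matplotlib_graphs' is
        # tagged 'matplotlib_graphs' and that its image payload is recognized as PNG by imghdr.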

        cc = self.container
        assertions = self.assertTrue

        for g in results:
            if isinstance(g, Granule):

                tx = TaxyTool.load_from_granule(g)
                rdt = RecordDictionaryTool.load_from_granule(g)
                #log.warn(tx.pretty_print())
                #log.warn(rdt.pretty_print())

                graphs = rdt['matplotlib_graphs']

                for graph in graphs:
                    assertions(
                        graph['viz_product_type'] == 'matplotlib_graphs')
                    # check to see if the list (numpy array) contains actual images
                    assertions(
                        imghdr.what(graph['image_name'], graph['image_obj']) ==
                        'png')

        return

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    #@unittest.skip("Skipping for debugging ")
    def test_google_dt_transform_workflow(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(
            RT.WorkflowDefinition,
            name='GoogleDT_Test_Workflow',
            description=
            'Tests the workflow of converting stream data to Google DT')

        #Add a transformation process definition
        google_dt_procdef_id = self._create_google_dt_data_process_definition()
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=google_dt_procdef_id)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        #Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(
            workflow_def_obj)

        #The list of data product streams to monitor
        data_product_stream_ids = list()

        #Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self._create_ctd_input_stream_and_data_product(
        )
        data_product_stream_ids.append(ctd_stream_id)

        #Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id)

        workflow_output_ids, _ = self.rrclient.find_subjects(
            RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        assertions(len(workflow_output_ids) == 1)

        #Walk the associations to find the appropriate output data streams to validate the messages
        workflow_dp_ids, _ = self.rrclient.find_objects(
            workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        assertions(len(workflow_dp_ids) == 1)

        for dp_id in workflow_dp_ids:
            stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                       None, True)
            assertions(len(stream_ids) == 1)
            data_product_stream_ids.append(stream_ids[0])

        #Start the input stream process
        ctd_sim_pid = self._start_simple_input_stream_process(ctd_stream_id)

        #Start the output stream listener to monitor and verify messages
        results = self._start_output_stream_listener(data_product_stream_ids)

        #Stop the workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id, False)  # Should test true at some point

        # stop the flow and parse the messages...
        self.process_dispatcher.cancel_process(
            ctd_sim_pid
        )  # kill the ctd simulator process - that is enough data

        #Validate the data from each of the messages along the way
        self._validate_google_dt_results(
            VizTransformGoogleDT.outgoing_stream_def, results)

        #Cleanup to make sure delete is correct.
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        workflow_def_ids, _ = self.rrclient.find_resources(
            restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    #@unittest.skip("Skipping for debugging ")
    def test_mpl_graphs_transform_workflow(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(
            RT.WorkflowDefinition,
            name='Mpl_Graphs_Test_Workflow',
            description=
            'Tests the workflow of converting stream data to Matplotlib graphs'
        )

        #Add a transformation process definition
        mpl_graphs_procdef_id = self._create_mpl_graphs_data_process_definition(
        )
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=mpl_graphs_procdef_id)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        #Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(
            workflow_def_obj)

        #The list of data product streams to monitor
        data_product_stream_ids = list()

        #Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self._create_ctd_input_stream_and_data_product(
        )
        data_product_stream_ids.append(ctd_stream_id)

        #Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id)

        workflow_output_ids, _ = self.rrclient.find_subjects(
            RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        assertions(len(workflow_output_ids) == 1)

        #Walk the associations to find the appropriate output data streams to validate the messages
        workflow_dp_ids, _ = self.rrclient.find_objects(
            workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        assertions(len(workflow_dp_ids) == 1)

        for dp_id in workflow_dp_ids:
            stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                       None, True)
            assertions(len(stream_ids) == 1)
            data_product_stream_ids.append(stream_ids[0])

        #Start the input stream process
        ctd_sim_pid = self._start_sinusoidal_input_stream_process(
            ctd_stream_id)

        #Start the output stream listener to monitor and verify messages
        results = self._start_output_stream_listener(data_product_stream_ids)

        #Stop the workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id, False)  # Should test true at some point

        # stop the flow and parse the messages...
        self.process_dispatcher.cancel_process(
            ctd_sim_pid
        )  # kill the ctd simulator process - that is enough data

        #Validate the data from each of the messages along the way
        self._validate_mpl_graphs_results(
            VizTransformMatplotlibGraphs.outgoing_stream_def, results)

        #Cleanup to make sure delete is correct.
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        workflow_def_ids, _ = self.rrclient.find_resources(
            restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Not integrated for CEI')
    #@unittest.skip("Skipping for debugging ")
    def test_multiple_workflow_instances(self):

        assertions = self.assertTrue

        # Build the workflow definition
        workflow_def_obj = IonObject(
            RT.WorkflowDefinition,
            name='Multiple_Test_Workflow',
            description='Tests the workflow of converting stream data')

        #Add a transformation process definition
        google_dt_procdef_id = self._create_google_dt_data_process_definition()
        workflow_step_obj = IonObject(
            'DataProcessWorkflowStep',
            data_process_definition_id=google_dt_procdef_id)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        #Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(
            workflow_def_obj)

        #The list of data product streams to monitor
        data_product_stream_ids = list()

        #Create the first input data product
        ctd_stream_id1, ctd_parsed_data_product_id1 = self._create_ctd_input_stream_and_data_product(
            'ctd_parsed1')
        data_product_stream_ids.append(ctd_stream_id1)

        #Create and start the first workflow
        workflow_id1, workflow_product_id1 = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id1)

        #Create the second input data product
        ctd_stream_id2, ctd_parsed_data_product_id2 = self._create_ctd_input_stream_and_data_product(
            'ctd_parsed2')
        data_product_stream_ids.append(ctd_stream_id2)

        #Create and start the second workflow
        workflow_id2, workflow_product_id2 = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_data_product_id2)

        #Walk the associations to find the appropriate output data streams to validate the messages
        workflow_ids, _ = self.rrclient.find_resources(restype=RT.Workflow)
        assertions(len(workflow_ids) == 2)

        #Start the first input stream process
        ctd_sim_pid1 = self._start_sinusoidal_input_stream_process(
            ctd_stream_id1)

        #Start the second input stream process
        ctd_sim_pid2 = self._start_simple_input_stream_process(ctd_stream_id2)

        #Start the output stream listener to monitor a set number of messages being sent through the workflows
        results = self._start_output_stream_listener(
            data_product_stream_ids, message_count_per_stream=5)

        # stop the flow of messages...
        self.process_dispatcher.cancel_process(
            ctd_sim_pid1
        )  # kill the ctd simulator process - that is enough data
        self.process_dispatcher.cancel_process(ctd_sim_pid2)

        #Stop the first workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id1, False)  # Should test true at some point

        #Stop the second workflow processes
        self.workflowclient.terminate_data_process_workflow(
            workflow_id2, False)  # Should test true at some point

        workflow_ids, _ = self.rrclient.find_resources(restype=RT.Workflow)
        assertions(len(workflow_ids) == 0)

        #Cleanup to make sure delete is correct.
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        workflow_def_ids, _ = self.rrclient.find_resources(
            restype=RT.WorkflowDefinition)
        assertions(len(workflow_def_ids) == 0)

        aid_list = self.rrclient.find_associations(
            workflow_def_id, PRED.hasDataProcessDefinition)
        assertions(len(aid_list) == 0)
Exemplo n.º 31
0
class TestTransformPrime(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()

        self.container.start_rel_from_url(
            'res/deploy/r2deploy.yml')  # Bring up the standard r2 deployment of services

        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

    def setup_streams(self):
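        # Build L0 (input) and L1 (output) stream definitions from the existing
        # sbe37_L0_test / sbe37_L1_test parameter dictionaries, then create one
        # stream for each on the 'test' exchange point.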
        in_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'sbe37_L0_test', id_only=True)
        out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'sbe37_L1_test', id_only=True)

        in_stream_def_id = self.pubsub_management.create_stream_definition(
            'L0 SBE37', parameter_dictionary_id=in_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        in_stream_def_id)
        out_stream_def_id = self.pubsub_management.create_stream_definition(
            'L1 SBE37', parameter_dictionary_id=out_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        out_stream_def_id)

        in_stream_id, in_route = self.pubsub_management.create_stream(
            'L0 input',
            stream_definition_id=in_stream_def_id,
            exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, in_stream_id)
        out_stream_id, out_route = self.pubsub_management.create_stream(
            'L0 output',
            stream_definition_id=out_stream_def_id,
            exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, out_stream_id)

        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def setup_advanced_streams(self):
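        # Both stream definitions share the sbe37_LC_TEST parameter dictionary;
        # available_fields restricts the input stream to the L0 values and the
        # output stream to the derived rho / PRACSAL_L2 values.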
        in_pdict_id = out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'sbe37_LC_TEST', id_only=True)
        in_stream_def_id = self.pubsub_management.create_stream_definition(
            'sbe37_instrument',
            parameter_dictionary_id=in_pdict_id,
            available_fields=[
                'time', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'lat', 'lon'
            ])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        in_stream_def_id)

        out_stream_def_id = self.pubsub_management.create_stream_definition(
            'sbe37_l2',
            parameter_dictionary_id=out_pdict_id,
            available_fields=['time', 'rho', 'PRACSAL_L2'])
        self.addCleanup(self.pubsub_management.delete_stream_definition,
                        out_stream_def_id)

        in_stream_id, in_route = self.pubsub_management.create_stream(
            'instrument stream',
            stream_definition_id=in_stream_def_id,
            exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, in_stream_id)

        out_stream_id, out_route = self.pubsub_management.create_stream(
            'data product stream',
            stream_definition_id=out_stream_def_id,
            exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, out_stream_id)

        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def preload(self):
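        # Spawn the IONLoader bootstrap process to load the parameter functions,
        # definitions and dictionaries for the BASE and LC_TEST scenarios.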
        config = DotDict()
        config.op = 'load'
        config.scenario = 'BASE,LC_TEST'
        config.categories = 'ParameterFunctions,ParameterDefs,ParameterDictionary'
        config.path = 'res/preload/r2_ioc'

        self.container.spawn_process('preload',
                                     'ion.processes.bootstrap.ion_loader',
                                     'IONLoader', config)

    def setup_advanced_transform(self):
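        # Wire a TransformPrime process between the advanced input and output streams:
        # subscribe it to the input queue, hand it the input-to-output routing table and
        # the output stream to publish on via its process config, spawn it, then create
        # a 'listener' subscription on the output stream for validation.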
        self.preload()
        queue_name = 'transform_prime'

        stream_info = self.setup_advanced_streams()
        in_stream_id, in_stream_def_id = stream_info[0]
        out_stream_id, out_stream_def_id = stream_info[1]

        routes = {}
        routes[(in_stream_id, out_stream_id)] = None

        config = DotDict()

        config.process.queue_name = queue_name
        config.process.routes = routes
        config.process.publish_streams = {out_stream_id: out_stream_id}

        sub_id = self.pubsub_management.create_subscription(
            queue_name, stream_ids=[in_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        self.container.spawn_process(
            'transform_prime', 'ion.processes.data.transforms.transform_prime',
            'TransformPrime', config)

        listen_sub_id = self.pubsub_management.create_subscription(
            'listener', stream_ids=[out_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription,
                        listen_sub_id)

        self.pubsub_management.activate_subscription(listen_sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription,
                        listen_sub_id)
        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def setup_transform(self):
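        # Same wiring as setup_advanced_transform, but over the plain L0/L1 streams
        # created by setup_streams.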
        self.preload()
        queue_name = 'transform_prime'

        stream_info = self.setup_streams()
        in_stream_id, in_stream_def_id = stream_info[0]
        out_stream_id, out_stream_def_id = stream_info[1]

        routes = {}
        routes[(in_stream_id, out_stream_id)] = None

        config = DotDict()

        config.process.queue_name = queue_name
        config.process.routes = routes
        config.process.publish_streams = {out_stream_id: out_stream_id}

        sub_id = self.pubsub_management.create_subscription(
            queue_name, stream_ids=[in_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        self.container.spawn_process(
            'transform_prime', 'ion.processes.data.transforms.transform_prime',
            'TransformPrime', config)

        listen_sub_id = self.pubsub_management.create_subscription(
            'listener', stream_ids=[out_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription,
                        listen_sub_id)

        self.pubsub_management.activate_subscription(listen_sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription,
                        listen_sub_id)
        return [(in_stream_id, in_stream_def_id),
                (out_stream_id, out_stream_def_id)]

    def setup_validator(self, validator):
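        # Attach the supplied callback to the 'listener' queue fed by the transform's
        # output subscription, and stop the subscriber automatically on cleanup.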
        listener = StandaloneStreamSubscriber('listener', validator)
        listener.start()
        self.addCleanup(listener.stop)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_execute_advanced_transform(self):
        # Runs a transform across L0-L2 with stream definitions including available fields
        streams = self.setup_advanced_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_defs_id = streams[1]

        validation_event = Event()

        def validator(msg, route, stream_id):
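            # Signal success only once the transform has produced the expected density (rho) value.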
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['rho'], np.array([1001.0055034])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(
            stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_execute_transform(self):
        streams = self.setup_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_def_id = streams[1]

        validation_event = Event()

        def validator(msg, route, stream_id):
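            # Signal success only once all three L1 values match the expected converted outputs.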
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
                return
            if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
                return
            if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(
            stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
Exemplo n.º 32
0
class PubsubManagementIntTest(IonIntegrationTestCase):

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        self.pubsub_management       = PubsubManagementServiceClient()
        self.resource_registry       = ResourceRegistryServiceClient()
        self.dataset_management      = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

        self.pdicts = {}
        self.queue_cleanup = list()
        self.exchange_cleanup = list()
        self.context_ids = set()

    def tearDown(self):
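        # Remove any queues and exchange points created during the test, then purge
        # leftover parameter contexts.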
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()

        self.cleanup_contexts()
    
    def test_stream_def_crud(self):

        # Test Creation
        pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
        stream_definition_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict.identifier)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_definition_id)

        # Make sure there is an assoc
        self.assertTrue(self.resource_registry.find_associations(subject=stream_definition_id, predicate=PRED.hasParameterDictionary, object=pdict.identifier, id_only=True))

        # Test Reading
        stream_definition = self.pubsub_management.read_stream_definition(stream_definition_id)
        self.assertTrue(PubsubManagementService._compare_pdicts(pdict.dump(), stream_definition.parameter_dictionary))


        # Test comparisons
        in_stream_definition_id = self.pubsub_management.create_stream_definition('L0 products', parameter_dictionary_id=pdict.identifier, available_fields=['time','temp','conductivity','pressure'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_definition_id)

        out_stream_definition_id = in_stream_definition_id
        self.assertTrue(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id))
        self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id))

        out_stream_definition_id = self.pubsub_management.create_stream_definition('L2 Products', parameter_dictionary_id=pdict.identifier, available_fields=['time','salinity','density'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_definition_id)
        self.assertFalse(self.pubsub_management.compare_stream_definition(in_stream_definition_id, out_stream_definition_id))

        self.assertTrue(self.pubsub_management.compatible_stream_definitions(in_stream_definition_id, out_stream_definition_id))

    @unittest.skip('Needs to be refactored for cleanup')
    def test_validate_stream_defs(self):
        self.addCleanup(self.cleanup_contexts)
        #test no input 
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = []
        available_fields_out = []
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_0', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_0', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)
    
        #test input with no output
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = []
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_1', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_1', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)
        
        #test available field missing parameter context definition -- missing PRESWAT_L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['DENSITY']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_2', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_2', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)

        #test l1 from l0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_3', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_3', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)

        #test l2 from l0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1', 'DENSITY', 'PRACSAL'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_4', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_4', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)
        
        #test Ln from L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY','PRACSAL','TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_5', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_5', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)
        
        #test L2 from L1
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        outgoing_pdict_id = self._get_pdict(['DENSITY','PRACSAL','TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_6', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_6', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertTrue(result)
        
        #test L1 from L0 missing L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON'])
        outgoing_pdict_id = self._get_pdict(['TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_7', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_7', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)
        
        #test L2 from L0 missing L0
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL', 'TEMPWAT_L1', 'CONDWAT_L1', 'PRESWAT_L1'])
        available_fields_in = ['TIME', 'LAT', 'LON']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_8', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_8', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)
        
        #test L2 from L0 missing L1
        incoming_pdict_id = self._get_pdict(['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0'])
        outgoing_pdict_id = self._get_pdict(['DENSITY', 'PRACSAL'])
        available_fields_in = ['TIME', 'LAT', 'LON', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0']
        available_fields_out = ['DENSITY', 'PRACSAL']
        incoming_stream_def_id = self.pubsub_management.create_stream_definition('in_sd_9', parameter_dictionary_id=incoming_pdict_id, available_fields=available_fields_in)
        self.addCleanup(self.pubsub_management.delete_stream_definition, incoming_stream_def_id)
        outgoing_stream_def_id = self.pubsub_management.create_stream_definition('out_sd_9', parameter_dictionary_id=outgoing_pdict_id, available_fields=available_fields_out)
        self.addCleanup(self.pubsub_management.delete_stream_definition, outgoing_stream_def_id)
        result = self.pubsub_management.validate_stream_defs(incoming_stream_def_id, outgoing_stream_def_id)
        self.assertFalse(result)
    
    def publish_on_stream(self, stream_id, msg):
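        # Helper: look up the stream's route from the registry and publish a single message on it.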
        stream = self.pubsub_management.read_stream(stream_id)
        stream_route = stream.stream_route
        publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)
        publisher.publish(msg)

    def test_stream_crud(self):
        stream_def_id = self.pubsub_management.create_stream_definition('test_definition', stream_type='stream')
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        topic_id = self.pubsub_management.create_topic(name='test_topic', exchange_point='test_exchange')
        self.addCleanup(self.pubsub_management.delete_topic, topic_id)
        self.exchange_cleanup.append('test_exchange')
        topic2_id = self.pubsub_management.create_topic(name='another_topic', exchange_point='outside')
        self.addCleanup(self.pubsub_management.delete_topic, topic2_id)
        stream_id, route = self.pubsub_management.create_stream(name='test_stream', topic_ids=[topic_id, topic2_id], exchange_point='test_exchange', stream_definition_id=stream_def_id)

        topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertEquals(topics,[topic_id])

        defs, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True)
        self.assertTrue(len(defs))

        stream = self.pubsub_management.read_stream(stream_id)
        self.assertEquals(stream.name,'test_stream')
        self.pubsub_management.delete_stream(stream_id)
        
        with self.assertRaises(NotFound):
            self.pubsub_management.read_stream(stream_id)

        defs, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True)
        self.assertFalse(len(defs))

        topics, assocs = self.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasTopic, id_only=True)
        self.assertFalse(len(topics))



    def test_data_product_subscription(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        tdom, sdom = time_series_domain()
        dp = DataProduct(name='ctd parsed')
        dp.spatial_domain = sdom.dump()
        dp.temporal_domain = tdom.dump()

        data_product_id = self.data_product_management.create_data_product(data_product=dp, stream_definition_id=stream_def_id)
        self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

        subscription_id = self.pubsub_management.create_subscription('validator', data_product_ids=[data_product_id])
        self.addCleanup(self.pubsub_management.delete_subscription, subscription_id)

        validated = Event()
        def validation(msg, route, stream_id):
            validated.set()

        stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        dp_stream_id = stream_ids.pop()

        validator = StandaloneStreamSubscriber('validator', callback=validation)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_management.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, subscription_id)

        route = self.pubsub_management.read_stream_route(dp_stream_id)

        publisher = StandaloneStreamPublisher(dp_stream_id, route)
        publisher.publish('hi')
        self.assertTrue(validated.wait(10))
            

    def test_subscription_crud(self):
        stream_def_id = self.pubsub_management.create_stream_definition('test_definition', stream_type='stream')
        stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_exchange', stream_definition_id=stream_def_id)
        subscription_id = self.pubsub_management.create_subscription(name='test subscription', stream_ids=[stream_id], exchange_name='test_queue')
        self.exchange_cleanup.append('test_exchange')

        subs, assocs = self.resource_registry.find_objects(subject=subscription_id,predicate=PRED.hasStream,id_only=True)
        self.assertEquals(subs,[stream_id])

        res, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='test_queue', id_only=True)
        self.assertEquals(len(res),1)

        subs, assocs = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEquals(subs[0], res[0])

        subscription = self.pubsub_management.read_subscription(subscription_id)
        self.assertEquals(subscription.exchange_name, 'test_queue')

        self.pubsub_management.delete_subscription(subscription_id)
        
        subs, assocs = self.resource_registry.find_objects(subject=subscription_id,predicate=PRED.hasStream,id_only=True)
        self.assertFalse(len(subs))

        subs, assocs = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertFalse(len(subs))


        self.pubsub_management.delete_stream(stream_id)
        self.pubsub_management.delete_stream_definition(stream_def_id)

    def test_move_before_activate(self):
        stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp')

        #--------------------------------------------------------------------------------
        # Test moving before activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id])

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEquals(xn_ids[0], subjects[0])

        self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue')

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)

        self.assertEquals(len(subjects),1)
        self.assertEquals(subjects[0], xn_ids[0])

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_move_activated_subscription(self):

        stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp')
        #--------------------------------------------------------------------------------
        # Test moving after activate
        #--------------------------------------------------------------------------------

        subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id])
        self.pubsub_management.activate_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
        self.assertEquals(xn_ids[0], subjects[0])

        self.verified = Event()

        def verify(m,r,s):
            self.assertEquals(m,'verified')
            self.verified.set()

        subscriber = StandaloneStreamSubscriber('second_queue', verify)
        subscriber.start()

        self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue')

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True)
        subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)

        self.assertEquals(len(subjects),1)
        self.assertEquals(subjects[0], xn_ids[0])

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('verified')

        self.assertTrue(self.verified.wait(2))

        self.pubsub_management.deactivate_subscription(subscription_id)

        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

    def test_queue_cleanup(self):
        stream_id, route = self.pubsub_management.create_stream('test_stream','xp1')
        xn_objs, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1')
        for xn_obj in xn_objs:
            xn = self.container.ex_manager.create_xn_queue(xn_obj.name)
            xn.delete()
        subscription_id = self.pubsub_management.create_subscription('queue1',stream_ids=[stream_id])
        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1')
        self.assertEquals(len(xn_ids),1)

        self.pubsub_management.delete_subscription(subscription_id)

        xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='queue1')
        self.assertEquals(len(xn_ids),0)

    def test_activation_and_deactivation(self):
        stream_id, route = self.pubsub_management.create_stream('stream1','xp1')
        subscription_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])

        self.check1 = Event()

        def verifier(m,r,s):
            self.check1.set()


        subscriber = StandaloneStreamSubscriber('sub1',verifier)
        subscriber.start()

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish('should not receive')

        self.assertFalse(self.check1.wait(0.25))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        self.check1.clear()
        self.assertFalse(self.check1.is_set())

        self.pubsub_management.deactivate_subscription(subscription_id)

        publisher.publish('should not receive')
        self.assertFalse(self.check1.wait(0.5))

        self.pubsub_management.activate_subscription(subscription_id)

        publisher.publish('should receive')
        self.assertTrue(self.check1.wait(2))

        subscriber.stop()

        self.pubsub_management.deactivate_subscription(subscription_id)
        self.pubsub_management.delete_subscription(subscription_id)
        self.pubsub_management.delete_stream(stream_id)

        

    def test_topic_crud(self):

        topic_id = self.pubsub_management.create_topic(name='test_topic', exchange_point='test_xp')
        self.exchange_cleanup.append('test_xp')

        topic = self.pubsub_management.read_topic(topic_id)

        self.assertEquals(topic.name,'test_topic')
        self.assertEquals(topic.exchange_point, 'test_xp')

        self.pubsub_management.delete_topic(topic_id)
        with self.assertRaises(NotFound):
            self.pubsub_management.read_topic(topic_id)

    def test_full_pubsub(self):

        self.sub1_sat = Event()
        self.sub2_sat = Event()

        def subscriber1(m,r,s):
            self.sub1_sat.set()

        def subscriber2(m,r,s):
            self.sub2_sat.set()

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        sub1.start()
        self.addCleanup(sub1.stop)

        sub2 = StandaloneStreamSubscriber('sub2', subscriber2)
        sub2.start()
        self.addCleanup(sub2.stop)

        log_topic = self.pubsub_management.create_topic('instrument_logs', exchange_point='instruments')
        self.addCleanup(self.pubsub_management.delete_topic, log_topic)
        science_topic = self.pubsub_management.create_topic('science_data', exchange_point='instruments')
        self.addCleanup(self.pubsub_management.delete_topic, science_topic)
        events_topic = self.pubsub_management.create_topic('notifications', exchange_point='events')
        self.addCleanup(self.pubsub_management.delete_topic, events_topic)


        log_stream, route = self.pubsub_management.create_stream('instrument1-logs', topic_ids=[log_topic], exchange_point='instruments')
        self.addCleanup(self.pubsub_management.delete_stream, log_stream)
        ctd_stream, route = self.pubsub_management.create_stream('instrument1-ctd', topic_ids=[science_topic], exchange_point='instruments')
        self.addCleanup(self.pubsub_management.delete_stream, ctd_stream)
        event_stream, route = self.pubsub_management.create_stream('notifications', topic_ids=[events_topic], exchange_point='events')
        self.addCleanup(self.pubsub_management.delete_stream, event_stream)
        raw_stream, route = self.pubsub_management.create_stream('temp', exchange_point='global.data')
        self.addCleanup(self.pubsub_management.delete_stream, raw_stream)


        subscription1 = self.pubsub_management.create_subscription('subscription1', stream_ids=[log_stream,event_stream], exchange_name='sub1')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription1)
        subscription2 = self.pubsub_management.create_subscription('subscription2', exchange_points=['global.data'], stream_ids=[ctd_stream], exchange_name='sub2')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription2)

        self.pubsub_management.activate_subscription(subscription1)
        self.addCleanup(self.pubsub_management.deactivate_subscription, subscription1)
        self.pubsub_management.activate_subscription(subscription2)
        self.addCleanup(self.pubsub_management.deactivate_subscription, subscription2)

        self.publish_on_stream(log_stream, 1)
        self.assertTrue(self.sub1_sat.wait(4))
        self.assertFalse(self.sub2_sat.is_set())

        self.publish_on_stream(raw_stream, 1)
        self.assertTrue(self.sub2_sat.wait(4))
    
    def test_topic_craziness(self):

        self.msg_queue = Queue()

        def subscriber1(m,r,s):
            self.msg_queue.put(m)

        sub1 = StandaloneStreamSubscriber('sub1', subscriber1)
        sub1.start()
        self.addCleanup(sub1.stop)

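        # Tree 1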
        topic1 = self.pubsub_management.create_topic('topic1', exchange_point='xp1')
        self.addCleanup(self.pubsub_management.delete_topic, topic1)
        topic2 = self.pubsub_management.create_topic('topic2', exchange_point='xp1', parent_topic_id=topic1)
        self.addCleanup(self.pubsub_management.delete_topic, topic2)
        topic3 = self.pubsub_management.create_topic('topic3', exchange_point='xp1', parent_topic_id=topic1)
        self.addCleanup(self.pubsub_management.delete_topic, topic3)
        topic4 = self.pubsub_management.create_topic('topic4', exchange_point='xp1', parent_topic_id=topic2)
        self.addCleanup(self.pubsub_management.delete_topic, topic4)
        topic5 = self.pubsub_management.create_topic('topic5', exchange_point='xp1', parent_topic_id=topic2)
        self.addCleanup(self.pubsub_management.delete_topic, topic5)
        topic6 = self.pubsub_management.create_topic('topic6', exchange_point='xp1', parent_topic_id=topic3)
        self.addCleanup(self.pubsub_management.delete_topic, topic6)
        topic7 = self.pubsub_management.create_topic('topic7', exchange_point='xp1', parent_topic_id=topic3)
        self.addCleanup(self.pubsub_management.delete_topic, topic7)

        # Tree 2
        topic8 = self.pubsub_management.create_topic('topic8', exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_topic, topic8)
        topic9 = self.pubsub_management.create_topic('topic9', exchange_point='xp2', parent_topic_id=topic8)
        self.addCleanup(self.pubsub_management.delete_topic, topic9)
        topic10 = self.pubsub_management.create_topic('topic10', exchange_point='xp2', parent_topic_id=topic9)
        self.addCleanup(self.pubsub_management.delete_topic, topic10)
        topic11 = self.pubsub_management.create_topic('topic11', exchange_point='xp2', parent_topic_id=topic9)
        self.addCleanup(self.pubsub_management.delete_topic, topic11)
        topic12 = self.pubsub_management.create_topic('topic12', exchange_point='xp2', parent_topic_id=topic11)
        self.addCleanup(self.pubsub_management.delete_topic, topic12)
        topic13 = self.pubsub_management.create_topic('topic13', exchange_point='xp2', parent_topic_id=topic11)
        self.addCleanup(self.pubsub_management.delete_topic, topic13)
        self.exchange_cleanup.extend(['xp1','xp2'])
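
        # Topic trees built above:
        #   xp1: topic1 -> (topic2 -> [topic4, topic5], topic3 -> [topic6, topic7])
        #   xp2: topic8 -> topic9 -> (topic10, topic11 -> [topic12, topic13])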
        
        stream1_id, route = self.pubsub_management.create_stream('stream1', topic_ids=[topic7, topic4, topic5], exchange_point='xp1')
        self.addCleanup(self.pubsub_management.delete_stream, stream1_id)
        stream2_id, route = self.pubsub_management.create_stream('stream2', topic_ids=[topic8], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream2_id)
        stream3_id, route = self.pubsub_management.create_stream('stream3', topic_ids=[topic10,topic13], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream3_id)
        stream4_id, route = self.pubsub_management.create_stream('stream4', topic_ids=[topic9], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream4_id)
        stream5_id, route = self.pubsub_management.create_stream('stream5', topic_ids=[topic11], exchange_point='xp2')
        self.addCleanup(self.pubsub_management.delete_stream, stream5_id)

        subscription1 = self.pubsub_management.create_subscription('sub1', topic_ids=[topic1])
        self.addCleanup(self.pubsub_management.delete_subscription, subscription1)
        subscription2 = self.pubsub_management.create_subscription('sub2', topic_ids=[topic8], exchange_name='sub1')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription2)
        subscription3 = self.pubsub_management.create_subscription('sub3', topic_ids=[topic9], exchange_name='sub1')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription3)
        subscription4 = self.pubsub_management.create_subscription('sub4', topic_ids=[topic10,topic13, topic11], exchange_name='sub1')
        self.addCleanup(self.pubsub_management.delete_subscription, subscription4)
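        # A subscription on a topic receives streams bound to that topic or to
        # any of its descendant topics, but not to its ancestors; the
        # activate/publish/deactivate blocks below verify this.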
        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription1)

        self.publish_on_stream(stream1_id,1)

        self.assertEquals(self.msg_queue.get(timeout=10), 1)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)


        self.pubsub_management.deactivate_subscription(subscription1)
        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription2)
        
        self.publish_on_stream(stream2_id,2)
        self.assertEquals(self.msg_queue.get(timeout=10), 2)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.1)

        self.pubsub_management.deactivate_subscription(subscription2)

        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription3)

        self.publish_on_stream(stream2_id, 3)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream3_id, 4)
        self.assertEquals(self.msg_queue.get(timeout=10),4)


        self.pubsub_management.deactivate_subscription(subscription3)

        #--------------------------------------------------------------------------------
        self.pubsub_management.activate_subscription(subscription4)

        self.publish_on_stream(stream4_id, 5)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.publish_on_stream(stream5_id, 6)
        self.assertEquals(self.msg_queue.get(timeout=10),6)
        with self.assertRaises(Empty):
            self.msg_queue.get(timeout=0.3)

        self.pubsub_management.deactivate_subscription(subscription4)
        
        #--------------------------------------------------------------------------------
    
    def cleanup_contexts(self):
        for context_id in self.context_ids:
            self.dataset_management.delete_parameter_context(context_id)

    def add_context_to_cleanup(self, context_id):
        self.context_ids.add(context_id)

    def _get_pdict(self, filter_values):
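        """
        Builds (or returns a cached) parameter dictionary containing only the
        parameter contexts whose names appear in filter_values.
        """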
        t_ctxt = ParameterContext('TIME', param_type=QuantityType(value_encoding=np.dtype('int64')))
        t_ctxt.uom = 'seconds since 01-01-1900'
        t_ctxt_id = self.dataset_management.create_parameter_context(name='TIME', parameter_context=t_ctxt.dump(), parameter_type='quantity<int64>', units=t_ctxt.uom)
        self.add_context_to_cleanup(t_ctxt_id)

        lat_ctxt = ParameterContext('LAT', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lat_ctxt.axis = AxisTypeEnum.LAT
        lat_ctxt.uom = 'degree_north'
        lat_ctxt_id = self.dataset_management.create_parameter_context(name='LAT', parameter_context=lat_ctxt.dump(), parameter_type='quantity<float32>', units=lat_ctxt.uom)
        self.add_context_to_cleanup(lat_ctxt_id)


        lon_ctxt = ParameterContext('LON', param_type=ConstantType(QuantityType(value_encoding=np.dtype('float32'))), fill_value=-9999)
        lon_ctxt.axis = AxisTypeEnum.LON
        lon_ctxt.uom = 'degree_east'
        lon_ctxt_id = self.dataset_management.create_parameter_context(name='LON', parameter_context=lon_ctxt.dump(), parameter_type='quantity<float32>', units=lon_ctxt.uom)
        self.add_context_to_cleanup(lon_ctxt_id)


        # Independent Parameters
        # Temperature - values expected to be the decimal results of conversion from hex
        temp_ctxt = ParameterContext('TEMPWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        temp_ctxt.uom = 'deg_C'
        temp_ctxt_id = self.dataset_management.create_parameter_context(name='TEMPWAT_L0', parameter_context=temp_ctxt.dump(), parameter_type='quantity<float32>', units=temp_ctxt.uom)
        self.add_context_to_cleanup(temp_ctxt_id)


        # Conductivity - values expected to be the decimal results of conversion from hex
        cond_ctxt = ParameterContext('CONDWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        cond_ctxt.uom = 'S m-1'
        cond_ctxt_id = self.dataset_management.create_parameter_context(name='CONDWAT_L0', parameter_context=cond_ctxt.dump(), parameter_type='quantity<float32>', units=cond_ctxt.uom)
        self.add_context_to_cleanup(cond_ctxt_id)


        # Pressure - values expected to be the decimal results of conversion from hex
        press_ctxt = ParameterContext('PRESWAT_L0', param_type=QuantityType(value_encoding=np.dtype('float32')), fill_value=-9999)
        press_ctxt.uom = 'dbar'
        press_ctxt_id = self.dataset_management.create_parameter_context(name='PRESWAT_L0', parameter_context=press_ctxt.dump(), parameter_type='quantity<float32>', units=press_ctxt.uom)
        self.add_context_to_cleanup(press_ctxt_id)


        # Dependent Parameters

        # TEMPWAT_L1 = (TEMPWAT_L0 / 10000) - 10
        tl1_func = '(T / 10000) - 10'
        tl1_pmap = {'T': 'TEMPWAT_L0'}
        expr = NumexprFunction('TEMPWAT_L1', tl1_func, ['T'], param_map=tl1_pmap)
        tempL1_ctxt = ParameterContext('TEMPWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        tempL1_ctxt.uom = 'deg_C'
        tempL1_ctxt_id = self.dataset_management.create_parameter_context(name=tempL1_ctxt.name, parameter_context=tempL1_ctxt.dump(), parameter_type='pfunc', units=tempL1_ctxt.uom)
        self.add_context_to_cleanup(tempL1_ctxt_id)


        # CONDWAT_L1 = (CONDWAT_L0 / 100000) - 0.5
        cl1_func = '(C / 100000) - 0.5'
        cl1_pmap = {'C': 'CONDWAT_L0'}
        expr = NumexprFunction('CONDWAT_L1', cl1_func, ['C'], param_map=cl1_pmap)
        condL1_ctxt = ParameterContext('CONDWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        condL1_ctxt.uom = 'S m-1'
        condL1_ctxt_id = self.dataset_management.create_parameter_context(name=condL1_ctxt.name, parameter_context=condL1_ctxt.dump(), parameter_type='pfunc', units=condL1_ctxt.uom)
        self.add_context_to_cleanup(condL1_ctxt_id)


        # Equation uses p_range, which is a calibration coefficient - Fixing to 679.34040721
        #   PRESWAT_L1 = (PRESWAT_L0 * p_range / (0.85 * 65536)) - (0.05 * p_range)
        pl1_func = '(P * p_range / (0.85 * 65536)) - (0.05 * p_range)'
        pl1_pmap = {'P': 'PRESWAT_L0', 'p_range': 679.34040721}
        expr = NumexprFunction('PRESWAT_L1', pl1_func, ['P', 'p_range'], param_map=pl1_pmap)
        presL1_ctxt = ParameterContext('PRESWAT_L1', param_type=ParameterFunctionType(function=expr), variability=VariabilityEnum.TEMPORAL)
        presL1_ctxt.uom = 'dbar'
        presL1_ctxt_id = self.dataset_management.create_parameter_context(name=presL1_ctxt.name, parameter_context=presL1_ctxt.dump(), parameter_type='pfunc', units=presL1_ctxt.uom)
        self.add_context_to_cleanup(presL1_ctxt_id)


        # Density & practical salinity calculated using the Gibbs Seawater library - available via the python-gsw project:
        #       https://code.google.com/p/python-gsw/ & http://pypi.python.org/pypi/gsw/3.0.1

        # PRACSAL = gsw.SP_from_C((CONDWAT_L1 * 10), TEMPWAT_L1, PRESWAT_L1)
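        # Note: gsw.SP_from_C expects conductivity in mS/cm, hence the *10
        # scaling of CONDWAT_L1 (S m-1) in the parameter map below.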
        owner = 'gsw'
        sal_func = 'SP_from_C'
        sal_arglist = ['C', 't', 'p']
        sal_pmap = {'C': NumexprFunction('CONDWAT_L1*10', 'C*10', ['C'], param_map={'C': 'CONDWAT_L1'}), 't': 'TEMPWAT_L1', 'p': 'PRESWAT_L1'}
        sal_kwargmap = None
        expr = PythonFunction('PRACSAL', owner, sal_func, sal_arglist, sal_kwargmap, sal_pmap)
        sal_ctxt = ParameterContext('PRACSAL', param_type=ParameterFunctionType(expr), variability=VariabilityEnum.TEMPORAL)
        sal_ctxt.uom = 'g kg-1'
        sal_ctxt_id = self.dataset_management.create_parameter_context(name=sal_ctxt.name, parameter_context=sal_ctxt.dump(), parameter_type='pfunc', units=sal_ctxt.uom)
        self.add_context_to_cleanup(sal_ctxt_id)


        # absolute_salinity = gsw.SA_from_SP(PRACSAL, PRESWAT_L1, longitude, latitude)
        # conservative_temperature = gsw.CT_from_t(absolute_salinity, TEMPWAT_L1, PRESWAT_L1)
        # DENSITY = gsw.rho(absolute_salinity, conservative_temperature, PRESWAT_L1)
        owner = 'gsw'
        abs_sal_expr = PythonFunction('abs_sal', owner, 'SA_from_SP', ['PRACSAL', 'PRESWAT_L1', 'LON','LAT'])
        cons_temp_expr = PythonFunction('cons_temp', owner, 'CT_from_t', [abs_sal_expr, 'TEMPWAT_L1', 'PRESWAT_L1'])
        dens_expr = PythonFunction('DENSITY', owner, 'rho', [abs_sal_expr, cons_temp_expr, 'PRESWAT_L1'])
        dens_ctxt = ParameterContext('DENSITY', param_type=ParameterFunctionType(dens_expr), variability=VariabilityEnum.TEMPORAL)
        dens_ctxt.uom = 'kg m-3'
        dens_ctxt_id = self.dataset_management.create_parameter_context(name=dens_ctxt.name, parameter_context=dens_ctxt.dump(), parameter_type='pfunc', units=dens_ctxt.uom)
        self.add_context_to_cleanup(dens_ctxt_id)

        
        ids = [t_ctxt_id, lat_ctxt_id, lon_ctxt_id, temp_ctxt_id, cond_ctxt_id, press_ctxt_id, tempL1_ctxt_id, condL1_ctxt_id, presL1_ctxt_id, sal_ctxt_id, dens_ctxt_id]
        contexts = [t_ctxt, lat_ctxt, lon_ctxt, temp_ctxt, cond_ctxt, press_ctxt, tempL1_ctxt, condL1_ctxt, presL1_ctxt, sal_ctxt, dens_ctxt]
        context_ids = [ids[i] for i,ctxt in enumerate(contexts) if ctxt.name in filter_values]
        pdict_name = '_'.join([ctxt.name for ctxt in contexts if ctxt.name in filter_values])

        if pdict_name in self.pdicts:
            return self.pdicts[pdict_name]

        pdict_id = self.dataset_management.create_parameter_dictionary(pdict_name, parameter_context_ids=context_ids, temporal_context='time')
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)
        self.pdicts[pdict_name] = pdict_id
        return pdict_id


class TestVisualizationServiceIntegration(VisualizationIntegrationTestHelper):

    def setUp(self):
        # Start container

        logging.disable(logging.ERROR)
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        logging.disable(logging.NOTSET)

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.pubsubclient =  PubsubManagementServiceClient(node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.datasetclient =  DatasetManagementServiceClient(node=self.container.node)
        self.workflowclient = WorkflowManagementServiceClient(node=self.container.node)
        self.process_dispatcher = ProcessDispatcherServiceClient(node=self.container.node)
        self.vis_client = VisualizationServiceClient(node=self.container.node)

        self.ctd_stream_def = SBE37_CDM_stream_definition()

    def validate_messages(self, msgs):

        cc = self.container
        assertions = self.assertTrue

        rdt = RecordDictionaryTool.load_from_granule(msgs.body)

        vardict = {}
        vardict['temp'] = get_safe(rdt, 'temp')
        vardict['time'] = get_safe(rdt, 'time')
        print vardict['time']
        print vardict['temp']




    @attr('LOCOINT')
    #@patch.dict('pyon.ion.exchange.CFG', {'container':{'exchange':{'auto_register': False}}})
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI')
    @unittest.skip("in progress")
    def test_visualization_queue(self):

        assertions = self.assertTrue

        #The list of data product streams to monitor
        data_product_stream_ids = list()

        #Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        data_product_stream_ids.append(ctd_stream_id)

        user_queue_name = 'user_queue'

        xq = self.container.ex_manager.create_xn_queue(user_queue_name)

        salinity_subscription_id = self.pubsubclient.create_subscription(
            query=StreamQuery(data_product_stream_ids),
            exchange_name = user_queue_name,
            exchange_point = 'science_data',
            name = "user visualization queue"
        )


        subscriber_registrar = StreamSubscriberRegistrar(container=self.container)

        #subscriber = subscriber_registrar.create_subscriber(exchange_name=user_queue_name)
        #subscriber.start()

        #Using endpoint Subscriber directly; but should be a Stream-based subscriber that does not require a process
        #subscriber = Subscriber(from_name=(subscriber_registrar.XP, user_queue_name), callback=cb)
        subscriber = Subscriber(from_name=xq)
        subscriber.initialize()

        # after the queue has been created it is safe to activate the subscription
        self.pubsubclient.activate_subscription(subscription_id=salinity_subscription_id)

        #Start the output stream listener to monitor and collect messages
        #results = self.start_output_stream_and_listen(None, data_product_stream_ids)

        #Not sure why this is needed - but it is
        #subscriber._chan.stop_consume()

        ctd_sim_pid = self.start_simple_input_stream_process(ctd_stream_id)

        gevent.sleep(10.0)  # Send some messages - don't care how many


        msg_count,_ = xq.get_stats()
        log.info('Messages in user queue 1: %s ' % msg_count)

        #Validate the data from each of the messages along the way
        #self.validate_messages(results)

#        for x in range(msg_count):
#            mo = subscriber.get_one_msg(timeout=1)
#            print mo.body
#            mo.ack()

        msgs = subscriber.get_all_msgs(timeout=2)
        for x in range(len(msgs)):
            msgs[x].ack()
            self.validate_messages(msgs[x])
           # print msgs[x].body



        #Should be zero after pulling all of the messages.
        msg_count,_ = xq.get_stats()
        log.info('Messages in user queue 2: %s ' % msg_count)


        #Trying to continue to receive messages in the queue
        gevent.sleep(5.0)  # Send some messages - don't care how many


        #Turning off after everything - since it is more representative of an always on stream of data!
        self.process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data



        #Should see more messages in the queue
        msg_count,_ = xq.get_stats()
        log.info('Messages in user queue 3: %s ' % msg_count)

        msgs = subscriber.get_all_msgs(timeout=2)
        for x in range(len(msgs)):
            msgs[x].ack()
            self.validate_messages(msgs[x])

        #Should be zero after pulling all of the messages.
        msg_count,_ = xq.get_stats()
        log.info('Messages in user queue 4: %s ' % msg_count)

        subscriber.close()

    #@unittest.skip("in progress")
    def test_realtime_visualization(self):
        assertions = self.assertTrue


        # Build the workflow definition
        workflow_def_obj = IonObject(RT.WorkflowDefinition, name='GoogleDT_Test_Workflow',description='Tests the workflow of converting stream data to Google DT')

        #Add a transformation process definition
        google_dt_procdef_id = self.create_google_dt_data_process_definition()
        workflow_step_obj = IonObject('DataProcessWorkflowStep', data_process_definition_id=google_dt_procdef_id, persist_process_output_data=True)
        workflow_def_obj.workflow_steps.append(workflow_step_obj)

        #Create it in the resource registry
        workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def_obj)


        #Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()

        #Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(workflow_def_id, ctd_parsed_data_product_id, timeout=20)


        ctd_sim_pid = self.start_sinusoidal_input_stream_process(ctd_stream_id)


        #TODO - Need to add workflow creation for google data table

        vis_params ={}
        vis_params['in_product_type'] = 'google_dt'
        vis_token = self.vis_client.initiate_realtime_visualization(data_product_id=workflow_product_id, visualization_parameters=vis_params)

        #Trying to continue to receive messages in the queue
        gevent.sleep(10.0)  # Send some messages - don't care how many

        #TODO - find out what the actual return data type should be
        vis_data = self.vis_client.get_realtime_visualization_data(vis_token)

        print vis_data

        #Trying to continue to receive messages in the queue
        gevent.sleep(5.0)  # Send some messages - don't care how many


        #Turning off after everything - since it is more representative of an always on stream of data!
        #todo remove the try except
        try:
            self.process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data
        except:
            log.warning("cancelling process did not work")
        vis_data = self.vis_client.get_realtime_visualization_data(vis_token)

        print vis_data

        self.vis_client.terminate_realtime_visualization_data(vis_token)

        #Stop the workflow processes
        self.workflowclient.terminate_data_process_workflow(workflow_id, False)  # Should test true at some point

        #Cleanup to make sure delete is correct.
        self.workflowclient.delete_workflow_definition(workflow_def_id)


    def test_google_dt_overview_visualization(self):

        #Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        ctd_sim_pid = self.start_sinusoidal_input_stream_process(ctd_stream_id)

        # Generate some data for a few seconds
        gevent.sleep(5.0)

        #Turning off after everything - since it is more representative of an always on stream of data!
        self.process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data

        # Use the data product to test the data retrieval and google dt generation capability of the vis service
        vis_data = self.vis_client.get_visualization_data(ctd_parsed_data_product_id)

        # validate the returned data
        self.validate_vis_service_google_dt_results(vis_data)


    def test_mpl_graphs_overview_visualization(self):

        #Create the input data product
        ctd_stream_id, ctd_parsed_data_product_id = self.create_ctd_input_stream_and_data_product()
        ctd_sim_pid = self.start_sinusoidal_input_stream_process(ctd_stream_id)

        # Generate some data for a few seconds
        gevent.sleep(5.0)

        #Turning off after everything - since it is more representative of an always on stream of data!
        self.process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data

        # Use the data product to test the data retrieval and google dt generation capability of the vis service
        vis_data = self.vis_client.get_visualization_image(ctd_parsed_data_product_id)

        # validate the returned data
        self.validate_vis_service_mpl_graphs_results(vis_data)

        return
Example no. 34
0
    def test_usgs_integration(self):
        '''
        test_usgs_integration
        Test full DM Services Integration using usgs
        '''
        cc = self.container
        assertions = self.assertTrue

        #-----------------------------
        # Copy below here
        #-----------------------------
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        process_list = []
        datasets = []

        datastore_name = 'test_usgs_integration'


        #---------------------------
        # Set up ingestion
        #---------------------------
        # Configure ingestion using eight workers, ingesting to the test_usgs_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,datastore_profile='SCIDATA'),
            number_of_workers=8
        )
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)

        usgs_stream_def = USGS_stream_definition()

        stream_def_id = pubsub_management_service.create_stream_definition(container=usgs_stream_def, name='Junk definition')


        #---------------------------
        # Set up the producers (USGS publishers)
        #---------------------------
        # Launch two simulated USGS producers
        for iteration in xrange(2):
            # Make a stream to output on

            stream_id = pubsub_management_service.create_stream(stream_definition_id=stream_def_id)

            #---------------------------
            # Set up the datasets
            #---------------------------
            dataset_id = dataset_management_service.create_dataset(
                stream_id=stream_id,
                datastore_name=datastore_name,
                view_name='datasets/stream_join_granule'
            )
            # Keep track of the datasets
            datasets.append(dataset_id)

            stream_policy_id = ingestion_management_service.create_dataset_configuration(
                dataset_id = dataset_id,
                archive_data = True,
                archive_metadata = True,
                ingestion_configuration_id = ingestion_configuration_id
            )


            producer_definition = ProcessDefinition()
            producer_definition.executable = {
                'module':'ion.agents.eoi.handler.usgs_stream_publisher',
                'class':'UsgsPublisher'
            }
            configuration = {
                'process':{
                    'stream_id':stream_id,
                    }
            }
            procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
            log.debug('LUKE_DEBUG: procdef_id: %s', procdef_id)
            pid = process_dispatcher.schedule_process(process_definition_id=procdef_id, configuration=configuration)


            # Keep track, we'll kill 'em later.
            process_list.append(pid)
        # Get about 4 seconds of data
        time.sleep(4)

        #---------------------------
        # Stop producing data
        #---------------------------

        for process in process_list:
            process_dispatcher.cancel_process(process)

        #----------------------------------------------
        # The replay and the transform, a love story.
        #----------------------------------------------
        # Happy Valentines to the clever coder who catches the above!

        transform_definition = ProcessDefinition()
        transform_definition.executable = {
            'module':'ion.processes.data.transforms.transform_example',
            'class':'TransformCapture'
        }
        transform_definition_id = process_dispatcher.create_process_definition(process_definition=transform_definition)

        dataset_id = datasets.pop() # Just need one for now
        replay_id, stream_id = data_retriever_service.define_replay(dataset_id=dataset_id)

        #--------------------------------------------
        # I'm Selling magazine subscriptions here!
        #--------------------------------------------

        subscription = pubsub_management_service.create_subscription(query=StreamQuery(stream_ids=[stream_id]),
            exchange_name='transform_capture_point')

        #--------------------------------------------
        # Start the transform (capture)
        #--------------------------------------------
        transform_id = transform_management_service.create_transform(
            name='capture_transform',
            in_subscription_id=subscription,
            process_definition_id=transform_definition_id
        )

        transform_management_service.activate_transform(transform_id=transform_id)

        #--------------------------------------------
        # BEGIN REPLAY!
        #--------------------------------------------

        data_retriever_service.start_replay(replay_id=replay_id)

        #--------------------------------------------
        # Lets get some boundaries
        #--------------------------------------------

        bounds = dataset_management_service.get_dataset_bounds(dataset_id=dataset_id)
Example no. 35
0
class TestOmsLaunch(IonIntegrationTestCase):

    def setUp(self):
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.omsclient = ObservatoryManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(node=self.container.node)
        self.pubsubcli = PubsubManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.dataset_management = DatasetManagementServiceClient()


        self.platformModel_id = None

        # rsn_oms: to retrieve network structure and information from RSN-OMS:
        # Note that OmsClientFactory will create an "embedded" RSN OMS
        # simulator object by default.
        self.rsn_oms = OmsClientFactory.create_instance()

        self.all_platforms = {}
        self.topology = {}
        self.agent_device_map = {}
        self.agent_streamconfig_map = {}

        self._async_data_result = AsyncResult()
        self._data_subscribers = []
        self._samples_received = []
        self.addCleanup(self._stop_data_subscribers)

        self._async_event_result = AsyncResult()
        self._event_subscribers = []
        self._events_received = []
        self.addCleanup(self._stop_event_subscribers)
        self._start_event_subscriber()

        self._set_up_DataProduct_obj()
        self._set_up_PlatformModel_obj()

    def _set_up_DataProduct_obj(self):
        # Create data product object to be used for each of the platform log streams
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('platform_eng_parsed', id_only=True)
        self.platform_eng_stream_def_id = self.pubsubcli.create_stream_definition(
            name='platform_eng', parameter_dictionary_id=pdict_id)
        self.dp_obj = IonObject(RT.DataProduct,
            name='platform_eng data',
            description='platform_eng test',
            temporal_domain = tdom,
            spatial_domain = sdom)

    def _set_up_PlatformModel_obj(self):
        # Create PlatformModel
        platformModel_obj = IonObject(RT.PlatformModel,
                                      name='RSNPlatformModel',
                                      description="RSNPlatformModel")
        try:
            self.platformModel_id = self.imsclient.create_platform_model(platformModel_obj)
        except BadRequest as ex:
            self.fail("failed to create new PLatformModel: %s" %ex)
        log.debug( 'new PlatformModel id = %s', self.platformModel_id)

    def _traverse(self, platform_id, parent_platform_objs=None):
        """
        Recursive routine that repeatedly calls _prepare_platform to build
        the object dictionary for each platform.

        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent
                        platform, if any.

        @retval the dict returned by _prepare_platform at this level.
        """

        log.info("Starting _traverse for %r", platform_id)

        plat_objs = self._prepare_platform(platform_id, parent_platform_objs)

        self.all_platforms[platform_id] = plat_objs

        # now, traverse the children:
        retval = self.rsn_oms.getSubplatformIDs(platform_id)
        subplatform_ids = retval[platform_id]
        for subplatform_id in subplatform_ids:
            self._traverse(subplatform_id, plat_objs)

        # note, topology indexed by platform_id
        self.topology[platform_id] = plat_objs['children']

        return plat_objs

    def _prepare_platform(self, platform_id, parent_platform_objs):
        """
        This routine generalizes the manual construction currently done in
        test_oms_launch.py. It is called by the recursive _traverse method so
        all platforms starting from a given base platform are prepared.

        Note: For simplicity in this test, sites are organized in the same
        hierarchical way as the platforms themselves.

        @param platform_id ID of the platform to be visited
        @param parent_platform_objs dict of objects associated to parent
                        platform, if any.

        @retval a dict of associated objects similar to those in
                test_oms_launch
        """

        site__obj = IonObject(RT.PlatformSite,
                            name='%s_PlatformSite' % platform_id,
                            description='%s_PlatformSite platform site' % platform_id)

        site_id = self.omsclient.create_platform_site(site__obj)

        if parent_platform_objs:
            # establish hasSite association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['site_id'],
                predicate=PRED.hasSite,
                object=site_id)

        # prepare platform attributes and ports:
        monitor_attributes = self._prepare_platform_attributes(platform_id)
        ports =              self._prepare_platform_ports(platform_id)

        device__obj = IonObject(RT.PlatformDevice,
                        name='%s_PlatformDevice' % platform_id,
                        description='%s_PlatformDevice platform device' % platform_id,
                        ports=ports,
                        platform_monitor_attributes = monitor_attributes)

        device_id = self.imsclient.create_platform_device(device__obj)

        self.imsclient.assign_platform_model_to_platform_device(self.platformModel_id, device_id)
        self.rrclient.create_association(subject=site_id, predicate=PRED.hasDevice, object=device_id)
        self.damsclient.register_instrument(instrument_id=device_id)


        if parent_platform_objs:
            # establish hasDevice association with the parent
            self.rrclient.create_association(
                subject=parent_platform_objs['device_id'],
                predicate=PRED.hasDevice,
                object=device_id)

        agent__obj = IonObject(RT.PlatformAgent,
                            name='%s_PlatformAgent' % platform_id,
                            description='%s_PlatformAgent platform agent' % platform_id)

        agent_id = self.imsclient.create_platform_agent(agent__obj)

        if parent_platform_objs:
            # add this platform_id to parent's children:
            parent_platform_objs['children'].append(platform_id)


        self.imsclient.assign_platform_model_to_platform_agent(self.platformModel_id, agent_id)

#        agent_instance_obj = IonObject(RT.PlatformAgentInstance,
#                                name='%s_PlatformAgentInstance' % platform_id,
#                                description="%s_PlatformAgentInstance" % platform_id)
#
#        agent_instance_id = self.imsclient.create_platform_agent_instance(
#                            agent_instance_obj, agent_id, device_id)

        plat_objs = {
            'platform_id':        platform_id,
            'site__obj':          site__obj,
            'site_id':            site_id,
            'device__obj':        device__obj,
            'device_id':          device_id,
            'agent__obj':         agent__obj,
            'agent_id':           agent_id,
#            'agent_instance_obj': agent_instance_obj,
#            'agent_instance_id':  agent_instance_id,
            'children':           []
        }

        log.info("plat_objs for platform_id %r = %s", platform_id, str(plat_objs))

        self.agent_device_map[platform_id] = device__obj

        stream_config = self._create_stream_config(plat_objs)
        self.agent_streamconfig_map[platform_id] = stream_config
#        self._start_data_subscriber(agent_instance_id, stream_config)

        return plat_objs

    def _prepare_platform_attributes(self, platform_id):
        """
        Returns the list of PlatformMonitorAttributes objects corresponding to
        the attributes associated to the given platform.
        """
        result = self.rsn_oms.getPlatformAttributes(platform_id)
        self.assertTrue(platform_id in result)
        ret_infos = result[platform_id]

        monitor_attributes = []
        for attrName, attrDfn in ret_infos.iteritems():
            log.debug("platform_id=%r: preparing attribute=%r", platform_id, attrName)

            monitor_rate = attrDfn['monitorCycleSeconds']
            units =        attrDfn['units']

            plat_attr_obj = IonObject(OT.PlatformMonitorAttributes,
                                      id=attrName,
                                      monitor_rate=monitor_rate,
                                      units=units)

            monitor_attributes.append(plat_attr_obj)

        return monitor_attributes

    def _prepare_platform_ports(self, platform_id):
        """
        Returns the list of PlatformPort objects corresponding to the ports
        associated to the given platform.
        """
        result = self.rsn_oms.getPlatformPorts(platform_id)
        self.assertTrue(platform_id in result)
        port_dict = result[platform_id]

        ports = []
        for port_id, port in port_dict.iteritems():
            log.debug("platform_id=%r: preparing port=%r", platform_id, port_id)
            ip_address = port['comms']['ip']
            plat_port_obj = IonObject(OT.PlatformPort,
                                      port_id=port_id,
                                      ip_address=ip_address)

            ports.append(plat_port_obj)

        return ports

    def _create_stream_config(self, plat_objs):

        platform_id = plat_objs['platform_id']
        device_id =   plat_objs['device_id']


        #create the log data product
        self.dp_obj.name = '%s platform_eng data' % platform_id
        data_product_id = self.dpclient.create_data_product(data_product=self.dp_obj, stream_definition_id=self.platform_eng_stream_def_id)
        self.damsclient.assign_data_product(input_resource_id=device_id, data_product_id=data_product_id)
        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id, PRED.hasStream, None, True)

        stream_config = self._build_stream_config(stream_ids[0])
        return stream_config

    def _build_stream_config(self, stream_id=''):

        platform_eng_dictionary = DatasetManagementService.get_parameter_dictionary_by_name('platform_eng_parsed')

        #get the streamroute object from pubsub by passing the stream_id
        stream_def_ids, _ = self.rrclient.find_objects(stream_id,
            PRED.hasStreamDefinition,
            RT.StreamDefinition,
            True)


        stream_route = self.pubsubcli.read_stream_route(stream_id=stream_id)
        stream_config = {'routing_key' : stream_route.routing_key,
                         'stream_id' : stream_id,
                         'stream_definition_ref' : stream_def_ids[0],
                         'exchange_point' : stream_route.exchange_point,
                         'parameter_dictionary':platform_eng_dictionary.dump()}

        return stream_config

    def _set_platform_agent_instances(self):
        """
        Once most of the objs/defs associated with all platforms are in
        place, this method creates and associates the PlatformAgentInstance
        elements.
        """

        self.platform_configs = {}
        for platform_id, plat_objs in self.all_platforms.iteritems():

            PLATFORM_CONFIG = self.platform_configs[platform_id] = {
                'platform_id':             platform_id,
                'platform_topology':       self.topology,

#                'agent_device_map':        self.agent_device_map,
                'agent_streamconfig_map':  self.agent_streamconfig_map,

                'driver_config':           DVR_CONFIG,
            }

            agent_config = {
                'platform_config': PLATFORM_CONFIG,
            }

            agent_instance_obj = IonObject(RT.PlatformAgentInstance,
                                    name='%s_PlatformAgentInstance' % platform_id,
                                    description="%s_PlatformAgentInstance" % platform_id,
                                    agent_config=agent_config)

            agent_id = plat_objs['agent_id']
            device_id = plat_objs['device_id']
            agent_instance_id = self.imsclient.create_platform_agent_instance(
                                agent_instance_obj, agent_id, device_id)

            plat_objs['agent_instance_obj'] = agent_instance_obj
            plat_objs['agent_instance_id']  = agent_instance_id


            stream_config = self.agent_streamconfig_map[platform_id]
            self._start_data_subscriber(agent_instance_id, stream_config)


    def _start_data_subscriber(self, stream_name, stream_config):
        """
        Starts data subscriber for the given stream_name and stream_config
        """

        def consume_data(message, stream_route, stream_id):
            # A callback for processing subscribed-to data.
            log.info('Subscriber received data message: %s.', str(message))
            self._samples_received.append(message)
            self._async_data_result.set()

        log.info('_start_data_subscriber stream_name=%r', stream_name)

        stream_id = stream_config['stream_id']

        # Create subscription for the stream
        exchange_name = '%s_queue' % stream_name
        self.container.ex_manager.create_xn_queue(exchange_name).purge()
        sub = StandaloneStreamSubscriber(exchange_name, consume_data)
        sub.start()
        self._data_subscribers.append(sub)
        sub_id = self.pubsubcli.create_subscription(name=exchange_name, stream_ids=[stream_id])
        self.pubsubcli.activate_subscription(sub_id)
        sub.subscription_id = sub_id

    def _stop_data_subscribers(self):
        """
        Stop the data subscribers on cleanup.
        """
        try:
            for sub in self._data_subscribers:
                if hasattr(sub, 'subscription_id'):
                    try:
                        self.pubsubcli.deactivate_subscription(sub.subscription_id)
                    except:
                        pass
                    self.pubsubcli.delete_subscription(sub.subscription_id)
                sub.stop()
        finally:
            self._data_subscribers = []

    def _start_event_subscriber(self, event_type="PlatformAlarmEvent", sub_type="power"):
        """
        Starts event subscriber for events of given event_type ("PlatformAlarmEvent"
        by default) and given sub_type ("power" by default).
        """
        # TODO note: ion-definitions still using 'PlatformAlarmEvent' but we
        # should probably define 'PlatformExternalEvent' or something like that.

        def consume_event(evt, *args, **kwargs):
            # A callback for consuming events.
            log.info('Event subscriber received evt: %s.', str(evt))
            self._events_received.append(evt)
            self._async_event_result.set(evt)

        sub = EventSubscriber(event_type=event_type,
                              sub_type=sub_type,
                              callback=consume_event)

        sub.start()
        log.info("registered event subscriber for event_type=%r, sub_type=%r",
                 event_type, sub_type)

        self._event_subscribers.append(sub)
        sub._ready_event.wait(timeout=EVENT_TIMEOUT)

    def _stop_event_subscribers(self):
        """
        Stops the event subscribers on cleanup.
        """
        try:
            for sub in self._event_subscribers:
                if hasattr(sub, 'subscription_id'):
                    try:
                        self.pubsubcli.deactivate_subscription(sub.subscription_id)
                    except:
                        pass
                    self.pubsubcli.delete_subscription(sub.subscription_id)
                sub.stop()
        finally:
            self._event_subscribers = []

    def test_oms_create_and_launch(self):

        # pick a base platform:
        base_platform_id = BASE_PLATFORM_ID

        # and trigger the traversal of the branch rooted at that base platform
        # to create corresponding ION objects and configuration dictionaries:
        base_platform_objs = self._traverse(base_platform_id)

        # now that most of the topology information is there, add the
        # PlatformAgentInstance elements
        self._set_platform_agent_instances()

        base_platform_config = self.platform_configs[base_platform_id]

        log.info("base_platform_id = %r", base_platform_id)
        log.info("topology = %s", str(self.topology))


        #-------------------------------
        # Launch Base Platform AgentInstance, connect to the resource agent client
        #-------------------------------
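        # The agent is then driven through its command lifecycle:
        # INITIALIZE -> GO_ACTIVE -> RUN -> START_EVENT_DISPATCH, followed by
        # STOP_EVENT_DISPATCH -> GO_INACTIVE -> RESET once data samples and
        # events have been received.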

        agent_instance_id = base_platform_objs['agent_instance_id']
        pid = self.imsclient.start_platform_agent_instance(platform_agent_instance_id=agent_instance_id)
        log.debug("start_platform_agent_instance returned pid=%s", pid)

        #wait for start
        instance_obj = self.imsclient.read_platform_agent_instance(agent_instance_id)
        gate = ProcessStateGate(self.processdispatchclient.read_process,
                                instance_obj.agent_process_id,
                                ProcessStateEnum.RUNNING)
        self.assertTrue(gate.await(90), "The platform agent instance did not spawn in 90 seconds")

        agent_instance_obj= self.imsclient.read_instrument_agent_instance(agent_instance_id)
        log.debug('test_oms_create_and_launch: Platform agent instance obj: %s', str(agent_instance_obj))

        # Start a resource agent client to talk with the platform agent.
        self._pa_client = ResourceAgentClient('paclient', name=agent_instance_obj.agent_process_id,  process=FakeProcess())
        log.debug(" test_oms_create_and_launch:: got pa client %s", str(self._pa_client))

        log.debug("base_platform_config =\n%s", base_platform_config)

        # ping_agent can be issued before INITIALIZE
        retval = self._pa_client.ping_agent(timeout=TIMEOUT)
        log.debug( 'Base Platform ping_agent = %s', str(retval) )

        # issue INITIALIZE command to the base platform, which will launch the
        # creation of the whole platform hierarchy rooted at base_platform_config['platform_id']
#        cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE, kwargs=dict(plat_config=base_platform_config))
        cmd = AgentCommand(command=PlatformAgentEvent.INITIALIZE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform INITIALIZE = %s', str(retval) )


        # GO_ACTIVE
        cmd = AgentCommand(command=PlatformAgentEvent.GO_ACTIVE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform GO_ACTIVE = %s', str(retval) )

        # RUN: this command includes the launch of the resource monitoring greenlets
        cmd = AgentCommand(command=PlatformAgentEvent.RUN)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform RUN = %s', str(retval) )

        # START_EVENT_DISPATCH
        kwargs = dict(params="TODO set params")
        cmd = AgentCommand(command=PlatformAgentEvent.START_EVENT_DISPATCH, kwargs=kwargs)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        self.assertTrue(retval.result is not None)

        # wait for data sample
        # just wait for at least one -- see consume_data above
        log.info("waiting for reception of a data sample...")
        self._async_data_result.get(timeout=DATA_TIMEOUT)
        self.assertTrue(len(self._samples_received) >= 1)

        log.info("waiting a bit more for reception of more data samples...")
        sleep(10)
        log.info("Got data samples: %d", len(self._samples_received))


        # wait for event
        # just wait for at least one event -- see consume_event above
        log.info("waiting for reception of an event...")
        self._async_event_result.get(timeout=EVENT_TIMEOUT)
        log.info("Received events: %s", len(self._events_received))


        # STOP_EVENT_DISPATCH
        cmd = AgentCommand(command=PlatformAgentEvent.STOP_EVENT_DISPATCH)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        self.assertTrue(retval.result is not None)

        # GO_INACTIVE
        cmd = AgentCommand(command=PlatformAgentEvent.GO_INACTIVE)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform GO_INACTIVE = %s', str(retval) )

        # RESET: Resets the base platform agent, which includes termination of
        # its sub-platforms processes:
        cmd = AgentCommand(command=PlatformAgentEvent.RESET)
        retval = self._pa_client.execute_agent(cmd, timeout=TIMEOUT)
        log.debug( 'Base Platform RESET = %s', str(retval) )

        #-------------------------------
        # Stop Base Platform AgentInstance
        #-------------------------------
        self.imsclient.stop_platform_agent_instance(platform_agent_instance_id=agent_instance_id)


class EventTriggeredTransformIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(EventTriggeredTransformIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        self.queue_cleanup = []
        self.exchange_cleanup = []

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()

        self.exchange_name = "test_queue"
        self.exchange_point = "test_exchange"

        self.dataset_management = DatasetManagementServiceClient()

    def tearDown(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()

    @attr("LOCOINT")
    @unittest.skipIf(os.getenv("CEI_LAUNCH_TEST", False), "Skip test while in CEI LAUNCH mode")
    def test_event_triggered_transform_A(self):
        """
        Test that packets are processed by the event triggered transform
        """

        # ---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        # ---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name="EventTriggeredTransform_A", description="For testing EventTriggeredTransform_A"
        )
        process_definition.executable["module"] = "ion.processes.data.transforms.event_triggered_transform"
        process_definition.executable["class"] = "EventTriggeredTransform_A"
        event_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition
        )

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)

        stream_def_id = self.pubsub.create_stream_definition("cond_stream_def", parameter_dictionary_id=pdict_id)
        cond_stream_id, _ = self.pubsub.create_stream(
            "test_conductivity", exchange_point="science_data", stream_definition_id=stream_def_id
        )

        config.process.publish_streams.conductivity = cond_stream_id
        config.process.event_type = "ResourceLifecycleEvent"

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=event_transform_proc_def_id, configuration=config
        )

        # ---------------------------------------------------------------------------------------------
        # Publish an event to wake up the event triggered transform
        # ---------------------------------------------------------------------------------------------

        event_publisher = EventPublisher("ResourceLifecycleEvent")
        event_publisher.publish_event(origin="fake_origin")

        # ---------------------------------------------------------------------------------------------
        # Create subscribers that will receive the conductivity, temperature and pressure granules from
        # the ctd transform
        # ---------------------------------------------------------------------------------------------
        ar_cond = gevent.event.AsyncResult()

        def subscriber1(m, r, s):
            ar_cond.set(m)

        sub_event_transform = StandaloneStreamSubscriber("sub_event_transform", subscriber1)
        self.addCleanup(sub_event_transform.stop)

        sub_event_transform_id = self.pubsub.create_subscription(
            "subscription_cond", stream_ids=[cond_stream_id], exchange_name="sub_event_transform"
        )

        self.pubsub.activate_subscription(sub_event_transform_id)

        self.queue_cleanup.append(sub_event_transform.xn.queue)

        sub_event_transform.start()

        # ------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        # ------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = "stream_id.stream"
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind("stream_id.stream", xp)
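        # The transform consumes from self.exchange_name (its queue_name config),
        # so binding that queue to the exchange point with the publisher's routing
        # key routes the published granule into the transform's input queue.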

        pub = StandaloneStreamPublisher("stream_id", stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        # ------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        # ------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result_cond = ar_cond.get(timeout=10)
        self.assertTrue(isinstance(result_cond, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result_cond)
        self.assertTrue(rdt.__contains__("conductivity"))

        self.check_cond_algorithm_execution(publish_granule, result_cond)

    def check_cond_algorithm_execution(self, publish_granule, granule_from_transform):

        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(granule_from_transform)

        output_data = output_rdt_transform["conductivity"]
        input_data = input_rdt_to_transform["conductivity"]

        # The transform is expected to compute output = input / 100000.0 - 0.5; compare the
        # arrays element-wise instead of collapsing each side with .all(), which only compares
        # two booleans and passes vacuously.
        self.assertTrue(numpy.allclose(input_data / 100000.0 - 0.5, output_data))

    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

        for field in rdt:
            rdt[field] = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])

        g = rdt.to_granule()

        return g

    def test_event_triggered_transform_B(self):
        """
        Test that packets are processed by the event triggered transform
        """

        # ---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        # ---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name="EventTriggeredTransform_B", description="For testing EventTriggeredTransform_B"
        )
        process_definition.executable["module"] = "ion.processes.data.transforms.event_triggered_transform"
        process_definition.executable["class"] = "EventTriggeredTransform_B"
        event_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition
        )

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)

        stream_def_id = self.pubsub.create_stream_definition("stream_def", parameter_dictionary_id=pdict_id)
        stream_id, _ = self.pubsub.create_stream(
            "test_stream", exchange_point="science_data", stream_definition_id=stream_def_id
        )

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point
        config.process.publish_streams.output = stream_id
        config.process.event_type = "ResourceLifecycleEvent"
        config.process.stream_id = stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=event_transform_proc_def_id, configuration=config
        )

        # ---------------------------------------------------------------------------------------------
        # Publish an event to wake up the event triggered transform
        # ---------------------------------------------------------------------------------------------

        event_publisher = EventPublisher("ResourceLifecycleEvent")
        event_publisher.publish_event(origin="fake_origin")
Exemplo n.º 37
0
class TestPreloadThenLoadDataset(IonIntegrationTestCase):
    """ Uses the preload system to define the ExternalDataset and related resources,
        then invokes services to perform the load
    """
    def setUp(self):
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        config = dict(op="load",
                      scenario="NOSE",
                      attachments="res/preload/r2_ioc/attachments")
        self.container.spawn_process("Loader",
                                     "ion.processes.bootstrap.ion_loader",
                                     "IONLoader",
                                     config=config)
        self.pubsub = PubsubManagementServiceClient()
        self.dams = DataAcquisitionManagementServiceClient()

    @unittest.skip("depricated test, now in mi repo")
    def test_use_case(self):
        # setUp() has already started the container and performed the preload
        #        self.assert_dataset_loaded('Test External CTD Dataset') # make sure we have the ExternalDataset resources
        self.assert_dataset_loaded(
            'Unit Test SMB37')  # association changed -- now use device name
        self.do_listen_for_incoming(
        )  # listen for any data being received from the dataset
        self.do_read_dataset()  # call services to load dataset
        self.assert_data_received()  # check that data was received as expected
        self.do_shutdown()

    def assert_dataset_loaded(self, name):
        rr = self.container.resource_registry
        #        self.external_dataset = self.find_object_by_name(name, RT.ExternalDataset)
        devs, _ = rr.find_resources(RT.InstrumentDevice,
                                    name=name,
                                    id_only=False)
        self.assertEquals(len(devs), 1)
        self.device = devs[0]
        obj, _ = rr.find_objects(subject=self.device._id,
                                 predicate=PRED.hasAgentInstance,
                                 object_type=RT.ExternalDatasetAgentInstance)
        self.agent_instance = obj[0]
        obj, _ = rr.find_objects(object_type=RT.ExternalDatasetAgent,
                                 predicate=PRED.hasAgentDefinition,
                                 subject=self.agent_instance._id)
        self.agent = obj[0]

        driver_cfg = self.agent_instance.driver_config
        #stream_definition_id = driver_cfg['dh_cfg']['stream_def'] if 'dh_cfg' in driver_cfg else driver_cfg['stream_def']
        #self.stream_definition = rr.read(stream_definition_id)

        self.data_product = rr.read_object(subject=self.device._id,
                                           predicate=PRED.hasOutputProduct,
                                           object_type=RT.DataProduct)

        self.dataset_id = rr.read_object(subject=self.data_product._id,
                                         predicate=PRED.hasDataset,
                                         object_type=RT.Dataset,
                                         id_only=True)

        ids, _ = rr.find_objects(subject=self.data_product._id,
                                 predicate=PRED.hasStream,
                                 object_type=RT.Stream,
                                 id_only=True)
        self.stream_id = ids[0]
        self.route = self.pubsub.read_stream_route(self.stream_id)

    def do_listen_for_incoming(self):
        subscription_id = self.pubsub.create_subscription(
            'validator', data_product_ids=[self.data_product._id])
        self.addCleanup(self.pubsub.delete_subscription, subscription_id)

        self.granule_capture = []
        self.granule_count = 0

        def on_granule(msg, route, stream_id):
            self.granule_count += 1
            if self.granule_count < 5:
                self.granule_capture.append(msg)

        validator = StandaloneStreamSubscriber('validator',
                                               callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub.deactivate_subscription, subscription_id)

        self.dataset_modified = Event()

        def cb2(*args, **kwargs):
            self.dataset_modified.set()
            # TODO: event isn't using the ExternalDataset, but a different ID for a Dataset

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb2,
                             origin=self.dataset_id)
        es.start()
        self.addCleanup(es.stop)

    def do_read_dataset(self):
        self.dams.start_external_dataset_agent_instance(
            self.agent_instance._id)
        #
        # should i wait for process (above) to start
        # before launching client (below)?
        #
        self.client = None
        end = time.time() + MAX_AGENT_START_TIME
        while not self.client and time.time() < end:
            try:
                self.client = ResourceAgentClient(self.device._id,
                                                  process=FakeProcess())
            except NotFound:
                time.sleep(2)
        if not self.client:
            self.fail(
                msg='external dataset agent process did not start in %d seconds'
                % MAX_AGENT_START_TIME)
        self.client.execute_agent(
            AgentCommand(command=ResourceAgentEvent.INITIALIZE))
        self.client.execute_agent(
            AgentCommand(command=ResourceAgentEvent.GO_ACTIVE))
        self.client.execute_agent(AgentCommand(command=ResourceAgentEvent.RUN))
        self.client.execute_resource(command=AgentCommand(
            command=DriverEvent.START_AUTOSAMPLE))

    def assert_data_received(self):

        # let data flow for a while (wait up to 30 more seconds for the dataset-modified event)
        # before checking what was received
        if not self.dataset_modified.is_set():
            self.dataset_modified.wait(30)
        self.assertTrue(self.granule_count > 2,
                        msg='granule count = %d' % self.granule_count)

        rdt = RecordDictionaryTool.load_from_granule(self.granule_capture[0])
        self.assertAlmostEqual(0, rdt['oxygen'][0], delta=0.01)
        self.assertAlmostEqual(309.77, rdt['pressure'][0], delta=0.01)
        self.assertAlmostEqual(37.9848, rdt['conductivity'][0], delta=0.01)
        self.assertAlmostEqual(9.5163, rdt['temp'][0], delta=0.01)
        self.assertAlmostEqual(3527207897.0, rdt['time'][0], delta=1)

    def do_shutdown(self):
        self.dams.stop_external_dataset_agent_instance(self.agent_instance._id)
Exemplo n.º 38
0
    def test_integrated_transform(self):
        '''
        This example script runs a chained three way transform:
            B
        A <
            C
        Where A is the even_odd transform (generates a stream of even and odd numbers from input)
        and B and C are the basic transforms that receive even and odd input
        '''
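        # A minimal, illustrative sketch (plain Python, not the transforms' actual code) of the
        # end-to-end behaviour described above: the even/odd transform routes each number by
        # parity and the basic transform adds 1, so the "evenplus1" subscriber should only ever
        # see odd numbers and the "oddplus1" subscriber only even ones.
        def _expected_outputs(nums):
            even_plus1 = [n + 1 for n in nums if n % 2 == 0]
            odd_plus1 = [n + 1 for n in nums if n % 2 == 1]
            return even_plus1, odd_plus1
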
        cc = self.container
        assertions = self.assertTrue

        pubsub_cli = PubsubManagementServiceClient(node=cc.node)
        rr_cli = ResourceRegistryServiceClient(node=cc.node)
        tms_cli = TransformManagementServiceClient(node=cc.node)
        #-------------------------------
        # Process Definition
        #-------------------------------
        # Create the process definition for the basic transform
        process_definition = IonObject(RT.ProcessDefinition, name='basic_transform_definition')
        process_definition.executable = {
            'module': 'ion.processes.data.transforms.transform_example',
            'class':'TransformExample'
        }
        basic_transform_definition_id, _ = rr_cli.create(process_definition)

        # Create The process definition for the TransformEvenOdd
        process_definition = IonObject(RT.ProcessDefinition, name='evenodd_transform_definition')
        process_definition.executable = {
            'module': 'ion.processes.data.transforms.transform_example',
            'class':'TransformEvenOdd'
        }
        evenodd_transform_definition_id, _ = rr_cli.create(process_definition)

        #-------------------------------
        # Streams
        #-------------------------------
        streams = [pubsub_cli.create_stream() for i in xrange(5)]

        #-------------------------------
        # Subscriptions
        #-------------------------------

        query = StreamQuery(stream_ids=[streams[0]])
        input_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='input_queue')

        query = StreamQuery(stream_ids = [streams[1]]) # even output
        even_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='even_queue')

        query = StreamQuery(stream_ids = [streams[2]]) # odd output
        odd_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='odd_queue')


        #-------------------------------
        # Launch the EvenOdd Transform
        #-------------------------------

        evenodd_id = tms_cli.create_transform(name='even_odd',
            in_subscription_id=input_subscription_id,
            out_streams={'even':streams[1], 'odd':streams[2]},
            process_definition_id=evenodd_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(evenodd_id)


        #-------------------------------
        # Launch the Even Processing Transform
        #-------------------------------

        even_transform_id = tms_cli.create_transform(name='even_transform',
            in_subscription_id = even_subscription_id,
            out_streams={'even_plus1':streams[3]},
            process_definition_id=basic_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(even_transform_id)

        #-------------------------------
        # Launch the Odd Processing Transform
        #-------------------------------

        odd_transform_id = tms_cli.create_transform(name='odd_transform',
            in_subscription_id = odd_subscription_id,
            out_streams={'odd_plus1':streams[4]},
            process_definition_id=basic_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(odd_transform_id)

        #-------------------------------
        # Set up final subscribers
        #-------------------------------

        evenplus1_subscription_id = pubsub_cli.create_subscription(
            query=StreamQuery([streams[3]]),
            exchange_name='evenplus1_queue',
            name='EvenPlus1Subscription',
            description='EvenPlus1 SubscriptionDescription'
        )
        oddplus1_subscription_id = pubsub_cli.create_subscription(
            query=StreamQuery([streams[4]]),
            exchange_name='oddplus1_queue',
            name='OddPlus1Subscription',
            description='OddPlus1 SubscriptionDescription'
        )

        total_msg_count = 2

        msgs = gevent.queue.Queue()


        def even1_message_received(message, headers):
            input = int(message.get('num'))
            assertions( (input % 2) ) # Assert it is odd (transform adds 1)
            msgs.put(True)


        def odd1_message_received(message, headers):
            input = int(message.get('num'))
            assertions(not (input % 2)) # Assert it is even
            msgs.put(True)

        subscriber_registrar = StreamSubscriberRegistrar(process=cc, node=cc.node)
        even_subscriber = subscriber_registrar.create_subscriber(exchange_name='evenplus1_queue', callback=even1_message_received)
        odd_subscriber = subscriber_registrar.create_subscriber(exchange_name='oddplus1_queue', callback=odd1_message_received)

        # Start subscribers
        even_subscriber.start()
        odd_subscriber.start()

        # Activate subscriptions
        pubsub_cli.activate_subscription(evenplus1_subscription_id)
        pubsub_cli.activate_subscription(oddplus1_subscription_id)

        #-------------------------------
        # Set up fake stream producer
        #-------------------------------

        pid = cc.spawn_process(name='dummy_process_for_test',
            module='pyon.ion.process',
            cls='SimpleProcess',
            config={})
        dummy_process = cc.proc_manager.procs[pid]

        # Normally the user does not see or create the publisher; that is the container's business.
        # For the test we need to set it up explicitly.
        publisher_registrar = StreamPublisherRegistrar(process=dummy_process, node=cc.node)
        stream_publisher = publisher_registrar.create_publisher(stream_id=streams[0])

        #-------------------------------
        # Start test
        #-------------------------------

        # Publish a stream
        for i in xrange(total_msg_count):
            stream_publisher.publish({'num':str(i)})

        time.sleep(0.5)

        for i in xrange(total_msg_count * 2):
            try:
                msgs.get(timeout=5)
            except Empty:
                assertions(False, "Failed to process all messages correctly.")
class TransformPrototypeIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(TransformPrototypeIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.rrc = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.ssclient = SchedulerServiceClient()
        self.event_publisher = EventPublisher()

        self.exchange_names = []
        self.exchange_points = []

    def tearDown(self):

        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def now_utc(self):
        return time.mktime(datetime.datetime.utcnow().timetuple())

    def _create_interval_timer_with_end_time(self,timer_interval= None, end_time = None ):
        '''
        A convenience method to set up an interval timer with an end time
        '''
        self.timer_received_time = 0
        self.timer_interval = timer_interval

        start_time = self.now_utc()
        if not end_time:
            end_time = start_time + 2 * timer_interval + 1

        log.debug("got the end time here!! %s" % end_time)

        # Set up the interval timer. The scheduler will publish event with origin set as "Interval Timer"
        sid = self.ssclient.create_interval_timer(start_time="now" ,
            interval=self.timer_interval,
            end_time=end_time,
            event_origin="Interval Timer",
            event_subtype="")

        def cleanup_timer(scheduler, schedule_id):
            """
            Do a friendly cancel of the scheduled event.
            If it fails, it's ok.
            """
            try:
                scheduler.cancel_timer(schedule_id)
            except:
                log.warn("Couldn't cancel")

        self.addCleanup(cleanup_timer, self.ssclient, sid)

        return sid

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_event_processing(self):
        '''
        Test that events are processed by the transforms according to a provided algorithm
        '''


        #-------------------------------------------------------------------------------------
        # Set up the scheduler for an interval timer with an end time
        #-------------------------------------------------------------------------------------
        id = self._create_interval_timer_with_end_time(timer_interval=2)
        self.assertIsNotNone(id)

        #-------------------------------------------------------------------------------------
        # Create an event alert transform....
        # The configuration for the Event Alert Transform... set up the event types to listen to
        #-------------------------------------------------------------------------------------
        configuration = {
            'process':{
                'event_type': 'ResourceEvent',
                'timer_origin': 'Interval Timer',
                'instrument_origin': 'My_favorite_instrument'
            }
        }

        #-------------------------------------------------------------------------------------
        # Create the process
        #-------------------------------------------------------------------------------------
        pid = TransformPrototypeIntTest.create_process(  name= 'event_alert_transform',
            module='ion.processes.data.transforms.event_alert_transform',
            class_name='EventAlertTransform',
            configuration= configuration)

        self.assertIsNotNone(pid)

        #-------------------------------------------------------------------------------------
        # Publish events and make assertions about alerts
        #-------------------------------------------------------------------------------------

        queue = gevent.queue.Queue()

        def event_received(message, headers):
            queue.put(message)

        event_subscriber = EventSubscriber( origin="EventAlertTransform",
            event_type="DeviceEvent",
            callback=event_received)

        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        # Publish several events in quick succession; while the instrument keeps reporting,
        # the transform should not raise any alert, so the queue stays empty.

        for i in xrange(5):
            self.event_publisher.publish_event(    event_type = 'ExampleDetectableEvent',
                origin = "My_favorite_instrument",
                voltage = 5,
                telemetry = 10,
                temperature = 20)
            gevent.sleep(0.1)
            self.assertTrue(queue.empty())



        # Now stay silent for longer than the timer interval; the transform should notice the
        # missing instrument events and publish an alert.
        gevent.sleep(5)

        #-------------------------------------------------------------------------------------
        # Make assertions about the alert event published by the EventAlertTransform
        #-------------------------------------------------------------------------------------

        event = queue.get(timeout=10)

        log.debug("Alarm event received from the EventAertTransform %s" % event)

        self.assertEquals(event.type_, "DeviceEvent")
        self.assertEquals(event.origin, "EventAlertTransform")

        #------------------------------------------------------------------------------------------------
        # Now clear the event queue being populated by alarm events and publish normally once again
        #------------------------------------------------------------------------------------------------

        queue.queue.clear()

        for i in xrange(5):
            self.event_publisher.publish_event(    event_type = 'ExampleDetectableEvent',
                origin = "My_favorite_instrument",
                voltage = 5,
                telemetry = 10,
                temperature = 20)
            gevent.sleep(0.1)
            self.assertTrue(queue.empty())

        log.debug("This completes the requirement that the EventAlertTransform publishes \
                    an alarm event when it does not hear from the instrument for some time.")


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_stream_processing(self):
        #--------------------------------------------------------------------------------
        #Test that streams are processed by the transforms according to a provided algorithm
        #--------------------------------------------------------------------------------

        # todo: In this simple implementation we only check whether the stream message contains
        # todo (contd): the word PUBLISH and a VALUE=<number> token whose number is below a threshold.

        # todo: later on we will use more complex algorithms to make this prototype powerful
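        # A minimal sketch (illustrative only, not the StreamAlertTransform's actual code) of the
        # check the todos above describe: the message must contain the word PUBLISH and a
        # "VALUE = <number>" token whose number is below the configured threshold (10 below).
        def _would_trigger_alert(text, threshold=10):
            if 'PUBLISH' not in text or 'VALUE' not in text:
                return False
            number_token = text.split('VALUE')[1].split('=')[1].split()[0]
            return int(number_token) < threshold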

        #-------------------------------------------------------------------------------------
        # Start a subscriber to listen for an alert event from the Stream Alert Transform
        #-------------------------------------------------------------------------------------

        queue = gevent.queue.Queue()

        def event_received(message, headers):
            queue.put(message)

        event_subscriber = EventSubscriber( origin="StreamAlertTransform",
            event_type="DeviceEvent",
            callback=event_received)

        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        #-------------------------------------------------------------------------------------
        # The configuration for the Stream Alert Transform... set up the event types to listen to
        #-------------------------------------------------------------------------------------
        config = {
            'process':{
                'queue_name': 'a_queue',
                'value': 10,
                'event_type':'DeviceEvent'
            }
        }

        #-------------------------------------------------------------------------------------
        # Create the process
        #-------------------------------------------------------------------------------------
        pid = TransformPrototypeIntTest.create_process( name= 'transform_data_process',
            module='ion.processes.data.transforms.event_alert_transform',
            class_name='StreamAlertTransform',
            configuration= config)

        self.assertIsNotNone(pid)

        #-------------------------------------------------------------------------------------
        # Publish streams and make assertions about alerts
        #-------------------------------------------------------------------------------------
        exchange_name = 'a_queue'
        exchange_point = 'test_exchange'
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(exchange_name)
        xp = self.container.ex_manager.create_xp(exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        message = "A dummy example message containing the word PUBLISH, and with VALUE = 5 . This message" +\
                  " will trigger an alert event from the StreamAlertTransform because the value provided is "\
                  "less than 10 that was passed in through the config."

        pub.publish(message)

        event = queue.get(timeout=10)
        self.assertEquals(event.type_, "DeviceEvent")
        self.assertEquals(event.origin, "StreamAlertTransform")

    #        self.purge_queues(exchange_name)

    #    def purge_queues(self, exchange_name):
    #        xn = self.container.ex_manager.create_xn_queue(exchange_name)
    #        xn.purge()

    @staticmethod
    def create_process(name= '', module = '', class_name = '', configuration = None):
        '''
        A helper method to create a process
        '''

        producer_definition = ProcessDefinition(name=name)
        producer_definition.executable = {
            'module':module,
            'class': class_name
        }

        process_dispatcher = ProcessDispatcherServiceClient()

        procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
        pid = process_dispatcher.schedule_process(process_definition_id= procdef_id, configuration=configuration)

        return pid

    def test_demo_stream_granules_processing(self):
        """
        Test that the Demo Stream Alert Transform is functioning. The transform coordinates with the scheduler.
        It is configured to listen to a source that publishes granules. It publishes a DeviceStatusEvent if it
        receives a granule with bad data or a DeviceCommsEvent if no granule has arrived between two timer events.

        The transform is configured at launch using a config dictionary.
        """
        #-------------------------------------------------------------------------------------
        # Start a subscriber to listen for an alert event from the Stream Alert Transform
        #-------------------------------------------------------------------------------------

        queue_bad_data = gevent.queue.Queue()
        queue_no_data = gevent.queue.Queue()

        def bad_data(message, headers):
            if message.type_ == "DeviceStatusEvent":
                queue_bad_data.put(message)

        def no_data(message, headers):
            queue_no_data.put(message)

        event_subscriber_bad_data = EventSubscriber( origin="DemoStreamAlertTransform",
            event_type="DeviceStatusEvent",
            callback=bad_data)

        event_subscriber_no_data = EventSubscriber( origin="DemoStreamAlertTransform",
            event_type="DeviceCommsEvent",
            callback=no_data)

        event_subscriber_bad_data.start()
        event_subscriber_no_data.start()

        self.addCleanup(event_subscriber_bad_data.stop)
        self.addCleanup(event_subscriber_no_data.stop)

        #-------------------------------------------------------------------------------------
        # The configuration for the Stream Alert Transform... set up the event types to listen to
        #-------------------------------------------------------------------------------------
        self.valid_values = [-100, 100]
        self.timer_interval = 5
        self.queue_name = 'a_queue'

        config = {
            'process':{
                'timer_interval': self.timer_interval,
                'queue_name': self.queue_name,
                'variable_name': 'input_voltage',
                'time_field_name': 'preferred_timestamp',
                'valid_values': self.valid_values,
                'timer_origin': 'Interval Timer'
            }
        }

        #-------------------------------------------------------------------------------------
        # Create the process
        #-------------------------------------------------------------------------------------
        pid = TransformPrototypeIntTest.create_process( name= 'DemoStreamAlertTransform',
            module='ion.processes.data.transforms.event_alert_transform',
            class_name='DemoStreamAlertTransform',
            configuration= config)

        self.assertIsNotNone(pid)

        #-------------------------------------------------------------------------------------
        # Publish streams and make assertions about alerts
        #-------------------------------------------------------------------------------------

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(name= 'platform_eng_parsed', id_only=True)

        stream_def_id = self.pubsub_management.create_stream_definition('demo_stream', parameter_dictionary_id=pdict_id)
        stream_id, stream_route = self.pubsub_management.create_stream( name='test_demo_alert',
            exchange_point='exch_point_1',
            stream_definition_id=stream_def_id)

        sub_1 = self.pubsub_management.create_subscription(name='sub_1', stream_ids=[stream_id], exchange_points=['exch_point_1'], exchange_name=self.queue_name)
        self.pubsub_management.activate_subscription(sub_1)
        self.exchange_names.append('sub_1')
        self.exchange_points.append('exch_point_1')

        #-------------------------------------------------------------------------------------
        # publish a *GOOD* granule
        #-------------------------------------------------------------------------------------
        self.length = 2
        val = numpy.array([random.uniform(0,50)  for l in xrange(self.length)])
        self._publish_granules(stream_id= stream_id, stream_route= stream_route, number=1, values=val, length=self.length)

        self.assertTrue(queue_bad_data.empty())

        #-------------------------------------------------------------------------------------
        # publish a few *BAD* granules
        #-------------------------------------------------------------------------------------
        self.length = 2
        self.number = 2
        val = numpy.array([random.uniform(110,200)  for l in xrange(self.length)])
        self._publish_granules(stream_id= stream_id, stream_route= stream_route, number= self.number, values=val, length=self.length)

        for i in xrange(self.length * self.number):
            event = queue_bad_data.get(timeout=10)
            self.assertEquals(event.type_, "DeviceStatusEvent")
            self.assertEquals(event.origin, "DemoStreamAlertTransform")
            self.assertEquals(event.state, DeviceStatusType.OUT_OF_RANGE)
            self.assertEquals(event.valid_values, self.valid_values)
            self.assertEquals(event.sub_type, 'input_voltage')
            self.assertIsNotNone(event.value)
            self.assertIsNotNone(event.time_stamp)

        # To ensure that only the bad values generated the alert events. Queue should be empty now
        self.assertEquals(queue_bad_data.qsize(), 0)

        #-------------------------------------------------------------------------------------
        # Do not publish any granules for some time. This should generate a DeviceCommsEvent for the communication status
        #-------------------------------------------------------------------------------------
        event = queue_no_data.get(timeout=15)

        self.assertEquals(event.type_, "DeviceCommsEvent")
        self.assertEquals(event.origin, "DemoStreamAlertTransform")
        self.assertEquals(event.state, DeviceCommsType.DATA_DELIVERY_INTERRUPTION)
        self.assertEquals(event.sub_type, 'input_voltage')

        #-------------------------------------------------------------------------------------
        # Empty the queues and repeat tests
        #-------------------------------------------------------------------------------------
        queue_bad_data.queue.clear()
        queue_no_data.queue.clear()

        #-------------------------------------------------------------------------------------
        # publish a *GOOD* granule again
        #-------------------------------------------------------------------------------------
        self.length = 2
        val = numpy.array([random.uniform(0,50)  for l in xrange(self.length)])
        self._publish_granules(stream_id= stream_id, stream_route= stream_route, number=1, values=val, length=self.length)

        self.assertTrue(queue_bad_data.empty())

        #-------------------------------------------------------------------------------------
        # Again do not publish any granules for some time. This should generate a DeviceCommsEvent for the communication status
        #-------------------------------------------------------------------------------------

        event = queue_no_data.get(timeout=20)

        self.assertEquals(event.type_, "DeviceCommsEvent")
        self.assertEquals(event.origin, "DemoStreamAlertTransform")
        self.assertEquals(event.state, DeviceCommsType.DATA_DELIVERY_INTERRUPTION)
        self.assertEquals(event.sub_type, 'input_voltage')

    def _publish_granules(self, stream_id=None, stream_route=None, values = None,number=None, length=None):

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        for i in xrange(number):
            rdt['input_voltage'] = values
            rdt['preferred_timestamp'] = numpy.array([random.uniform(0,1000)  for l in xrange(length)])
            g = rdt.to_granule()
            pub.publish(g)
Exemplo n.º 40
0
class TransformManagementServiceIntTest(IonIntegrationTestCase):

    def setUp(self):
        # set up the container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2dm.yml')

        self.pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
        self.tms_cli = TransformManagementServiceClient(node=self.container.node)
        self.rr_cli = ResourceRegistryServiceClient(node=self.container.node)
        self.procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        self.input_stream_id = self.pubsub_cli.create_stream(name='input_stream',original=True)

        self.input_subscription_id = self.pubsub_cli.create_subscription(query=StreamQuery(stream_ids=[self.input_stream_id]),exchange_name='transform_input',name='input_subscription')

        self.output_stream_id = self.pubsub_cli.create_stream(name='output_stream',original=True)

        self.process_definition = ProcessDefinition(name='basic_transform_definition')
        self.process_definition.executable = {'module': 'ion.processes.data.transforms.transform_example',
                                              'class':'TransformExample'}
        self.process_definition_id = self.procd_cli.create_process_definition(process_definition=self.process_definition)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI')
    def test_create_transform(self):
        configuration = {'program_args':{'arg1':'value'}}

        transform_id = self.tms_cli.create_transform(
              name='test_transform',
              in_subscription_id=self.input_subscription_id,
              out_streams={'output':self.output_stream_id},
              process_definition_id=self.process_definition_id)

        # test transform creation in rr
        transform = self.rr_cli.read(transform_id)
        self.assertEquals(transform.name,'test_transform')


        # test associations
        predicates = [PRED.hasSubscription, PRED.hasOutStream, PRED.hasProcessDefinition]
        assocs = []
        for p in predicates:
            assocs += self.rr_cli.find_associations(transform_id,p,id_only=True)
        self.assertEquals(len(assocs),3)

        # test process creation
        transform = self.tms_cli.read_transform(transform_id)
        pid = transform.process_id
        proc = self.container.proc_manager.procs.get(pid)
        self.assertIsInstance(proc,TransformExample)

        # clean up
        self.tms_cli.delete_transform(transform_id)

    def test_create_transform_no_procdef(self):
        with self.assertRaises(NotFound):
            self.tms_cli.create_transform(name='test',in_subscription_id=self.input_subscription_id)

    def test_create_transform_bad_procdef(self):
        with self.assertRaises(NotFound):
            self.tms_cli.create_transform(name='test',
                in_subscription_id=self.input_subscription_id,
                process_definition_id='bad')
    
    def test_create_transform_no_config(self):
        transform_id = self.tms_cli.create_transform(
            name='test_transform',
            in_subscription_id=self.input_subscription_id,
            out_streams={'output':self.output_stream_id},
            process_definition_id=self.process_definition_id,
        )
        self.tms_cli.delete_transform(transform_id)

    def test_create_transform_name_failure(self):
        transform_id = self.tms_cli.create_transform(
            name='test_transform',
            in_subscription_id=self.input_subscription_id,
            out_streams={'output':self.output_stream_id},
            process_definition_id=self.process_definition_id,
        )
        with self.assertRaises(BadRequest):
            transform_id = self.tms_cli.create_transform(
                name='test_transform',
                in_subscription_id=self.input_subscription_id,
                out_streams={'output':self.output_stream_id},
                process_definition_id=self.process_definition_id,
            )
        self.tms_cli.delete_transform(transform_id)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),'Not integrated for CEI')
    def test_create_no_output(self):
        transform_id = self.tms_cli.create_transform(
            name='test_transform',
            in_subscription_id=self.input_subscription_id,
            process_definition_id=self.process_definition_id,
        )

        predicates = [PRED.hasSubscription, PRED.hasOutStream, PRED.hasProcessDefinition]
        assocs = []
        for p in predicates:
            assocs += self.rr_cli.find_associations(transform_id,p,id_only=True)
        self.assertEquals(len(assocs),2)

        # test process creation
        transform = self.tms_cli.read_transform(transform_id)
        pid = transform.process_id
        proc = self.container.proc_manager.procs.get(pid)
        self.assertIsInstance(proc,TransformExample)

        self.tms_cli.delete_transform(transform_id)
    def test_read_transform_exists(self):
        trans_obj = IonObject(RT.Transform,name='trans_obj')
        trans_id, _ = self.rr_cli.create(trans_obj)

        res = self.tms_cli.read_transform(trans_id)
        actual = self.rr_cli.read(trans_id)

        self.assertEquals(res._id,actual._id)


    def test_read_transform_nonexist(self):
        with self.assertRaises(NotFound) as e:
            self.tms_cli.read_transform('123')

    def test_activate_transform(self):

        transform_id = self.tms_cli.create_transform(
            name='test_transform',
            in_subscription_id=self.input_subscription_id,
            out_streams={'output':self.output_stream_id},
            process_definition_id=self.process_definition_id
        )

        self.tms_cli.activate_transform(transform_id)

        # pubsub check if activated?
        self.tms_cli.delete_transform(transform_id)

    def test_activate_transform_nonexist(self):
        with self.assertRaises(NotFound):
            self.tms_cli.activate_transform('1234')

    def test_delete_transform(self):

        transform_id = self.tms_cli.create_transform(
            name='test_transform',
            in_subscription_id=self.input_subscription_id,
            process_definition_id=self.process_definition_id
        )
        self.tms_cli.delete_transform(transform_id)

        # assertions
        with self.assertRaises(NotFound):
            self.rr_cli.read(transform_id)


    def test_delete_transform_nonexist(self):
        with self.assertRaises(NotFound):
            self.tms_cli.delete_transform('123')

    def test_execute_transform(self):
        # set up
        process_definition = ProcessDefinition(name='procdef_execute')
        process_definition.executable['module'] = 'ion.processes.data.transforms.transform_example'
        process_definition.executable['class'] = 'ReverseTransform'
        data = [1,2,3]

        process_definition_id, _ = self.rr_cli.create(process_definition)

        retval = self.tms_cli.execute_transform(process_definition_id,data)

        self.assertEquals(retval,[3,2,1])


    def test_integrated_transform(self):
        '''
        This example script runs a chained three way transform:
            B
        A <
            C
        Where A is the even_odd transform (generates a stream of even and odd numbers from input)
        and B and C are the basic transforms that receive even and odd input
        '''
        cc = self.container
        assertions = self.assertTrue

        pubsub_cli = PubsubManagementServiceClient(node=cc.node)
        rr_cli = ResourceRegistryServiceClient(node=cc.node)
        tms_cli = TransformManagementServiceClient(node=cc.node)
        #-------------------------------
        # Process Definition
        #-------------------------------
        # Create the process definition for the basic transform
        process_definition = IonObject(RT.ProcessDefinition, name='basic_transform_definition')
        process_definition.executable = {
            'module': 'ion.processes.data.transforms.transform_example',
            'class':'TransformExample'
        }
        basic_transform_definition_id, _ = rr_cli.create(process_definition)

        # Create The process definition for the TransformEvenOdd
        process_definition = IonObject(RT.ProcessDefinition, name='evenodd_transform_definition')
        process_definition.executable = {
            'module': 'ion.processes.data.transforms.transform_example',
            'class':'TransformEvenOdd'
        }
        evenodd_transform_definition_id, _ = rr_cli.create(process_definition)

        #-------------------------------
        # Streams
        #-------------------------------
        streams = [pubsub_cli.create_stream() for i in xrange(5)]

        #-------------------------------
        # Subscriptions
        #-------------------------------

        query = StreamQuery(stream_ids=[streams[0]])
        input_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='input_queue')

        query = StreamQuery(stream_ids = [streams[1]]) # even output
        even_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='even_queue')

        query = StreamQuery(stream_ids = [streams[2]]) # odd output
        odd_subscription_id = pubsub_cli.create_subscription(query=query, exchange_name='odd_queue')


        #-------------------------------
        # Launch the EvenOdd Transform
        #-------------------------------

        evenodd_id = tms_cli.create_transform(name='even_odd',
            in_subscription_id=input_subscription_id,
            out_streams={'even':streams[1], 'odd':streams[2]},
            process_definition_id=evenodd_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(evenodd_id)


        #-------------------------------
        # Launch the Even Processing Transform
        #-------------------------------

        even_transform_id = tms_cli.create_transform(name='even_transform',
            in_subscription_id = even_subscription_id,
            out_streams={'even_plus1':streams[3]},
            process_definition_id=basic_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(even_transform_id)

        #-------------------------------
        # Launch the Odd Processing Transform
        #-------------------------------

        odd_transform_id = tms_cli.create_transform(name='odd_transform',
            in_subscription_id = odd_subscription_id,
            out_streams={'odd_plus1':streams[4]},
            process_definition_id=basic_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(odd_transform_id)

        #-------------------------------
        # Set up final subscribers
        #-------------------------------

        evenplus1_subscription_id = pubsub_cli.create_subscription(
            query=StreamQuery([streams[3]]),
            exchange_name='evenplus1_queue',
            name='EvenPlus1Subscription',
            description='EvenPlus1 SubscriptionDescription'
        )
        oddplus1_subscription_id = pubsub_cli.create_subscription(
            query=StreamQuery([streams[4]]),
            exchange_name='oddplus1_queue',
            name='OddPlus1Subscription',
            description='OddPlus1 SubscriptionDescription'
        )

        total_msg_count = 2

        msgs = gevent.queue.Queue()


        def even1_message_received(message, headers):
            input = int(message.get('num'))
            assertions( (input % 2) ) # Assert it is odd (transform adds 1)
            msgs.put(True)


        def odd1_message_received(message, headers):
            input = int(message.get('num'))
            assertions(not (input % 2)) # Assert it is even
            msgs.put(True)

        subscriber_registrar = StreamSubscriberRegistrar(process=cc, node=cc.node)
        even_subscriber = subscriber_registrar.create_subscriber(exchange_name='evenplus1_queue', callback=even1_message_received)
        odd_subscriber = subscriber_registrar.create_subscriber(exchange_name='oddplus1_queue', callback=odd1_message_received)

        # Start subscribers
        even_subscriber.start()
        odd_subscriber.start()

        # Activate subscriptions
        pubsub_cli.activate_subscription(evenplus1_subscription_id)
        pubsub_cli.activate_subscription(oddplus1_subscription_id)

        #-------------------------------
        # Set up fake stream producer
        #-------------------------------

        pid = cc.spawn_process(name='dummy_process_for_test',
            module='pyon.ion.process',
            cls='SimpleProcess',
            config={})
        dummy_process = cc.proc_manager.procs[pid]

        # Normally the user does not see or create the publisher; that is the container's business.
        # For the test we need to set it up explicitly.
        publisher_registrar = StreamPublisherRegistrar(process=dummy_process, node=cc.node)
        stream_publisher = publisher_registrar.create_publisher(stream_id=streams[0])

        #-------------------------------
        # Start test
        #-------------------------------

        # Publish a stream
        for i in xrange(total_msg_count):
            stream_publisher.publish({'num':str(i)})

        time.sleep(0.5)

        for i in xrange(total_msg_count * 2):
            try:
                msgs.get(timeout=5)
            except Empty:
                assertions(False, "Failed to process all messages correctly.")

    """
class TestTransformWorker(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        # Instantiate a process to represent the test
        process = TransformWorkerTestProcess()

        self.dataset_management_client = DatasetManagementServiceClient(
            node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(
            node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(
            node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(
            node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(
            node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceProcessClient(
            node=self.container.node, process=process)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.ph = ParameterHelper(self.dataset_management_client,
                                  self.addCleanup)

        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)

    def push_granule(self, data_product_id):
        '''
        Publishes and monitors that the granule arrived
        '''
        datasets, _ = self.rrclient.find_objects(data_product_id,
                                                 PRED.hasDataset,
                                                 id_only=True)
        dataset_monitor = DatasetMonitor(datasets[0])

        rdt = self.ph.rdt_for_data_product(data_product_id)
        self.ph.fill_parsed_rdt(rdt)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)

        assert dataset_monitor.wait()
        dataset_monitor.stop()
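
    # Typical usage of the helper above (illustrative): after creating a data product, push a
    # single granule into it and block until the dataset monitor reports it was persisted, e.g.
    #
    #     self.push_granule(self.input_dp_id)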

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_transform_worker(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.dp_list = []
        self.data_process_objs = []
        self._output_stream_ids = []
        self.granule_verified = Event()
        self.worker_assigned_event_verified = Event()
        self.dp_created_event_verified = Event()
        self.heartbeat_event_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        self.start_event_listener()

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process(
        )
        self.dp_list.append(dataprocess_id)

        # validate the repository for data product algorithms persists the new resources  NEW SA-1
        # create_data_process call created one of each
        dpd_ids, _ = self.rrclient.find_resources(
            restype=OT.DataProcessDefinition, id_only=False)
        # there will be more than one because of the DPDs that represent the PFs in the data product above
        self.assertTrue(dpd_ids is not None)
        dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess,
                                                 id_only=False)
        # only one DP, because the PFs in the data product above are not activated yet
        self.assertEquals(len(dp_ids), 1)

        # validate the name and version label  NEW SA - 2
        dataprocessdef_obj = self.dataprocessclient.read_data_process_definition(
            dataprocessdef_id)
        self.assertEqual(dataprocessdef_obj.version_label, '1.0a')
        self.assertEqual(dataprocessdef_obj.name, 'add_arrays')

        # validate that the DPD has an attachment  NEW SA - 21
        attachment_ids, assoc_ids = self.rrclient.find_objects(
            dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True)
        self.assertEqual(len(attachment_ids), 1)
        attachment_obj = self.rrclient.read_attachment(attachment_ids[0])
        log.debug('attachment: %s', attachment_obj)

        # validate that the data process resource has input and output data products associated
        # L4-CI-SA-RQ-364  and NEW SA-3
        outproduct_ids, assoc_ids = self.rrclient.find_objects(
            dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True)
        self.assertEqual(len(outproduct_ids), 1)
        inproduct_ids, assoc_ids = self.rrclient.find_objects(
            dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True)
        self.assertEqual(len(inproduct_ids), 1)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check on the provenance graph: the parent and child data products should both
        # appear, with the DataProcessDefinition recorded as what derived the child from the parent.
        self.assertTrue(len(output_data_product_provenance) == 2)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[
            output_data_product_id[0]]['parents'])
        self.assertTrue(output_data_product_provenance[
            output_data_product_id[0]]['parents'][self.input_dp_id]
                        ['data_process_definition_id'] == dataprocessdef_id)

        # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in
        # the metadata of each output data product created by the data product algorithm.
        output_data_product_obj, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=False)
        self.assertTrue(output_data_product_obj[0].name != None)
        self.assertTrue(output_data_product_obj[0]._rev != None)

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                                   stream_route=stream_route)
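
        # Publish 100 granules: the first is enough to verify the transform output below, and the
        # full hundred is what triggers the per-100-granule heartbeat event checked further down.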

        for n in range(1, 101):
            rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
            rdt['time'] = [0]  # time should always come first
            rdt['conductivity'] = [1]
            rdt['pressure'] = [2]
            rdt['salinity'] = [8]

            self.publisher.publish(rdt.to_granule())

        # validate that the output granule is received and the updated value is correct
        self.assertTrue(self.granule_verified.wait(self.wait_time))

        # validate that the data process loaded into worker event is received    (L4-CI-SA-RQ-182)
        self.assertTrue(
            self.worker_assigned_event_verified.wait(self.wait_time))

        # validate that the data process create (with data product ids) event is received    (NEW SA -42)
        self.assertTrue(self.dp_created_event_verified.wait(self.wait_time))

        # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182)
        # this takes a while, so use a large wait limit
        self.assertTrue(self.heartbeat_event_verified.wait(200))

        # validate that the code for the transform function can be retrieved via inspect_data_process_definition
        src = self.dataprocessclient.inspect_data_process_definition(
            dataprocessdef_id)
        self.assertIn('def add_arrays(a, b)', src)
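        # For reference, a minimal sketch of the transform function whose source is inspected
        # above (hedged; the real implementation ships in the ion_example egg referenced earlier):
        #     def add_arrays(a, b):
        #         return a + b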

        # now delete the DPD and DP, then verify that the resources are retired so that the information required for provenance is still available
        self.dataprocessclient.delete_data_process(dataprocess_id)
        self.dataprocessclient.delete_data_process_definition(
            dataprocessdef_id)

        in_dp_objs, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            predicate=PRED.hasInputProduct,
            object_type=RT.DataProduct,
            id_only=True)
        self.assertTrue(in_dp_objs is not None)

        dpd_objs, _ = self.rrclient.find_subjects(
            subject_type=RT.DataProcessDefinition,
            predicate=PRED.hasDataProcess,
            object=dataprocess_id,
            id_only=True)
        self.assertTrue(dpd_objs is not None)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_transform_worker_with_instrumentdevice(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # Create CTD Parsed as the initial data product
        # create a stream definition for the data from the ctd simulator
        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id)

        # only ever need one device for testing purposes.
        instDevice_obj, _ = self.rrclient.find_resources(
            restype=RT.InstrumentDevice, name='test_ctd_device')
        if instDevice_obj:
            instDevice_id = instDevice_obj[0]._id
        else:
            instDevice_obj = IonObject(RT.InstrumentDevice,
                                       name='test_ctd_device',
                                       description="test_ctd_device",
                                       serial_number="12345")
            instDevice_id = self.imsclient.create_instrument_device(
                instrument_device=instDevice_obj)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                            data_product_id=self.input_dp_id)

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()

        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dataprocess_id)
        self.addCleanup(self.dataprocessclient.delete_data_process_definition,
                        dataprocessdef_id)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check that there are 3 entries in the provenance graph: the input (parent)
        # data product, the output (child) data product, and the instrument device that produced the parent.
        self.assertTrue(len(output_data_product_provenance) == 3)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[
            output_data_product_id[0]]['parents'])
        self.assertTrue(instDevice_id in output_data_product_provenance[
            self.input_dp_id]['parents'])
        self.assertTrue(output_data_product_provenance[instDevice_id]['type']
                        == 'InstrumentDevice')

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_transform_worker_with_platformdevice(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # Create CTD Parsed as the initial data product
        # create a stream definition for the data from the ctd simulator
        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id)

        # only ever need one device for testing purposes.
        platform_device_obj, _ = self.rrclient.find_resources(
            restype=RT.PlatformDevice, name='TestPlatform')
        if platform_device_obj:
            platform_device_id = platform_device_obj[0]._id
        else:
            platform_device_obj = IonObject(RT.PlatformDevice,
                                            name='TestPlatform',
                                            description="TestPlatform",
                                            serial_number="12345")
            platform_device_id = self.imsclient.create_platform_device(
                platform_device=platform_device_obj)

        self.damsclient.assign_data_product(
            input_resource_id=platform_device_id,
            data_product_id=self.input_dp_id)

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dataprocess_id)
        self.addCleanup(self.dataprocessclient.delete_data_process_definition,
                        dataprocessdef_id)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check that there are 3 entries in the provenance graph: the input (parent)
        # data product, the output (child) data product, and the platform device that produced the parent.
        self.assertTrue(len(output_data_product_provenance) == 3)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[
            output_data_product_id[0]]['parents'])
        self.assertTrue(platform_device_id in output_data_product_provenance[
            self.input_dp_id]['parents'])
        self.assertTrue(output_data_product_provenance[platform_device_id]
                        ['type'] == 'PlatformDevice')

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_event_transform_worker(self):
        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # test that a data process (type: data-product-in / event-out) can be defined and launched.
        # verify that event fields are correctly populated

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        # create the DPD and two DPs
        self.event_data_process_id = self.create_event_data_processes()

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=self.event_data_process_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_event_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                                   stream_route=stream_route)

        self.start_event_transform_listener()

        self.data_modified = Event()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher.publish(rdt.to_granule())

        self.assertTrue(self.event_verified.wait(self.wait_time))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_bad_argument_map(self):
        self._output_stream_ids = []

        # test that a data process (type: data-product-in / data-product-out) parameter mapping is validated during
        # data process creation and that the correct exception is raised for both an invalid input map and an invalid output parameter.

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # two data processes using one transform and one DPD

        dp1_func_output_dp_id = self.create_output_data_product()

        # Set up DPD and DP #2 - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri=
            'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        )
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS)
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        # create the data process with invalid argument map
        argument_map = {"arr1": "foo", "arr2": "bar"}
        output_param = "salinity"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(
                data_process_definition_id=add_array_dpd_id,
                inputs=[self.input_dp_id],
                outputs=[dp1_func_output_dp_id],
                argument_map=argument_map,
                out_param_name=output_param)

        ex = cm.exception
        log.debug(' exception raised: %s', cm)
        self.assertEqual(
            ex.message,
            "Input data product does not contain the parameters defined in argument map"
        )

        # create the data process with invalid output parameter name
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "foo"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(
                data_process_definition_id=add_array_dpd_id,
                inputs=[self.input_dp_id],
                outputs=[dp1_func_output_dp_id],
                argument_map=argument_map,
                out_param_name=output_param)

        ex = cm.exception
        log.debug(' exception raised: %s', cm)
        self.assertEqual(
            ex.message,
            "Output data product does not contain the output parameter name provided"
        )

    def create_event_data_processes(self):

        # two data processes using one transform and one DPD
        argument_map = {"a": "salinity"}

        # set up DPD and DP #2 - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='validate_salinity_array',
            description='validate_salinity_array',
            function='validate_salinity_array',
            module="ion.processes.data.transforms.test.test_transform_worker",
            arguments=['a'],
            function_type=TransformFunctionType.TRANSFORM)

        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='validate_salinity_array',
            description='validate_salinity_array',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
        )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id,
            add_array_dpd_id,
            binding='validate_salinity_array')

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=add_array_dpd_id,
            inputs=[self.input_dp_id],
            outputs=None,
            argument_map=argument_map)
        self.damsclient.register_process(dp1_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process,
                        dp1_data_process_id)

        return dp1_data_process_id

    def create_data_process(self):

        # two data processes using one transform and one DPD

        dp1_func_output_dp_id = self.create_output_data_product()
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "salinity"

        # set up DPD and DP #2 - array add function
        tf_obj = IonObject(
            RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri=
            'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        )
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
            version_label='1.0a')
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=add_array_dpd_id,
            inputs=[self.input_dp_id],
            outputs=[dp1_func_output_dp_id],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp1_data_process_id)
        #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id)

        # add an attachment object to this DPD to test new SA-21
        import msgpack
        attachment_content = 'foo bar'
        attachment_obj = IonObject(RT.Attachment,
                                   name='test_attachment',
                                   attachment_type=AttachmentType.ASCII,
                                   content_type='text/plain',
                                   content=msgpack.packb(attachment_content))
        att_id = self.rrclient.create_attachment(add_array_dpd_id,
                                                 attachment_obj)
        self.addCleanup(self.rrclient.delete_attachment, att_id)

        return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id

    def create_output_data_product(self):
        dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(
            name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp1_output_dp_obj = IonObject(RT.DataProduct,
                                      name='data_process1_data_product',
                                      description='output of add array func')

        dp1_func_output_dp_id = self.dataproductclient.create_data_product(
            dp1_output_dp_obj, dp1_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product,
                        dp1_func_output_dp_id)
        # retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger
        stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id,
                                                   PRED.hasStream, None, True)
        self._output_stream_ids.append(stream_ids[0])

        subscription_id = self.pubsub_client.create_subscription(
            'validator', data_product_ids=[dp1_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription,
                        subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s',
                      stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)
            self.granule_verified.set()

        validator = StandaloneStreamSubscriber('validator',
                                               callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        subscription_id)

        return dp1_func_output_dp_id

    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s",
                  str(data_process_event.__dict__))

        # if data process already created, check origin
        if self.dp_list:
            self.assertIn(data_process_event.origin, self.dp_list)

            # if this is a heartbeat event then 100 granules have been processed
            if 'data process status update.' in data_process_event.description:
                self.heartbeat_event_verified.set()

        else:
            # else check that this is the assign event

            if 'Data process assigned to transform worker' in data_process_event.description:
                self.worker_assigned_event_verified.set()
            elif 'Data process created for data product' in data_process_event.description:
                self.dp_created_event_verified.set()

    def validate_output_granule(self, msg, route, stream_id):
        self.assertIn(stream_id, self._output_stream_ids)

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  rdt: %s', rdt)
        sal_val = rdt['salinity']
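        # The add_arrays transform sums the mapped inputs, so the expected output is
        # conductivity (1) + pressure (2) = 3 for the granules published in the test above.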
        np.testing.assert_array_equal(sal_val, np.array([3]))

    def start_event_listener(self):

        es = EventSubscriber(event_type=OT.DataProcessStatusEvent,
                             callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)

    def validate_transform_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        status_alert_event = args[0]

        np.testing.assert_array_equal(status_alert_event.origin,
                                      self.stream_id)
        np.testing.assert_array_equal(status_alert_event.values,
                                      np.array([self.event_data_process_id]))
        log.debug("DeviceStatusAlertEvent: %s",
                  str(status_alert_event.__dict__))
        self.event_verified.set()

    def start_event_transform_listener(self):
        es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent,
                             callback=self.validate_transform_event)
        es.start()

        self.addCleanup(es.stop)

    def test_download(self):
        egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        egg_path = TransformWorker.download_egg(egg_url)

        import pkg_resources
        pkg_resources.working_set.add_entry(egg_path)

        from ion_example.add_arrays import add_arrays

        a = add_arrays(1, 2)
        self.assertEquals(a, 3)
Example No. 42
0
class CtdbpTransformsIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(CtdbpTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub            = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0

        # Cleanup of queue created by the subscriber

    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i+length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])

        g = rdt.to_granule()
        self.i+=length

        return g

    def _create_input_param_dict_for_test(self, parameter_dict_name = ''):

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext('conductivity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        cond_ctxt.uom = ''
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext('pressure', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        pres_ctxt.uom = ''
        pdict.add_context(pres_ctxt)

        temp_ctxt = ParameterContext('temperature', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        temp_ctxt.uom = ''
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext('density', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        dens_ctxt.uom = ''
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext('salinity', param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        sal_ctxt.uom = ''
        pdict.add_context(sal_ctxt)

        # register each parameter context and build a temporary parameter dictionary so the data product can later create its stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            self.addCleanup(self.dataset_management.delete_parameter_context,ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary, pdict_id)

        return pdict_id

    def test_ctdbp_L0_all(self):
        """
        Test packets processed by the ctdbp_L0_all transform
        """

        #----------- Data Process Definition --------------------------------

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='CTDBP_L0_all',
            description='Take parsed stream and put the C, T and P into three separate L0 streams.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L0',
            class_name='CTDBP_L0_all')

        dprocdef_id = self.data_process_management.create_data_process_definition(dpd_obj)
        self.addCleanup(self.data_process_management.delete_data_process_definition, dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        #----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        input_param_dict = self._create_input_param_dict_for_test(parameter_dict_name = 'fictitious_ctdp_param_dict')

        # Get the stream definition for the stream using the parameter dictionary
#        input_param_dict = self.dataset_management.read_parameter_dictionary_by_name('ctdbp_cdef_sample', id_only=True)
        input_stream_def_dict = self.pubsub.create_stream_definition(name='parsed', parameter_dictionary_id=input_param_dict)
        self.addCleanup(self.pubsub.delete_stream_definition, input_stream_def_dict)

        log.debug("Got the parsed parameter dictionary: id: %s", input_param_dict)
        log.debug("Got the stream def for parsed input: %s", input_stream_def_dict)

        # Input data product
        parsed_stream_dp_obj = IonObject(RT.DataProduct,
            name='parsed_stream',
            description='Parsed stream input to CTDBP L0 transform',
            temporal_domain = tdom,
            spatial_domain = sdom)

        input_dp_id = self.dataproduct_management.create_data_product(data_product=parsed_stream_dp_obj,
            stream_definition_id=input_stream_def_dict
        )
        self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id)

        # output data product
        L0_stream_dp_obj = IonObject(RT.DataProduct,
            name='L0_stream',
            description='L0_stream output of CTDBP L0 transform',
            temporal_domain = tdom,
            spatial_domain = sdom)

        L0_stream_dp_id = self.dataproduct_management.create_data_product(data_product=L0_stream_dp_obj,
                                                                    stream_definition_id=input_stream_def_dict
                                                                    )
        self.addCleanup(self.dataproduct_management.delete_data_product, L0_stream_dp_id)

        # The key name here must be "L0_stream": when the data process is launched, this name
        # goes into the launch config as config.process.publish_streams.L0_stream
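        # Illustrative only: the launcher is expected to build something like
        #   config.process.publish_streams = {'L0_stream': <stream id of L0_stream_dp_id>}
        # so the transform publishes on an attribute named after this key.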
        self.output_products = {'L0_stream' : L0_stream_dp_id}
        out_stream_ids, _ = self.resource_registry.find_objects(L0_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        dproc_id = self.data_process_management.create_data_process( dprocdef_id, [input_dp_id], self.output_products)
        self.addCleanup(self.data_process_management.delete_data_process, dproc_id)

        log.debug("Created a data process for ctdbp_L0. id: %s", dproc_id)

        # Activate the data process
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(input_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(stream_ids))

        input_stream_id = stream_ids[0]
        stream_route = self.pubsub.read_stream_route(input_stream_id)

        log.debug("The input stream for the L0 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------

        ar = gevent.event.AsyncResult()
        def subscriber(m,r,s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub', callback=subscriber)

        sub_id = self.pubsub.create_subscription('subscriber_to_transform',
            stream_ids=[output_stream_id],
            exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)


        sub.start()
        self.addCleanup(sub.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------

        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_packet(stream_definition_id=input_stream_def_dict, length = 5)

        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = ar.get(timeout=20)

        log.debug("Got the following granule from the transform: %s", granule_from_transform)

        # Check that the granule published by the L0 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)


    def _check_granule_from_transform(self, granule):
        """
        An internal method to check if a granule has the right properties
        """

        pass
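        # Left as a stub in this test; a hedged sketch of a possible check (not the original
        # implementation) would load the granule and confirm it carries the expected fields, e.g.:
        #     rdt = RecordDictionaryTool.load_from_granule(granule)
        #     self.assertIn('time', rdt)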
Example No. 43
0
    def run_even_odd_transform(self):
        '''
        This example script runs a chained three way transform:
            B
        A <
            C
        Where A is the even_odd transform (generates a stream of even and odd numbers from input)
        and B and C are the basic transforms that receive even and odd input
        '''
        pubsub_cli = PubsubManagementServiceClient(node=self.container.node)
        tms_cli = TransformManagementServiceClient(node=self.container.node)
        procd_cli = ProcessDispatcherServiceClient(node=self.container.node)

        #-------------------------------
        # Process Definition
        #-------------------------------
        # Create the process definition for the basic transform
        process_definition = IonObject(RT.ProcessDefinition,
                                       name='basic_transform_definition')
        process_definition.executable = {
            'module': 'ion.processes.data.transforms.transform_example',
            'class': 'TransformExample'
        }
        basic_transform_definition_id = procd_cli.create_process_definition(
            process_definition=process_definition)

        # Create the process definition for the TransformEvenOdd
        process_definition = IonObject(RT.ProcessDefinition,
                                       name='evenodd_transform_definition')
        process_definition.executable = {
            'module': 'ion.processes.data.transforms.transform_example',
            'class': 'TransformEvenOdd'
        }
        evenodd_transform_definition_id = procd_cli.create_process_definition(
            process_definition=process_definition)

        #-------------------------------
        # Streams
        #-------------------------------
        input_stream_id = pubsub_cli.create_stream(name='input_stream',
                                                   original=True)

        even_stream_id = pubsub_cli.create_stream(name='even_stream',
                                                  original=True)

        odd_stream_id = pubsub_cli.create_stream(name='odd_stream',
                                                 original=True)

        #-------------------------------
        # Subscriptions
        #-------------------------------

        query = StreamQuery(stream_ids=[input_stream_id])
        input_subscription_id = pubsub_cli.create_subscription(
            query=query, exchange_name='input_queue')

        query = StreamQuery(stream_ids=[even_stream_id])
        even_subscription_id = pubsub_cli.create_subscription(
            query=query, exchange_name='even_queue')

        query = StreamQuery(stream_ids=[odd_stream_id])
        odd_subscription_id = pubsub_cli.create_subscription(
            query=query, exchange_name='odd_queue')

        #-------------------------------
        # Launch the EvenOdd Transform
        #-------------------------------

        evenodd_id = tms_cli.create_transform(
            name='even_odd',
            in_subscription_id=input_subscription_id,
            out_streams={
                'even': even_stream_id,
                'odd': odd_stream_id
            },
            process_definition_id=evenodd_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(evenodd_id)

        #-------------------------------
        # Launch the Even Processing Transform
        #-------------------------------

        even_transform_id = tms_cli.create_transform(
            name='even_transform',
            in_subscription_id=even_subscription_id,
            process_definition_id=basic_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(even_transform_id)

        #-------------------------------
        # Launch the Odd Processing Transform
        #-------------------------------

        odd_transform_id = tms_cli.create_transform(
            name='odd_transform',
            in_subscription_id=odd_subscription_id,
            process_definition_id=basic_transform_definition_id,
            configuration={})
        tms_cli.activate_transform(odd_transform_id)

        #-------------------------------
        # Spawn the Streaming Producer
        #-------------------------------

        id_p = self.container.spawn_process(
            'myproducer', 'ion.processes.data.transforms.transform_example',
            'TransformExampleProducer', {
                'process': {
                    'type': 'stream_process',
                    'publish_streams': {
                        'out_stream': input_stream_id
                    }
                },
                'stream_producer': {
                    'interval': 4000
                }
            })
        self.container.proc_manager.procs[id_p].start()
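        # The producer's 'publish_streams' maps the name 'out_stream' to input_stream_id, so
        # everything it publishes feeds the even_odd transform's input subscription created above.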
Example No. 44
0
class VisualizationIntegrationTestHelper(IonIntegrationTestCase):

    def create_ctd_input_stream_and_data_product(self, data_product_name='ctd_parsed'):

        cc = self.container
        assertions = self.assertTrue

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.pubsubclient =  PubsubManagementServiceClient(node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.datasetclient =  DatasetManagementServiceClient(node=self.container.node)
        self.workflowclient = WorkflowManagementServiceClient(node=self.container.node)
        self.process_dispatcher = ProcessDispatcherServiceClient(node=self.container.node)
        self.vis_client = VisualizationServiceClient(node=self.container.node)


        #-------------------------------
        # Create CTD Parsed as the initial data product
        #-------------------------------
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = self.pubsubclient.create_stream_definition(container=ctd_stream_def, name='Simulated CTD data')


        log.debug('Creating new CDM data product with a stream definition')

        craft = CoverageCraft
        sdom, tdom = craft.create_domains()
        sdom = sdom.dump()
        tdom = tdom.dump()
        parameter_dictionary = craft.create_parameters()
        parameter_dictionary = parameter_dictionary.dump()

        dp_obj = IonObject(RT.DataProduct,
            name=data_product_name,
            description='ctd stream test',
            temporal_domain = tdom,
            spatial_domain = sdom)

        ctd_parsed_data_product_id = self.dataproductclient.create_data_product(dp_obj, ctd_stream_def_id, parameter_dictionary)

        log.debug('new ctd_parsed_data_product_id = %s' % ctd_parsed_data_product_id)

        #Only ever need one device for testing purposes.
        instDevice_obj,_ = self.rrclient.find_resources(restype=RT.InstrumentDevice, name='SBE37IMDevice')
        if instDevice_obj:
            instDevice_id = instDevice_obj[0]._id
        else:
            instDevice_obj = IonObject(RT.InstrumentDevice, name='SBE37IMDevice', description="SBE37IMDevice", serial_number="12345" )
            instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=ctd_parsed_data_product_id)

        self.dataproductclient.activate_data_product_persistence(data_product_id=ctd_parsed_data_product_id)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(ctd_parsed_data_product_id, PRED.hasStream, None, True)
        assertions(len(stream_ids) > 0 )
        ctd_stream_id = stream_ids[0]

        return ctd_stream_id, ctd_parsed_data_product_id

    def create_data_product(self, dp_name="", dp_description="", stream_definition_id=None):

        craft = CoverageCraft
        sdom, tdom = craft.create_domains()
        sdom = sdom.dump()
        tdom = tdom.dump()
        parameter_dictionary = craft.create_parameters()   # this creates a ParameterDictionary object
        parameter_dictionary = parameter_dictionary.dump()  # this returns a python dictionary

        data_prod_obj = IonObject(RT.DataProduct,
            name=dp_name,
            description=dp_description,
            temporal_domain = tdom,
            spatial_domain = sdom)

        data_prod_id = self.dataproductclient.create_data_product(data_prod_obj, stream_definition_id, parameter_dictionary)

        return data_prod_id, data_prod_obj

    def start_simple_input_stream_process(self, ctd_stream_id):
        return self.start_input_stream_process(ctd_stream_id)

    def start_sinusoidal_input_stream_process(self, ctd_stream_id):
        return self.start_input_stream_process(ctd_stream_id, 'ion.processes.data.sinusoidal_stream_publisher', 'SinusoidalCtdPublisher')

    def start_input_stream_process(self, ctd_stream_id, module = 'ion.processes.data.ctd_stream_publisher', class_name= 'SimpleCtdPublisher'):


        ###
        ### Start the process for producing the CTD data
        ###
        # process definition for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module':module,
            'class':class_name
        }

        ctd_sim_procdef_id = self.process_dispatcher.create_process_definition(process_definition=producer_definition)

        # Start the ctd simulator to produce some data
        configuration = {
            'process':{
                'stream_id':ctd_stream_id,
                }
        }
        ctd_sim_pid = self.process_dispatcher.schedule_process(process_definition_id=ctd_sim_procdef_id, configuration=configuration)

        return ctd_sim_pid

    def start_output_stream_and_listen(self, ctd_stream_id, data_product_stream_ids, message_count_per_stream=10):

        cc = self.container
        assertions = self.assertTrue

        ###
        ### Make a subscriber in the test to listen for transformed data
        ###
        salinity_subscription_id = self.pubsubclient.create_subscription(
            query=StreamQuery(data_product_stream_ids),
            exchange_name = 'workflow_test',
            exchange_point = 'science_data',
            name = "test workflow transformations",
        )

        pid = cc.spawn_process(name='dummy_process_for_test',
            module='pyon.ion.process',
            cls='SimpleProcess',
            config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, container=cc)

        result = gevent.event.AsyncResult()
        results = []
        message_count = len(data_product_stream_ids) * message_count_per_stream

        def message_received(message, headers):
            # collect each message; signal once the expected total has been received
            results.append(message)
            if len(results) >= message_count:   #Only wait for so many messages - per stream
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(exchange_name='workflow_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        self.pubsubclient.activate_subscription(subscription_id=salinity_subscription_id)


        #Start the input stream process
        if ctd_stream_id is not None:
            ctd_sim_pid = self.start_simple_input_stream_process(ctd_stream_id)

        # Assert that we have received data
        assertions(result.get(timeout=30))

        # stop the flow and parse the messages...
        if ctd_stream_id is not None:
            self.process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data

        self.pubsubclient.deactivate_subscription(subscription_id=salinity_subscription_id)

        subscriber.stop()

        return results


    def validate_messages(self, results):

        cc = self.container
        assertions = self.assertTrue

        first_salinity_values = None

        for message in results:
            rdt = RecordDictionaryTool.load_from_granule(message)

            try:
                temp = get_safe(rdt, 'temp')
            #                psd = PointSupplementStreamParser(stream_definition=self.ctd_stream_def, stream_granule=message)
            #                temp = psd.get_values('temperature')
            #                log.info(psd.list_field_names())
            except KeyError as ke:
                temp = None

            if temp is not None:
                assertions(isinstance(temp, numpy.ndarray))

                log.info( 'temperature=' + str(numpy.nanmin(temp)))

                first_salinity_values = None

            else:
                #psd = PointSupplementStreamParser(stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message)
                #log.info( psd.list_field_names())

                # Test the handy info method for the names of fields in the stream def
                #assertions('salinity' in psd.list_field_names())

                # you have to know the name of the coverage in stream def
                salinity = get_safe(rdt, 'salinity')
                #salinity = psd.get_values('salinity')
                log.info( 'salinity=' + str(numpy.nanmin(salinity)))

                # Check to see if salinity has values
                assertions(salinity is not None)

                assertions(isinstance(salinity, numpy.ndarray))
                assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0

                if first_salinity_values is None:
                    first_salinity_values = salinity.tolist()
                else:
                    second_salinity_values = salinity.tolist()
                    assertions(len(first_salinity_values) == len(second_salinity_values))
                    for idx in range(0,len(first_salinity_values)):
                        assertions(first_salinity_values[idx]*2.0 == second_salinity_values[idx])


    def validate_data_ingest_retrieve(self, dataset_id):

        assertions = self.assertTrue
        self.data_retriever = DataRetrieverServiceClient(node=self.container.node)

        #validate that data was ingested
        replay_granule = self.data_retriever.retrieve_last_granule(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(replay_granule)
        salinity = get_safe(rdt, 'salinity')
        assertions(salinity is not None)

        #retrieve all the granules from the database and check the values
        replay_granule_all = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(replay_granule_all)
        for k, v in rdt.iteritems():
            if k == 'salinity':
                for val in numpy.nditer(v):
                    assertions(val > 0)

    def create_salinity_data_process_definition(self):

        # Salinity: Data Process Definition

        #First look to see if it exists and if not, then create it
        dpd,_ = self.rrclient.find_resources(restype=RT.DataProcessDefinition, name='ctd_salinity')
        if len(dpd) > 0:
            return dpd[0]

        log.debug("Create data process definition SalinityTransform")
        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='ctd_salinity',
            description='create a salinity data product',
            module='ion.processes.data.transforms.ctd.ctd_L2_salinity',
            class_name='SalinityTransform',
            process_source='SalinityTransform source code here...')
        try:
            ctd_L2_salinity_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
        except Exception as ex:
            self.fail("failed to create new SalinityTransform data process definition: %s" %ex)

        # create a stream definition for the data from the salinity Transform
        sal_stream_def_id = self.pubsubclient.create_stream_definition(container=SalinityTransform.outgoing_stream_def,  name='Salinity')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(sal_stream_def_id, ctd_L2_salinity_dprocdef_id )

        return ctd_L2_salinity_dprocdef_id

    def create_salinity_doubler_data_process_definition(self):

        #First look to see if it exists and if not, then create it
        dpd,_ = self.rrclient.find_resources(restype=RT.DataProcessDefinition, name='salinity_doubler')
        if len(dpd) > 0:
            return dpd[0]

        # Salinity Doubler: Data Process Definition
        log.debug("Create data process definition SalinityDoublerTransform")
        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='salinity_doubler',
            description='create a salinity doubler data product',
            module='ion.processes.data.transforms.example_double_salinity',
            class_name='SalinityDoubler',
            process_source='SalinityDoubler source code here...')
        try:
            salinity_doubler_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
        except Exception as ex:
            self.fail("failed to create new SalinityDoubler data process definition: %s" %ex)


        # create a stream definition for the data from the salinity Transform
        salinity_double_stream_def_id = self.pubsubclient.create_stream_definition(container=SalinityDoubler.outgoing_stream_def,  name='SalinityDoubler')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(salinity_double_stream_def_id, salinity_doubler_dprocdef_id )

        return salinity_doubler_dprocdef_id


    def create_transform_process(self, data_process_definition_id, data_process_input_dp_id):

        data_process_definition = self.rrclient.read(data_process_definition_id)

        # Find the link between the output Stream Definition resource and the Data Process Definition resource
        stream_ids,_ = self.rrclient.find_objects(data_process_definition._id, PRED.hasStreamDefinition, RT.StreamDefinition,  id_only=True)
        if not stream_ids:
            raise Inconsistent("The data process definition %s is missing an association to an output stream definition" % data_process_definition._id )
        process_output_stream_def_id = stream_ids[0]

        # Use the data process definition's name for the output data product
        data_process_name = data_process_definition.name

        # Create the output data product of the transform
        transform_dp_obj = IonObject(RT.DataProduct, name=data_process_name,description=data_process_definition.description)
        transform_dp_id = self.dataproductclient.create_data_product(transform_dp_obj, process_output_stream_def_id)
        self.dataproductclient.activate_data_product_persistence(data_product_id=transform_dp_id)

        # keep the id of the transform's output data product
        output_data_product_id = transform_dp_id

        # Create the  transform data process
        log.debug("create data_process and start it")
        data_process_id = self.dataprocessclient.create_data_process(data_process_definition._id, [data_process_input_dp_id], {'output':transform_dp_id})
        self.dataprocessclient.activate_data_process(data_process_id)


        #Find the id of the output data stream
        stream_ids, _ = self.rrclient.find_objects(transform_dp_id, PRED.hasStream, None, True)
        if not stream_ids:
            raise Inconsistent("The data process %s is missing an association to an output stream" % data_process_id )

        return data_process_id, output_data_product_id



    def create_google_dt_data_process_definition(self):

        #First look to see if it exists and if not, then create it
        dpd,_ = self.rrclient.find_resources(restype=RT.DataProcessDefinition, name='google_dt_transform')
        if len(dpd) > 0:
            return dpd[0]

        # Data Process Definition
        log.debug("Create data process definition GoogleDtTransform")
        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='google_dt_transform',
            description='Convert data streams to Google DataTables',
            module='ion.processes.data.transforms.viz.google_dt',
            class_name='VizTransformGoogleDT',
            process_source='VizTransformGoogleDT source code here...')
        try:
            procdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
        except Exception as ex:
            self.fail("failed to create new VizTransformGoogleDT data process definition: %s" %ex)


        # create a stream definition for the data from the Google DT transform
        stream_def_id = self.pubsubclient.create_stream_definition(container=VizTransformGoogleDT.outgoing_stream_def,  name='VizTransformGoogleDT')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(stream_def_id, procdef_id )

        return procdef_id
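
    # A typical chaining of the helpers above (illustrative sketch only;
    # 'some_input_dp_id' is a stand-in for whichever input data product the
    # test has already created):
    #
    #     gdt_dprocdef_id = self.create_google_dt_data_process_definition()
    #     dproc_id, out_dp_id = self.create_transform_process(gdt_dprocdef_id, some_input_dp_id)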


    def validate_google_dt_transform_results(self, results):

        cc = self.container
        assertions = self.assertTrue

        # If it's just one granule, wrap it in a list so the following loop handles both cases
        if isinstance(results, Granule):
            results = [results]

        for g in results:

            if isinstance(g,Granule):

                tx = TaxyTool.load_from_granule(g)
                rdt = RecordDictionaryTool.load_from_granule(g)

                gdt_data = get_safe(rdt, 'google_dt_components')

                # If this granule does not contain google dt components, skip it
                if gdt_data is None:
                    continue

                gdt = gdt_data[0]

                assertions(gdt['viz_product_type'] == 'google_dt' )
                assertions(len(gdt['data_description']) >= 0) # Need to come up with a better check
                assertions(len(gdt['data_content']) >= 0)




    def create_mpl_graphs_data_process_definition(self):

        #First look to see if it exists and if not, then create it
        dpd,_ = self.rrclient.find_resources(restype=RT.DataProcessDefinition, name='mpl_graphs_transform')
        if len(dpd) > 0:
            return dpd[0]

        #Data Process Definition
        log.debug("Create data process definition MatplotlibGraphsTransform")
        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='mpl_graphs_transform',
            description='Convert data streams to Matplotlib graphs',
            module='ion.processes.data.transforms.viz.matplotlib_graphs',
            class_name='VizTransformMatplotlibGraphs',
            process_source='VizTransformMatplotlibGraphs source code here...')
        try:
            procdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
        except Exception as ex:
            self.fail("failed to create new VizTransformMatplotlibGraphs data process definition: %s" %ex)


        # create a stream definition for the data
        stream_def_id = self.pubsubclient.create_stream_definition(container=VizTransformMatplotlibGraphs.outgoing_stream_def,  name='VizTransformMatplotlibGraphs')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(stream_def_id, procdef_id )

        return procdef_id

    def validate_mpl_graphs_transform_results(self, results):

        cc = self.container
        assertions = self.assertTrue

        # If it's just one granule, wrap it in a list so the following loop handles both cases
        if isinstance(results, Granule):
            results = [results]

        for g in results:
            if isinstance(g,Granule):

                tx = TaxyTool.load_from_granule(g)
                rdt = RecordDictionaryTool.load_from_granule(g)

                graphs = get_safe(rdt, 'matplotlib_graphs')

                if graphs is None:
                    continue

                for graph in graphs[0]:

                    # At this point only dictionaries containing image data should be passed
                    # For some reason non-dictionary values are filtering through.
                    if not isinstance(graph, dict):
                        continue

                    assertions(graph['viz_product_type'] == 'matplotlib_graphs' )
                    # check to see if the list (numpy array) contains actual images
                    assertions(imghdr.what(graph['image_name'], h = graph['image_obj']) == 'png')



    def validate_vis_service_google_dt_results(self, results):


        assertions = self.assertTrue

        assertions(results)
        gdt_str = (results.lstrip("google.visualization.Query.setResponse(")).rstrip(")")

        assertions(len(gdt_str) > 0)

        return

    def validate_vis_service_mpl_graphs_results(self, results):

        assertions = self.assertTrue
        assertions(results)

        # check to see if the object passed is a dictionary with a valid image object in it
        image_format = results["content_type"].lstrip("image/")

        assertions(imghdr.what(results['image_name'], h = base64.decodestring(results['image_obj'])) == image_format)

        return
class TestTransformWorker(IonIntegrationTestCase):

    def setUp(self):
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')
        # Instantiate a process to represent the test
        process=TransformWorkerTestProcess()

        self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
        self.pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceProcessClient(node=self.container.node, process = process)

        self.time_dom, self.spatial_dom = time_series_domain()

        self.ph = ParameterHelper(self.dataset_management_client, self.addCleanup)

        self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10)

    def push_granule(self, data_product_id):
        '''
        Publishes a granule and monitors that it arrived at the dataset
        '''
        datasets, _ = self.rrclient.find_objects(data_product_id, PRED.hasDataset, id_only=True)
        dataset_monitor = DatasetMonitor(datasets[0])

        rdt = self.ph.rdt_for_data_product(data_product_id)
        self.ph.fill_parsed_rdt(rdt)
        self.ph.publish_rdt_to_data_product(data_product_id, rdt)

        assert dataset_monitor.wait()
        dataset_monitor.stop()
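
        # Typical usage (a sketch; 'data_product_id' is assumed to already have an
        # associated dataset so the DatasetMonitor has something to watch):
        #
        #     self.push_granule(data_product_id)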



    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_transform_worker(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.dp_list = []
        self.data_process_objs = []
        self._output_stream_ids = []
        self.granule_verified = Event()
        self.worker_assigned_event_verified = Event()
        self.dp_created_event_verified = Event()
        self.heartbeat_event_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        self.start_event_listener()

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()
        self.dp_list.append(dataprocess_id)

        # validate the repository for data product algorithms persists the new resources  NEW SA-1
        # create_data_process call created one of each
        dpd_ids, _ = self.rrclient.find_resources(restype=OT.DataProcessDefinition, id_only=False)
        # there will be more than one because of the DPDs that represent the PFs in the data product above
        self.assertTrue(dpd_ids is not None)
        dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess, id_only=False)
        # only one DP because the PFs in the data product above are not activated yet.
        self.assertEquals(len(dp_ids), 1)


        # validate the name and version label  NEW SA - 2
        dataprocessdef_obj = self.dataprocessclient.read_data_process_definition(dataprocessdef_id)
        self.assertEqual(dataprocessdef_obj.version_label, '1.0a')
        self.assertEqual(dataprocessdef_obj.name, 'add_arrays')

        # validate that the DPD has an attachment  NEW SA - 21
        attachment_ids, assoc_ids = self.rrclient.find_objects(dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True)
        self.assertEqual(len(attachment_ids), 1)
        attachment_obj = self.rrclient.read_attachment(attachment_ids[0])
        log.debug('attachment: %s', attachment_obj)

        # validate that the data process resource has input and output data products associated
        # L4-CI-SA-RQ-364  and NEW SA-3
        outproduct_ids, assoc_ids = self.rrclient.find_objects(dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True)
        self.assertEqual(len(outproduct_ids), 1)
        inproduct_ids, assoc_ids = self.rrclient.find_objects(dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True)
        self.assertEqual(len(inproduct_ids), 1)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0])

        # Do a basic check that there are 2 entries in the provenance graph: the child (output)
        # data product and its parent (input) data product.
        self.assertTrue(len(output_data_product_provenance) == 2)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents'])
        self.assertTrue(output_data_product_provenance[output_data_product_id[0]]['parents'][self.input_dp_id]['data_process_definition_id'] == dataprocessdef_id)


        # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in
        # the metadata of each output data product created by the data product algorithm.
        output_data_product_obj,_ = self.rrclient.find_objects(subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=False)
        self.assertTrue(output_data_product_obj[0].name is not None)
        self.assertTrue(output_data_product_obj[0]._rev is not None)

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(subject=dataprocess_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s', subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route )
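
        # Publish 100 granules: the transform worker is expected to emit a heartbeat
        # status event for every hundred granules it processes (verified further below).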


        for n in range(1, 101):
            rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
            rdt['time']         = [0] # time should always come first
            rdt['conductivity'] = [1]
            rdt['pressure']     = [2]
            rdt['salinity']     = [8]

            self.publisher.publish(rdt.to_granule())

        # validate that the output granule is received and the updated value is correct
        self.assertTrue(self.granule_verified.wait(self.wait_time))


        # validate that the data process loaded into worker event is received    (L4-CI-SA-RQ-182)
        self.assertTrue(self.worker_assigned_event_verified.wait(self.wait_time))

        # validate that the data process create (with data product ids) event is received    (NEW SA -42)
        self.assertTrue(self.dp_created_event_verified.wait(self.wait_time))

        # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182)
        #this takes a while so set wait limit to large value
        self.assertTrue(self.heartbeat_event_verified.wait(200))

        # validate that the code from the transform function can be retrieved via inspect_data_process_definition
        src = self.dataprocessclient.inspect_data_process_definition(dataprocessdef_id)
        self.assertIn( 'def add_arrays(a, b)', src)

        # now delete the DPD and DP, then verify that the resources are retired so that the information required for provenance is still available
        self.dataprocessclient.delete_data_process(dataprocess_id)
        self.dataprocessclient.delete_data_process_definition(dataprocessdef_id)

        in_dp_objs, _ = self.rrclient.find_objects(subject=dataprocess_id, predicate=PRED.hasInputProduct, object_type=RT.DataProduct, id_only=True)
        self.assertTrue(in_dp_objs is not None)

        dpd_objs, _ = self.rrclient.find_subjects(subject_type=RT.DataProcessDefinition, predicate=PRED.hasDataProcess, object=dataprocess_id, id_only=True)
        self.assertTrue(dpd_objs is not None)
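
    # For reference, a plausible sketch of the transform function loaded from the
    # ion_example egg in create_data_process() below (the test only asserts that
    # 'def add_arrays(a, b)' appears in the inspected source and that inputs of
    # [1] and [2] produce a salinity of [3] in the output granule):
    #
    #     def add_arrays(a, b):
    #         return a + b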

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_transform_worker_with_instrumentdevice(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # Create CTD Parsed as the initial data product
        # create a stream definition for the data from the ctd simulator
        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
            temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id)

        # only ever need one device for testing purposes.
        instDevice_obj,_ = self.rrclient.find_resources(restype=RT.InstrumentDevice, name='test_ctd_device')
        if instDevice_obj:
            instDevice_id = instDevice_obj[0]._id
        else:
            instDevice_obj = IonObject(RT.InstrumentDevice, name='test_ctd_device', description="test_ctd_device", serial_number="12345" )
            instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=self.input_dp_id)

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()

        self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id)
        self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0])

        # Do a basic check that there are 3 entries in the provenance graph: the child (output)
        # data product, its parent (input) data product, and the instrument device attached to the parent.
        self.assertTrue(len(output_data_product_provenance) == 3)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents'])
        self.assertTrue(instDevice_id in output_data_product_provenance[self.input_dp_id]['parents'])
        self.assertTrue(output_data_product_provenance[instDevice_id]['type'] == 'InstrumentDevice')

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_transform_worker_with_platformdevice(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # Create CTD Parsed as the initial data product
        # create a stream definition for the data from the ctd simulator
        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
            temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id)

        # only ever need one device for testing purposes.
        platform_device_obj,_ = self.rrclient.find_resources(restype=RT.PlatformDevice, name='TestPlatform')
        if platform_device_obj:
            platform_device_id = platform_device_obj[0]._id
        else:
            platform_device_obj = IonObject(RT.PlatformDevice, name='TestPlatform', description="TestPlatform", serial_number="12345" )
            platform_device_id = self.imsclient.create_platform_device(platform_device=platform_device_obj)

        self.damsclient.assign_data_product(input_resource_id=platform_device_id, data_product_id=self.input_dp_id)

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process()
        self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id)
        self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id,_ = self.rrclient.find_objects(subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(output_data_product_id[0])

        # Do a basic check that there are 3 entries in the provenance graph: the child (output)
        # data product, its parent (input) data product, and the platform device attached to the parent.
        self.assertTrue(len(output_data_product_provenance) == 3)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[output_data_product_id[0]]['parents'])
        self.assertTrue(platform_device_id in output_data_product_provenance[self.input_dp_id]['parents'])
        self.assertTrue(output_data_product_provenance[platform_device_id]['type'] == 'PlatformDevice')


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_event_transform_worker(self):
        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()


        # test that a data process (type: data-product-in / event-out) can be defined and launched.
        # verify that event fields are correctly populated


        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        # create the DPD and two DPs
        self.event_data_process_id = self.create_event_data_processes()

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(subject=self.event_data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False)
        log.debug('test_event_transform_worker subscription_obj:  %s', subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route )

        self.start_event_transform_listener()

        self.data_modified = Event()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time']         = [0] # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure']     = [2]
        rdt['salinity']     = [8]

        self.publisher.publish(rdt.to_granule())

        self.assertTrue(self.event_verified.wait(self.wait_time))



    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_bad_argument_map(self):
        self._output_stream_ids = []

        # test that a data process (type: data-product-in / data-product-out) parameter mapping is validated during
        # data process creation and that the correct exception is raised for both input and output.

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        # set up the output data product and the DPD used by the two invalid create_data_process calls below

        dp1_func_output_dp_id =  self.create_output_data_product()


        # Set up the DPD for the array add function
        tf_obj = IonObject(RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
            uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
            )
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS
            )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='add_array_func' )

        # create the data process with invalid argument map
        argument_map = {"arr1": "foo", "arr2": "bar"}
        output_param = "salinity"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id],
                                                                                 outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param)

        ex = cm.exception
        log.debug(' exception raised: %s', cm)
        self.assertEqual(ex.message, "Input data product does not contain the parameters defined in argument map")

        # create the data process with invalid output parameter name
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "foo"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id],
                                                                                 outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param)

        ex = cm.exception
        log.debug(' exception raised: %s', cm)
        self.assertEqual(ex.message, "Output data product does not contain the output parameter name provided")


    def create_event_data_processes(self):

        # one event-out data process using one transform function and one DPD
        argument_map= {"a": "salinity"}


        # set up the DPD and DP - validate_salinity_array function
        tf_obj = IonObject(RT.TransformFunction,
            name='validate_salinity_array',
            description='validate_salinity_array',
            function='validate_salinity_array',
            module="ion.processes.data.transforms.test.test_transform_worker",
            arguments=['a'],
            function_type=TransformFunctionType.TRANSFORM
            )

        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='validate_salinity_array',
            description='validate_salinity_array',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
            )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='validate_salinity_array' )

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id],
                                                                             outputs=None, argument_map=argument_map)
        self.damsclient.register_process(dp1_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id)

        return dp1_data_process_id

    def create_data_process(self):

        # one data process using one transform function and one DPD

        dp1_func_output_dp_id =  self.create_output_data_product()
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "salinity"


        # set up the DPD and DP - array add function
        tf_obj = IonObject(RT.TransformFunction,
            name='add_array_func',
            description='adds values in an array',
            function='add_arrays',
            module="ion_example.add_arrays",
            arguments=['arr1', 'arr2'],
            function_type=TransformFunctionType.TRANSFORM,
             uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
            )
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='add_arrays',
            description='adds the values of two arrays',
            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
            version_label='1.0a'
            )
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(self.stream_def_id, add_array_dpd_id, binding='add_array_func' )

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(data_process_definition_id=add_array_dpd_id, inputs=[self.input_dp_id],
                                                                             outputs=[dp1_func_output_dp_id], argument_map=argument_map, out_param_name=output_param)
        self.damsclient.register_process(dp1_data_process_id)
        #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id)

        # add an attachment object to this DPD to test new SA-21
        import msgpack
        attachment_content = 'foo bar'
        attachment_obj = IonObject( RT.Attachment,
                                name='test_attachment',
                                attachment_type=AttachmentType.ASCII,
                                content_type='text/plain',
                                content=msgpack.packb(attachment_content))
        att_id = self.rrclient.create_attachment(add_array_dpd_id, attachment_obj)
        self.addCleanup(self.rrclient.delete_attachment, att_id)

        return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id


    def create_output_data_product(self):
        dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id)

        dp1_output_dp_obj = IonObject(  RT.DataProduct,
            name='data_process1_data_product',
            description='output of add array func',
            temporal_domain = self.time_dom.dump(),
            spatial_domain = self.spatial_dom.dump())

        dp1_func_output_dp_id = self.dataproductclient.create_data_product(dp1_output_dp_obj,  dp1_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id)
        # retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger
        stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True)
        self._output_stream_ids.append(stream_ids[0])

        subscription_id = self.pubsub_client.create_subscription('validator', data_product_ids=[dp1_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription, subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s   msg: %s', stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)
            self.granule_verified.set()

        validator = StandaloneStreamSubscriber('validator', callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id)

        return dp1_func_output_dp_id


    def validate_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DataProcessStatusEvent.
        """
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s" ,  str(data_process_event.__dict__))

        # if data process already created, check origin
        if self.dp_list:
            self.assertIn( data_process_event.origin, self.dp_list)

            # if this is a heartbeat event then 100 granules have been processed
            if 'data process status update.' in data_process_event.description:
                self.heartbeat_event_verified.set()

        else:
            # else check that this is the assign event

            if 'Data process assigned to transform worker' in data_process_event.description:
                self.worker_assigned_event_verified.set()
            elif 'Data process created for data product' in data_process_event.description:
                self.dp_created_event_verified.set()


    def validate_output_granule(self, msg, route, stream_id):
        self.assertIn( stream_id, self._output_stream_ids)

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule  rdt: %s', rdt)
        sal_val = rdt['salinity']
        np.testing.assert_array_equal(sal_val, np.array([3]))

    def start_event_listener(self):

        es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event)
        es.start()

        self.addCleanup(es.stop)

    def validate_transform_event(self, *args, **kwargs):
        """
        This method is a callback function for receiving DeviceStatusAlertEvent.
        """
        status_alert_event = args[0]

        np.testing.assert_array_equal(status_alert_event.origin, self.stream_id )
        np.testing.assert_array_equal(status_alert_event.values, np.array([self.event_data_process_id]))
        log.debug("DeviceStatusAlertEvent: %s" ,  str(status_alert_event.__dict__))
        self.event_verified.set()


    def start_event_transform_listener(self):
        es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent, callback=self.validate_transform_event)
        es.start()

        self.addCleanup(es.stop)


    def test_download(self):
        egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        egg_path = TransformWorker.download_egg(egg_url)

        import pkg_resources
        pkg_resources.working_set.add_entry(egg_path)

        from ion_example.add_arrays import add_arrays

        a = add_arrays(1,2)
        self.assertEquals(a,3)
Exemplo n.º 46
0
class CtdTransformsIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(CtdTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.queue_cleanup = []
        self.exchange_cleanup = []

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

        self.exchange_name = 'ctd_L0_all_queue'
        self.exchange_point = 'test_exchange'
        self.i = 0

    def tearDown(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()

    def test_ctd_L0_all(self):
        '''
        Test that packets are processed by the ctd_L0_all transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='ctd_L0_all', description='For testing ctd_L0_all')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.ctd.ctd_L0_all'
        process_definition.executable['class'] = 'ctd_L0_all'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition(
            'ctd_all_stream_def', parameter_dictionary_id=pdict_id)

        cond_stream_id, _ = self.pubsub.create_stream(
            'test_cond',
            exchange_point='science_data',
            stream_definition_id=stream_def_id)

        pres_stream_id, _ = self.pubsub.create_stream(
            'test_pres',
            exchange_point='science_data',
            stream_definition_id=stream_def_id)

        temp_stream_id, _ = self.pubsub.create_stream(
            'test_temp',
            exchange_point='science_data',
            stream_definition_id=stream_def_id)

        config.process.publish_streams.conductivity = cond_stream_id
        config.process.publish_streams.pressure = pres_stream_id
        config.process.publish_streams.temperature = temp_stream_id

        # Schedule the process
        pid = self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id,
            configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create subscribers that will receive the conductivity, temperature and pressure granules from
        # the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_cond = gevent.event.AsyncResult()

        def subscriber1(m, r, s):
            ar_cond.set(m)

        sub_cond = StandaloneStreamSubscriber('sub_cond', subscriber1)
        self.addCleanup(sub_cond.stop)

        ar_temp = gevent.event.AsyncResult()

        def subscriber2(m, r, s):
            ar_temp.set(m)

        sub_temp = StandaloneStreamSubscriber('sub_temp', subscriber2)
        self.addCleanup(sub_temp.stop)

        ar_pres = gevent.event.AsyncResult()

        def subscriber3(m, r, s):
            ar_pres.set(m)

        sub_pres = StandaloneStreamSubscriber('sub_pres', subscriber3)
        self.addCleanup(sub_pres.stop)

        sub_cond_id = self.pubsub.create_subscription(
            'subscription_cond',
            stream_ids=[cond_stream_id],
            exchange_name='sub_cond')

        sub_temp_id = self.pubsub.create_subscription(
            'subscription_temp',
            stream_ids=[temp_stream_id],
            exchange_name='sub_temp')

        sub_pres_id = self.pubsub.create_subscription(
            'subscription_pres',
            stream_ids=[pres_stream_id],
            exchange_name='sub_pres')

        self.pubsub.activate_subscription(sub_cond_id)
        self.pubsub.activate_subscription(sub_temp_id)
        self.pubsub.activate_subscription(sub_pres_id)

        self.queue_cleanup.append(sub_cond.xn.queue)
        self.queue_cleanup.append(sub_temp.xn.queue)
        self.queue_cleanup.append(sub_pres.xn.queue)

        sub_cond.start()
        sub_temp.start()
        sub_pres.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can then be picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)
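        # The transform's input queue (self.exchange_name) is bound to the exchange
        # point with the same routing key the publisher uses, so the packet published
        # below is delivered to the ctd_L0_all process.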

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result_cond = ar_cond.get(timeout=10)
        result_temp = ar_temp.get(timeout=10)
        result_pres = ar_pres.get(timeout=10)

        out_dict = {}
        out_dict['c'] = RecordDictionaryTool.load_from_granule(
            result_cond)['conductivity']
        out_dict['t'] = RecordDictionaryTool.load_from_granule(
            result_temp)['temp']
        out_dict['p'] = RecordDictionaryTool.load_from_granule(
            result_pres)['pressure']

        # Check that the transform algorithm was successfully executed
        self.check_granule_splitting(publish_granule, out_dict)

    def test_ctd_L1_conductivity(self):
        '''
        Test that packets are processed by the ctd_L1_conductivity transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='CTDL1ConductivityTransform',
            description='For testing CTDL1ConductivityTransform')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.ctd.ctd_L1_conductivity'
        process_definition.executable['class'] = 'CTDL1ConductivityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)

        stream_def_id = self.pubsub.create_stream_definition(
            'cond_stream_def', parameter_dictionary_id=pdict_id)
        cond_stream_id, _ = self.pubsub.create_stream(
            'test_conductivity',
            exchange_point='science_data',
            stream_definition_id=stream_def_id)

        config.process.publish_streams.conductivity = cond_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id,
            configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the conductivity granules from
        # the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_cond = gevent.event.AsyncResult()

        def subscriber1(m, r, s):
            ar_cond.set(m)

        sub_cond = StandaloneStreamSubscriber('sub_cond', subscriber1)
        self.addCleanup(sub_cond.stop)

        sub_cond_id = self.pubsub.create_subscription(
            'subscription_cond',
            stream_ids=[cond_stream_id],
            exchange_name='sub_cond')

        self.pubsub.activate_subscription(sub_cond_id)

        self.queue_cleanup.append(sub_cond.xn.queue)

        sub_cond.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can then be picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result_cond = ar_cond.get(timeout=10)
        self.assertTrue(isinstance(result_cond, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result_cond)
        self.assertIn('conductivity', rdt)

        self.check_cond_algorithm_execution(publish_granule, result_cond)

    def check_cond_algorithm_execution(self, publish_granule,
                                       granule_from_transform):

        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(
            publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(
            granule_from_transform)

        output_data = output_rdt_transform['conductivity']
        input_data = input_rdt_to_transform['conductivity']

        self.assertTrue(
            numpy.array_equal(((input_data / 100000.0) - 0.5), output_data))

    def check_pres_algorithm_execution(self, publish_granule,
                                       granule_from_transform):

        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(
            publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(
            granule_from_transform)

        output_data = output_rdt_transform['pressure']
        input_data = input_rdt_to_transform['pressure']

        self.assertTrue(
            numpy.array_equal((input_data / 100.0) + 0.5, output_data))

    def check_temp_algorithm_execution(self, publish_granule,
                                       granule_from_transform):

        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(
            publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(
            granule_from_transform)

        output_data = output_rdt_transform['temp']
        input_data = input_rdt_to_transform['temp']

        self.assertTrue(
            numpy.array_equal(((input_data / 10000.0) - 10), output_data))

    def check_density_algorithm_execution(self, publish_granule,
                                          granule_from_transform):

        #------------------------------------------------------------------
        # Calculate the correct density from the input granule data
        #------------------------------------------------------------------
        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(
            publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(
            granule_from_transform)

        conductivity = input_rdt_to_transform['conductivity']
        pressure = input_rdt_to_transform['pressure']
        temperature = input_rdt_to_transform['temp']

        longitude = input_rdt_to_transform['lon']
        latitude = input_rdt_to_transform['lat']
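
        # SP_from_cndr expects the conductivity ratio r = C / C(35,15,0); cte.C3515 is
        # the conductivity of standard seawater (~42.914 mS/cm), so dividing by it
        # converts the measured conductivity into that ratio.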

        sp = SP_from_cndr(r=conductivity / cte.C3515,
                          t=temperature,
                          p=pressure)
        sa = SA_from_SP(sp, pressure, longitude, latitude)
        dens_value = rho(sa, temperature, pressure)

        out_density = output_rdt_transform['density']

        log.debug("density output from the transform: %s", out_density)
        log.debug("values of density expected from the transform: %s",
                  dens_value)

        numpy.testing.assert_array_almost_equal(out_density,
                                                dens_value,
                                                decimal=3)

#        #-----------------------------------------------------------------------------
#        # Check that the output data from the transform has the correct density values
#        #-----------------------------------------------------------------------------
#        for item in zip(out_density.tolist(), dens_value.tolist()):
#            if isinstance(item[0], int) or isinstance(item[0], float):
#                self.assertEquals(item[0], item[1])

    def check_salinity_algorithm_execution(self, publish_granule,
                                           granule_from_transform):

        #------------------------------------------------------------------
        # Calculate the correct salinity from the input granule data
        #------------------------------------------------------------------
        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(
            publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(
            granule_from_transform)

        conductivity = input_rdt_to_transform['conductivity']
        pressure = input_rdt_to_transform['pressure']
        temperature = input_rdt_to_transform['temp']

        sal_value = SP_from_cndr(r=conductivity / cte.C3515,
                                 t=temperature,
                                 p=pressure)

        out_salinity = output_rdt_transform['salinity']

        #-----------------------------------------------------------------------------
        # Check that the output data from the transform has the correct salinity values
        #-----------------------------------------------------------------------------
        self.assertTrue(numpy.array_equal(sal_value, out_salinity))

    def check_granule_splitting(self, publish_granule, out_dict):
        '''
        This checks that the ctd_L0_all transform is able to split the conductivity,
        pressure and temperature granules out of the whole granule fed into the transform.
        '''

        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(
            publish_granule)

        in_cond = input_rdt_to_transform['conductivity']
        in_pressure = input_rdt_to_transform['pressure']
        in_temp = input_rdt_to_transform['temp']

        out_cond = out_dict['c']
        out_pres = out_dict['p']
        out_temp = out_dict['t']

        self.assertTrue(numpy.array_equal(in_cond, out_cond))
        self.assertTrue(numpy.array_equal(in_pressure, out_pres))
        self.assertTrue(numpy.array_equal(in_temp, out_temp))

    def test_ctd_L1_pressure(self):
        '''
        Test that packets are processed by the ctd_L1_pressure transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='CTDL1PressureTransform',
            description='For testing CTDL1PressureTransform')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.ctd.ctd_L1_pressure'
        process_definition.executable['class'] = 'CTDL1PressureTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)

        stream_def_id = self.pubsub.create_stream_definition(
            'pres_stream_def', parameter_dictionary_id=pdict_id)
        pres_stream_id, _ = self.pubsub.create_stream(
            'test_pressure',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.pressure = pres_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id,
            configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create subscribers that will receive the pressure granules from
        # the ctd transform
        #---------------------------------------------------------------------------------------------

        ar_pres = gevent.event.AsyncResult()

        def subscriber3(m, r, s):
            ar_pres.set(m)

        sub_pres = StandaloneStreamSubscriber('sub_pres', subscriber3)
        self.addCleanup(sub_pres.stop)

        sub_pres_id = self.pubsub.create_subscription(
            'subscription_pres',
            stream_ids=[pres_stream_id],
            exchange_name='sub_pres')

        self.pubsub.activate_subscription(sub_pres_id)

        self.queue_cleanup.append(sub_pres.xn.queue)

        sub_pres.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)
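        # Binding the transform's input queue (xn) to the exchange point (xp) with the
        # routing key above is what delivers the granule published below to the
        # transform's queue.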

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_pres.get(timeout=10)
        self.assertIsInstance(result, Granule)

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('pressure', rdt)

        self.check_pres_algorithm_execution(publish_granule, result)

    def test_ctd_L1_temperature(self):
        '''
        Test that packets are processed by the ctd_L1_temperature transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='CTDL1TemperatureTransform',
            description='For testing CTDL1TemperatureTransform')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.ctd.ctd_L1_temperature'
        process_definition.executable['class'] = 'CTDL1TemperatureTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)

        stream_def_id = self.pubsub.create_stream_definition(
            'temp_stream_def', parameter_dictionary_id=pdict_id)
        temp_stream_id, _ = self.pubsub.create_stream(
            'test_temperature',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.temperature = temp_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id,
            configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create subscriber that will receive the temperature granule from
        # the ctd transform
        #---------------------------------------------------------------------------------------------

        ar_temp = gevent.event.AsyncResult()

        def subscriber2(m, r, s):
            ar_temp.set(m)

        sub_temp = StandaloneStreamSubscriber('sub_temp', subscriber2)
        self.addCleanup(sub_temp.stop)

        sub_temp_id = self.pubsub.create_subscription(
            'subscription_temp',
            stream_ids=[temp_stream_id],
            exchange_name='sub_temp')

        self.pubsub.activate_subscription(sub_temp_id)

        self.queue_cleanup.append(sub_temp.xn.queue)

        sub_temp.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_temp.get(timeout=10)
        self.assertIsInstance(result, Granule)

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('temp', rdt)

        self.check_temp_algorithm_execution(publish_granule, result)

    def test_ctd_L2_density(self):
        '''
        Test that packets are processed by the ctd_L2_density transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='DensityTransform',
            description='For testing DensityTransform')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.ctd.ctd_L2_density'
        process_definition.executable['class'] = 'DensityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        config.process.interval = 1.0

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)

        stream_def_id = self.pubsub.create_stream_definition(
            'dens_stream_def', parameter_dictionary_id=pdict_id)
        dens_stream_id, _ = self.pubsub.create_stream(
            'test_density',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')
        config.process.publish_streams.density = dens_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id,
            configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the density granule from the ctd transform
        #---------------------------------------------------------------------------------------------

        ar_dens = gevent.event.AsyncResult()

        def subscriber3(m, r, s):
            ar_dens.set(m)

        sub_dens = StandaloneStreamSubscriber('sub_dens', subscriber3)
        self.addCleanup(sub_dens.stop)

        sub_dens_id = self.pubsub.create_subscription(
            'subscription_dens',
            stream_ids=[dens_stream_id],
            exchange_name='sub_dens')

        self.pubsub.activate_subscription(sub_dens_id)

        self.queue_cleanup.append(sub_dens.xn.queue)

        sub_dens.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_dens.get(timeout=10)
        self.assertIsInstance(result, Granule)

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('density', rdt)

        self.check_density_algorithm_execution(publish_granule, result)

    def test_ctd_L2_salinity(self):
        '''
        Test that packets are processed by the ctd_L2_salinity transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='SalinityTransform',
            description='For testing SalinityTransform')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.ctd.ctd_L2_salinity'
        process_definition.executable['class'] = 'SalinityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition(
            'sal_stream_def', parameter_dictionary_id=pdict_id)
        sal_stream_id, _ = self.pubsub.create_stream(
            'test_salinity',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.salinity = sal_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id,
            configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the salinity granule from the ctd transform
        #---------------------------------------------------------------------------------------------

        ar_sal = gevent.event.AsyncResult()

        def subscriber3(m, r, s):
            ar_sal.set(m)

        sub_sal = StandaloneStreamSubscriber('sub_sal', subscriber3)
        self.addCleanup(sub_sal.stop)

        sub_sal_id = self.pubsub.create_subscription(
            'subscription_sal',
            stream_ids=[sal_stream_id],
            exchange_name='sub_sal')

        self.pubsub.activate_subscription(sub_sal_id)

        self.queue_cleanup.append(sub_sal.xn.queue)

        sub_sal.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_sal.get(timeout=10)
        self.assertIsInstance(result, Granule)

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('salinity', rdt)

        self.check_salinity_algorithm_execution(publish_granule, result)

    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i + length)

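        # Fill every numeric (QuantityType) parameter with random floats so the
        # transform has plausible values to work with; parameters of other types are
        # left untouched.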
        for field in rdt:
            if isinstance(
                    rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array(
                    [random.uniform(0.0, 75.0) for i in xrange(length)])

        g = rdt.to_granule()
        self.i += length

        return g

    def test_presf_L0_splitter(self):
        '''
        Test that packets are processed by the presf_L0_splitter transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='Presf L0 Splitter',
            description='For testing Presf L0 Splitter')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.ctd.presf_L0_splitter'
        process_definition.executable['class'] = 'PresfL0Splitter'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)

        stream_def_id = self.pubsub.create_stream_definition(
            'pres_stream_def', parameter_dictionary_id=pdict_id)
        pres_stream_id, _ = self.pubsub.create_stream(
            'test_pressure',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.absolute_pressure = pres_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id,
            configuration=config)

#        #---------------------------------------------------------------------------------------------
#        # Create subscribers that will receive the pressure granules from
#        # the ctd transform
#        #---------------------------------------------------------------------------------------------
#
#        ar_pres = gevent.event.AsyncResult()
#        def subscriber3(m,r,s):
#            ar_pres.set(m)
#        sub_pres = StandaloneStreamSubscriber('sub_pres', subscriber3)
#        self.addCleanup(sub_pres.stop)
#
#        sub_pres_id = self.pubsub.create_subscription('subscription_pres',
#            stream_ids=[pres_stream_id],
#            exchange_name='sub_pres')
#
#        self.pubsub.activate_subscription(sub_pres_id)
#
#        self.queue_cleanup.append(sub_pres.xn.queue)
#
#        sub_pres.start()
#
#        #------------------------------------------------------------------------------------------------------
#        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
#        #------------------------------------------------------------------------------------------------------
#
#        # Do all the routing stuff for the publishing
#        routing_key = 'stream_id.stream'
#        stream_route = StreamRoute(self.exchange_point, routing_key)
#
#        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
#        xp = self.container.ex_manager.create_xp(self.exchange_point)
#        xn.bind('stream_id.stream', xp)
#
#        pub = StandaloneStreamPublisher('stream_id', stream_route)
#
#        # Build a packet that can be published
#        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length = 5)
#
#        # Publish the packet
#        pub.publish(publish_granule)
#
#        #------------------------------------------------------------------------------------------------------
#        # Make assertions about whether the ctd transform executed its algorithm and published the correct
#        # granules
#        #------------------------------------------------------------------------------------------------------
#
#        # Get the granule that is published by the ctd transform post processing
#        result = ar_pres.get(timeout=10)
#        self.assertTrue(isinstance(result, Granule))
#
#        rdt = RecordDictionaryTool.load_from_granule(result)
#        self.assertTrue(rdt.__contains__('pressure'))
#
#        self.check_pres_algorithm_execution(publish_granule, result)
#

    def test_presf_L1(self):
        '''
        Test that packets are processed by the presf_L1 transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='PresfL1Transform',
            description='For testing PresfL1Transform')
        process_definition.executable[
            'module'] = 'ion.processes.data.transforms.ctd.presf_L1'
        process_definition.executable['class'] = 'PresfL1Transform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)

        stream_def_id = self.pubsub.create_stream_definition(
            'pres_stream_def', parameter_dictionary_id=pdict_id)
        pres_stream_id, _ = self.pubsub.create_stream(
            'test_pressure',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.seafloor_pressure = pres_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id,
            configuration=config)

class TestInstrumentAgent(IonIntegrationTestCase):
    """
    Test cases for instrument agent class. Functions in this class provide
    instrument agent integration tests and provide a tutorial on use of
    the agent setup and interface.
    """
    def customCleanUp(self):
        log.info(
            'CUSTOM CLEAN UP ******************************************************************************'
        )

    def setUp(self):
        """
        Set up the test environment to exercise use of the instrument agent, including:
        * define driver_config parameters.
        * create container with required services and container client.
        * create publication stream ids for each driver data stream.
        * create stream_config parameters.
        * create and activate subscriptions for agent data streams.
        * spawn instrument agent process and create agent client.
        * add cleanup functions to cause subscribers to get stopped.
        """

        #       params = { ('CTD', 'TA2'): -1.9434316e-05,
        #       ('CTD', 'PTCA1'): 1.3206866,
        #       ('CTD', 'TCALDATE'): [8, 11, 2006] }

        #       for tup in params:
        #           print tup

        self.addCleanup(self.customCleanUp)
        # Names of agent data streams to be configured.
        parsed_stream_name = 'ctd_parsed'
        raw_stream_name = 'ctd_raw'

        # Driver configuration.
        #Simulator

        self.driver_config = {
            'svr_addr': 'localhost',
            'cmd_port': 5556,
            'evt_port': 5557,
            'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
            'dvr_cls': 'SBE37Driver',
            'comms_config': {
                SBE37Channel.CTD: {
                    'method': 'ethernet',
                    'device_addr': CFG.device.sbe37.host,
                    'device_port': CFG.device.sbe37.port,
                    'server_addr': 'localhost',
                    'server_port': 8888
                }
            }
        }

        #Hardware
        '''
        self.driver_config = {
            'svr_addr': 'localhost',
            'cmd_port': 5556,
            'evt_port': 5557,
            'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
            'dvr_cls': 'SBE37Driver',
            'comms_config': {
                SBE37Channel.CTD: {
                    'method':'ethernet',
                    'device_addr': '137.110.112.119',
                    'device_port': 4001,
                    'server_addr': 'localhost',
                    'server_port': 8888
                }
            }
        }
        '''

        # Start container.
        self._start_container()

        # Establish endpoint with container (used in tests below)
        self._container_client = ContainerAgentClient(node=self.container.node,
                                                      name=self.container.name)

        # Bring up services in a deploy file (no need to message)
        self.container.start_rel_from_url('res/deploy/r2dm.yml')

        # Create a pubsub client to create streams.
        self._pubsub_client = PubsubManagementServiceClient(
            node=self.container.node)

        # A callback for processing subscribed-to data.
        def consume(message, headers):
            log.info('Subscriber received message: %s', str(message))

        # Create a stream subscriber registrar to create subscribers.
        subscriber_registrar = StreamSubscriberRegistrar(
            process=self.container, node=self.container.node)

        self.subs = []

        # Create streams for each stream named in driver.
        self.stream_config = {}
        for (stream_name, val) in PACKET_CONFIG.iteritems():
            stream_def = ctd_stream_definition(stream_id=None)
            stream_def_id = self._pubsub_client.create_stream_definition(
                container=stream_def)
            stream_id = self._pubsub_client.create_stream(
                name=stream_name,
                stream_definition_id=stream_def_id,
                original=True,
                encoding='ION R2')
            self.stream_config[stream_name] = stream_id

            # Create subscriptions for each stream.
            exchange_name = '%s_queue' % stream_name
            sub = subscriber_registrar.create_subscriber(
                exchange_name=exchange_name, callback=consume)
            sub.start()
            query = StreamQuery(stream_ids=[stream_id])
            sub_id = self._pubsub_client.create_subscription(
                query=query, exchange_name=exchange_name)
            self._pubsub_client.activate_subscription(sub_id)
            self.subs.append(sub)

        # Add cleanup function to stop subscribers.
        def stop_subscriber(sub_list):
            for sub in sub_list:
                sub.stop()

        self.addCleanup(stop_subscriber, self.subs)

        # Create agent config.

        self.agent_resource_id = '123xyz'

        self.agent_config = {
            'driver_config': self.driver_config,
            'stream_config': self.stream_config,
            'agent': {
                'resource_id': self.agent_resource_id
            }
        }

        # Launch an instrument agent process.
        self._ia_name = 'agent007'
        self._ia_mod = 'ion.agents.instrument.instrument_agent'
        self._ia_class = 'InstrumentAgent'
        self._ia_pid = self._container_client.spawn_process(
            name=self._ia_name,
            module=self._ia_mod,
            cls=self._ia_class,
            config=self.agent_config)

        log.info('got pid=%s', str(self._ia_pid))

        self._ia_client = None
        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = ResourceAgentClient(self.agent_resource_id,
                                              process=FakeProcess())
        log.info('got ia client %s', str(self._ia_client))

    def test_initialize(self):
        """
        Test agent initialize command. This causes creation of
        driver process and transition to inactive.
        """

        cmd = AgentCommand(command='initialize')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        log.info(reply)

        time.sleep(2)

        caps = gw_agent_get_capabilities(self.agent_resource_id)
        log.info('Capabilities: %s', str(caps))

        time.sleep(2)

        cmd = AgentCommand(command='reset')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        log.info(reply)

    def test_direct_access(self):
        """
        Test agent direct_access command. This causes creation of
        driver process and transition to direct access.
        """
        print("test initing")
        cmd = AgentCommand(command='initialize')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test go_active")
        cmd = AgentCommand(command='go_active')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test run")
        cmd = AgentCommand(command='run')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test go_da")
        cmd = AgentCommand(command='go_direct_access')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        print("reply=" + str(reply))
        time.sleep(2)

        print("test go_ob")
        cmd = AgentCommand(command='go_observatory')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test go_inactive")
        cmd = AgentCommand(command='go_inactive')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        print("test reset")
        cmd = AgentCommand(command='reset')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

    def test_go_active(self):
        """
        Test agent go_active command. This causes a driver process to
        launch a connection broker, connect to device hardware, determine
        entry state of driver and initialize driver parameters.
        """
        cmd = AgentCommand(command='initialize')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        cmd = AgentCommand(command='go_active')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        cmd = AgentCommand(command='go_inactive')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)

        cmd = AgentCommand(command='reset')
        reply = gw_agent_execute_agent(self.agent_resource_id, cmd)
        time.sleep(2)
Exemplo n.º 48
0
class EventManagementIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(EventManagementIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.event_management = EventManagementServiceClient()
        self.rrc = ResourceRegistryServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub = PubsubManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

        self.queue_cleanup = []
        self.exchange_cleanup = []

    def tearDown(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()

    def test_create_read_update_delete_event_type(self):
        """
        Test that the CRUD method for event types work correctly
        """

        event_type = EventType(name="an event type")
        event_type.origin = 'instrument_1'

        # create
        event_type_id = self.event_management.create_event_type(event_type)
        self.assertIsNotNone(event_type_id)

        # read
        read_event_type = self.event_management.read_event_type(event_type_id)
        self.assertEquals(read_event_type.name, event_type.name)
        self.assertEquals(read_event_type.origin, event_type.origin)

        #update
        read_event_type.origin = 'instrument_2'
        read_event_type.producer = 'producer'

        self.event_management.update_event_type(read_event_type)
        updated_event_type = self.event_management.read_event_type(event_type_id)

        self.assertEquals(updated_event_type.origin, 'instrument_2')
        self.assertEquals(updated_event_type.producer, 'producer')

        # delete
        self.event_management.delete_event_type(event_type_id)

        with self.assertRaises(NotFound):
            self.event_management.read_event_type(event_type_id)

    def test_create_read_update_delete_event_process_definition(self):
        """
        Test that the CRUD methods for the event process definitions work correctly
        """

        # Create
        module = 'ion.processes.data.transforms.event_alert_transform'
        class_name = 'EventAlertTransform'
        procdef_id = self.event_management.create_event_process_definition(version='ver_1',
                                                                            module=module,
                                                                            class_name=class_name,
                                                                            uri='http://hare.com',
                                                                            arguments=['arg1', 'arg2'],
                                                                            event_types=['ExampleDetectableEvent', 'type_2'],
                                                                            sub_types=['sub_type_1'])
        # Read
        read_process_def = self.event_management.read_event_process_definition(procdef_id)
        self.assertEquals(read_process_def.executable['module'], module)
        self.assertEquals(read_process_def.executable['class'], class_name)

        # Update
        self.event_management.update_event_process_definition(event_process_definition_id=procdef_id,
                                                                class_name='StreamAlertTransform',
                                                                arguments=['arg3', 'arg4'],
                                                                event_types=['event_type_new'])

        updated_event_process_def = self.event_management.read_event_process_definition(procdef_id)
        self.assertEquals(updated_event_process_def.executable['class'], 'StreamAlertTransform')
        self.assertEquals(updated_event_process_def.arguments, ['arg3', 'arg4'])
        self.assertEquals(updated_event_process_def.definition.event_types, ['event_type_new'])

        # Delete
        self.event_management.delete_event_process_definition(procdef_id)

        with self.assertRaises(NotFound):
            self.event_management.read_event_process_definition(procdef_id)

    def test_event_in_stream_out_transform(self):
        """
        Test the event-in/stream-out transform
        """

        stream_id, _ = self.pubsub.create_stream('test_stream', exchange_point='science_data')
        self.exchange_cleanup.append('science_data')

        #---------------------------------------------------------------------------------------------
        # Launch an event-in/stream-out transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='EventToStreamTransform',
            description='For testing an event-in/stream-out transform')
        process_definition.executable['module']= 'ion.processes.data.transforms.event_in_stream_out_transform'
        process_definition.executable['class'] = 'EventToStreamTransform'
        proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = 'test_queue'
        config.process.exchange_point = 'science_data'
        config.process.publish_streams.output = stream_id
        config.process.event_type = 'ExampleDetectableEvent'
        config.process.variables = ['voltage', 'temperature']

        # Schedule the process
        pid = self.process_dispatcher.schedule_process(process_definition_id=proc_def_id, configuration=config)
        self.addCleanup(self.process_dispatcher.cancel_process,pid)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber for testing
        #---------------------------------------------------------------------------------------------

        ar_cond = gevent.event.AsyncResult()
        def subscriber_callback(m, r, s):
            ar_cond.set(m)
        sub = StandaloneStreamSubscriber('sub', subscriber_callback)
        self.addCleanup(sub.stop)
        sub_id = self.pubsub.create_subscription('subscription_cond',
            stream_ids=[stream_id],
            exchange_name='sub')
        self.pubsub.activate_subscription(sub_id)
        self.queue_cleanup.append(sub.xn.queue)
        sub.start()

        gevent.sleep(4)
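        # Give the newly scheduled transform process a few seconds to start up and
        # attach to its queue before the event is published.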

        #---------------------------------------------------------------------------------------------
        # Publish an event. The transform has been configured to receive this event
        #---------------------------------------------------------------------------------------------

        event_publisher = EventPublisher("ExampleDetectableEvent")
        event_publisher.publish_event(origin = 'fake_origin', voltage = '5', temperature = '273')

        # Assert that the transform processed the event and published data on the output stream
        result_cond = ar_cond.get(timeout=10)
        self.assertTrue(result_cond)

    def test_create_read_delete_event_process(self):
        """
        Test that the CRUD methods for the event processes work correctly
        """

        #---------------------------------------------------------------------------------------------
        # Create a process definition
        #---------------------------------------------------------------------------------------------

        # Create
        module = 'ion.processes.data.transforms.event_alert_transform'
        class_name = 'EventAlertTransform'
        procdef_id = self.event_management.create_event_process_definition(version='ver_1',
            module=module,
            class_name=class_name,
            uri='http://hare.com',
            arguments=['arg1', 'arg2'],
            event_types=['ExampleDetectableEvent', 'type_2'],
            sub_types=['sub_type_1'])

        # Read
        read_process_def = self.event_management.read_event_process_definition(procdef_id)
        self.assertEquals(read_process_def.arguments, ['arg1', 'arg2'])

        #---------------------------------------------------------------------------------------------
        # Use the process definition to create a process
        #---------------------------------------------------------------------------------------------

        # Create a stream
        param_dict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)

        stream_def_id = self.pubsub.create_stream_definition('cond_stream_def', parameter_dictionary_id=param_dict_id)

        tdom, sdom = time_series_domain()
        tdom, sdom = tdom.dump(), sdom.dump()

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain = tdom,
            spatial_domain = sdom)

        # Create a data product
        data_product_id = self.data_product_management.create_data_product(data_product=dp_obj, stream_definition_id=stream_def_id)

        output_products = {}
        output_products['conductivity'] = data_product_id

        # Create an event process
        event_process_id = self.event_management.create_event_process(  process_definition_id=procdef_id,
                                                                        event_types=['ExampleDetectableEvent','DetectionEvent'],
                                                                        sub_types=['s1', 's2'],
                                                                        origins=['or_1', 'or_2'],
                                                                        origin_types=['or_t1', 'or_t2'],
                                                                        out_data_products = output_products)
        self.addCleanup(self.process_dispatcher.cancel_process, event_process_id)

        #---------------------------------------------------------------------------------------------
        # Read the event process object and make assertions
        #---------------------------------------------------------------------------------------------
        event_process_obj = self.event_management.read_event_process(event_process_id=event_process_id)

        # Get the stream associated with the data product for the sake of making assertions
        stream_ids, _ = self.rrc.find_objects(data_product_id, PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]

        # Assertions!
        self.assertEquals(event_process_obj.detail.output_streams['conductivity'], stream_id)
        self.assertEquals(event_process_obj.detail.event_types, ['ExampleDetectableEvent', 'DetectionEvent'])
        self.assertEquals(event_process_obj.detail.sub_types, ['s1', 's2'])
        self.assertEquals(event_process_obj.detail.origins, ['or_1', 'or_2'])
        self.assertEquals(event_process_obj.detail.origin_types, ['or_t1', 'or_t2'])
Exemplo n.º 49
0
class CtdbpTransformsIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(CtdbpTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.pubsub = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.dataproduct_management = DataProductManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()

        # This is for the time values inside the packets going into the transform
        self.i = 0

        # Cleanup of queue created by the subscriber

    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i + length)

        for field in rdt:
            if isinstance(
                    rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array(
                    [random.uniform(0.0, 75.0) for i in xrange(length)])

        g = rdt.to_granule()
        self.i += length

        return g

    def _create_input_param_dict_for_test(self, parameter_dict_name=''):

        pdict = ParameterDictionary()

        t_ctxt = ParameterContext(
            'time',
            param_type=QuantityType(value_encoding=numpy.dtype('float64')))
        t_ctxt.axis = AxisTypeEnum.TIME
        t_ctxt.uom = 'seconds since 01-01-1900'
        pdict.add_context(t_ctxt)

        cond_ctxt = ParameterContext(
            'conductivity',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        cond_ctxt.uom = ''
        pdict.add_context(cond_ctxt)

        pres_ctxt = ParameterContext(
            'pressure',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        pres_ctxt.uom = ''
        pdict.add_context(pres_ctxt)

        temp_ctxt = ParameterContext(
            'temperature',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        temp_ctxt.uom = ''
        pdict.add_context(temp_ctxt)

        dens_ctxt = ParameterContext(
            'density',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        dens_ctxt.uom = ''
        pdict.add_context(dens_ctxt)

        sal_ctxt = ParameterContext(
            'salinity',
            param_type=QuantityType(value_encoding=numpy.dtype('float32')))
        sal_ctxt.uom = ''
        pdict.add_context(sal_ctxt)

        #create temp streamdef so the data product can create the stream
        pc_list = []
        for pc_k, pc in pdict.iteritems():
            ctxt_id = self.dataset_management.create_parameter_context(
                pc_k, pc[1].dump())
            pc_list.append(ctxt_id)
            self.addCleanup(self.dataset_management.delete_parameter_context,
                            ctxt_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(
            parameter_dict_name, pc_list)
        self.addCleanup(self.dataset_management.delete_parameter_dictionary,
                        pdict_id)

        return pdict_id

    def test_ctdbp_L0_all(self):
        """
        Test packets processed by the ctdbp_L0_all transform
        """

        #----------- Data Process Definition --------------------------------

        dpd_obj = IonObject(
            RT.DataProcessDefinition,
            name='CTDBP_L0_all',
            description=
            'Take parsed stream and put the C, T and P into three separate L0 streams.',
            module='ion.processes.data.transforms.ctdbp.ctdbp_L0',
            class_name='CTDBP_L0_all')

        dprocdef_id = self.data_process_management.create_data_process_definition(
            dpd_obj)
        self.addCleanup(
            self.data_process_management.delete_data_process_definition,
            dprocdef_id)

        log.debug("created data process definition: id = %s", dprocdef_id)

        #----------- Data Products --------------------------------

        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        input_param_dict = self._create_input_param_dict_for_test(
            parameter_dict_name='fictitious_ctdp_param_dict')

        # Get the stream definition for the stream using the parameter dictionary
        #        input_param_dict = self.dataset_management.read_parameter_dictionary_by_name('ctdbp_cdef_sample', id_only=True)
        input_stream_def_dict = self.pubsub.create_stream_definition(
            name='parsed', parameter_dictionary_id=input_param_dict)
        self.addCleanup(self.pubsub.delete_stream_definition,
                        input_stream_def_dict)

        log.debug("Got the parsed parameter dictionary: id: %s",
                  input_param_dict)
        log.debug("Got the stream def for parsed input: %s",
                  input_stream_def_dict)

        # Input data product
        parsed_stream_dp_obj = IonObject(
            RT.DataProduct,
            name='parsed_stream',
            description='Parsed stream input to CTBP L0 transform',
            temporal_domain=tdom,
            spatial_domain=sdom)

        input_dp_id = self.dataproduct_management.create_data_product(
            data_product=parsed_stream_dp_obj,
            stream_definition_id=input_stream_def_dict)
        self.addCleanup(self.dataproduct_management.delete_data_product,
                        input_dp_id)

        # output data product
        L0_stream_dp_obj = IonObject(
            RT.DataProduct,
            name='L0_stream',
            description='L0_stream output of CTBP L0 transform',
            temporal_domain=tdom,
            spatial_domain=sdom)

        L0_stream_dp_id = self.dataproduct_management.create_data_product(
            data_product=L0_stream_dp_obj,
            stream_definition_id=input_stream_def_dict)
        self.addCleanup(self.dataproduct_management.delete_data_product,
                        L0_stream_dp_id)

        # We need the key name here to be "L0_stream", since when the data process is launched, this name goes into
        # the config as in config.process.publish_streams.L0_stream when the config is used to launch the data process
        out_stream_ids, _ = self.resource_registry.find_objects(
            L0_stream_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(out_stream_ids))
        output_stream_id = out_stream_ids[0]

        dproc_id = self.data_process_management.create_data_process(
            data_process_definition_id=dprocdef_id,
            in_data_product_ids=[input_dp_id],
            out_data_product_ids=[L0_stream_dp_id],
            configuration=None)

        self.addCleanup(self.data_process_management.delete_data_process,
                        dproc_id)

        log.debug("Created a data process for ctdbp_L0. id: %s", dproc_id)

        # Activate the data process
        self.data_process_management.activate_data_process(dproc_id)
        self.addCleanup(self.data_process_management.deactivate_data_process,
                        dproc_id)

        #----------- Find the stream that is associated with the input data product when it was created by create_data_product() --------------------------------

        stream_ids, _ = self.resource_registry.find_objects(
            input_dp_id, PRED.hasStream, RT.Stream, True)
        self.assertTrue(len(stream_ids))

        input_stream_id = stream_ids[0]
        stream_route = self.pubsub.read_stream_route(input_stream_id)

        log.debug("The input stream for the L0 transform: %s", input_stream_id)

        #----------- Create a subscriber that will listen to the transform's output --------------------------------

        ar = gevent.event.AsyncResult()

        def subscriber(m, r, s):
            ar.set(m)

        sub = StandaloneStreamSubscriber(exchange_name='sub',
                                         callback=subscriber)

        sub_id = self.pubsub.create_subscription('subscriber_to_transform',
                                                 stream_ids=[output_stream_id],
                                                 exchange_name='sub')
        self.addCleanup(self.pubsub.delete_subscription, sub_id)

        self.pubsub.activate_subscription(sub_id)
        self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

        sub.start()
        self.addCleanup(sub.stop)

        #----------- Publish on that stream so that the transform can receive it --------------------------------

        pub = StandaloneStreamPublisher(input_stream_id, stream_route)
        publish_granule = self._get_new_ctd_packet(
            stream_definition_id=input_stream_def_dict, length=5)

        pub.publish(publish_granule)

        log.debug("Published the following granule: %s", publish_granule)

        granule_from_transform = ar.get(timeout=20)

        log.debug("Got the following granule from the transform: %s",
                  granule_from_transform)

        # Check that the granule published by the L0 transform has the right properties
        self._check_granule_from_transform(granule_from_transform)

    def _check_granule_from_transform(self, granule):
        """
        An internal method to check if a granule has the right properties
        """

        pass
Exemplo n.º 50
0
class TestTransformPrime(IonIntegrationTestCase):
    def setUp(self):
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Because hey why not?!

        self.dataset_management      = DatasetManagementServiceClient()
        self.data_process_management = DataProcessManagementServiceClient()
        self.pubsub_management       = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()


    def setup_streams(self):
        in_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sbe37_L0_test', id_only=True)
        out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sbe37_L1_test', id_only=True)

        in_stream_def_id = self.pubsub_management.create_stream_definition('L0 SBE37', parameter_dictionary_id=in_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_def_id)
        out_stream_def_id = self.pubsub_management.create_stream_definition('L1 SBE37', parameter_dictionary_id=out_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_def_id)

        in_stream_id, in_route = self.pubsub_management.create_stream('L0 input', stream_definition_id=in_stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, in_stream_id)
        out_stream_id, out_route = self.pubsub_management.create_stream('L0 output', stream_definition_id=out_stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, out_stream_id)

        return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]

    def setup_advanced_streams(self):
        in_pdict_id = out_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('sbe37_LC_TEST', id_only=True)
        in_stream_def_id = self.pubsub_management.create_stream_definition('sbe37_instrument', parameter_dictionary_id=in_pdict_id, available_fields=['time', 'TEMPWAT_L0', 'CONDWAT_L0', 'PRESWAT_L0', 'lat', 'lon'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, in_stream_def_id)

        out_stream_def_id = self.pubsub_management.create_stream_definition('sbe37_l2', parameter_dictionary_id=out_pdict_id, available_fields=['time', 'rho','PRACSAL_L2'])
        self.addCleanup(self.pubsub_management.delete_stream_definition, out_stream_def_id)

        in_stream_id, in_route = self.pubsub_management.create_stream('instrument stream', stream_definition_id=in_stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, in_stream_id)

        out_stream_id, out_route = self.pubsub_management.create_stream('data product stream', stream_definition_id=out_stream_def_id, exchange_point='test')
        self.addCleanup(self.pubsub_management.delete_stream, out_stream_id)

        return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]


    def preload(self):
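        # Load the parameter functions, definitions and dictionaries the transforms
        # rely on, by spawning the IONLoader bootstrap process against the r2_ioc
        # preload files configured below.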
        config = DotDict()
        config.op = 'load'
        config.scenario = 'BASE,LC_TEST'
        config.categories = 'ParameterFunctions,ParameterDefs,ParameterDictionary'
        config.path = 'res/preload/r2_ioc'
        
        self.container.spawn_process('preload','ion.processes.bootstrap.ion_loader','IONLoader', config)

    def setup_advanced_transform(self):
        self.preload()
        queue_name = 'transform_prime'

        stream_info = self.setup_advanced_streams()
        in_stream_id, in_stream_def_id = stream_info[0]
        out_stream_id, out_stream_def_id = stream_info[1]

        routes = {}
        routes[(in_stream_id, out_stream_id)]= None
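        # Each (input stream, output stream) pair is a route for TransformPrime; the
        # None value appears to let the transform derive the parameter conversion from
        # the two stream definitions rather than an explicit function (assumption).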

        config = DotDict()

        config.process.queue_name = queue_name
        config.process.routes = routes
        config.process.publish_streams = {out_stream_id:out_stream_id}

        sub_id = self.pubsub_management.create_subscription(queue_name, stream_ids=[in_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        self.container.spawn_process('transform_prime', 'ion.processes.data.transforms.transform_prime','TransformPrime', config)

        listen_sub_id = self.pubsub_management.create_subscription('listener', stream_ids=[out_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, listen_sub_id)

        self.pubsub_management.activate_subscription(listen_sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, listen_sub_id)
        return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]


    def setup_transform(self):
        self.preload()
        queue_name = 'transform_prime'

        stream_info = self.setup_streams()
        in_stream_id, in_stream_def_id = stream_info[0]
        out_stream_id, out_stream_def_id = stream_info[1]

        routes = {}
        routes[(in_stream_id, out_stream_id)]= None

        config = DotDict()

        config.process.queue_name = queue_name
        config.process.routes = routes
        config.process.publish_streams = {out_stream_id:out_stream_id}

        sub_id = self.pubsub_management.create_subscription(queue_name, stream_ids=[in_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

        self.container.spawn_process('transform_prime', 'ion.processes.data.transforms.transform_prime','TransformPrime', config)

        listen_sub_id = self.pubsub_management.create_subscription('listener', stream_ids=[out_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, listen_sub_id)

        self.pubsub_management.activate_subscription(listen_sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, listen_sub_id)
        return [(in_stream_id, in_stream_def_id), (out_stream_id, out_stream_def_id)]

    def setup_validator(self, validator):
        listener = StandaloneStreamSubscriber('listener', validator)
        listener.start()
        self.addCleanup(listener.stop)

    
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_execute_advanced_transform(self):
        # Runs a transform across L0-L2 with stream definitions including available fields
        streams = self.setup_advanced_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_def_id = streams[1]

        validation_event = Event()
        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['rho'], np.array([1001.0055034])):
                return
            validation_event.set()

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_execute_transform(self):
        streams = self.setup_transform()
        in_stream_id, in_stream_def_id = streams[0]
        out_stream_id, out_stream_def_id = streams[1]


        validation_event = Event()
        def validator(msg, route, stream_id):
            rdt = RecordDictionaryTool.load_from_granule(msg)
            if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
                return
            if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
                return
            if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
                return
            validation_event.set()
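
        # Worked example (inferred from the expected values above, not stated in the
        # original source): the raw L0 counts published below appear to map to the
        # validated L1 values as roughly
        #   TEMPWAT_L1 = TEMPWAT_L0 / 10000.0 - 10   -> 280000 / 10000 - 10 = 18.0
        #   CONDWAT_L1 = CONDWAT_L0 / 100000.0 - 0.5 -> 100000 / 100000 - 0.5 = 0.5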

        self.setup_validator(validator)

        in_route = self.pubsub_management.read_stream_route(in_stream_id)
        publisher = StandaloneStreamPublisher(in_stream_id, in_route)

        outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
        outbound_rdt['time'] = [0]
        outbound_rdt['TEMPWAT_L0'] = [280000]
        outbound_rdt['CONDWAT_L0'] = [100000]
        outbound_rdt['PRESWAT_L0'] = [2789]

        outbound_rdt['lat'] = [45]
        outbound_rdt['lon'] = [-71]

        outbound_granule = outbound_rdt.to_granule()

        publisher.publish(outbound_granule)

        self.assertTrue(validation_event.wait(2))
Exemplo n.º 51
0
class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self): # Love the non pep-8 convention
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.process_dispatcher   = ProcessDispatcherServiceClient()
        self.pubsub_management    = PubsubManagementServiceClient()
        self.resource_registry    = ResourceRegistryServiceClient()
        self.dataset_management   = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever       = DataRetrieverServiceClient()
        self.event                = Event()
        self.exchange_space_name  = 'test_granules'
        self.exchange_point_name  = 'science_data'       
        self.i                    = 0
        self.cci                  = 0

    #--------------------------------------------------------------------------------
    # Helper/Utility methods
    #--------------------------------------------------------------------------------
        
    def create_dataset(self, parameter_dict_id=''):
        '''
        Creates a time-series dataset
        '''
        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        dataset = Dataset('test_dataset_%i'%self.i)
        dataset_id = self.dataset_management.create_dataset(dataset, parameter_dictionary_id=parameter_dict_id)
        self.addCleanup(self.dataset_management.delete_dataset, dataset_id)
        return dataset_id
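
    # Typical usage in the tests below (illustrative sketch): create the dataset,
    # then wire a stream to it through the ingestion service so published granules
    # are persisted, e.g.
    #   dataset_id = self.create_dataset()
    #   self.start_ingestion(stream_id, dataset_id)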
    
    def get_datastore(self, dataset_id):
        '''
        Gets an instance of the datastore
            This method primarily works around a bug where integration tests running in multiple
            containers may delete a CouchDB datastore without the other containers becoming aware of it.
        '''
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore
    
    def get_ingestion_config(self):
        '''
        Grab the ingestion configuration from the resource registry
        '''
        # The ingestion configuration should have been created by the bootstrap service 
        # which is configured through r2deploy.yml

        ingest_configs, _  = self.resource_registry.find_resources(restype=RT.IngestionConfiguration,id_only=True)
        return ingest_configs[0]

    def launch_producer(self, stream_id=''):
        '''
        Launch the producer
        '''
        pid = self.container.spawn_process('better_data_producer', 'ion.processes.data.example_data_producer', 'BetterDataProducer', {'process':{'stream_id':stream_id}})
        self.addCleanup(self.container.terminate_process, pid)

    def make_simple_dataset(self):
        '''
        Makes a stream, a stream definition and a dataset, the essentials for most of these tests
        '''
        pdict_id             = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id        = self.pubsub_management.create_stream_definition('ctd data %i' % self.i, parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route     = self.pubsub_management.create_stream('ctd stream %i' % self.i, 'xp1', stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        dataset_id = self.create_dataset(pdict_id)

        # self.get_datastore(dataset_id)
        self.i += 1
        return stream_id, route, stream_def_id, dataset_id
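
    # Illustrative helper (not part of the original source): a sketch that combines the
    # helpers above into the common "persist and fill a dataset" pattern used by the tests.
    def _persist_and_fill_simple_dataset(self):
        '''
        Illustrative sketch only: builds a persisted dataset with 40 deterministic points.
        '''
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=40)
        return stream_id, route, stream_def_id, dataset_id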

    def publish_hifi(self,stream_id,stream_route,offset=0):
        '''
        Publish deterministic data
        '''

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10) + (offset * 10)
        rdt['temp'] = np.arange(10) + (offset * 10)
        pub.publish(rdt.to_granule())
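
    # Worked example: with offset=1 the granule above carries time and temp values
    # 10..19, so publishing offsets 0..3 (as publish_fake_data does below) yields the
    # contiguous range 0..39 that the retrieval tests check for.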

    def publish_fake_data(self,stream_id, route):
        '''
        Make four granules
        '''
        for i in xrange(4):
            self.publish_hifi(stream_id,route,i)

    def start_ingestion(self, stream_id, dataset_id):
        '''
        Starts ingestion/persistence for a given dataset
        '''
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
    
    def stop_ingestion(self, stream_id):
        ingest_config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id)

    def validate_granule_subscription(self, msg, route, stream_id):
        '''
        Validation for granule format
        '''
        if msg == {}:
            return
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.assertIsInstance(msg,Granule,'Message is improperly formatted. (%s)' % type(msg))
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id='',data_size=40):
        '''
        Loops until there is a sufficient amount of data in the dataset
        '''
        done = False
        with gevent.Timeout(40):
            while not done:
                extents = self.dataset_management.dataset_extents(dataset_id, 'time')
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt     = RecordDictionaryTool.load_from_granule(granule)
                if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context('time').fill_value and extents >= data_size:
                    done = True
                else:
                    gevent.sleep(0.2)
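
    # Note on the polling contract above (inferred from the code, not stated in the
    # original source): the gevent.Timeout(40) raises if the dataset never reaches
    # data_size points, so a call such as
    #   self.wait_until_we_have_enough_granules(dataset_id, data_size=40)
    # fails loudly instead of hanging forever.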


    #--------------------------------------------------------------------------------
    # Test Methods
    #--------------------------------------------------------------------------------

    def test_dm_end_2_end(self):
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        
        stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)


        stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)

        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream 
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        #--------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id,40)
        
        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------
        
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all())

        
        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
        self.replay_id, process_id =  self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

    
        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        sub_id = self.pubsub_management.create_subscription(self.exchange_space_name,stream_ids=[replay_stream_id])
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()
        
        self.assertTrue(self.event.wait(10))

        self.data_retriever.cancel_replay_agent(self.replay_id)


        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        self.assertTrue(b.all() if not isinstance(b,bool) else b)


    def test_coverage_transform(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_parsed()
        stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)
        
        rdt = ph.get_rdt(stream_def_id)
        ph.fill_parsed_rdt(rdt)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))


    def test_ingestion_pause(self):
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        ingestion_config_id = self.get_ingestion_config()
        self.start_ingestion(ctd_stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, ctd_stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(monitor.wait())
        granule = self.data_retriever.retrieve(dataset_id)


        self.ingestion_management.pause_data_stream(ctd_stream_id, ingestion_config_id)

        monitor.event.clear()
        rdt['time'] = np.arange(10,20)
        publisher.publish(rdt.to_granule())
        self.assertFalse(monitor.event.wait(1))

        self.ingestion_management.resume_data_stream(ctd_stream_id, ingestion_config_id)

        self.assertTrue(monitor.wait())

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt2['time'], np.arange(20))

    def test_last_granule(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        self.publish_hifi(stream_id,route, 0)
        self.publish_hifi(stream_id,route, 1)
        

        self.wait_until_we_have_enough_granules(dataset_id,20) # I just need two


        def verifier():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt['time'] == np.arange(10) + 10
            if not isinstance(comp, bool):
                return comp.all()
            return False
        success = poll(verifier)

        self.assertTrue(success)

        def verify_points():
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 5)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt['time'] == np.arange(15, 20)
            if not isinstance(comp, bool):
                return comp.all()
            return False
        success = poll(verify_points)

        self.assertTrue(success)

    def test_replay_with_parameters(self):
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        
        stream_id, route  = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id  = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_monitor.wait())

        query = {
            'start_time': 0 - 2208988800,
            'end_time':   19 - 2208988800,
            'stride_time' : 2,
            'parameters': ['time','temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        np.testing.assert_array_equal(rdt['time'], np.arange(0,20,2))
        self.assertEquals(set(rdt.iterkeys()), set(['time','temp']))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp'])
        self.assertTrue(extents['time']>=20)
        self.assertTrue(extents['temp']>=20)

    def test_repersist_data(self):
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id,route,0)
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id,20)
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id,dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)
        self.publish_hifi(stream_id,route,2)
        self.publish_hifi(stream_id,route,3)
        self.wait_until_we_have_enough_granules(dataset_id,40)
        success = False
        with gevent.timeout.Timeout(5):
            while not success:

                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0,40)
                if not isinstance(comp,bool):
                    success = comp.all()
                gevent.sleep(1)

        self.assertTrue(success)


    @unittest.skip('deprecated')
    def test_correct_time(self):

        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e. 
        #  the conversion factor between unix and NTP time
        unix_now = np.floor(time.time())
        ntp_now  = unix_now + 2208988800 

        unix_ago = unix_now - 20
        ntp_ago  = unix_ago + 2208988800

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a')
        coverage.insert_timesteps(20)
        coverage.set_parameter_values('time', np.arange(ntp_ago,ntp_now))
        
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        self.assertTrue( np.abs(temporal_bounds[0] - unix_ago) < 2)
        self.assertTrue( np.abs(temporal_bounds[1] - unix_now) < 2)
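
    # Illustrative helper (not in the original source): the Unix/NTP conversion the
    # deprecated test above relies on, e.g. _unix_to_ntp(0) == 2208988800.
    @staticmethod
    def _unix_to_ntp(unix_time):
        # There are 2208988800 seconds between Jan 1 1900 (NTP epoch) and Jan 1 1970 (Unix epoch)
        return unix_time + 2208988800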


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, which CEI mode does not support.')
    def test_out_of_band_retrieve(self):
        # Set up the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        
        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id,40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())

    def publish_and_wait(self, dataset_id, granule):
        stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream,id_only=True)
        stream_id=stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id,route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.wait())


    def test_sparse_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_sparse()
        stream_def_id = self.pubsub_management.create_stream_definition('sparse', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        dataset_id = self.create_dataset(pdict_id)
        self.start_ingestion(stream_id,dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        # Publish initial granule
        # the first one has the sparse value set inside it, sets lat to 45 and lon to -71
        ntp_now = time.time() + 2208988800
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = ['']
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        # Check the values and make sure they're correct
        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        np.testing.assert_allclose(rdt_out['temp'], rdt['temp'])
        np.testing.assert_allclose(rdt_out['lat'], np.array([45]))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71]))

        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))


        # We're going to change the lat/lon
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = time.time() + 2208988800
        rdt['lat'] = [46]
        rdt['lon'] = [-73]
        
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()


        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        
        for i in xrange(9):
            ntp_now = time.time() + 2208988800
            rdt['time'] = [ntp_now]
            rdt['internal_timestamp'] = [ntp_now]
            rdt['temp'] = [300000]
            rdt['preferred_timestamp'] = ['driver_timestamp']
            rdt['port_timestamp'] = [ntp_now]
            rdt['quality_flag'] = [None]
            rdt['conductivity'] = [4341400]
            rdt['driver_timestamp'] = [ntp_now]
            rdt['pressure'] = [256.8]

            publisher.publish(rdt.to_granule())
            self.assertTrue(dataset_monitor.wait())
            dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['pressure'], np.array([256.8] * 10))
        np.testing.assert_allclose(rdt_out['lat'], np.array([45] + [46] * 9))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71] + [-73] * 9))
Exemplo n.º 52
0
class TransformPrototypeIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(TransformPrototypeIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.rrc = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.ssclient = SchedulerServiceClient()
        self.event_publisher = EventPublisher()
        self.user_notification = UserNotificationServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()

        self.exchange_names = []
        self.exchange_points = []

    def tearDown(self):

        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def now_utc(self):
        return time.time()

    def _create_interval_timer_with_end_time(self, timer_interval=None, end_time=None):
        '''
        A convenience method to set up an interval timer with an end time
        '''
        self.timer_received_time = 0
        self.timer_interval = timer_interval

        start_time = self.now_utc()
        if not end_time:
            end_time = start_time + 2 * timer_interval + 1

        log.debug("got the end time here!! %s" % end_time)

        # Set up the interval timer. The scheduler will publish event with origin set as "Interval Timer"
        sid = self.ssclient.create_interval_timer(start_time="now" ,
            interval=self.timer_interval,
            end_time=end_time,
            event_origin="Interval Timer",
            event_subtype="")

        def cleanup_timer(scheduler, schedule_id):
            """
            Do a friendly cancel of the scheduled event.
            If it fails, it's ok.
            """
            try:
                scheduler.cancel_timer(schedule_id)
            except:
                log.warn("Couldn't cancel")

        self.addCleanup(cleanup_timer, self.ssclient, sid)

        return sid
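
    # Usage sketch (mirrors the tests below, illustrative only): start a 2-second
    # interval timer and keep the schedule id so it can be cancelled on cleanup, e.g.
    #   timer_id = self._create_interval_timer_with_end_time(timer_interval=2)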

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_event_processing(self):
        '''
        Test that events are processed by the transforms according to a provided algorithm
        '''


        #-------------------------------------------------------------------------------------
        # Set up the scheduler for an interval timer with an end time
        #-------------------------------------------------------------------------------------
        timer_id = self._create_interval_timer_with_end_time(timer_interval=2)
        self.assertIsNotNone(timer_id)

        #-------------------------------------------------------------------------------------
        # Create an event alert transform....
        # The configuration for the Event Alert Transform... set up the event types to listen to
        #-------------------------------------------------------------------------------------
        configuration = {
            'process':{
                'event_type': 'ResourceEvent',
                'timer_origin': 'Interval Timer',
                'instrument_origin': 'My_favorite_instrument'
            }
        }

        #-------------------------------------------------------------------------------------
        # Create the process
        #-------------------------------------------------------------------------------------
        pid = TransformPrototypeIntTest.create_process(  name= 'event_alert_transform',
            module='ion.processes.data.transforms.event_alert_transform',
            class_name='EventAlertTransform',
            configuration= configuration)
        self.addCleanup(self.process_dispatcher.cancel_process, pid)
        self.assertIsNotNone(pid)

        #-------------------------------------------------------------------------------------
        # Publish events and make assertions about alerts
        #-------------------------------------------------------------------------------------

        queue = gevent.queue.Queue()

        def event_received(message, headers):
            queue.put(message)

        event_subscriber = EventSubscriber( origin="EventAlertTransform",
            event_type="DeviceEvent",
            callback=event_received)

        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        # Publish several events; while the instrument keeps reporting, no alert should be raised

        for i in xrange(5):
            self.event_publisher.publish_event(    event_type = 'ExampleDetectableEvent',
                origin = "My_favorite_instrument",
                voltage = 5,
                telemetry = 10,
                temperature = 20)
            gevent.sleep(0.1)
            self.assertTrue(queue.empty())



        # Now stay silent for longer than the 2-second timer interval so the transform raises an alert
        gevent.sleep(5)

        #-------------------------------------------------------------------------------------
        # Make assertions about the alert event published by the EventAlertTransform
        #-------------------------------------------------------------------------------------

        event = queue.get(timeout=10)

        log.debug("Alarm event received from the EventAertTransform %s" % event)

        self.assertEquals(event.type_, "DeviceEvent")
        self.assertEquals(event.origin, "EventAlertTransform")

        #------------------------------------------------------------------------------------------------
        # Now clear the event queue being populated by alarm events and publish normally once again
        #------------------------------------------------------------------------------------------------

        queue.queue.clear()

        for i in xrange(5):
            self.event_publisher.publish_event(    event_type = 'ExampleDetectableEvent',
                origin = "My_favorite_instrument",
                voltage = 5,
                telemetry = 10,
                temperature = 20)
            gevent.sleep(0.1)
            self.assertTrue(queue.empty())

        log.debug("This completes the requirement that the EventAlertTransform publishes \
                    an alarm event when it does not hear from the instrument for some time.")


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_stream_processing(self):
        #--------------------------------------------------------------------------------
        #Test that streams are processed by the transforms according to a provided algorithm
        #--------------------------------------------------------------------------------

        # todo: In this simple implementation we check whether the stream contains the word PUBLISH
        # todo: and whether VALUE=<number> exists with a number below the configured threshold.

        # todo: Later on, more sophisticated algorithms will make this prototype more powerful.

        #-------------------------------------------------------------------------------------
        # Start a subscriber to listen for an alert event from the Stream Alert Transform
        #-------------------------------------------------------------------------------------

        queue = gevent.queue.Queue()

        def event_received(message, headers):
            queue.put(message)

        event_subscriber = EventSubscriber( origin="StreamAlertTransform",
            event_type="DeviceEvent",
            callback=event_received)

        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        #-------------------------------------------------------------------------------------
        # The configuration for the Stream Alert Transform... set up the event types to listen to
        #-------------------------------------------------------------------------------------
        config = {
            'process':{
                'queue_name': 'a_queue',
                'value': 10,
                'event_type':'DeviceEvent'
            }
        }

        #-------------------------------------------------------------------------------------
        # Create the process
        #-------------------------------------------------------------------------------------
        pid = TransformPrototypeIntTest.create_process( name= 'transform_data_process',
            module='ion.processes.data.transforms.event_alert_transform',
            class_name='StreamAlertTransform',
            configuration= config)
        self.addCleanup(self.process_dispatcher.cancel_process, pid)
        self.assertIsNotNone(pid)

        #-------------------------------------------------------------------------------------
        # Publish streams and make assertions about alerts
        #-------------------------------------------------------------------------------------
        exchange_name = 'a_queue'
        exchange_point = 'test_exchange'
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(exchange_name)
        xp = self.container.ex_manager.create_xp(exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        message = "A dummy example message containing the word PUBLISH, and with VALUE = 5 . This message" +\
                  " will trigger an alert event from the StreamAlertTransform because the value provided is "\
                  "less than 10 that was passed in through the config."

        pub.publish(message)

        event = queue.get(timeout=10)
        self.assertEquals(event.type_, "DeviceEvent")
        self.assertEquals(event.origin, "StreamAlertTransform")

    #        self.purge_queues(exchange_name)

    #    def purge_queues(self, exchange_name):
    #        xn = self.container.ex_manager.create_xn_queue(exchange_name)
    #        xn.purge()

    @staticmethod
    def create_process(name= '', module = '', class_name = '', configuration = None):
        '''
        A helper method to create a process
        '''

        producer_definition = ProcessDefinition(name=name)
        producer_definition.executable = {
            'module':module,
            'class': class_name
        }

        process_dispatcher = ProcessDispatcherServiceClient()

        procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)
        pid = process_dispatcher.schedule_process(process_definition_id= procdef_id, configuration=configuration)

        return pid
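
    # Usage sketch (mirrors the tests above, illustrative only): build a per-process
    # config dict and schedule the transform through the process dispatcher, e.g.
    #   pid = TransformPrototypeIntTest.create_process(
    #       name='event_alert_transform',
    #       module='ion.processes.data.transforms.event_alert_transform',
    #       class_name='EventAlertTransform',
    #       configuration={'process': {'event_type': 'ResourceEvent'}})
    #   self.addCleanup(self.process_dispatcher.cancel_process, pid)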

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_demo_stream_granules_processing(self):
        """
        Test that the Demo Stream Alert Transform is functioning. The transform coordinates with the scheduler.
        It is configured to listen to a source that publishes granules. It publishes a DeviceStatusEvent if it
        receives a granule with bad data or a DeviceCommsEvent if no granule has arrived between two timer events.

        The transform is configured at launch using a config dictionary.
        """
        #-------------------------------------------------------------------------------------
        # Start a subscriber to listen for an alert event from the Stream Alert Transform
        #-------------------------------------------------------------------------------------

        queue_bad_data = gevent.queue.Queue()
        queue_no_data = gevent.queue.Queue()

        def bad_data(message, headers):
            log.debug("Got a BAD data event: %s" % message)
            if message.type_ == "DeviceStatusEvent":
                queue_bad_data.put(message)

        def no_data(message, headers):
            log.debug("Got a NO data event: %s" % message)
            queue_no_data.put(message)

        event_subscriber_bad_data = EventSubscriber( origin="instrument_1",
            event_type="DeviceStatusEvent",
            callback=bad_data)

        event_subscriber_no_data = EventSubscriber( origin="instrument_1",
            event_type="DeviceCommsEvent",
            callback=no_data)

        event_subscriber_bad_data.start()
        event_subscriber_no_data.start()

        self.addCleanup(event_subscriber_bad_data.stop)
        self.addCleanup(event_subscriber_no_data.stop)

        #-------------------------------------------------------------------------------------
        # The configuration for the Stream Alert Transform... set up the event types to listen to
        #-------------------------------------------------------------------------------------
        self.valid_values = [-100, 100]
        self.timer_interval = 5
        self.queue_name = 'a_queue'

        config = {
            'process':{
                'timer_interval': self.timer_interval,
                'queue_name': self.queue_name,
                'variable_name': 'input_voltage',
                'time_field_name': 'preferred_timestamp',
                'valid_values': self.valid_values,
                'timer_origin': 'Interval Timer',
                'event_origin': 'instrument_1'
            }
        }

        #-------------------------------------------------------------------------------------
        # Create the process
        #-------------------------------------------------------------------------------------
        pid = TransformPrototypeIntTest.create_process( name= 'DemoStreamAlertTransform',
            module='ion.processes.data.transforms.event_alert_transform',
            class_name='DemoStreamAlertTransform',
            configuration= config)
        self.addCleanup(self.process_dispatcher.cancel_process, pid)
        self.assertIsNotNone(pid)

        #-------------------------------------------------------------------------------------
        # Publish streams and make assertions about alerts
        #-------------------------------------------------------------------------------------

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(name= 'platform_eng_parsed', id_only=True)

        stream_def_id = self.pubsub_management.create_stream_definition('demo_stream', parameter_dictionary_id=pdict_id)
        stream_id, stream_route = self.pubsub_management.create_stream( name='test_demo_alert',
            exchange_point='exch_point_1',
            stream_definition_id=stream_def_id)

        sub_1 = self.pubsub_management.create_subscription(name='sub_1', stream_ids=[stream_id], exchange_points=['exch_point_1'], exchange_name=self.queue_name)
        self.pubsub_management.activate_subscription(sub_1)
        self.exchange_names.append('sub_1')
        self.exchange_points.append('exch_point_1')

        #-------------------------------------------------------------------------------------
        # publish a *GOOD* granule
        #-------------------------------------------------------------------------------------
        self.length = 2
        val = numpy.array([random.uniform(0,50)  for l in xrange(self.length)])
        self._publish_granules(stream_id= stream_id, stream_route= stream_route, number=1, values=val)

        self.assertTrue(queue_bad_data.empty())

        #-------------------------------------------------------------------------------------
        # publish a few *BAD* granules
        #-------------------------------------------------------------------------------------
        self.number = 2
        val = numpy.array([(110 + l)  for l in xrange(self.length)])
        self._publish_granules(stream_id= stream_id, stream_route= stream_route, number= self.number, values=val)

        for number in xrange(self.number):
            event = queue_bad_data.get(timeout=40)
            self.assertEquals(event.type_, "DeviceStatusEvent")
            self.assertEquals(event.origin, "instrument_1")
            self.assertEquals(event.state, DeviceStatusType.STATUS_WARNING)
            self.assertEquals(event.valid_values, self.valid_values)
            self.assertEquals(event.sub_type, 'input_voltage')
            self.assertTrue(set(event.values) ==  set(val))

            s = set(event.time_stamps)
            cond = s in [set(numpy.array([1  for l in xrange(self.length)]).tolist()), set(numpy.array([2  for l in xrange(self.length)]).tolist())]
            self.assertTrue(cond)

        # To ensure that only the bad values generated the alert events. Queue should be empty now
        self.assertEquals(queue_bad_data.qsize(), 0)

        #-------------------------------------------------------------------------------------
        # Do not publish any granules for some time. This should generate a DeviceCommsEvent for the communication status
        #-------------------------------------------------------------------------------------
        event = queue_no_data.get(timeout=15)

        self.assertEquals(event.type_, "DeviceCommsEvent")
        self.assertEquals(event.origin, "instrument_1")
        self.assertEquals(event.origin_type, "PlatformDevice")
        self.assertEquals(event.state, DeviceCommsType.DATA_DELIVERY_INTERRUPTION)
        self.assertEquals(event.sub_type, 'input_voltage')

        #-------------------------------------------------------------------------------------
        # Empty the queues and repeat tests
        #-------------------------------------------------------------------------------------
        queue_bad_data.queue.clear()
        queue_no_data.queue.clear()

        #-------------------------------------------------------------------------------------
        # publish a *GOOD* granule again
        #-------------------------------------------------------------------------------------
        val = numpy.array([(l + 20)  for l in xrange(self.length)])
        self._publish_granules(stream_id= stream_id, stream_route= stream_route, number=1, values=val)

        self.assertTrue(queue_bad_data.empty())

        #-------------------------------------------------------------------------------------
        # Again do not publish any granules for some time. This should generate a DeviceCommsEvent for the communication status
        #-------------------------------------------------------------------------------------

        event = queue_no_data.get(timeout=20)

        self.assertEquals(event.type_, "DeviceCommsEvent")
        self.assertEquals(event.origin, "instrument_1")
        self.assertEquals(event.origin_type, "PlatformDevice")
        self.assertEquals(event.state, DeviceCommsType.DATA_DELIVERY_INTERRUPTION)
        self.assertEquals(event.sub_type, 'input_voltage')

        #-------------------------------------------------------------------------------------
        # Again do not publish any granules for some time. This should generate a DeviceCommsEvent for the communication status
        #-------------------------------------------------------------------------------------

        ar = gevent.event.AsyncResult()
        def poller(ar, method, *args):
            events_in_db = method(*args)
            if len(events_in_db) > 0:
                ar.set(events_in_db)
                return True
            else:
                return False

        poll(poller, ar, self.user_notification.find_events, 'instrument_1')

#        events_in_db = self.user_notification.find_events(origin='instrument_1')

        events_in_db = ar.get(10)
        log.debug("events::: %s" % events_in_db)

        bad_data_events = []
        no_data_events = []

        for event in events_in_db:
            if event.type_ == 'DeviceStatusEvent':
                bad_data_events.append(event)
                self.assertEquals(event.origin, "instrument_1")
                self.assertEquals(event.status, DeviceStatusType.STATUS_WARNING)
                self.assertEquals(event.valid_values, self.valid_values)
                self.assertEquals(event.sub_type, 'input_voltage')
            elif event.type_ == 'DeviceCommsEvent':
                no_data_events.append(event)
                self.assertEquals(event.origin, "instrument_1")
                self.assertEquals(event.origin_type, "PlatformDevice")
                self.assertEquals(event.status, DeviceCommsType.DATA_DELIVERY_INTERRUPTION)
                self.assertEquals(event.sub_type, 'input_voltage')

        self.assertTrue(len(bad_data_events) > 0)
        self.assertTrue(len(no_data_events) > 0)

        log.debug("This satisfies L4-CI-SA-RQ-114 : 'Marine facility shall monitor marine infrastructure usage by instruments.'"
                  " The req is satisfied because the stream alert transform"
                  "is able to send device status and communication events over selected time intervals. This capability will be "
                  "augmented in the future.")

    def _publish_granules(self, stream_id=None, stream_route=None, values = None,number=None):

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        times = numpy.array([number  for l in xrange(self.length)])

        for i in xrange(number):
            rdt['input_voltage'] = values
            rdt['preferred_timestamp'] = ['time' for l in xrange(len(times))]
            rdt['time'] = times

            g = rdt.to_granule()
            g.data_producer_id = 'instrument_1'

            log.debug("granule #%s published by instrument:: %s" % ( number,g))

            pub.publish(g)

    @staticmethod
    def makeEpochTime(date_time=None):
        """
        Provides the seconds since epoch given a Python datetime object.

        @param date_time Python datetime object
        @retval seconds_since_epoch int
        """
        seconds_since_epoch = calendar.timegm(date_time.timetuple())

        return seconds_since_epoch
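
    # Worked example (illustrative): calendar.timegm interprets the datetime as UTC,
    # so makeEpochTime(datetime.datetime(2012, 1, 1)) returns 1325376000.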
Exemplo n.º 53
0
def instrument_test_driver(container):

    org_client = OrgManagementServiceClient(node=container.node)
    id_client = IdentityManagementServiceClient(node=container.node)

    system_actor = id_client.find_actor_identity_by_name(name=CFG.system.system_actor)
    log.info('system actor:' + system_actor._id)

    sa_header_roles = get_role_message_headers(org_client.find_all_roles_by_user(system_actor._id))


    # Names of agent data streams to be configured.
    parsed_stream_name = 'ctd_parsed'
    raw_stream_name = 'ctd_raw'

    # Driver configuration.
    #Simulator

    driver_config = {
        'svr_addr': 'localhost',
        'cmd_port': 5556,
        'evt_port': 5557,
        'dvr_mod': 'ion.agents.instrument.drivers.sbe37.sbe37_driver',
        'dvr_cls': 'SBE37Driver',
        'comms_config': {
            SBE37Channel.CTD: {
                'method':'ethernet',
                'device_addr': CFG.device.sbe37.host,
                'device_port': CFG.device.sbe37.port,
                'server_addr': 'localhost',
                'server_port': 8888
            }
        }
    }

    #Hardware

    _container_client = ContainerAgentClient(node=container.node,
        name=container.name)

    # Create a pubsub client to create streams.
    _pubsub_client = PubsubManagementServiceClient(node=container.node)

    # A callback for processing subscribed-to data.
    def consume(message, headers):
        log.info('Subscriber received message: %s', str(message))

    # Create a stream subscriber registrar to create subscribers.
    subscriber_registrar = StreamSubscriberRegistrar(process=container,
        node=container.node)

    subs = []

    # Create streams for each stream named in driver.
    stream_config = {}
    for (stream_name, val) in PACKET_CONFIG.iteritems():
        stream_def = ctd_stream_definition(stream_id=None)
        stream_def_id = _pubsub_client.create_stream_definition(
            container=stream_def)
        stream_id = _pubsub_client.create_stream(
            name=stream_name,
            stream_definition_id=stream_def_id,
            original=True,
            encoding='ION R2', headers={'ion-actor-id': system_actor._id, 'ion-actor-roles': sa_header_roles })
        stream_config[stream_name] = stream_id

        # Create subscriptions for each stream.
        exchange_name = '%s_queue' % stream_name
        sub = subscriber_registrar.create_subscriber(exchange_name=exchange_name, callback=consume)
        sub.start()
        query = StreamQuery(stream_ids=[stream_id])
        sub_id = _pubsub_client.create_subscription(
            query=query, exchange_name=exchange_name)
        _pubsub_client.activate_subscription(sub_id)
        subs.append(sub)


    # Create agent config.

    agent_resource_id = '123xyz'

    agent_config = {
        'driver_config' : driver_config,
        'stream_config' : stream_config,
        'agent'         : {'resource_id': agent_resource_id}
    }

    # Launch an instrument agent process.
    _ia_name = 'agent007'
    _ia_mod = 'ion.agents.instrument.instrument_agent'
    _ia_class = 'InstrumentAgent'
    _ia_pid = _container_client.spawn_process(name=_ia_name,
        module=_ia_mod, cls=_ia_class,
        config=agent_config)


    log.info('got pid=%s for resource_id=%s' % (str(_ia_pid), str(agent_resource_id)))
Exemplo n.º 54
0
    def test_dm_integration(self):
        '''
        Test full DM services integration: CTD simulator, ingestion and the salinity transform.
        '''
        cc = self.container
        assertions = self.assertTrue


        #-----------------------------
        # Copy below here to run as a script (don't forget the imports of course!)
        #-----------------------------


        # Create some service clients...
        pubsub_management_service = PubsubManagementServiceClient(node=cc.node)
        ingestion_management_service = IngestionManagementServiceClient(node=cc.node)
        dataset_management_service = DatasetManagementServiceClient(node=cc.node)
        data_retriever_service = DataRetrieverServiceClient(node=cc.node)
        transform_management_service = TransformManagementServiceClient(node=cc.node)
        process_dispatcher = ProcessDispatcherServiceClient(node=cc.node)

        # declare some handy variables

        datastore_name = 'test_dm_integration'



        ###
        ### In the beginning there were two stream definitions...
        ###
        # create a stream definition for the data from the ctd simulator
        ctd_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = pubsub_management_service.create_stream_definition(container=ctd_stream_def, name='Simulated CTD data')

        # create a stream definition for the data from the salinity Transform
        sal_stream_def_id = pubsub_management_service.create_stream_definition(container=SalinityTransform.outgoing_stream_def, name='Scalar Salinity data stream')



        ###
        ### And two process definitions...
        ###
        # one for the ctd simulator...
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module':'ion.processes.data.ctd_stream_publisher',
            'class':'SimpleCtdPublisher'
        }

        ctd_sim_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)

        # one for the salinity transform
        producer_definition = ProcessDefinition()
        producer_definition.executable = {
            'module':'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'class':'SalinityTransform'
        }

        salinity_transform_procdef_id = process_dispatcher.create_process_definition(process_definition=producer_definition)



        #---------------------------
        # Set up ingestion - this is an operator concern - not done by SA in a deployed system
        #---------------------------
        # Configure ingestion using one worker, ingesting to the test_dm_integration datastore with the SCIDATA profile
        log.debug('Calling create_ingestion_configuration')
        ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name=datastore_name,datastore_profile='SCIDATA'),
            number_of_workers=1
        )
        #
        ingestion_management_service.activate_ingestion_configuration(
            ingestion_configuration_id=ingestion_configuration_id)



        #---------------------------
        # Set up the producer (CTD Simulator)
        #---------------------------

        # Create the stream
        ctd_stream_id = pubsub_management_service.create_stream(stream_definition_id=ctd_stream_def_id)


        # Set up the datasets
        ctd_dataset_id = dataset_management_service.create_dataset(
            stream_id=ctd_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )

        # Configure ingestion of this dataset
        ctd_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id = ctd_dataset_id,
            archive_data = True,
            archive_metadata = True,
            ingestion_configuration_id = ingestion_configuration_id, # you need to know the ingestion configuration id!
        )
        # Hold onto ctd_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service

        #---------------------------
        # Set up the salinity transform
        #---------------------------


        # Create the stream
        sal_stream_id = pubsub_management_service.create_stream(stream_definition_id=sal_stream_def_id)


        # Set up the datasets
        sal_dataset_id = dataset_management_service.create_dataset(
            stream_id=sal_stream_id,
            datastore_name=datastore_name,
            view_name='datasets/stream_join_granule'
        )

        # Configure ingestion of the salinity as a dataset
        sal_dataset_config_id = ingestion_management_service.create_dataset_configuration(
            dataset_id = sal_dataset_id,
            archive_data = True,
            archive_metadata = True,
            ingestion_configuration_id = ingestion_configuration_id, # you need to know the ingestion configuration id!
        )
        # Hold onto sal_dataset_config_id if you want to stop/start ingestion of that dataset by the ingestion service



        # Create a subscription as input to the transform
        sal_transform_input_subscription_id = pubsub_management_service.create_subscription(
            query = StreamQuery(stream_ids=[ctd_stream_id,]),
            exchange_name='salinity_transform_input')  # exchange names are hand-picked here for readability; in practice they should be made unique

        # create the salinity transform
        sal_transform_id = transform_management_service.create_transform(
            name='example salinity transform',
            in_subscription_id=sal_transform_input_subscription_id,
            out_streams={'output':sal_stream_id,},
            process_definition_id = salinity_transform_procdef_id,
            # no configuration needed at this time...
            )
        # start the transform - for a test case it makes sense to do it before starting the producer but it is not required
        transform_management_service.activate_transform(transform_id=sal_transform_id)



        # Start the ctd simulator to produce some data
        configuration = {
            'process':{
                'stream_id':ctd_stream_id,
            }
        }
        ctd_sim_pid = process_dispatcher.schedule_process(process_definition_id=ctd_sim_procdef_id, configuration=configuration)


        ###
        ### Make a subscriber in the test to listen for salinity data
        ###
        salinity_subscription_id = pubsub_management_service.create_subscription(
            query=StreamQuery([sal_stream_id,]),
            exchange_name = 'salinity_test',
            name = "test salinity subscription",
            )

        pid = cc.spawn_process(name='dummy_process_for_test',
            module='pyon.ion.process',
            cls='SimpleProcess',
            config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, node=cc.node)

        result = gevent.event.AsyncResult()
        results = []
        def message_received(message, headers):
            log.warn('Salinity data received!')
            results.append(message)
            if len(results) > 3:
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(exchange_name='salinity_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        pubsub_management_service.activate_subscription(subscription_id=salinity_subscription_id)


        # Assert that we have received data
        assertions(result.get(timeout=10))

        # stop the flow and parse the messages...
        process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data



        import numpy

        for message in results:

            psd = PointSupplementStreamParser(stream_definition=SalinityTransform.outgoing_stream_def, stream_granule=message)

            # Test the handy info method for the names of fields in the stream def
            assertions('salinity' in psd.list_field_names())

            # you have to know the name of the coverage in the stream def
            salinity = psd.get_values('salinity')

            assertions(isinstance(salinity, numpy.ndarray))

            assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0
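
            # A stricter, element-wise variant of the check above: every salinity
            # value should be positive, not just the minimum.
            assertions(bool(numpy.all(salinity > 0.0)))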
Exemplo n.º 55
0
class CtdTransformsIntTest(IonIntegrationTestCase):
    def setUp(self):
        super(CtdTransformsIntTest, self).setUp()

        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.queue_cleanup = []
        self.exchange_cleanup = []


        self.pubsub             = PubsubManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()

        self.exchange_name = 'ctd_L0_all_queue'
        self.exchange_point = 'test_exchange'
        self.i = 0

    def tearDown(self):
        for queue in self.queue_cleanup:
            xn = self.container.ex_manager.create_xn_queue(queue)
            xn.delete()
        for exchange in self.exchange_cleanup:
            xp = self.container.ex_manager.create_xp(exchange)
            xp.delete()


    def test_ctd_L0_all(self):
        '''
        Test that packets are processed by the ctd_L0_all transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='ctd_L0_all',
            description='For testing ctd_L0_all')
        process_definition.executable['module']= 'ion.processes.data.transforms.ctd.ctd_L0_all'
        process_definition.executable['class'] = 'ctd_L0_all'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id =  self.pubsub.create_stream_definition('ctd_all_stream_def', parameter_dictionary_id=pdict_id)

        cond_stream_id, _ = self.pubsub.create_stream('test_cond',
            exchange_point='science_data',
            stream_definition_id=stream_def_id)

        pres_stream_id, _ = self.pubsub.create_stream('test_pres',
            exchange_point='science_data',
            stream_definition_id=stream_def_id)

        temp_stream_id, _ = self.pubsub.create_stream('test_temp',
            exchange_point='science_data',
            stream_definition_id=stream_def_id)

        config.process.publish_streams.conductivity = cond_stream_id
        config.process.publish_streams.pressure = pres_stream_id
        config.process.publish_streams.temperature = temp_stream_id

        # Schedule the process
        pid = self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create subscribers that will receive the conductivity, temperature and pressure granules from
        # the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_cond = gevent.event.AsyncResult()
        def subscriber1(m, r, s):
            ar_cond.set(m)
        sub_cond = StandaloneStreamSubscriber('sub_cond', subscriber1)
        self.addCleanup(sub_cond.stop)

        ar_temp = gevent.event.AsyncResult()
        def subscriber2(m,r,s):
            ar_temp.set(m)
        sub_temp = StandaloneStreamSubscriber('sub_temp', subscriber2)
        self.addCleanup(sub_temp.stop)

        ar_pres = gevent.event.AsyncResult()
        def subscriber3(m,r,s):
            ar_pres.set(m)
        sub_pres = StandaloneStreamSubscriber('sub_pres', subscriber3)
        self.addCleanup(sub_pres.stop)

        sub_cond_id= self.pubsub.create_subscription('subscription_cond',
                                stream_ids=[cond_stream_id],
                                exchange_name='sub_cond')

        sub_temp_id = self.pubsub.create_subscription('subscription_temp',
            stream_ids=[temp_stream_id],
            exchange_name='sub_temp')

        sub_pres_id = self.pubsub.create_subscription('subscription_pres',
            stream_ids=[pres_stream_id],
            exchange_name='sub_pres')

        self.pubsub.activate_subscription(sub_cond_id)
        self.pubsub.activate_subscription(sub_temp_id)
        self.pubsub.activate_subscription(sub_pres_id)

        self.queue_cleanup.append(sub_cond.xn.queue)
        self.queue_cleanup.append(sub_temp.xn.queue)
        self.queue_cleanup.append(sub_pres.xn.queue)

        sub_cond.start()
        sub_temp.start()
        sub_pres.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length = 5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result_cond = ar_cond.get(timeout=10)
        result_temp = ar_temp.get(timeout=10)
        result_pres = ar_pres.get(timeout=10)

        out_dict = {}
        out_dict['c'] = RecordDictionaryTool.load_from_granule(result_cond)['conductivity']
        out_dict['t'] = RecordDictionaryTool.load_from_granule(result_temp)['temp']
        out_dict['p'] = RecordDictionaryTool.load_from_granule(result_pres)['pressure']

        # Check that the transform algorithm was successfully executed
        self.check_granule_splitting(publish_granule, out_dict)

    def test_ctd_L1_conductivity(self):
        '''
        Test that packets are processed by the ctd_L1_conductivity transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='CTDL1ConductivityTransform',
            description='For testing CTDL1ConductivityTransform')
        process_definition.executable['module']= 'ion.processes.data.transforms.ctd.ctd_L1_conductivity'
        process_definition.executable['class'] = 'CTDL1ConductivityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)

        stream_def_id =  self.pubsub.create_stream_definition('cond_stream_def', parameter_dictionary_id=pdict_id)
        cond_stream_id, _ = self.pubsub.create_stream('test_conductivity',
                                                        exchange_point='science_data',
                                                        stream_definition_id=stream_def_id)

        config.process.publish_streams.conductivity = cond_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the conductivity granules from
        # the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_cond = gevent.event.AsyncResult()
        def subscriber1(m, r, s):
            ar_cond.set(m)
        sub_cond = StandaloneStreamSubscriber('sub_cond', subscriber1)
        self.addCleanup(sub_cond.stop)

        sub_cond_id = self.pubsub.create_subscription('subscription_cond',
            stream_ids=[cond_stream_id],
            exchange_name='sub_cond')

        self.pubsub.activate_subscription(sub_cond_id)

        self.queue_cleanup.append(sub_cond.xn.queue)

        sub_cond.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length = 5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result_cond = ar_cond.get(timeout=10)
        self.assertTrue(isinstance(result_cond, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result_cond)
        self.assertIn('conductivity', rdt)

        self.check_cond_algorithm_execution(publish_granule, result_cond)

    def check_cond_algorithm_execution(self, publish_granule, granule_from_transform):

        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(granule_from_transform)

        output_data = output_rdt_transform['conductivity']
        input_data = input_rdt_to_transform['conductivity']

        # The L1 transform scales raw conductivity: output = input / 100000.0 - 0.5
        self.assertTrue(numpy.allclose((input_data / 100000.0) - 0.5, output_data))

    def check_pres_algorithm_execution(self, publish_granule, granule_from_transform):

        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(granule_from_transform)

        output_data = output_rdt_transform['pressure']
        input_data = input_rdt_to_transform['pressure']

        # Pressure values should pass through unchanged
        self.assertTrue(numpy.allclose(input_data, output_data))

    def check_temp_algorithm_execution(self, publish_granule, granule_from_transform):

        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(granule_from_transform)

        output_data = output_rdt_transform['temp']
        input_data = input_rdt_to_transform['temp']

        # The L1 transform scales raw temperature: output = input / 10000.0 - 10
        self.assertTrue(numpy.allclose((input_data / 10000.0) - 10, output_data))

    def check_density_algorithm_execution(self, publish_granule, granule_from_transform):

        #------------------------------------------------------------------
        # Calculate the correct density from the input granule data
        #------------------------------------------------------------------
        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(granule_from_transform)

        conductivity = input_rdt_to_transform['conductivity']
        pressure = input_rdt_to_transform['pressure']
        temperature = input_rdt_to_transform['temp']

        longitude = input_rdt_to_transform['lon']
        latitude = input_rdt_to_transform['lat']

        sp = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure)
        sa = SA_from_SP(sp, pressure, longitude, latitude)
        dens_value = rho(sa, temperature, pressure)

        out_density = output_rdt_transform['density']

        #-----------------------------------------------------------------------------
        # Check that the output data from the transform has the correct density values
        #-----------------------------------------------------------------------------
        self.assertTrue(numpy.allclose(dens_value, out_density))

    def check_salinity_algorithm_execution(self, publish_granule, granule_from_transform):

        #------------------------------------------------------------------
        # Calculate the correct salinity from the input granule data
        #------------------------------------------------------------------
        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)
        output_rdt_transform = RecordDictionaryTool.load_from_granule(granule_from_transform)

        conductivity = input_rdt_to_transform['conductivity']
        pressure = input_rdt_to_transform['pressure']
        temperature = input_rdt_to_transform['temp']

        sal_value = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure)

        out_salinity = output_rdt_transform['salinity']

        #-----------------------------------------------------------------------------
        # Check that the output data from the transform has the correct salinity values
        #-----------------------------------------------------------------------------
        self.assertTrue(numpy.allclose(sal_value, out_salinity))

    def check_granule_splitting(self, publish_granule, out_dict):
        '''
        This checks that the ctd_L0_all transform is able to split out one of the
        granules from the whole granule
        fed into the transform
        '''

        input_rdt_to_transform = RecordDictionaryTool.load_from_granule(publish_granule)

        in_cond = input_rdt_to_transform['conductivity']
        in_pressure = input_rdt_to_transform['pressure']
        in_temp = input_rdt_to_transform['temp']

        out_cond = out_dict['c']
        out_pres = out_dict['p']
        out_temp = out_dict['t']

        # The L0 transform should split the input granule without altering the values
        self.assertTrue(numpy.allclose(in_cond, out_cond))
        self.assertTrue(numpy.allclose(in_pressure, out_pres))
        self.assertTrue(numpy.allclose(in_temp, out_temp))

    def test_ctd_L1_pressure(self):
        '''
        Test that packets are processed by the ctd_L1_pressure transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='CTDL1PressureTransform',
            description='For testing CTDL1PressureTransform')
        process_definition.executable['module']= 'ion.processes.data.transforms.ctd.ctd_L1_pressure'
        process_definition.executable['class'] = 'CTDL1PressureTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        stream_def_id =  self.pubsub.create_stream_definition('pres_stream_def', parameter_dictionary_id=pdict_id)
        pres_stream_id, _ = self.pubsub.create_stream('test_pressure',
                                                        stream_definition_id=stream_def_id,
                                                        exchange_point='science_data')

        config.process.publish_streams.pressure = pres_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the pressure granules from
        # the ctd transform
        #---------------------------------------------------------------------------------------------

        ar_pres = gevent.event.AsyncResult()
        def subscriber3(m,r,s):
            ar_pres.set(m)
        sub_pres = StandaloneStreamSubscriber('sub_pres', subscriber3)
        self.addCleanup(sub_pres.stop)

        sub_pres_id = self.pubsub.create_subscription('subscription_pres',
            stream_ids=[pres_stream_id],
            exchange_name='sub_pres')

        self.pubsub.activate_subscription(sub_pres_id)

        self.queue_cleanup.append(sub_pres.xn.queue)

        sub_pres.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length = 5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_pres.get(timeout=10)
        self.assertTrue(isinstance(result, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('pressure', rdt)

        self.check_pres_algorithm_execution(publish_granule, result)

    def test_ctd_L1_temperature(self):
        '''
        Test that packets are processed by the ctd_L1_temperature transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='CTDL1TemperatureTransform',
            description='For testing CTDL1TemperatureTransform')
        process_definition.executable['module']= 'ion.processes.data.transforms.ctd.ctd_L1_temperature'
        process_definition.executable['class'] = 'CTDL1TemperatureTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        stream_def_id =  self.pubsub.create_stream_definition('temp_stream_def', parameter_dictionary_id=pdict_id)
        temp_stream_id, _ = self.pubsub.create_stream('test_temperature', stream_definition_id=stream_def_id,
                                                        exchange_point='science_data')

        config.process.publish_streams.temperature = temp_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create subscriber that will receive the temperature granule from
        # the ctd transform
        #---------------------------------------------------------------------------------------------

        ar_temp = gevent.event.AsyncResult()
        def subscriber2(m,r,s):
            ar_temp.set(m)
        sub_temp = StandaloneStreamSubscriber('sub_temp', subscriber2)
        self.addCleanup(sub_temp.stop)

        sub_temp_id = self.pubsub.create_subscription('subscription_temp',
            stream_ids=[temp_stream_id],
            exchange_name='sub_temp')

        self.pubsub.activate_subscription(sub_temp_id)

        self.queue_cleanup.append(sub_temp.xn.queue)

        sub_temp.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length = 5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_temp.get(timeout=10)
        self.assertTrue(isinstance(result, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('temp', rdt)

        self.check_temp_algorithm_execution(publish_granule, result)

    def test_ctd_L2_density(self):
        '''
        Test that packets are processed by the ctd_L2_density transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='DensityTransform',
            description='For testing DensityTransform')
        process_definition.executable['module']= 'ion.processes.data.transforms.ctd.ctd_L2_density'
        process_definition.executable['class'] = 'DensityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        config.process.interval = 1.0

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        stream_def_id =  self.pubsub.create_stream_definition('dens_stream_def', parameter_dictionary_id=pdict_id)
        dens_stream_id, _ = self.pubsub.create_stream('test_density', stream_definition_id=stream_def_id,
            exchange_point='science_data')
        config.process.publish_streams.density = dens_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the density granule from the ctd transform
        #---------------------------------------------------------------------------------------------

        ar_dens = gevent.event.AsyncResult()
        def subscriber3(m,r,s):
            ar_dens.set(m)
        sub_dens = StandaloneStreamSubscriber('sub_dens', subscriber3)
        self.addCleanup(sub_dens.stop)

        sub_dens_id = self.pubsub.create_subscription('subscription_dens',
            stream_ids=[dens_stream_id],
            exchange_name='sub_dens')

        self.pubsub.activate_subscription(sub_dens_id)

        self.queue_cleanup.append(sub_dens.xn.queue)

        sub_dens.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length = 5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_dens.get(timeout=10)
        self.assertTrue(isinstance(result, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('density', rdt)

        self.check_density_algorithm_execution(publish_granule, result)

    def test_ctd_L2_salinity(self):
        '''
        Test that packets are processed by the ctd_L2_salinity transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='SalinityTransform',
            description='For testing SalinityTransform')
        process_definition.executable['module']= 'ion.processes.data.transforms.ctd.ctd_L2_salinity'
        process_definition.executable['class'] = 'SalinityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id =  self.pubsub.create_stream_definition('sal_stream_def', parameter_dictionary_id=pdict_id)
        sal_stream_id, _ = self.pubsub.create_stream('test_salinity', stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.salinity = sal_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the salinity granule from the ctd transform
        #---------------------------------------------------------------------------------------------

        ar_sal = gevent.event.AsyncResult()
        def subscriber3(m,r,s):
            ar_sal.set(m)
        sub_sal = StandaloneStreamSubscriber('sub_sal', subscriber3)
        self.addCleanup(sub_sal.stop)

        sub_sal_id = self.pubsub.create_subscription('subscription_sal',
            stream_ids=[sal_stream_id],
            exchange_name='sub_sal')

        self.pubsub.activate_subscription(sub_sal_id)

        self.queue_cleanup.append(sub_sal.xn.queue)

        sub_sal.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------

        # Do all the routing stuff for the publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length = 5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------

        # Get the granule that is published by the ctd transform post processing
        result = ar_sal.get(timeout=10)
        self.assertTrue(isinstance(result, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertIn('salinity', rdt)

        self.check_salinity_algorithm_execution(publish_granule, result)


    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i+length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])

        g = rdt.to_granule()
        self.i+=length

        return g
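
    # Usage sketch (illustrative, assuming a stream_def_id created via
    # self.pubsub.create_stream_definition as in the tests above):
    #
    #     g = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)
    #     rdt = RecordDictionaryTool.load_from_granule(g)
    #     assert len(rdt['time']) == 5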


    def test_presf_L0_splitter(self):
        '''
        Test that packets are processed by the presf_L0_splitter transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='Presf L0 Splitter',
            description='For testing Presf L0 Splitter')
        process_definition.executable['module']= 'ion.processes.data.transforms.ctd.presf_L0_splitter'
        process_definition.executable['class'] = 'PresfL0Splitter'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        stream_def_id =  self.pubsub.create_stream_definition('pres_stream_def', parameter_dictionary_id=pdict_id)
        pres_stream_id, _ = self.pubsub.create_stream('test_pressure',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.absolute_pressure = pres_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)

#        #---------------------------------------------------------------------------------------------
#        # Create subscribers that will receive the pressure granules from
#        # the ctd transform
#        #---------------------------------------------------------------------------------------------
#
#        ar_pres = gevent.event.AsyncResult()
#        def subscriber3(m,r,s):
#            ar_pres.set(m)
#        sub_pres = StandaloneStreamSubscriber('sub_pres', subscriber3)
#        self.addCleanup(sub_pres.stop)
#
#        sub_pres_id = self.pubsub.create_subscription('subscription_pres',
#            stream_ids=[pres_stream_id],
#            exchange_name='sub_pres')
#
#        self.pubsub.activate_subscription(sub_pres_id)
#
#        self.queue_cleanup.append(sub_pres.xn.queue)
#
#        sub_pres.start()
#
#        #------------------------------------------------------------------------------------------------------
#        # Use a StandaloneStreamPublisher to publish a packet that can be then picked up by a ctd transform
#        #------------------------------------------------------------------------------------------------------
#
#        # Do all the routing stuff for the publishing
#        routing_key = 'stream_id.stream'
#        stream_route = StreamRoute(self.exchange_point, routing_key)
#
#        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
#        xp = self.container.ex_manager.create_xp(self.exchange_point)
#        xn.bind('stream_id.stream', xp)
#
#        pub = StandaloneStreamPublisher('stream_id', stream_route)
#
#        # Build a packet that can be published
#        self.px_ctd = SimpleCtdPublisher()
#        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length = 5)
#
#        # Publish the packet
#        pub.publish(publish_granule)
#
#        #------------------------------------------------------------------------------------------------------
#        # Make assertions about whether the ctd transform executed its algorithm and published the correct
#        # granules
#        #------------------------------------------------------------------------------------------------------
#
#        # Get the granule that is published by the ctd transform post processing
#        result = ar_pres.get(timeout=10)
#        self.assertTrue(isinstance(result, Granule))
#
#        rdt = RecordDictionaryTool.load_from_granule(result)
#        self.assertTrue(rdt.__contains__('pressure'))
#
#        self.check_pres_algorithm_execution(publish_granule, result)
#

    def test_presf_L1(self):
        '''
        Test that packets are processed by the presf_L1 transform
        '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='PresfL1Transform',
            description='For testing PresfL1Transform')
        process_definition.executable['module']= 'ion.processes.data.transforms.ctd.presf_L1'
        process_definition.executable['class'] = 'PresfL1Transform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        stream_def_id =  self.pubsub.create_stream_definition('pres_stream_def', parameter_dictionary_id=pdict_id)
        pres_stream_id, _ = self.pubsub.create_stream('test_pressure',
            stream_definition_id=stream_def_id,
            exchange_point='science_data')

        config.process.publish_streams.seafloor_pressure = pres_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(process_definition_id=ctd_transform_proc_def_id, configuration=config)
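
        # Follow-up sketch (an assumption, mirroring the publish/verify pattern
        # used by the other tests in this class):
        #
        #     ar_pres = gevent.event.AsyncResult()
        #     sub_pres = StandaloneStreamSubscriber('sub_pres', lambda m, r, s: ar_pres.set(m))
        #     self.addCleanup(sub_pres.stop)
        #     sub_pres_id = self.pubsub.create_subscription('subscription_pres',
        #         stream_ids=[pres_stream_id], exchange_name='sub_pres')
        #     self.pubsub.activate_subscription(sub_pres_id)
        #     self.queue_cleanup.append(sub_pres.xn.queue)
        #     sub_pres.start()
        #
        #     stream_route = StreamRoute(self.exchange_point, 'stream_id.stream')
        #     xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        #     xp = self.container.ex_manager.create_xp(self.exchange_point)
        #     xn.bind('stream_id.stream', xp)
        #     pub = StandaloneStreamPublisher('stream_id', stream_route)
        #     pub.publish(self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5))
        #
        #     result = ar_pres.get(timeout=10)
        #     self.assertTrue(isinstance(result, Granule))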
Exemplo n.º 56
0
    def test_blog_ingestion_replay(self):

        #-----------------------------------------------------------------------------------------------------
        # Do this statement just once in your script
        #-----------------------------------------------------------------------------------------------------
        cc = self.container

        #-------------------------------------------------------------------------------------------------------
        # Make a registrar object - this is work usually done for you by the container in a transform or data stream process
        #-------------------------------------------------------------------------------------------------------

        subscriber_registrar = StreamSubscriberRegistrar(process=cc, node=cc.node)

        #-----------------------------------------------------------------------------------------------------
        # Service clients
        #-----------------------------------------------------------------------------------------------------

        ingestion_cli = IngestionManagementServiceClient(node=cc.node)
        dr_cli = DataRetrieverServiceClient(node=cc.node)
        dsm_cli = DatasetManagementServiceClient(node=cc.node)
        pubsub_cli = PubsubManagementServiceClient(node=cc.node)

        #-------------------------------------------------------------------------------------------------------
        # Create and activate ingestion configuration
        #-------------------------------------------------------------------------------------------------------

        ingestion_configuration_id = ingestion_cli.create_ingestion_configuration(
            exchange_point_id='science_data',
            couch_storage=CouchStorage(datastore_name='dm_datastore',datastore_profile='EXAMPLES'),
            hdf_storage=HdfStorage(),
            number_of_workers=6,
        )
        # activates the transforms... so bindings will be created in this step
        ingestion_cli.activate_ingestion_configuration(ingestion_configuration_id)

        #------------------------------------------------------------------------------------------------------
        # Create subscriber to listen to the messages published to the ingestion
        #------------------------------------------------------------------------------------------------------

        # Define the query we want
        query = ExchangeQuery()

        # Create the stateful listener to hold the captured data for comparison with replay
        captured_input = BlogListener()


        # Make a subscription to the input stream to ingestion
        subscription_id = pubsub_cli.create_subscription(query = query, exchange_name='input_capture_queue' ,name = 'input_capture_queue')


        # It is not required or even generally a good idea to use the subscription resource name as the queue name, but it makes things simple here
        # Normally the container creates and starts subscribers for you when a transform process is spawned
        subscriber = subscriber_registrar.create_subscriber(exchange_name='input_capture_queue', callback=captured_input.blog_store)
        subscriber.start()

        captured_input.subscriber = subscriber

        pubsub_cli.activate_subscription(subscription_id)


        #-------------------------------------------------------------------------------------------------------
        # Launching blog scraper
        #-------------------------------------------------------------------------------------------------------

        blogs = [
            'saintsandspinners',
            'strobist',
            'voodoofunk'
        ]


        log.debug('before spawning blog scraper')

        for blog in blogs:
            config = {'process':{'type':'stream_process','blog':blog}}
            cc.spawn_process(name=blog,
                module='ion.services.dm.ingestion.example.blog_scraper',
                cls='FeedStreamer',
                config=config)

        # wait ten seconds for some data to come in...
        log.warn('Sleeping for 10 seconds to wait for some input')
        time.sleep(10)


        #------------------------------------------------------------------------------------------------------
        # For 3 posts captured, make 3 replays and verify we get back what came in
        #------------------------------------------------------------------------------------------------------


        # Cute list comprehension method does not give enough control
        #self.assertTrue(len(captured_input.blogs)>3)
        #post_ids = [id for idx, id in enumerate(captured_input.blogs.iterkeys()) if idx < 3]

        post_ids = []




        for post_id, blog in captured_input.blogs.items(): # Use items(), not iteritems(): iterate over a fixed-length copy since the subscriber may still be adding entries

            log.info('Captured Input: %s' % post_id)
            if len(blog.get('comments',[])) > 2:
                post_ids.append(post_id)

            if len(post_ids) >3:
                break

        ###=======================================================
        ### This section is not scriptable
        ###=======================================================


        if len(post_ids) < 3:
            self.fail('Not enough comments returned by the blog scrapers in 10 seconds')

        if len(captured_input.blogs) < 1:
            self.fail('No data returned in ten seconds by the blog scrapers!')

        ###=======================================================
        ### End non-scriptable
        ###=======================================================


        #------------------------------------------------------------------------------------------------------
        # Create subscriber to listen to the replays
        #------------------------------------------------------------------------------------------------------

        captured_replays = {}

        for idx, post_id in enumerate(post_ids):
            # Create the stateful listener to hold the captured data for comparison with replay


            dataset_id = dsm_cli.create_dataset(
                stream_id=post_id,
                datastore_name='dm_datastore',
                view_name='posts/posts_join_comments')

            replay_id, stream_id =dr_cli.define_replay(dataset_id)


            query = StreamQuery(stream_ids=[stream_id])

            captured_replay = BlogListener()

            #------------------------------------------------------------------------------------------------------
            # Create subscriber to listen to the messages published to the ingestion
            #------------------------------------------------------------------------------------------------------

            # Make a subscription to the input stream to ingestion
            subscription_name = 'replay_capture_queue_%d' % idx
            subscription_id = pubsub_cli.create_subscription(query = query, exchange_name=subscription_name ,name = subscription_name)


            # It is not required or even generally a good idea to use the subscription resource name as the queue name, but it makes things simple here
            # Normally the container creates and starts subscribers for you when a transform process is spawned
            subscriber = subscriber_registrar.create_subscriber(exchange_name=subscription_name, callback=captured_replay.blog_store)
            subscriber.start()

            captured_replay.subscriber = subscriber

            pubsub_cli.activate_subscription(subscription_id)

            #------------------------------------------------------------------------------------------------------
            # Start the replay and listen to the results!
            #------------------------------------------------------------------------------------------------------

            dr_cli.start_replay(replay_id)

            captured_replays[post_id] = captured_replay


        ###=======================================================
        ### The rest is not scriptable
        ###=======================================================


        # wait five seconds for some data to come in...
        log.warn('Sleeping for 5 seconds to wait for some output')
        time.sleep(5)

        matched_comments={}
        for post_id, captured_replay in captured_replays.iteritems():

            # There should be only one blog in here!
            self.assertEqual(len(captured_replay.blogs),1)

            replayed_blog = captured_replay.blogs[post_id]

            input_blog = captured_input.blogs[post_id]

            self.assertEqual(replayed_blog['post'].content, input_blog['post'].content)

            # can't deterministically assert that the number of comments is the same...
            matched_comments[post_id] = 0

            for updated, comment in replayed_blog.get('comments',{}).iteritems():
                self.assertIn(updated, input_blog['comments'])
                matched_comments[post_id] += 1


        # Assert that we got some comments back!
        self.assertTrue(sum(matched_comments.values()) > 0)

        log.info('Matched comments on the following blogs: %s' % matched_comments)