def assert_raw_granules_ingested(self, count, payload_size):
        """Verify the raw payload size of ingested granules via 'tdoa' slicing.

        For every index in ``range(count - 1)`` a one-element slice
        ``[index, index + 1)`` is retrieved from the raw dataset and the length
        of its first 'raw' value is compared with ``payload_size``.

        :param count: granule count; indexes 0 .. count - 2 are checked
        :param payload_size: expected length of each granule's first raw value
        """
        retriever = DataRetrieverServiceClient()

        for index in range(count - 1):
            # Ask only for the single record at position `index`.
            one_record = {'tdoa': slice(index, index + 1)}
            granule = retriever.retrieve(dataset_id=self._raw_dataset_id,
                                         query=one_record)
            rdt = RecordDictionaryTool.load_from_granule(granule)

            timestamp = rdt['time'][0]
            raw_size = len(rdt['raw'][0])
            log.info("Granule index: %d, time: %s, size: %s", index, timestamp, raw_size)
            self.assertEqual(payload_size, raw_size)
Exemplo n.º 2
0
    def assert_raw_granules_ingested(self, count, payload_size):
        """Check, granule by granule, that raw payloads have the expected size.

        Uses the data retriever's slicing support: for each index in
        ``range(count - 1)`` the 'tdoa' slice ``[index, index + 1)`` of the raw
        dataset is loaded and the length of its first 'raw' value must equal
        ``payload_size``.
        """
        retriever = DataRetrieverServiceClient()

        for index in range(count - 1):
            single = slice(index, index + 1)
            granule = retriever.retrieve(dataset_id=self._raw_dataset_id,
                                         query={'tdoa': single})
            rdt = RecordDictionaryTool.load_from_granule(granule)

            first_time = rdt['time'][0]
            first_raw_len = len(rdt['raw'][0])
            log.info("Granule index: %d, time: %s, size: %s", index,
                     first_time, first_raw_len)
            self.assertEqual(payload_size, first_raw_len)
Exemplo n.º 3
0
class TestActivateInstrumentIntegration(IonIntegrationTestCase):
    def setUp(self):
        """Start the container, deploy r2deploy.yml and create service clients.

        The deploy is started with ``bootstrap.use_es = True`` and
        ``es_cleanup`` is registered as a cleanup so the ElasticSearch indexes
        are removed after the test.
        """
        super(TestActivateInstrumentIntegration, self).setUp()

        deploy_config = DotDict()
        deploy_config.bootstrap.use_es = True

        self._start_container()
        self.addCleanup(TestActivateInstrumentIntegration.es_cleanup)

        self.container.start_rel_from_url('res/deploy/r2deploy.yml',
                                          deploy_config)

        # Service clients bound to this container's messaging node.
        node = self.container.node
        self.rrclient = ResourceRegistryServiceClient(node=node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=node)
        self.pubsubcli = PubsubManagementServiceClient(node=node)
        self.imsclient = InstrumentManagementServiceClient(node=node)
        self.dpclient = DataProductManagementServiceClient(node=node)
        self.datasetclient = DatasetManagementServiceClient(node=node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=node)
        self.dataproductclient = DataProductManagementServiceClient(node=node)
        self.dataretrieverclient = DataRetrieverServiceClient(node=node)

        # Clients created without an explicit node.
        self.dataset_management = DatasetManagementServiceClient()
        self.usernotificationclient = UserNotificationServiceClient()

        # Listener bookkeeping.
        self._data_greenlets = []
        self._no_samples = None
        self._samples_received = []

        self.event_publisher = EventPublisher()

    @staticmethod
    def es_cleanup():
        """Delete the ElasticSearch rivers and indexes used by the test.

        Covers every key of ``STD_INDEXES`` plus the per-system
        ``<sysname>_resources_index`` and ``<sysname>_events_index``. Host and
        port come from CFG, defaulting to localhost:9200.
        """
        host = CFG.get_safe('server.elasticsearch.host', 'localhost')
        port = CFG.get_safe('server.elasticsearch.port', '9200')
        client = ep.ElasticSearch(host=host, port=port, timeout=10)

        index_names = list(STD_INDEXES.keys())
        sys_prefix = get_sys_name().lower()
        index_names += [
            '%s_resources_index' % sys_prefix,
            '%s_events_index' % sys_prefix,
        ]

        for index_name in index_names:
            # Drop the couchdb river first, then the index itself.
            IndexManagementService._es_call(client.river_couchdb_delete,
                                            index_name)
            IndexManagementService._es_call(client.index_delete, index_name)

    def create_logger(self, name, stream_id=''):
        """Schedule a StreamGranuleLogger process for a stream.

        :param name: prefix for the process definition name ('<name>_logger')
        :param stream_id: stream id handed to the process configuration
        :return: process id returned by the process dispatcher
        """
        logger_definition = ProcessDefinition(name=name + '_logger')
        logger_definition.executable = {
            'module': 'ion.processes.data.stream_granule_logger',
            'class': 'StreamGranuleLogger'
        }

        dispatcher = self.processdispatchclient
        definition_id = dispatcher.create_process_definition(
            process_definition=logger_definition)

        process_config = {'process': {'stream_id': stream_id}}
        return dispatcher.schedule_process(
            process_definition_id=definition_id,
            configuration=process_config)

    def _create_notification(self,
                             user_name='',
                             instrument_id='',
                             product_id=''):
        """Create a user with two notification subscriptions.

        A 'ResourceLifecycleEvent' notification with origin ``instrument_id``
        and a 'DetectionEvent' notification with origin ``product_id`` are
        registered for a newly created UserInfo resource.

        :param user_name: name of the new user; also used to build its email
        :param instrument_id: origin of the instrument notification
        :param product_id: origin of the data product notification
        :return: resource id of the created user
        """
        #--------------------------------------------------------------------------------------
        # Make notification request objects
        #--------------------------------------------------------------------------------------

        instrument_request = NotificationRequest(
            name='notification_1',
            origin=instrument_id,
            origin_type="instrument",
            event_type='ResourceLifecycleEvent')

        product_request = NotificationRequest(
            name='notification_2',
            origin=product_id,
            origin_type="data product",
            event_type='DetectionEvent')

        #--------------------------------------------------------------------------------------
        # Create a user and get the user_id
        #--------------------------------------------------------------------------------------

        user = UserInfo()
        user.name = user_name
        # The previous format string had no conversion specifier, so applying
        # `% user_name` raised TypeError. example.com is a reserved test domain.
        user.contact.email = '%s@example.com' % user_name

        user_id, _ = self.rrclient.create(user)

        #--------------------------------------------------------------------------------------
        # Create notification
        #--------------------------------------------------------------------------------------

        for request in (instrument_request, product_request):
            self.usernotificationclient.create_notification(
                notification=request, user_id=user_id)
        log.debug(
            "test_activateInstrumentSample: create_user_notifications user_id %s",
            str(user_id))

        return user_id

    def get_datastore(self, dataset_id):
        """Return the SCIDATA-profile datastore named by the given dataset."""
        datastore_name = self.datasetclient.read_dataset(dataset_id).datastore_name
        manager = self.container.datastore_manager
        return manager.get_datastore(datastore_name,
                                     DataStore.DS_PROFILE.SCIDATA)

    def _check_computed_attributes_of_extended_instrument(
            self, expected_instrument_device_id='', extended_instrument=None):
        """Assert the computed attributes of an extended instrument resource.

        Checks the type of each computed attribute and that exactly one user
        notification request is present: a 'ResourceLifecycleEvent' of origin
        type "instrument" whose origin is ``expected_instrument_device_id``.
        """
        computed = extended_instrument.computed

        # Computed attribute name -> wrapper type it must be an instance of.
        expected_types = (
            ('last_data_received_datetime', ComputedFloatValue),
            ('uptime', ComputedStringValue),
            ('power_status_roll_up', ComputedIntValue),
            ('communications_status_roll_up', ComputedIntValue),
            ('data_status_roll_up', ComputedIntValue),
            ('location_status_roll_up', ComputedIntValue),
        )
        for attr_name, value_type in expected_types:
            self.assertIsInstance(getattr(computed, attr_name), value_type)

        # Per the original note, this count check does not work without
        # elasticsearch.
        requests = computed.user_notification_requests.value
        self.assertEqual(1, len(requests))

        request = requests[0]
        self.assertEqual(expected_instrument_device_id, request.origin)
        self.assertEqual("instrument", request.origin_type)
        self.assertEqual('ResourceLifecycleEvent', request.event_type)

    def _check_computed_attributes_of_extended_product(
            self, expected_data_product_id='', extended_data_product=None):
        """Assert the computed attributes of an extended data product.

        Checks the product id, the type of each computed attribute, that the
        last granule's quality flag contains 'ok', that data_datetime holds two
        values, and that the first user notification request is a
        'DetectionEvent' of origin type "data product" for this product.
        """
        self.assertEqual(expected_data_product_id, extended_data_product._id)

        computed = extended_data_product.computed
        log.debug("extended_data_product.computed: %s", computed)

        # Computed attribute name -> wrapper type it must be an instance of.
        expected_types = (
            ('product_download_size_estimated', ComputedFloatValue),
            ('number_active_subscriptions', ComputedIntValue),
            ('data_url', ComputedStringValue),
            ('stored_data_size', ComputedIntValue),
            ('recent_granules', ComputedDictValue),
            ('parameters', ComputedListValue),
            ('recent_events', ComputedEventListValue),
            ('provenance', ComputedDictValue),
            ('user_notification_requests', ComputedListValue),
            ('active_user_subscriptions', ComputedListValue),
            ('past_user_subscriptions', ComputedListValue),
            ('last_granule', ComputedDictValue),
            ('is_persisted', ComputedIntValue),
            ('data_contents_updated', ComputedStringValue),
            ('data_datetime', ComputedListValue),
        )
        for attr_name, value_type in expected_types:
            self.assertIsInstance(getattr(computed, attr_name), value_type)

        # The exact quality text keeps changing to fit UI capabilities, so the
        # check is kept general.
        quality_flag = computed.last_granule.value['quality_flag']
        self.assertIn('ok', quality_flag)
        self.assertEqual(2, len(computed.data_datetime.value))

        request = computed.user_notification_requests.value[0]
        self.assertEqual(expected_data_product_id, request.origin)
        self.assertEqual("data product", request.origin_type)
        self.assertEqual('DetectionEvent', request.event_type)

    @attr('LOCOINT')
    #@unittest.skip('refactoring')
    @unittest.skipIf(not use_es, 'No ElasticSearch')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    @patch.dict(CFG, {'endpoint': {'receive': {'timeout': 90}}})
    def test_activateInstrumentSample(self):
        # End-to-end activation test for an SBE37 instrument:
        #   1. create model, agent, device and agent instance resources
        #   2. create persisted 'parsed' and 'raw' data products for the device
        #   3. subscribe two users to notifications
        #   4. start the agent, walk it through its states and take 10 samples
        #   5. retrieve both datasets and check that 10 samples arrived
        #   6. check the computed attributes of the extended product/instrument
        # (Kept as comments rather than a docstring so the reported test name
        # does not change.)

        # pids of the stream logger processes, cancelled at the end of the test
        self.loggerpids = []

        # Create InstrumentModel
        instModel_obj = IonObject(RT.InstrumentModel,
                                  name='SBE37IMModel',
                                  description="SBE37IMModel")
        instModel_id = self.imsclient.create_instrument_model(instModel_obj)
        log.debug('new InstrumentModel id = %s ', instModel_id)

        # Stream configurations advertised by the agent: one raw, one parsed
        raw_config = StreamConfiguration(stream_name='raw',
                                         parameter_dictionary_name='raw')
        parsed_config = StreamConfiguration(
            stream_name='parsed',
            parameter_dictionary_name='ctd_parsed_param_dict')

        # Create InstrumentAgent
        instAgent_obj = IonObject(
            RT.InstrumentAgent,
            name='agent007',
            description="SBE37IMAgent",
            driver_uri=DRV_URI_GOOD,
            stream_configurations=[raw_config, parsed_config])
        instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj)
        log.debug('new InstrumentAgent id = %s', instAgent_id)

        self.imsclient.assign_instrument_model_to_instrument_agent(
            instModel_id, instAgent_id)

        # Create InstrumentDevice
        log.debug(
            'test_activateInstrumentSample: Create instrument resource to represent the SBE37 (SA Req: L4-CI-SA-RQ-241) '
        )
        instDevice_obj = IonObject(RT.InstrumentDevice,
                                   name='SBE37IMDevice',
                                   description="SBE37IMDevice",
                                   serial_number="12345")
        instDevice_id = self.imsclient.create_instrument_device(
            instrument_device=instDevice_obj)
        self.imsclient.assign_instrument_model_to_instrument_device(
            instModel_id, instDevice_id)
        log.debug(
            "test_activateInstrumentSample: new InstrumentDevice id = %s (SA Req: L4-CI-SA-RQ-241) ",
            instDevice_id)

        # Port agent settings; device address and ports come from CFG.device.sbe37
        port_agent_config = {
            'device_addr': CFG.device.sbe37.host,
            'device_port': CFG.device.sbe37.port,
            'process_type': PortAgentProcessType.UNIX,
            'binary_path': "port_agent",
            'port_agent_addr': 'localhost',
            'command_port': CFG.device.sbe37.port_agent_cmd_port,
            'data_port': CFG.device.sbe37.port_agent_data_port,
            'log_level': 5,
            'type': PortAgentType.ETHERNET
        }

        instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance,
                                          name='SBE37IMAgentInstance',
                                          description="SBE37IMAgentInstance",
                                          port_agent_config=port_agent_config,
                                          alerts=[])

        instAgentInstance_id = self.imsclient.create_instrument_agent_instance(
            instAgentInstance_obj, instAgent_id, instDevice_id)

        # Dumped temporal/spatial domains are passed to both data products below
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        # Stream definitions for the parsed and raw parameter dictionaries
        parsed_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        parsed_stream_def_id = self.pubsubcli.create_stream_definition(
            name='parsed', parameter_dictionary_id=parsed_pdict_id)

        raw_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'raw', id_only=True)
        raw_stream_def_id = self.pubsubcli.create_stream_definition(
            name='raw', parameter_dictionary_id=raw_pdict_id)

        #-------------------------------
        # Create Raw and Parsed Data Products for the device
        #-------------------------------

        # Parsed data product (data_product_id1)
        dp_obj = IonObject(RT.DataProduct,
                           name='the parsed data',
                           description='ctd stream test',
                           temporal_domain=tdom,
                           spatial_domain=sdom)

        data_product_id1 = self.dpclient.create_data_product(
            data_product=dp_obj, stream_definition_id=parsed_stream_def_id)
        log.debug('new dp_id = %s', data_product_id1)
        self.dpclient.activate_data_product_persistence(
            data_product_id=data_product_id1)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                            data_product_id=data_product_id1)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1,
                                                   PRED.hasStream, None, True)
        log.debug('Data product streams1 = %s', stream_ids)

        # Retrieve the id of the dataset attached to the parsed data product
        dataset_ids, _ = self.rrclient.find_objects(data_product_id1,
                                                    PRED.hasDataset,
                                                    RT.Dataset, True)
        log.debug('Data set for data_product_id1 = %s', dataset_ids[0])
        self.parsed_dataset = dataset_ids[0]

        # Log granules published on the parsed stream
        pid = self.create_logger('ctd_parsed', stream_ids[0])
        self.loggerpids.append(pid)

        # Raw data product (data_product_id2)
        dp_obj = IonObject(RT.DataProduct,
                           name='the raw data',
                           description='raw stream test',
                           temporal_domain=tdom,
                           spatial_domain=sdom)

        data_product_id2 = self.dpclient.create_data_product(
            data_product=dp_obj, stream_definition_id=raw_stream_def_id)
        log.debug('new dp_id = %s', data_product_id2)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                            data_product_id=data_product_id2)

        self.dpclient.activate_data_product_persistence(
            data_product_id=data_product_id2)

        # setup notifications for the device and parsed data product
        user_id_1 = self._create_notification(user_name='user_1',
                                              instrument_id=instDevice_id,
                                              product_id=data_product_id1)
        #---------- Create notifications for another user and verify that we see different computed subscriptions for the two users ---------
        # (user_2 subscribes to the raw data product instead of the parsed one)
        user_id_2 = self._create_notification(user_name='user_2',
                                              instrument_id=instDevice_id,
                                              product_id=data_product_id2)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id2,
                                                   PRED.hasStream, None, True)
        log.debug('Data product streams2 = %s', str(stream_ids))

        # Retrieve the id of the dataset attached to the raw data product
        dataset_ids, _ = self.rrclient.find_objects(data_product_id2,
                                                    PRED.hasDataset,
                                                    RT.Dataset, True)
        log.debug('Data set for data_product_id2 = %s', dataset_ids[0])
        self.raw_dataset = dataset_ids[0]

        #elastic search debug
        es_indexes, _ = self.container.resource_registry.find_resources(
            restype='ElasticSearchIndex')
        log.debug('ElasticSearch indexes: %s', [i.name for i in es_indexes])
        log.debug('Bootstrap %s', CFG.bootstrap.use_es)

        # Start the agent instance in a greenlet and wait for that greenlet to finish
        def start_instrument_agent():
            self.imsclient.start_instrument_agent_instance(
                instrument_agent_instance_id=instAgentInstance_id)

        gevent.joinall([gevent.spawn(start_instrument_agent)])

        #cleanup
        self.addCleanup(self.imsclient.stop_instrument_agent_instance,
                        instrument_agent_instance_id=instAgentInstance_id)

        #wait for start
        # NOTE(review): inst_agent_instance_obj is only referenced by the
        # commented-out trace log below.
        inst_agent_instance_obj = self.imsclient.read_instrument_agent_instance(
            instAgentInstance_id)
        gate = AgentProcessStateGate(self.processdispatchclient.read_process,
                                     instDevice_id, ProcessStateEnum.RUNNING)
        # NOTE(review): `await` is a reserved word from Python 3.7 on, so this
        # attribute call only parses on Python 2.
        self.assertTrue(
            gate. await (30),
            "The instrument agent instance (%s) did not spawn in 30 seconds" %
            gate.process_id)

        #log.trace('Instrument agent instance obj: = %s' , str(inst_agent_instance_obj))

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = ResourceAgentClient(instDevice_id,
                                              to_name=gate.process_id,
                                              process=FakeProcess())

        log.debug("test_activateInstrumentSample: got ia client %s",
                  str(self._ia_client))

        # INITIALIZE: agent is expected to be INACTIVE afterwards
        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        retval = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentSample: initialize %s", str(retval))
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.INACTIVE, state)

        # GO_ACTIVE: agent is expected to be IDLE afterwards
        log.debug("(L4-CI-SA-RQ-334): Sending go_active command ")
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrument: return value from go_active %s",
                  str(reply))
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.IDLE, state)

        # The resource state is only logged here, not asserted
        cmd = AgentCommand(command=ResourceAgentEvent.GET_RESOURCE_STATE)
        retval = self._ia_client.execute_agent(cmd)
        state = retval.result
        log.debug(
            "(L4-CI-SA-RQ-334): current state after sending go_active command %s",
            str(state))

        # RUN: IDLE -> COMMAND
        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentSample: run %s", str(reply))
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.COMMAND, state)

        # PAUSE: COMMAND -> STOPPED
        cmd = AgentCommand(command=ResourceAgentEvent.PAUSE)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.STOPPED, state)

        # RESUME: STOPPED -> COMMAND
        cmd = AgentCommand(command=ResourceAgentEvent.RESUME)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.COMMAND, state)

        # CLEAR: COMMAND -> IDLE
        cmd = AgentCommand(command=ResourceAgentEvent.CLEAR)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.IDLE, state)

        # RUN again: IDLE -> COMMAND, ready for sampling
        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.COMMAND, state)

        # Take 10 samples; the retrieval checks below expect exactly 10
        cmd = AgentCommand(command=SBE37ProtocolEvent.ACQUIRE_SAMPLE)
        for i in xrange(10):
            retval = self._ia_client.execute_resource(cmd)
            log.debug("test_activateInstrumentSample: return from sample %s",
                      str(retval))

        log.debug("test_activateInstrumentSample: calling reset ")
        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentSample: return from reset %s",
                  str(reply))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC Call to start_retreive
        #--------------------------------------------------------------------------------

        replay_data_raw = self.dataretrieverclient.retrieve(self.raw_dataset)
        self.assertIsInstance(replay_data_raw, Granule)
        rdt_raw = RecordDictionaryTool.load_from_granule(replay_data_raw)
        log.debug("RDT raw: %s", str(rdt_raw.pretty_print()))

        self.assertIn('raw', rdt_raw)
        raw_vals = rdt_raw['raw']

        # Concatenate all raw chunks so a command split across chunks is still counted
        all_raw = "".join(raw_vals)

        # look for 't' entered after a prompt -- ">t"
        t_commands = all_raw.count(">t")

        # Dump every raw value at error level on mismatch, at debug level otherwise
        if 10 != t_commands:
            log.error("%s raw_vals: ", len(raw_vals))
            for i, r in enumerate(raw_vals):
                log.error("raw val %s: %s", i, [r])
            self.fail("Expected 10 't' strings in raw_vals, got %s" %
                      t_commands)
        else:
            log.debug("%s raw_vals: ", len(raw_vals))
            for i, r in enumerate(raw_vals):
                log.debug("raw val %s: %s", i, [r])

        replay_data_parsed = self.dataretrieverclient.retrieve(
            self.parsed_dataset)
        self.assertIsInstance(replay_data_parsed, Granule)
        rdt_parsed = RecordDictionaryTool.load_from_granule(replay_data_parsed)
        log.debug("test_activateInstrumentSample: RDT parsed: %s",
                  str(rdt_parsed.pretty_print()))
        self.assertIn('temp', rdt_parsed)
        temp_vals = rdt_parsed['temp']
        # NOTE(review): pressure_vals is read but not used afterwards
        pressure_vals = rdt_parsed['pressure']
        if 10 != len(temp_vals):
            log.error("%s temp_vals: %s", len(temp_vals), temp_vals)
            self.fail("Expected 10 temp_vals, got %s" % len(temp_vals))

        log.debug("l4-ci-sa-rq-138")
        # The bare string below is a no-op expression statement holding
        # requirement notes; nothing in this test exercises policy.
        """
        Physical resource control shall be subject to policy

        Instrument management control capabilities shall be subject to policy

        The actor accessing the control capabilities must be authorized to send commands.

        note from maurice 2012-05-18: Talk to tim M to verify that this is policy.  If it is then talk with Stephen to
                                      get an example of a policy test and use that to create a test stub that will be
                                      completed when we have instrument policies.

        Tim M: The "actor", aka observatory operator, will access the instrument through ION.

        """

        #--------------------------------------------------------------------------------
        # Get the extended data product to see if it contains the granules
        #--------------------------------------------------------------------------------
        extended_product = self.dpclient.get_data_product_extension(
            data_product_id=data_product_id1, user_id=user_id_1)

        # NOTE(review): the poller inspects the extension object fetched above;
        # it is not re-fetched here, so presumably the result can only change
        # if poll() or the object itself refreshes -- confirm.
        def poller(extended_product):
            return len(extended_product.computed.user_notification_requests.
                       value) == 1

        poll(poller, extended_product, timeout=30)

        self._check_computed_attributes_of_extended_product(
            expected_data_product_id=data_product_id1,
            extended_data_product=extended_product)

        #--------------------------------------------------------------------------------
        # Get the extended instrument
        #--------------------------------------------------------------------------------

        # NOTE(review): this user_1 extension is overwritten by the user_2 one
        # below before any check is run against it.
        extended_instrument = self.imsclient.get_instrument_device_extension(
            instrument_device_id=instDevice_id, user_id=user_id_1)

        #--------------------------------------------------------------------------------
        # For the second user, check the extended data product and the extended instrument
        #--------------------------------------------------------------------------------
        extended_product = self.dpclient.get_data_product_extension(
            data_product_id=data_product_id2, user_id=user_id_2)
        self._check_computed_attributes_of_extended_product(
            expected_data_product_id=data_product_id2,
            extended_data_product=extended_product)

        #--------------------------------------------------------------------------------
        # Get the extended instrument
        #--------------------------------------------------------------------------------

        extended_instrument = self.imsclient.get_instrument_device_extension(
            instrument_device_id=instDevice_id, user_id=user_id_2)
        self._check_computed_attributes_of_extended_instrument(
            expected_instrument_device_id=instDevice_id,
            extended_instrument=extended_instrument)

        #--------------------------------------------------------------------------------
        # Deactivate loggers
        #--------------------------------------------------------------------------------

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)

        # Remove both data products created by this test
        self.dpclient.delete_data_product(data_product_id1)
        self.dpclient.delete_data_product(data_product_id2)
Exemplo n.º 4
0
class ExhaustiveParameterTest(IonIntegrationTestCase):
    def setUp(self):
        '''
        Start the container with r2params.yml and create one persisted data
        product per ParameterDictionary resource found in the registry.

        The ids of the created data products are collected in self.dp_ids.
        '''
        self.i = 0
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2params.yml')

        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.resource_registry = self.container.resource_registry
        self.data_retriever = DataRetrieverServiceClient()

        param_dicts, _ = self.resource_registry.find_resources(
            restype='ParameterDictionary', id_only=False)

        self.dp_ids = []
        for param_dict in param_dicts:
            # One stream definition, named after the dictionary, per dictionary.
            stream_def_id = self.pubsub_management.create_stream_definition(
                param_dict.name, parameter_dictionary_id=param_dict._id)
            product_id = self.make_dp(stream_def_id)
            if not product_id:
                continue
            self.dp_ids.append(product_id)

    def make_dp(self, stream_def_id):
        '''
        Create a data product named after the stream definition, activate its
        persistence and return the new data product id.
        '''
        definition_name = self.resource_registry.read(stream_def_id).name
        product = DataProduct(
            name=definition_name,
            description=definition_name,
            processing_level_code='Parsed_Canonical')

        dpms = self.data_product_management
        product_id = dpms.create_data_product(product, stream_definition_id=stream_def_id)
        dpms.activate_data_product_persistence(product_id)
        return product_id

    def fill_values(self, ptype, size):
        '''
        Return synthetic values for a parameter of the given type.

        The checks run in a fixed order and the first matching type wins:
        ArrayType, QuantityType, RecordType, ConstantRangeType, ConstantType,
        CategoryType. Any other type yields None.
        '''
        if isinstance(ptype, ArrayType):
            return ['blah'] * size
        if isinstance(ptype, QuantityType):
            # Sine wave with a period of three samples, in the parameter's encoding.
            samples = np.arange(size, dtype=ptype.value_encoding)
            return np.sin(samples * 2 * np.pi / 3)
        if isinstance(ptype, RecordType):
            return [{'record': 'ok'}] * size
        if isinstance(ptype, ConstantRangeType):
            return (1,1000)
        if isinstance(ptype, ConstantType):
            return np.dtype(ptype.value_encoding).type(1)
        if isinstance(ptype, CategoryType):
            return ptype.categories.keys()[0]
        return None


    def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
        '''
        Poll the dataset every 0.2 seconds until it holds enough data.

        Polling stops once the last data point has a temporal value that is
        not the fill value and the first temporal extent is at least
        data_size. The loop runs under a 40 second gevent.Timeout.
        '''
        with gevent.Timeout(40):
            while True:
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)
                time_name = rdt._pdict.temporal_parameter_name
                extents = self.dataset_management.dataset_extents(dataset_id, time_name)[0]

                times = rdt[time_name]
                if (times
                        and times[0] != rdt._pdict.get_context(time_name).fill_value
                        and extents >= data_size):
                    break
                gevent.sleep(0.2)

    def write_to_data_product(self, data_product_id):
        '''
        Publish 40 synthetic records to the data product and read them back.

        Returns the list of field names whose retrieved values differ from the
        published ones. Returns None, after printing a notice, when the
        product's parameter dictionary has no temporal parameter.
        '''
        record_count = 40
        registry = self.resource_registry

        dataset_ids, _ = registry.find_objects(data_product_id, 'hasDataset', id_only=True)
        dataset_id = dataset_ids.pop()

        stream_ids, _ = registry.find_objects(data_product_id, 'hasStream', id_only=True)
        stream_id = stream_ids.pop()
        stream_def_ids, _ = registry.find_objects(stream_id, 'hasStreamDefinition', id_only=True)
        stream_def_id = stream_def_ids.pop()

        route = self.pubsub_management.read_stream_route(stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        time_param = rdt._pdict.temporal_parameter_name
        if time_param is None:
            print('%s has no temporal parameter' % registry.read(data_product_id).name)
            return
        rdt[time_param] = np.arange(record_count)

        # Fill every non-temporal field with values suited to its type.
        for field in rdt.fields:
            if field != rdt._pdict.temporal_parameter_name:
                param_type = rdt._pdict.get_context(field).param_type
                rdt[field] = self.fill_values(param_type, record_count)

        StandaloneStreamPublisher(stream_id, route).publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, record_count)

        retrieved = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(retrieved)

        # Collect, and print, every field that did not round-trip unchanged.
        mismatched = []
        for field in rdt.fields:
            if np.array_equal(rdt[field], rdt_out[field]):
                continue
            print('%s' % field)
            print('%s != %s' % (rdt[field], rdt_out[field]))
            mismatched.append(field)

        return mismatched

        
    def test_data_products(self):
        # Round-trips data through every data product created in setUp and
        # fails once at the end, after printing a report for each data product
        # that raised or returned mismatching fields.
        # (Comments rather than a docstring so the reported test name does not
        # change.)
        import traceback

        bad_data_products = {}
        for dp_id in self.dp_ids:
            try:
                bad_fields = self.write_to_data_product(dp_id)
            except (Exception, gevent.Timeout):
                # gevent.Timeout derives from BaseException, so it is listed
                # explicitly: a timed-out wait is recorded as a failure for
                # this product. KeyboardInterrupt and SystemExit are no longer
                # swallowed, unlike with the previous bare `except:`.
                bad_data_products[dp_id] = traceback.format_exc()
            else:
                if bad_fields:
                    bad_data_products[dp_id] = "Couldn't write and retrieve %s." % bad_fields

        for dp_id, report in bad_data_products.items():
            print('----------')
            print('Problem with %s' % self.resource_registry.read(dp_id).name)
            print(report)
            print('----------')

        if bad_data_products:
            raise AssertionError('There are bad parameter dictionaries.')
Exemplo n.º 5
0
class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):
    def setUp(self):
        """
        Start the container with the r2deploy services, build the service
        clients used by the tests, create the 'SBE37_CDM' stream definition and
        an ingestion worker process definition, then schedule one ingestion
        worker and register cleaning_up as the test cleanup.
        """
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Service clients
        self.dpsc_cli = DataProductManagementServiceClient()
        self.rrclient = ResourceRegistryServiceClient()
        self.damsclient = DataAcquisitionManagementServiceClient()
        self.pubsubcli = PubsubManagementServiceClient()
        self.ingestclient = IngestionManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc = UserNotificationServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        # Environment: stream definition + ingestion worker process definition
        self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

        self.process_definitions = {}
        worker_definition = ProcessDefinition(name='ingestion worker')
        worker_definition.executable = {
            'module': 'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class': 'ScienceGranuleIngestionWorker',
        }
        self.process_definitions['ingestion_worker'] = \
            self.process_dispatcher.create_process_definition(process_definition=worker_definition)

        # Bookkeeping read back by cleaning_up
        self.exchange_space = 'science_granule_ingestion'
        self.exchange_point = 'science_data'
        self.pids = []
        self.exchange_points = [self.exchange_point]
        self.exchange_names = [self.exchange_space]

        # Launch the ingestion worker on the ingestion queue
        worker_config = DotDict()
        worker_config.process.datastore_name = 'datasets'
        worker_config.process.queue_name = self.exchange_space

        worker_pid = self.process_dispatcher.schedule_process(
            self.process_definitions['ingestion_worker'],
            configuration=worker_config)
        log.debug("the ingestion worker process id: %s", worker_pid)
        self.pids.append(worker_pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        """
        Best-effort teardown registered by setUp.

        Cancels every process recorded in self.pids, cleans the ingestion
        subscriptions, and deletes the exchange queues and exchange points
        recorded in self.exchange_names / self.exchange_points.

        A process that cannot be cancelled is logged and skipped so the rest
        of the teardown still runs.
        """
        log.debug("number of pids to be terminated: %s", len(self.pids))
        for pid in self.pids:
            try:
                self.process_dispatcher.cancel_process(pid)
            except Exception as exc:
                # Only ordinary errors are tolerated here; KeyboardInterrupt
                # and SystemExit must still be able to stop the test run.
                log.debug("could not terminate the process id: %s (%s)", pid, exc)
            else:
                log.debug("Terminated the process: %s", pid)
        IngestionManagementIntTest.clean_subscriptions()

        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def get_datastore(self, dataset_id):
        """
        Return the datastore (SCIDATA profile) named by the dataset's
        datastore_name attribute.
        """
        datastore_name = self.dataset_management.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)

    @attr('EXT')
    @attr('PREP')
    def test_create_data_product(self):
        """
        Walk a data product through its life cycle against the live services.

        Covers: creation with a stream definition (twice), activation of
        persistence, read, update, the extension and prepare-support views,
        delete followed by force delete, and the presence of recent events for
        the removed data product.
        """

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data',
            parameter_dictionary_id=parameter_dictionary._id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # first data product: explicit geospatial bounds and an OOI product name
        #------------------------------------------------------------------------------------------------
        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')

        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 10.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -10.0
        dp_obj.ooi_product_name = "PRODNAME"

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        # Assert that the data product has an associated stream at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        self.assertNotEqual(len(stream_ids), 0)

        # Assert that the data product has an associated stream def at this stage
        stream_def_ids, _ = self.rrclient.find_objects(dp_id,
                                                       PRED.hasStreamDefinition,
                                                       RT.StreamDefinition, True)
        self.assertNotEqual(len(stream_def_ids), 0)

        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)
        # NOTE(review): presumably the service derives the center point from
        # the symmetric +/-10 degree bounds set above -- confirm.
        self.assertEqual(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Created data product %s', dp_obj)

        #------------------------------------------------------------------------------------------------
        # second data product: default attributes, same stream definition
        #------------------------------------------------------------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
                           name='DP2',
                           description='some new dp')

        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s', dp_id2)

        #------------------------------------------------------------------------------------------------
        # make sure data product is associated with stream def
        #------------------------------------------------------------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream,
                                                RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s", s)
            sdefs, _ = self.rrclient.find_objects(s, PRED.hasStreamDefinition,
                                                  RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s", sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        # the product name given to DP1 must show up as a data product group
        group_names = self.dpsc_cli.get_data_product_group_list()
        self.assertIn("PRODNAME", group_names)

        # test reading a non-existent data product
        log.debug('reading non-existent data product')

        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 20.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -20.0
        # now write the dp back to the registry
        self.dpsc_cli.update_data_product(dp_obj)

        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEqual(dp_obj.description, 'the very first dp')
        self.assertEqual(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Updated data product %s', dp_obj)

        # test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(
            ComputedValueAvailability.PROVIDED,
            extended_product.computed.product_download_size_estimated.status)
        self.assertEqual(
            0, extended_product.computed.product_download_size_estimated.value)

        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.parameters.status)

        # test prepare for create (no data product id given)
        data_product_data = self.dpsc_cli.prepare_data_product_support()

        self.assertEqual(data_product_data._id, "")
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        # NOTE(review): 2 presumably = the stream definition from setUp plus
        # the one created at the top of this test -- confirm.
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].resources),
            2)
        self.assertEqual(
            len(data_product_data.associations['Dataset'].resources), 0)
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].
                associated_resources), 0)
        self.assertEqual(
            len(data_product_data.associations['Dataset'].associated_resources
                ), 0)

        # test prepare for update (existing data product id given)
        data_product_data = self.dpsc_cli.prepare_data_product_support(dp_id)

        self.assertEqual(data_product_data._id, dp_id)
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].resources),
            2)

        self.assertEqual(
            len(data_product_data.associations['Dataset'].resources), 1)

        self.assertEqual(
            len(data_product_data.associations['StreamDefinition'].
                associated_resources), 1)
        self.assertEqual(
            data_product_data.associations['StreamDefinition'].
            associated_resources[0].s, dp_id)

        self.assertEqual(
            len(data_product_data.associations['Dataset'].associated_resources
                ), 1)
        self.assertEqual(
            data_product_data.associations['Dataset'].associated_resources[0].
            s, dp_id)

        # now 'delete' the data product
        log.debug("deleting data product: %s", dp_id)
        self.dpsc_cli.delete_data_product(dp_id)

        # Assert that there are no associated streams leftover after deleting the data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            dp_id, PRED.hasStream, RT.Stream, True)
        self.assertEqual(len(stream_ids), 0)
        self.assertEqual(len(assoc_ids), 0)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value

        for event in events:
            log.debug("event time: %s", event.ts_created)

        self.assertTrue(len(events) > 0)

    def test_data_product_stream_def(self):
        """
        A data product created with a stream definition must report that same
        stream definition id through get_data_product_stream_definition.
        """
        param_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        expected_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=param_dict_id)

        product = IonObject(RT.DataProduct, name='DP1', description='some new dp')
        product_id = self.dpsc_cli.create_data_product(
            data_product=product, stream_definition_id=expected_stream_def_id)

        reported_stream_def_id = self.dpsc_cli.get_data_product_stream_definition(product_id)
        self.assertEquals(expected_stream_def_id, reported_stream_def_id)

    def test_derived_data_product(self):
        """
        Publish to a parent ('Instrument DP') data product and read the data
        back through a DERIVED child product ('TEMPWAT') whose stream
        definition only exposes the 'time' and 'temp' fields.
        """
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)

        # --- parent data product, persisted -------------------------------
        parent_stream_def_id = self.pubsubcli.create_stream_definition(
            name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, parent_stream_def_id)

        parent_dp = DataProduct(name='Instrument DP')
        parent_dp_id = self.dpsc_cli.create_data_product(
            parent_dp, stream_definition_id=parent_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, parent_dp_id)

        self.dpsc_cli.activate_data_product_persistence(parent_dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, parent_dp_id)

        parent_dataset_ids, _ = self.rrclient.find_objects(
            subject=parent_dp_id, predicate=PRED.hasDataset, id_only=True)
        if not parent_dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(parent_dp_id))
        parent_dataset_id = parent_dataset_ids[0]

        # --- derived data product restricted to time/temp -----------------
        derived_stream_def_id = self.pubsubcli.create_stream_definition(
            name='TEMPWAT stream def',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        derived_dp = DataProduct(name='TEMPWAT', category=DataProductTypeEnum.DERIVED)
        derived_dp_id = self.dpsc_cli.create_data_product(
            derived_dp,
            stream_definition_id=derived_stream_def_id,
            parent_data_product_id=parent_dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, derived_dp_id)

        # Every stream of the parent product must be persisted
        parent_stream_ids, _ = self.rrclient.find_objects(
            parent_dp_id, PRED.hasStream, RT.Stream, True)
        for persisted_stream_id in parent_stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(persisted_stream_id))

        # --- publish 20 records on the parent stream ----------------------
        stream_id = parent_stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=parent_stream_def_id)
        for field in ('time', 'temp', 'pressure'):
            rdt[field] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def on_dataset_modified(*args, **kwargs):
            dataset_modified.set()

        subscriber = EventSubscriber(event_type=OT.DatasetModified,
                                     callback=on_dataset_modified,
                                     origin=parent_dataset_id,
                                     auto_delete=True)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        publisher.publish(rdt.to_granule())

        # Wait (up to 30s) for the DatasetModified event of the parent dataset
        self.assertTrue(dataset_modified.wait(30))

        # --- read back through the derived product ------------------------
        derived_dataset_ids, _ = self.rrclient.find_objects(
            derived_dp_id, PRED.hasDataset, id_only=True)
        derived_dataset_id = derived_dataset_ids[0]
        granule = self.data_retriever.retrieve(
            derived_dataset_id, delivery_format=derived_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time', 'temp']))

    def test_activate_suspend_data_product(self):
        """
        Verify that activating / suspending persistence of a data product
        controls whether published granules reach its dataset.

        Flow: create the product, activate persistence, publish and retrieve,
        suspend and check that nothing is ingested, re-activate and check the
        dataset holds both batches, force delete the product, and finally
        check that four InformationContentStatusEvents were received for it.
        """

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # create the data product on that stream definition
        #------------------------------------------------------------------------------------------------
        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')

        log.debug("Created an IonObject for a data product: %s", dp_obj)

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Subscribe to persist events
        #------------------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def info_event_received(message, headers):
            queue.put(message)

        info_subscriber = EventSubscriber(event_type=OT.InformationContentStatusEvent,
                                          callback=info_event_received,
                                          origin=dp_id,
                                          auto_delete=True)
        info_subscriber.start()
        self.addCleanup(info_subscriber.stop)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Check that the streams associated with the data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        dataset_subscriber = EventSubscriber(event_type=OT.DatasetModified,
                                             callback=cb,
                                             origin=dataset_id,
                                             auto_delete=True)
        dataset_subscriber.start()
        self.addCleanup(dataset_subscriber.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)

        log.debug(
            "The data retriever was able to replay the dataset that was attached to the data product "
            "we wanted to be persisted. Therefore the data product was indeed persisted with "
            "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
            "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'"
        )

        data_product_object = self.rrclient.read(dp_id)
        self.assertEqual(data_product_object.name, 'DP1')
        self.assertEqual(data_product_object.description, 'some new dp')

        log.debug(
            "Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
            " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
            "resource registry, name='%s', desc='%s'",
            dp_obj.name, dp_obj.description, data_product_object.name,
            data_product_object.description)

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        dataset_modified.clear()

        rdt['time'] = np.arange(20, 40)

        # While suspended, a published granule must NOT modify the dataset
        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        # After re-activation the same granule must be ingested
        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        # The dataset now holds both batches: times 0..19 and 20..39
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))

        # Re-activation must not have created a second dataset
        dataset_ids, _ = self.rrclient.find_objects(dp_id,
                                                    PRED.hasDataset,
                                                    id_only=True)
        self.assertEqual(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            self.rrclient.read(dp_id)

        #------------------------------------------------------------------------------------------------
        # check that the four InformationContentStatusEvents were received
        #------------------------------------------------------------------------------------------------
        expected_events = 4
        deadline = time.time() + 60  # one overall budget, not 60s per event
        caught_events = []

        while len(caught_events) < expected_events:
            remaining = deadline - time.time()
            if remaining <= 0:
                break
            try:
                caught_events.append(queue.get(timeout=remaining))
            except gevent.queue.Empty:
                # Fall through to the assertion below so the failure states
                # how many events actually arrived.
                break

        self.assertEqual(
            len(caught_events), expected_events,
            'expected %d InformationContentStatusEvents, got %d: %s' %
            (expected_events, len(caught_events), caught_events))
# Example no. 6
# 0
class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self):
        '''
        Starts the container with the r2deploy services and creates the
        service clients and shared state used by the helper and test methods
        '''
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Service clients
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        # Shared test state
        self.event = Event()  # set by validate_granule_subscription
        self.exchange_space_name = 'test_granules'
        self.exchange_point_name = 'science_data'
        self.i = 0  # counter used to build unique resource names
        self.cci = 0

    #--------------------------------------------------------------------------------
    # Helper/Utility methods
    #--------------------------------------------------------------------------------
        
    def create_dataset(self, parameter_dict_id=''):
        '''
        Creates a dataset named 'test_dataset_<self.i>' and schedules its
        deletion as a test cleanup.
            When no parameter dictionary id is given, the id of
            'ctd_parsed_param_dict' is looked up and used instead.
        Returns the id of the new dataset.
        '''
        pdict_id = parameter_dict_id or self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)

        new_dataset = Dataset('test_dataset_%i' % self.i)
        new_dataset_id = self.dataset_management.create_dataset(
            new_dataset, parameter_dictionary_id=pdict_id)
        self.addCleanup(self.dataset_management.delete_dataset, new_dataset_id)
        return new_dataset_id
    
    def get_datastore(self, dataset_id):
        '''
        Returns the datastore (SCIDATA profile) named by the dataset's
        datastore_name.
            Primarily a workaround for a bug where integration tests in multiple
            containers may delete a CouchDB datastore while the other containers
            remain unaware of the datastore's new state.
        '''
        datastore_name = self.dataset_management.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)
    
    def get_ingestion_config(self):
        '''
        Returns the id of the first IngestionConfiguration resource found in
        the resource registry.
            The configuration is expected to exist already: it should have been
            created by the bootstrap service configured through r2deploy.yml.
        '''
        config_ids, _ = self.resource_registry.find_resources(
            restype=RT.IngestionConfiguration,
            id_only=True)
        first_config_id = config_ids[0]
        return first_config_id

    def launch_producer(self, stream_id=''):
        '''
        Spawns a BetterDataProducer process configured with the given stream
        id and schedules its termination as a test cleanup
        '''
        producer_config = {'process': {'stream_id': stream_id}}
        producer_pid = self.container.spawn_process(
            'better_data_producer',
            'ion.processes.data.example_data_producer',
            'BetterDataProducer',
            producer_config)
        self.addCleanup(self.container.terminate_process, producer_pid)

    def make_simple_dataset(self):
        '''
        Creates a stream definition, a stream (on exchange point 'xp1') and a
        dataset, all based on 'ctd_parsed_param_dict' and named after the
        current value of self.i, which is incremented afterwards.
            Cleanups are registered for the stream definition and the stream.
        Returns the tuple (stream_id, route, stream_def_id, dataset_id).
        '''
        dm = self.dataset_management
        pubsub = self.pubsub_management

        pdict_id = dm.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)

        stream_def_id = pubsub.create_stream_definition(
            'ctd data %i' % self.i,
            parameter_dictionary_id=pdict_id)
        self.addCleanup(pubsub.delete_stream_definition, stream_def_id)

        stream_id, route = pubsub.create_stream(
            'ctd stream %i' % self.i,
            'xp1',
            stream_definition_id=stream_def_id)
        self.addCleanup(pubsub.delete_stream, stream_id)

        dataset_id = self.create_dataset(pdict_id)

        self.i += 1
        return stream_id, route, stream_def_id, dataset_id

    def publish_hifi(self,stream_id,stream_route,offset=0):
        '''
        Publishes one granule of deterministic data on the stream:
        'time' and 'temp' both hold the ten values
        offset*10 .. offset*10 + 9
        '''
        publisher = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def_id = self.pubsub_management.read_stream_definition(stream_id=stream_id)._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        first_value = offset * 10
        for field in ('time', 'temp'):
            rdt[field] = np.arange(10) + first_value

        publisher.publish(rdt.to_granule())

    def publish_fake_data(self,stream_id, route):
        '''
        Publishes four consecutive granules through publish_hifi,
        using offsets 0, 1, 2 and 3
        '''
        for granule_offset in range(4):
            self.publish_hifi(stream_id, route, granule_offset)

    def start_ingestion(self, stream_id, dataset_id):
        '''
        Asks ingestion management to persist the given stream into the given
        dataset, using the ingestion configuration from get_ingestion_config
        '''
        config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=config_id,
            dataset_id=dataset_id)
    
    def stop_ingestion(self, stream_id):
        '''
        Asks ingestion management to stop persisting the given stream, using
        the ingestion configuration from get_ingestion_config
        '''
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=config_id)

    def validate_granule_subscription(self, msg, route, stream_id):
        '''
        Validation for granule format
            Subscriber-style callback: ignores empty ({}) messages, asserts
            that anything else is a Granule, logs its contents and then sets
            self.event to signal that a valid granule was received.
            route and stream_id are accepted but not used here.
        '''
        if msg == {}:
            return
        # Check the type BEFORE parsing: otherwise a malformed message would
        # blow up inside load_from_granule and the descriptive assertion
        # message below would never be reported.
        self.assertIsInstance(msg,Granule,'Message is improperly formatted. (%s)' % type(msg))
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id='',data_size=40, timeout=40):
        '''
        Loops until there is a sufficient amount of data in the dataset
            Polls every 0.2 seconds until the last data point of the dataset
            has a 'time' value that is not the fill value AND the 'time'
            extents reported by dataset management reach data_size.

            dataset_id - dataset to poll
            data_size  - extents required before returning
            timeout    - seconds to wait overall (default 40); when it expires
                         the gevent.Timeout raised by the context manager
                         propagates to the caller
        '''
        with gevent.Timeout(timeout):
            while True:
                # NOTE(review): assumes dataset_extents returns a value that is
                # directly comparable with data_size -- confirm.
                extents = self.dataset_management.dataset_extents(dataset_id, 'time')
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt     = RecordDictionaryTool.load_from_granule(granule)

                times = rdt['time']
                # Explicit emptiness check: plain truthiness of a numpy array
                # is False for a single sample equal to 0 and raises for
                # arrays with more than one element.
                has_sample = times is not None and len(times) > 0
                if has_sample and times[0] != rdt._pdict.get_context('time').fill_value and extents >= data_size:
                    return
                gevent.sleep(0.2)


    #--------------------------------------------------------------------------------
    # Test Methods
    #--------------------------------------------------------------------------------

    def test_dm_end_2_end(self):
        '''
        Ingest a produced stream into a dataset, then read the data back three
        ways: as a single granule, through a streamed replay, and as a slice.
        '''
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Start from the precompiled parameter dictionary with basic ctd fields
        base_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(base_pdict_id, id_only=True)

        # Extend it with a field that supports binary data input and another
        # that supports dictionary elements.
        for field_name, type_factory in (('binary', ArrayType), ('records', RecordType)):
            extra_context = ParameterContext(field_name, param_type=type_factory())
            context_ids.append(
                self.dataset_management.create_parameter_context(field_name, extra_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')

        stream_definition = self.pubsub_management.create_stream_definition(
            'ctd data', parameter_dictionary_id=pdict_id)

        stream_id, _ = self.pubsub_management.create_stream(
            'producer',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_definition)

        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - Call persist_data_stream to set up the subscription for the
        #   ingestion workers on the stream, which causes the data to be persisted
        #--------------------------------------------------------------------------------
        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingest_config_id,
            dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------
        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------
        whole_granule = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(whole_granule, Granule)
        rdt = RecordDictionaryTool.load_from_granule(whole_granule)
        first_times_ok = (rdt['time'][:10] == np.arange(10)).all()
        self.assertTrue(first_times_ok, '%s' % rdt['time'][:])
        first_binary_ok = (rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all()
        self.assertTrue(first_binary_ok)

        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, _ = self.pubsub_management.create_stream(
            'replay_out',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_definition)
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        sub_id = self.pubsub_management.create_subscription(
            self.exchange_space_name, stream_ids=[replay_stream_id])
        # Cleanups run in reverse registration order, so the subscription is
        # deactivated before it is deleted.
        self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
        self.pubsub_management.activate_subscription(sub_id)
        self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)
        listener = StandaloneStreamSubscriber(
            self.exchange_space_name, self.validate_granule_subscription)
        listener.start()
        self.addCleanup(listener.stop)

        self.data_retriever.start_replay_agent(self.replay_id)

        agent_ready = replay_client.await_agent_ready(5)
        self.assertTrue(agent_ready, 'The process never launched')
        replay_client.start_replay()

        # self.event was cleared at the top of the test; wait for it to be set.
        self.assertTrue(self.event.wait(10))

        self.data_retriever.cancel_replay_agent(self.replay_id)

        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------
        sliced_granule = self.data_retriever.retrieve(
            dataset_id=dataset_id, query={'tdoa':slice(0,5)})
        sliced_rdt = RecordDictionaryTool.load_from_granule(sliced_granule)
        comparison = sliced_rdt['time'] == np.arange(5)
        # The comparison can collapse to a plain bool instead of an array.
        if isinstance(comparison, bool):
            times_match = comparison
        else:
            times_match = comparison.all()
        self.assertTrue(times_match)


    def test_coverage_transform(self):
        '''
        Publish one filled parsed record dictionary, wait for it to be
        ingested, and compare the retrieved time/temp values and the *_L1,
        density and salinity values against their expected numbers.
        '''
        helper = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = helper.create_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)

        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id)
        self.addCleanup(
            self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        rdt_in = helper.get_rdt(stream_def_id)
        helper.fill_parsed_rdt(rdt_in)

        # The monitor must exist before publishing so the ingestion
        # notification is not missed.
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(rdt_in.to_granule())
        self.assertTrue(dataset_monitor.wait())

        rdt_out = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(dataset_id))

        # Values published directly should come back unchanged.
        for published_name in ('time', 'temp'):
            np.testing.assert_array_almost_equal(rdt_out[published_name], rdt_in[published_name])

        # Expected values for the remaining parameters, checked in order.
        expected_values = (
            ('conductivity_L1', np.array([42.914])),
            ('temp_L1', np.array([20.])),
            ('pressure_L1', np.array([3.068])),
            ('density', np.array([1021.7144739593881], dtype='float32')),
            ('salinity', np.array([30.935132729668283], dtype='float32')),
        )
        for param_name, expected in expected_values:
            np.testing.assert_allclose(rdt_out[param_name], expected)


    def test_ingestion_pause(self):
        '''
        Check that a granule published while the stream is paused is not
        ingested until the stream is resumed, and that all twenty time values
        are present afterwards.
        '''
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        ingestion_config_id = self.get_ingestion_config()
        self.start_ingestion(ctd_stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, ctd_stream_id)

        rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt_in['time'] = np.arange(10)

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        # First granule: ingestion is running, so the monitor must fire.
        publisher.publish(rdt_in.to_granule())
        self.assertTrue(monitor.wait())
        # Retrieval is exercised here but its result is not inspected.
        self.data_retriever.retrieve(dataset_id)

        # Second granule: published while paused, so nothing should arrive
        # within one second.
        self.ingestion_management.pause_data_stream(ctd_stream_id, ingestion_config_id)
        monitor.event.clear()
        rdt_in['time'] = np.arange(10,20)
        publisher.publish(rdt_in.to_granule())
        self.assertFalse(monitor.event.wait(1))

        # After resuming, the held granule is expected to be ingested.
        self.ingestion_management.resume_data_stream(ctd_stream_id, ingestion_config_id)
        self.assertTrue(monitor.wait())

        rdt_out = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(dataset_id))
        np.testing.assert_array_almost_equal(rdt_out['time'], np.arange(20))

    def test_last_granule(self):
        '''
        Publish two hifi granules and check that retrieve_last_data_points
        returns the most recent ten (10..19) and five (15..19) time values.
        '''
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        for offset in (0, 1):
            self.publish_hifi(stream_id, route, offset)

        self.wait_until_we_have_enough_granules(dataset_id, 20) # I just need two

        def last_ten_match():
            # True once the last ten time values equal 10..19
            last_granule = self.data_retriever.retrieve_last_data_points(dataset_id, 10)
            last_rdt = RecordDictionaryTool.load_from_granule(last_granule)
            matches = last_rdt['time'] == np.arange(10) + 10
            if isinstance(matches, bool):
                # A plain bool means the comparison was not element-wise.
                return False
            return matches.all()

        self.assertTrue(poll(last_ten_match))

        def last_five_match():
            # True once the last five time values equal 15..19
            last_granule = self.data_retriever.retrieve_last_data_points(dataset_id,5)
            last_rdt = RecordDictionaryTool.load_from_granule(last_granule)
            matches = last_rdt['time'] == np.arange(15,20)
            if isinstance(matches, bool):
                return False
            return matches.all()

        self.assertTrue(poll(last_five_match))

    def test_replay_with_parameters(self):
        '''
        Retrieve a dataset with a query that restricts the time range, the
        stride and the returned parameters, then check the dataset extents.
        '''
        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Start from the precompiled parameter dictionary with basic ctd fields
        base_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(base_pdict_id, id_only=True)

        # Extend it with a field that supports binary data input and another
        # that supports dictionary elements.
        for field_name, type_factory in (('binary', ArrayType), ('records', RecordType)):
            extra_context = ParameterContext(field_name, param_type=type_factory())
            context_ids.append(
                self.dataset_management.create_parameter_context(field_name, extra_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')

        stream_def_id = self.pubsub_management.create_stream_definition(
            'replay_stream', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream(
            'replay_with_params',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=config_id,
            dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_monitor.wait())

        # 2208988800 is subtracted from the 0 and 19 time bounds of the query.
        time_offset = 2208988800
        query = {
            'start_time': 0 - time_offset,
            'end_time':   19 - time_offset,
            'stride_time' : 2,
            'parameters': ['time','temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        # Stride 2 over 0..19 leaves every other time value.
        np.testing.assert_array_equal(rdt['time'], np.arange(0,20,2))
        returned_names = set(rdt.iterkeys())
        self.assertEquals(returned_names, set(['time','temp']))

        extents = self.dataset_management.dataset_extents(
            dataset_id=dataset_id, parameters=['time','temp'])
        for param_name in ('time', 'temp'):
            self.assertTrue(extents[param_name]>=20)

    def test_repersist_data(self):
        '''
        Check that a stream can be unpersisted and persisted again into the
        same dataset without losing or duplicating data: after four hifi
        publishes the retrieved time values must equal 0..39.
        '''
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id,route,0)
        self.publish_hifi(stream_id,route,1)
        self.wait_until_we_have_enough_granules(dataset_id,20)

        # Tear the persistence down and set it up again on the same dataset.
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id,ingestion_configuration_id=config_id,dataset_id=dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)
        self.publish_hifi(stream_id,route,2)
        self.publish_hifi(stream_id,route,3)
        self.wait_until_we_have_enough_granules(dataset_id,40)

        success = False
        # Passing False makes the context manager swallow its own timeout, so
        # an expired wait is reported by the assertion below instead of
        # escaping as a raw gevent.Timeout error.
        with gevent.timeout.Timeout(5, False):
            while True:
                replay_granule = self.data_retriever.retrieve(dataset_id)
                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0,40)
                # A plain bool means the comparison was not element-wise,
                # which is treated as "not there yet".
                if not isinstance(comp,bool) and comp.all():
                    success = True
                    # Leave immediately rather than sleeping another second.
                    break
                gevent.sleep(1)

        self.assertTrue(success, 'dataset never contained time values 0..39')


    @unittest.skip('deprecated')
    def test_correct_time(self):
        '''
        Write twenty NTP timestamps straight into the coverage and check that
        the dataset's temporal bounds come back within two seconds of the
        corresponding unix times.
        '''
        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e.
        #  the conversion factor between unix and NTP time
        ntp_unix_delta = 2208988800

        unix_now = np.floor(time.time())
        unix_ago = unix_now - 20
        ntp_now = unix_now + ntp_unix_delta
        ntp_ago = unix_ago + ntp_unix_delta

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a')
        coverage.insert_timesteps(20)
        coverage.set_parameter_values('time', np.arange(ntp_ago,ntp_now))

        temporal_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)
        lower_bound, upper_bound = temporal_bounds[0], temporal_bounds[1]

        self.assertTrue( np.abs(lower_bound - unix_ago) < 2)
        self.assertTrue( np.abs(upper_bound - unix_now) < 2)


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_out_of_band_retrieve(self):
        '''
        Fill a dataset through ingestion and read it back with
        DataRetrieverService.retrieve_oob, which is called on the service
        class directly rather than through the service client.
        '''
        # Set up the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        # Stop ingestion even when an assertion below fails, as the other
        # tests that call start_ingestion do.
        self.addCleanup(self.stop_ingestion, stream_id)

        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id,40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())

    def publish_and_wait(self, dataset_id, granule):
        '''
        Publish a granule on the first stream associated with the dataset and
        assert that the dataset monitor reports it.

        @param dataset_id  Dataset whose hasStream association is looked up
        @param granule     Granule to publish
        '''
        found_streams, _ = self.resource_registry.find_objects(
            dataset_id, PRED.hasStream, id_only=True)
        target_stream = found_streams[0]
        stream_route = self.pubsub_management.read_stream_route(target_stream)
        publisher = StandaloneStreamPublisher(target_stream, stream_route)

        # Start monitoring before publishing so the notification is not missed.
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        publisher.publish(granule)
        self.assertTrue(monitor.wait())


    def test_sparse_values(self):
        '''
        Publish granules on a stream built from the "sparse" parameter
        dictionary and check the lat/lon values that come back on retrieval:
        45/-71 from the first granule, then 46/-73 after they are changed.
        '''
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_sparse()
        stream_def_id = self.pubsub_management.create_stream_definition('sparse', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        dataset_id = self.create_dataset(pdict_id)
        self.start_ingestion(stream_id,dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        # Publish initial granule
        # the first one has the sparse value set inside it, sets lat to 45 and lon to -71
        # 2208988800 is added to the unix clock to get the NTP-based time used
        # for the timestamp fields.
        ntp_now = time.time() + 2208988800
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = ['']
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        # Reset the monitor so the next wait() reflects the next publish.
        dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        # Check the values and make sure they're correct
        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        np.testing.assert_allclose(rdt_out['temp'], rdt['temp'])
        np.testing.assert_allclose(rdt_out['lat'], np.array([45]))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71]))

        # These parameters were never assigned above; their expected values
        # are the same ones asserted in test_coverage_transform.
        np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))


        # We're going to change the lat/lon
        rdt = ph.get_rdt(stream_def_id)
        # NOTE(review): time is assigned as a bare scalar here, whereas every
        # other assignment in this test uses a one-element list -- confirm the
        # record dictionary accepts both forms.
        rdt['time'] = time.time() + 2208988800
        rdt['lat'] = [46]
        rdt['lon'] = [-73]
        
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()


        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_allclose(rdt_out['time'], rdt['time'])
        
        # The same rdt is reused below, so lat=46 / lon=-73 assigned above are
        # still set on it when each of these nine granules is published.
        for i in xrange(9):
            ntp_now = time.time() + 2208988800
            rdt['time'] = [ntp_now]
            rdt['internal_timestamp'] = [ntp_now]
            rdt['temp'] = [300000]
            rdt['preferred_timestamp'] = ['driver_timestamp']
            rdt['port_timestamp'] = [ntp_now]
            rdt['quality_flag'] = [None]
            rdt['conductivity'] = [4341400]
            rdt['driver_timestamp'] = [ntp_now]
            rdt['pressure'] = [256.8]

            publisher.publish(rdt.to_granule())
            self.assertTrue(dataset_monitor.wait())
            dataset_monitor.reset()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        # NOTE(review): eleven granules are published in total (1 + 1 + 9) but
        # ten values are expected here; presumably one publish does not add a
        # time step of its own -- confirm against the ingestion behaviour.
        np.testing.assert_allclose(rdt_out['pressure'], np.array([256.8] * 10))
        np.testing.assert_allclose(rdt_out['lat'], np.array([45] + [46] * 9))
        np.testing.assert_allclose(rdt_out['lon'], np.array([-71] + [-73] * 9))
class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):

    def setUp(self):
        '''
        Start the container and the r2deploy deployment, create the service
        clients used by the tests, and schedule one ingestion worker process.
        '''
        # Start container
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Service clients
        self.dpsc_cli = DataProductManagementServiceClient()
        self.rrclient = ResourceRegistryServiceClient()
        self.damsclient = DataAcquisitionManagementServiceClient()
        self.pubsubcli = PubsubManagementServiceClient()
        self.ingestclient = IngestionManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc = UserNotificationServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.identcli = IdentityManagementServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------
        self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

        self.process_definitions = {}
        worker_definition = ProcessDefinition(name='ingestion worker')
        worker_definition.executable = {
            'module':'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class' :'ScienceGranuleIngestionWorker'
        }
        self.process_definitions['ingestion_worker'] = \
            self.process_dispatcher.create_process_definition(process_definition=worker_definition)

        # Process ids recorded here are cancelled again by cleaning_up.
        self.pids = []

        #------------------------------------------------------------------------------------------------
        # First launch the ingestors
        #------------------------------------------------------------------------------------------------
        self.exchange_space = 'science_granule_ingestion'
        self.exchange_point = 'science_data'
        # Exchange names/points recorded here are deleted again by cleaning_up.
        self.exchange_points = [self.exchange_point]
        self.exchange_names = [self.exchange_space]

        worker_config = DotDict()
        worker_config.process.datastore_name = 'datasets'
        worker_config.process.queue_name = self.exchange_space

        worker_pid = self.process_dispatcher.schedule_process(
            self.process_definitions['ingestion_worker'], configuration=worker_config)
        log.debug("the ingestion worker process id: %s", worker_pid)
        self.pids.append(worker_pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        '''
        Best-effort teardown registered by setUp: cancel every process in
        self.pids, clean the ingestion subscriptions, and delete the exchange
        queues and exchange points recorded in self.exchange_names and
        self.exchange_points.
        '''
        # Logged once up front; the count does not change during the loop.
        log.debug("number of pids to be terminated: %s", len(self.pids))
        for pid in self.pids:
            try:
                self.process_dispatcher.cancel_process(pid)
            except Exception:
                # Still best-effort, but only ordinary errors are swallowed
                # (not interpreter-level signals), and the reason is kept in
                # the log instead of being discarded.
                log.debug("could not terminate the process id: %s", pid, exc_info=True)
            else:
                log.debug("Terminated the process: %s", pid)
        IngestionManagementIntTest.clean_subscriptions()

        # The create_* calls are used to obtain a handle on each queue or
        # exchange point so that it can be deleted.
        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def get_datastore(self, dataset_id):
        '''
        Look up the datastore named by the dataset's ``datastore_name`` and
        return it, opened with the SCIDATA profile.

        @param dataset_id  Identifier of the dataset to read
        '''
        datastore_name = self.dataset_management.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)


    @attr('EXT')
    @attr('PREP')
    def test_create_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict')
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=parameter_dictionary._id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------




        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp')

        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 10.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -10.0
        dp_obj.ooi_product_name = "PRODNAME"

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product( data_product= dp_obj,
                                            stream_definition_id=ctd_stream_def_id)
        # Assert that the data product has an associated stream at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertNotEquals(len(stream_ids), 0)

        # Assert that the data product has an associated stream def at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStreamDefinition, RT.StreamDefinition, True)
        self.assertNotEquals(len(stream_ids), 0)

        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Created data product %s', dp_obj)
        #------------------------------------------------------------------------------------------------
        # test creating a new data product with  a stream definition
        #------------------------------------------------------------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
            name='DP2',
            description='some new dp')

        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s' % dp_id2)

        #------------------------------------------------------------------------------------------------
        #make sure data product is associated with stream def
        #------------------------------------------------------------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream, RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s" % s)
            sdefs, _ = self.rrclient.find_objects(s, PRED.hasStreamDefinition, RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s" % sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        group_names = self.dpsc_cli.get_data_product_group_list()
        self.assertIn("PRODNAME", group_names)


        #----------------------------------------------------------------------------------------
        # Create users then notifications to this data product for each user
        #----------------------------------------------------------------------------------------

        # user_1
        user_1 = UserInfo()
        user_1.name = 'user_1'
        user_1.contact.email = '*****@*****.**'

        # user_2
        user_2 = UserInfo()
        user_2.name = 'user_2'
        user_2.contact.email = '*****@*****.**'
        #user1 is a complete user
        self.subject = "/DC=org/DC=cilogon/C=US/O=ProtectNetwork/CN=Roger Unwin A254"
        actor_identity_obj = IonObject("ActorIdentity", {"name": self.subject})
        actor_id = self.identcli.create_actor_identity(actor_identity_obj)

        user_credentials_obj = IonObject("UserCredentials", {"name": self.subject})
        self.identcli.register_user_credentials(actor_id, user_credentials_obj)
        user_id_1 = self.identcli.create_user_info(actor_id, user_1)
        user_id_2, _ = self.rrclient.create(user_2)

        delivery_config1a = IonObject(OT.DeliveryConfiguration, email='*****@*****.**', mode=DeliveryModeEnum.EMAIL, frequency=NotificationFrequencyEnum.BATCH)
        delivery_config1b = IonObject(OT.DeliveryConfiguration, email='*****@*****.**', mode=DeliveryModeEnum.EMAIL, frequency=NotificationFrequencyEnum.BATCH)
        notification_request_1 = NotificationRequest(   name = "notification_1",
            origin=dp_id,
            origin_type="type_1",
            event_type=OT.ResourceLifecycleEvent,
            disabled_by_system = False,
            delivery_configurations=[delivery_config1a, delivery_config1b])

        delivery_config2a = IonObject(OT.DeliveryConfiguration, email='*****@*****.**', mode=DeliveryModeEnum.EMAIL, frequency=NotificationFrequencyEnum.BATCH)
        delivery_config2b = IonObject(OT.DeliveryConfiguration, email='*****@*****.**', mode=DeliveryModeEnum.EMAIL, frequency=NotificationFrequencyEnum.BATCH)
        notification_request_2 = NotificationRequest(   name = "notification_2",
            origin=dp_id,
            origin_type="type_2",
            disabled_by_system = False,
            event_type=OT.DetectionEvent,
            delivery_configurations=[delivery_config2a, delivery_config2b])

        notification_request_1_id = self.unsc.create_notification(notification=notification_request_1, user_id=user_id_1)
        notification_request_2_id = self.unsc.create_notification(notification=notification_request_2, user_id=user_id_2)
        self.unsc.delete_notification(notification_request_1_id)



        # test reading a non-existent data product
        log.debug('reading non-existent data product')

        with self.assertRaises(NotFound):
            dp_obj = self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 20.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -20.0
        # now write the dp back to the registry
        update_result = self.dpsc_cli.update_data_product(dp_obj)


        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEquals(dp_obj.description,'the very first dp')
        self.assertEquals(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Updated data product %s', dp_obj)

        #test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        #validate that there is one active and one retired user notification for this data product
        self.assertEqual(1, len(extended_product.computed.active_user_subscriptions.value))
        self.assertEqual(1, len(extended_product.computed.past_user_subscriptions.value))

        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.product_download_size_estimated.status)
        self.assertEqual(0, extended_product.computed.product_download_size_estimated.value)

        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.parameters.status)
        #log.debug("test_create_data_product: parameters %s" % extended_product.computed.parameters.value)


        def ion_object_encoder(obj):
            return obj.__dict__


        #test prepare for create
        data_product_data = self.dpsc_cli.prepare_data_product_support()

        #print simplejson.dumps(data_product_data, default=ion_object_encoder, indent= 2)

        self.assertEqual(data_product_data._id, "")
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].resources), 2)
        self.assertEqual(len(data_product_data.associations['Dataset'].resources), 0)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].associated_resources), 0)
        self.assertEqual(len(data_product_data.associations['Dataset'].associated_resources), 0)

        #test prepare for update
        data_product_data = self.dpsc_cli.prepare_data_product_support(dp_id)

        #print simplejson.dumps(data_product_data, default=ion_object_encoder, indent= 2)

        self.assertEqual(data_product_data._id, dp_id)
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].resources), 2)

        self.assertEqual(len(data_product_data.associations['Dataset'].resources), 1)

        self.assertEqual(len(data_product_data.associations['StreamDefinition'].associated_resources), 1)
        self.assertEqual(data_product_data.associations['StreamDefinition'].associated_resources[0].s, dp_id)

        self.assertEqual(len(data_product_data.associations['Dataset'].associated_resources), 1)
        self.assertEqual(data_product_data.associations['Dataset'].associated_resources[0].s, dp_id)

        # now 'delete' the data product
        log.debug("deleting data product: %s" % dp_id)
        self.dpsc_cli.delete_data_product(dp_id)

        # Assert that there are no associated streams leftover after deleting the data product
        stream_ids, assoc_ids = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertEquals(len(stream_ids), 0)
        self.assertEquals(len(assoc_ids), 0)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value

        for event in events:
            log.debug("event time: %s" % event.ts_created)

        self.assertTrue(len(events) > 0)

    def test_data_product_stream_def(self):
        """Check that a data product reports the stream definition it was created with.

        Builds a stream definition from the 'ctd_parsed_param_dict' parameter
        dictionary, creates a data product bound to that definition, and asserts
        that get_data_product_stream_definition() returns the same id.
        """
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')
        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        stream_def_id = self.dpsc_cli.get_data_product_stream_definition(dp_id)
        # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(ctd_stream_def_id, stream_def_id)


    def test_derived_data_product(self):
        """Publish to a parent data product and read the data back through a derived one.

        A parent data product is created on a 'ctd_parsed_param_dict' stream
        definition and its persistence is activated. A DERIVED data product
        ('TEMPWAT') is then created with the parent as parent_data_product_id
        and a stream definition limited to the 'time' and 'temp' fields.
        Twenty records are published on the parent's stream; once a
        DatasetModified event arrives for the parent's dataset, the data is
        retrieved through the derived product's dataset and checked for the
        expected time values and field set.

        Raises:
            NotFound: if the parent data product has no associated dataset.
        """
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

        dp = DataProduct(name='Instrument DP')
        dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]

        # Make the derived data product, restricted to the 'time' and 'temp' fields
        simple_stream_def_id = self.pubsubcli.create_stream_definition(name='TEMPWAT stream def', parameter_dictionary_id=pdict_id, available_fields=['time','temp'])
        tempwat_dp = DataProduct(name='TEMPWAT', category=DataProductTypeEnum.DERIVED)
        tempwat_dp_id = self.dpsc_cli.create_data_product(tempwat_dp, stream_definition_id=simple_stream_def_id, parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)

        # Every stream associated with the parent data product must be reported
        # as persisted by the ingestion client
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        # The published granule carries 'pressure' as well, which the derived
        # stream definition does not list
        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        # Subscribe before publishing so the DatasetModified event cannot be missed
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        # Read the data back through the derived product's dataset
        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id, PRED.hasDataset, id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(set(rdt.fields), set(['time','temp']))


    def test_activate_suspend_data_product(self):
        """Exercise activating and suspending data product persistence end to end.

        Steps performed:
          * create a data product on a 'ctd_parsed_param_dict' stream definition;
          * activate persistence, publish a granule and wait for DatasetModified;
          * retrieve the dataset and compare the registry copy of the product;
          * suspend persistence and check that a publish produces no
            DatasetModified event within 2 seconds;
          * re-activate, publish again and check the dataset holds times 0..39;
          * suspend, force-delete the product and check it can no longer be read;
          * expect four InformationContentStatusEvent messages for the product.

        Raises:
            NotFound: if the data product has no associated dataset after
                persistence was activated.
        """
        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # build the data product object
        #------------------------------------------------------------------------------------------------
        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')

        log.debug("Created an IonObject for a data product: %s", dp_obj)

        #------------------------------------------------------------------------------------------------
        # create the data product on the ctd stream definition
        #------------------------------------------------------------------------------------------------
        dp_id = self.dpsc_cli.create_data_product(data_product=dp_obj,
                                                  stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Subscribe to persist events
        #------------------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def info_event_received(message, headers):
            queue.put(message)

        info_es = EventSubscriber(event_type=OT.InformationContentStatusEvent, callback=info_event_received, origin=dp_id, auto_delete=True)
        info_es.start()
        self.addCleanup(info_es.stop)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]

        # Every stream associated with the data product must be reported as
        # persisted by the ingestion client
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        # Subscribe before publishing so the DatasetModified event cannot be missed
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        modified_es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        modified_es.start()
        self.addCleanup(modified_es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk through the data retriever
        #--------------------------------------------------------------------------------
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)

        log.debug("The data retriever was able to replay the dataset that was attached to the data product "
                  "we wanted to be persisted. Therefore the data product was indeed persisted with "
                  "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
                  "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'")

        data_product_object = self.rrclient.read(dp_id)
        self.assertEqual(data_product_object.name, 'DP1')
        self.assertEqual(data_product_object.description, 'some new dp')

        log.debug("Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
                  " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
                  "resource registry, name='%s', desc='%s'",
                  dp_obj.name, dp_obj.description,
                  data_product_object.name, data_product_object.description)

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        dataset_modified.clear()

        rdt['time'] = np.arange(20, 40)

        # While suspended, a publish must not produce a DatasetModified event
        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        # After re-activation the same granule must be ingested
        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))

        # Re-activating must not have attached a second dataset
        dataset_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasDataset, id_only=True)
        self.assertEqual(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)

        # the force-deleted data product must no longer be readable
        with self.assertRaises(NotFound):
            self.rrclient.read(dp_id)

        # Check that the four InformationContentStatusEvents were received.
        # All four share a single 60 second budget: each get() only waits for
        # the time still remaining, and a timeout ends the wait so the count
        # assertion below reports the shortfall instead of an uncaught Empty.
        expected_events = 4
        deadline = time.time() + 60
        caught_events = []
        while len(caught_events) < expected_events:
            remaining = deadline - time.time()
            if remaining <= 0:
                break
            try:
                caught_events.append(queue.get(timeout=remaining))
            except gevent.queue.Empty:
                break

        self.assertEqual(len(caught_events), expected_events)
Exemplo n.º 8
0
class TestActivateInstrumentIntegration(IonIntegrationTestCase):
    def setUp(self):
        """Start the container from r2deploy.yml and create the service clients.

        The deployment is started with bootstrap.use_es set to True, and
        es_cleanup is registered as a cleanup. Also initialises the listener
        bookkeeping attributes and an EventPublisher.
        """
        super(TestActivateInstrumentIntegration, self).setUp()

        # Configuration override handed to the deployment
        deploy_config = DotDict()
        deploy_config.bootstrap.use_es = True

        self._start_container()
        self.addCleanup(TestActivateInstrumentIntegration.es_cleanup)

        self.container.start_rel_from_url('res/deploy/r2deploy.yml',
                                          deploy_config)

        # Clients bound explicitly to this container's node
        node = self.container.node
        self.rrclient = ResourceRegistryServiceClient(node=node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=node)
        self.pubsubcli = PubsubManagementServiceClient(node=node)
        self.imsclient = InstrumentManagementServiceClient(node=node)
        self.dpclient = DataProductManagementServiceClient(node=node)
        self.datasetclient = DatasetManagementServiceClient(node=node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=node)
        self.dataproductclient = DataProductManagementServiceClient(node=node)
        self.dataretrieverclient = DataRetrieverServiceClient(node=node)

        # Clients created without an explicit node
        self.dataset_management = DatasetManagementServiceClient()
        self.usernotificationclient = UserNotificationServiceClient()

        # Listener bookkeeping
        self._data_greenlets = []
        self._no_samples = None
        self._samples_received = []

        self.event_publisher = EventPublisher()

    @staticmethod
    def es_cleanup():
        """Delete the ElasticSearch rivers and indexes used by this test.

        Connects to the host/port configured under 'server.elasticsearch'
        (defaulting to localhost:9200) and, for every index named in
        STD_INDEXES plus the per-system '<sysname>_resources_index' and
        '<sysname>_events_index', issues a river delete followed by an
        index delete through IndexManagementService._es_call.
        """
        es_host = CFG.get_safe('server.elasticsearch.host', 'localhost')
        es_port = CFG.get_safe('server.elasticsearch.port', '9200')
        es = ep.ElasticSearch(host=es_host, port=es_port, timeout=10)

        sys_name = get_sys_name().lower()

        # Copy the keys into a real list: on Python 3 dict.keys() is a view
        # without append(), and a copy also keeps STD_INDEXES itself untouched.
        indexes = list(STD_INDEXES.keys())
        indexes.append('%s_resources_index' % sys_name)
        indexes.append('%s_events_index' % sys_name)

        for index in indexes:
            IndexManagementService._es_call(es.river_couchdb_delete, index)
            IndexManagementService._es_call(es.index_delete, index)

    def create_logger(self, name, stream_id=''):
        """Schedule a StreamGranuleLogger process and return its process id.

        A process definition named '<name>_logger' is registered with the
        process dispatcher and then scheduled with the given stream id in
        its 'process' configuration.

        :param name: prefix for the process definition name
        :param stream_id: stream id passed to the process configuration
        :return: the value returned by schedule_process
        """
        # Process definition pointing at the granule logger implementation
        logger_definition = ProcessDefinition(name=name + '_logger')
        logger_definition.executable = {
            'module': 'ion.processes.data.stream_granule_logger',
            'class': 'StreamGranuleLogger',
        }
        procdef_id = self.processdispatchclient.create_process_definition(
            process_definition=logger_definition)

        return self.processdispatchclient.schedule_process(
            process_definition_id=procdef_id,
            configuration={'process': {'stream_id': stream_id}})

    def _create_notification(self,
                             user_name='',
                             instrument_id='',
                             product_id=''):
        """Create a user and subscribe it to two notifications.

        One notification is a 'ResourceLifecycleEvent' request whose origin is
        the instrument; the other is a 'DetectionEvent' request whose origin is
        the data product. A UserInfo resource is created in the resource
        registry and both requests are registered for it.

        :param user_name: name of the user; also the local part of its email
        :param instrument_id: origin of the instrument notification
        :param product_id: origin of the data product notification
        :return: id of the created user resource
        """
        #--------------------------------------------------------------------------------------
        # Make notification request objects
        #--------------------------------------------------------------------------------------

        notification_request_1 = NotificationRequest(
            name='notification_1',
            origin=instrument_id,
            origin_type="instrument",
            event_type='ResourceLifecycleEvent')

        notification_request_2 = NotificationRequest(
            name='notification_2',
            origin=product_id,
            origin_type="data product",
            event_type='DetectionEvent')

        #--------------------------------------------------------------------------------------
        # Create a user and get the user_id
        #--------------------------------------------------------------------------------------

        user = UserInfo()
        user.name = user_name
        # The template must contain a %s placeholder: applying % to a literal
        # without one raises "TypeError: not all arguments converted".
        # example.com is a domain reserved for documentation and testing.
        user.contact.email = '%s@example.com' % user_name

        user_id, _ = self.rrclient.create(user)

        #--------------------------------------------------------------------------------------
        # Create notification
        #--------------------------------------------------------------------------------------

        self.usernotificationclient.create_notification(
            notification=notification_request_1, user_id=user_id)
        self.usernotificationclient.create_notification(
            notification=notification_request_2, user_id=user_id)
        log.debug(
            "test_activateInstrumentSample: create_user_notifications user_id %s",
            str(user_id))

        return user_id

    def get_datastore(self, dataset_id):
        """Return the datastore named by the given dataset.

        Reads the dataset through the dataset management client and asks the
        container's datastore manager for the datastore with that
        datastore_name, using the SCIDATA profile.
        """
        datastore_name = self.datasetclient.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)

    def _check_computed_attributes_of_extended_instrument(
            self, expected_instrument_device_id='', extended_instrument=None):
        """Assert the computed attributes of an extended instrument resource.

        Checks the wrapper type of each computed attribute, the expected
        roll-up status values, and that exactly one user notification request
        is present and refers to the expected instrument device.
        """
        computed = extended_instrument.computed

        # Each computed attribute must be an instance of the listed value type
        expected_types = (
            ('firmware_version', ComputedFloatValue),
            ('last_data_received_datetime', ComputedFloatValue),
            ('last_calibration_datetime', ComputedFloatValue),
            ('uptime', ComputedStringValue),
            ('power_status_roll_up', ComputedIntValue),
            ('communications_status_roll_up', ComputedIntValue),
            ('data_status_roll_up', ComputedIntValue),
            ('location_status_roll_up', ComputedIntValue),
        )
        for attr_name, value_type in expected_types:
            self.assertIsInstance(getattr(computed, attr_name), value_type)

        # Expected roll-up status values
        expected_statuses = (
            ('communications_status_roll_up', StatusType.STATUS_WARNING),
            ('data_status_roll_up', StatusType.STATUS_OK),
            ('power_status_roll_up', StatusType.STATUS_WARNING),
        )
        for attr_name, status in expected_statuses:
            self.assertEqual(getattr(computed, attr_name).value, status)

        # Exactly one notification request is expected for the instrument.
        # (An earlier note in this test says this count will not hold without
        # elasticsearch.)
        self.assertEqual(1, len(computed.user_notification_requests.value))
        notification = computed.user_notification_requests.value[0]
        self.assertEqual(notification.origin, expected_instrument_device_id)
        self.assertEqual(notification.origin_type, "instrument")
        self.assertEqual(notification.event_type, 'ResourceLifecycleEvent')

    def _check_computed_attributes_of_extended_product(
            self, expected_data_product_id='', extended_data_product=None):
        """Assert the computed attributes of an extended data product resource.

        Checks the resource id, the wrapper type of each computed attribute,
        the last granule's quality flag, the length of data_datetime, and that
        the first user notification request refers to the expected data
        product.
        """
        self.assertEqual(expected_data_product_id, extended_data_product._id)
        log.debug("extended_data_product.computed: %s",
                  extended_data_product.computed)

        computed = extended_data_product.computed

        # Each computed attribute must be an instance of the listed value type
        expected_types = (
            ('product_download_size_estimated', ComputedIntValue),
            ('number_active_subscriptions', ComputedIntValue),
            ('data_url', ComputedStringValue),
            ('stored_data_size', ComputedIntValue),
            ('recent_granules', ComputedDictValue),
            ('parameters', ComputedListValue),
            ('recent_events', ComputedEventListValue),
            ('provenance', ComputedDictValue),
            ('user_notification_requests', ComputedListValue),
            ('active_user_subscriptions', ComputedListValue),
            ('past_user_subscriptions', ComputedListValue),
            ('last_granule', ComputedDictValue),
            ('is_persisted', ComputedIntValue),
            ('data_contents_updated', ComputedStringValue),
            ('data_datetime', ComputedListValue),
        )
        for attr_name, value_type in expected_types:
            self.assertIsInstance(getattr(computed, attr_name), value_type)

        # The exact quality flag text keeps changing to fit UI capabilities,
        # so only look for the 'ok' substring.
        quality_flag = computed.last_granule.value['quality_flag']
        self.assertTrue('ok' in quality_flag)
        self.assertEqual(2, len(computed.data_datetime.value))

        # The first notification request must target the data product
        notification = computed.user_notification_requests.value[0]
        self.assertEqual(notification.origin, expected_data_product_id)
        self.assertEqual(notification.origin_type, "data product")
        self.assertEqual(notification.event_type, 'DetectionEvent')

    @attr('LOCOINT')
    @unittest.skipIf(not use_es, 'No ElasticSearch')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    @patch.dict(CFG, {'endpoint': {'receive': {'timeout': 60}}})
    def test_activateInstrumentSample(self):

        self.loggerpids = []

        # Create InstrumentModel
        instModel_obj = IonObject(RT.InstrumentModel,
                                  name='SBE37IMModel',
                                  description="SBE37IMModel")
        instModel_id = self.imsclient.create_instrument_model(instModel_obj)
        log.debug('new InstrumentModel id = %s ', instModel_id)

        #Create stream alarms
        """
        test_two_sided_interval
        Test interval alarm and alarm event publishing for a closed
        inteval.
        """

        #        kwargs = {
        #            'name' : 'test_sim_warning',
        #            'stream_name' : 'parsed',
        #            'value_id' : 'temp',
        #            'message' : 'Temperature is above test range of 5.0.',
        #            'type' : StreamAlarmType.WARNING,
        #            'upper_bound' : 5.0,
        #            'upper_rel_op' : '<'
        #        }

        kwargs = {
            'name': 'temperature_warning_interval',
            'stream_name': 'parsed',
            'value_id': 'temp',
            'message':
            'Temperature is below the normal range of 50.0 and above.',
            'type': StreamAlarmType.WARNING,
            'lower_bound': 50.0,
            'lower_rel_op': '<'
        }

        # Create alarm object.
        alarm = {}
        alarm['type'] = 'IntervalAlarmDef'
        alarm['kwargs'] = kwargs

        raw_config = StreamConfiguration(
            stream_name='raw',
            parameter_dictionary_name='ctd_raw_param_dict',
            records_per_granule=2,
            granule_publish_rate=5)
        parsed_config = StreamConfiguration(
            stream_name='parsed',
            parameter_dictionary_name='ctd_parsed_param_dict',
            records_per_granule=2,
            granule_publish_rate=5,
            alarms=[alarm])

        # Create InstrumentAgent
        instAgent_obj = IonObject(
            RT.InstrumentAgent,
            name='agent007',
            description="SBE37IMAgent",
            driver_uri=
            "http://sddevrepo.oceanobservatories.org/releases/seabird_sbe37smb_ooicore-0.0.1a-py2.7.egg",
            stream_configurations=[raw_config, parsed_config])
        instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj)
        log.debug('new InstrumentAgent id = %s', instAgent_id)

        self.imsclient.assign_instrument_model_to_instrument_agent(
            instModel_id, instAgent_id)

        # Create InstrumentDevice
        log.debug(
            'test_activateInstrumentSample: Create instrument resource to represent the SBE37 (SA Req: L4-CI-SA-RQ-241) '
        )
        instDevice_obj = IonObject(RT.InstrumentDevice,
                                   name='SBE37IMDevice',
                                   description="SBE37IMDevice",
                                   serial_number="12345")
        instDevice_id = self.imsclient.create_instrument_device(
            instrument_device=instDevice_obj)
        self.imsclient.assign_instrument_model_to_instrument_device(
            instModel_id, instDevice_id)

        log.debug(
            "test_activateInstrumentSample: new InstrumentDevice id = %s (SA Req: L4-CI-SA-RQ-241) ",
            instDevice_id)

        port_agent_config = {
            'device_addr': CFG.device.sbe37.host,
            'device_port': CFG.device.sbe37.port,
            'process_type': PortAgentProcessType.UNIX,
            'binary_path': "port_agent",
            'port_agent_addr': 'localhost',
            'command_port': CFG.device.sbe37.port_agent_cmd_port,
            'data_port': CFG.device.sbe37.port_agent_data_port,
            'log_level': 5,
            'type': PortAgentType.ETHERNET
        }

        instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance,
                                          name='SBE37IMAgentInstance',
                                          description="SBE37IMAgentInstance",
                                          port_agent_config=port_agent_config)

        instAgentInstance_id = self.imsclient.create_instrument_agent_instance(
            instAgentInstance_obj, instAgent_id, instDevice_id)

        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()

        parsed_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        parsed_stream_def_id = self.pubsubcli.create_stream_definition(
            name='parsed', parameter_dictionary_id=parsed_pdict_id)

        raw_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_raw_param_dict', id_only=True)
        raw_stream_def_id = self.pubsubcli.create_stream_definition(
            name='raw', parameter_dictionary_id=raw_pdict_id)

        #-------------------------------
        # Create Raw and Parsed Data Products for the device
        #-------------------------------

        dp_obj = IonObject(RT.DataProduct,
                           name='the parsed data',
                           description='ctd stream test',
                           temporal_domain=tdom,
                           spatial_domain=sdom)

        data_product_id1 = self.dpclient.create_data_product(
            data_product=dp_obj, stream_definition_id=parsed_stream_def_id)
        log.debug('new dp_id = %s', data_product_id1)
        self.dpclient.activate_data_product_persistence(
            data_product_id=data_product_id1)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                            data_product_id=data_product_id1)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1,
                                                   PRED.hasStream, None, True)
        log.debug('Data product streams1 = %s', stream_ids)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        dataset_ids, _ = self.rrclient.find_objects(data_product_id1,
                                                    PRED.hasDataset,
                                                    RT.Dataset, True)
        log.debug('Data set for data_product_id1 = %s', dataset_ids[0])
        self.parsed_dataset = dataset_ids[0]

        pid = self.create_logger('ctd_parsed', stream_ids[0])
        self.loggerpids.append(pid)

        dp_obj = IonObject(RT.DataProduct,
                           name='the raw data',
                           description='raw stream test',
                           temporal_domain=tdom,
                           spatial_domain=sdom)

        data_product_id2 = self.dpclient.create_data_product(
            data_product=dp_obj, stream_definition_id=raw_stream_def_id)
        log.debug('new dp_id = %s', data_product_id2)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                            data_product_id=data_product_id2)

        self.dpclient.activate_data_product_persistence(
            data_product_id=data_product_id2)

        # setup notifications for the device and parsed data product
        user_id_1 = self._create_notification(user_name='user_1',
                                              instrument_id=instDevice_id,
                                              product_id=data_product_id1)
        #---------- Create notifications for another user and verify that we see different computed subscriptions for the two users ---------
        user_id_2 = self._create_notification(user_name='user_2',
                                              instrument_id=instDevice_id,
                                              product_id=data_product_id2)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id2,
                                                   PRED.hasStream, None, True)
        log.debug('Data product streams2 = %s', str(stream_ids))

        # Retrieve the id of the OUTPUT stream from the out Data Product
        dataset_ids, _ = self.rrclient.find_objects(data_product_id2,
                                                    PRED.hasDataset,
                                                    RT.Dataset, True)
        log.debug('Data set for data_product_id2 = %s', dataset_ids[0])
        self.raw_dataset = dataset_ids[0]

        #elastic search debug
        es_indexes, _ = self.container.resource_registry.find_resources(
            restype='ElasticSearchIndex')
        log.debug('ElasticSearch indexes: %s', [i.name for i in es_indexes])
        log.debug('Bootstrap %s', CFG.bootstrap.use_es)

        def start_instrument_agent():
            self.imsclient.start_instrument_agent_instance(
                instrument_agent_instance_id=instAgentInstance_id)

        gevent.joinall([gevent.spawn(start_instrument_agent)])

        #setup a subscriber to alarm events from the device
        self._events_received = []
        self._event_count = 0
        self._samples_out_of_range = 0
        self._samples_complete = False
        self._async_sample_result = AsyncResult()

        def consume_event(*args, **kwargs):
            log.debug(
                'TestActivateInstrument recieved ION event: args=%s, kwargs=%s, event=%s.',
                str(args), str(kwargs), str(args[0]))
            self._events_received.append(args[0])
            self._event_count = len(self._events_received)
            self._async_sample_result.set()

        self._event_subscriber = EventSubscriber(
            event_type=
            'StreamWarningAlarmEvent',  #'StreamWarningAlarmEvent', #  StreamAlarmEvent
            callback=consume_event,
            origin=instDevice_id)
        self._event_subscriber.start()

        #cleanup
        self.addCleanup(self.imsclient.stop_instrument_agent_instance,
                        instrument_agent_instance_id=instAgentInstance_id)

        def stop_subscriber():
            self._event_subscriber.stop()
            self._event_subscriber = None

        self.addCleanup(stop_subscriber)

        #wait for start
        inst_agent_instance_obj = self.imsclient.read_instrument_agent_instance(
            instAgentInstance_id)
        gate = ProcessStateGate(self.processdispatchclient.read_process,
                                inst_agent_instance_obj.agent_process_id,
                                ProcessStateEnum.RUNNING)
        self.assertTrue(
            gate. await (30),
            "The instrument agent instance (%s) did not spawn in 30 seconds" %
            inst_agent_instance_obj.agent_process_id)

        log.debug('Instrument agent instance obj: = %s',
                  str(inst_agent_instance_obj))

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = ResourceAgentClient(
            instDevice_id,
            to_name=inst_agent_instance_obj.agent_process_id,
            process=FakeProcess())

        log.debug("test_activateInstrumentSample: got ia client %s",
                  str(self._ia_client))

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        retval = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentSample: initialize %s", str(retval))
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.INACTIVE)

        log.debug("(L4-CI-SA-RQ-334): Sending go_active command ")
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrument: return value from go_active %s",
                  str(reply))
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        cmd = AgentCommand(command=ResourceAgentEvent.GET_RESOURCE_STATE)
        retval = self._ia_client.execute_agent(cmd)
        state = retval.result
        log.debug(
            "(L4-CI-SA-RQ-334): current state after sending go_active command %s",
            str(state))

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentSample: run %s", str(reply))
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=ResourceAgentEvent.PAUSE)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.STOPPED)

        cmd = AgentCommand(command=ResourceAgentEvent.RESUME)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=ResourceAgentEvent.CLEAR)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=SBE37ProtocolEvent.ACQUIRE_SAMPLE)
        for i in xrange(10):
            retval = self._ia_client.execute_resource(cmd)
            log.debug("test_activateInstrumentSample: return from sample %s",
                      str(retval))

        log.debug("test_activateInstrumentSample: calling reset ")
        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentSample: return from reset %s",
                  str(reply))

        self._samples_complete = True

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC Call to start_retreive
        #--------------------------------------------------------------------------------

        replay_data = self.dataretrieverclient.retrieve(self.parsed_dataset)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        log.debug("test_activateInstrumentSample: RDT parsed: %s",
                  str(rdt.pretty_print()))
        temp_vals = rdt['temp']
        self.assertEquals(len(temp_vals), 10)
        log.debug("test_activateInstrumentSample: all temp_vals: %s",
                  temp_vals)

        #out_of_range_temp_vals = [i for i in temp_vals if i > 5]
        out_of_range_temp_vals = [i for i in temp_vals if i < 50.0]
        log.debug("test_activateInstrumentSample: Out_of_range_temp_vals: %s",
                  out_of_range_temp_vals)
        self._samples_out_of_range = len(out_of_range_temp_vals)

        # if no bad values were produced, then do not wait for an event
        if self._samples_out_of_range == 0:
            self._async_sample_result.set()

        log.debug("test_activateInstrumentSample: _events_received: %s",
                  self._events_received)
        log.debug("test_activateInstrumentSample: _event_count: %s",
                  self._event_count)

        self._async_sample_result.get(timeout=CFG.endpoint.receive.timeout)

        replay_data = self.dataretrieverclient.retrieve(self.raw_dataset)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        log.debug("RDT raw: %s", str(rdt.pretty_print()))

        raw_vals = rdt['raw']
        self.assertEquals(len(raw_vals), 10)

        log.debug("l4-ci-sa-rq-138")
        """
        Physical resource control shall be subject to policy

        Instrument management control capabilities shall be subject to policy

        The actor accessing the control capabilities must be authorized to send commands.

        note from maurice 2012-05-18: Talk to tim M to verify that this is policy.  If it is then talk with Stephen to
                                      get an example of a policy test and use that to create a test stub that will be
                                      completed when we have instrument policies.

        Tim M: The "actor", aka observatory operator, will access the instrument through ION.

        """

        #--------------------------------------------------------------------------------
        # Get the extended data product to see if it contains the granules
        #--------------------------------------------------------------------------------
        extended_product = self.dpclient.get_data_product_extension(
            data_product_id=data_product_id1, user_id=user_id_1)

        def poller(extended_product):
            return len(extended_product.computed.user_notification_requests.
                       value) == 1

        poll(poller, extended_product, timeout=30)

        self._check_computed_attributes_of_extended_product(
            expected_data_product_id=data_product_id1,
            extended_data_product=extended_product)

        #--------------------------------------------------------------------------------
        #put some events into the eventsdb to test - this should set the comms and data status to WARNING
        #--------------------------------------------------------------------------------

        t = get_ion_ts()
        self.event_publisher.publish_event(ts_created=t,
                                           event_type='DeviceStatusEvent',
                                           origin=instDevice_id,
                                           state=DeviceStatusType.OUT_OF_RANGE,
                                           values=[200])
        self.event_publisher.publish_event(
            ts_created=t,
            event_type='DeviceCommsEvent',
            origin=instDevice_id,
            state=DeviceCommsType.DATA_DELIVERY_INTERRUPTION,
            lapse_interval_seconds=20)

        #--------------------------------------------------------------------------------
        # Get the extended instrument
        #--------------------------------------------------------------------------------

        extended_instrument = self.imsclient.get_instrument_device_extension(
            instrument_device_id=instDevice_id, user_id=user_id_1)
        self._check_computed_attributes_of_extended_instrument(
            expected_instrument_device_id=instDevice_id,
            extended_instrument=extended_instrument)

        #--------------------------------------------------------------------------------
        # For the second user, check the extended data product and the extended intrument
        #--------------------------------------------------------------------------------
        extended_product = self.dpclient.get_data_product_extension(
            data_product_id=data_product_id2, user_id=user_id_2)
        self._check_computed_attributes_of_extended_product(
            expected_data_product_id=data_product_id2,
            extended_data_product=extended_product)

        #---------- Put some events into the eventsdb to test - this should set the comms and data status to WARNING  ---------

        t = get_ion_ts()
        self.event_publisher.publish_event(ts_created=t,
                                           event_type='DeviceStatusEvent',
                                           origin=instDevice_id,
                                           state=DeviceStatusType.OUT_OF_RANGE,
                                           values=[200])
        self.event_publisher.publish_event(
            ts_created=t,
            event_type='DeviceCommsEvent',
            origin=instDevice_id,
            state=DeviceCommsType.DATA_DELIVERY_INTERRUPTION,
            lapse_interval_seconds=20)

        #--------------------------------------------------------------------------------
        # Get the extended instrument
        #--------------------------------------------------------------------------------

        extended_instrument = self.imsclient.get_instrument_device_extension(
            instrument_device_id=instDevice_id, user_id=user_id_2)
        self._check_computed_attributes_of_extended_instrument(
            expected_instrument_device_id=instDevice_id,
            extended_instrument=extended_instrument)

        #--------------------------------------------------------------------------------
        # Deactivate loggers
        #--------------------------------------------------------------------------------

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)

        self.dpclient.delete_data_product(data_product_id1)
        self.dpclient.delete_data_product(data_product_id2)
Exemplo n.º 9
0
class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self):  # Love the non pep-8 convention
        """
        Prepares the fixture shared by every test in this class

        Starts the container, deploys res/deploy/r2deploy.yml, creates the service clients,
        initialises the bookkeeping attributes, purges the ingestion queue and registers
        stop_all_ingestion as a cleanup.
        """
        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        # Service clients used by the helpers and tests
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        # Bookkeeping: spawned process ids, the event set by the granule validator,
        # exchange names, and the counter used to build unique stream/dataset names
        self.pids = []
        self.event = Event()
        self.exchange_space_name = "test_granules"
        self.exchange_point_name = "science_data"
        self.i = 0
        self.queue_buffer = []
        self.streams = []

        self.purge_queues()
        self.addCleanup(self.stop_all_ingestion)

    def purge_queues(self):
        """
        Purges the "science_granule_ingestion" queue
        """
        self.container.ex_manager.create_xn_queue("science_granule_ingestion").purge()

    def tearDown(self):
        """
        Releases what the test created

        Purges the ingestion queue, terminates every process recorded in self.pids, cleans the
        ingestion subscriptions and deletes each queue recorded in self.queue_buffer (entries are
        either ExchangeNameQueue objects or queue names).
        """
        self.purge_queues()
        for process_id in self.pids:
            self.container.proc_manager.terminate_process(process_id)
        IngestionManagementIntTest.clean_subscriptions()
        for entry in self.queue_buffer:
            if isinstance(entry, ExchangeNameQueue):
                entry.delete()
                continue
            if isinstance(entry, str):
                # Only the name was recorded, so look the queue up before deleting it
                self.container.ex_manager.create_xn_queue(entry).delete()

    # --------------------------------------------------------------------------------
    # Helper/Utility methods
    # --------------------------------------------------------------------------------

    def create_dataset(self, parameter_dict_id=""):
        """
        Creates a time-series dataset and returns its id

        When parameter_dict_id is empty, the "ctd_parsed_param_dict" parameter dictionary is
        looked up by name and used instead. The dataset name embeds the current value of self.i.
        """
        temporal, spatial = time_series_domain()
        spatial_dump = spatial.dump()
        temporal_dump = temporal.dump()

        pdict_id = parameter_dict_id or self.dataset_management.read_parameter_dictionary_by_name(
            "ctd_parsed_param_dict", id_only=True
        )

        return self.dataset_management.create_dataset(
            "test_dataset_%i" % self.i,
            parameter_dictionary_id=pdict_id,
            spatial_domain=spatial_dump,
            temporal_domain=temporal_dump,
        )

    def get_datastore(self, dataset_id):
        """
        Gets an instance of the datastore backing the given dataset
            This method is primarily used to defeat a bug where integration tests in multiple containers may sometimes
            delete a CouchDB datastore and the other containers are unaware of the new state of the datastore.
        """
        store_name = self.dataset_management.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(store_name, DataStore.DS_PROFILE.SCIDATA)

    def get_ingestion_config(self):
        """
        Grab the ingestion configuration from the resource registry

        Returns the id of the first IngestionConfiguration resource found.
        """
        # The ingestion configuration should have been created by the bootstrap service
        # which is configured through r2deploy.yml
        config_ids, _associations = self.resource_registry.find_resources(
            restype=RT.IngestionConfiguration, id_only=True
        )
        first_config_id = config_ids[0]
        return first_config_id

    def launch_producer(self, stream_id=""):
        """
        Launch the producer

        Spawns a BetterDataProducer process configured with the given stream id and records
        its pid in self.pids.
        """
        producer_config = {"process": {"stream_id": stream_id}}
        producer_pid = self.container.spawn_process(
            "better_data_producer",
            "ion.processes.data.example_data_producer",
            "BetterDataProducer",
            producer_config,
        )
        self.pids.append(producer_pid)

    def make_simple_dataset(self):
        """
        Makes a stream, a stream definition and a dataset, the essentials for most of these tests

        Returns the tuple (stream_id, route, stream_definition_id, dataset_id) and increments
        self.i so that the next call produces different stream and dataset names.
        """
        param_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
            "ctd_parsed_param_dict", id_only=True
        )
        definition_id = self.pubsub_management.create_stream_definition(
            "ctd data", parameter_dictionary_id=param_dict_id
        )
        stream_name = "ctd stream %i" % self.i
        stream_id, route = self.pubsub_management.create_stream(
            stream_name, "xp1", stream_definition_id=definition_id
        )

        dataset_id = self.create_dataset(param_dict_id)

        # The returned datastore is not needed here; the call is made for its effect only
        self.get_datastore(dataset_id)
        self.i += 1
        return stream_id, route, definition_id, dataset_id

    def publish_hifi(self, stream_id, stream_route, offset=0):
        """
        Publish deterministic data

        Publishes one granule whose "time" and "temp" fields both hold np.arange(10)
        shifted by offset * 10.
        """
        publisher = StandaloneStreamPublisher(stream_id, stream_route)

        definition_id = self.pubsub_management.read_stream_definition(stream_id=stream_id)._id
        shift = offset * 10
        rdt = RecordDictionaryTool(stream_definition_id=definition_id)
        for field in ("time", "temp"):
            rdt[field] = np.arange(10) + shift
        publisher.publish(rdt.to_granule())

    def publish_fake_data(self, stream_id, route):
        """
        Make four granules

        Calls publish_hifi once for each offset 0, 1, 2 and 3 on the given stream and route.
        """
        # range() iterates the same four values as xrange() on Python 2 and, unlike
        # xrange(), also exists on Python 3
        for offset in range(4):
            self.publish_hifi(stream_id, route, offset)

    def start_ingestion(self, stream_id, dataset_id):
        """
        Starts ingestion/persistence for a given dataset

        Asks the ingestion management service to persist the stream into the dataset, using
        the ingestion configuration returned by get_ingestion_config.
        """
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=self.get_ingestion_config(),
            dataset_id=dataset_id,
        )

    def stop_ingestion(self, stream_id):
        """
        Stops ingestion/persistence for a given stream

        Asks the ingestion management service to unpersist the stream, using the ingestion
        configuration returned by get_ingestion_config.
        """
        self.ingestion_management.unpersist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=self.get_ingestion_config(),
        )

    def stop_all_ingestion(self):
        """
        Best-effort cleanup: stops ingestion for every stream recorded in self.streams

        Each stream is attempted independently, so a failure on one stream (for example one
        the test already stopped itself) does not prevent the remaining streams from being
        stopped. Errors derived from Exception are deliberately ignored; KeyboardInterrupt,
        SystemExit and other BaseException subclasses propagate.
        """
        for stream_id in self.streams:
            try:
                self.stop_ingestion(stream_id)
            except Exception:
                # Cleanup must never fail the test run; move on to the next stream
                continue

    def validate_granule_subscription(self, msg, route, stream_id):
        """
        Validation for granule format

        Used as the subscriber callback in the replay tests. Empty-dict messages are ignored;
        any other message is loaded into a RecordDictionaryTool, logged, asserted to be a
        Granule and then self.event is set. route and stream_id are not used.
        """
        if msg == {}:
            return
        pretty = RecordDictionaryTool.load_from_granule(msg).pretty_print()
        log.info("%s", pretty)
        failure_text = "Message is improperly formatted. (%s)" % type(msg)
        self.assertIsInstance(msg, Granule, failure_text)
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id="", data_size=40):
        """
        Loops until there is a sufficient amount of data in the dataset

        Polls every 0.2 seconds inside a gevent.Timeout(40) block and returns once the last
        data point has a truthy, non-fill "time" value and the "time" extent of the dataset
        is at least data_size.
        """
        with gevent.Timeout(40):
            while True:
                time_extent = self.dataset_management.dataset_extents(dataset_id, "time")[0]
                last_point = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(last_point)
                if (
                    rdt["time"]
                    and rdt["time"][0] != rdt._pdict.get_context("time").fill_value
                    and time_extent >= data_size
                ):
                    return
                gevent.sleep(0.2)

    # --------------------------------------------------------------------------------
    # Test Methods
    # --------------------------------------------------------------------------------

    @attr("SMOKE")
    def test_dm_end_2_end(self):
        # End-to-end pass through data management: a producer publishes onto a persisted
        # stream, the data is read back in one chunk, replayed over a stream, and finally
        # retrieved with a temporal slice.
        # --------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        # --------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext("binary", param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context("binary", bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext("records", param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context("records", rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            "replay_pdict", parameter_context_ids=context_ids, temporal_context="time"
        )

        stream_definition = self.pubsub_management.create_stream_definition(
            "ctd data", parameter_dictionary_id=pdict_id
        )

        stream_id, route = self.pubsub_management.create_stream(
            "producer", exchange_point=self.exchange_point_name, stream_definition_id=stream_definition
        )

        # --------------------------------------------------------------------------------
        # Start persisting the data on the stream
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        # --------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id
        )
        # Register the stream right away so the stop_all_ingestion cleanup also covers
        # a failure anywhere in the remainder of this test
        self.streams.append(stream_id)

        # --------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        # --------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # --------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        # --------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt["time"][:10] == np.arange(10)).all(), "%s" % rdt["time"][:])
        self.assertTrue((rdt["binary"][:10] == np.array(["hi"] * 10, dtype="object")).all())

        # --------------------------------------------------------------------------------
        # Now to try the streamed approach
        # --------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream(
            "replay_out", exchange_point=self.exchange_point_name, stream_definition_id=stream_definition
        )
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream_id
        )
        log.info("Process ID: %s", process_id)

        replay_client = ReplayClient(process_id)

        # --------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        # --------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        try:
            subscriber.xn.bind(replay_route.routing_key, xp)

            self.data_retriever.start_replay_agent(self.replay_id)

            self.assertTrue(replay_client.await_agent_ready(5), "The process never launched")
            replay_client.start_replay()

            self.assertTrue(self.event.wait(10))
        finally:
            # Stop the subscriber even when one of the steps above fails
            subscriber.stop()

        self.data_retriever.cancel_replay_agent(self.replay_id)

        # --------------------------------------------------------------------------------
        # Test the slicing capabilities
        # --------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={"tdoa": slice(0, 5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt["time"] == np.arange(5)
        # The comparison may yield a plain bool instead of an array, so only call .all()
        # when there is an array to reduce
        self.assertTrue(b.all() if not isinstance(b, bool) else b)
        self.stop_ingestion(stream_id)

    @unittest.skip("Doesnt work")
    @attr("LOCOINT")
    @unittest.skipIf(os.getenv("CEI_LAUNCH_TEST", False), "Skip test while in CEI LAUNCH mode")
    def test_replay_pause(self):
        # Exercises pause / resume / stop on a replay process: granules must arrive while the
        # replay runs and must stop arriving after pause_replay() and after stop_replay().
        # NOTE: the test is currently skipped unconditionally by the first decorator.

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext("binary", param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context("binary", bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext("records", param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context("records", rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            "replay_pdict", parameter_context_ids=context_ids, temporal_context="time"
        )

        stream_def_id = self.pubsub_management.create_stream_definition(
            "replay_stream", parameter_dictionary_id=pdict_id
        )
        replay_stream, replay_route = self.pubsub_management.create_stream(
            "replay", "xp1", stream_definition_id=stream_def_id
        )
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_coverage(dataset_id)

        # Fill the coverage directly with 100 time steps and temperatures in [30, 31)
        bb = CoverageCraft(scov)
        bb.rdt["time"] = np.arange(100)
        bb.rdt["temp"] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(
            dataset_id, bb.coverage
        )  # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp("xp1")
        # Record the queue name so that tearDown deletes it
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()

        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))

        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure there's no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))

        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()

    def test_retrieve_and_transform(self):
        # Publishes two granules of CTD data, then retrieves the dataset through the L2
        # salinity transform and checks that no resulting salinity value is zero.

        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(ctd_stream_id, dataset_id)
        # Register the stream right away so the stop_all_ingestion cleanup also covers
        # a failure anywhere in the remainder of this test
        self.streams.append(ctd_stream_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            "ctd_parsed_param_dict", id_only=True
        )
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            "sal data", parameter_dictionary_id=salinity_pdict_id
        )

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt["time"] = np.arange(10)
        rdt["temp"] = np.random.randn(10) * 10 + 30
        rdt["conductivity"] = np.random.randn(10) * 2 + 10
        rdt["pressure"] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        # Second granule: same measurements, next ten time steps
        rdt["time"] = np.arange(10, 20)

        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        granule = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            "ion.processes.data.transforms.ctd.ctd_L2_salinity",
            "CTDL2SalinityTransformAlgorithm",
            kwargs=dict(params=sal_stream_def_id),
        )
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for salinity in rdt["salinity"]:
            # assertNotEqual is the non-deprecated spelling of assertNotEquals
            self.assertNotEqual(salinity, 0)
        self.stop_ingestion(ctd_stream_id)

    def test_last_granule(self):
        # Publishes two ten-point granules (times 0-9 and 10-19) and checks that
        # retrieve_last_data_points returns the newest 10 points and the newest 5 points.
        stream_id, route, _, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        # Register the stream right away so the stop_all_ingestion cleanup also covers
        # a failure anywhere in the remainder of this test
        self.streams.append(stream_id)

        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)

        self.wait_until_we_have_enough_granules(dataset_id, 20)  # I just need two

        def last_points_match(point_count, expected_times):
            # True when the last point_count time values equal expected_times element-wise
            replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, point_count)
            rdt = RecordDictionaryTool.load_from_granule(replay_granule)
            comp = rdt["time"] == expected_times
            if isinstance(comp, bool):
                # A plain bool (instead of an array) is treated as "no match yet" --
                # presumably numpy's result when the two sides cannot be compared
                # element-wise; TODO confirm
                return False
            return comp.all()

        self.assertTrue(poll(lambda: last_points_match(10, np.arange(10) + 10)))
        self.assertTrue(poll(lambda: last_points_match(5, np.arange(15, 20))))

        self.stop_ingestion(stream_id)

    def test_replay_with_parameters(self):
        """Replay a dataset restricted by time window, stride and parameters.

        Builds a parameter dictionary from ``ctd_parsed_param_dict`` plus an
        array-typed and a record-typed context, persists a stream against it,
        launches a producer and then checks the result of a filtered
        ``retrieve`` call as well as the reported dataset extents.
        """
        # 2208988800 is the number of seconds between 1900-01-01 and
        # 1970-01-01; the query times below are shifted by it.
        ntp_offset = 2208988800
        wanted = ("time", "temp")
        dm = self.dataset_management

        # --------------------------------------------------------------------------------
        # Parameter dictionary: precompiled ctd fields plus two extra contexts
        # --------------------------------------------------------------------------------
        base_pdict_id = dm.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)
        context_ids = dm.read_parameter_contexts(base_pdict_id, id_only=True)

        # "binary" supports binary data input, "records" supports dictionary elements.
        for ctx_name, ctx_type in (("binary", ArrayType), ("records", RecordType)):
            context = ParameterContext(ctx_name, param_type=ctx_type())
            context_ids.append(dm.create_parameter_context(ctx_name, context.dump()))

        pdict_id = dm.create_parameter_dictionary(
            "replay_pdict", parameter_context_ids=context_ids, temporal_context="time"
        )

        # --------------------------------------------------------------------------------
        # Stream, dataset and persistence
        # --------------------------------------------------------------------------------
        stream_def_id = self.pubsub_management.create_stream_definition(
            "replay_stream", parameter_dictionary_id=pdict_id
        )
        stream_id, route = self.pubsub_management.create_stream(
            "replay_with_params",
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id,
        )
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )

        # Touch the datastore up front so it exists before data arrives
        # (avoids a race condition).
        self.get_datastore(dataset_id)

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # --------------------------------------------------------------------------------
        # Filtered replay: every second sample of the first 20, two parameters only
        # --------------------------------------------------------------------------------
        query = {
            "start_time": 0 - ntp_offset,
            "end_time": 20 - ntp_offset,
            "stride_time": 2,
            "parameters": list(wanted),
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)
        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)

        time_matches = np.arange(0, 20, 2) == rdt["time"]
        self.assertTrue(time_matches.all(), "%s" % rdt.pretty_print())
        self.assertEquals(set(rdt.iterkeys()), set(wanted))

        extents = dm.dataset_extents(dataset_id=dataset_id, parameters=list(wanted))
        for parameter in wanted:
            self.assertTrue(extents[parameter] >= 20)

        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    def test_repersist_data(self):
        """Verify that a stream keeps filling its dataset after un-persist / re-persist.

        Two batches are published and ingested, persistence is switched off
        and on again for the same dataset, two more batches are published,
        and the full replay is expected to carry ``time`` values 0..39.
        """
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, 20)

        # Toggle persistence on the same stream/dataset pair.
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )

        self.publish_hifi(stream_id, route, 2)
        self.publish_hifi(stream_id, route, 3)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        success = False
        # Timeout(5, False): on expiry the with-block is left silently, so a
        # missing match is reported by the assertion below instead of
        # surfacing as a raised Timeout that bypasses it.
        with gevent.timeout.Timeout(5, False):
            while True:
                replay_granule = self.data_retriever.retrieve(dataset_id)
                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt["time"] == np.arange(0, 40)
                # A plain bool (rather than an array) is treated as "no
                # match yet"; presumably numpy returns it on a shape mismatch.
                if not isinstance(comp, bool) and comp.all():
                    success = True
                    break  # no need to sleep once the data matched
                gevent.sleep(1)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_correct_time(self):
        """Check that temporal bounds of NTP-stamped data come back as unix times.

        Twenty NTP timestamps ending "now" are written straight into the
        coverage; ``dataset_temporal_bounds`` is then expected to be within
        two seconds of the corresponding unix times.
        """
        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970,
        # i.e. the conversion factor between unix and NTP time.
        ntp_offset = 2208988800
        window = 20

        unix_end = np.floor(time.time())
        ntp_end = unix_end + ntp_offset
        unix_start = unix_end - window
        ntp_start = unix_start + ntp_offset

        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()

        # Write the timestamps directly into the coverage, bypassing ingestion.
        coverage = DatasetManagementService._get_coverage(dataset_id)
        coverage.insert_timesteps(window)
        coverage.set_parameter_values("time", np.arange(ntp_start, ntp_end))

        bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)
        lower_error = np.abs(bounds[0] - unix_start)
        self.assertTrue(lower_error < 2)
        upper_error = np.abs(bounds[1] - unix_end)
        self.assertTrue(upper_error < 2)

    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_empty_coverage_time(self):
        """Check that an empty dataset reports the time fill value for both temporal bounds."""
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_coverage(dataset_id)
        bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        # Nothing was written, so both bounds should equal the fill value.
        fill_value = coverage.get_parameter_context("time").fill_value
        self.assertEquals([fill_value, fill_value], bounds)

    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_out_of_band_retrieve(self):
        """Check that ``DataRetrieverService.retrieve_oob`` returns the ingested data."""
        # Set up the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        # Fill the dataset and wait for ingestion to catch up
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Retrieve through the class-level call instead of the service client
        oob_granule = DataRetrieverService.retrieve_oob(dataset_id)
        times = RecordDictionaryTool.load_from_granule(oob_granule)["time"]
        matches = times == np.arange(40)
        self.assertTrue(matches.all())

    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_retrieve_cache(self):
        """Exercise the coverage cache kept on ``DataRetrieverService``.

        Checks, in order, that:
          * a dataset is absent from ``_retrieve_cache`` until
            ``_get_coverage`` is called for it,
          * the second element of the cache entry changes once the refresh
            interval has elapsed and the coverage is fetched again,
          * fetching all ten datasets pushes the first one out of the cache,
          * after new data is ingested the entry disappears from the cache
            within 20 seconds (presumably ingestion invalidates it; confirm
            against the ingestion worker).
        """
        # _refresh_interval is class-level state; restore the original value
        # after the test so it does not leak into other tests.
        self.addCleanup(
            setattr, DataRetrieverService, "_refresh_interval", DataRetrieverService._refresh_interval
        )
        DataRetrieverService._refresh_interval = 1

        datasets = [self.make_simple_dataset() for _ in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_coverage(dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values("time", np.arange(10))
            coverage.set_parameter_values("temp", np.arange(10))

        # Verify cache population and refresh
        dataset_ids = [entry[3] for entry in datasets]
        first_id = dataset_ids[0]
        self.assertNotIn(first_id, DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(first_id)  # populate the cache
        _, age = DataRetrieverService._retrieve_cache[first_id]
        # Verify that the entry is now in there
        self.assertIn(first_id, DataRetrieverService._retrieve_cache)

        # Let the entry go stale, then fetch again and expect a new age.
        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(first_id)  # hit the cache again
        _, age2 = DataRetrieverService._retrieve_cache[first_id]
        self.assertNotEqual(age2, age)

        # Touch every dataset; afterwards the first one is expected to be gone
        # (presumably the cache holds fewer than ten entries).
        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)

        self.assertNotIn(first_id, DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)

        self.assertIn(dataset_id, DataRetrieverService._retrieve_cache)

        # A long refresh interval rules out staleness as the reason for the
        # entry disappearing below.
        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

        evicted = False
        # Timeout(20, False): on expiry the with-block is left silently so the
        # assertion below reports the failure instead of a raised Timeout.
        with gevent.Timeout(20, False):
            while dataset_id in DataRetrieverService._retrieve_cache:
                gevent.sleep(0.1)
            evicted = True

        self.assertTrue(evicted)

    @unittest.skip("Outdated due to ingestion retry")
    @attr("LOCOINT")
    @unittest.skipIf(
        os.getenv("CEI_LAUNCH_TEST", False),
        "Host requires file-system access to coverage files, CEI mode does not support.",
    )
    def test_ingestion_failover(self):
        """Check that an ``ExceptionEvent`` is published when ingestion hits a broken coverage.

        After a first round of data is ingested, the dataset's master HDF5
        file is overwritten with text; publishing more data is then expected
        to produce an ``ExceptionEvent`` with origin ``stream_exception``
        within 10 seconds.
        """
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        event = Event()

        def cb(*args, **kwargs):
            # Any matching event is enough; the payload is not inspected.
            event.set()

        sub = EventSubscriber(event_type="ExceptionEvent", callback=cb, origin="stream_exception")
        sub.start()
        # Stop the subscriber even when a step below fails, so it does not
        # outlive the test.
        try:
            self.publish_fake_data(stream_id, route)
            self.wait_until_we_have_enough_granules(dataset_id, 40)

            file_path = DatasetManagementService._get_coverage_path(dataset_id)
            master_file = os.path.join(file_path, "%s_master.hdf5" % dataset_id)

            # Deliberately corrupt the master file.
            with open(master_file, "w") as f:
                f.write("this will crash HDF")

            self.publish_hifi(stream_id, route, 5)

            self.assertTrue(event.wait(10))
        finally:
            sub.stop()
Exemplo n.º 10
0
class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):

    def setUp(self):
        """Start the container, create the service clients and launch one ingestion worker.

        ``cleaning_up`` is registered as a cleanup at the very end.
        """
        # Start container and deploy the services
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Service clients; the first five are bound to the container node
        node = self.container.node
        self.dpsc_cli = DataProductManagementServiceClient(node=node)
        self.rrclient = ResourceRegistryServiceClient(node=node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=node)
        self.pubsubcli = PubsubManagementServiceClient(node=node)
        self.ingestclient = IngestionManagementServiceClient(node=node)
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc = UserNotificationServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------
        self.db = self.container.datastore_manager.get_datastore(CACHE_DATASTORE_NAME)
        self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

        # Register the ingestion worker process definition
        self.process_definitions = {}
        worker_definition = ProcessDefinition(name='ingestion worker')
        worker_definition.executable = {
            'module': 'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class': 'ScienceGranuleIngestionWorker',
        }
        self.process_definitions['ingestion_worker'] = self.process_dispatcher.create_process_definition(
            process_definition=worker_definition)

        # Bookkeeping consumed by cleaning_up
        self.pids = []
        self.exchange_points = []
        self.exchange_names = []

        #------------------------------------------------------------------------------------------------
        # First launch the ingestors
        #------------------------------------------------------------------------------------------------
        self.exchange_space = 'science_granule_ingestion'
        self.exchange_point = 'science_data'

        worker_config = DotDict()
        worker_config.process.datastore_name = 'datasets'
        worker_config.process.queue_name = self.exchange_space

        self.exchange_names.append(self.exchange_space)
        self.exchange_points.append(self.exchange_point)

        worker_pid = self.process_dispatcher.schedule_process(
            self.process_definitions['ingestion_worker'], configuration=worker_config)
        log.debug("the ingestion worker process id: %s", worker_pid)
        self.pids.append(worker_pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        """Best-effort teardown of what ``setUp`` created.

        Cancels every process recorded in ``self.pids`` (failures are logged
        and skipped), cleans up ingestion subscriptions, then deletes the
        exchange queues and exchange points recorded in
        ``self.exchange_names`` and ``self.exchange_points``.
        """
        log.debug("number of pids to be terminated: %s", len(self.pids))
        for pid in self.pids:
            try:
                self.process_dispatcher.cancel_process(pid)
            except Exception as exc:
                # Keep going: one process that cannot be cancelled must not
                # prevent the remaining cleanup. Only Exception is caught so
                # interpreter-level signals (KeyboardInterrupt, SystemExit)
                # still propagate.
                log.debug("could not terminate the process id: %s (%s)", pid, exc)
            else:
                log.debug("Terminated the process: %s", pid)
        IngestionManagementIntTest.clean_subscriptions()

        # The exchange manager hands back an object for the queue / point,
        # which is then deleted.
        for xn in self.exchange_names:
            self.container.ex_manager.create_xn_queue(xn).delete()
        for xp in self.exchange_points:
            self.container.ex_manager.create_xp(xp).delete()

    def get_datastore(self, dataset_id):
        """Return the datastore (SCIDATA profile) named by the given dataset's ``datastore_name``."""
        name = self.dataset_management.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(name, DataStore.DS_PROFILE.SCIDATA)


    def test_create_data_product(self):
        """Walk a data product through create, read, update, extension lookup and delete.

        Two data products are created against the same stream definition.
        The test checks the stream-definition association of the second one,
        that reading an unknown id raises ``NotFound``, that an updated
        description is persisted, selected computed attributes of the
        extended resource, and that a force-deleted product can no longer be
        read.
        """
        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        # id_only=True, as in the sibling tests of this class, so that an id is
        # handed to create_stream_definition as parameter_dictionary_id.
        parameter_dictionary_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=parameter_dictionary_id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a first data product (keyword arguments)
        #------------------------------------------------------------------------------------------------

        # Generic time-series data domain creation
        tdom, sdom = time_series_domain()

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain=tdom.dump(),
            spatial_domain=sdom.dump())

        log.debug("Created an IonObject for a data product: %s", dp_obj)

        dp_id = self.dpsc_cli.create_data_product(data_product=dp_obj,
                                                  stream_definition_id=ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        #------------------------------------------------------------------------------------------------
        # test creating a second data product (positional arguments)
        #------------------------------------------------------------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
            name='DP2',
            description='some new dp',
            temporal_domain=tdom.dump(),
            spatial_domain=sdom.dump())

        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s', dp_id2)

        #------------------------------------------------------------------------------------------------
        # make sure data product is associated with stream def
        #------------------------------------------------------------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream, RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s", s)
            sdefs, _ = self.rrclient.find_objects(s, PRED.hasStreamDefinition, RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s", sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        # test reading a non-existent data product
        log.debug('reading non-existent data product')

        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        # now write the dp back to the registry
        self.dpsc_cli.update_data_product(dp_obj)

        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEqual(dp_obj.description, 'the very first dp')

        # test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.product_download_size_estimated.status)
        self.assertEqual(0, extended_product.computed.product_download_size_estimated.value)

        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.parameters.status)

        # now 'delete' the data product
        log.debug("deleting data product: %s", dp_id)
        self.dpsc_cli.delete_data_product(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value

        for event in events:
            log.debug("event time: %s", event.ts_created)

        # TODO(review): a check that at least one event exists
        # (assertTrue(len(events) > 0)) was disabled in the original; confirm
        # whether events are expected here before re-enabling it.

    def test_data_product_stream_def(self):
        """Check that a data product reports the stream definition it was created with."""
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        expected_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)

        # Serialize the generic time-series domains (spatial first, as before)
        temporal, spatial = time_series_domain()
        spatial_dump = spatial.dump()
        temporal_dump = temporal.dump()

        product = IonObject(
            RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain=temporal_dump,
            spatial_domain=spatial_dump)
        product_id = self.dpsc_cli.create_data_product(
            data_product=product, stream_definition_id=expected_def_id)

        actual_def_id = self.dpsc_cli.get_data_product_stream_definition(product_id)
        self.assertEquals(expected_def_id, actual_def_id)



    def test_activate_suspend_data_product(self):
        """Activate, verify and suspend persistence for a data product, then delete it.

        After activation the test checks that the product has a dataset, that
        each of its streams is reported as persisted, that the dataset can be
        replayed as a ``Granule``, and that the registry holds the name and
        description given at creation. Persistence is then suspended, the
        product force-deleted, and a registry read is expected to raise
        ``NotFound``.
        """
        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # create a new data product bound to that stream definition
        #------------------------------------------------------------------------------------------------
        # Generic time-series temporal and spatial domains, serialized
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain=tdom,
            spatial_domain=sdom)

        log.debug("Created an IonObject for a data product: %s", dp_obj)

        dp_id = self.dpsc_cli.create_data_product(data_product=dp_obj,
            stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test activate data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        # Make sure the backing datastore can be obtained before replaying
        self.get_datastore(dataset_ids[0])

        # Check that the streams associated with the data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug("The data retriever was able to replay the dataset that was attached to the data product "
                  "we wanted to be persisted. Therefore the data product was indeed persisted with "
                  "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
                  "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'")

        data_product_object = self.rrclient.read(dp_id)
        self.assertEqual(data_product_object.name, 'DP1')
        self.assertEqual(data_product_object.description, 'some new dp')

        log.debug("Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
                  " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
                  "resource registry, name='%s', desc='%s'",
                  dp_obj.name, dp_obj.description, data_product_object.name, data_product_object.description)

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            self.rrclient.read(dp_id)
Exemplo n.º 11
0
class TestWorkflowManagementIntegration(VisualizationIntegrationTestHelper):

    def setUp(self):
        """Start the container, deploy r2deploy.yml and create the service clients used by the tests."""
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Every client below is bound to this container's node.
        node = self.container.node

        self.rrclient = ResourceRegistryServiceClient(node=node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=node)
        self.pubsubclient = PubsubManagementServiceClient(node=node)
        self.ingestclient = IngestionManagementServiceClient(node=node)
        self.imsclient = InstrumentManagementServiceClient(node=node)
        self.dataproductclient = DataProductManagementServiceClient(node=node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=node)
        self.datasetclient = DatasetManagementServiceClient(node=node)
        self.workflowclient = WorkflowManagementServiceClient(node=node)
        self.process_dispatcher = ProcessDispatcherServiceClient(node=node)
        self.data_retriever = DataRetrieverServiceClient(node=node)

        # Stream definition kept on the instance for the tests/helpers.
        self.ctd_stream_def = SBE37_CDM_stream_definition()



    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Not integrated for CEI')
    def test_SA_transform_components(self):
        """
        Chain two transform processes (salinity, then salinity doubler) behind a
        CTD input data product and validate the messages collected from the
        input stream and from both transform output streams.
        """
        check = self.assertTrue

        # Input data product; its stream is the first one to be monitored.
        ctd_stream_id, ctd_parsed_dp_id = self.create_ctd_input_stream_and_data_product()
        monitored_stream_ids = [ctd_stream_id]

        # --- First transformation: salinity, fed by the parsed CTD product ---
        salinity_def_id = self.create_salinity_data_process_definition()
        salinity_proc_id, salinity_dp_id = self.create_transform_process(salinity_def_id, ctd_parsed_dp_id)

        # Look up the stream attached to the salinity output product.
        salinity_streams, _ = self.rrclient.find_objects(salinity_dp_id, PRED.hasStream, None, True)
        check(len(salinity_streams) > 0)
        monitored_stream_ids.append(salinity_streams[0])

        # --- Second transformation: salinity doubler, fed by the salinity output ---
        doubler_def_id = self.create_salinity_doubler_data_process_definition()
        doubler_proc_id, doubler_dp_id = self.create_transform_process(doubler_def_id, salinity_dp_id)

        doubler_streams, _ = self.rrclient.find_objects(doubler_dp_id, PRED.hasStream, None, True)
        check(len(doubler_streams) > 0)
        monitored_stream_ids.append(doubler_streams[0])

        # Collect messages from all monitored streams.
        results = self.start_output_stream_and_listen(ctd_stream_id, monitored_stream_ids)

        # Deactivate the transforms, last one created first.
        self.dataprocessclient.deactivate_data_process(doubler_proc_id)
        self.dataprocessclient.deactivate_data_process(salinity_proc_id)

        # Check the collected messages.
        self.validate_messages(results)

    @attr('LOCOINT')
    @attr('SMOKE')
    @unittest.skip("not working")
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Not integrated for CEI')
    def test_transform_workflow(self):
        """
        Define a two-step workflow (salinity, salinity doubler), run it against a
        CTD input product, then check the registry associations, the collected
        messages, the persisted output and the final cleanup.
        """
        check = self.assertTrue

        # Workflow definition object
        workflow_def = IonObject(RT.WorkflowDefinition,
                                 name='Salinity_Test_Workflow',
                                 description='tests a workflow of multiple transform data processes')

        # Explicit name requested for the final output product
        output_product_name = 'TEST-Workflow_Output_Product'

        # Step 1: salinity transform; the intermediate product is not persisted.
        salinity_def_id = self.create_salinity_data_process_definition()
        salinity_step = IonObject('DataProcessWorkflowStep',
                                  data_process_definition_id=salinity_def_id,
                                  persist_process_output_data=False)
        workflow_def.workflow_steps.append(salinity_step)

        # Step 2: salinity doubler; produces the named output product.
        doubler_def_id = self.create_salinity_doubler_data_process_definition()
        doubler_step = IonObject('DataProcessWorkflowStep',
                                 data_process_definition_id=doubler_def_id,
                                 output_data_product_name=output_product_name)
        workflow_def.workflow_steps.append(doubler_step)

        # Register the definition; it should be associated with both process definitions.
        workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def)

        definition_assocs = self.rrclient.find_associations(workflow_def_id, PRED.hasDataProcessDefinition)
        check(len(definition_assocs) == 2)

        # Input data product; its stream is the first one to be monitored.
        ctd_stream_id, ctd_parsed_dp_id = self.create_ctd_input_stream_and_data_product()
        monitored_stream_ids = [ctd_stream_id]

        # Create and start the workflow.
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_dp_id, timeout=30)

        owning_workflows, _ = self.rrclient.find_subjects(RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        check(len(owning_workflows) == 1)

        # The output product must have exactly one dataset.
        output_datasets, _ = self.rrclient.find_objects(workflow_product_id, PRED.hasDataset, RT.DataSet, True)
        check(len(output_datasets) == 1)
        dataset_id = output_datasets[0]

        # The output product name must be the one given in the workflow definition.
        output_product = self.rrclient.read(workflow_product_id)
        check(output_product.name == output_product_name)

        # Follow the associations to the output streams whose messages are validated.
        workflow_dp_ids, _ = self.rrclient.find_objects(workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        check(len(workflow_dp_ids) == 2)

        for product_id in workflow_dp_ids:
            product_streams, _ = self.rrclient.find_objects(product_id, PRED.hasStream, None, True)
            check(len(product_streams) == 1)
            monitored_stream_ids.append(product_streams[0])

        # Collect messages from all monitored streams.
        results = self.start_output_stream_and_listen(ctd_stream_id, monitored_stream_ids)

        # Stop the workflow (second argument False; True is not exercised yet).
        self.workflowclient.terminate_data_process_workflow(workflow_id, False, timeout=15)

        # No Workflow resource may remain afterwards.
        remaining_workflows, _ = self.rrclient.find_resources(restype=RT.Workflow)
        check(len(remaining_workflows) == 0)

        # Check the collected messages.
        self.validate_messages(results)

        # Check that the data was persisted and can be retrieved.
        self.validate_data_ingest_retrieve(dataset_id)

        # Delete the definition and make sure it is really gone.
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        remaining_defs, _ = self.rrclient.find_resources(restype=RT.WorkflowDefinition)
        check(len(remaining_defs) == 0)

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Not integrated for CEI')
    def test_google_dt_transform_workflow(self):
        """
        Run a single-step Google DT workflow against a CTD input product and
        validate both the streamed results and the granule retrieved from the
        persisted output dataset.
        """
        check = self.assertTrue

        # Workflow definition with one persisted Google DT step
        workflow_def = IonObject(RT.WorkflowDefinition,
                                 name='GoogleDT_Test_Workflow',
                                 description='Tests the workflow of converting stream data to Google DT')

        google_dt_def_id = self.create_google_dt_data_process_definition()
        google_dt_step = IonObject('DataProcessWorkflowStep',
                                   data_process_definition_id=google_dt_def_id,
                                   persist_process_output_data=True)
        workflow_def.workflow_steps.append(google_dt_step)

        # Register the definition in the resource registry.
        workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def)

        # Input data product; its stream is the first one to be monitored.
        ctd_stream_id, ctd_parsed_dp_id = self.create_ctd_input_stream_and_data_product()
        monitored_stream_ids = [ctd_stream_id]

        # Create and start the workflow.
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_dp_id, timeout=20)

        owning_workflows, _ = self.rrclient.find_subjects(RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        check(len(owning_workflows) == 1)

        # Follow the associations to the output streams whose messages are validated.
        workflow_dp_ids, _ = self.rrclient.find_objects(workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        check(len(workflow_dp_ids) == 1)

        for product_id in workflow_dp_ids:
            product_streams, _ = self.rrclient.find_objects(product_id, PRED.hasStream, None, True)
            check(len(product_streams) == 1)
            monitored_stream_ids.append(product_streams[0])

        # Collect messages from all monitored streams.
        results = self.start_output_stream_and_listen(ctd_stream_id, monitored_stream_ids)

        # Stop the workflow (second argument False; True is not exercised yet).
        self.workflowclient.terminate_data_process_workflow(workflow_id, False)

        # Check the streamed results.
        self.validate_google_dt_transform_results(results)

        # Ingestion check: find the dataset of the last output product and
        # pull its content back through the data retriever.
        last_product_id = workflow_dp_ids[-1]
        ds_ids, _ = self.rrclient.find_objects(last_product_id, PRED.hasDataset, RT.DataSet, True)
        retrieved_granule = self.data_retriever.retrieve(ds_ids[0])

        # The retrieved granule goes through the same validation.
        self.validate_google_dt_transform_results(retrieved_granule)

        # Delete the definition and make sure it is really gone.
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        remaining_defs, _ = self.rrclient.find_resources(restype=RT.WorkflowDefinition)
        check(len(remaining_defs) == 0)



    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Not integrated for CEI')
    def test_mpl_graphs_transform_workflow(self):
        """
        Run a single-step Matplotlib-graphs workflow against a CTD input product
        and validate both the streamed results and the granule retrieved from
        the persisted output dataset.
        """
        check = self.assertTrue

        # Workflow definition with one persisted Matplotlib graphs step
        workflow_def = IonObject(RT.WorkflowDefinition,
                                 name='Mpl_Graphs_Test_Workflow',
                                 description='Tests the workflow of converting stream data to Matplotlib graphs')

        mpl_graphs_def_id = self.create_mpl_graphs_data_process_definition()
        mpl_graphs_step = IonObject('DataProcessWorkflowStep',
                                    data_process_definition_id=mpl_graphs_def_id,
                                    persist_process_output_data=True)
        workflow_def.workflow_steps.append(mpl_graphs_step)

        # Register the definition in the resource registry.
        workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def)

        # Input data product; its stream is the first one to be monitored.
        ctd_stream_id, ctd_parsed_dp_id = self.create_ctd_input_stream_and_data_product()
        monitored_stream_ids = [ctd_stream_id]

        # Create and start the workflow.
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_dp_id, timeout=20)

        owning_workflows, _ = self.rrclient.find_subjects(RT.Workflow, PRED.hasOutputProduct, workflow_product_id, True)
        check(len(owning_workflows) == 1)

        # Follow the associations to the output streams whose messages are validated.
        workflow_dp_ids, _ = self.rrclient.find_objects(workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        check(len(workflow_dp_ids) == 1)

        for product_id in workflow_dp_ids:
            product_streams, _ = self.rrclient.find_objects(product_id, PRED.hasStream, None, True)
            check(len(product_streams) == 1)
            monitored_stream_ids.append(product_streams[0])

        # Collect messages from all monitored streams.
        results = self.start_output_stream_and_listen(ctd_stream_id, monitored_stream_ids)

        # Stop the workflow (second argument False; True is not exercised yet).
        self.workflowclient.terminate_data_process_workflow(workflow_id, False)

        # Check the streamed results.
        self.validate_mpl_graphs_transform_results(results)

        # Ingestion check: find the dataset of the last output product and
        # pull its content back through the data retriever.
        last_product_id = workflow_dp_ids[-1]
        ds_ids, _ = self.rrclient.find_objects(last_product_id, PRED.hasDataset, RT.DataSet, True)

        retrieved_granule = self.data_retriever.retrieve(ds_ids[0])

        # The retrieved granule goes through the same validation.
        self.validate_mpl_graphs_transform_results(retrieved_granule)

        # Delete the definition and make sure it is really gone.
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        remaining_defs, _ = self.rrclient.find_resources(restype=RT.WorkflowDefinition)
        check(len(remaining_defs) == 0)


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Not integrated for CEI')
    def test_multiple_workflow_instances(self):
        """
        Start two workflows from the same definition on two different CTD input
        products, push data through both, then terminate them and verify that
        the workflows, the definition and its associations are removed.
        """
        check = self.assertTrue

        # Workflow definition with a single Google DT step
        workflow_def = IonObject(RT.WorkflowDefinition,
                                 name='Multiple_Test_Workflow',
                                 description='Tests the workflow of converting stream data')

        google_dt_def_id = self.create_google_dt_data_process_definition()
        google_dt_step = IonObject('DataProcessWorkflowStep', data_process_definition_id=google_dt_def_id)
        workflow_def.workflow_steps.append(google_dt_step)

        # Register the definition in the resource registry.
        workflow_def_id = self.workflowclient.create_workflow_definition(workflow_def)

        # First input data product and the workflow running on it
        ctd_stream_id1, ctd_parsed_dp_id1 = self.create_ctd_input_stream_and_data_product('ctd_parsed1')
        workflow_id1, _ = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_dp_id1, timeout=20)

        # Second input data product and the second workflow running on it
        ctd_stream_id2, ctd_parsed_dp_id2 = self.create_ctd_input_stream_and_data_product('ctd_parsed2')
        workflow_id2, _ = self.workflowclient.create_data_process_workflow(
            workflow_def_id, ctd_parsed_dp_id2, timeout=20)

        # Streams to monitor: the two input streams.
        monitored_stream_ids = [ctd_stream_id1, ctd_stream_id2]

        # Both workflow instances must be registered.
        running_workflows, _ = self.rrclient.find_resources(restype=RT.Workflow)
        check(len(running_workflows) == 2)

        # Feed the first input with the sinusoidal producer, the second with the simple one.
        ctd_sim_pid1 = self.start_sinusoidal_input_stream_process(ctd_stream_id1)
        ctd_sim_pid2 = self.start_simple_input_stream_process(ctd_stream_id2)

        # Listen until a set number of messages per stream went through; the
        # returned results are not inspected by this test.
        self.start_output_stream_and_listen(None, monitored_stream_ids, message_count_per_stream=5)

        # That is enough data: cancel both simulator processes.
        self.process_dispatcher.cancel_process(ctd_sim_pid1)
        self.process_dispatcher.cancel_process(ctd_sim_pid2)

        # Stop both workflows (second argument False; True is not exercised yet).
        self.workflowclient.terminate_data_process_workflow(workflow_id1, False)
        self.workflowclient.terminate_data_process_workflow(workflow_id2, False)

        remaining_workflows, _ = self.rrclient.find_resources(restype=RT.Workflow)
        check(len(remaining_workflows) == 0)

        # Delete the definition and make sure it is really gone.
        self.workflowclient.delete_workflow_definition(workflow_def_id)

        remaining_defs, _ = self.rrclient.find_resources(restype=RT.WorkflowDefinition)
        check(len(remaining_defs) == 0)

        # Its process-definition associations must be gone as well.
        remaining_assocs = self.rrclient.find_associations(workflow_def_id, PRED.hasDataProcessDefinition)
        check(len(remaining_assocs) == 0)
Exemplo n.º 12
0
class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self):  # camelCase name is imposed by unittest
        '''
        Start the container, deploy r2deploy.yml, create the service clients
        and initialise the per-test bookkeeping.
        '''
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Service clients
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        # Bookkeeping consumed by the helpers, tearDown and the cleanup hook
        self.pids = []            # processes spawned by launch_producer
        self.queue_buffer = []    # queues deleted in tearDown
        self.streams = []         # streams stopped by stop_all_ingestion
        self.event = Event()
        self.i = 0                # counter used in dataset/stream names

        self.exchange_space_name = 'test_granules'
        self.exchange_point_name = 'science_data'

        self.purge_queues()
        self.addCleanup(self.stop_all_ingestion)

    def purge_queues(self):
        '''
        Purge the 'science_granule_ingestion' exchange queue
        '''
        ingestion_queue = self.container.ex_manager.create_xn_queue('science_granule_ingestion')
        ingestion_queue.purge()

    def tearDown(self):
        '''
        Purge the ingestion queue, terminate the spawned processes, clean the
        ingestion subscriptions and delete every queue recorded in queue_buffer
        '''
        self.purge_queues()

        for spawned_pid in self.pids:
            self.container.proc_manager.terminate_process(spawned_pid)

        IngestionManagementIntTest.clean_subscriptions()

        # Entries are either queue objects or queue names; anything else is ignored.
        for entry in self.queue_buffer:
            if isinstance(entry, ExchangeNameQueue):
                doomed_queue = entry
            elif isinstance(entry, str):
                doomed_queue = self.container.ex_manager.create_xn_queue(entry)
            else:
                continue
            doomed_queue.delete()

    #--------------------------------------------------------------------------------
    # Helper/Utility methods
    #--------------------------------------------------------------------------------

    def create_dataset(self, parameter_dict_id=''):
        '''
        Creates a time-series dataset named 'test_dataset_<self.i>' and returns its id.
        When no parameter dictionary id is given, 'ctd_parsed_param_dict' is looked up by name.
        '''
        temporal_domain, spatial_domain = time_series_domain()
        spatial_dump = spatial_domain.dump()
        temporal_dump = temporal_domain.dump()

        pdict_id = parameter_dict_id
        if not pdict_id:
            pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
                'ctd_parsed_param_dict', id_only=True)

        dataset_name = 'test_dataset_%i' % self.i
        return self.dataset_management.create_dataset(
            dataset_name,
            parameter_dictionary_id=pdict_id,
            spatial_domain=spatial_dump,
            temporal_domain=temporal_dump)

    def get_datastore(self, dataset_id):
        '''
        Returns the SCIDATA-profile datastore named by the dataset resource.
            Mainly a workaround for a bug where integration tests running in several containers
            may delete a CouchDB datastore without the other containers noticing its new state.
        '''
        datastore_name = self.dataset_management.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)

    def get_ingestion_config(self):
        '''
        Returns the id of the first ingestion configuration found in the resource registry
        '''
        # The configuration is expected to exist already: the bootstrap service,
        # configured through r2deploy.yml, should have created it.
        config_ids, _ = self.resource_registry.find_resources(
            restype=RT.IngestionConfiguration, id_only=True)
        first_config_id = config_ids[0]
        return first_config_id

    def launch_producer(self, stream_id=''):
        '''
        Spawns a BetterDataProducer process configured with the given stream
        and records its pid so tearDown can terminate it
        '''
        producer_config = {'process': {'stream_id': stream_id}}

        producer_pid = self.container.spawn_process(
            'better_data_producer',
            'ion.processes.data.example_data_producer',
            'BetterDataProducer',
            producer_config)

        self.pids.append(producer_pid)

    def make_simple_dataset(self):
        '''
        Builds the essentials for most of these tests and returns them as
        (stream_id, route, stream_def_id, dataset_id)
        '''
        param_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)

        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd data', parameter_dictionary_id=param_dict_id)

        stream_name = 'ctd stream %i' % self.i
        stream_id, route = self.pubsub_management.create_stream(
            stream_name, 'xp1', stream_definition_id=stream_def_id)

        dataset_id = self.create_dataset(param_dict_id)

        # Called for its effect only; the returned datastore is not used here.
        self.get_datastore(dataset_id)

        # Bump the counter so the next stream/dataset gets a new name.
        self.i += 1
        return stream_id, route, stream_def_id, dataset_id

    def publish_hifi(self, stream_id, stream_route, offset=0):
        '''
        Publishes one granule of deterministic data: 'time' and 'temp' both hold
        the ten consecutive values starting at offset * 10
        '''
        publisher = StandaloneStreamPublisher(stream_id, stream_route)

        # The record dictionary is built from the stream's own definition.
        definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        rdt = RecordDictionaryTool(stream_definition_id=definition._id)

        shift = offset * 10
        rdt['time'] = np.arange(10) + shift
        rdt['temp'] = np.arange(10) + shift

        publisher.publish(rdt.to_granule())

    def publish_fake_data(self, stream_id, route):
        '''
        Publishes four granules through publish_hifi, using offsets 0 to 3
        '''
        granule_count = 4
        for granule_offset in xrange(granule_count):
            self.publish_hifi(stream_id, route, granule_offset)

    def start_ingestion(self, stream_id, dataset_id):
        '''
        Asks ingestion management to persist the given stream into the given dataset,
        using the ingestion configuration found in the resource registry
        '''
        config_id = self.get_ingestion_config()
        persist_args = dict(stream_id=stream_id,
                            ingestion_configuration_id=config_id,
                            dataset_id=dataset_id)
        self.ingestion_management.persist_data_stream(**persist_args)

    def stop_ingestion(self, stream_id):
        '''
        Asks ingestion management to stop persisting the given stream
        '''
        config_id = self.get_ingestion_config()
        unpersist_args = dict(stream_id=stream_id,
                              ingestion_configuration_id=config_id)
        self.ingestion_management.unpersist_data_stream(**unpersist_args)

    def stop_all_ingestion(self):
        '''
        Best-effort cleanup hook: stops ingestion for every stream recorded in self.streams.

        Each stream is handled on its own, so a failure for one stream (for
        instance because a test already stopped it) is logged and does not
        prevent the remaining streams from being stopped. Only Exception is
        caught; KeyboardInterrupt and SystemExit are allowed to propagate.
        '''
        for stream_id in self.streams:
            try:
                self.stop_ingestion(stream_id)
            except Exception as stop_error:
                log.warning('Could not stop ingestion for stream %s: %s', stream_id, stop_error)

    def validate_granule_subscription(self, msg, route, stream_id):
        '''
        Subscriber callback validating the granule format.

        An empty dict message is ignored. Any other message must be a Granule;
        it is loaded into a RecordDictionaryTool, logged, and self.event is set
        to signal that a granule arrived.

        msg       - the received message
        route     - unused, part of the subscriber callback signature
        stream_id - unused, part of the subscriber callback signature
        '''
        if msg == {}:
            return

        # Check the type before handing the message to the loader, so that a
        # malformed message fails with this explicit assertion message.
        self.assertIsInstance(
            msg, Granule, 'Message is improperly formatted. (%s)' % type(msg))

        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40, timeout=40):
        '''
        Loops until there is a sufficient amount of data in the dataset

        dataset_id - dataset to poll
        data_size  - minimum 'time' extent required before returning
        timeout    - seconds handed to gevent.Timeout; when it expires before the
                     condition is met, gevent.Timeout propagates to the caller
        '''
        with gevent.Timeout(timeout):
            while True:
                extents = self.dataset_management.dataset_extents(
                    dataset_id, 'time')[0]
                granule = self.data_retriever.retrieve_last_data_points(
                    dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)

                # Done once the last data point has a real (non-fill) time
                # value and the dataset extent reached the requested size.
                if rdt['time'] and rdt['time'][0] != rdt._pdict.get_context(
                        'time').fill_value and extents >= data_size:
                    return

                # Not there yet: poll again shortly.
                gevent.sleep(0.2)

    #--------------------------------------------------------------------------------
    # Test Methods
    #--------------------------------------------------------------------------------

    @attr('SMOKE')
    def test_dm_end_2_end(self):
        '''
        End-to-end data management test: a producer publishes on a stream, the
        stream is persisted into a dataset, and the data is read back in one
        chunk, through a streamed replay, and through a sliced retrieve.
        '''
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(
            pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')

        stream_definition = self.pubsub_management.create_stream_definition(
            'ctd data', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream(
            'producer',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_definition)

        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        #--------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingest_config_id,
            dataset_id=dataset_id)

        # Register the stream for cleanup as soon as persistence is active, so
        # the stop_all_ingestion cleanup hook also stops it when a later step
        # of this test fails.
        self.streams.append(stream_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),
                        '%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi'] * 10,
                                                        dtype='object')).all())

        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream(
            'replay_out',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_definition)
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        subscriber = StandaloneStreamSubscriber(
            self.exchange_space_name, self.validate_granule_subscription)
        # Recorded so tearDown deletes the queue.
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5),
                        'The process never launched')
        replay_client.start_replay()

        # validate_granule_subscription sets the event when a granule arrives.
        self.assertTrue(self.event.wait(10))
        subscriber.stop()

        self.data_retriever.cancel_replay_agent(self.replay_id)

        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id,
                                               query={'tdoa': slice(0, 5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        # The comparison yields either an array or a plain bool.
        self.assertTrue(b.all() if not isinstance(b, bool) else b)
        self.stop_ingestion(stream_id)

    @unittest.skip('Doesnt work')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_replay_pause(self):
        """Drive a replay process through start, pause, resume and stop.

        A coverage is filled with 100 time/temp samples and persisted, then
        replayed onto a dedicated stream on exchange point 'xp1'.
        ``self.event`` is used as the "a granule arrived" signal; it is
        presumably set by ``validate_granule_subscription``, which is defined
        outside this method -- confirm there.

        The test is unconditionally skipped ('Doesnt work').
        """
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(
            pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(
            self.dataset_management.create_parameter_context(
                'records', rec_context.dump()))

        # From here on pdict_id refers to the extended dictionary.
        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')

        stream_def_id = self.pubsub_management.create_stream_definition(
            'replay_stream', parameter_dictionary_id=pdict_id)
        replay_stream, replay_route = self.pubsub_management.create_stream(
            'replay', 'xp1', stream_definition_id=stream_def_id)
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_coverage(dataset_id)

        # Fill the coverage directly with 100 samples instead of ingesting.
        bb = CoverageCraft(scov)
        bb.rdt['time'] = np.arange(100)
        bb.rdt['temp'] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(
            dataset_id,
            bb.coverage)  # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(
            self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp('xp1')
        # Record the queue name (presumably so teardown can delete it -- confirm).
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(
            dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()

        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))

        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure there's no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))

        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()

    def test_retrieve_and_transform(self):
        """Retrieve an ingested dataset through the L2 salinity transform.

        Two ten-sample CTD granules (time 0-9 and 10-19) are published, then
        the dataset is retrieved with ``CTDL2SalinityTransformAlgorithm``
        applied and every returned salinity value is asserted to be non-zero.
        """
        # Standard dataset plus ingestion setup.
        simple_dataset = self.make_simple_dataset()
        ctd_stream_id, route, stream_def_id, dataset_id = simple_dataset
        self.start_ingestion(ctd_stream_id, dataset_id)

        # The salinity stream definition is built on the parsed CTD dictionary.
        sal_pdict = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            'sal data', parameter_dictionary_id=sal_pdict)

        # First granule: time 0..9 with random temp/conductivity/pressure.
        ctd_rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        ctd_rdt['time'] = np.arange(10)
        ctd_rdt['temp'] = np.random.randn(10) * 10 + 30
        ctd_rdt['conductivity'] = np.random.randn(10) * 2 + 10
        ctd_rdt['pressure'] = np.random.randn(10) * 1 + 12

        ctd_publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        ctd_publisher.publish(ctd_rdt.to_granule())

        # Second granule: same values, time shifted to 10..19.
        ctd_rdt['time'] = np.arange(10, 20)
        ctd_publisher.publish(ctd_rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        transform_kwargs = dict(params=sal_stream_def_id)
        transformed = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'CTDL2SalinityTransformAlgorithm',
            kwargs=transform_kwargs)
        sal_rdt = RecordDictionaryTool.load_from_granule(transformed)
        for salinity in sal_rdt['salinity']:
            self.assertNotEquals(salinity, 0)

        self.streams.append(ctd_stream_id)
        self.stop_ingestion(ctd_stream_id)

    def test_last_granule(self):
        """Check that retrieve_last_data_points returns the newest samples.

        Twenty samples (time 0-19) are ingested; the last 10 points must be
        10-19 and the last 5 points must be 15-19. Each check is retried via
        ``poll``.
        """
        simple_dataset = self.make_simple_dataset()
        stream_id, route, stream_def_id, dataset_id = simple_dataset
        self.start_ingestion(stream_id, dataset_id)

        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        def tail_checker(point_count, make_expected):
            # Build a zero-argument predicate suitable for poll().
            def check():
                tail_granule = self.data_retriever.retrieve_last_data_points(
                    dataset_id, point_count)
                tail_rdt = RecordDictionaryTool.load_from_granule(tail_granule)
                matches = tail_rdt['time'] == make_expected()
                # A plain bool means the comparison was not element-wise.
                if isinstance(matches, bool):
                    return False
                return matches.all()
            return check

        last_ten_ok = poll(tail_checker(10, lambda: np.arange(10) + 10))
        self.assertTrue(last_ten_ok)

        last_five_ok = poll(tail_checker(5, lambda: np.arange(15, 20)))
        self.assertTrue(last_five_ok)

        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    def test_replay_with_parameters(self):
        """Retrieve with a time window, a stride and a parameter subset.

        Fake data is ingested, then the dataset is queried for 'time' and
        'temp' only, with stride 2 over the first 20 time units. The result
        must contain exactly those two parameters with time 0, 2, ..., 18,
        and the dataset extents for both parameters must be at least 20.
        """
        #--------------------------------------------------------------------------------
        # Configuration: extended parameter dictionary, stream and dataset
        #--------------------------------------------------------------------------------
        base_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(
            base_pdict_id, id_only=True)

        # A field that supports binary data input.
        binary_ctx = ParameterContext('binary', param_type=ArrayType())
        binary_ctx_id = self.dataset_management.create_parameter_context(
            'binary', binary_ctx.dump())
        context_ids.append(binary_ctx_id)

        # A field that supports dictionary elements.
        records_ctx = ParameterContext('records', param_type=RecordType())
        records_ctx_id = self.dataset_management.create_parameter_context(
            'records', records_ctx.dump())
        context_ids.append(records_ctx_id)

        pdict_id = self.dataset_management.create_parameter_dictionary(
            'replay_pdict',
            parameter_context_ids=context_ids,
            temporal_context='time')

        stream_def_id = self.pubsub_management.create_stream_definition(
            'replay_stream', parameter_dictionary_id=pdict_id)
        stream_id, route = self.pubsub_management.create_stream(
            'replay_with_params',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)

        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=config_id,
            dataset_id=dataset_id)

        #--------------------------------------------------------------------------------
        # Publish and wait for the DatasetModified event
        #--------------------------------------------------------------------------------
        dataset_modified = Event()

        def on_dataset_modified(*args, **kwargs):
            dataset_modified.set()

        modified_listener = EventSubscriber(event_type=OT.DatasetModified,
                                            callback=on_dataset_modified,
                                            origin=dataset_id)
        modified_listener.start()
        self.addCleanup(modified_listener.stop)

        self.publish_fake_data(stream_id, route)
        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Query with bounds, stride and parameter selection
        #--------------------------------------------------------------------------------
        # 2208988800 is subtracted from both time bounds, as in the original query.
        time_shift = 2208988800
        query = {
            'start_time': 0 - time_shift,
            'end_time': 20 - time_shift,
            'stride_time': 2,
            'parameters': ['time', 'temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id,
                                                      query=query)

        result_rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        time_matches = np.arange(0, 20, 2) == result_rdt['time']
        self.assertTrue(time_matches.all(), '%s' % result_rdt.pretty_print())
        self.assertEquals(set(result_rdt.iterkeys()), set(['time', 'temp']))

        extents = self.dataset_management.dataset_extents(
            dataset_id=dataset_id, parameters=['time', 'temp'])
        for parameter in ('time', 'temp'):
            self.assertTrue(extents[parameter] >= 20)

        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    def test_repersist_data(self):
        """Un-persist then re-persist a stream and verify no data is lost.

        Twenty samples are ingested, persistence is switched off and back on
        for the same dataset, twenty more samples are ingested, and a full
        retrieve must eventually yield time 0..39.
        """
        simple_dataset = self.make_simple_dataset()
        stream_id, route, stream_def_id, dataset_id = simple_dataset
        self.start_ingestion(stream_id, dataset_id)

        # First batch: offsets 0 and 1 -> time 0..19.
        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, 20)

        # Toggle persistence off and on again for the same dataset.
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=config_id,
            dataset_id=dataset_id)

        # Second batch: offsets 2 and 3 -> time 20..39.
        self.publish_hifi(stream_id, route, 2)
        self.publish_hifi(stream_id, route, 3)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Retry the full retrieve once a second, bounded by a 5 second timeout.
        success = False
        with gevent.timeout.Timeout(5):
            while not success:
                full_granule = self.data_retriever.retrieve(dataset_id)
                full_rdt = RecordDictionaryTool.load_from_granule(full_granule)

                time_matches = full_rdt['time'] == np.arange(0, 40)
                if not isinstance(time_matches, bool):
                    success = time_matches.all()
                gevent.sleep(1)

        self.assertTrue(success)
        self.streams.append(stream_id)
        self.stop_ingestion(stream_id)

    @attr('LOCOINT')
    @unittest.skipIf(
        os.getenv('CEI_LAUNCH_TEST', False),
        'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_correct_time(self):
        """Temporal bounds must be reported in unix time for NTP-stamped data.

        Twenty NTP timestamps ending "now" are written to the coverage; the
        reported bounds must be within 2 seconds of the corresponding unix
        times.
        """
        # Seconds between Jan 1 1900 and Jan 1 1970: the unix <-> NTP offset.
        ntp_offset = 2208988800

        unix_now = np.floor(time.time())
        unix_ago = unix_now - 20
        ntp_now = unix_now + ntp_offset
        ntp_ago = unix_ago + ntp_offset

        simple_dataset = self.make_simple_dataset()
        stream_id, route, stream_def_id, dataset_id = simple_dataset

        coverage = DatasetManagementService._get_coverage(dataset_id)
        coverage.insert_timesteps(20)
        ntp_times = np.arange(ntp_ago, ntp_now)
        coverage.set_parameter_values('time', ntp_times)

        temporal_bounds = self.dataset_management.dataset_temporal_bounds(
            dataset_id)

        lower_error = np.abs(temporal_bounds[0] - unix_ago)
        self.assertTrue(lower_error < 2)
        upper_error = np.abs(temporal_bounds[1] - unix_now)
        self.assertTrue(upper_error < 2)

    @attr('LOCOINT')
    @unittest.skipIf(
        os.getenv('CEI_LAUNCH_TEST', False),
        'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_empty_coverage_time(self):
        """An empty coverage reports the time fill value for both bounds."""
        simple_dataset = self.make_simple_dataset()
        stream_id, route, stream_def_id, dataset_id = simple_dataset

        coverage = DatasetManagementService._get_coverage(dataset_id)
        temporal_bounds = self.dataset_management.dataset_temporal_bounds(
            dataset_id)

        time_fill = coverage.get_parameter_context('time').fill_value
        self.assertEquals([time_fill] * 2, temporal_bounds)

    @attr('LOCOINT')
    @unittest.skipIf(
        os.getenv('CEI_LAUNCH_TEST', False),
        'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_out_of_band_retrieve(self):
        """retrieve_oob must return all 40 ingested samples (time 0..39)."""
        # Set up the environment
        simple_dataset = self.make_simple_dataset()
        stream_id, route, stream_def_id, dataset_id = simple_dataset
        self.start_ingestion(stream_id, dataset_id)

        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Retrieve through the service class directly rather than the client
        oob_granule = DataRetrieverService.retrieve_oob(dataset_id)
        oob_rdt = RecordDictionaryTool.load_from_granule(oob_granule)
        time_matches = oob_rdt['time'] == np.arange(40)
        self.assertTrue(time_matches.all())

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv(
        'CEI_LAUNCH_TEST', False
    ), 'Host requires file-system access to coverage files, CEI mode does not support.'
                     )
    def test_retrieve_cache(self):
        """Exercise the DataRetrieverService coverage cache.

        Checks, in order: a first ``_get_coverage`` call populates the cache;
        a second call after the refresh interval changes the cached age; the
        first dataset is no longer cached after ten datasets have been
        touched; and a cached dataset drops out of the cache after new data
        is ingested for it.
        """
        # _refresh_interval is class-level state shared with every other test
        # in the process; put the original value back when this test ends.
        self.addCleanup(setattr, DataRetrieverService, '_refresh_interval',
                        DataRetrieverService._refresh_interval)
        DataRetrieverService._refresh_interval = 1

        datasets = [self.make_simple_dataset() for _ in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_coverage(dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values('time', np.arange(10))
            coverage.set_parameter_values('temp', np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [entry[3] for entry in datasets]
        first_id = dataset_ids[0]
        self.assertNotIn(first_id, DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(first_id)  # Hit the cache
        cov, age = DataRetrieverService._retrieve_cache[first_id]
        # Verify that it was hit and it's now in there
        self.assertIn(first_id, DataRetrieverService._retrieve_cache)

        # Sleep past the refresh interval so the next hit re-stamps the entry.
        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(first_id)  # Hit the cache
        cov, age2 = DataRetrieverService._retrieve_cache[first_id]
        self.assertNotEqual(age2, age)

        # Touch every dataset; the first one is then expected to be gone
        # (presumably the cache holds fewer than ten entries -- TODO confirm).
        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)

        self.assertNotIn(first_id, DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)

        self.assertIn(dataset_id, DataRetrieverService._retrieve_cache)

        # A long refresh interval here means the entry can only disappear
        # through some path other than the timed refresh.
        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

        # Poll every 0.1 s, for at most 20 s, until the entry is dropped.
        event = gevent.event.Event()
        with gevent.Timeout(20):
            while not event.wait(0.1):
                if dataset_id not in DataRetrieverService._retrieve_cache:
                    event.set()

        self.assertTrue(event.is_set())

    @unittest.skip('Outdated due to ingestion retry')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv(
        'CEI_LAUNCH_TEST', False
    ), 'Host requires file-system access to coverage files, CEI mode does not support.'
                     )
    def test_ingestion_failover(self):
        """Corrupt the coverage master file and expect an ExceptionEvent.

        After 40 samples are ingested, the dataset's ``*_master.hdf5`` file is
        overwritten with junk and another granule is published; an
        ``ExceptionEvent`` with origin ``stream_exception`` must arrive within
        10 seconds.

        The test is unconditionally skipped ('Outdated due to ingestion retry').
        """
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset(
        )
        self.start_ingestion(stream_id, dataset_id)

        event = Event()

        def cb(*args, **kwargs):
            event.set()

        sub = EventSubscriber(event_type="ExceptionEvent",
                              callback=cb,
                              origin="stream_exception")
        sub.start()
        # Stop the subscriber even if a later wait or assertion fails.
        self.addCleanup(sub.stop)

        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        file_path = DatasetManagementService._get_coverage_path(dataset_id)
        master_file = os.path.join(file_path, '%s_master.hdf5' % dataset_id)

        # Deliberately clobber the master file so the next ingest fails.
        with open(master_file, 'w') as f:
            f.write('this will crash HDF')

        self.publish_hifi(stream_id, route, 5)

        self.assertTrue(event.wait(10))
Exemplo n.º 13
0
class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self):
        """Start the container, deploy r2deploy.yml and create the fixtures.

        Creates one client per service used by the tests, the shared
        ``Event`` used to signal granule arrival, and the bookkeeping lists
        consumed by ``tearDown``.
        """
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Service clients
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        # Per-test state
        self.pids = []
        self.event = Event()
        self.exchange_space_name = 'test_granules'
        self.exchange_point_name = 'science_data'

        # Start from an empty ingestion queue.
        self.purge_queues()
        self.queue_buffer = []

    def purge_queues(self):
        """Purge the 'science_granule_ingestion' queue."""
        ingestion_queue = self.container.ex_manager.create_xn_queue(
            'science_granule_ingestion')
        ingestion_queue.purge()
        

    def tearDown(self):
        """Purge the ingestion queue, stop spawned processes, delete queues.

        Entries in ``self.queue_buffer`` may be queue objects or queue names;
        names are resolved to a queue before deletion. Anything else is
        ignored.
        """
        self.purge_queues()

        for spawned_pid in self.pids:
            self.container.proc_manager.terminate_process(spawned_pid)

        IngestionManagementIntTest.clean_subscriptions()

        for entry in self.queue_buffer:
            if isinstance(entry, ExchangeNameQueue):
                doomed_queue = entry
            elif isinstance(entry, str):
                doomed_queue = self.container.ex_manager.create_xn_queue(entry)
            else:
                continue
            doomed_queue.delete()

        

    def launch_producer(self, stream_id=''):
        """Spawn a BetterDataProducer for *stream_id* and record its pid.

        The pid is appended to ``self.pids`` so ``tearDown`` terminates it.
        """
        producer_config = {'process': {'stream_id': stream_id}}
        producer_pid = self.container.spawn_process(
            'better_data_producer',
            'ion.processes.data.example_data_producer',
            'BetterDataProducer',
            producer_config)
        self.pids.append(producer_pid)

    def get_ingestion_config(self):
        """Return the id of the first IngestionConfiguration in the registry.

        The configuration is expected to have been created by the bootstrap
        service, which is configured through r2deploy.yml.
        """
        lookup = self.resource_registry.find_resources(
            restype=RT.IngestionConfiguration, id_only=True)
        config_ids, _ = lookup
        return config_ids[0]


    def publish_hifi(self, stream_id, stream_route, offset=0):
        """Publish one ten-sample granule on the given stream.

        Both 'time' and 'temp' hold the values ``offset*10 .. offset*10 + 9``.
        """
        publisher = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def_id = self.pubsub_management.read_stream_definition(
            stream_id=stream_id)._id
        granule_rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        first_value = offset * 10
        granule_rdt['time'] = np.arange(10) + first_value
        granule_rdt['temp'] = np.arange(10) + first_value
        publisher.publish(granule_rdt.to_granule())

    def publish_fake_data(self, stream_id, route):
        """Publish four consecutive ten-sample granules (offsets 0 to 3)."""
        for granule_offset in xrange(4):
            self.publish_hifi(stream_id, route, granule_offset)
        

    def get_datastore(self, dataset_id):
        """Return the SCIDATA-profile datastore named by the dataset resource."""
        store_name = self.dataset_management.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(
            store_name, DataStore.DS_PROFILE.SCIDATA)

    def validate_granule_subscription(self, msg, route, stream_id):
        """Subscriber callback: validate a received message and signal arrival.

        Empty-dict messages are ignored. Any other message must be a
        ``Granule``; its contents are logged and ``self.event`` is set so a
        waiting test can proceed.

        :param msg: the received message
        :param route: unused here
        :param stream_id: unused here
        """
        if msg == {}:
            return
        # Check the type before decoding, so a malformed message fails with
        # the descriptive assertion rather than an error from the loader.
        self.assertIsInstance(msg, Granule, 'Message is improperly formatted. (%s)' % type(msg))
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.event.set()

    def make_file_data(self):
        """Build a file message: a fixed text body plus File metadata.

        The file name is '/examples/<8 random chars>.txt' in group 'example1'.
        """
        import uuid
        from interface.objects import File

        short_id = str(uuid.uuid4())[:8]
        file_meta = File(name='/examples/%s.txt' % short_id, group_id='example1')
        return {'body': 'hello world\n', 'meta': file_meta}

    def publish_file(self, stream_id, stream_route):
        """Publish one generated file message on the given stream."""
        file_publisher = StandaloneStreamPublisher(stream_id, stream_route)
        file_message = self.make_file_data()
        file_publisher.publish(file_message)
        
    def wait_until_we_have_enough_granules(self, dataset_id='', granules=4):
        """Block until the dataset's view holds at least *granules* rows.

        The view is polled every 0.1 s inside a 40 second gevent timeout.
        The final view contents are logged.
        """
        datastore = self.get_datastore(dataset_id)
        dataset = self.dataset_management.read_dataset(dataset_id)

        with gevent.timeout.Timeout(40):
            while True:
                enough = len(datastore.query_view(dataset.view_name)) >= granules
                gevent.sleep(0.1)
                if enough:
                    break

        log.info(datastore.query_view(dataset.view_name))




    def wait_until_we_have_enough_files(self):
        """Block until the filesystem datastore catalogues at least one file.

        Polls the 'catalog/file_by_owner' view every 0.1 s.

        :raises Timeout: if no file shows up within 10 seconds
        """
        datastore = self.container.datastore_manager.get_datastore('filesystem', DataStore.DS_PROFILE.FILESYSTEM)

        deadline = time.time() + 10
        while len(datastore.query_view('catalog/file_by_owner')) < 1:
            if time.time() >= deadline:
                raise Timeout('Files are not populating in time.')
            # Sleep between polls instead of spinning, matching
            # wait_until_we_have_enough_granules.
            gevent.sleep(0.1)


    def create_dataset(self, parameter_dict_id=''):
        """Create 'test_dataset' on a time-series domain and return its id.

        When *parameter_dict_id* is empty, the 'ctd_parsed_param_dict'
        dictionary is looked up and used instead.
        """
        temporal, spatial = time_series_domain()
        spatial_dump = spatial.dump()
        temporal_dump = temporal.dump()

        pdict_id = parameter_dict_id or self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)

        return self.dataset_management.create_dataset(
            'test_dataset',
            parameter_dictionary_id=pdict_id,
            spatial_domain=spatial_dump,
            temporal_domain=temporal_dump)

    @unittest.skip('Doesnt work')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_replay_pause(self):
        """Drive a replay process through start, pause, resume and stop.

        A coverage is filled with 100 time/temp samples and persisted, then
        replayed onto a dedicated stream on exchange point 'xp1'.
        ``self.event`` (set by ``validate_granule_subscription`` for each
        received granule) is used to detect whether granules are flowing.

        The test is unconditionally skipped ('Doesnt work').
        """
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        # From here on pdict_id refers to the extended dictionary.
        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        replay_stream, replay_route = self.pubsub_management.create_stream('replay', 'xp1', stream_definition_id=stream_def_id)
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_coverage(dataset_id)

        # Fill the coverage directly with 100 samples instead of ingesting.
        bb = CoverageCraft(scov)
        bb.rdt['time'] = np.arange(100)
        bb.rdt['temp'] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(dataset_id, bb.coverage) # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp('xp1')
        # Record the queue name so tearDown deletes the queue.
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()
        
        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))
        
        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()
        
        # Make sure there's no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))
    
        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()


    @attr('SMOKE') 
    def test_dm_end_2_end(self):
        """End-to-end smoke test: produce, ingest, retrieve, replay and slice.

        Steps:
          1. Create a stream backed by the CTD dictionary extended with
             'binary' and 'records' contexts.
          2. Persist the stream into a new dataset and launch a producer.
          3. Retrieve the whole dataset in one call and check the first ten
             'time' and 'binary' values.
          4. Replay the dataset onto a second stream and wait for a granule
             to reach the subscriber.
          5. Retrieve the slice ``tdoa=slice(0, 5)`` and check its time values.
        """
        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        # From here on pdict_id refers to the extended dictionary.
        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        
        stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)


        stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)




        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream 
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        #--------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id,4)
        
        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------
        
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        # The first ten samples must have time 0..9 and 'hi' in every binary slot.
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(),'%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all())

        
        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
        self.replay_id, process_id =  self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

    
        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        # Record the queue name so tearDown deletes the queue.
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()
        
        # validate_granule_subscription sets self.event when a granule arrives.
        self.assertTrue(self.event.wait(10))
        subscriber.stop()

        self.data_retriever.cancel_replay_agent(self.replay_id)


        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        # The comparison may yield a plain bool instead of an array; accept either.
        self.assertTrue(b.all() if not isinstance(b,bool) else b)


    def test_retrieve_and_transform(self):
        """Retrieve an ingested dataset through the L2 salinity transform.

        Two ten-sample CTD granules (time 0-9 and 10-19) are published on a
        stream on exchange point 'xp1', then the dataset is retrieved with
        ``CTDL2SalinityTransformAlgorithm`` applied and every returned
        salinity value is asserted to be non-zero.
        """
        # Stream definition and stream for the CTD data
        ctd_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd data', parameter_dictionary_id=ctd_pdict_id)
        ctd_stream_id, route = self.pubsub_management.create_stream(
            'ctd stream', 'xp1', stream_definition_id=ctd_stream_def_id)

        # Stream definition for the salinity data
        sal_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            'sal data', parameter_dictionary_id=sal_pdict_id)

        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(ctd_pdict_id)
        # The datastore is fetched before persistence is switched on; the
        # return value is not needed here.
        self.get_datastore(dataset_id)
        self.ingestion_management.persist_data_stream(
            stream_id=ctd_stream_id,
            ingestion_configuration_id=config_id,
            dataset_id=dataset_id)

        # First granule: time 0..9 with random temp and conductivity.
        ctd_rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        ctd_rdt['time'] = np.arange(10)
        ctd_rdt['temp'] = np.random.randn(10) * 10 + 30
        ctd_rdt['conductivity'] = np.random.randn(10) * 2 + 10

        ctd_publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        ctd_publisher.publish(ctd_rdt.to_granule())

        # Second granule: same values, time shifted to 10..19.
        ctd_rdt['time'] = np.arange(10, 20)
        ctd_publisher.publish(ctd_rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 2)

        transform_kwargs = dict(params=sal_stream_def_id)
        transformed = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'CTDL2SalinityTransformAlgorithm',
            kwargs=transform_kwargs)
        sal_rdt = RecordDictionaryTool.load_from_granule(transformed)
        for salinity in sal_rdt['salinity']:
            self.assertNotEquals(salinity, 0)



    def test_last_granule(self):
        # Publishes two granules and then polls retrieve_last_granule and
        # retrieve_last_data_points until they return the expected time values
        # (10..19 for the last granule, 15..19 for the last five points).

        #--------------------------------------------------------------------------------
        # Build the stream, ingestion configuration and dataset used by the test
        #--------------------------------------------------------------------------------
        param_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_definition_id = self.pubsub_management.create_stream_definition(
            'ctd parsed', parameter_dictionary_id=param_dict_id)
        stream_id, route = self.pubsub_management.create_stream(
            'last_granule',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_definition_id)
        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(param_dict_id)

        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id)

        #--------------------------------------------------------------------------------
        # Get the datastore before anything is published
        #--------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        for offset in (0, 1):
            self.publish_hifi(stream_id, route, offset)

        # Two granules are all this test needs
        self.wait_until_we_have_enough_granules(dataset_id, 2)

        def times_match(granule, expected_times):
            # True only when the comparison is elementwise and every element
            # matches; a plain bool comparison result counts as a mismatch.
            loaded = RecordDictionaryTool.load_from_granule(granule)
            comparison = loaded['time'] == expected_times
            if isinstance(comparison, bool):
                return False
            return comparison.all()

        def last_granule_ok():
            granule = self.data_retriever.retrieve_last_granule(dataset_id)
            return times_match(granule, np.arange(10) + 10)

        self.assertTrue(poll(last_granule_ok))

        def last_points_ok():
            granule = self.data_retriever.retrieve_last_data_points(dataset_id, 5)
            return times_match(granule, np.arange(15, 20))

        self.assertTrue(poll(last_points_ok))



    def test_replay_with_parameters(self):
        # Retrieves a dataset with a query restricted by time range, stride and
        # parameter list, and checks that only the requested parameters and the
        # strided time values come back.

        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        # From here on pdict_id refers to the extended dictionary created for this test.
        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)

        #--------------------------------------------------------------------------------
        # Coerce the datastore into existence (beats race condition)
        #--------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        self.launch_producer(stream_id)

        self.wait_until_we_have_enough_granules(dataset_id, 4)

        query = {
            'start_time': 0,
            'end_time':   20,
            'stride_time' : 2,
            'parameters': ['time','temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(0, 20, 2) == rdt['time']
        # The comparison can collapse to a plain bool instead of an elementwise
        # array (the other tests in this file guard against the same thing);
        # a bool has no .all(), so use it directly to get a clean failure.
        times_match = comp if isinstance(comp, bool) else comp.all()
        self.assertTrue(times_match, '%s' % rdt.pretty_print())
        # Only the parameters named in the query may be present.
        self.assertEqual(set(rdt.iterkeys()), set(['time','temp']))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp'])
        # assertGreaterEqual reports the actual extent when the check fails.
        self.assertGreaterEqual(extents['time'], 20)
        self.assertGreaterEqual(extents['temp'], 20)



    def test_repersist_data(self):
        # Publishes two granules, stops and restarts persistence of the stream,
        # publishes two more, and checks that a full retrieve of the dataset
        # returns time values 0..39.
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub_management.create_stream_definition(name='ctd', parameter_dictionary_id=pdict_id)
        stream_id, route = self.pubsub_management.create_stream(name='repersist', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
        self.get_datastore(dataset_id)

        # First pair of granules while persistence is active
        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, 2)

        # Turn persistence off and back on for the same stream and dataset
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)

        # Second pair of granules after persistence was re-enabled
        self.publish_hifi(stream_id, route, 2)
        self.publish_hifi(stream_id, route, 3)
        self.wait_until_we_have_enough_granules(dataset_id, 4)

        success = False
        # Retry the retrieve for up to 5 seconds; the Timeout is raised inside
        # the block if the expected data never shows up.
        with gevent.timeout.Timeout(5):
            while True:
                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0, 40)
                # A plain bool means the comparison was not elementwise, so
                # the data is not complete yet.
                if not isinstance(comp, bool):
                    success = comp.all()
                if success:
                    # Leave immediately: sleeping after the data has been
                    # verified only wastes a second and could let the timeout
                    # fire on an already successful check.
                    break
                gevent.sleep(1)

        self.assertTrue(success)
class TestActivateInstrumentIntegration(IonIntegrationTestCase):

    def setUp(self):
        # Start container
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Now create client to DataProductManagementService
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.pubsubcli =  PubsubManagementServiceClient(node=self.container.node)
        self.imsclient = InstrumentManagementServiceClient(node=self.container.node)
        self.dpclient = DataProductManagementServiceClient(node=self.container.node)
        self.datasetclient =  DatasetManagementServiceClient(node=self.container.node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=self.container.node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=self.container.node)
        self.dataproductclient = DataProductManagementServiceClient(node=self.container.node)
        self.dataretrieverclient = DataRetrieverServiceClient(node=self.container.node)
        self.dataset_management = DatasetManagementServiceClient()
        
        #setup listerner vars
        self._data_greenlets = []
        self._no_samples = None
        self._samples_received = []


    def create_logger(self, name, stream_id=''):

        # logger process
        producer_definition = ProcessDefinition(name=name+'_logger')
        producer_definition.executable = {
            'module':'ion.processes.data.stream_granule_logger',
            'class':'StreamGranuleLogger'
        }

        logger_procdef_id = self.processdispatchclient.create_process_definition(process_definition=producer_definition)
        configuration = {
            'process':{
                'stream_id':stream_id,
                }
        }
        pid = self.processdispatchclient.schedule_process(process_definition_id=logger_procdef_id,
                                                          configuration=configuration)

        return pid

    def get_datastore(self, dataset_id):
        dataset = self.datasetclient.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        datastore = self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)
        return datastore


    #@unittest.skip("TBD")
    def test_activateInstrumentSample(self):

        self.loggerpids = []

        # Create InstrumentModel
        instModel_obj = IonObject(RT.InstrumentModel,
                                  name='SBE37IMModel',
                                  description="SBE37IMModel",
                                  stream_configuration= {'raw': 'ctd_raw_param_dict' , 'parsed': 'ctd_parsed_param_dict' })
        instModel_id = self.imsclient.create_instrument_model(instModel_obj)
        print  'new InstrumentModel id = %s ' % instModel_id

        # Create InstrumentAgent
        instAgent_obj = IonObject(RT.InstrumentAgent,
                                  name='agent007',
                                  description="SBE37IMAgent",
                                  driver_module="mi.instrument.seabird.sbe37smb.ooicore.driver",
                                  driver_class="SBE37Driver" )
        instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj)
        print  'new InstrumentAgent id = %s' % instAgent_id

        self.imsclient.assign_instrument_model_to_instrument_agent(instModel_id, instAgent_id)

        # Create InstrumentDevice
        print 'test_activateInstrumentSample: Create instrument resource to represent the SBE37 ' +\
                 '(SA Req: L4-CI-SA-RQ-241) '
        instDevice_obj = IonObject(RT.InstrumentDevice,
                                   name='SBE37IMDevice',
                                   description="SBE37IMDevice",
                                   serial_number="12345" )
        instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)
        self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, instDevice_id)

        print "test_activateInstrumentSample: new InstrumentDevice id = %s    (SA Req: L4-CI-SA-RQ-241) " %\
                  instDevice_id

        port_agent_config = {
            'device_addr': 'sbe37-simulator.oceanobservatories.org',
            'device_port': 4001,
            'process_type': PortAgentProcessType.UNIX,
            'binary_path': "port_agent",
            'command_port': 4003,
            'data_port': 4000,
            'log_level': 5,
        }

        instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance, name='SBE37IMAgentInstance',
                                          description="SBE37IMAgentInstance",
                                          port_agent_config = port_agent_config)


        instAgentInstance_id = self.imsclient.create_instrument_agent_instance(instAgentInstance_obj,
                                                                               instAgent_id,
                                                                               instDevice_id)

        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()


        parsed_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        parsed_stream_def_id = self.pubsubcli.create_stream_definition(name='parsed', parameter_dictionary_id=parsed_pdict_id)

        raw_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_raw_param_dict', id_only=True)
        raw_stream_def_id = self.pubsubcli.create_stream_definition(name='raw', parameter_dictionary_id=raw_pdict_id)


        #-------------------------------
        # Create Raw and Parsed Data Products for the device
        #-------------------------------

        dp_obj = IonObject(RT.DataProduct,
            name='the parsed data',
            description='ctd stream test',
            temporal_domain = tdom,
            spatial_domain = sdom)

        data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=parsed_stream_def_id)
        print  'new dp_id = %s' % data_product_id1

        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=data_product_id1)



        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
        print  'Data product streams1 = %s' % stream_ids

        # Retrieve the id of the OUTPUT stream from the out Data Product
        dataset_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasDataset, RT.Dataset, True)
        print  'Data set for data_product_id1 = %s' % dataset_ids[0]
        self.parsed_dataset = dataset_ids[0]
        #create the datastore at the beginning of each int test that persists data
        self.get_datastore(self.parsed_dataset)

        self.dpclient.activate_data_product_persistence(data_product_id=data_product_id1)

        pid = self.create_logger('ctd_parsed', stream_ids[0] )
        self.loggerpids.append(pid)


        dp_obj = IonObject(RT.DataProduct,
            name='the raw data',
            description='raw stream test',
            temporal_domain = tdom,
            spatial_domain = sdom)

        data_product_id2 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=raw_stream_def_id)
        print  'new dp_id = %s' % str(data_product_id2)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=data_product_id2)

        self.dpclient.activate_data_product_persistence(data_product_id=data_product_id2)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id2, PRED.hasStream, None, True)
        print  'Data product streams2 = %s' % str(stream_ids)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        dataset_ids, _ = self.rrclient.find_objects(data_product_id2, PRED.hasDataset, RT.Dataset, True)
        print  'Data set for data_product_id2 = %s' % dataset_ids[0]
        self.raw_dataset = dataset_ids[0]

        def start_instrument_agent():
            self.imsclient.start_instrument_agent_instance(instrument_agent_instance_id=instAgentInstance_id)

        gevent.joinall([gevent.spawn(start_instrument_agent)])

        self.addCleanup(self.imsclient.stop_instrument_agent_instance,
                        instrument_agent_instance_id=instAgentInstance_id)

        #wait for start
        instance_obj = self.imsclient.read_instrument_agent_instance(instAgentInstance_id)
        gate = ProcessStateGate(self.processdispatchclient.read_process,
                                instance_obj.agent_process_id,
                                ProcessStateEnum.RUNNING)
        self.assertTrue(gate.await(30), "The instrument agent instance (%s) did not spawn in 30 seconds" %
                                        instance_obj.agent_process_id)

        inst_agent_instance_obj = self.imsclient.read_instrument_agent_instance(instAgentInstance_id)
        print  'Instrument agent instance obj: = %s' % str(inst_agent_instance_obj)

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = ResourceAgentClient(instDevice_id,
                                              to_name=inst_agent_instance_obj.agent_process_id,
                                              process=FakeProcess())

        print "test_activateInstrumentSample: got ia client %s" % str(self._ia_client)

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        retval = self._ia_client.execute_agent(cmd)
        print "test_activateInstrumentSample: initialize %s" % str(retval)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.INACTIVE)

        print "(L4-CI-SA-RQ-334): Sending go_active command "
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        reply = self._ia_client.execute_agent(cmd)
        print "test_activateInstrument: return value from go_active %s" % str(reply)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        cmd = AgentCommand(command=ResourceAgentEvent.GET_RESOURCE_STATE)
        retval = self._ia_client.execute_agent(cmd)
        state = retval.result
        print "(L4-CI-SA-RQ-334): current state after sending go_active command %s" % str(state)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        reply = self._ia_client.execute_agent(cmd)
        print "test_activateInstrumentSample: run %s" % str(reply)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=ResourceAgentEvent.PAUSE)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.STOPPED)

        cmd = AgentCommand(command=ResourceAgentEvent.RESUME)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=ResourceAgentEvent.CLEAR)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=SBE37ProtocolEvent.ACQUIRE_SAMPLE)
        retval = self._ia_client.execute_resource(cmd)
        print "test_activateInstrumentSample: return from sample %s" % str(retval)
        retval = self._ia_client.execute_resource(cmd)
        print "test_activateInstrumentSample: return from sample %s" % str(retval)
        retval = self._ia_client.execute_resource(cmd)
        print "test_activateInstrumentSample: return from sample %s" % str(retval)

        print "test_activateInstrumentSample: calling reset "
        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        reply = self._ia_client.execute_agent(cmd)
        print "test_activateInstrumentSample: return from reset %s" % str(reply)

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC Call to start_retreive
        #--------------------------------------------------------------------------------

        replay_data = self.dataretrieverclient.retrieve(self.parsed_dataset)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        log.debug("RDT parsed: %s", str(rdt.pretty_print()) )
        temp_vals = rdt['temp']
        self.assertTrue(len(temp_vals) == 3)


        replay_data = self.dataretrieverclient.retrieve(self.raw_dataset)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        log.debug("RDT raw: %s", str(rdt.pretty_print()) )

        raw_vals = rdt['raw']
        self.assertTrue(len(raw_vals) == 3)


        print "l4-ci-sa-rq-138"
        """
        Physical resource control shall be subject to policy

        Instrument management control capabilities shall be subject to policy

        The actor accessing the control capabilities must be authorized to send commands.

        note from maurice 2012-05-18: Talk to tim M to verify that this is policy.  If it is then talk with Stephen to
                                      get an example of a policy test and use that to create a test stub that will be
                                      completed when we have instrument policies.

        Tim M: The "actor", aka observatory operator, will access the instrument through ION.

        """

#        #--------------------------------------------------------------------------------
#        # Get the extended data product to see if it contains the granules
#        #--------------------------------------------------------------------------------
#        extended_product = self.dpclient.get_data_product_extension(data_product_id1)
#        self.assertEqual(data_product_id1, extended_product._id)
#        log.debug( "test_activateInstrumentSample: extended_product.computed.last_granule.value %s", str(extended_product.computed.last_granule.value) )
#        log.debug( "test_activateInstrumentSample: extended_product.computed.recent_granules.value %s", str(extended_product.computed.recent_granules.value) )
#        log.debug("test_activateInstrumentSample: extended_product.computed.provenance_product_list.value %s", str(extended_product.computed.provenance_product_list.value) )



        #-------------------------------
        # Deactivate loggers
        #-------------------------------

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)
class TestActivateRSNVel3DInstrument(IonIntegrationTestCase):

    def setUp(self):
        """Start the container with the r2deploy services and create the service clients."""
        super(TestActivateRSNVel3DInstrument, self).setUp()

        # Bring up the container and deploy the services, passing an empty DotDict as config
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml', DotDict())

        # Every client except dataset_management is bound to the container's node
        node = self.container.node
        self.rrclient = ResourceRegistryServiceClient(node=node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=node)
        self.pubsubcli = PubsubManagementServiceClient(node=node)
        self.imsclient = InstrumentManagementServiceClient(node=node)
        self.dpclient = DataProductManagementServiceClient(node=node)
        self.datasetclient = DatasetManagementServiceClient(node=node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=node)
        self.dataproductclient = DataProductManagementServiceClient(node=node)
        self.dataretrieverclient = DataRetrieverServiceClient(node=node)
        self.dataset_management = DatasetManagementServiceClient()


    def create_logger(self, name, stream_id=''):
        """
        Schedule a StreamGranuleLogger process for the given stream.

        @param name         prefix of the process definition name ('<name>_logger')
        @param stream_id    stream id placed in the process configuration
        @retval             the process id returned by schedule_process
        """
        # Process definition pointing at the granule logger implementation
        logger_definition = ProcessDefinition(name=name + '_logger')
        logger_definition.executable = {
            'module': 'ion.processes.data.stream_granule_logger',
            'class': 'StreamGranuleLogger',
        }
        definition_id = self.processdispatchclient.create_process_definition(
            process_definition=logger_definition)

        # Launch the logger, telling it which stream to use
        return self.processdispatchclient.schedule_process(
            process_definition_id=definition_id,
            configuration={'process': {'stream_id': stream_id}})




    @attr('LOCOINT')
    @unittest.skip('under construction')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    @patch.dict(CFG, {'endpoint':{'receive':{'timeout': 180}}})
    def test_activate_rsn_vel3d(self):


        log.info("--------------------------------------------------------------------------------------------------------")
        # load_parameter_scenarios
        self.container.spawn_process("Loader", "ion.processes.bootstrap.ion_loader", "IONLoader", config=dict(
            op="load",
            scenario="BETA",
            path="master",
            categories="ParameterFunctions,ParameterDefs,ParameterDictionary,StreamDefinition",
            clearcols="owner_id,org_ids",
            assets="res/preload/r2_ioc/ooi_assets",
            parseooi="True",
        ))

        self.loggerpids = []

        # Create InstrumentModel
        instModel_obj = IonObject(RT.InstrumentModel,
                                  name='Vel3DMModel',
                                  description="Vel3DMModel")
        instModel_id = self.imsclient.create_instrument_model(instModel_obj)
        log.debug( 'test_activate_rsn_vel3d new InstrumentModel id = %s ', instModel_id)


        raw_config = StreamConfiguration(stream_name='raw', parameter_dictionary_name='raw' )
        vel3d_b_sample = StreamConfiguration(stream_name='vel3d_b_sample', parameter_dictionary_name='vel3d_b_sample')
        vel3d_b_engineering = StreamConfiguration(stream_name='vel3d_b_engineering', parameter_dictionary_name='vel3d_b_engineering')

        RSN_VEL3D_01 = {
                           'DEV_ADDR'  : "10.180.80.6",
                           'DEV_PORT'  : 2101,
                           'DATA_PORT' : 1026,
                           'CMD_PORT'  : 1025,
                           'PA_BINARY' : "port_agent"
                       }

        # Create InstrumentAgent
        instAgent_obj = IonObject(RT.InstrumentAgent,
                                  name='Vel3DAgent',
                                  description="Vel3DAgent",
                                  driver_uri="http://sddevrepo.oceanobservatories.org/releases/nobska_mavs4_ooicore-0.0.7-py2.7.egg",
                                  stream_configurations = [raw_config, vel3d_b_sample, vel3d_b_engineering])
        instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj)
        log.debug('test_activate_rsn_vel3d new InstrumentAgent id = %s', instAgent_id)

        self.imsclient.assign_instrument_model_to_instrument_agent(instModel_id, instAgent_id)

        # Create InstrumentDevice
        log.debug('test_activate_rsn_vel3d: Create instrument resource to represent the Vel3D ')
        instDevice_obj = IonObject(RT.InstrumentDevice,
                                   name='Vel3DDevice',
                                   description="Vel3DDevice",
                                   serial_number="12345" )
        instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)
        self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, instDevice_id)
        log.debug("test_activate_rsn_vel3d: new InstrumentDevice id = %s  " , instDevice_id)


        port_agent_config = {
            'device_addr':  '10.180.80.6',
            'device_port':  2101,
            'process_type': PortAgentProcessType.UNIX,
            'binary_path': "port_agent",
            'port_agent_addr': 'localhost',
            'command_port': 1025,
            'data_port': 1026,
            'log_level': 5,
            'type': PortAgentType.ETHERNET
        }

        instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance, name='Vel3DAgentInstance',
                                          description="Vel3DAgentInstance",
                                          port_agent_config = port_agent_config,
                                            alerts= [])


        instAgentInstance_id = self.imsclient.create_instrument_agent_instance(instAgentInstance_obj,
                                                                               instAgent_id,
                                                                               instDevice_id)


        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()


        parsed_sample_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('vel3d_b_sample', id_only=True)
        parsed_sample_stream_def_id = self.pubsubcli.create_stream_definition(name='vel3d_b_sample', parameter_dictionary_id=parsed_sample_pdict_id)

        parsed_eng_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('vel3d_b_engineering', id_only=True)
        parsed_eng_stream_def_id = self.pubsubcli.create_stream_definition(name='vel3d_b_engineering', parameter_dictionary_id=parsed_eng_pdict_id)

        raw_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('raw', id_only=True)
        raw_stream_def_id = self.pubsubcli.create_stream_definition(name='raw', parameter_dictionary_id=raw_pdict_id)


        #-------------------------------
        # Create Raw and Parsed Data Products for the device
        #-------------------------------

        dp_obj = IonObject(RT.DataProduct,
            name='vel3d_b_sample',
            description='vel3d_b_sample',
            temporal_domain = tdom,
            spatial_domain = sdom)

        sample_data_product_id = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=parsed_sample_stream_def_id)
        log.debug( 'new dp_id = %s' , sample_data_product_id)
        self.dpclient.activate_data_product_persistence(data_product_id=sample_data_product_id)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=sample_data_product_id)



        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(sample_data_product_id, PRED.hasStream, None, True)
        log.debug('sample_data_product streams1 = %s', stream_ids)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        dataset_ids, _ = self.rrclient.find_objects(sample_data_product_id, PRED.hasDataset, RT.Dataset, True)
        log.debug('Data set for sample_data_product = %s' , dataset_ids[0])
        self.parsed_dataset = dataset_ids[0]

        pid = self.create_logger('vel3d_b_sample', stream_ids[0] )
        self.loggerpids.append(pid)


        dp_obj = IonObject(RT.DataProduct,
            name='vel3d_b_engineering',
            description='vel3d_b_engineering',
            temporal_domain = tdom,
            spatial_domain = sdom)

        eng_data_product_id = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=parsed_eng_stream_def_id)
        log.debug( 'new dp_id = %s' , eng_data_product_id)
        self.dpclient.activate_data_product_persistence(data_product_id=eng_data_product_id)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=eng_data_product_id)



        dp_obj = IonObject(RT.DataProduct,
            name='the raw data',
            description='raw stream test',
            temporal_domain = tdom,
            spatial_domain = sdom)

        data_product_id2 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=raw_stream_def_id)
        log.debug('new dp_id = %s', data_product_id2)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=data_product_id2)

        self.dpclient.activate_data_product_persistence(data_product_id=data_product_id2)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id2, PRED.hasStream, None, True)
        log.debug('test_activate_rsn_vel3d Data product streams2 = %s' , str(stream_ids))

        # Retrieve the id of the OUTPUT stream from the out Data Product
        dataset_ids, _ = self.rrclient.find_objects(data_product_id2, PRED.hasDataset, RT.Dataset, True)
        log.debug('test_activate_rsn_vel3d Data set for data_product_id2 = %s' , dataset_ids[0])
        self.raw_dataset = dataset_ids[0]


        def start_instrument_agent():
            self.imsclient.start_instrument_agent_instance(instrument_agent_instance_id=instAgentInstance_id)

        gevent.joinall([gevent.spawn(start_instrument_agent)])


        #cleanup
        self.addCleanup(self.imsclient.stop_instrument_agent_instance,
            instrument_agent_instance_id=instAgentInstance_id)


        #wait for start
        inst_agent_instance_obj = self.imsclient.read_instrument_agent_instance(instAgentInstance_id)
        gate = AgentProcessStateGate(self.processdispatchclient.read_process,
            instDevice_id,
            ProcessStateEnum.RUNNING)
        self.assertTrue(gate.await(30), "The instrument agent instance (%s) did not spawn in 30 seconds" %
                                        gate.process_id)

        #log.trace('Instrument agent instance obj: = %s' , str(inst_agent_instance_obj))

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = ResourceAgentClient(instDevice_id,
            to_name=gate.process_id,
            process=FakeProcess())


        def check_state(label, desired_state):
            actual_state = self._ia_client.get_agent_state()
            log.debug("%s instrument agent is in state '%s'", label, actual_state)
            self.assertEqual(desired_state, actual_state)

        log.debug("test_activate_rsn_vel3d: got ia client %s" , str(self._ia_client))

        check_state("just-spawned", ResourceAgentState.UNINITIALIZED)

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        retval = self._ia_client.execute_agent(cmd)
        log.debug("test_activate_rsn_vel3d: initialize %s" , str(retval))
        check_state("initialized", ResourceAgentState.INACTIVE)

        log.debug("test_activate_rsn_vel3d Sending go_active command ")
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activate_rsn_vel3d: return value from go_active %s" , str(reply))
        check_state("activated", ResourceAgentState.IDLE)


        cmd = AgentCommand(command=ResourceAgentEvent.GET_RESOURCE_STATE)
        retval = self._ia_client.execute_agent(cmd)
        state = retval.result
        log.debug("current state after sending go_active command %s" , str(state))
#
        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activate_rsn_vel3d: run %s" , str(reply))
        check_state("commanded", ResourceAgentState.COMMAND)



        cmd = AgentCommand(command=ResourceAgentEvent.GET_RESOURCE_STATE)
        retval = self._ia_client.execute_agent(cmd)
        state = retval.result
        log.debug("current state after sending run command %s" , str(state))


#        cmd = AgentCommand(command=ProtocolEvent.START_AUTOSAMPLE)
#        reply = self._ia_client.execute_agent(cmd)
#        log.debug("test_activate_rsn_vel3d: run %s" , str(reply))
#        state = self._ia_client.get_agent_state()
#        self.assertEqual(ResourceAgentState.COMMAND, state)
#
#        gevent.sleep(5)
#
#        cmd = AgentCommand(command=ProtocolEvent.STOP_AUTOSAMPLE)
#        reply = self._ia_client.execute_agent(cmd)
#        log.debug("test_activate_rsn_vel3d: run %s" , str(reply))
#        state = self._ia_client.get_agent_state()
#        self.assertEqual(ResourceAgentState.COMMAND, state)
#
#        cmd = AgentCommand(command=ResourceAgentEvent.GET_RESOURCE_STATE)
#        retval = self._ia_client.execute_agent(cmd)
#        state = retval.result
#        log.debug("current state after sending STOP_AUTOSAMPLE command %s" , str(state))

#
#        cmd = AgentCommand(command=ResourceAgentEvent.PAUSE)
#        retval = self._ia_client.execute_agent(cmd)
#        state = self._ia_client.get_agent_state()
#        self.assertEqual(ResourceAgentState.STOPPED, state)
#
#        cmd = AgentCommand(command=ResourceAgentEvent.RESUME)
#        retval = self._ia_client.execute_agent(cmd)
#        state = self._ia_client.get_agent_state()
#        self.assertEqual(ResourceAgentState.COMMAND, state)
#
#        cmd = AgentCommand(command=ResourceAgentEvent.CLEAR)
#        retval = self._ia_client.execute_agent(cmd)
#        state = self._ia_client.get_agent_state()
#        self.assertEqual(ResourceAgentState.IDLE, state)
#
#        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
#        retval = self._ia_client.execute_agent(cmd)
#        state = self._ia_client.get_agent_state()
#        self.assertEqual(ResourceAgentState.COMMAND, state)

        log.debug( "test_activate_rsn_vel3d: calling reset ")
        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activate_rsn_vel3d: return from reset %s" , str(reply))


        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC Call to start_retreive
        #--------------------------------------------------------------------------------

        replay_data_raw = self.dataretrieverclient.retrieve(self.raw_dataset)
        self.assertIsInstance(replay_data_raw, Granule)
        rdt_raw = RecordDictionaryTool.load_from_granule(replay_data_raw)
        log.debug("RDT raw: %s", str(rdt_raw.pretty_print()) )

        self.assertIn('raw', rdt_raw)
        raw_vals = rdt_raw['raw']



        #--------------------------------------------------------------------------------
        # Deactivate loggers
        #--------------------------------------------------------------------------------

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)

        self.dpclient.delete_data_product(sample_data_product_id)
        self.dpclient.delete_data_product(eng_data_product_id)
        self.dpclient.delete_data_product(data_product_id2)
# Example no. 16
# 0
class ExhaustiveParameterTest(IonIntegrationTestCase):
    def setUp(self):
        '''
        Starts the container with res/deploy/r2params.yml, creates the service
        clients used by this test and builds one persisted data product for
        every ParameterDictionary resource found in the resource registry.
        The ids of the created data products are collected in self.dp_ids.
        '''
        self.i = 0
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2params.yml')

        self.dataset_management = DatasetManagementServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.data_product_management = DataProductManagementServiceClient()
        self.resource_registry = self.container.resource_registry
        self.data_retriever = DataRetrieverServiceClient()

        param_dicts, _ = self.resource_registry.find_resources(
            restype='ParameterDictionary', id_only=False)

        self.dp_ids = []
        for param_dict in param_dicts:
            definition_id = self.pubsub_management.create_stream_definition(
                param_dict.name, parameter_dictionary_id=param_dict._id)
            product_id = self.make_dp(definition_id)
            # A falsy result from make_dp is not recorded.
            if not product_id:
                continue
            self.dp_ids.append(product_id)

    def make_dp(self, stream_def_id):
        '''
        Creates a data product for the given stream definition, activates its
        persistence and returns the id of the new data product.

        The stream definition's name is used for both the name and the
        description of the data product.
        '''
        temporal, spatial = time_series_domain()
        temporal_dump = temporal.dump()
        spatial_dump = spatial.dump()

        definition_name = self.resource_registry.read(stream_def_id).name
        product = DataProduct(
            name=definition_name,
            description=definition_name,
            processing_level_code='Parsed_Canonical',
            temporal_domain=temporal_dump,
            spatial_domain=spatial_dump)

        product_id = self.data_product_management.create_data_product(
            product, stream_definition_id=stream_def_id)
        self.data_product_management.activate_data_product_persistence(
            product_id)
        return product_id

    def fill_values(self, ptype, size):
        '''
        Returns sample data matching the parameter type `ptype`.

        Array, quantity and record types get `size` values; the constant range,
        constant and category types get a single value. Any other type yields
        None. The checks run in a fixed order, so the first matching type wins.
        '''
        if isinstance(ptype, ArrayType):
            return ['blah'] * size

        if isinstance(ptype, QuantityType):
            # Sine wave sampled at `size` points, in the parameter's encoding
            ramp = np.arange(size, dtype=ptype.value_encoding)
            return np.sin(ramp * 2 * np.pi / 3)

        if isinstance(ptype, RecordType):
            return [{'record': 'ok'}] * size

        if isinstance(ptype, ConstantRangeType):
            return (1, 1000)

        if isinstance(ptype, ConstantType):
            scalar_type = np.dtype(ptype.value_encoding).type
            return scalar_type(1)

        if isinstance(ptype, CategoryType):
            category_keys = ptype.categories.keys()
            return category_keys[0]

        return None

    def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40):
        '''
        Polls the dataset every 0.2 seconds until it holds enough data.

        The wait ends once the last retrieved data point has a temporal value
        that is truthy and differs from the temporal parameter's fill value,
        and the first element of the dataset extents is at least `data_size`.
        The whole wait runs under a 40 second gevent.Timeout.
        '''
        with gevent.Timeout(40):
            while True:
                last_point = self.data_retriever.retrieve_last_data_points(
                    dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(last_point)
                time_name = rdt._pdict.temporal_parameter_name
                extents = self.dataset_management.dataset_extents(
                    dataset_id, time_name)[0]

                if (rdt[time_name]
                        and rdt[time_name][0] != rdt._pdict.get_context(
                            time_name).fill_value
                        and extents >= data_size):
                    break

                gevent.sleep(0.2)

    def write_to_data_product(self, data_product_id):
        '''
        Publishes one 40-record granule to the data product's stream, waits
        for it to be ingested, reads the dataset back and compares every field.

        Returns the list of field names whose retrieved values differ from the
        published ones. Returns None (after printing a notice) when the
        parameter dictionary has no temporal parameter.
        '''
        # Resolve the dataset, stream and stream definition of the data product
        datasets, _ = self.resource_registry.find_objects(
            data_product_id, 'hasDataset', id_only=True)
        dataset_id = datasets.pop()

        streams, _ = self.resource_registry.find_objects(
            data_product_id, 'hasStream', id_only=True)
        stream_id = streams.pop()

        definitions, _ = self.resource_registry.find_objects(
            stream_id, 'hasStreamDefinition', id_only=True)
        stream_def_id = definitions.pop()

        route = self.pubsub_management.read_stream_route(stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        time_param = rdt._pdict.temporal_parameter_name
        if time_param is None:
            product_name = self.resource_registry.read(data_product_id).name
            # Single-argument print(...) prints the same text under Python 2
            print('%s has no temporal parameter' % product_name)
            return
        rdt[time_param] = np.arange(40)

        # Fill every non-temporal field with sample values for its type
        for field in rdt.fields:
            if field != rdt._pdict.temporal_parameter_name:
                param_type = rdt._pdict.get_context(field).param_type
                rdt[field] = self.fill_values(param_type, 40)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 40)

        retrieved = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(retrieved)

        bad = []
        for field in rdt.fields:
            if np.array_equal(rdt[field], rdt_out[field]):
                continue
            print('%s' % field)
            print('%s != %s' % (rdt[field], rdt_out[field]))
            bad.append(field)

        return bad

    def test_data_products(self):
        '''
        Writes to and reads back every data product created in setUp and fails
        if any of them could not be round-tripped.

        For each data product the mismatching fields, or the traceback of the
        error raised while writing, are printed before the test fails.
        '''
        import traceback

        bad_data_products = {}
        for dp_id in self.dp_ids:
            try:
                bad_fields = self.write_to_data_product(dp_id)
            except (Exception, gevent.Timeout):
                # gevent.Timeout is listed explicitly because it may not derive
                # from Exception; a timed-out product is recorded like any
                # other failure. KeyboardInterrupt and SystemExit are no
                # longer swallowed, unlike with the previous bare except.
                bad_data_products[dp_id] = traceback.format_exc()
            else:
                if bad_fields:
                    bad_data_products[
                        dp_id] = "Couldn't write and retrieve %s." % bad_fields

        for dp_id, tb in bad_data_products.items():
            # Single-argument print(...) prints the same text under Python 2
            print('----------')
            print('Problem with %s' % self.resource_registry.read(dp_id).name)
            print(tb)
            print('----------')

        if bad_data_products:
            raise AssertionError('There are bad parameter dictionaries.')
# Example no. 17
# 0
class VisualizationIntegrationTestHelper(IonIntegrationTestCase):

    def create_ctd_input_stream_and_data_product(self, data_product_name='ctd_parsed'):
        '''
        Creates the service clients used by this helper, then creates a CTD
        parsed data product named `data_product_name`, assigns it to the
        'SBE37IMDevice' instrument device (created if no such device is
        registered) and activates its persistence.

        Returns the tuple (ctd_stream_id, ctd_parsed_data_product_id).
        '''
        check = self.assertTrue
        node = self.container.node

        # Service clients stored on the instance for use by the other helpers
        self.rrclient = ResourceRegistryServiceClient(node=node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=node)
        self.pubsubclient = PubsubManagementServiceClient(node=node)
        self.ingestclient = IngestionManagementServiceClient(node=node)
        self.imsclient = InstrumentManagementServiceClient(node=node)
        self.dataproductclient = DataProductManagementServiceClient(node=node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=node)
        self.datasetclient = DatasetManagementServiceClient(node=node)
        self.workflowclient = WorkflowManagementServiceClient(node=node)
        self.process_dispatcher = ProcessDispatcherServiceClient(node=node)
        self.vis_client = VisualizationServiceClient(node=node)

        #-------------------------------
        # CTD Parsed is the initial data product
        #-------------------------------
        # Stream definition for the data coming from the ctd simulator
        simulator_stream_def = SBE37_CDM_stream_definition()
        ctd_stream_def_id = self.pubsubclient.create_stream_definition(
            container=simulator_stream_def, name='Simulated CTD data')

        log.debug('Creating new CDM data product with a stream definition')

        spatial, temporal = CoverageCraft.create_domains()
        spatial = spatial.dump()
        temporal = temporal.dump()
        param_dict = CoverageCraft.create_parameters().dump()

        product = IonObject(RT.DataProduct,
            name=data_product_name,
            description='ctd stream test',
            temporal_domain=temporal,
            spatial_domain=spatial)

        product_id = self.dataproductclient.create_data_product(
            product, ctd_stream_def_id, param_dict)

        log.debug('new ctd_parsed_data_product_id = %s' % product_id)

        # A single device is enough for testing: reuse it when it already exists
        devices, _ = self.rrclient.find_resources(
            restype=RT.InstrumentDevice, name='SBE37IMDevice')
        if devices:
            device_id = devices[0]._id
        else:
            new_device = IonObject(RT.InstrumentDevice, name='SBE37IMDevice',
                description="SBE37IMDevice", serial_number="12345")
            device_id = self.imsclient.create_instrument_device(
                instrument_device=new_device)

        self.damsclient.assign_data_product(
            input_resource_id=device_id, data_product_id=product_id)

        self.dataproductclient.activate_data_product_persistence(
            data_product_id=product_id)

        # The output stream of the data product is the CTD input stream
        stream_ids, _ = self.rrclient.find_objects(
            product_id, PRED.hasStream, None, True)
        check(len(stream_ids) > 0)

        return stream_ids[0], product_id

    def create_data_product(self, dp_name = "", dp_description = "", stream_definition_id = ""):
        '''
        Creates a data product with CoverageCraft domains and parameters.

        dp_name               name of the new data product
        dp_description        description of the new data product
        stream_definition_id  id of the stream definition handed to the data
                              product management service. The previous body
                              read this name without ever defining it, so it
                              is now an optional argument.

        Returns the tuple (data_prod_id, data_prod_obj).

        The product is created through self.dataproductclient, which is set by
        create_ctd_input_stream_and_data_product. The previous body called
        self.create_data_product (this method) with three positional
        arguments, which could not work.
        '''
        craft = CoverageCraft
        sdom, tdom = craft.create_domains()
        sdom = sdom.dump()
        tdom = tdom.dump()
        parameter_dictionary = craft.create_parameters()   # this creates a ParameterDictionary object
        parameter_dictionary = parameter_dictionary.dump()  # this returns a python dictionary

        data_prod_obj = IonObject(RT.DataProduct,
            name=dp_name,
            description=dp_description,
            temporal_domain = tdom,
            spatial_domain = sdom)

        # Same call shape as in create_ctd_input_stream_and_data_product
        data_prod_id = self.dataproductclient.create_data_product(
            data_prod_obj, stream_definition_id, parameter_dictionary)

        return data_prod_id, data_prod_obj

    def start_simple_input_stream_process(self, ctd_stream_id):
        '''
        Starts a publisher process on `ctd_stream_id` using the default module
        and class of start_input_stream_process; returns that call's result.
        '''
        simple_pid = self.start_input_stream_process(ctd_stream_id)
        return simple_pid

    def start_sinusoidal_input_stream_process(self, ctd_stream_id):
        '''
        Starts the SinusoidalCtdPublisher process on `ctd_stream_id` through
        start_input_stream_process; returns that call's result.
        '''
        return self.start_input_stream_process(
            ctd_stream_id,
            module='ion.processes.data.sinusoidal_stream_publisher',
            class_name='SinusoidalCtdPublisher')

    def start_input_stream_process(self, ctd_stream_id, module = 'ion.processes.data.ctd_stream_publisher', class_name= 'SimpleCtdPublisher'):
        '''
        Registers a process definition for the publisher `class_name` found in
        `module` and schedules one process from it, configured with
        `ctd_stream_id` as its stream id.

        Returns the value handed back by schedule_process (kept as the
        simulator's process id by callers).
        '''
        # Process definition for the ctd simulator
        simulator_definition = ProcessDefinition()
        simulator_definition.executable = {'module': module, 'class': class_name}

        procdef_id = self.process_dispatcher.create_process_definition(
            process_definition=simulator_definition)

        # Start the ctd simulator so that it produces data on the stream
        return self.process_dispatcher.schedule_process(
            process_definition_id=procdef_id,
            configuration={'process': {'stream_id': ctd_stream_id}})

    def start_output_stream_and_listen(self, ctd_stream_id, data_product_stream_ids, message_count_per_stream=10):
        '''
        Subscribes to `data_product_stream_ids`, optionally starts the simple
        CTD publisher on `ctd_stream_id`, and collects the messages received.

        The wait ends once len(data_product_stream_ids) *
        message_count_per_stream messages have arrived, or fails after 30
        seconds. When `ctd_stream_id` is None no publisher is started.

        The publisher process, the subscription and the subscriber are shut
        down in a finally block, so they are also released when the wait times
        out or the assertion fails. Previously they were left running then.

        Returns the list of received messages.
        '''
        cc = self.container
        assertions = self.assertTrue

        ###
        ### Make a subscriber in the test to listen for transformed data
        ###
        salinity_subscription_id = self.pubsubclient.create_subscription(
            query=StreamQuery(data_product_stream_ids),
            exchange_name = 'workflow_test',
            exchange_point = 'science_data',
            name = "test workflow transformations",
        )

        pid = cc.spawn_process(name='dummy_process_for_test',
            module='pyon.ion.process',
            cls='SimpleProcess',
            config={})
        dummy_process = cc.proc_manager.procs[pid]

        subscriber_registrar = StreamSubscriberRegistrar(process=dummy_process, container=cc)

        result = gevent.event.AsyncResult()
        results = []
        message_count = len(data_product_stream_ids) * message_count_per_stream

        def message_received(message, headers):
            results.append(message)
            if len(results) >= message_count:   #Only wait for so many messages - per stream
                result.set(True)

        subscriber = subscriber_registrar.create_subscriber(exchange_name='workflow_test', callback=message_received)
        subscriber.start()

        # after the queue has been created it is safe to activate the subscription
        self.pubsubclient.activate_subscription(subscription_id=salinity_subscription_id)

        ctd_sim_pid = None
        try:
            #Start the input stream process
            if ctd_stream_id is not None:
                ctd_sim_pid = self.start_simple_input_stream_process(ctd_stream_id)

            # Assert that we have received data
            assertions(result.get(timeout=30))
        finally:
            # stop the flow - same shutdown order as the success path always had
            if ctd_sim_pid is not None:
                self.process_dispatcher.cancel_process(ctd_sim_pid) # kill the ctd simulator process - that is enough data

            self.pubsubclient.deactivate_subscription(subscription_id=salinity_subscription_id)

            subscriber.stop()

        return results


    def validate_messages(self, results):
        '''
        Checks a sequence of received granules that carry either 'temp' or
        'salinity' data.

        A granule with a 'temp' value must hold a numpy array; it also resets
        the remembered salinity values. Any other granule must hold a
        'salinity' numpy array whose minimum is greater than zero. The first
        salinity granule seen since the last reset is remembered, and each
        later one must hold exactly twice those values.
        '''
        assertions = self.assertTrue

        first_salinity_values = None

        for message in results:
            rdt = RecordDictionaryTool.load_from_granule(message)

            try:
                temp = get_safe(rdt, 'temp')
            except KeyError:
                temp = None

            if temp is not None:
                assertions(isinstance(temp, numpy.ndarray))

                log.info( 'temperature=' + str(numpy.nanmin(temp)))

                first_salinity_values = None
                continue

            # you have to know the name of the coverage in stream def
            salinity = get_safe(rdt, 'salinity')

            # Identity test: "!= None" on a numpy array is an elementwise
            # comparison rather than a presence check. It also runs before
            # nanmin now, so a missing value fails the assertion cleanly.
            assertions(salinity is not None)
            assertions(isinstance(salinity, numpy.ndarray))

            log.info( 'salinity=' + str(numpy.nanmin(salinity)))
            assertions(numpy.nanmin(salinity) > 0.0) # salinity should always be greater than 0

            if first_salinity_values is None:
                first_salinity_values = salinity.tolist()
                continue

            second_salinity_values = salinity.tolist()
            assertions(len(first_salinity_values) == len(second_salinity_values))
            for first_value, second_value in zip(first_salinity_values, second_salinity_values):
                assertions(first_value*2.0 == second_value)


    def validate_data_ingest_retrieve(self, dataset_id):
        '''
        Verifies that salinity data was ingested into `dataset_id`.

        The last granule must contain a 'salinity' value, and every salinity
        value in the full replay of the dataset must be greater than zero.
        Also (re)creates self.data_retriever.
        '''
        assertions = self.assertTrue
        self.data_retriever = DataRetrieverServiceClient(node=self.container.node)

        #validate that data was ingested
        replay_granule = self.data_retriever.retrieve_last_granule(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(replay_granule)
        salinity = get_safe(rdt, 'salinity')
        # Identity test: "!= None" on a numpy array is an elementwise
        # comparison rather than a presence check.
        assertions(salinity is not None)

        #retrieve all the granules from the database and check the values
        replay_granule_all = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(replay_granule_all)
        for k, v in rdt.iteritems():
            if k != 'salinity':
                continue
            for val in numpy.nditer(v):
                assertions(val > 0)

    def create_salinity_data_process_definition(self):
        '''
        Returns the 'ctd_salinity' data process definition, creating it (and
        assigning it a 'Salinity' stream definition) when none is registered.

        A newly created definition is returned as its id.
        '''
        # Salinity: Data Process Definition

        #First look to see if it exists and if not, then create it
        dpd,_ = self.rrclient.find_resources(restype=RT.DataProcessDefinition, name='ctd_salinity')
        if len(dpd) > 0:
            # NOTE(review): this returns the element found by find_resources as
            # is, while the path below returns an id - confirm callers accept both.
            return dpd[0]

        log.debug("Create data process definition SalinityTransform")
        dpd_obj = IonObject(RT.DataProcessDefinition,
            name='ctd_salinity',
            description='create a salinity data product',
            module='ion.processes.data.transforms.ctd.ctd_L2_salinity',
            class_name='SalinityTransform',
            process_source='SalinityTransform source code here...')
        try:
            ctd_L2_salinity_dprocdef_id = self.dataprocessclient.create_data_process_definition(dpd_obj)
        except Exception as ex:
            # Was "Excpetion": the typo raised NameError and hid the real failure
            self.fail("failed to create new SalinityTransform data process definition: %s" %ex)

        # create a stream definition for the data from the salinity Transform
        sal_stream_def_id = self.pubsubclient.create_stream_definition(container=SalinityTransform.outgoing_stream_def,  name='Salinity')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(sal_stream_def_id, ctd_L2_salinity_dprocdef_id )

        return ctd_L2_salinity_dprocdef_id

    def create_salinity_doubler_data_process_definition(self):
        '''
        Returns the 'salinity_doubler' data process definition, creating it
        (and assigning it a 'SalinityDoubler' stream definition) when none is
        registered. A newly created definition is returned as its id.
        '''
        # Reuse the definition when it is already registered
        existing, _ = self.rrclient.find_resources(
            restype=RT.DataProcessDefinition, name='salinity_doubler')
        if existing:
            return existing[0]

        # Salinity Doubler: Data Process Definition
        log.debug("Create data process definition SalinityDoublerTransform")
        definition = IonObject(
            RT.DataProcessDefinition,
            name='salinity_doubler',
            description='create a salinity doubler data product',
            module='ion.processes.data.transforms.example_double_salinity',
            class_name='SalinityDoubler',
            process_source='SalinityDoubler source code here...')
        try:
            definition_id = self.dataprocessclient.create_data_process_definition(definition)
        except Exception as ex:
            self.fail("failed to create new SalinityDoubler data process definition: %s" %ex)

        # Stream definition for the data produced by the doubler transform
        doubler_stream_def_id = self.pubsubclient.create_stream_definition(
            container=SalinityDoubler.outgoing_stream_def, name='SalinityDoubler')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            doubler_stream_def_id, definition_id)

        return definition_id


    def create_transform_process(self, data_process_definition_id, data_process_input_dp_id):
        '''
        Creates and activates a data process from the given definition, fed by
        the input data product, together with a persisted output data product
        named after the definition.

        Returns the tuple (data_process_id, output_data_product_id).
        Raises Inconsistent when the definition has no output stream
        definition or the output data product ends up without a stream.
        '''
        definition = self.rrclient.read(data_process_definition_id)
        definition_id = definition._id

        # The output stream definition is associated with the definition
        stream_def_ids, _ = self.rrclient.find_objects(
            definition_id, PRED.hasStreamDefinition, RT.StreamDefinition, id_only=True)
        if not stream_def_ids:
            raise Inconsistent("The data process definition %s is missing an association to an output stream definition" % definition_id )
        output_stream_def_id = stream_def_ids[0]

        # Output data product of the transform, named after the definition
        output_product = IonObject(
            RT.DataProduct,
            name=definition.name,
            description=definition.description)
        output_product_id = self.dataproductclient.create_data_product(
            output_product, output_stream_def_id)
        self.dataproductclient.activate_data_product_persistence(
            data_product_id=output_product_id)

        # Create the transform data process and start it
        log.debug("create data_process and start it")
        data_process_id = self.dataprocessclient.create_data_process(
            definition_id, [data_process_input_dp_id], {'output': output_product_id})
        self.dataprocessclient.activate_data_process(data_process_id)

        # The output data product must have received a stream
        output_stream_ids, _ = self.rrclient.find_objects(
            output_product_id, PRED.hasStream, None, True)
        if not output_stream_ids:
            raise Inconsistent("The data process %s is missing an association to an output stream" % data_process_id )

        return data_process_id, output_product_id



    def create_google_dt_data_process_definition(self):
        '''
        Returns the 'google_dt_transform' data process definition, creating it
        (and assigning it a 'VizTransformGoogleDT' stream definition) when none
        is registered. A newly created definition is returned as its id.
        '''
        # Reuse the definition when it is already registered
        registered, _ = self.rrclient.find_resources(
            restype=RT.DataProcessDefinition, name='google_dt_transform')
        if registered:
            return registered[0]

        # Data Process Definition
        log.debug("Create data process definition GoogleDtTransform")
        google_dt_definition = IonObject(
            RT.DataProcessDefinition,
            name='google_dt_transform',
            description='Convert data streams to Google DataTables',
            module='ion.processes.data.transforms.viz.google_dt',
            class_name='VizTransformGoogleDT',
            process_source='VizTransformGoogleDT source code here...')
        try:
            procdef_id = self.dataprocessclient.create_data_process_definition(google_dt_definition)
        except Exception as ex:
            self.fail("failed to create new VizTransformGoogleDT data process definition: %s" %ex)

        # Stream definition for the data produced by the transform
        google_dt_stream_def_id = self.pubsubclient.create_stream_definition(
            container=VizTransformGoogleDT.outgoing_stream_def, name='VizTransformGoogleDT')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            google_dt_stream_def_id, procdef_id)

        return procdef_id


    def validate_google_dt_transform_results(self, results):
        '''
        Checks the Google DataTable content of one granule or of a list of
        results.

        Entries that are not granules, and granules without a
        'google_dt_components' value, are skipped. For the others the first
        component must be of viz product type 'google_dt' and must carry sized
        'data_description' and 'data_content' entries.
        '''
        assertions = self.assertTrue

        # if its just one granule, wrap it up in a list so we can use the following for loop for a couple of cases
        if isinstance(results,Granule):
            results =[results]

        for g in results:
            if not isinstance(g,Granule):
                continue

            # The taxonomy is still loaded as before; its result is not used.
            TaxyTool.load_from_granule(g)
            rdt = RecordDictionaryTool.load_from_granule(g)

            gdt_data = get_safe(rdt, 'google_dt_components')

            # IF this granule does not contain google dt, skip.
            # Identity test: "== None" is an elementwise comparison if a numpy
            # array comes back here.
            if gdt_data is None:
                continue

            gdt = gdt_data[0]

            assertions(gdt['viz_product_type'] == 'google_dt' )
            # ">= 0" holds for any sized value, so these two lines only prove
            # that the keys exist. Need to come up with a better check.
            assertions(len(gdt['data_description']) >= 0)
            assertions(len(gdt['data_content']) >= 0)




    def create_mpl_graphs_data_process_definition(self):
        '''
        Returns the 'mpl_graphs_transform' data process definition, creating it
        (and assigning it a 'VizTransformMatplotlibGraphs' stream definition)
        when none is registered. A newly created definition is returned as its
        id.
        '''
        # Reuse the definition when it is already registered
        found, _ = self.rrclient.find_resources(
            restype=RT.DataProcessDefinition, name='mpl_graphs_transform')
        if found:
            return found[0]

        # Data Process Definition
        log.debug("Create data process definition MatplotlibGraphsTransform")
        mpl_definition = IonObject(
            RT.DataProcessDefinition,
            name='mpl_graphs_transform',
            description='Convert data streams to Matplotlib graphs',
            module='ion.processes.data.transforms.viz.matplotlib_graphs',
            class_name='VizTransformMatplotlibGraphs',
            process_source='VizTransformMatplotlibGraphs source code here...')
        try:
            procdef_id = self.dataprocessclient.create_data_process_definition(mpl_definition)
        except Exception as ex:
            self.fail("failed to create new VizTransformMatplotlibGraphs data process definition: %s" %ex)

        # Stream definition for the data produced by the transform
        mpl_stream_def_id = self.pubsubclient.create_stream_definition(
            container=VizTransformMatplotlibGraphs.outgoing_stream_def,
            name='VizTransformMatplotlibGraphs')
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            mpl_stream_def_id, procdef_id)

        return procdef_id

    def validate_mpl_graphs_transform_results(self, results):
        '''
        Checks the Matplotlib graphs of one granule or of a list of results.

        Entries that are not granules, and granules without a
        'matplotlib_graphs' value, are skipped. Every dictionary in the first
        element of that value must be of viz product type 'matplotlib_graphs'
        and must hold PNG image data.
        '''
        assertions = self.assertTrue

        # if its just one granule, wrap it up in a list so we can use the following for loop for a couple of cases
        if isinstance(results,Granule):
            results =[results]

        for g in results:
            if not isinstance(g,Granule):
                continue

            # The taxonomy is still loaded as before; its result is not used.
            TaxyTool.load_from_granule(g)
            rdt = RecordDictionaryTool.load_from_granule(g)

            graphs = get_safe(rdt, 'matplotlib_graphs')

            # Identity test: "== None" is an elementwise comparison if a numpy
            # array comes back here.
            if graphs is None:
                continue

            for graph in graphs[0]:

                # At this point only dictionaries containing image data should be passed
                # For some reason non dictionary values are filtering through.
                if not isinstance(graph, dict):
                    continue

                assertions(graph['viz_product_type'] == 'matplotlib_graphs' )
                # check to see if the list (numpy array) contains actual images
                assertions(imghdr.what(graph['image_name'], h = graph['image_obj']) == 'png')



    def validate_vis_service_google_dt_results(self, results):
        '''
        Checks that the visualization service returned a non-empty Google
        DataTable response.

        `results` is the response string. The
        "google.visualization.Query.setResponse(" wrapper and its closing
        parenthesis are removed, and what remains must not be empty.
        '''
        assertions = self.assertTrue

        assertions(results)

        # str.lstrip(chars) removes any leading characters found in `chars`; it
        # does not remove a prefix. The old lstrip() call therefore also ate
        # the start of payloads made of those characters (e.g. "null").
        wrapper_prefix = "google.visualization.Query.setResponse("
        gdt_str = results
        if gdt_str.startswith(wrapper_prefix):
            gdt_str = gdt_str[len(wrapper_prefix):]
        if gdt_str.endswith(")"):
            gdt_str = gdt_str[:-1]

        assertions(len(gdt_str) > 0)

        return

    def validate_vis_service_mpl_graphs_results(self, results):
        '''
        Checks that the visualization service returned a valid image.

        `results` is a dictionary with the keys 'content_type' (such as
        "image/png"), 'image_name' and 'image_obj' (the base64 encoded image).
        The decoded image data must be of the format named by the content type.
        '''
        assertions = self.assertTrue
        assertions(results)

        # str.lstrip(chars) removes leading characters found in `chars`, not a
        # prefix: "image/gif".lstrip("image/") gives "f". Remove the prefix
        # explicitly instead.
        content_type = results["content_type"]
        type_prefix = "image/"
        if content_type.startswith(type_prefix):
            image_format = content_type[len(type_prefix):]
        else:
            image_format = content_type

        # b64decode is used instead of decodestring, which newer Python
        # versions no longer provide
        image_data = base64.b64decode(results['image_obj'])

        assertions(imghdr.what(results['image_name'], h = image_data) == image_format)

        return
class TestActivateInstrumentIntegration(IonIntegrationTestCase):

    def setUp(self):
        """Start a container with ElasticSearch bootstrapping enabled and
        create the service clients and listener state used by the tests."""
        super(TestActivateInstrumentIntegration, self).setUp()

        es_config = DotDict()
        es_config.bootstrap.use_es = True

        self._start_container()
        # Index cleanup is registered before the deploy file is started.
        self.addCleanup(TestActivateInstrumentIntegration.es_cleanup)
        self.container.start_rel_from_url('res/deploy/r2deploy.yml', es_config)

        # Service clients bound to this container's node.
        node = self.container.node
        self.rrclient = ResourceRegistryServiceClient(node=node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=node)
        self.pubsubcli = PubsubManagementServiceClient(node=node)
        self.imsclient = InstrumentManagementServiceClient(node=node)
        self.dpclient = DataProductManagementServiceClient(node=node)
        self.datasetclient = DatasetManagementServiceClient(node=node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=node)
        self.dataproductclient = DataProductManagementServiceClient(node=node)
        self.dataretrieverclient = DataRetrieverServiceClient(node=node)

        # These two clients are created without an explicit node.
        self.dataset_management = DatasetManagementServiceClient()
        self.usernotificationclient = UserNotificationServiceClient()

        # Listener bookkeeping.
        self._data_greenlets = []
        self._no_samples = None
        self._samples_received = []

        self.event_publisher = EventPublisher()

    @staticmethod
    def es_cleanup():
        """Delete the couchdb rivers and indexes this system uses in ElasticSearch.

        The host and port come from CFG ('server.elasticsearch.host' /
        'server.elasticsearch.port', defaulting to 'localhost' / '9200').  The
        indexes removed are every key of STD_INDEXES plus the
        '<sysname>_resources_index' and '<sysname>_events_index' indexes.
        """
        es = ep.ElasticSearch(
            host=CFG.get_safe('server.elasticsearch.host', 'localhost'),
            port=CFG.get_safe('server.elasticsearch.port', '9200'),
            timeout=10
        )

        sys_name = get_sys_name().lower()

        # Copy the keys into a real list before appending: dict.keys() is only
        # an appendable list on Python 2, and the copy guarantees STD_INDEXES
        # itself is never touched.
        indexes = list(STD_INDEXES.keys())
        indexes.append('%s_resources_index' % sys_name)
        indexes.append('%s_events_index' % sys_name)

        for index in indexes:
            IndexManagementService._es_call(es.river_couchdb_delete, index)
            IndexManagementService._es_call(es.index_delete, index)

    def create_logger(self, name, stream_id=''):
        """Schedule a StreamGranuleLogger process for ``stream_id``.

        A process definition named ``name + '_logger'`` is registered with the
        process dispatcher and scheduled with the stream id in its 'process'
        configuration.  Returns the value returned by schedule_process.
        """
        logger_definition = ProcessDefinition(name=name+'_logger')
        logger_definition.executable = {
            'module':'ion.processes.data.stream_granule_logger',
            'class':'StreamGranuleLogger'
        }

        dispatcher = self.processdispatchclient
        definition_id = dispatcher.create_process_definition(process_definition=logger_definition)

        return dispatcher.schedule_process(
            process_definition_id=definition_id,
            configuration={'process': {'stream_id': stream_id}})

    def _create_notification(self, user_name = '', instrument_id='', product_id=''):
        """Create a user subscribed to two notifications and return the user id.

        Two NotificationRequest objects are built: a 'ResourceLifecycleEvent'
        request whose origin is ``instrument_id`` and a 'DetectionEvent'
        request whose origin is ``product_id``.  A UserInfo named
        ``user_name`` is stored in the resource registry and both
        notifications are created for it.
        """
        #--------------------------------------------------------------------------------------
        # Make notification request objects
        #--------------------------------------------------------------------------------------

        notification_request_1 = NotificationRequest(
            name='notification_1',
            origin=instrument_id,
            origin_type="instrument",
            event_type='ResourceLifecycleEvent')

        notification_request_2 = NotificationRequest(
            name='notification_2',
            origin=product_id,
            origin_type="data product",
            event_type='DetectionEvent')

        #--------------------------------------------------------------------------------------
        # Create a user and get the user_id
        #--------------------------------------------------------------------------------------

        user = UserInfo()
        user.name = user_name
        # The previous literal ('*****@*****.**') contained no conversion
        # specifier, so applying '%' to it raised TypeError.  A per-user
        # address on the reserved example.com domain restores a valid format
        # string (NOTE(review): the original domain appears to have been
        # redacted -- substitute the intended one if it matters).
        user.contact.email = '%s@example.com' % user_name

        user_id, _ = self.rrclient.create(user)

        #--------------------------------------------------------------------------------------
        # Create notification
        #--------------------------------------------------------------------------------------

        self.usernotificationclient.create_notification(notification=notification_request_1, user_id=user_id)
        self.usernotificationclient.create_notification(notification=notification_request_2, user_id=user_id)
        log.debug( "test_activateInstrumentSample: create_user_notifications user_id %s", str(user_id) )

        return user_id

    def get_datastore(self, dataset_id):
        """Return the SCIDATA-profile datastore named by the dataset ``dataset_id``."""
        datastore_name = self.datasetclient.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)

    def _check_computed_attributes_of_extended_instrument(self, expected_instrument_device_id = '',extended_instrument = None):
        """Assert the computed attributes of an extended instrument resource.

        Checks the wrapper type of each computed attribute, the expected
        roll-up status values, and that exactly one user notification request
        exists whose origin is ``expected_instrument_device_id``.
        """
        computed = extended_instrument.computed

        # Computed attribute name -> wrapper type it must be an instance of.
        expected_types = (
            ('firmware_version', ComputedFloatValue),
            ('last_data_received_datetime', ComputedFloatValue),
            ('last_calibration_datetime', ComputedFloatValue),
            ('uptime', ComputedStringValue),
            ('power_status_roll_up', ComputedIntValue),
            ('communications_status_roll_up', ComputedIntValue),
            ('data_status_roll_up', ComputedIntValue),
            ('location_status_roll_up', ComputedIntValue),
        )
        for attr_name, value_type in expected_types:
            self.assertIsInstance(getattr(computed, attr_name), value_type)

        # Roll-up status values.
        self.assertEqual(computed.communications_status_roll_up.value, StatusType.STATUS_WARNING)
        self.assertEqual(computed.data_status_roll_up.value, StatusType.STATUS_OK)
        self.assertEqual(computed.power_status_roll_up.value, StatusType.STATUS_WARNING)

        # Exactly one notification request, matching the instrument
        # (the original comment notes this needs elasticsearch to work).
        requests = computed.user_notification_requests.value
        self.assertEqual( 1, len(requests) )
        first_request = requests[0]
        self.assertEqual(first_request.origin, expected_instrument_device_id)
        self.assertEqual(first_request.origin_type, "instrument")
        self.assertEqual(first_request.event_type, 'ResourceLifecycleEvent')


    def _check_computed_attributes_of_extended_product(self, expected_data_product_id = '', extended_data_product = None):
        """Assert the computed attributes of an extended data product resource.

        Checks the resource id, the wrapper type of each computed attribute,
        the last granule's quality flag, the length of data_datetime, and that
        the first user notification request targets
        ``expected_data_product_id``.
        """
        self.assertEqual(expected_data_product_id, extended_data_product._id)

        computed = extended_data_product.computed
        log.debug("extended_data_product.computed: %s", computed)

        # Computed attribute name -> wrapper type it must be an instance of.
        expected_types = (
            ('product_download_size_estimated', ComputedIntValue),
            ('number_active_subscriptions', ComputedIntValue),
            ('data_url', ComputedStringValue),
            ('stored_data_size', ComputedIntValue),
            ('recent_granules', ComputedDictValue),
            ('parameters', ComputedListValue),
            ('recent_events', ComputedEventListValue),
            ('provenance', ComputedDictValue),
            ('user_notification_requests', ComputedListValue),
            ('active_user_subscriptions', ComputedListValue),
            ('past_user_subscriptions', ComputedListValue),
            ('last_granule', ComputedDictValue),
            ('is_persisted', ComputedIntValue),
            ('data_contents_updated', ComputedStringValue),
            ('data_datetime', ComputedListValue),
        )
        for attr_name, value_type in expected_types:
            self.assertIsInstance(getattr(computed, attr_name), value_type)

        # The exact quality text keeps changing to fit the UI, so the check
        # is deliberately loose.
        self.assertTrue( 'ok' in computed.last_granule.value['quality_flag'] )
        self.assertEqual( 2, len(computed.data_datetime.value) )

        first_request = computed.user_notification_requests.value[0]
        self.assertEqual(first_request.origin, expected_data_product_id)
        self.assertEqual(first_request.origin_type, "data product")
        self.assertEqual(first_request.event_type, 'DetectionEvent')


    @attr('LOCOINT')
    @unittest.skipIf(not use_es, 'No ElasticSearch')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    @patch.dict(CFG, {'endpoint':{'receive':{'timeout': 60}}})
    def test_activateInstrumentSample(self):
        """End-to-end activation of an SBE37 instrument with sampling and checks.

        Steps, in order:
          1. Create an InstrumentModel, an InstrumentAgent (with 'raw' and
             'parsed' stream configurations, the parsed one carrying an
             interval alarm), an InstrumentDevice and an agent instance.
          2. Create persisted 'parsed' and 'raw' data products assigned to the
             device, plus a granule logger on the parsed stream.
          3. Create notifications for two users.
          4. Start the agent instance, subscribe to StreamWarningAlarmEvent
             events from the device and wait for the agent process to run.
          5. Drive the agent through INITIALIZE / GO_ACTIVE / RUN / PAUSE /
             RESUME / CLEAR / RUN, acquire 10 samples, then RESET.
          6. Retrieve both datasets and assert each holds 10 records.
          7. Check the extended data product and extended instrument for both
             users, then cancel the loggers and delete the data products.

        Skipped when ``use_es`` is false or CEI_LAUNCH_TEST is set; the CFG
        endpoint receive timeout is patched to 60 for the test's duration.
        """

        self.loggerpids = []

        # Create InstrumentModel
        instModel_obj = IonObject(RT.InstrumentModel,
                                  name='SBE37IMModel',
                                  description="SBE37IMModel")
        instModel_id = self.imsclient.create_instrument_model(instModel_obj)
        log.debug( 'new InstrumentModel id = %s ', instModel_id)



        #Create stream alarms
        # (the bare string below is a no-op expression statement used as a block comment)
        """
        test_two_sided_interval
        Test interval alarm and alarm event publishing for a closed
        inteval.
        """

        # Earlier alarm definition, kept for reference:
        #        kwargs = {
        #            'name' : 'test_sim_warning',
        #            'stream_name' : 'parsed',
        #            'value_id' : 'temp',
        #            'message' : 'Temperature is above test range of 5.0.',
        #            'type' : StreamAlarmType.WARNING,
        #            'upper_bound' : 5.0,
        #            'upper_rel_op' : '<'
        #        }


        # Warning alarm on the 'temp' value of the parsed stream with a lower bound of 50.0.
        kwargs = {
            'name' : 'temperature_warning_interval',
            'stream_name' : 'parsed',
            'value_id' : 'temp',
            'message' : 'Temperature is below the normal range of 50.0 and above.',
            'type' : StreamAlarmType.WARNING,
            'lower_bound' : 50.0,
            'lower_rel_op' : '<'
        }

        # Create alarm object.
        alarm = {}
        alarm['type'] = 'IntervalAlarmDef'
        alarm['kwargs'] = kwargs

        # Only the parsed stream configuration carries the alarm.
        raw_config = StreamConfiguration(stream_name='raw', parameter_dictionary_name='ctd_raw_param_dict', records_per_granule=2, granule_publish_rate=5 )
        parsed_config = StreamConfiguration(stream_name='parsed', parameter_dictionary_name='ctd_parsed_param_dict', records_per_granule=2, granule_publish_rate=5, alarms=[alarm] )


        # Create InstrumentAgent
        instAgent_obj = IonObject(RT.InstrumentAgent,
                                  name='agent007',
                                  description="SBE37IMAgent",
                                  driver_uri="http://sddevrepo.oceanobservatories.org/releases/seabird_sbe37smb_ooicore-0.0.1a-py2.7.egg",
                                  stream_configurations = [raw_config, parsed_config])
        instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj)
        log.debug('new InstrumentAgent id = %s', instAgent_id)

        self.imsclient.assign_instrument_model_to_instrument_agent(instModel_id, instAgent_id)

        # Create InstrumentDevice
        log.debug('test_activateInstrumentSample: Create instrument resource to represent the SBE37 (SA Req: L4-CI-SA-RQ-241) ')
        instDevice_obj = IonObject(RT.InstrumentDevice,
                                   name='SBE37IMDevice',
                                   description="SBE37IMDevice",
                                   serial_number="12345" )
        instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)
        self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, instDevice_id)

        log.debug("test_activateInstrumentSample: new InstrumentDevice id = %s (SA Req: L4-CI-SA-RQ-241) " , instDevice_id)


        # Port agent settings; device and port agent addresses come from CFG.device.sbe37.
        port_agent_config = {
            'device_addr':  CFG.device.sbe37.host,
            'device_port':  CFG.device.sbe37.port,
            'process_type': PortAgentProcessType.UNIX,
            'binary_path': "port_agent",
            'port_agent_addr': 'localhost',
            'command_port': CFG.device.sbe37.port_agent_cmd_port,
            'data_port': CFG.device.sbe37.port_agent_data_port,
            'log_level': 5,
            'type': PortAgentType.ETHERNET
        }

        instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance, name='SBE37IMAgentInstance',
                                          description="SBE37IMAgentInstance",
                                          port_agent_config = port_agent_config)


        instAgentInstance_id = self.imsclient.create_instrument_agent_instance(instAgentInstance_obj,
                                                                               instAgent_id,
                                                                               instDevice_id)

        # Temporal and spatial domains, dumped for use in the data products below.
        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()


        # Stream definitions built from the same parameter dictionaries named in the stream configurations.
        parsed_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        parsed_stream_def_id = self.pubsubcli.create_stream_definition(name='parsed', parameter_dictionary_id=parsed_pdict_id)

        raw_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_raw_param_dict', id_only=True)
        raw_stream_def_id = self.pubsubcli.create_stream_definition(name='raw', parameter_dictionary_id=raw_pdict_id)


        #-------------------------------
        # Create Raw and Parsed Data Products for the device
        #-------------------------------

        dp_obj = IonObject(RT.DataProduct,
            name='the parsed data',
            description='ctd stream test',
            temporal_domain = tdom,
            spatial_domain = sdom)

        data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=parsed_stream_def_id)
        log.debug( 'new dp_id = %s' , data_product_id1)
        self.dpclient.activate_data_product_persistence(data_product_id=data_product_id1)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=data_product_id1)



        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
        log.debug('Data product streams1 = %s', stream_ids)

        # Retrieve the id of the dataset associated with the parsed Data Product
        dataset_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasDataset, RT.Dataset, True)
        log.debug('Data set for data_product_id1 = %s' , dataset_ids[0])
        self.parsed_dataset = dataset_ids[0]


        # Logger process on the parsed stream; its pid is cancelled at the end of the test.
        pid = self.create_logger('ctd_parsed', stream_ids[0] )
        self.loggerpids.append(pid)


        dp_obj = IonObject(RT.DataProduct,
            name='the raw data',
            description='raw stream test',
            temporal_domain = tdom,
            spatial_domain = sdom)

        data_product_id2 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=raw_stream_def_id)
        log.debug('new dp_id = %s', data_product_id2)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=data_product_id2)

        self.dpclient.activate_data_product_persistence(data_product_id=data_product_id2)

        # setup notifications for the device and parsed data product
        user_id_1 = self._create_notification( user_name='user_1', instrument_id=instDevice_id, product_id=data_product_id1)
        #---------- Create notifications for another user and verify that we see different computed subscriptions for the two users ---------
        # (the second user subscribes to the raw data product rather than the parsed one)
        user_id_2 = self._create_notification( user_name='user_2', instrument_id=instDevice_id, product_id=data_product_id2)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id2, PRED.hasStream, None, True)
        log.debug('Data product streams2 = %s' , str(stream_ids))

        # Retrieve the id of the dataset associated with the raw Data Product
        dataset_ids, _ = self.rrclient.find_objects(data_product_id2, PRED.hasDataset, RT.Dataset, True)
        log.debug('Data set for data_product_id2 = %s' , dataset_ids[0])
        self.raw_dataset = dataset_ids[0]

        #elastic search debug
        es_indexes, _ = self.container.resource_registry.find_resources(restype='ElasticSearchIndex')
        log.debug('ElasticSearch indexes: %s', [i.name for i in es_indexes])
        log.debug('Bootstrap %s', CFG.bootstrap.use_es)


        # Start the agent instance in a greenlet and wait for that greenlet to finish.
        def start_instrument_agent():
            self.imsclient.start_instrument_agent_instance(instrument_agent_instance_id=instAgentInstance_id)

        gevent.joinall([gevent.spawn(start_instrument_agent)])


        #setup a subscriber to alarm events from the device
        self._events_received= []
        self._event_count = 0
        self._samples_out_of_range = 0
        self._samples_complete = False
        self._async_sample_result = AsyncResult()

        # Event callback: records the event and releases the wait on _async_sample_result further down.
        def consume_event(*args, **kwargs):
            log.debug('TestActivateInstrument recieved ION event: args=%s, kwargs=%s, event=%s.',
                str(args), str(kwargs), str(args[0]))
            self._events_received.append(args[0])
            self._event_count = len(self._events_received)
            self._async_sample_result.set()

        self._event_subscriber = EventSubscriber(
            event_type= 'StreamWarningAlarmEvent',   #'StreamWarningAlarmEvent', #  StreamAlarmEvent
            callback=consume_event,
            origin=instDevice_id)
        self._event_subscriber.start()


        #cleanup
        self.addCleanup(self.imsclient.stop_instrument_agent_instance,
                        instrument_agent_instance_id=instAgentInstance_id)

        def stop_subscriber():
            self._event_subscriber.stop()
            self._event_subscriber = None

        self.addCleanup(stop_subscriber)


        #wait for start
        inst_agent_instance_obj = self.imsclient.read_instrument_agent_instance(instAgentInstance_id)
        gate = ProcessStateGate(self.processdispatchclient.read_process,
                                inst_agent_instance_obj.agent_process_id,
                                ProcessStateEnum.RUNNING)
        # NOTE: 'await' is a reserved word from Python 3.7 on, so this call only parses on Python 2.
        self.assertTrue(gate.await(30), "The instrument agent instance (%s) did not spawn in 30 seconds" %
                                        inst_agent_instance_obj.agent_process_id)

        log.debug('Instrument agent instance obj: = %s' , str(inst_agent_instance_obj))

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = ResourceAgentClient(instDevice_id,
                                              to_name=inst_agent_instance_obj.agent_process_id,
                                              process=FakeProcess())

        log.debug("test_activateInstrumentSample: got ia client %s" , str(self._ia_client))

        # INITIALIZE: the agent is expected to report INACTIVE afterwards.
        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        retval = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentSample: initialize %s" , str(retval))
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.INACTIVE)

        # GO_ACTIVE: expected state IDLE.
        log.debug("(L4-CI-SA-RQ-334): Sending go_active command ")
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrument: return value from go_active %s" , str(reply))
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        # The resource state is only logged here, not asserted.
        cmd = AgentCommand(command=ResourceAgentEvent.GET_RESOURCE_STATE)
        retval = self._ia_client.execute_agent(cmd)
        state = retval.result
        log.debug("(L4-CI-SA-RQ-334): current state after sending go_active command %s" , str(state))

        # RUN: expected state COMMAND.
        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentSample: run %s" , str(reply))
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        # PAUSE: expected state STOPPED.
        cmd = AgentCommand(command=ResourceAgentEvent.PAUSE)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.STOPPED)

        # RESUME: expected state COMMAND.
        cmd = AgentCommand(command=ResourceAgentEvent.RESUME)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        # CLEAR: expected state IDLE.
        cmd = AgentCommand(command=ResourceAgentEvent.CLEAR)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.IDLE)

        # RUN again: expected state COMMAND, ready for sampling.
        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(state, ResourceAgentState.COMMAND)

        # Acquire 10 samples; the retrieval checks below expect exactly 10 records.
        # NOTE: xrange exists only on Python 2.
        cmd = AgentCommand(command=SBE37ProtocolEvent.ACQUIRE_SAMPLE)
        for i in xrange(10):
            retval = self._ia_client.execute_resource(cmd)
            log.debug("test_activateInstrumentSample: return from sample %s" , str(retval))

        log.debug( "test_activateInstrumentSample: calling reset ")
        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentSample: return from reset %s" , str(reply))

        self._samples_complete = True

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------

        replay_data = self.dataretrieverclient.retrieve(self.parsed_dataset)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        log.debug("test_activateInstrumentSample: RDT parsed: %s", str(rdt.pretty_print()) )
        temp_vals = rdt['temp']
        self.assertEquals(len(temp_vals) , 10)
        log.debug("test_activateInstrumentSample: all temp_vals: %s", temp_vals )

        # Count the samples below 50.0, the lower bound used in the alarm definition above.
        #out_of_range_temp_vals = [i for i in temp_vals if i > 5]
        out_of_range_temp_vals = [i for i in temp_vals if i < 50.0]
        log.debug("test_activateInstrumentSample: Out_of_range_temp_vals: %s", out_of_range_temp_vals )
        self._samples_out_of_range = len(out_of_range_temp_vals)

        # if no bad values were produced, then do not wait for an event
        if self._samples_out_of_range == 0:
            self._async_sample_result.set()


        log.debug("test_activateInstrumentSample: _events_received: %s", self._events_received )
        log.debug("test_activateInstrumentSample: _event_count: %s", self._event_count )

        # Blocks until consume_event (or the shortcut just above) has set the result,
        # or until the configured receive timeout expires.
        self._async_sample_result.get(timeout=CFG.endpoint.receive.timeout)

        replay_data = self.dataretrieverclient.retrieve(self.raw_dataset)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        log.debug("RDT raw: %s", str(rdt.pretty_print()) )

        raw_vals = rdt['raw']
        self.assertEquals(len(raw_vals) , 10)


        log.debug("l4-ci-sa-rq-138")
        # (the bare string below is a no-op expression statement used as a block comment)
        """
        Physical resource control shall be subject to policy

        Instrument management control capabilities shall be subject to policy

        The actor accessing the control capabilities must be authorized to send commands.

        note from maurice 2012-05-18: Talk to tim M to verify that this is policy.  If it is then talk with Stephen to
                                      get an example of a policy test and use that to create a test stub that will be
                                      completed when we have instrument policies.

        Tim M: The "actor", aka observatory operator, will access the instrument through ION.

        """


        #--------------------------------------------------------------------------------
        # Get the extended data product to see if it contains the granules
        #--------------------------------------------------------------------------------
        extended_product = self.dpclient.get_data_product_extension(data_product_id=data_product_id1, user_id=user_id_1)
        # NOTE(review): poller inspects the extended_product object fetched once above and
        # nothing here re-fetches it, so unless poll() refreshes its argument the condition
        # cannot change between attempts -- confirm against poll()'s implementation.
        def poller(extended_product):
            return len(extended_product.computed.user_notification_requests.value) == 1

        poll(poller, extended_product, timeout=30)

        self._check_computed_attributes_of_extended_product( expected_data_product_id = data_product_id1, extended_data_product = extended_product)

        #--------------------------------------------------------------------------------
        #put some events into the eventsdb to test - this should set the comms and data status to WARNING
        #--------------------------------------------------------------------------------

        t = get_ion_ts()
        self.event_publisher.publish_event(  ts_created= t,  event_type = 'DeviceStatusEvent',
            origin = instDevice_id, state=DeviceStatusType.OUT_OF_RANGE, values = [200] )
        self.event_publisher.publish_event( ts_created= t,   event_type = 'DeviceCommsEvent',
            origin = instDevice_id, state=DeviceCommsType.DATA_DELIVERY_INTERRUPTION, lapse_interval_seconds = 20 )

        #--------------------------------------------------------------------------------
        # Get the extended instrument
        #--------------------------------------------------------------------------------

        extended_instrument = self.imsclient.get_instrument_device_extension(instrument_device_id=instDevice_id, user_id=user_id_1)
        self._check_computed_attributes_of_extended_instrument(expected_instrument_device_id = instDevice_id, extended_instrument = extended_instrument)

        #--------------------------------------------------------------------------------
        # For the second user, check the extended data product and the extended instrument
        #--------------------------------------------------------------------------------
        extended_product = self.dpclient.get_data_product_extension(data_product_id=data_product_id2, user_id=user_id_2)
        self._check_computed_attributes_of_extended_product(expected_data_product_id = data_product_id2, extended_data_product = extended_product)

        #---------- Put some events into the eventsdb to test - this should set the comms and data status to WARNING  ---------

        t = get_ion_ts()
        self.event_publisher.publish_event(  ts_created= t,  event_type = 'DeviceStatusEvent',
            origin = instDevice_id, state=DeviceStatusType.OUT_OF_RANGE, values = [200] )
        self.event_publisher.publish_event( ts_created= t,   event_type = 'DeviceCommsEvent',
            origin = instDevice_id, state=DeviceCommsType.DATA_DELIVERY_INTERRUPTION, lapse_interval_seconds = 20 )

        #--------------------------------------------------------------------------------
        # Get the extended instrument
        #--------------------------------------------------------------------------------

        extended_instrument = self.imsclient.get_instrument_device_extension(instrument_device_id=instDevice_id, user_id=user_id_2)
        self._check_computed_attributes_of_extended_instrument(expected_instrument_device_id = instDevice_id, extended_instrument = extended_instrument)

        #--------------------------------------------------------------------------------
        # Deactivate loggers
        #--------------------------------------------------------------------------------

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)

        # Finally remove both data products created by this test.
        self.dpclient.delete_data_product(data_product_id1)
        self.dpclient.delete_data_product(data_product_id2)
Exemplo n.º 19
0
class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self):
        '''
        Starts the container and the release described by r2deploy.yml, creates the
        service clients used by the tests and initializes the per-test bookkeeping.
        '''
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Service clients used throughout the helpers and tests
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        # Bookkeeping shared by the helper methods
        self.pids = []      # process ids terminated in tearDown
        self.event = Event()    # set by validate_granule_subscription
        self.exchange_space_name = 'test_granules'
        self.exchange_point_name = 'science_data'
        self.i = 0      # counter used when naming datasets and streams

        # Purge the 'science_granule_ingestion' queue before the test runs
        self.purge_queues()
        self.queue_buffer = []      # queues (or queue names) deleted in tearDown
        self.streams = []       # stream ids handed to stop_all_ingestion

        self.addCleanup(self.stop_all_ingestion)

    def purge_queues(self):
        '''
        Purges the 'science_granule_ingestion' queue
        '''
        queue_name = 'science_granule_ingestion'
        self.container.ex_manager.create_xn_queue(queue_name).purge()
        

    def tearDown(self):
        '''
        Purges the ingestion queue, terminates the processes recorded in self.pids,
        cleans the ingestion subscriptions and deletes the queues recorded in
        self.queue_buffer.
        '''
        self.purge_queues()

        for launched_pid in self.pids:
            self.container.proc_manager.terminate_process(launched_pid)

        IngestionManagementIntTest.clean_subscriptions()

        # Entries are either queue objects or queue names; anything else is ignored
        for entry in self.queue_buffer:
            if isinstance(entry, ExchangeNameQueue):
                doomed = entry
            elif isinstance(entry, str):
                doomed = self.container.ex_manager.create_xn_queue(entry)
            else:
                continue
            doomed.delete()

    #--------------------------------------------------------------------------------
    # Helper/Utility methods
    #--------------------------------------------------------------------------------
        
    def create_dataset(self, parameter_dict_id=''):
        '''
        Creates a time-series dataset and returns its id

        When no parameter dictionary id is given, the 'ctd_parsed_param_dict'
        dictionary is looked up by name and used instead.
        '''
        temporal, spatial = time_series_domain()
        spatial_dump = spatial.dump()
        temporal_dump = temporal.dump()

        if not parameter_dict_id:
            parameter_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
                'ctd_parsed_param_dict', id_only=True)

        # The dataset name carries the current value of the self.i counter
        dataset_name = 'test_dataset_%i' % self.i
        return self.dataset_management.create_dataset(
            dataset_name,
            parameter_dictionary_id=parameter_dict_id,
            spatial_domain=spatial_dump,
            temporal_domain=temporal_dump)
    
    def get_datastore(self, dataset_id):
        '''
        Returns the SCIDATA-profile datastore that backs the given dataset
            This method is primarily used to defeat a bug where integration tests in multiple containers may sometimes
            delete a CouchDB datastore and the other containers are unaware of the new state of the datastore.
        '''
        name = self.dataset_management.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(name, DataStore.DS_PROFILE.SCIDATA)
    
    def get_ingestion_config(self):
        '''
        Returns the id of the first IngestionConfiguration found in the resource registry

        The ingestion configuration should have been created by the bootstrap service
        which is configured through r2deploy.yml
        '''
        # find_resources returns a pair; only the list of ids is needed here
        config_ids, _unused = self.resource_registry.find_resources(
            restype=RT.IngestionConfiguration, id_only=True)
        first_config_id = config_ids[0]
        return first_config_id

    def launch_producer(self, stream_id=''):
        '''
        Spawns a BetterDataProducer process for the given stream and records its
        pid in self.pids (those pids are terminated in tearDown)
        '''
        producer_config = {'process':{'stream_id':stream_id}}
        producer_pid = self.container.spawn_process(
            'better_data_producer',
            'ion.processes.data.example_data_producer',
            'BetterDataProducer',
            producer_config)
        self.pids.append(producer_pid)

    def make_simple_dataset(self):
        '''
        Makes a stream definition, a stream and a dataset, the essentials for most of these tests

        Returns the tuple (stream_id, route, stream_def_id, dataset_id) and bumps
        the self.i counter that is used in the stream and dataset names.
        '''
        ctd_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        definition_id = self.pubsub_management.create_stream_definition(
            'ctd data', parameter_dictionary_id=ctd_pdict_id)
        stream_name = 'ctd stream %i' % self.i
        new_stream_id, new_route = self.pubsub_management.create_stream(
            stream_name, 'xp1', stream_definition_id=definition_id)

        new_dataset_id = self.create_dataset(ctd_pdict_id)

        # The returned datastore is not used; see the get_datastore docstring
        self.get_datastore(new_dataset_id)
        self.i += 1
        return new_stream_id, new_route, definition_id, new_dataset_id

    def publish_hifi(self, stream_id, stream_route, offset=0):
        '''
        Publish one granule of deterministic data

        The granule holds ten samples; both 'time' and 'temp' are set to
        np.arange(10) shifted by offset * 10.
        '''
        publisher = StandaloneStreamPublisher(stream_id, stream_route)

        definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        rdt = RecordDictionaryTool(stream_definition_id=definition._id)

        shift = offset * 10
        rdt['time'] = np.arange(10) + shift
        rdt['temp'] = np.arange(10) + shift

        publisher.publish(rdt.to_granule())

    def publish_fake_data(self, stream_id, route):
        '''
        Publishes four deterministic granules through publish_hifi, using offsets 0 to 3
        '''
        for offset in xrange(4):
            self.publish_hifi(stream_id, route, offset)

    def start_ingestion(self, stream_id, dataset_id):
        '''
        Asks ingestion management to persist the stream into the given dataset,
        using the ingestion configuration returned by get_ingestion_config
        '''
        config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=config_id,
            dataset_id=dataset_id)
    
    def stop_ingestion(self, stream_id):
        '''
        Asks ingestion management to stop persisting the given stream
        '''
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=config_id)
        
    def stop_all_ingestion(self):
        '''
        Best-effort cleanup that stops ingestion for every stream id in self.streams

        Each stream is attempted on its own, so a failure for one stream (for
        example a stream whose ingestion the test already stopped itself) does not
        keep the remaining streams from being stopped. Failures are logged and
        otherwise ignored. Only Exception is caught, so KeyboardInterrupt and other
        BaseException subclasses still propagate.
        '''
        for sid in self.streams:
            try:
                self.stop_ingestion(sid)
            except Exception as e:
                # Tests in this class usually stop ingestion themselves, so a failure
                # here is expected often enough that it is only logged at info level.
                log.info('Could not stop ingestion for stream %s: %s', sid, e)

    def validate_granule_subscription(self, msg, route, stream_id):
        '''
        Validation for granule format

        Used as the subscriber callback. Empty dict messages are ignored. Any other
        message must be a Granule; it is loaded into a RecordDictionaryTool, logged,
        and self.event is set to signal that a granule was received.

        route and stream_id are accepted for the callback signature but not used.
        '''
        if msg == {}:
            return
        # Check the type before parsing, so that a malformed message fails with the
        # intended assertion message instead of an error from load_from_granule.
        self.assertIsInstance(msg, Granule, 'Message is improperly formatted. (%s)' % type(msg))
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.info('%s', rdt.pretty_print())
        self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id='', data_size=40, timeout=40):
        '''
        Loops until there is a sufficient amount of data in the dataset

        Polls every 0.2 seconds until the last retrieved data point has a 'time'
        value different from the parameter's fill value and the 'time' extent of
        the dataset is at least data_size.

        dataset_id - the dataset to poll
        data_size  - the minimum 'time' extent to wait for
        timeout    - seconds before gevent.Timeout is raised (defaults to 40)
        '''
        with gevent.Timeout(timeout):
            while True:
                extents = self.dataset_management.dataset_extents(dataset_id, 'time')[0]
                granule = self.data_retriever.retrieve_last_data_points(dataset_id, 1)
                rdt = RecordDictionaryTool.load_from_granule(granule)

                # Test for the presence of data explicitly. The truth value of a
                # one-element numpy array is the truth value of its element, so a
                # time of 0 would otherwise be mistaken for "no data".
                times = rdt['time']
                has_data = times is not None and len(times) > 0

                if has_data and times[0] != rdt._pdict.get_context('time').fill_value and extents >= data_size:
                    return
                gevent.sleep(0.2)




    #--------------------------------------------------------------------------------
    # Test Methods
    #--------------------------------------------------------------------------------

    @attr('SMOKE')
    def test_dm_end_2_end(self):
        # End to end smoke test: a producer process publishes granules that are
        # ingested into a dataset, and the data is then read back three ways:
        # one retrieve call, a streamed replay, and a sliced retrieve.

        #--------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        #--------------------------------------------------------------------------------
        self.event.clear()

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')

        stream_definition = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream('producer', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)

        #--------------------------------------------------------------------------------
        # Start persisting the data on the stream
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        #--------------------------------------------------------------------------------

        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)
        # Register the stream right away: stop_all_ingestion (added as a cleanup in
        # setUp) then stops ingestion even when an assertion below fails.
        self.streams.append(stream_id)

        #--------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        #--------------------------------------------------------------------------------

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)
        rdt = RecordDictionaryTool.load_from_granule(replay_data)
        self.assertTrue((rdt['time'][:10] == np.arange(10)).all(), '%s' % rdt['time'][:])
        self.assertTrue((rdt['binary'][:10] == np.array(['hi']*10, dtype='object')).all())

        #--------------------------------------------------------------------------------
        # Now to try the streamed approach
        #--------------------------------------------------------------------------------
        replay_stream_id, replay_route = self.pubsub_management.create_stream('replay_out', exchange_point=self.exchange_point_name, stream_definition_id=stream_definition)
        self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream_id)
        log.info('Process ID: %s', process_id)

        replay_client = ReplayClient(process_id)

        #--------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        #--------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        # Queue names recorded here are deleted in tearDown
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        self.data_retriever.start_replay_agent(self.replay_id)

        self.assertTrue(replay_client.await_agent_ready(5), 'The process never launched')
        replay_client.start_replay()

        # validate_granule_subscription sets self.event once a granule arrives
        self.assertTrue(self.event.wait(10))
        subscriber.stop()

        self.data_retriever.cancel_replay_agent(self.replay_id)

        #--------------------------------------------------------------------------------
        # Test the slicing capabilities
        #--------------------------------------------------------------------------------

        granule = self.data_retriever.retrieve(dataset_id=dataset_id, query={'tdoa':slice(0,5)})
        rdt = RecordDictionaryTool.load_from_granule(granule)
        b = rdt['time'] == np.arange(5)
        # The comparison may yield a plain bool instead of an array
        self.assertTrue(b.all() if not isinstance(b, bool) else b)
        self.stop_ingestion(stream_id)


    def test_coverage_transform(self):
        # Publishes one granule built by ParameterHelper.fill_parsed_rdt, waits for the
        # dataset monitor to fire, then checks the replayed raw and derived parameters.
        helper = ParameterHelper(self.dataset_management, self.addCleanup)
        parsed_pdict_id = helper.create_parsed()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'ctd parsed', parameter_dictionary_id=parsed_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(parsed_pdict_id)

        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        rdt_in = helper.get_rdt(stream_def_id)
        helper.fill_parsed_rdt(rdt_in)

        # The monitor is created before publishing so that its event is not missed
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        publisher.publish(rdt_in.to_granule())
        self.assertTrue(monitor.event.wait(30))

        rdt_out = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))

        # Published parameters come back unchanged
        for published in ('time', 'temp'):
            np.testing.assert_array_almost_equal(rdt_out[published], rdt_in[published])

        # Remaining parameters are compared against fixed expected values
        expected_values = [
            ('conductivity_L1', 42.914),
            ('temp_L1', 20.),
            ('pressure_L1', 3.068),
            ('density', 1021.7144739593881),
            ('salinity', 30.935132729668283),
        ]
        for parameter, value in expected_values:
            np.testing.assert_array_almost_equal(rdt_out[parameter], np.array([value]))


    def test_qc_events(self):
        # Publishes ten samples on a stream built from the QC parameter dictionary and
        # expects, within ten seconds, a ParameterQCEvent from the dataset whose
        # qc_parameter is 'temp_qc' and whose temporal_value is 7.
        helper = ParameterHelper(self.dataset_management, self.addCleanup)
        qc_pdict_id = helper.create_qc_pdict()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'qc stream def', parameter_dictionary_id=qc_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'qc stream',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(qc_pdict_id)
        empty_config = DotDict()

        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id,
            config=empty_config)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.arange(10) * 3

        qc_event_seen = Event()

        def on_qc_event(event, *args, **kwargs):
            # The flag is only set when both checks pass
            self.assertEquals(event.qc_parameter, 'temp_qc')
            self.assertEquals(event.temporal_value, 7)
            qc_event_seen.set()

        listener = EventSubscriber(
            event_type=OT.ParameterQCEvent,
            origin=dataset_id,
            callback=on_qc_event,
            auto_delete=True)
        listener.start()
        self.addCleanup(listener.stop)

        publisher.publish(rdt.to_granule())
        self.assertTrue(qc_event_seen.wait(10))



    def test_lookup_values_ingest_replay(self):
        # Ingests two granules of twenty samples each on a stream built from the
        # lookups parameter dictionary. The stored value documents are changed between
        # the two granules and the replayed lookup/calibrated parameters are checked
        # after each one.
        helper = ParameterHelper(self.dataset_management, self.addCleanup)
        lookup_pdict_id = helper.create_lookups()

        stream_def_id = self.pubsub_management.create_stream_definition(
            'lookups', parameter_dictionary_id=lookup_pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

        stream_id, route = self.pubsub_management.create_stream(
            'example',
            exchange_point=self.exchange_point_name,
            stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)

        ingestion_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(lookup_pdict_id)
        ingest_options = DotDict()
        ingest_options.process.lookup_docs = ['test1', 'test2']
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id,
            ingestion_configuration_id=ingestion_config_id,
            dataset_id=dataset_id,
            config=ingest_options)
        self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

        stored_values = StoredValueManager(self.container)
        stored_values.stored_value_cas('test1', {'offset_a':10.0, 'offset_b':13.1})

        publisher = StandaloneStreamPublisher(stream_id, route)

        def build_granule(time_values):
            # Twenty samples with a constant temp of 20.0 at the given times
            rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
            rdt['time'] = time_values
            rdt['temp'] = [20.0] * 20
            return rdt.to_granule()

        def replay():
            # Everything currently retrievable from the dataset
            return RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))

        #--------------------------------------------------------------------------------
        # First granule: times 0..19
        #--------------------------------------------------------------------------------
        first_granule = build_granule(np.arange(20))

        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)

        publisher.publish(first_granule)
        self.assertTrue(monitor.event.wait(30))

        replayed = replay()

        np.testing.assert_array_almost_equal(replayed['time'], np.arange(20))
        np.testing.assert_array_almost_equal(replayed['temp'], np.array([20.] * 20))
        np.testing.assert_array_almost_equal(replayed['calibrated'], np.array([30.]*20))
        # offset_b is expected to still be the fill value at this point
        np.testing.assert_array_equal(replayed['offset_b'], np.array([replayed.fill_value('offset_b')] * 20))

        #--------------------------------------------------------------------------------
        # Second granule: times 20..39, published after the stored values change
        #--------------------------------------------------------------------------------
        second_granule = build_granule(np.arange(20,40))

        monitor.event.clear()

        stored_values.stored_value_cas('test1', {'offset_a':20.0})
        stored_values.stored_value_cas('coefficient_document', {'offset_b':10.0})
        gevent.sleep(2)

        publisher.publish(second_granule)
        self.assertTrue(monitor.event.wait(30))

        replayed = replay()

        np.testing.assert_array_almost_equal(replayed['time'], np.arange(40))
        np.testing.assert_array_almost_equal(replayed['temp'], np.array([20.] * 40))
        np.testing.assert_array_equal(replayed['offset_b'], np.array([10.] * 40))
        np.testing.assert_array_almost_equal(replayed['calibrated'], np.array([30.]*20 + [40.]*20))
        np.testing.assert_array_almost_equal(replayed['calibrated_b'], np.array([40.] * 20 + [50.] * 20))



    @unittest.skip('Doesnt work')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    def test_replay_pause(self):
        # Exercises start / pause / resume / stop on a replay process by watching
        # self.event, which validate_granule_subscription sets for every granule.
        # NOTE: this test is unconditionally skipped ('Doesnt work').

        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary',  param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')
        

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)
        replay_stream, replay_route = self.pubsub_management.create_stream('replay', 'xp1', stream_definition_id=stream_def_id)
        dataset_id = self.create_dataset(pdict_id)
        scov = DatasetManagementService._get_simplex_coverage(dataset_id)

        # Fill the coverage directly with 100 samples instead of going through ingestion
        bb = CoverageCraft(scov)
        bb.rdt['time'] = np.arange(100)
        bb.rdt['temp'] = np.random.random(100) + 30
        bb.sync_with_granule()

        DatasetManagementService._persist_coverage(dataset_id, bb.coverage) # This invalidates it for multi-host configurations
        # Set up the subscriber to verify the data
        subscriber = StandaloneStreamSubscriber(self.exchange_space_name, self.validate_granule_subscription)
        xp = self.container.ex_manager.create_xp('xp1')
        # Queue names recorded here are deleted in tearDown
        self.queue_buffer.append(self.exchange_space_name)
        subscriber.start()
        subscriber.xn.bind(replay_route.routing_key, xp)

        # Set up the replay agent and the client wrapper

        # 1) Define the Replay (dataset and stream to publish on)
        self.replay_id, process_id = self.data_retriever.define_replay(dataset_id=dataset_id, stream_id=replay_stream)
        # 2) Make a client to interact with the process (optionally provide it a process to bind with)
        replay_client = ReplayClient(process_id)
        # 3) Start the agent (launch the process)
        self.data_retriever.start_replay_agent(self.replay_id)
        # 4) Start replaying...
        replay_client.start_replay()
        
        # Wait till we get some granules
        self.assertTrue(self.event.wait(5))
        
        # We got granules, pause the replay, clear the queue and allow the process to finish consuming
        replay_client.pause_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()
        
        # Make sure there's no remaining messages being consumed
        self.assertFalse(self.event.wait(1))

        # Resume the replay and wait until we start getting granules again
        replay_client.resume_replay()
        self.assertTrue(self.event.wait(5))
    
        # Stop the replay, clear the queues
        replay_client.stop_replay()
        gevent.sleep(1)
        subscriber.xn.purge()
        self.event.clear()

        # Make sure that it did indeed stop
        self.assertFalse(self.event.wait(1))

        subscriber.stop()


    def test_retrieve_and_transform(self):
        # Ingests two granules of random ctd data, then retrieves the dataset through
        # the CTDL2SalinityTransformAlgorithm and checks that no salinity value is 0.

        # Make a simple dataset and start ingestion, pretty standard stuff.
        ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(ctd_stream_id, dataset_id)
        # Register the stream right away: stop_all_ingestion (added as a cleanup in
        # setUp) then stops ingestion even when an assertion below fails.
        self.streams.append(ctd_stream_id)

        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10
        rdt['pressure'] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        # Second granule: same values, times 10..19
        rdt['time'] = np.arange(10, 20)

        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        # The two None arguments are passed positionally exactly as before;
        # their meaning is defined by DataRetrieverService.retrieve.
        granule = self.data_retriever.retrieve(dataset_id,
                                             None,
                                             None,
                                             'ion.processes.data.transforms.ctd.ctd_L2_salinity',
                                             'CTDL2SalinityTransformAlgorithm',
                                             kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for salinity in rdt['salinity']:
            self.assertNotEqual(salinity, 0)
        self.stop_ingestion(ctd_stream_id)

    def test_last_granule(self):
        # Ingests two granules (times 0..19) and checks that retrieve_last_data_points
        # returns the trailing 10 and the trailing 5 samples.
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        # Register the stream right away: stop_all_ingestion (added as a cleanup in
        # setUp) then stops ingestion even when an assertion below fails.
        self.streams.append(stream_id)

        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)

        self.wait_until_we_have_enough_granules(dataset_id, 20) # I just need two

        def last_points_match(number_of_points, expected_times):
            # Builds the callable handed to poll: it is true once the last
            # number_of_points time values equal expected_times.
            def verifier():
                replay_granule = self.data_retriever.retrieve_last_data_points(dataset_id, number_of_points)
                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == expected_times
                # A plain bool result is treated as "not matching yet"
                if not isinstance(comp, bool):
                    return comp.all()
                return False
            return verifier

        self.assertTrue(poll(last_points_match(10, np.arange(10) + 10)))
        self.assertTrue(poll(last_points_match(5, np.arange(15, 20))))

        self.stop_ingestion(stream_id)

    def test_replay_with_parameters(self):
        # Ingests four granules and retrieves them with a query that restricts the
        # time range, the stride and the parameters returned.

        #--------------------------------------------------------------------------------
        # Create the configurations and the dataset
        #--------------------------------------------------------------------------------
        # Get a precompiled parameter dictionary with basic ctd fields
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        context_ids = self.dataset_management.read_parameter_contexts(pdict_id, id_only=True)

        # Add a field that supports binary data input.
        bin_context = ParameterContext('binary', param_type=ArrayType())
        context_ids.append(self.dataset_management.create_parameter_context('binary', bin_context.dump()))
        # Add another field that supports dictionary elements.
        rec_context = ParameterContext('records', param_type=RecordType())
        context_ids.append(self.dataset_management.create_parameter_context('records', rec_context.dump()))

        pdict_id = self.dataset_management.create_parameter_dictionary('replay_pdict', parameter_context_ids=context_ids, temporal_context='time')

        stream_def_id = self.pubsub_management.create_stream_definition('replay_stream', parameter_dictionary_id=pdict_id)

        stream_id, route = self.pubsub_management.create_stream('replay_with_params', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset(pdict_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)
        # Register the stream right away: stop_all_ingestion (added as a cleanup in
        # setUp) then stops ingestion even when an assertion below fails.
        self.streams.append(stream_id)

        dataset_monitor = DatasetMonitor(dataset_id)

        self.addCleanup(dataset_monitor.stop)

        self.publish_fake_data(stream_id, route)

        self.assertTrue(dataset_monitor.event.wait(30))

        # NOTE(review): 2208988800 is the number of seconds between Jan 1 1900 and
        # Jan 1 1970; presumably the query bounds are shifted by it to line up with
        # the raw time values 0..20 that publish_hifi writes - confirm.
        query = {
            'start_time': 0 - 2208988800,
            'end_time':   20 - 2208988800,
            'stride_time' : 2,
            'parameters': ['time','temp']
        }
        retrieved_data = self.data_retriever.retrieve(dataset_id=dataset_id, query=query)

        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)
        comp = np.arange(0,20,2) == rdt['time']
        self.assertTrue(comp.all(), '%s' % rdt.pretty_print())
        self.assertEqual(set(rdt.iterkeys()), set(['time','temp']))

        extents = self.dataset_management.dataset_extents(dataset_id=dataset_id, parameters=['time','temp'])
        self.assertGreaterEqual(extents['time'], 20)
        self.assertGreaterEqual(extents['temp'], 20)

        self.stop_ingestion(stream_id)
        

    def test_repersist_data(self):
        # Ingests two granules, stops and restarts persistence on the same stream and
        # dataset, ingests two more granules and expects all forty samples on replay.
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        # Register the stream right away: stop_all_ingestion (added as a cleanup in
        # setUp) then stops ingestion even when an assertion below fails.
        self.streams.append(stream_id)

        self.publish_hifi(stream_id, route, 0)
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, 20)

        # Stop persisting and start again
        config_id = self.get_ingestion_config()
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id)

        self.publish_hifi(stream_id, route, 2)
        self.publish_hifi(stream_id, route, 3)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        success = False
        with gevent.timeout.Timeout(5):
            while not success:

                replay_granule = self.data_retriever.retrieve(dataset_id)

                rdt = RecordDictionaryTool.load_from_granule(replay_granule)

                comp = rdt['time'] == np.arange(0,40)
                # A plain bool result is treated as "not matching yet"
                if not isinstance(comp, bool):
                    success = comp.all()
                # Only sleep when another attempt is needed; sleeping after success
                # could trip the timeout although the data was already verified.
                if not success:
                    gevent.sleep(1)

        self.assertTrue(success)
        self.stop_ingestion(stream_id)


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_correct_time(self):
        # Writes twenty NTP timestamps ending at the current second straight into the
        # coverage and checks that the temporal bounds reported for the dataset are
        # within 2 of the corresponding unix times.

        # There are 2208988800 seconds between Jan 1 1900 and Jan 1 1970, i.e.
        #  the conversion factor between unix and NTP time
        ntp_offset = 2208988800

        unix_now = np.floor(time.time())
        ntp_now = unix_now + ntp_offset

        unix_ago = unix_now - 20
        ntp_ago = unix_ago + ntp_offset

        _, _, _, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_simplex_coverage(dataset_id)
        coverage.insert_timesteps(20)
        ntp_times = np.arange(ntp_ago, ntp_now)
        coverage.set_parameter_values('time', ntp_times)

        bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        # bounds[0] is compared with the start, bounds[1] with the end
        for index, expected_unix in enumerate((unix_ago, unix_now)):
            self.assertTrue(np.abs(bounds[index] - expected_unix) < 2)


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_empty_coverage_time(self):
        # For a dataset that was never written to, both temporal bounds are expected
        # to equal the fill value of the 'time' parameter.
        _, _, _, dataset_id = self.make_simple_dataset()
        coverage = DatasetManagementService._get_coverage(dataset_id)
        reported_bounds = self.dataset_management.dataset_temporal_bounds(dataset_id)

        time_fill = coverage.get_parameter_context('time').fill_value
        self.assertEquals([time_fill, time_fill], reported_bounds)


    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_out_of_band_retrieve(self):
        # Ingests four granules (times 0..39) and reads them back with
        # DataRetrieverService.retrieve_oob, which is called on the class directly
        # rather than through the service client.

        # Setup the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        # Register the stream so that stop_all_ingestion (added as a cleanup in setUp)
        # stops ingestion once the test is over; it was previously left running.
        self.streams.append(stream_id)

        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_retrieve_cache(self):
        """
        Exercise the coverage cache kept on DataRetrieverService.

        The test expects that:
        - a dataset enters the cache on its first _get_coverage call,
        - the cached entry's age changes once the refresh interval elapsed,
        - after touching all ten datasets the first one is no longer cached,
        - a cached dataset drops out of the cache after new data is ingested.
        """
        # _refresh_interval is a class attribute, so the values assigned below
        # would otherwise leak into every test that runs after this one.
        self.addCleanup(setattr, DataRetrieverService, '_refresh_interval',
                        DataRetrieverService._refresh_interval)
        DataRetrieverService._refresh_interval = 1

        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_simplex_coverage(dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values('time', np.arange(10))
            coverage.set_parameter_values('temp', np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        first_id = dataset_ids[0]
        self.assertNotIn(first_id, DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(first_id)  # Hit the cache
        _, age = DataRetrieverService._retrieve_cache[first_id]
        # Verify that it was hit and it's now in there
        self.assertIn(first_id, DataRetrieverService._retrieve_cache)

        # Wait past the refresh interval so the next access refreshes the entry
        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(first_id)  # Hit the cache again
        _, age2 = DataRetrieverService._retrieve_cache[first_id]
        self.assertTrue(age2 != age)

        # Touch every dataset; the first one is expected to be pushed out
        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)

        self.assertNotIn(first_id, DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)

        self.assertIn(dataset_id, DataRetrieverService._retrieve_cache)

        # Use a long refresh interval so that only newly ingested data,
        # not a timed refresh, can remove the entry from the cache.
        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

        # Poll for up to 20 seconds until the dataset has left the cache
        event = gevent.event.Event()
        with gevent.Timeout(20):
            while not event.wait(0.1):
                if dataset_id not in DataRetrieverService._retrieve_cache:
                    event.set()

        self.assertTrue(event.is_set())

        
    def publish_and_wait(self, dataset_id, granule):
        """
        Publish a granule on the dataset's stream and wait for the dataset
        monitor's event to fire.

        dataset_id -- dataset whose first associated stream (hasStream) is used
        granule    -- granule to publish on that stream

        Fails the test if the monitor's event is not set within 10 seconds.
        """
        stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsub_management.read_stream_route(stream_id)
        publisher = StandaloneStreamPublisher(stream_id, route)

        # Create the monitor before publishing so the notification cannot be
        # missed, and make sure it is stopped even when the wait times out.
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_thorough_gap_analysis(self):
        """
        Run the ingestion gap scenario and check the coverage structure it
        leaves behind: a ViewCoverage over a ComplexCoverage that holds three
        reference coverages.
        """
        dataset_id = self.test_ingestion_gap_analysis()

        view_cov = DatasetManagementService._get_coverage(dataset_id)
        self.assertIsInstance(view_cov, ViewCoverage)

        complex_cov = view_cov.reference_coverage
        self.assertIsInstance(complex_cov, ComplexCoverage)

        reference_count = len(complex_cov._reference_covs)
        self.assertEquals(reference_count, 3)


    def test_ingestion_gap_analysis(self):
        """
        Publish six single-sample granules with connection ids/indexes that
        contain a skipped index and a connection change, then check that all
        six samples can still be retrieved in order.

        Returns the dataset id so that other tests can inspect the coverage.
        """
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        connection1 = uuid4().hex
        connection2 = uuid4().hex

        # (connection id, connection index) for each published sample, in
        # publishing order. The indexes are a mix of strings and integers.
        publications = (
            (connection1, '0'),
            (connection1, 1),
            (connection1, '3'),  # Gap, missed message
            (connection2, '3'),  # Gap, new connection
            (connection2, '4'),
            (connection2, 5),
        )

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        for sample, (connection_id, connection_index) in enumerate(publications):
            rdt['time'] = [sample]
            rdt['temp'] = [sample]
            granule = rdt.to_granule(connection_id=connection_id,
                                     connection_index=connection_index)
            self.publish_and_wait(dataset_id, granule)

        retrieved = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(dataset_id))
        expected = np.arange(6)
        np.testing.assert_array_equal(retrieved['time'], expected)
        np.testing.assert_array_equal(retrieved['temp'], np.arange(6))
        return dataset_id


    @unittest.skip('Outdated due to ingestion retry')
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_ingestion_failover(self):
        """
        Overwrite the dataset's master HDF5 file with junk while ingestion is
        running and expect an ExceptionEvent (origin "stream_exception") to
        be published within 10 seconds of sending more data.
        """
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        event = Event()

        def cb(*args, **kwargs):
            event.set()

        sub = EventSubscriber(event_type="ExceptionEvent", callback=cb, origin="stream_exception")
        sub.start()
        # Stop the subscriber no matter how the test ends; previously a failed
        # assertion (or any earlier error) left it running.
        try:
            self.publish_fake_data(stream_id, route)
            self.wait_until_we_have_enough_granules(dataset_id, 40)

            file_path = DatasetManagementService._get_coverage_path(dataset_id)
            master_file = os.path.join(file_path, '%s_master.hdf5' % dataset_id)

            # Deliberately replace the master file so the next ingestion fails
            with open(master_file, 'w') as f:
                f.write('this will crash HDF')

            self.publish_hifi(stream_id, route, 5)

            self.assertTrue(event.wait(10))
        finally:
            sub.stop()

    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Host requires file-system access to coverage files, CEI mode does not support.')
    def test_coverage_types(self):
        """
        Check the coverage classes handed out for a simple dataset:
        _get_coverage yields a ViewCoverage and _get_simplex_coverage yields
        a SimplexCoverage.
        """
        # Only the dataset id of the freshly made dataset is needed here.
        _, _, _, dataset_id = self.make_simple_dataset()

        view = DatasetManagementService._get_coverage(dataset_id=dataset_id)
        self.assertIsInstance(view, ViewCoverage)

        simplex = DatasetManagementService._get_simplex_coverage(dataset_id=dataset_id)
        self.assertIsInstance(simplex, SimplexCoverage)
class TestObservatoryManagementFullIntegration(IonIntegrationTestCase):

    def setUp(self):
        """
        Start the container, deploy the r2deploy services and create the
        service clients and bookkeeping attributes used by the checks.
        """
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Resource registry, plain and wrapped in the enhanced client
        registry = ResourceRegistryServiceClient()
        self.RR = registry
        self.RR2 = EnhancedResourceRegistryClient(registry)

        # Service clients
        self.OMS = ObservatoryManagementServiceClient()
        self.org_management_service = OrgManagementServiceClient()
        self.IMS = InstrumentManagementServiceClient()
        self.dpclient = DataProductManagementServiceClient()
        self.pubsubcli = PubsubManagementServiceClient()
        self.damsclient = DataAcquisitionManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

        # Highest preload stage loaded so far, and preload id -> resource id
        self._load_stage = 0
        self._resources = {}

    def preload_ooi(self, stage=STAGE_LOAD_ASSETS):
        """
        Preload OOI resources up to and including the given stage.

        stage -- highest stage to load; one of STAGE_LOAD_ORGS,
                 STAGE_LOAD_PARAMS, STAGE_LOAD_AGENTS, STAGE_LOAD_ASSETS
                 (default: STAGE_LOAD_ASSETS, i.e. everything).

        Stages already covered by self._load_stage are skipped, so repeated
        calls only load what is still missing. self._load_stage is advanced
        after each stage that is loaded.

        Each stage spawns an IONLoader process with the stage's configuration.
        """
        if self._load_stage >= stage:
            return

        banner = "--------------------------------------------------------------------------------------------------------"

        # (stage, log message, IONLoader config) in loading order
        load_steps = (
            # load_OOIR2_scenario
            (STAGE_LOAD_ORGS,
             "Preloading stage: %s (OOIR2 Orgs, users, roles)",
             dict(
                 op="load",
                 scenario="OOIR2",
                 path="master",
             )),
            # load_parameter_scenarios
            (STAGE_LOAD_PARAMS,
             "Preloading stage: %s (BASE params, streamdefs)",
             dict(
                 op="load",
                 scenario="BETA",
                 path="master",
                 categories="ParameterFunctions,ParameterDefs,ParameterDictionary,StreamDefinition",
                 clearcols="owner_id,org_ids",
                 assets="res/preload/r2_ioc/ooi_assets",
                 parseooi="True",
             )),
            # load_OOIR2_agents
            (STAGE_LOAD_AGENTS,
             "Preloading stage: %s (OOIR2_I agents, model links)",
             dict(
                 op="load",
                 scenario="OOIR2_I",
                 path="master",
             )),
            # load_ooi_assets
            (STAGE_LOAD_ASSETS,
             "Preloading stage: %s (OOI assets linked to params, agents)",
             dict(
                 op="load",
                 loadooi="True",
                 path="master",
                 assets="res/preload/r2_ioc/ooi_assets",
                 bulk="True",
                 debug="True",
                 ooiuntil="9/1/2013",
                 ooiparams="True",
                 #excludecategories: DataProduct,DataProductLink,Deployment,Workflow,WorkflowDefinition
             )),
        )

        for step_stage, message, loader_config in load_steps:
            # Honor the requested stage: previously every stage after the
            # current one was loaded regardless of what the caller asked for.
            if step_stage > stage:
                continue
            # Already loaded by an earlier call
            if self._load_stage >= step_stage:
                continue

            log.info(banner)
            log.info(message, step_stage)
            self.container.spawn_process("Loader", "ion.processes.bootstrap.ion_loader", "IONLoader",
                                         config=loader_config)
            self._load_stage = step_stage

        # 'DataProduct,DataProductLink,WorkflowDefinition,ExternalDataProvider,ExternalDatasetModel,ExternalDataset,ExternalDatasetAgent,ExternalDatasetAgentInstance',


    @unittest.skip('Work in progress')
    def test_observatory(self):
        """
        Preload OOI stage by stage and run the matching checks after each
        stage. The results of all checks are combined and asserted once at
        the end, so one failing check does not stop the remaining ones.
        """
        self._load_stage = 0
        self._resources = {}
        passing = True

        self.assertTrue(True)

        # (preload stage, names of the checks to run once it is loaded).
        # Checks are looked up by name right before they are called.
        staged_checks = (
            # LOAD STEP 1
            (STAGE_LOAD_ORGS, ('orguserrole_assertions',)),
            # LOAD STEP 2
            (STAGE_LOAD_PARAMS, ('parameter_assertions',)),
            # LOAD STEP 3
            (STAGE_LOAD_AGENTS, ('agent_assertions',)),
            # LOAD STEP 4
            (STAGE_LOAD_ASSETS, (
                # Check OOI preloaded resources to see if they match needs
                # for this test and for correctness
                'sites_assertions',
                'device_assertions',
                'deployment_assertions',
                # Extensive tests on select RSN nodes
                'rsn_node_checks',
                # Extensive tests on select RSN instruments
                'check_rsn_instrument',
                'check_rsn_instrument_data_product',
                # Disabled: extensive tests on a glider (check_glider)
                # Disabled: extensive tests on a CG assembly (check_cg_assembly)
            )),
        )

        for load_stage, check_names in staged_checks:
            self.preload_ooi(stage=load_stage)
            for check_name in check_names:
                passing &= getattr(self, check_name)()

        # Still to do:
        # Add a new instrument agent
        # Add a new instrument agent instance
        # Check DataProducts
        # Check Provenance

        # Call the base class assertion directly for the final verdict
        IonIntegrationTestCase.assertTrue(self, passing)


    # -------------------------------------------------------------------------

    def orguserrole_assertions(self):
        """
        Check the three preloaded marine facilities (MF_CGSN, MF_RSN, MF_EA).

        Returns the combined result of the individual facility checks; every
        facility is checked even if an earlier one fails.
        """
        passing = True

        for facility_preload_id in ("MF_CGSN", "MF_RSN", "MF_EA"):
            passing &= self._check_marine_facility(facility_preload_id)

        return passing

    def _check_marine_facility(self, preload_id):
        """
        Check one preloaded marine facility and its associations.

        The facility must be DEPLOYED, have at least 3 hasMembership objects
        and at least 5 hasRole objects; three well-known roles are then
        checked for assignments. The facility's resource id is recorded in
        self._resources under preload_id.

        Returns the combined result of all checks.
        """
        log.debug("Checking marine facility %s and associations", preload_id)

        facility = self.retrieve_ooi_asset(preload_id)
        facility_id = facility._id
        self._resources[preload_id] = facility_id

        ok = True
        ok &= self.assertEquals(facility.lcstate, LCS.DEPLOYED)

        members, _ = self.RR.find_objects(subject=facility_id, predicate=PRED.hasMembership, id_only=True)
        ok &= self.assertTrue(len(members) >= 3)

        roles, _ = self.RR.find_objects(subject=facility_id, predicate=PRED.hasRole, id_only=False)
        ok &= self.assertTrue(len(roles) >= 5)

        for governance_name in ("ORG_MANAGER", "OBSERVATORY_OPERATOR", "INSTRUMENT_OPERATOR"):
            ok &= self._check_role_assignments(roles, governance_name)

        return ok

    def _check_role_assignments(self, role_list, role_name):
        """
        Check that the role with the given governance name has at least one
        subject associated with it through hasRole.

        role_list -- role resource objects to search
        role_name -- governance_name of the role to look for

        Returns the result of the check. If no role with that name is in
        role_list, nothing is checked and True is returned.
        """
        passing = True
        role_obj = self._find_resource_in_list(role_list, "governance_name", role_name)
        if role_obj:
            # find_subjects returns (subjects, associations) like the other
            # registry find_* calls; measuring the tuple itself would always
            # give 2 and make the check below meaningless.
            subject_ids, _ = self.RR.find_subjects(predicate=PRED.hasRole, object=role_obj._id, id_only=True)
            passing &= self.assertTrue(len(subject_ids) >= 1)

        return passing


    def parameter_assertions(self):
        """
        Check the preloaded parameter resources: at least 10 each of
        ParameterContext, ParameterDictionary and StreamDefinition, and that
        the dictionary preloaded as DICT110 has no 'density_trndtst_qc'
        parameter.

        Returns the combined result of all checks.
        """
        passing = True

        # Minimum resource counts, checked in this order
        for resource_type in (RT.ParameterContext, RT.ParameterDictionary, RT.StreamDefinition):
            found, _ = self.RR.find_resources_ext(restype=resource_type)
            passing &= self.assertTrue(len(found) >= 10)

        # Verify that a PDict has the appropriate QC parameters defined
        matching_dicts, _ = self.RR.find_resources_ext(restype=RT.ParameterDictionary, alt_id_ns='PRE', alt_id='DICT110')
        passing &= self.assertTrue(len(matching_dicts)==1)
        if not matching_dicts:
            return passing
        target_dict = matching_dicts[0]

        # According to the latest SAF, density should NOT have trend
        contexts, _ = self.RR.find_objects(target_dict, PRED.hasParameterContext)
        density_names = []
        for context in contexts:
            if context.name.startswith('density'):
                density_names.append(context.name)
        passing &= self.assertTrue('density_trndtst_qc' not in density_names)

        return passing

    def agent_assertions(self):
        """
        Placeholder for checks on preloaded agents; currently always passes.
        """
        # TODO: More tests?
        return True

    def sites_assertions(self):
        """
        Check preloaded sites: at least 40 observatories, all DEPLOYED, and
        at least 30 platform sites.

        Returns the combined result of all checks.
        """
        ok = True

        observatories, _ = self.RR.find_resources_ext(restype=RT.Observatory)
        ok &= self.assertTrue(len(observatories) >= 40)
        for observatory in observatories:
            ok &= self.assertEquals(observatory.lcstate, LCS.DEPLOYED)

        platform_sites, _ = self.RR.find_resources(RT.PlatformSite, id_only=False)
        site_names = [site.name for site in platform_sites]
        log.debug('platform sites: %s', site_names)
        ok &= self.assertTrue(len(platform_sites) >= 30)

        return ok

    def device_assertions(self):
        """
        Check preloaded devices and agents:
        - at least 30 platform devices, all PLANNED
        - at least 2 platform agents, all DEPLOYED
        - at least 3 instrument agents, all DEPLOYED and each linked to at
          least one model through hasModel

        Returns the combined result of all checks.
        """
        ok = True

        # Platform devices
        platform_devices, _ = self.RR.find_resources(RT.PlatformDevice, id_only=False)
        ok &= self.assertTrue(len(platform_devices) >= 30)
        for device in platform_devices:
            log.debug('platform device: %s', device.name)
            ok &= self.assertEquals(device.lcstate, LCS.PLANNED)

        # Platform agents
        platform_agents, _ = self.RR.find_resources(RT.PlatformAgent, id_only=False)
        ok &= self.assertTrue(len(platform_agents) >= 2)
        for agent in platform_agents:
            log.debug('platform agent: %s', agent.name)
            ok &= self.assertEquals(agent.lcstate, LCS.DEPLOYED)

        # Instrument agents and their models
        instrument_agents, _ = self.RR.find_resources(RT.InstrumentAgent, id_only=False)
        ok &= self.assertTrue(len(instrument_agents) >= 3)
        for agent in instrument_agents:
            log.debug('instrument agent: %s', agent.name)
            ok &= self.assertEquals(agent.lcstate, LCS.DEPLOYED)

            model_ids, _ = self.RR.find_objects(subject=agent._id, predicate=PRED.hasModel, id_only=True)
            ok &= self.assertTrue(len(model_ids) >= 1, "IA %s" % agent.name)

        return ok

    def deployment_assertions(self):
        """
        Check preloaded deployments: at least 30 exist and all are DEPLOYED.

        Returns the combined result of all checks.
        """
        ok = True

        deployments, _ = self.RR.find_resources(RT.Deployment, id_only=False)
        ok &= self.assertTrue(len(deployments) >= 30)

        for deployment in deployments:
            log.debug('deployment: %s', deployment.name)
            ok &= self.assertEquals(deployment.lcstate, LCS.DEPLOYED)

        return ok

    def rsn_node_checks(self):
        """
        Check the deployments of RSN nodes CE04OSHY-PN01C and CE04OSHY-LV01C,
        walk the CE04OSHY-LV01C platform device through its lifecycle and
        activate its deployment.

        Current preload creates:
        - PlatformDevice in PLANNED
        - PlatformSite in DEPLOYED
        - Deployment in DEPLOYED
        - Deployment is NOT activated

        Returns the combined result of all checks.
        """
        passing = True

        pn01c_deployment = self.retrieve_ooi_asset("CE04OSHY-PN01C_DEP")
        passing &= self.assertEquals(pn01c_deployment.lcstate, LCS.DEPLOYED)
        passing &= self.assertEquals(pn01c_deployment.availability, AS.AVAILABLE)
        log.debug('test_observatory  retrieve CE04OSHY-PN01C_DEP deployment:  %s', pn01c_deployment)

        # Check existing RSN node CE04OSHY-LV01C Deployment.
        # NOTE: Deployments are created in DEPLOYED state, currently
        lv01c_deployment = self.retrieve_ooi_asset('CE04OSHY-LV01C_DEP')
        passing &= self.assertEquals(lv01c_deployment.lcstate, LCS.DEPLOYED)
        passing &= self.assertEquals(lv01c_deployment.availability, AS.AVAILABLE)
        log.debug('test_observatory  retrieve RSN node CE04OSHY-LV01C Deployment:  %s', lv01c_deployment)

        lv01c_device = self.retrieve_ooi_asset('CE04OSHY-LV01C_PD')

        # Take the CE04OSHY-LV01C device through DEVELOPED, INTEGRATED and
        # DEPLOYED, verifying the resulting state after each transition.
        lifecycle_steps = (
            (LCE.DEVELOP, LCS.DEVELOPED),
            (LCE.INTEGRATE, LCS.INTEGRATED),
            (LCE.DEPLOY, LCS.DEPLOYED),
        )
        for lc_event, expected_state in lifecycle_steps:
            passing &= self.transition_lcs_then_verify(resource_id=lv01c_device._id,
                                                       new_lcs_state=lc_event,
                                                       verify=expected_state)

        # Activate Deployment for CE04OSHY-LV01C
        self.OMS.activate_deployment(lv01c_deployment._id)
        log.debug('---------    activate_deployment CE04OSHY_LV01C_deployment -------------- ')
        self.dump_deployment(lv01c_deployment._id)
        passing &= self.validate_deployment_activated(lv01c_deployment._id)

        # NOTE: DataProduct is generated in DEPLOYED state, so the output data
        # products of the CE04OSHY-LV01C platform are not transitioned here.

        # Steps not implemented yet:
        # (optional) Start CE04OSHY-LV01C platform agent with simulator
        # Check events for CE04OSHY-LV01C platform
        # Check existing CE04OSBP-LJ01C Deployment
        # Set CE04OSBP-LJ01C Deployment to DEPLOYED state
        # Update description and other attributes for CE04OSBP-LJ01C device resource
        # Create attachment (JPG image) for CE04OSBP-LJ01C device resource
        # Activate Deployment for CE04OSBP-LJ01C
        # (optional) Add/register CE04OSBP-LJ01C platform agent to parent agent
        # (optional) Start CE04OSBP-LJ01C platform agent

        return passing
    
    def check_rsn_instrument(self):
        """
        Exercise RSN instrument CE04OSBP-LJ01C-06-CTDBPO108: walk its device
        through the lifecycle, activate its deployment, create a substitute
        deployment with a comparable device, then deactivate the first
        deployment.

        Current preload creates:
        - InstrumentDevice in PLANNED
        - InstrumentSite in DEPLOYED
        - Deployment in DEPLOYED
        - Deployment is activated

        Returns the combined result of all checks.
        """
        passing = True

        first_deployment = self.retrieve_ooi_asset('CE04OSBP-LJ01C-06-CTDBPO108_DEP')
        self.dump_deployment(first_deployment._id)

        # Take the CE04OSBP-LJ01C-06-CTDBPO108 device through DEVELOPED,
        # INTEGRATED and DEPLOYED, verifying the state after each transition.
        ctd_device = self.retrieve_ooi_asset('CE04OSBP-LJ01C-06-CTDBPO108_ID')
        lifecycle_steps = (
            (LCE.DEVELOP, 'DEVELOPED'),
            (LCE.INTEGRATE, 'INTEGRATED'),
            (LCE.DEPLOY, 'DEPLOYED'),
        )
        for lc_event, expected_state in lifecycle_steps:
            passing &= self.transition_lcs_then_verify(resource_id=ctd_device._id,
                                                       new_lcs_state=lc_event,
                                                       verify=expected_state)

        # Activate Deployment for CE04OSBP-LJ01C-06-CTDBPO108 instrument
        log.debug('---------    activate_deployment CE04OSBP-LJ01C-06-CTDBPO108 deployment -------------- ')
        self.OMS.activate_deployment(first_deployment._id)
        passing &= self.validate_deployment_activated(first_deployment._id)

        # Steps not implemented yet:
        # (optional) Add/register CE04OSBP-LJ01C-06-CTDBPO108 instrument agent to parent agent
        # (optional) Start CE04OSBP-LJ01C-06-CTDBPO108 instrument agent with simulator
        # Set all DataProducts for CE04OSBP-LJ01C-06-CTDBPO108 to DEPLOYED state

        # (optional) Create a substitute Deployment for site
        # CE04OSBP-LJ01C-06-CTDBPO108 with a comparable device
        instrument_site = self.retrieve_ooi_asset('CE04OSBP-LJ01C-06-CTDBPO108')

        # Substitute device: CTD Mooring on Mooring Riser 001
        substitute_device = self.retrieve_ooi_asset('GP03FLMB-RI001-10-CTDMOG999_ID')

        second_deployment_id = self.create_basic_deployment(
            name='CE04OSBP-LJ01C-06-CTDBPO108_DEP2',
            description='substitute Deployment for site CE04OSBP-LJ01C-06-CTDBPO108 with a comparable device')
        self.IMS.deploy_instrument_device(instrument_device_id=substitute_device._id,
                                          deployment_id=second_deployment_id)
        self.OMS.deploy_instrument_site(instrument_site_id=instrument_site._id,
                                        deployment_id=second_deployment_id)
        self.dump_deployment(second_deployment_id)

        # Deactivate the first deployment and verify it
        self.OMS.deactivate_deployment(first_deployment._id)
        passing &= self.validate_deployment_deactivated(first_deployment._id)

        # Steps currently disabled:
        # - Activate the second deployment and check the first one is deactivated
        # - (optional) Set the first deployment and the first device to INTEGRATED
        # - (optional) Create a third Deployment (CE04OSBP-LJ01C-06-CTDBPO108_DEP3)
        #   for the same site
        # - Set the first device to DEPLOYED
        # - (optional) Activate the third deployment
        # - todo: check second deployment is deactivated

        return passing




    def check_data_product_reference(self, reference_designator, output=None):
        """
        Look up the data product preloaded as '<reference_designator>_DPI1',
        activate its persistence and collect the ids needed to use it.

        reference_designator -- prefix of the preload id of the data product
        output               -- optional list; on success a tuple
                                (data_product_id, stream_def_id, dataset_id)
                                is appended to it

        Persistence is suspended again at test cleanup.

        Returns the combined result of the checks. When a lookup comes back
        empty the method returns early and appends nothing to output.
        """
        # Use a fresh list per call; a literal [] default would be one list
        # shared by, and growing across, all calls that omit the argument.
        if output is None:
            output = []

        passing = True

        data_product_ids, _ = self.RR.find_resources_ext(alt_id_ns='PRE', alt_id='%s_DPI1' % reference_designator, id_only=True) # Assuming DPI1 is parsed
        passing &= self.assertEquals(len(data_product_ids), 1)

        if not data_product_ids:
            return passing

        # Let's go ahead and activate it
        data_product_id = data_product_ids[0]
        self.dpclient.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.dpclient.suspend_data_product_persistence, data_product_id)

        dataset_ids, _ = self.RR.find_objects(data_product_id, PRED.hasDataset, id_only=True)
        passing &= self.assertEquals(len(dataset_ids), 1)
        if not dataset_ids:
            return passing
        dataset_id = dataset_ids[0]

        stream_def_ids, _ = self.RR.find_objects(data_product_id, PRED.hasStreamDefinition, id_only=True)
        # Check the stream definitions that were just looked up; this used to
        # re-check dataset_ids, so a wrong count was never noticed.
        passing &= self.assertEquals(len(stream_def_ids), 1)
        if not stream_def_ids:
            return passing
        stream_def_id = stream_def_ids[0]

        output.append((data_product_id, stream_def_id, dataset_id))
        return passing

    def check_tempsf_instrument_data_product(self, reference_designator):
        """
        Publish one record with a 24-value temperature array to the
        instrument's data product and check that the same time and
        temperature values can be retrieved from its dataset.

        Returns the combined result of the checks; returns early when the
        data product lookup fails or no dataset notification arrives within
        20 seconds.
        """
        passing = True

        collected = []
        passing &= self.check_data_product_reference(reference_designator, collected)
        if not passing:
            return passing
        data_product_id, stream_def_id, dataset_id = collected.pop()

        # 2208988800 converts the unix timestamp to NTP time
        ntp_now = time.time() + 2208988800

        temperatures = [
            25.3884, 26.9384, 24.3394, 23.3401, 22.9832, 29.4434,
            26.9873, 15.2883, 16.3374, 14.5883, 15.7253, 18.4383,
            15.3488, 17.2993, 10.2111, 11.5993, 10.9345, 9.4444,
            9.9876, 10.9834, 11.0098, 5.3456, 4.2994, 4.3009,
        ]

        published = RecordDictionaryTool(stream_definition_id=stream_def_id)
        published['time'] = [ntp_now]
        published['temperature'] = [list(temperatures)]

        # Start monitoring before publishing so the notification is not missed
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, published)
        passing &= self.assertTrue(dataset_monitor.event.wait(20))
        if not passing:
            return passing

        retrieved = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(dataset_id))
        passing &= self.assert_array_almost_equal(retrieved['time'], [ntp_now])
        passing &= self.assert_array_almost_equal(retrieved['temperature'], [list(temperatures)])
        return passing
    
    def check_trhph_instrument_data_products(self, reference_designator):
        """
        Fill a 'trhph_sample' record with calibration constants and raw
        readings, check the three derived vent-fluid values, publish the
        record and check the same derived values on the retrieved data.

        Returns the combined result of the checks; returns early when the
        data product lookup fails or no dataset notification arrives within
        60 seconds.
        """
        passing = True

        collected = []
        passing &= self.check_data_product_reference(reference_designator, collected)
        if not passing:
            return passing
        data_product_id, stream_def_id, dataset_id = collected.pop()

        pdict = self.RR2.find_parameter_dictionary_of_stream_definition_using_has_parameter_dictionary(stream_def_id)
        passing &= self.assertEquals(pdict.name, 'trhph_sample')

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        # calibration constants
        a, b, c, d, e = 1.98e-9, -2.45e-6, 9.28e-4, -0.0888, 0.731

        V_s = 1.506
        V_c = 0.
        T = 11.8

        r1, r2, r3 = 0.906, 4.095, 4.095

        ORP_V = 1.806
        Cl = np.nan

        offset = 2008
        gain = 4.0
        # Normally this would be 50 per the DPS but the precision is %4.0f
        # which truncates the values to the nearest 1...
        ORP = ((ORP_V * 1000.) - offset) / gain

        # 2208988800 converts the unix timestamp to NTP time
        ntp_now = time.time() + 2208988800

        # (parameter name, single sample value), assigned in this order
        inputs = (
            ('cc_a', a),
            ('cc_b', b),
            ('cc_c', c),
            ('cc_d', d),
            ('cc_e', e),
            ('ref_temp_volts', V_s),
            ('resistivity_temp_volts', V_c),
            ('eh_sensor', ORP_V),
            ('resistivity_5', r1),
            ('resistivity_x1', r2),
            ('resistivity_x5', r3),
            ('cc_offset', offset),
            ('cc_gain', gain),
            ('time', ntp_now),
        )
        for parameter_name, sample in inputs:
            rdt[parameter_name] = [sample]

        def derived_values_match(record):
            # Compare the three derived parameters against the expected values
            ok = True
            ok &= self.assert_array_almost_equal(record['vent_fluid_temperaure'], [T], 2)
            ok &= self.assert_array_almost_equal(record['vent_fluid_chloride_conc'], [Cl], 4)
            ok &= self.assert_array_almost_equal(record['vent_fluid_orp'], [ORP], 4)
            return ok

        # Derived values on the record before it is published
        passing &= derived_values_match(rdt)

        # Start monitoring before publishing so the notification is not missed
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.event.wait(60))
        if not passing:
            return passing

        # Derived values again on the data read back from the dataset
        retrieved = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(dataset_id))
        passing &= derived_values_match(retrieved)

        return passing

    def check_vel3d_instrument_data_products(self, reference_designator):
        """Check the derived VEL3D turbulent-velocity parameters of a data product.

        Resolves the data product for ``reference_designator``, loads ten
        samples of raw east/north/up velocities into a record dictionary and
        compares the derived ``*_turbulent_velocity`` parameters against
        expected values -- first locally, then on the granule retrieved after
        publishing.

        Returns the accumulated (``&=``) outcome of every check performed.
        """
        passing = True
        info_list = []
        passing &= self.check_data_product_reference(reference_designator, info_list)
        if not passing:
            return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        pdict = self.RR2.find_parameter_dictionary_of_stream_definition_using_has_parameter_dictionary(stream_def_id)
        # Fold the result into `passing` like every other check in this class;
        # previously the outcome of this comparison was dropped.
        passing &= self.assertEquals(pdict.name, 'vel3d_b_sample')

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        lat = 14.6846
        lon = -51.044
        # `np.float` was only an alias of the builtin float and has been
        # removed from NumPy; the builtin yields the same float64 dtype.
        ts = np.array([3319563600, 3319567200, 3319570800, 3319574400,
            3319578000, 3319581600, 3319585200, 3319588800, 3319592400,
            3319596000], dtype=float)

        # Raw input velocities.
        ve = np.array([ -3.2,  0.1,  0. ,  2.3, -0.1,  5.6,  5.1,  5.8,
            8.8, 10.3])

        vn = np.array([ 18.2,  9.9, 12. ,  6.6, 7.4,  3.4, -2.6,  0.2,
            -1.5,  4.1])
        vu = np.array([-1.1, -0.6, -1.4, -2, -1.7, -2, 1.3, -1.6, -1.1, -4.5])

        # Expected derived velocities.
        ve_expected = np.array([-0.085136, -0.028752, -0.036007, 0.002136,
            -0.023158, 0.043218, 0.056451, 0.054727, 0.088446, 0.085952])
        vn_expected = np.array([ 0.164012,  0.094738,  0.114471,  0.06986,  0.07029,
                    0.049237, -0.009499,  0.019311,  0.012096,  0.070017])
        vu_expected = np.array([-0.011, -0.006, -0.014, -0.02, -0.017, -0.02,
            0.013, -0.016, -0.011, -0.045])

        rdt['time'] = ts
        rdt['lat'] = [lat] * 10
        rdt['lon'] = [lon] * 10
        rdt['turbulent_velocity_east'] = ve
        rdt['turbulent_velocity_north'] = vn
        rdt['turbulent_velocity_up'] = vu

        # Derived values computed on the local record dictionary.
        passing &= self.assert_array_almost_equal(rdt['eastward_turbulent_velocity'],
                ve_expected)
        passing &= self.assert_array_almost_equal(rdt['northward_turbulent_velocity'],
                vn_expected)
        passing &= self.assert_array_almost_equal(rdt['upward_turbulent_velocity'],
                vu_expected)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.event.wait(20))
        if not passing:
            return passing

        # Same expectations on the data read back from the dataset.
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(rdt['eastward_turbulent_velocity'],
                ve_expected)
        passing &= self.assert_array_almost_equal(rdt['northward_turbulent_velocity'],
                vn_expected)
        passing &= self.assert_array_almost_equal(rdt['upward_turbulent_velocity'],
                vu_expected)
        return passing

    
    def check_presta_instrument_data_products(self, reference_designator):
        """Check that a PREST-A parsed data product can be published and read back.

        Absolute pressure (SFLPRES_L0) is what comes off the instrument;
        SFLPRES_L1 (``seafloor_pressure``) is a parameter function of it.
        The sample values follow Appendix A, Example 1 of
        https://alfresco.oceanobservatories.org/alfresco/d/d/workspace/SpacesStore/63e16865-9d9e-4b11-b0b3-d5658faa5080/1341-00230_Data_Product_Spec_SFLPRES_OOI.pdf
        : p_psia_tide = 14.8670, for which the tide should be 10.2504.

        Returns the accumulated (``&=``) outcome of every check performed.
        """
        passing = True

        info_list = []
        passing &= self.check_data_product_reference(reference_designator, info_list)
        if not passing:
            return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        now = time.time()
        ntp_now = now + 2208988800.

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['absolute_pressure'] = [14.8670]
        passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        # Accumulate the wait result so the guard below actually sees a
        # timeout; previously the outcome was discarded.
        passing &= self.assertTrue(dataset_monitor.event.wait(20)) # Bumped to 20 to keep buildbot happy
        if not passing:
            return passing

        granule = self.data_retriever.retrieve(dataset_id)

        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
        passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
        passing &= self.assert_array_almost_equal(rdt['absolute_pressure'], [14.8670], 4)

        return passing

    def check_rsn_instrument_data_product(self):
        """Check the preloaded RSN instrument data products.

        For RS03AXBS-MJ03A-06-PRESTA301 (PREST-A) the SFLPRES L0 data product
        is looked up by its preload alt id; its stream definition, reference
        designator and available parameters are verified.  The per-instrument
        checks are then run for several reference designators and finally the
        persisted PREST-A values are read back.

        Returns the accumulated (``&=``) outcome of every check performed.
        Returns early when the data product, its stream definition or its
        parameter dictionary cannot be found.
        """
        passing = True
        data_products, _ = self.RR.find_resources_ext(alt_id_ns='PRE', alt_id='RS03AXBS-MJ03A-06-PRESTA301_SFLPRES_L0_DPID', id_only=True)
        passing &= self.assertTrue(len(data_products)==1)
        if not data_products:
            return passing

        data_product_id = data_products[0]

        stream_defs, _ = self.RR.find_objects(data_product_id,PRED.hasStreamDefinition,id_only=False)
        passing &= self.assertTrue(len(stream_defs)==1)
        if not stream_defs:
            return passing

        # Assert that the stream definition has the correct reference designator
        stream_def = stream_defs[0]
        passing &= self.assertEquals(stream_def.stream_configuration['reference_designator'], 'RS03AXBS-MJ03A-06-PRESTA301')

        # Get the pdict and make sure that the parameters corresponding to the available fields
        # begin with the appropriate data product identifier

        pdict_ids, _ = self.RR.find_objects(stream_def, PRED.hasParameterDictionary, id_only=True)
        passing &= self.assertEquals(len(pdict_ids), 1)
        if not pdict_ids:
            return passing

        pdict_id = pdict_ids[0]

        pdict = DatasetManagementService.get_parameter_dictionary(pdict_id)
        available_params = [pdict.get_context(i) for i in pdict.keys() if i in stream_def.available_fields]
        for p in available_params:
            if p.name=='time': # Ignore the domain parameter
                continue
            passing &= self.assertTrue(p.ooi_short_name.startswith('SFLPRES'))

        # Per-instrument checks, one reference designator at a time.
        passing &= self.check_presta_instrument_data_products('RS01SLBS-MJ01A-06-PRESTA101')
        passing &= self.check_vel3d_instrument_data_products( 'RS01SLBS-MJ01A-12-VEL3DB101')
        passing &= self.check_presta_instrument_data_products('RS03AXBS-MJ03A-06-PRESTA301')
        passing &= self.check_vel3d_instrument_data_products( 'RS03AXBS-MJ03A-12-VEL3DB301')
        passing &= self.check_tempsf_instrument_data_product( 'RS03ASHS-MJ03B-07-TMPSFA301')
        passing &= self.check_vel3d_instrument_data_products( 'RS03INT2-MJ03D-12-VEL3DB304')
        passing &= self.check_trhph_instrument_data_products( 'RS03INT1-MJ03C-10-TRHPHA301')

        self.data_product_management.activate_data_product_persistence(data_product_id)
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(data_product_id)
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        # Accumulate these results as well; previously their outcome was
        # discarded and could not affect the returned value.
        passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'], [10.2504], 4)
        passing &= self.assert_array_almost_equal(rdt['absolute_pressure'], [14.8670], 4)
        self.data_product_management.suspend_data_product_persistence(data_product_id) # Should do nothing and not raise anything

        return passing


    def check_glider(self):
        """Check the preloaded GP05MOAS-GL001 glider assembly and its deployments.

        Verifies that the glider platform device has three child devices,
        advances its preloaded deployment to DEPLOYED, then creates a second
        deployment, assigns it to the glider site and device, and advances
        that one to DEPLOYED as well.

        Returns the accumulated (``&=``) outcome of the checks.
        """
        passing = True

        # The OOI preload is expected to define three instruments on the glider.
        glider_device = self.retrieve_ooi_asset('GP05MOAS-GL001_PD')
        child_device_ids, _assocs = self.RR.find_objects(
            subject=glider_device._id, predicate=PRED.hasDevice, id_only=True)
        passing &= self.assertEquals(len(child_device_ids), 3)

        # Set GP05MOAS-GL001 Deployment to DEPLOYED.
        # (Activating / deactivating this deployment through OMS is disabled.)
        glider_deployment = self.retrieve_ooi_asset('GP05MOAS-GL001_DEP')
        passing &= self.transition_lcs_then_verify(
            resource_id=glider_deployment._id,
            new_lcs_state=LCE.DEPLOY,
            verify='DEPLOYED')

        # Create a new Deployment resource X without any assignment
        x_deploy_id = self.create_basic_deployment(
            name='X_Deployment',
            description='new Deployment resource X without any assignment')

        # Assign Deployment X to site GP05MOAS-GL001
        glider_site = self.retrieve_ooi_asset('GP05MOAS-GL001')
        self.OMS.deploy_platform_site(glider_site._id, x_deploy_id)

        # Assign Deployment X to first device for GP05MOAS-GL001
        glider_device = self.retrieve_ooi_asset('GP05MOAS-GL001_PD')
        self.IMS.deploy_platform_device(glider_device._id, x_deploy_id)

        # No explicit transition to PLANNED: the deployment is already planned.

        # Set second GP05MOAS-GL001 Deployment to DEPLOYED.
        # (Activating / deactivating this deployment through OMS is disabled.)
        passing &= self.transition_lcs_then_verify(
            resource_id=x_deploy_id,
            new_lcs_state=LCE.DEPLOY,
            verify='DEPLOYED')
        self.dump_deployment(x_deploy_id)

        return passing


    def check_cg_assembly(self):
        """Placeholder for the CG assembly scenario; performs no checks yet.

        Always returns True.  The intended scenario is kept below as an
        outline of the steps still to be implemented.
        """
        # Assembly and deployment:
        #   Set several CE01ISSM-RI002-* instrument devices to DEVELOPED state
        #   Assemble several CE01ISSM-RI002-* instruments to a CG CE01ISSM-RI002 component platform
        #   Set several CE01ISSM-RI002-* instrument devices to INTEGRATED state
        #   Assemble CE01ISSM-RI002 platform to CG CE01ISSM-LM001 station platform
        #   Set CE01ISSM-RI002 component device to INTEGRATED state
        #   Set CE01ISSM-LM001 station device to INTEGRATED state
        #   Set CE01ISSM-LM001 station device to DEPLOYED state (children maybe too?)
        #   Set CE01ISSM-LM001 Deployment to DEPLOYED
        #   Activate CE01ISSM-LM001 platform assembly deployment
        #
        # Tear-down:
        #   Deactivate CE01ISSM-LM001 platform assembly deployment
        #   Set CE01ISSM-LM001 Deployment to INTEGRATED state
        #   Set CE01ISSM-LM001 station device to INTEGRATED state
        #   Set CE01ISSM-RI002 component device to INTEGRATED state
        #   Set CE01ISSM-RI002 component device to INTEGRATED state
        #   Disassemble CE01ISSM-RI002 platform from CG CE01ISSM-LM001 station platform
        #   Disassemble all CE01ISSM-RI002-* instruments from a CG CE01ISSM-RI002 component platform
        #
        # Retirement:
        #   Retire instrument one for CE01ISSM-RI002-*
        #   Retire device one for CE01ISSM-RI002
        #   Retire device one for CE01ISSM-LM001
        return True

    # -------------------------------------------------------------------------

    def retrieve_ooi_asset(self, alt_id='', namespace='PRE'):
        """Return the resource registered under ``alt_id`` in ``namespace``.

        Looks the resource up through ``self.RR.find_resources_ext``, asserts
        that exactly one match was found and returns the first match.
        """
        matches, _assocs = self.RR.find_resources_ext(alt_id_ns=namespace, alt_id=alt_id)
        self.assertEquals(len(matches), 1)
        asset = matches[0]
        return asset

    def transition_lcs_then_verify(self, resource_id, new_lcs_state, verify):
        """Advance a resource's lifecycle state and check the resulting state.

        Applies ``new_lcs_state`` via ``self.RR2.advance_lcs``, re-reads the
        resource and returns the result of asserting that its ``lcstate``
        equals ``verify``.
        """
        self.RR2.advance_lcs(resource_id, new_lcs_state)
        reached_state = self.RR.read(resource_id).lcstate
        return self.assertEquals(reached_state, verify)

    def create_basic_deployment(self, name='', description=''):
        """Create a Deployment planned for calendar year 2013 and return the OMS result.

        The deployment carries a single 'planned' temporal bound running from
        2013-01-01 to 2014-01-01 and a CabledNodeDeploymentContext.
        """
        planned_start = IonTime(datetime.datetime(2013, 1, 1))
        planned_end = IonTime(datetime.datetime(2014, 1, 1))
        planned_window = IonObject(
            OT.TemporalBounds,
            name='planned',
            start_datetime=planned_start.to_string(),
            end_datetime=planned_end.to_string())

        deployment = IonObject(
            RT.Deployment,
            name=name,
            description=description,
            context=IonObject(OT.CabledNodeDeploymentContext),
            constraint_list=[planned_window])
        return self.OMS.create_deployment(deployment)

    def validate_deployment_activated(self, deployment_id=''):
        """Assert that the deployment's site has exactly one hasDevice link to its device.

        Returns the result of the assertion.
        """
        site_id, device_id = self.get_deployment_ids(deployment_id)
        site_to_device = self.RR.find_associations(
            subject=site_id,
            predicate=PRED.hasDevice,
            object=device_id)
        link_count = len(site_to_device)
        return self.assertEquals(link_count, 1)

    def validate_deployment_deactivated(self, deployment_id=''):
        """Assert that the deployment's site has no hasDevice link to its device.

        Returns the result of the assertion.
        """
        site_id, device_id = self.get_deployment_ids(deployment_id)
        site_to_device = self.RR.find_associations(
            subject=site_id,
            predicate=PRED.hasDevice,
            object=device_id)
        link_count = len(site_to_device)
        return self.assertEquals(link_count, 0)

    def dump_deployment(self, deployment_id='', name=""):
        """Log, at debug level, the deployment and every resource that has it.

        Collects the subjects of ``hasDeployment`` for ``deployment_id``, adds
        the deployment itself, and logs first a one-line summary per resource
        and then each resource in full.  ``name`` only labels the log banners.
        """
        member_ids, _assocs = self.RR.find_subjects(
            predicate=PRED.hasDeployment, object=deployment_id, id_only=True)
        member_ids.append(deployment_id)
        members = self.RR.read_mult(member_ids)

        # Pass 1: compact summary.
        log.debug('---------   dump_deployment %s summary---------------', name)
        for member in members:
            log.debug('%s: %s (%s)', member._get_type(), member.name, member._id)

        # Pass 2: complete objects.
        log.debug('---------   dump_deployment %s full dump ---------------', name)
        for member in members:
            log.debug('resource: %s ', member)

        log.debug('---------   dump_deployment %s end  ---------------', name)


        #assocs = self.container.resource_registry.find_assoctiations(anyside=deployment_id)
#        assocs = Container.instance.resource_registry.find_assoctiations(anyside=deployment_id)
#        log.debug('---------   dump_deployment  ---------------')
#        for assoc in assocs:
#            log.debug('SUBJECT: %s      PREDICATE: %s OBJET: %s', assoc.s, assoc.p, assoc.o)
#        log.debug('---------   dump_deployment  end  ---------------')


    def get_deployment_ids(self, deployment_id=''):
        """Return ``(site_id, device_id)`` for the given deployment.

        Devices and sites are looked up as subjects of ``hasDeployment``,
        covering both the instrument and the platform flavour of each.  The
        method asserts that exactly one device and exactly one site exist.
        """
        instrument_device_ids, _ = self.RR.find_subjects(
            RT.InstrumentDevice, PRED.hasDeployment, deployment_id, id_only=True)
        platform_device_ids, _ = self.RR.find_subjects(
            RT.PlatformDevice, PRED.hasDeployment, deployment_id, id_only=True)
        device_ids = instrument_device_ids + platform_device_ids
        self.assertEquals(1, len(device_ids))

        instrument_site_ids, _ = self.RR.find_subjects(
            RT.InstrumentSite, PRED.hasDeployment, deployment_id, id_only=True)
        platform_site_ids, _ = self.RR.find_subjects(
            RT.PlatformSite, PRED.hasDeployment, deployment_id, id_only=True)
        site_ids = instrument_site_ids + platform_site_ids
        self.assertEquals(1, len(site_ids))

        return site_ids[0], device_ids[0]

    def _find_resource_in_list(self, res_list, attr, attr_val, assert_found=True):
        for res in res_list:
            v = getattr(res, attr, None)
            if v == attr_val:
                return res
        if assert_found:
            self.assertTrue(False, "Attribute %s value %s not found in list" % (attr, attr_val))
        return None

    # -------------------------------------------------------------------------

    def _get_caller(self):
        s = inspect.stack()
        return "%s:%s" % (s[2][1], s[2][2])

    @assertion_wrapper
    def assert_array_almost_equal(self, *args, **kwargs):
        """Forward all arguments to ``np.testing.assert_array_almost_equal``.

        NOTE(review): callers in this class combine the result with ``&=``,
        so ``assertion_wrapper`` (defined elsewhere) presumably converts the
        outcome into a boolean -- confirm against the decorator.
        """
        np.testing.assert_array_almost_equal(*args, **kwargs)

    @assertion_wrapper
    def assertEquals(self, *args, **kwargs):
        """Forward all arguments to ``IonIntegrationTestCase.assertEquals``.

        NOTE(review): callers in this class combine the result with ``&=``,
        so ``assertion_wrapper`` (defined elsewhere) presumably converts the
        outcome into a boolean -- confirm against the decorator.
        """
        IonIntegrationTestCase.assertEquals(self, *args, **kwargs)

    @assertion_wrapper
    def assertTrue(self, *args, **kwargs):
        """Forward all arguments to ``IonIntegrationTestCase.assertTrue``.

        NOTE(review): callers in this class combine the result with ``&=``,
        so ``assertion_wrapper`` (defined elsewhere) presumably converts the
        outcome into a boolean -- confirm against the decorator.
        """
        IonIntegrationTestCase.assertTrue(self, *args, **kwargs)
class TestActivateInstrumentIntegration(IonIntegrationTestCase):

    def setUp(self):
        """Start a container with the r2deploy services and build the test fixtures.

        Creates one client per service used by the tests, the bookkeeping
        attributes for the data listeners, and an event publisher.
        """
        super(TestActivateInstrumentIntegration, self).setUp()

        # Bring up the container and deploy the services.
        config = DotDict()
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml', config)

        # Service clients bound to the container's node.
        node = self.container.node
        self.rrclient = ResourceRegistryServiceClient(node=node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=node)
        self.pubsubcli = PubsubManagementServiceClient(node=node)
        self.imsclient = InstrumentManagementServiceClient(node=node)
        self.dpclient = DataProductManagementServiceClient(node=node)
        self.datasetclient = DatasetManagementServiceClient(node=node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=node)
        self.dataproductclient = DataProductManagementServiceClient(node=node)
        self.dataretrieverclient = DataRetrieverServiceClient(node=node)

        # Service clients created without an explicit node.
        self.dataset_management = DatasetManagementServiceClient()
        self.usernotificationclient = UserNotificationServiceClient()

        # Listener bookkeeping.
        self._data_greenlets = []
        self._no_samples = None
        self._samples_received = []

        self.event_publisher = EventPublisher()


    def create_logger(self, name, stream_id=''):
        """Schedule a StreamGranuleLogger process for ``stream_id`` and return its pid.

        A process definition named ``<name>_logger`` is registered with the
        process dispatcher and a process is scheduled from it with the stream
        id passed in its configuration.
        """
        logger_definition = ProcessDefinition(name=name+'_logger')
        logger_definition.executable = {
            'module': 'ion.processes.data.stream_granule_logger',
            'class': 'StreamGranuleLogger',
        }
        logger_procdef_id = self.processdispatchclient.create_process_definition(
            process_definition=logger_definition)

        process_config = {'process': {'stream_id': stream_id}}
        return self.processdispatchclient.schedule_process(
            process_definition_id=logger_procdef_id,
            configuration=process_config)

    def _create_notification(self, user_name = '', instrument_id='', product_id=''):
        """Create a user with two notification requests and return the user id.

        The user is subscribed to ResourceLifecycleEvent events originating
        from ``instrument_id`` and to DetectionEvent events originating from
        ``product_id``.
        """
        #--------------------------------------------------------------------------------------
        # Make notification request objects
        #--------------------------------------------------------------------------------------

        notification_request_1 = NotificationRequest(   name= 'notification_1',
            origin=instrument_id,
            origin_type="instrument",
            event_type='ResourceLifecycleEvent')

        notification_request_2 = NotificationRequest(   name='notification_2',
            origin=product_id,
            origin_type="data product",
            event_type='DetectionEvent')

        #--------------------------------------------------------------------------------------
        # Create a user and get the user_id
        #--------------------------------------------------------------------------------------

        user = UserInfo()
        user.name = user_name
        # The address template needs a %s conversion for the user name; a
        # template without one makes the % operator raise TypeError.
        user.contact.email = '%s@example.com' % user_name

        user_id, _ = self.rrclient.create(user)

        #--------------------------------------------------------------------------------------
        # Create notification
        #--------------------------------------------------------------------------------------

        self.usernotificationclient.create_notification(notification=notification_request_1, user_id=user_id)
        self.usernotificationclient.create_notification(notification=notification_request_2, user_id=user_id)
        log.debug( "test_activateInstrumentSample: create_user_notifications user_id %s", str(user_id) )

        return user_id

    def get_datastore(self, dataset_id):
        """Return the SCIDATA-profile datastore that backs ``dataset_id``.

        The datastore name is taken from the dataset resource read through
        the dataset management client.
        """
        datastore_name = self.datasetclient.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)

    def _check_computed_attributes_of_extended_instrument(self, expected_instrument_device_id='', extended_instrument=None):
        """Assert the computed attributes of an extended instrument resource.

        Checks the type of each computed attribute and that exactly one user
        notification request exists: a ResourceLifecycleEvent request whose
        origin is ``expected_instrument_device_id``.
        """
        computed = extended_instrument.computed

        # Verify that computed attributes exist for the extended instrument
        expected_types = (
            ('last_data_received_datetime', ComputedFloatValue),
            ('uptime', ComputedStringValue),
            ('power_status_roll_up', ComputedIntValue),
            ('communications_status_roll_up', ComputedIntValue),
            ('data_status_roll_up', ComputedIntValue),
            ('location_status_roll_up', ComputedIntValue),
        )
        for attribute_name, value_type in expected_types:
            self.assertIsInstance(getattr(computed, attribute_name), value_type)

        # Verify the computed attribute for user notification requests.
        # NOTE(review): an older remark here said the length check will not
        # work without elasticsearch -- confirm whether that still applies.
        requests = computed.user_notification_requests.value
        self.assertEqual(1, len(requests))

        request = requests[0]
        self.assertEqual(expected_instrument_device_id, request.origin)
        self.assertEqual("instrument", request.origin_type)
        self.assertEqual('ResourceLifecycleEvent', request.event_type)


    def _check_computed_attributes_of_extended_product(self, expected_data_product_id='', extended_data_product=None):
        """Assert the computed attributes of an extended data product resource.

        Checks the resource id, the type of each computed attribute, that
        ``data_datetime`` holds two entries, and that the first user
        notification request is a DetectionEvent request whose origin is
        ``expected_data_product_id``.
        """
        self.assertEqual(expected_data_product_id, extended_data_product._id)
        computed = extended_data_product.computed
        log.debug("extended_data_product.computed: %s", computed)

        # Verify that computed attributes exist for the extended data product
        expected_types = (
            ('product_download_size_estimated', ComputedFloatValue),
            ('number_active_subscriptions', ComputedIntValue),
            ('data_url', ComputedStringValue),
            ('stored_data_size', ComputedIntValue),
            ('recent_granules', ComputedDictValue),
            ('parameters', ComputedListValue),
            ('recent_events', ComputedEventListValue),
            ('provenance', ComputedDictValue),
            ('user_notification_requests', ComputedListValue),
            ('active_user_subscriptions', ComputedListValue),
            ('past_user_subscriptions', ComputedListValue),
            ('last_granule', ComputedDictValue),
            ('is_persisted', ComputedIntValue),
            ('data_contents_updated', ComputedStringValue),
            ('data_datetime', ComputedListValue),
        )
        for attribute_name, value_type in expected_types:
            self.assertIsInstance(getattr(computed, attribute_name), value_type)

        # exact text here keeps changing to fit UI capabilities.  keep assertion general...
        self.assertEqual(2, len(computed.data_datetime.value))

        request = computed.user_notification_requests.value[0]
        self.assertEqual(expected_data_product_id, request.origin)
        self.assertEqual("data product", request.origin_type)
        self.assertEqual('DetectionEvent', request.event_type)


    @attr('LOCOINT')
    #@unittest.skip('refactoring')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode')
    @patch.dict(CFG, {'endpoint':{'receive':{'timeout': 90}}})
    def test_activateInstrumentSample(self):

        self.loggerpids = []

        # Create InstrumentModel
        instModel_obj = IonObject(RT.InstrumentModel,
                                  name='SBE37IMModel',
                                  description="SBE37IMModel")
        instModel_id = self.imsclient.create_instrument_model(instModel_obj)
        log.debug( 'new InstrumentModel id = %s ', instModel_id)




        raw_config = StreamConfiguration(stream_name='raw', parameter_dictionary_name='raw')
        parsed_config = StreamConfiguration(stream_name='parsed', parameter_dictionary_name='ctd_parsed_param_dict')


        # Create InstrumentAgent
        instAgent_obj = IonObject(RT.InstrumentAgent,
                                  name='agent007',
                                  description="SBE37IMAgent",
                                  driver_uri=DRV_URI_GOOD,
                                  stream_configurations = [raw_config, parsed_config])
        instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj)
        log.debug('new InstrumentAgent id = %s', instAgent_id)

        self.imsclient.assign_instrument_model_to_instrument_agent(instModel_id, instAgent_id)

        # Create InstrumentDevice
        log.debug('test_activateInstrumentSample: Create instrument resource to represent the SBE37 (SA Req: L4-CI-SA-RQ-241) ')
        instDevice_obj = IonObject(RT.InstrumentDevice,
                                   name='SBE37IMDevice',
                                   description="SBE37IMDevice",
                                   serial_number="12345" )
        instDevice_id = self.imsclient.create_instrument_device(instrument_device=instDevice_obj)
        self.imsclient.assign_instrument_model_to_instrument_device(instModel_id, instDevice_id)
        log.debug("test_activateInstrumentSample: new InstrumentDevice id = %s (SA Req: L4-CI-SA-RQ-241) " , instDevice_id)


        port_agent_config = {
            'device_addr':  CFG.device.sbe37.host,
            'device_port':  CFG.device.sbe37.port,
            'process_type': PortAgentProcessType.UNIX,
            'binary_path': "port_agent",
            'port_agent_addr': 'localhost',
            'command_port': CFG.device.sbe37.port_agent_cmd_port,
            'data_port': CFG.device.sbe37.port_agent_data_port,
            'log_level': 5,
            'type': PortAgentType.ETHERNET
        }

        instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance, name='SBE37IMAgentInstance',
                                          description="SBE37IMAgentInstance",
                                          port_agent_config = port_agent_config,
                                            alerts= [])


        instAgentInstance_id = self.imsclient.create_instrument_agent_instance(instAgentInstance_obj,
                                                                               instAgent_id,
                                                                               instDevice_id)


        tdom, sdom = time_series_domain()
        sdom = sdom.dump()
        tdom = tdom.dump()


        parsed_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        parsed_stream_def_id = self.pubsubcli.create_stream_definition(name='parsed', parameter_dictionary_id=parsed_pdict_id)

        raw_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('raw', id_only=True)
        raw_stream_def_id = self.pubsubcli.create_stream_definition(name='raw', parameter_dictionary_id=raw_pdict_id)


        #-------------------------------
        # Create Raw and Parsed Data Products for the device
        #-------------------------------

        dp_obj = IonObject(RT.DataProduct,
            name='the parsed data',
            description='ctd stream test',
            temporal_domain = tdom,
            spatial_domain = sdom)

        data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=parsed_stream_def_id)
        log.debug( 'new dp_id = %s' , data_product_id1)
        self.dpclient.activate_data_product_persistence(data_product_id=data_product_id1)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=data_product_id1)



        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
        log.debug('Data product streams1 = %s', stream_ids)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        dataset_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasDataset, RT.Dataset, True)
        log.debug('Data set for data_product_id1 = %s' , dataset_ids[0])
        self.parsed_dataset = dataset_ids[0]


        pid = self.create_logger('ctd_parsed', stream_ids[0] )
        self.loggerpids.append(pid)


        dp_obj = IonObject(RT.DataProduct,
            name='the raw data',
            description='raw stream test',
            temporal_domain = tdom,
            spatial_domain = sdom)

        data_product_id2 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=raw_stream_def_id)
        log.debug('new dp_id = %s', data_product_id2)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=data_product_id2)

        self.dpclient.activate_data_product_persistence(data_product_id=data_product_id2)

        # setup notifications for the device and parsed data product
        user_id_1 = self._create_notification( user_name='user_1', instrument_id=instDevice_id, product_id=data_product_id1)
        #---------- Create notifications for another user and verify that we see different computed subscriptions for the two users ---------
        user_id_2 = self._create_notification( user_name='user_2', instrument_id=instDevice_id, product_id=data_product_id2)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id2, PRED.hasStream, None, True)
        log.debug('Data product streams2 = %s' , str(stream_ids))

        # Retrieve the id of the OUTPUT stream from the out Data Product
        dataset_ids, _ = self.rrclient.find_objects(data_product_id2, PRED.hasDataset, RT.Dataset, True)
        log.debug('Data set for data_product_id2 = %s' , dataset_ids[0])
        self.raw_dataset = dataset_ids[0]


        def start_instrument_agent():
            self.imsclient.start_instrument_agent_instance(instrument_agent_instance_id=instAgentInstance_id)

        gevent.joinall([gevent.spawn(start_instrument_agent)])


        #cleanup
        self.addCleanup(self.imsclient.stop_instrument_agent_instance,
                        instrument_agent_instance_id=instAgentInstance_id)


        #wait for start
        inst_agent_instance_obj = self.imsclient.read_instrument_agent_instance(instAgentInstance_id)
        gate = AgentProcessStateGate(self.processdispatchclient.read_process,
                                     instDevice_id,
                                     ProcessStateEnum.RUNNING)
        self.assertTrue(gate.await(30), "The instrument agent instance (%s) did not spawn in 30 seconds" %
                                        gate.process_id)

        #log.trace('Instrument agent instance obj: = %s' , str(inst_agent_instance_obj))

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = ResourceAgentClient(instDevice_id,
                                              to_name=gate.process_id,
                                              process=FakeProcess())

        log.debug("test_activateInstrumentSample: got ia client %s" , str(self._ia_client))

        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        retval = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentSample: initialize %s" , str(retval))
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.INACTIVE, state)

        log.debug("(L4-CI-SA-RQ-334): Sending go_active command ")
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrument: return value from go_active %s" , str(reply))
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.IDLE, state)

        cmd = AgentCommand(command=ResourceAgentEvent.GET_RESOURCE_STATE)
        retval = self._ia_client.execute_agent(cmd)
        state = retval.result
        log.debug("(L4-CI-SA-RQ-334): current state after sending go_active command %s" , str(state))

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentSample: run %s" , str(reply))
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.COMMAND, state)

        cmd = AgentCommand(command=ResourceAgentEvent.PAUSE)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.STOPPED, state)

        cmd = AgentCommand(command=ResourceAgentEvent.RESUME)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.COMMAND, state)

        cmd = AgentCommand(command=ResourceAgentEvent.CLEAR)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.IDLE, state)

        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        retval = self._ia_client.execute_agent(cmd)
        state = self._ia_client.get_agent_state()
        self.assertEqual(ResourceAgentState.COMMAND, state)

        for i in xrange(10):
            monitor = DatasetMonitor(dataset_id=self.parsed_dataset)
            self._ia_client.execute_resource(AgentCommand(command=SBE37ProtocolEvent.ACQUIRE_SAMPLE))
            if not monitor.wait():
                raise AssertionError('Failed on the %ith granule' % i)
            monitor.stop()


#        cmd = AgentCommand(command=SBE37ProtocolEvent.ACQUIRE_SAMPLE)
#        for i in xrange(10):
#            retval = self._ia_client.execute_resource(cmd)
#            log.debug("test_activateInstrumentSample: return from sample %s" , str(retval))

        log.debug( "test_activateInstrumentSample: calling reset ")
        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activateInstrumentSample: return from reset %s" , str(reply))


        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC Call to start_retreive
        #--------------------------------------------------------------------------------

        replay_data_raw = self.dataretrieverclient.retrieve(self.raw_dataset)
        self.assertIsInstance(replay_data_raw, Granule)
        rdt_raw = RecordDictionaryTool.load_from_granule(replay_data_raw)
        log.debug("RDT raw: %s", str(rdt_raw.pretty_print()) )

        self.assertIn('raw', rdt_raw)
        raw_vals = rdt_raw['raw']

        all_raw = "".join(raw_vals)

        # look for 't' entered after a prompt -- ">t"
        t_commands = all_raw.count(">t")

        if 10 != t_commands:
            log.error("%s raw_vals: ", len(raw_vals))
            for i, r in enumerate(raw_vals): log.error("raw val %s: %s", i, [r])
            self.fail("Expected 10 't' strings in raw_vals, got %s" % t_commands)
        else:
            log.debug("%s raw_vals: ", len(raw_vals))
            for i, r in enumerate(raw_vals): log.debug("raw val %s: %s", i, [r])

        replay_data_parsed = self.dataretrieverclient.retrieve(self.parsed_dataset)
        self.assertIsInstance(replay_data_parsed, Granule)
        rdt_parsed = RecordDictionaryTool.load_from_granule(replay_data_parsed)
        log.debug("test_activateInstrumentSample: RDT parsed: %s", str(rdt_parsed.pretty_print()) )
        self.assertIn('temp', rdt_parsed)
        temp_vals = rdt_parsed['temp']
        pressure_vals  = rdt_parsed['pressure']
        if 10 != len(temp_vals):
            log.error("%s temp_vals: %s", len(temp_vals), temp_vals)
            self.fail("Expected 10 temp_vals, got %s" % len(temp_vals))


        log.debug("l4-ci-sa-rq-138")
        """
        Physical resource control shall be subject to policy

        Instrument management control capabilities shall be subject to policy

        The actor accessing the control capabilities must be authorized to send commands.

        note from maurice 2012-05-18: Talk to tim M to verify that this is policy.  If it is then talk with Stephen to
                                      get an example of a policy test and use that to create a test stub that will be
                                      completed when we have instrument policies.

        Tim M: The "actor", aka observatory operator, will access the instrument through ION.

        """


        #--------------------------------------------------------------------------------
        # Get the extended data product to see if it contains the granules
        #--------------------------------------------------------------------------------
        extended_product = self.dpclient.get_data_product_extension(data_product_id=data_product_id1, user_id=user_id_1)
        def poller(extended_product):
            return len(extended_product.computed.user_notification_requests.value) == 1

        poll(poller, extended_product, timeout=30)

        self._check_computed_attributes_of_extended_product( expected_data_product_id = data_product_id1, extended_data_product = extended_product)


        #--------------------------------------------------------------------------------
        # Get the extended instrument
        #--------------------------------------------------------------------------------

        extended_instrument = self.imsclient.get_instrument_device_extension(instrument_device_id=instDevice_id, user_id=user_id_1)

        #--------------------------------------------------------------------------------
        # For the second user, check the extended data product and the extended intrument
        #--------------------------------------------------------------------------------
        extended_product = self.dpclient.get_data_product_extension(data_product_id=data_product_id2, user_id=user_id_2)
        self._check_computed_attributes_of_extended_product(expected_data_product_id = data_product_id2, extended_data_product = extended_product)


        #--------------------------------------------------------------------------------
        # Get the extended instrument
        #--------------------------------------------------------------------------------

        extended_instrument = self.imsclient.get_instrument_device_extension(instrument_device_id=instDevice_id, user_id=user_id_2)
        self._check_computed_attributes_of_extended_instrument(expected_instrument_device_id = instDevice_id, extended_instrument = extended_instrument)

        #--------------------------------------------------------------------------------
        # Deactivate loggers
        #--------------------------------------------------------------------------------

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)

        self.dpclient.delete_data_product(data_product_id1)
        self.dpclient.delete_data_product(data_product_id2)
class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):

    def setUp(self):
        """Bring up the container and launch one science-granule ingestion worker.

        Starts the container, deploys 'res/deploy/r2deploy.yml', creates the
        service clients used by the tests, registers a process definition for
        the ingestion worker, schedules one worker process and registers
        ``cleaning_up`` as the test cleanup.
        """
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Service clients used throughout the tests
        self.dpsc_cli = DataProductManagementServiceClient()
        self.rrclient = ResourceRegistryServiceClient()
        self.damsclient = DataAcquisitionManagementServiceClient()
        self.pubsubcli = PubsubManagementServiceClient()
        self.ingestclient = IngestionManagementServiceClient()
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc = UserNotificationServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        # Environment: cache datastore handle and a stream definition
        self.db = self.container.datastore_manager.get_datastore(CACHE_DATASTORE_NAME)
        self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

        # Process definition for the ingestion worker
        self.process_definitions = {}
        worker_definition = ProcessDefinition(name='ingestion worker')
        worker_definition.executable = {
            'module': 'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class': 'ScienceGranuleIngestionWorker',
        }
        self.process_definitions['ingestion_worker'] = \
            self.process_dispatcher.create_process_definition(process_definition=worker_definition)

        # Bookkeeping consumed by cleaning_up()
        self.pids = []
        self.exchange_space = 'science_granule_ingestion'
        self.exchange_point = 'science_data'
        self.exchange_names = [self.exchange_space]
        self.exchange_points = [self.exchange_point]

        # Launch the ingestion worker listening on the exchange space queue
        worker_config = DotDict()
        worker_config.process.datastore_name = 'datasets'
        worker_config.process.queue_name = self.exchange_space

        worker_pid = self.process_dispatcher.schedule_process(
            self.process_definitions['ingestion_worker'],
            configuration=worker_config)
        log.debug("the ingestion worker process id: %s", worker_pid)
        self.pids.append(worker_pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        """Undo what ``setUp`` created: worker processes, subscriptions, queues and exchange points.

        Cancelling a worker process is best-effort: a failure is logged and the
        remaining teardown steps still run.
        """
        log.debug("number of pids to be terminated: %s", len(self.pids))
        for pid in self.pids:
            try:
                self.process_dispatcher.cancel_process(pid)
            except Exception:
                # Deliberately tolerant, but no longer a bare ``except:`` so that
                # KeyboardInterrupt / SystemExit are not swallowed during teardown.
                log.debug("could not terminate the process id: %s", pid)
            else:
                log.debug("Terminated the process: %s", pid)
        IngestionManagementIntTest.clean_subscriptions()

        # Look up each queue / exchange point by name and delete it.
        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def get_datastore(self, dataset_id):
        """Return the SCIDATA-profile datastore named by the dataset's ``datastore_name``."""
        datastore_name = self.dataset_management.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(datastore_name,
                                                              DataStore.DS_PROFILE.SCIDATA)


    @attr('EXT')
    @attr('PREP')
    def test_create_data_product(self):
        """Exercise the data product life cycle against the management service.

        Steps, in order: create two products (DP1, DP2) from one stream
        definition, activate persistence, read, update, fetch the extended
        resource, fetch the prepare-support object (for create and for
        update), delete and force-delete DP1, and finally check that events
        were recorded for DP1.
        """
        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict')
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data',
            parameter_dictionary_id=parameter_dictionary._id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # create the first data product (DP1) with geospatial bounds and a product name
        #------------------------------------------------------------------------------------------------

        # Generic time-series data domain creation
        tdom, sdom = time_series_domain()

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain=tdom.dump(),
            spatial_domain=sdom.dump())

        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 10.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 10.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -10.0
        dp_obj.ooi_product_name = "PRODNAME"

        dp_id = self.dpsc_cli.create_data_product(data_product=dp_obj,
                                                  stream_definition_id=ctd_stream_def_id)

        # Assert that the data product has an associated stream at this stage
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertNotEqual(len(stream_ids), 0)

        # Assert that the data product has an associated stream def at this stage
        stream_def_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStreamDefinition, RT.StreamDefinition, True)
        self.assertNotEqual(len(stream_def_ids), 0)

        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)
        # presumably the service derives the center from the symmetric bounds set above
        self.assertEqual(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Created data product %s', dp_obj)

        #------------------------------------------------------------------------------------------------
        # create a second data product (DP2) from the same stream definition
        #------------------------------------------------------------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
            name='DP2',
            description='some new dp',
            temporal_domain=tdom.dump(),
            spatial_domain=sdom.dump())

        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s', dp_id2)

        #------------------------------------------------------------------------------------------------
        # make sure data product is associated with stream def (via its streams)
        #------------------------------------------------------------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream, RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s", s)
            sdefs, _ = self.rrclient.find_objects(s, PRED.hasStreamDefinition, RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s", sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        # The product name given to DP1 must show up in the group list
        group_names = self.dpsc_cli.get_data_product_group_list()
        self.assertIn("PRODNAME", group_names)

        # test reading a non-existent data product
        log.debug('reading non-existent data product')
        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 20.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = -20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 20.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = -20.0
        # now write the dp back to the registry
        self.dpsc_cli.update_data_product(dp_obj)

        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEqual(dp_obj.description, 'the very first dp')
        self.assertEqual(dp_obj.geospatial_point_center.lat, 0.0)
        log.debug('Updated data product %s', dp_obj)

        # test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.product_download_size_estimated.status)
        self.assertEqual(0, extended_product.computed.product_download_size_estimated.value)

        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.parameters.status)

        # test prepare for create (no data product id given)
        data_product_data = self.dpsc_cli.prepare_data_product_support()

        self.assertEqual(data_product_data._id, "")
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        # NOTE(review): 2 presumably = 'SBE37_CDM' from setUp + 'Simulated CTD data' created above
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].resources), 2)
        self.assertEqual(len(data_product_data.associations['Dataset'].resources), 0)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].associated_resources), 0)
        self.assertEqual(len(data_product_data.associations['Dataset'].associated_resources), 0)

        # test prepare for update (existing data product id given)
        data_product_data = self.dpsc_cli.prepare_data_product_support(dp_id)

        self.assertEqual(data_product_data._id, dp_id)
        self.assertEqual(data_product_data.type_, OT.DataProductPrepareSupport)
        self.assertEqual(len(data_product_data.associations['StreamDefinition'].resources), 2)

        self.assertEqual(len(data_product_data.associations['Dataset'].resources), 1)

        self.assertEqual(len(data_product_data.associations['StreamDefinition'].associated_resources), 1)
        self.assertEqual(data_product_data.associations['StreamDefinition'].associated_resources[0].s, dp_id)

        self.assertEqual(len(data_product_data.associations['Dataset'].associated_resources), 1)
        self.assertEqual(data_product_data.associations['Dataset'].associated_resources[0].s, dp_id)

        # now 'delete' the data product
        log.debug("deleting data product: %s", dp_id)
        self.dpsc_cli.delete_data_product(dp_id)

        # Assert that there are no associated streams leftover after deleting the data product
        stream_ids, assoc_ids = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        self.assertEqual(len(stream_ids), 0)
        self.assertEqual(len(assoc_ids), 0)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value

        for event in events:
            log.debug("event time: %s", event.ts_created)

        self.assertGreater(len(events), 0)

    def test_data_product_stream_def(self):
        """A data product must report the stream definition it was created with."""
        param_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        expected_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=param_dict_id)

        temporal, spatial = time_series_domain()
        spatial_dump = spatial.dump()
        temporal_dump = temporal.dump()

        product = IonObject(RT.DataProduct,
                            name='DP1',
                            description='some new dp',
                            temporal_domain=temporal_dump,
                            spatial_domain=spatial_dump)
        product_id = self.dpsc_cli.create_data_product(
            data_product=product,
            stream_definition_id=expected_stream_def_id)

        actual_stream_def_id = self.dpsc_cli.get_data_product_stream_definition(product_id)
        self.assertEqual(expected_stream_def_id, actual_stream_def_id)


    def test_derived_data_product(self):
        """Publish to a parent product and read the data back through a derived product.

        The derived product ('TEMPWAT') is created with a stream definition
        restricted to the 'time' and 'temp' fields and with the parent product
        as ``parent_data_product_id``; the retrieved granule must carry exactly
        those two fields.
        """
        param_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        parent_stream_def_id = self.pubsubcli.create_stream_definition(
            name='ctd parsed', parameter_dictionary_id=param_dict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, parent_stream_def_id)

        temporal, spatial = time_series_domain()

        # Parent (instrument) data product with persistence switched on
        parent_dp = DataProduct(name='Instrument DP',
                                temporal_domain=temporal.dump(),
                                spatial_domain=spatial.dump())
        parent_dp_id = self.dpsc_cli.create_data_product(parent_dp, stream_definition_id=parent_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, parent_dp_id)

        self.dpsc_cli.activate_data_product_persistence(parent_dp_id)
        # Cleanups run last-in-first-out, so persistence is suspended before the force delete.
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, parent_dp_id)

        parent_dataset_ids, _ = self.rrclient.find_objects(
            subject=parent_dp_id, predicate=PRED.hasDataset, id_only=True)
        if not parent_dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(parent_dp_id))
        parent_dataset_id = parent_dataset_ids[0]

        # Derived data product exposing only a subset of the parent's fields
        subset_stream_def_id = self.pubsubcli.create_stream_definition(
            name='TEMPWAT stream def',
            parameter_dictionary_id=param_dict_id,
            available_fields=['time', 'temp'])
        derived_dp = DataProduct(name='TEMPWAT')
        derived_dp_id = self.dpsc_cli.create_data_product(
            derived_dp,
            stream_definition_id=subset_stream_def_id,
            parent_data_product_id=parent_dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, derived_dp_id)

        # Every stream of the parent product must be persisted
        parent_stream_ids, _ = self.rrclient.find_objects(parent_dp_id, PRED.hasStream, RT.Stream, True)
        for persisted_stream_id in parent_stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(persisted_stream_id))

        publish_stream_id = parent_stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=publish_stream_id)

        outgoing = RecordDictionaryTool(stream_definition_id=parent_stream_def_id)
        outgoing['time'] = np.arange(20)
        outgoing['temp'] = np.arange(20)
        outgoing['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(publish_stream_id, route)

        # Flag set by the DatasetModified event for the parent dataset
        dataset_modified = Event()

        def on_dataset_modified(*args, **kwargs):
            dataset_modified.set()

        subscriber = EventSubscriber(event_type=OT.DatasetModified,
                                     callback=on_dataset_modified,
                                     origin=parent_dataset_id,
                                     auto_delete=True)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        publisher.publish(outgoing.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        # Read back through the derived product's dataset
        derived_dataset_ids, _ = self.rrclient.find_objects(derived_dp_id, PRED.hasDataset, id_only=True)
        derived_dataset_id = derived_dataset_ids[0]
        replay = self.data_retriever.retrieve(derived_dataset_id, delivery_format=subset_stream_def_id)
        retrieved = RecordDictionaryTool.load_from_granule(replay)
        np.testing.assert_array_equal(retrieved['time'], np.arange(20))
        self.assertEqual(set(retrieved.fields), {'time', 'temp'})


    def test_activate_suspend_data_product(self):
        """Check that activating / suspending persistence controls whether published data is ingested.

        Publishes a granule while persistence is active (expects a
        DatasetModified event), publishes again while suspended (expects no
        event within 2 seconds), re-activates and publishes once more, then
        verifies the stored 'time' values and finally force-deletes the product.
        """
        #------------------------------------------------------------------------------------------------
        # Stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        param_dict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=param_dict_id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Create the data product
        #------------------------------------------------------------------------------------------------
        temporal, spatial = time_series_domain()
        spatial_dump = spatial.dump()
        temporal_dump = temporal.dump()

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp',
                           temporal_domain=temporal_dump,
                           spatial_domain=spatial_dump)

        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        dp_id = self.dpsc_cli.create_data_product(data_product=dp_obj,
                                                  stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Activate persistence and locate the dataset and streams
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]

        # Every stream of the data product must be persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        for persisted_stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(persisted_stream_id))

        publish_stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=publish_stream_id)

        outgoing = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        outgoing['time'] = np.arange(20)
        outgoing['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(publish_stream_id, route)

        # Flag set by the DatasetModified event for this dataset
        dataset_modified = Event()

        def on_dataset_modified(*args, **kwargs):
            dataset_modified.set()

        subscriber = EventSubscriber(event_type=OT.DatasetModified,
                                     callback=on_dataset_modified,
                                     origin=dataset_id,
                                     auto_delete=True)
        subscriber.start()
        self.addCleanup(subscriber.stop)

        publisher.publish(outgoing.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Retrieve the whole dataset in one call
        #--------------------------------------------------------------------------------
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)

        log.debug("The data retriever was able to replay the dataset that was attached to the data product "
                  "we wanted to be persisted. Therefore the data product was indeed persisted with "
                  "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
                  "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'")

        # The registry copy must carry the metadata given at creation
        registry_dp = self.rrclient.read(dp_id)
        self.assertEqual(registry_dp.name, 'DP1')
        self.assertEqual(registry_dp.description, 'some new dp')

        log.debug("Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
                  " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
                  "resource registry, name='%s', desc='%s'",
                  dp_obj.name, dp_obj.description, registry_dp.name, registry_dp.description)

        #------------------------------------------------------------------------------------------------
        # Suspend persistence: a published granule must not modify the dataset
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        dataset_modified.clear()

        outgoing['time'] = np.arange(20, 40)

        publisher.publish(outgoing.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        #------------------------------------------------------------------------------------------------
        # Re-activate persistence: the same granule is now ingested
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(outgoing.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        stored = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))
        np.testing.assert_array_almost_equal(stored['time'], np.arange(40))

        # Still exactly one dataset after the suspend / activate cycle
        dataset_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasDataset, id_only=True)
        self.assertEqual(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)

        # The force-deleted data product can no longer be read from the registry
        with self.assertRaises(NotFound):
            dp_obj = self.rrclient.read(dp_id)

    def test_lookup_values(self):
        """Check that a stored lookup document feeds the 'calibrated' values of ingested data.

        First publishes with 'offset_document' ({'offset_a': 2.0}) and expects
        calibrated == 22.0 for temp == 20.0; then stores 'updated_document'
        ({'offset_a': 3.0}), announces it with an ExternalReferencesUpdatedEvent
        and expects the next sample to yield 23.0.
        """
        lookup_pdict_id = ParameterHelper(self.dataset_management, self.addCleanup).create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition('lookup', parameter_dictionary_id=lookup_pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        # Data product on the lookup stream definition
        data_product = DataProduct(name='lookup data product')
        temporal, spatial = time_series_domain()
        data_product.temporal_domain = temporal.dump()
        data_product.spatial_domain = spatial.dump()

        data_product_id = self.dpsc_cli.create_data_product(data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)

        # Data producer whose configuration names the lookup document, associated with the product
        producer = DataProducer(name='producer')
        producer.producer_context = DataProcessProducerContext()
        producer.producer_context.configuration['qc_keys'] = ['offset_document']
        producer_id, _ = self.rrclient.create(producer)
        self.addCleanup(self.rrclient.delete, producer_id)
        association_id, _ = self.rrclient.create_association(subject=data_product_id,
                                                             object=producer_id,
                                                             predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, association_id)

        self.assertEqual(self.damsclient.list_qc_references(data_product_id), ['offset_document'])

        # Store the lookup document, then turn persistence on
        stored_values = StoredValueManager(self.container)
        stored_values.stored_value_cas('offset_document', {'offset_a':2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
        dataset_id = dataset_ids[0]

        first_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(first_monitor.stop)

        first_sample = RecordDictionaryTool(stream_definition_id=stream_def_id)
        first_sample['time'] = [0]
        first_sample['temp'] = [20.]
        first_granule = first_sample.to_granule()

        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(first_granule)

        self.assertTrue(first_monitor.event.wait(10))

        retrieved = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))
        np.testing.assert_array_equal(first_sample['temp'], retrieved['temp'])
        np.testing.assert_array_almost_equal(retrieved['calibrated'], np.array([22.0]))

        # Store a new document and announce it as the product's updated reference
        stored_values.stored_value_cas('updated_document', {'offset_a':3.0})
        second_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(second_monitor.stop)
        event_publisher = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        event_publisher.publish_event(origin=data_product_id, reference_keys=['updated_document'])

        second_sample = RecordDictionaryTool(stream_definition_id=stream_def_id)
        second_sample['time'] = [1]
        second_sample['temp'] = [20.]
        second_granule = second_sample.to_granule()
        gevent.sleep(2) # Yield so that the event goes through
        publisher.publish(second_granule)
        self.assertTrue(second_monitor.event.wait(10))

        retrieved = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(dataset_id))
        np.testing.assert_array_equal(retrieved['temp'], np.array([20., 20.]))
        np.testing.assert_array_almost_equal(retrieved['calibrated'], np.array([22.0, 23.0]))
class TestDataProductManagementServiceIntegration(IonIntegrationTestCase):
    """Integration tests for the data product management service.

    ``setUp`` starts a container from ``res/deploy/r2deploy.yml``, creates the
    service clients used by the tests and schedules one science granule
    ingestion worker; ``cleaning_up`` undoes that after each test.
    """

    def setUp(self):
        """Start the container, build the service clients and launch an ingestion worker."""
        # Start container
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        self.dpsc_cli = DataProductManagementServiceClient(node=self.container.node)
        self.rrclient = ResourceRegistryServiceClient(node=self.container.node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=self.container.node)
        self.pubsubcli = PubsubManagementServiceClient(node=self.container.node)
        self.ingestclient = IngestionManagementServiceClient(node=self.container.node)
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.unsc = UserNotificationServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        #------------------------------------------
        # Create the environment
        #------------------------------------------

        self.db = self.container.datastore_manager.get_datastore(CACHE_DATASTORE_NAME)
        self.stream_def_id = self.pubsubcli.create_stream_definition(name='SBE37_CDM')

        self.process_definitions = {}
        ingestion_worker_definition = ProcessDefinition(name='ingestion worker')
        ingestion_worker_definition.executable = {
            'module': 'ion.processes.data.ingestion.science_granule_ingestion_worker',
            'class': 'ScienceGranuleIngestionWorker',
        }
        process_definition_id = self.process_dispatcher.create_process_definition(
            process_definition=ingestion_worker_definition)
        self.process_definitions['ingestion_worker'] = process_definition_id

        # Everything recorded in these lists is torn down again by cleaning_up().
        self.pids = []
        self.exchange_points = []
        self.exchange_names = []

        #------------------------------------------------------------------------------------------------
        # First launch the ingestors
        #------------------------------------------------------------------------------------------------
        self.exchange_space = 'science_granule_ingestion'
        self.exchange_point = 'science_data'
        config = DotDict()
        config.process.datastore_name = 'datasets'
        config.process.queue_name = self.exchange_space

        self.exchange_names.append(self.exchange_space)
        self.exchange_points.append(self.exchange_point)

        pid = self.process_dispatcher.schedule_process(
            self.process_definitions['ingestion_worker'], configuration=config)
        log.debug("the ingestion worker process id: %s", pid)
        self.pids.append(pid)

        self.addCleanup(self.cleaning_up)

    def cleaning_up(self):
        """Cancel the processes scheduled by ``setUp`` and delete its exchange queues and points.

        Registered through ``addCleanup``. Cancelling is best effort: a process
        that cannot be cancelled is logged and skipped so that the remaining
        cleanup still runs.
        """
        log.debug("number of pids to be terminated: %s", len(self.pids))
        for pid in self.pids:
            try:
                self.process_dispatcher.cancel_process(pid)
                log.debug("Terminated the process: %s", pid)
            except Exception as exc:
                # Deliberately not a bare ``except``: interpreter-exit and
                # interrupt signals must still propagate out of the cleanup.
                log.debug("could not terminate the process id: %s (%s)", pid, exc)
        IngestionManagementIntTest.clean_subscriptions()

        for xn in self.exchange_names:
            xni = self.container.ex_manager.create_xn_queue(xn)
            xni.delete()
        for xp in self.exchange_points:
            xpi = self.container.ex_manager.create_xp(xp)
            xpi.delete()

    def get_datastore(self, dataset_id):
        """Return the SCIDATA-profile datastore named by the dataset ``dataset_id``."""
        dataset = self.dataset_management.read_dataset(dataset_id)
        datastore_name = dataset.datastore_name
        return self.container.datastore_manager.get_datastore(
            datastore_name, DataStore.DS_PROFILE.SCIDATA)

    def test_create_data_product(self):
        """Create, read, update, extend and delete data products.

        Also checks that a new data product is associated with its stream
        definition and that events are reported for the deleted data product.
        """
        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        # id_only=True, as in the sibling tests: the result is passed on as an id.
        parameter_dictionary_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=parameter_dictionary_id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a first data product, with explicit geospatial bounds
        #------------------------------------------------------------------------------------------------

        # Generic time-series data domain creation
        tdom, sdom = time_series_domain()

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain=tdom.dump(),
            spatial_domain=sdom.dump())

        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 200.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = 100.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 50.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = 100.0

        dp_id = self.dpsc_cli.create_data_product(data_product=dp_obj,
                                                  stream_definition_id=ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)
        # 150.0 is the midpoint of the latitude limits set above.
        self.assertEqual(dp_obj.geospatial_point_center.lat, 150.0)
        log.debug('Created data product %s', dp_obj)

        #------------------------------------------------------------------------------------------------
        # test creating a second data product from the same stream definition
        #------------------------------------------------------------------------------------------------
        log.debug('Creating new data product with a stream definition')
        dp_obj = IonObject(RT.DataProduct,
            name='DP2',
            description='some new dp',
            temporal_domain=tdom.dump(),
            spatial_domain=sdom.dump())

        dp_id2 = self.dpsc_cli.create_data_product(dp_obj, ctd_stream_def_id)
        self.dpsc_cli.activate_data_product_persistence(dp_id2)
        log.debug('new dp_id = %s', dp_id2)

        #------------------------------------------------------------------------------------------------
        # make sure data product is associated with stream def
        #------------------------------------------------------------------------------------------------
        streamdefs = []
        streams, _ = self.rrclient.find_objects(dp_id2, PRED.hasStream, RT.Stream, True)
        for s in streams:
            log.debug("Checking stream %s", s)
            sdefs, _ = self.rrclient.find_objects(s, PRED.hasStreamDefinition, RT.StreamDefinition, True)
            for sd in sdefs:
                log.debug("Checking streamdef %s", sd)
                streamdefs.append(sd)
        self.assertIn(ctd_stream_def_id, streamdefs)

        # test reading a non-existent data product
        log.debug('reading non-existent data product')

        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product('some_fake_id')

        # update a data product (tests read also)
        log.debug('Updating data product')
        # first get the existing dp object
        dp_obj = self.dpsc_cli.read_data_product(dp_id)

        # now tweak the object
        dp_obj.description = 'the very first dp'
        dp_obj.geospatial_bounds.geospatial_latitude_limit_north = 300.0
        dp_obj.geospatial_bounds.geospatial_latitude_limit_south = 200.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_east = 150.0
        dp_obj.geospatial_bounds.geospatial_longitude_limit_west = 200.0
        # now write the dp back to the registry
        self.dpsc_cli.update_data_product(dp_obj)

        # now get the dp back to see if it was updated
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertEqual(dp_obj.description, 'the very first dp')
        # the centre latitude follows the updated limits (midpoint of 300 and 200)
        self.assertEqual(dp_obj.geospatial_point_center.lat, 250.0)
        log.debug('Updated data product %s', dp_obj)

        # test extension
        extended_product = self.dpsc_cli.get_data_product_extension(dp_id)
        self.assertEqual(dp_id, extended_product._id)
        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.product_download_size_estimated.status)
        self.assertEqual(0, extended_product.computed.product_download_size_estimated.value)

        self.assertEqual(ComputedValueAvailability.PROVIDED,
                         extended_product.computed.parameters.status)

        # now 'delete' the data product
        log.debug("deleting data product: %s", dp_id)
        self.dpsc_cli.delete_data_product(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            self.dpsc_cli.read_data_product(dp_id)

        # Get the events corresponding to the data product
        ret = self.unsc.get_recent_events(resource_id=dp_id)
        events = ret.value

        for event in events:
            log.debug("event time: %s", event.ts_created)

        self.assertGreater(len(events), 0)

    def test_data_product_stream_def(self):
        """Check that a data product reports the stream definition it was created with."""
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)

        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain=tdom,
            spatial_domain=sdom)
        dp_id = self.dpsc_cli.create_data_product(data_product=dp_obj,
            stream_definition_id=ctd_stream_def_id)

        stream_def_id = self.dpsc_cli.get_data_product_stream_definition(dp_id)
        self.assertEqual(ctd_stream_def_id, stream_def_id)

    def test_activate_suspend_data_product(self):
        """Activate persistence for a data product, replay its dataset, then suspend and delete it."""
        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s", ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # create the data product from that stream definition
        #------------------------------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects
        tdom, sdom = time_series_domain()

        sdom = sdom.dump()
        tdom = tdom.dump()

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp',
            temporal_domain=tdom,
            spatial_domain=sdom)

        log.debug("Created an IonObject for a data product: %s", dp_obj)

        dp_id = self.dpsc_cli.create_data_product(data_product=dp_obj,
            stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        # Only checks that the dataset's datastore can be obtained; the handle itself is not needed.
        self.get_datastore(dataset_ids[0])

        # Check that the streams associated with the data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug("The data retriever was able to replay the dataset that was attached to the data product "
                  "we wanted to be persisted. Therefore the data product was indeed persisted with "
                  "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
                  "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'")

        data_product_object = self.rrclient.read(dp_id)
        self.assertEqual(data_product_object.name, 'DP1')
        self.assertEqual(data_product_object.description, 'some new dp')

        log.debug("Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
                  " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
                  "resource registry, name='%s', desc='%s'",
                  dp_obj.name, dp_obj.description, data_product_object.name,
                  data_product_object.description)

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        self.dpsc_cli.force_delete_data_product(dp_id)

        # now try to get the deleted dp object
        with self.assertRaises(NotFound):
            self.rrclient.read(dp_id)
Exemplo n.º 24
0
class TestObservatoryManagementFullIntegration(IonIntegrationTestCase):
    def setUp(self):
        """Start the container and create the service clients and bookkeeping used by the tests."""
        self._start_container()

        self.container.start_rel_from_url('res/deploy/r2deploy.yml')

        # Service clients. RR2 wraps the plain resource registry client.
        self.RR = ResourceRegistryServiceClient()
        self.RR2 = EnhancedResourceRegistryClient(self.RR)
        self.OMS = ObservatoryManagementServiceClient()
        self.org_management_service = OrgManagementServiceClient()
        self.IMS = InstrumentManagementServiceClient()
        self.dpclient = DataProductManagementServiceClient()
        self.pubsubcli = PubsubManagementServiceClient()
        self.damsclient = DataAcquisitionManagementServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()
        self.data_product_management = DataProductManagementServiceClient()

        # Highest preload stage loaded so far; read and advanced by preload_ooi().
        self._load_stage = 0
        # Maps preload ids to resource ids (filled by _check_marine_facility()).
        self._resources = {}

    def preload_ooi(self, stage=STAGE_LOAD_ASSETS):
        """Preload OOI resources up to and including the given stage.

        Stages already recorded in ``self._load_stage`` are skipped, so the
        method can be called repeatedly with increasing ``stage`` values. Each
        stage that is loaded spawns an ``IONLoader`` process in the container
        and is then recorded in ``self._load_stage``.

        Stages beyond the requested one are left for a later call; previously
        every remaining stage was loaded no matter which ``stage`` was asked for.

        :param stage: last stage to load, one of the ``STAGE_LOAD_*`` constants.
            The comparisons assume ORGS < PARAMS < AGENTS < ASSETS - confirm
            against the constant definitions. The default loads everything.
        """
        if self._load_stage >= stage:
            return

        separator = "--------------------------------------------------------------------------------------------------------"

        # (stage, log format for the stage banner, IONLoader configuration)
        load_plan = (
            # load_OOIR2_scenario
            (STAGE_LOAD_ORGS,
             "Preloading stage: %s (OOIR2 Orgs, users, roles)",
             dict(
                 op="load",
                 scenario="OOIR2",
                 path="master",
             )),
            # load_parameter_scenarios
            (STAGE_LOAD_PARAMS,
             "Preloading stage: %s (BASE params, streamdefs)",
             dict(
                 op="load",
                 scenario="BETA",
                 path="master",
                 categories="ParameterFunctions,ParameterDefs,ParameterDictionary,StreamDefinition",
                 clearcols="owner_id,org_ids",
                 assets="res/preload/r2_ioc/ooi_assets",
                 parseooi="True",
             )),
            # load_OOIR2_agents
            (STAGE_LOAD_AGENTS,
             "Preloading stage: %s (OOIR2_I agents, model links)",
             dict(
                 op="load",
                 scenario="OOIR2_I",
                 path="master",
             )),
            # load_ooi_assets
            (STAGE_LOAD_ASSETS,
             "Preloading stage: %s (OOI assets linked to params, agents)",
             dict(
                 op="load",
                 loadooi="True",
                 path="master",
                 assets="res/preload/r2_ioc/ooi_assets",
                 bulk="True",
                 debug="True",
                 ooiuntil="9/1/2013",
                 ooiparams="True",
                 #excludecategories: DataProduct,DataProductLink,Deployment,Workflow,WorkflowDefinition
             )),
        )

        for load_stage, banner, loader_config in load_plan:
            if load_stage > stage:
                # Beyond the requested stage: leave it for a later call.
                continue
            if self._load_stage >= load_stage:
                # Already loaded by an earlier call.
                continue

            log.info(separator)
            log.info(banner, load_stage)
            self.container.spawn_process("Loader",
                                         "ion.processes.bootstrap.ion_loader",
                                         "IONLoader",
                                         config=loader_config)
            self._load_stage = load_stage

        # 'DataProduct,DataProductLink,WorkflowDefinition,ExternalDataProvider,ExternalDatasetModel,ExternalDataset,ExternalDatasetAgent,ExternalDatasetAgentInstance',

    @unittest.skip('Work in progress')
    def test_observatory(self):
        """Preload OOI stage by stage and run the matching checks after each stage.

        The result of every check is folded into one flag with ``&=`` so that
        all checks run; the flag is asserted once at the very end.
        """
        self._load_stage = 0
        self._resources = {}
        passing = True

        self.assertTrue(True)

        # One entry per load step: the stage to preload, then the checks to run
        # once that stage is available.
        load_steps = (
            # LOAD STEP 1
            (STAGE_LOAD_ORGS, (self.orguserrole_assertions,)),
            # LOAD STEP 2
            (STAGE_LOAD_PARAMS, (self.parameter_assertions,)),
            # LOAD STEP 3
            (STAGE_LOAD_AGENTS, (self.agent_assertions,)),
            # LOAD STEP 4
            (STAGE_LOAD_ASSETS, (
                # Check OOI preloaded resources to see if they match needs for
                # this test and for correctness
                self.sites_assertions,
                self.device_assertions,
                self.deployment_assertions,
                # Extensive tests on select RSN nodes
                self.rsn_node_checks,
                # Extensive tests on select RSN instruments
                self.check_rsn_instrument,
                self.check_rsn_instrument_data_product,
                # Extensive tests on a glider
                #self.check_glider,
                # Extensive tests on a CG assembly
                #self.check_cg_assembly,
            )),
        )

        for load_stage, checks in load_steps:
            self.preload_ooi(stage=load_stage)
            for check in checks:
                passing &= check()

        # Add a new instrument agent
        # Add a new instrument agent instance
        # Check DataProducts
        # Check Provenance

        IonIntegrationTestCase.assertTrue(self, passing)

    # -------------------------------------------------------------------------

    def orguserrole_assertions(self):
        """Check the three preloaded marine facilities; return True only if all checks pass."""
        passing = True

        # Every facility is checked even after a failure (no short-circuit).
        for facility_preload_id in ("MF_CGSN", "MF_RSN", "MF_EA"):
            passing &= self._check_marine_facility(facility_preload_id)

        return passing

    def _check_marine_facility(self, preload_id):
        """Check one preloaded marine facility and its membership and role associations.

        The facility's resource id is recorded in ``self._resources`` under
        ``preload_id``. Returns the combined result of all checks.

        NOTE(review): folding ``self.assertEquals``/``self.assertTrue`` into the
        flag relies on them returning a truth value - presumably this class
        overrides them outside this section; confirm.
        """
        log.debug("Checking marine facility %s and associations", preload_id)

        facility = self.retrieve_ooi_asset(preload_id)
        facility_id = facility._id
        self._resources[preload_id] = facility_id

        passing = True
        passing &= self.assertEquals(facility.lcstate, LCS.DEPLOYED)

        members, _ = self.RR.find_objects(subject=facility_id,
                                          predicate=PRED.hasMembership,
                                          id_only=True)
        passing &= self.assertTrue(len(members) >= 3)

        roles, _ = self.RR.find_objects(subject=facility_id,
                                        predicate=PRED.hasRole,
                                        id_only=False)
        passing &= self.assertTrue(len(roles) >= 5)

        for role_name in ("ORG_MANAGER",
                          "OBSERVATORY_OPERATOR",
                          "INSTRUMENT_OPERATOR"):
            passing &= self._check_role_assignments(roles, role_name)

        return passing

    def _check_role_assignments(self, role_list, role_name):
        """Check that the role named ``role_name`` is assigned to at least one subject.

        :param role_list: role objects to search.
        :param role_name: ``governance_name`` of the role to look for.
        :return: result of the check; True when no such role is in ``role_list``
            (a missing role is not treated as a failure here).
        """
        passing = True
        role_obj = self._find_resource_in_list(role_list, "governance_name",
                                               role_name)
        if role_obj:
            # find_subjects returns a (subjects, associations) pair, just like
            # find_objects. Without unpacking, len() measured the pair itself,
            # so the ">= 1" check could never fail.
            res_list, _ = self.RR.find_subjects(predicate=PRED.hasRole,
                                                object=role_obj._id,
                                                id_only=True)
            passing &= self.assertTrue(len(res_list) >= 1)

        return passing

    def parameter_assertions(self):
        """Check the preloaded parameter contexts, dictionaries and stream definitions.

        Returns the combined result of all checks; stops early (after
        recording the failure) when parameter dictionary DICT110 is not found.
        """
        passing = True

        # At least ten resources of each of these types must have been loaded.
        for restype in (RT.ParameterContext,
                        RT.ParameterDictionary,
                        RT.StreamDefinition):
            loaded, _ = self.RR.find_resources_ext(restype=restype)
            passing &= self.assertTrue(len(loaded) >= 10)

        # Verify that a PDict has the appropriate QC parameters defined
        matches, _ = self.RR.find_resources_ext(restype=RT.ParameterDictionary,
                                                alt_id_ns='PRE',
                                                alt_id='DICT110')
        passing &= self.assertTrue(len(matches) == 1)
        if not matches:
            return passing
        pdict = matches[0]

        # According to the latest SAF, density should NOT have trend
        contexts, _ = self.RR.find_objects(pdict, PRED.hasParameterContext)
        density_names = [
            context.name for context in contexts
            if context.name.startswith('density')
        ]
        passing &= self.assertTrue('density_trndtst_qc' not in density_names)

        return passing

    def agent_assertions(self):
        """Placeholder for agent checks; currently always reports success."""
        # TODO: More tests?
        return True

    def sites_assertions(self):
        """Check the preloaded observatories and platform sites.

        Expects at least 40 observatories, all in DEPLOYED, and at least 30
        platform sites. Returns the combined result of all checks.
        """
        passing = True

        observatories, _ = self.RR.find_resources_ext(restype=RT.Observatory)
        passing &= self.assertTrue(len(observatories) >= 40)
        for observatory in observatories:
            passing &= self.assertEquals(observatory.lcstate, LCS.DEPLOYED)

        platform_sites, _ = self.RR.find_resources(RT.PlatformSite,
                                                   id_only=False)
        site_names = [site.name for site in platform_sites]
        log.debug('platform sites: %s', site_names)
        passing &= self.assertTrue(len(platform_sites) >= 30)

        return passing

    def device_assertions(self):
        """Check the preloaded platform devices, platform agents and instrument agents.

        Expects at least 30 platform devices in PLANNED, at least 2 platform
        agents in DEPLOYED, and at least 3 instrument agents in DEPLOYED that
        each have one or more models. Returns the combined result of all checks.
        """
        passing = True

        devices, _ = self.RR.find_resources(RT.PlatformDevice, id_only=False)
        passing &= self.assertTrue(len(devices) >= 30)
        for device in devices:
            log.debug('platform device: %s', device.name)
            passing &= self.assertEquals(device.lcstate, LCS.PLANNED)

        platform_agents, _ = self.RR.find_resources(RT.PlatformAgent,
                                                    id_only=False)
        passing &= self.assertTrue(len(platform_agents) >= 2)
        for platform_agent in platform_agents:
            log.debug('platform agent: %s', platform_agent.name)
            passing &= self.assertEquals(platform_agent.lcstate, LCS.DEPLOYED)

        instrument_agents, _ = self.RR.find_resources(RT.InstrumentAgent,
                                                      id_only=False)
        passing &= self.assertTrue(len(instrument_agents) >= 3)
        for instrument_agent in instrument_agents:
            log.debug('instrument agent: %s', instrument_agent.name)
            passing &= self.assertEquals(instrument_agent.lcstate,
                                         LCS.DEPLOYED)

            # Every instrument agent must be linked to at least one model.
            models, _ = self.RR.find_objects(subject=instrument_agent._id,
                                             predicate=PRED.hasModel,
                                             id_only=True)
            has_model = len(models) >= 1
            passing &= self.assertTrue(has_model,
                                       "IA %s" % instrument_agent.name)

        return passing

    def deployment_assertions(self):
        """Check that at least 30 deployments were preloaded and all are in DEPLOYED.

        Returns the combined result of all checks.
        """
        deployments, _ = self.RR.find_resources(RT.Deployment, id_only=False)

        passing = True
        passing &= self.assertTrue(len(deployments) >= 30)
        for deployment in deployments:
            log.debug('deployment: %s', deployment.name)
            passing &= self.assertEquals(deployment.lcstate, LCS.DEPLOYED)

        return passing

    def rsn_node_checks(self):
        """
        Check the preloaded RSN node deployments, walk the CE04OSHY-LV01C
        platform device through its lifecycle and activate its deployment.

        Current preload creates:
        - PlatformDevice in PLANNED
        - PlatformSite in DEPLOYED
        - Deployment in DEPLOYED
        - Deployment is NOT activated

        Returns the combined result of all checks.
        """
        passing = True

        pn01c_deployment = self.retrieve_ooi_asset("CE04OSHY-PN01C_DEP")

        passing &= self.assertEquals(pn01c_deployment.lcstate, LCS.DEPLOYED)
        passing &= self.assertEquals(pn01c_deployment.availability,
                                     AS.AVAILABLE)
        log.debug(
            'test_observatory  retrieve CE04OSHY-PN01C_DEP deployment:  %s',
            pn01c_deployment)

        # Check existing RSN node CE04OSHY-LV01C Deployment (expected in
        # DEPLOYED lcstate and AVAILABLE)
        lv01c_deployment = self.retrieve_ooi_asset('CE04OSHY-LV01C_DEP')
        passing &= self.assertEquals(lv01c_deployment.lcstate, LCS.DEPLOYED)
        passing &= self.assertEquals(lv01c_deployment.availability,
                                     AS.AVAILABLE)

        #self.dump_deployment(CE04OSHY_LV01C_deployment._id)
        log.debug(
            'test_observatory  retrieve RSN node CE04OSHY-LV01C Deployment:  %s',
            lv01c_deployment)

        lv01c_device = self.retrieve_ooi_asset('CE04OSHY-LV01C_PD')

        # Move the CE04OSHY-LV01C device through DEVELOPED, INTEGRATED and
        # DEPLOYED, verifying the resulting state after each lifecycle event.
        lifecycle_steps = (
            (LCE.DEVELOP, LCS.DEVELOPED),
            (LCE.INTEGRATE, LCS.INTEGRATED),
            (LCE.DEPLOY, LCS.DEPLOYED),
        )
        for lifecycle_event, expected_state in lifecycle_steps:
            passing &= self.transition_lcs_then_verify(
                resource_id=lv01c_device._id,
                new_lcs_state=lifecycle_event,
                verify=expected_state)

        # Set CE04OSHY-LV01C Deployment to DEPLOYED state
        # NOTE: Deployments are created in DEPLOYED state, currently
        #self.transition_lcs_then_verify(resource_id=CE04OSHY_LV01C_deployment._id, new_lcs_state=LCE.DEPLOY, verify='DEPLOYED')

        # Activate Deployment for CE04OSHY-LV01C
        self.OMS.activate_deployment(lv01c_deployment._id)
        log.debug(
            '---------    activate_deployment CE04OSHY_LV01C_deployment -------------- '
        )
        self.dump_deployment(lv01c_deployment._id)
        passing &= self.validate_deployment_activated(lv01c_deployment._id)

        # (optional) Start CE04OSHY-LV01C platform agent with simulator

        # NOTE: DataProduct is generated in DEPLOYED state
        # # Set DataProduct for CE04OSHY-LV01C platform to DEPLOYED state
        # output_data_product_ids, assns =self.RR.find_objects(subject=CE04OSHY_LV01C_device._id, predicate=PRED.hasOutputProduct, id_only=True)
        # if output_data_product_ids:
        #     #self.assertEquals(len(child_devs), 3)
        #     for output_data_product_id in output_data_product_ids:
        #         log.debug('DataProduct for CE04OSHY-LV01C platform:  %s', output_data_product_id)
        #         self.transition_lcs_then_verify(resource_id=output_data_product_id, new_lcs_state=LCE.DEPLOY, verify='DEPLOYED')

        # Check events for CE04OSHY-LV01C platform

        # Check existing CE04OSBP-LJ01C Deployment (PLANNED lcstate)
        #        dp_list, _  = self.RR.find_resources_ext(alt_id_ns="PRE", alt_id="CE04OSBP-LJ01C_DEP")
        #        self.assertEquals(len(dp_list), 1)
        #        CE04OSHY_LV01C_deployment = dp_list[0]
        #        self.assertEquals(CE04OSHY_LV01C_deployment.lcstate, 'PLANNED')
        #        log.debug('test_observatory  retrieve RSN node CE04OSBP-LJ01C Deployment:  %s', CE04OSHY_LV01C_deployment)

        # Remaining steps, not implemented yet:
        # Set CE04OSBP-LJ01C Deployment to DEPLOYED state
        # Update description and other attributes for CE04OSBP-LJ01C device resource
        # Create attachment (JPG image) for CE04OSBP-LJ01C device resource
        # Activate Deployment for CE04OSBP-LJ01C
        # (optional) Add/register CE04OSBP-LJ01C platform agent to parent agent
        # (optional) Start CE04OSBP-LJ01C platform agent

        return passing

    def check_rsn_instrument(self):
        """
        Exercise the preloaded RSN instrument CE04OSBP-LJ01C-06-CTDBPO108.

        Per the original notes, the current preload creates:
        - InstrumentDevice in PLANNED
        - InstrumentSite in DEPLOYED
        - Deployment in DEPLOYED
        - Deployment is activated

        The device is advanced through DEVELOPED, INTEGRATED and DEPLOYED,
        its deployment is activated, a substitute deployment is created for
        the same site with a different device, and the first deployment is
        deactivated again.

        Returns the AND of every verification step's result.
        """
        ok = True

        deployment = self.retrieve_ooi_asset('CE04OSBP-LJ01C-06-CTDBPO108_DEP')
        self.dump_deployment(deployment._id)
        # Disabled check of the initial deployment state:
        #ok &= self.assertEquals(deployment.lcstate, 'PLANNED')

        # Walk the device through its lifecycle, verifying after each step.
        device = self.retrieve_ooi_asset('CE04OSBP-LJ01C-06-CTDBPO108_ID')
        lifecycle_steps = (
            (LCE.DEVELOP, 'DEVELOPED'),
            (LCE.INTEGRATE, 'INTEGRATED'),
            (LCE.DEPLOY, 'DEPLOYED'),
        )
        for event, expected_state in lifecycle_steps:
            ok &= self.transition_lcs_then_verify(resource_id=device._id,
                                                  new_lcs_state=event,
                                                  verify=expected_state)

        # The Deployment itself is not transitioned here (disabled):
        #self.transition_lcs_then_verify(resource_id=deployment._id, new_lcs_state=LCE.DEPLOY, verify='DEPLOYED')

        # Activate the instrument's deployment and confirm the site/device link.
        log.debug(
            '---------    activate_deployment CE04OSBP-LJ01C-06-CTDBPO108 deployment -------------- '
        )
        self.OMS.activate_deployment(deployment._id)
        ok &= self.validate_deployment_activated(deployment._id)

        # Not implemented yet:
        # (optional) Add/register CE04OSBP-LJ01C-06-CTDBPO108 instrument agent to parent agent
        # (optional) Start CE04OSBP-LJ01C-06-CTDBPO108 instrument agent with simulator
        # Set all DataProducts for CE04OSBP-LJ01C-06-CTDBPO108 to DEPLOYED state

        # Build a substitute Deployment for the same site using another
        # device (the CTD on Mooring Riser 001 was picked as comparable).
        site = self.retrieve_ooi_asset('CE04OSBP-LJ01C-06-CTDBPO108')
        substitute_device = self.retrieve_ooi_asset(
            'GP03FLMB-RI001-10-CTDMOG999_ID')

        substitute_deploy_id = self.create_basic_deployment(
            name='CE04OSBP-LJ01C-06-CTDBPO108_DEP2',
            description='substitute Deployment for site CE04OSBP-LJ01C-06-CTDBPO108 with a comparable device')
        self.OMS.assign_device_to_deployment(
            instrument_device_id=substitute_device._id,
            deployment_id=substitute_deploy_id)
        self.OMS.assign_site_to_deployment(
            instrument_site_id=site._id,
            deployment_id=substitute_deploy_id)
        self.dump_deployment(substitute_deploy_id)

        # Deactivate the first deployment and confirm the link is gone.
        self.OMS.deactivate_deployment(deployment._id)
        ok &= self.validate_deployment_deactivated(deployment._id)

        # Disabled follow-up steps, kept for reference:
        # log.debug('Activate deployment deploy_id_2')
        # self.get_deployment_ids(substitute_deploy_id)
        # self.dump_deployment(substitute_deploy_id, "deploy_id_2")
        # self.OMS.activate_deployment(substitute_deploy_id)
        # ok &= self.validate_deployment_deactivated(deployment._id)
        #
        # # (optional) Set first CE04OSBP-LJ01C-06-CTDBPO108 Deployment to INTEGRATED state
        # ok &= self.transition_lcs_then_verify(resource_id=deployment._id, new_lcs_state=LCE.INTEGRATE, verify='INTEGRATED')
        #
        # # Set first CE04OSBP-LJ01C-06-CTDBPO108 device to INTEGRATED state
        # ok &= self.transition_lcs_then_verify(resource_id=device._id, new_lcs_state=LCE.INTEGRATE, verify='INTEGRATED')
        #
        # # (optional) Create a third Deployment for site CE04OSBP-LJ01C-06-CTDBPO108 with a same device from first deployment
        # deploy_id_3 = self.create_basic_deployment(name='CE04OSBP-LJ01C-06-CTDBPO108_DEP3', description='substitute Deployment for site CE04OSBP-LJ01C-06-CTDBPO108 with same device as first')
        # self.IMS.deploy_instrument_device(instrument_device_id=substitute_device._id, deployment_id=deploy_id_3)
        # self.OMS.deploy_instrument_site(instrument_site_id=site._id, deployment_id=deploy_id_3)
        # self.dump_deployment(deploy_id_3)
        #
        # # Set first CE04OSBP-LJ01C-06-CTDBPO108 device to DEPLOYED state
        # ok &= self.transition_lcs_then_verify(resource_id=device._id, new_lcs_state=LCE.DEPLOY, verify='DEPLOYED')
        #
        # # (optional) Activate this third deployment - check second deployment is deactivated
        # log.debug('Activate deployment deploy_id_3')
        # self.dump_deployment(deploy_id_3)
        # self.OMS.activate_deployment(deploy_id_3)
        # #todo: check second deployment is deactivated

        return ok

    def check_data_product_reference(self, reference_designator, output=None):
        """
        Locate the parsed data product for a reference designator, activate
        its persistence and report the ids needed to publish to it.

        The data product is looked up by the preload alt id
        ``<reference_designator>_DPI1`` (assumed to be the parsed product).
        Its persistence is activated, with suspension registered as a test
        cleanup, and its dataset and stream definition are resolved.

        :param reference_designator: reference designator used to build the
            alt id of the data product.
        :param output: optional list; on success a tuple
            ``(data_product_id, stream_def_id, dataset_id)`` is appended to it.
            When omitted, a fresh list is used for each call.
        :return: the AND of all checks performed. Returns early (with the
            result accumulated so far) when any lookup yields nothing.
        """
        # A fresh list per call: a list literal as the default would be
        # shared between all calls that omit ``output``.
        if output is None:
            output = []
        passing = True

        data_product_ids, _ = self.RR.find_resources_ext(
            alt_id_ns='PRE',
            alt_id='%s_DPI1' % reference_designator,
            id_only=True)  # Assuming DPI1 is parsed
        passing &= self.assertEquals(len(data_product_ids), 1)

        if not data_product_ids:
            return passing

        # Let's go ahead and activate it
        data_product_id = data_product_ids[0]
        self.dpclient.activate_data_product_persistence(data_product_id)
        self.addCleanup(self.dpclient.suspend_data_product_persistence,
                        data_product_id)

        dataset_ids, _ = self.RR.find_objects(data_product_id,
                                              PRED.hasDataset,
                                              id_only=True)
        passing &= self.assertEquals(len(dataset_ids), 1)
        if not dataset_ids:
            return passing
        dataset_id = dataset_ids[0]

        stream_def_ids, _ = self.RR.find_objects(data_product_id,
                                                 PRED.hasStreamDefinition,
                                                 id_only=True)
        # Check the stream definitions themselves (this previously re-checked
        # the dataset list, so a wrong stream definition count went unnoticed).
        passing &= self.assertEquals(len(stream_def_ids), 1)
        if not stream_def_ids:
            return passing
        stream_def_id = stream_def_ids[0]
        output.append((data_product_id, stream_def_id, dataset_id))
        return passing

    def check_tempsf_instrument_data_product(self, reference_designator):
        """
        Publish one record of temperature readings to the data product of
        the given reference designator and verify it can be retrieved.

        :param reference_designator: designator handed to
            ``check_data_product_reference`` to locate the data product.
        :return: the AND of all checks; returns early as soon as the
            accumulated result is false.
        """
        # One record holding 24 temperature values.
        temperatures = (
            25.3884, 26.9384, 24.3394, 23.3401, 22.9832, 29.4434, 26.9873,
            15.2883, 16.3374, 14.5883, 15.7253, 18.4383, 15.3488, 17.2993,
            10.2111, 11.5993, 10.9345, 9.4444, 9.9876, 10.9834, 11.0098,
            5.3456, 4.2994, 4.3009,
        )

        passing = True
        collected = []
        passing &= self.check_data_product_reference(reference_designator,
                                                     collected)
        if not passing:
            return passing
        data_product_id, stream_def_id, dataset_id = collected.pop()

        # Shift the Unix timestamp to the NTP epoch (1900-01-01).
        ntp_now = time.time() + 2208988800

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['temperature'] = [list(temperatures)]

        # Start monitoring before publishing so the ingest event is seen.
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.wait())
        if not passing:
            return passing

        # Read the record back and compare it with what was published.
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
        passing &= self.assert_array_almost_equal(rdt['temperature'],
                                                  [list(temperatures)])
        return passing

    def check_trhph_instrument_data_products(self, reference_designator):
        """
        Publish one TRHPH sample to the data product of the given reference
        designator and verify the derived vent-fluid parameters, both on the
        record before publishing and on the granule retrieved afterwards.

        :param reference_designator: designator handed to
            ``check_data_product_reference`` to locate the data product.
        :return: the AND of all checks; returns early when the data product
            lookup or the ingest wait fails.
        """
        passing = True
        collected = []
        passing &= self.check_data_product_reference(reference_designator,
                                                     collected)
        if not passing:
            return passing

        data_product_id, stream_def_id, dataset_id = collected.pop()

        pdict = self.RR2.find_parameter_dictionary_of_stream_definition_using_has_parameter_dictionary(
            stream_def_id)
        passing &= self.assertEquals(pdict.name, 'trhph_sample')

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        # Inputs that are reused below for the expected values.
        orp_volts = 1.806
        orp_offset = 2008
        orp_gain = 4.0

        expected_temperature = 11.8
        expected_chloride = np.nan
        # Normally this would be 50 per the DPS but the precision is %4.0f which truncates the values to the nearest 1...
        expected_orp = ((orp_volts * 1000.) - orp_offset) / orp_gain

        ntp_now = time.time() + 2208988800

        # (field, value) pairs, assigned in this order as one-element records.
        sample = (
            # calibration constants
            ('cc_a', 1.98e-9),
            ('cc_b', -2.45e-6),
            ('cc_c', 9.28e-4),
            ('cc_d', -0.0888),
            ('cc_e', 0.731),
            # raw readings
            ('ref_temp_volts', 1.506),
            ('resistivity_temp_volts', 0.),
            ('eh_sensor', orp_volts),
            ('resistivity_5', 0.906),
            ('resistivity_x1', 4.095),
            ('resistivity_x5', 4.095),
            ('cc_offset', orp_offset),
            ('cc_gain', orp_gain),
            ('time', ntp_now),
        )
        for field, value in sample:
            rdt[field] = [value]

        # (derived field, expected values, decimal places)
        derived_checks = (
            ('vent_fluid_temperaure', [expected_temperature], 2),
            ('vent_fluid_chloride_conc', [expected_chloride], 4),
            ('vent_fluid_orp', [expected_orp], 4),
        )
        for field, expected, decimals in derived_checks:
            passing &= self.assert_array_almost_equal(rdt[field], expected,
                                                      decimals)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.wait())
        if not passing:
            return passing

        # Same derived-value checks against the retrieved granule.
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for field, expected, decimals in derived_checks:
            passing &= self.assert_array_almost_equal(rdt[field], expected,
                                                      decimals)

        return passing

    def check_vel3d_instrument_data_products(self, reference_designator):
        """
        Publish ten VEL3D samples to the data product of the given reference
        designator and verify the derived eastward/northward/upward turbulent
        velocities, both on the record before publishing and on the granule
        retrieved afterwards.

        :param reference_designator: designator handed to
            ``check_data_product_reference`` to locate the data product.
        :return: the AND of all checks; returns early when the data product
            lookup or the ingest wait fails.
        """
        passing = True
        info_list = []
        passing &= self.check_data_product_reference(reference_designator,
                                                     info_list)
        if not passing:
            return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        pdict = self.RR2.find_parameter_dictionary_of_stream_definition_using_has_parameter_dictionary(
            stream_def_id)
        # Fold the result into ``passing`` like the sibling checks do;
        # otherwise a wrong parameter dictionary would not affect the result.
        passing &= self.assertEquals(pdict.name, 'vel3d_b_sample')

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        lat = 14.6846
        lon = -51.044
        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from newer NumPy releases, so use the builtin directly.
        ts = np.array([
            3319563600, 3319567200, 3319570800, 3319574400, 3319578000,
            3319581600, 3319585200, 3319588800, 3319592400, 3319596000
        ],
                      dtype=float)

        # Raw inputs
        ve = np.array([-3.2, 0.1, 0., 2.3, -0.1, 5.6, 5.1, 5.8, 8.8, 10.3])
        vn = np.array([18.2, 9.9, 12., 6.6, 7.4, 3.4, -2.6, 0.2, -1.5, 4.1])
        vu = np.array([-1.1, -0.6, -1.4, -2, -1.7, -2, 1.3, -1.6, -1.1, -4.5])

        # Expected derived values for the inputs above
        ve_expected = np.array([
            -0.085136, -0.028752, -0.036007, 0.002136, -0.023158, 0.043218,
            0.056451, 0.054727, 0.088446, 0.085952
        ])
        vn_expected = np.array([
            0.164012, 0.094738, 0.114471, 0.06986, 0.07029, 0.049237,
            -0.009499, 0.019311, 0.012096, 0.070017
        ])
        vu_expected = np.array([
            -0.011, -0.006, -0.014, -0.02, -0.017, -0.02, 0.013, -0.016,
            -0.011, -0.045
        ])

        rdt['time'] = ts
        rdt['lat'] = [lat] * 10
        rdt['lon'] = [lon] * 10
        rdt['turbulent_velocity_east'] = ve
        rdt['turbulent_velocity_north'] = vn
        rdt['turbulent_velocity_up'] = vu

        passing &= self.assert_array_almost_equal(
            rdt['eastward_turbulent_velocity'], ve_expected)
        passing &= self.assert_array_almost_equal(
            rdt['northward_turbulent_velocity'], vn_expected)
        passing &= self.assert_array_almost_equal(
            rdt['upward_turbulent_velocity'], vu_expected)

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        passing &= self.assertTrue(dataset_monitor.wait())
        if not passing:
            return passing

        # Same derived-value checks against the retrieved granule.
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(
            rdt['eastward_turbulent_velocity'], ve_expected)
        passing &= self.assert_array_almost_equal(
            rdt['northward_turbulent_velocity'], vn_expected)
        passing &= self.assert_array_almost_equal(
            rdt['upward_turbulent_velocity'], vu_expected)
        return passing

    def check_presta_instrument_data_products(self, reference_designator):
        """
        Check that the parsed PREST-A data product can be published to,
        persisted and retrieved, and that the derived seafloor pressure
        matches the expected value.

        :param reference_designator: designator handed to
            ``check_data_product_reference`` to locate the data product.
        :return: the AND of all checks; returns early when the data product
            lookup or the ingest wait fails.
        """
        # Absolute Pressure (SFLPRES_L0) is what comes off the instrument, SFLPRES_L1 is a pfunc
        # Let's go ahead and publish some fake data!!!
        # According to https://alfresco.oceanobservatories.org/alfresco/d/d/workspace/SpacesStore/63e16865-9d9e-4b11-b0b3-d5658faa5080/1341-00230_Data_Product_Spec_SFLPRES_OOI.pdf
        # Appendix A. Example 1.
        # p_psia_tide = 14.8670
        # the tide should be 10.2504
        passing = True

        info_list = []
        passing &= self.check_data_product_reference(reference_designator,
                                                     info_list)
        if not passing:
            return passing
        data_product_id, stream_def_id, dataset_id = info_list.pop()

        # Shift the Unix timestamp to the NTP epoch (1900-01-01).
        now = time.time()
        ntp_now = now + 2208988800.

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['absolute_pressure'] = [14.8670]
        passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'],
                                                  [10.2504], 4)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        ParameterHelper.publish_rdt_to_data_product(data_product_id, rdt)
        # Fold the wait result into ``passing`` so the guard below can react
        # to a missed ingest, as the sibling checks do.
        passing &= self.assertTrue(dataset_monitor.wait())
        if not passing:
            return passing

        granule = self.data_retriever.retrieve(dataset_id)

        rdt = RecordDictionaryTool.load_from_granule(granule)
        passing &= self.assert_array_almost_equal(rdt['time'], [ntp_now])
        passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'],
                                                  [10.2504], 4)
        passing &= self.assert_array_almost_equal(rdt['absolute_pressure'],
                                                  [14.8670], 4)

        return passing

    def check_rsn_instrument_data_product(self):
        """
        Verify the SFLPRES L0 data product of RS03AXBS-MJ03A-06-PRESTA301
        and run the per-instrument data product checks for several RSN
        instruments.

        Steps:
        - look up the data product by its preload alt id and check that its
          stream definition carries the expected reference designator;
        - check that every available parameter other than ``time`` has an
          ``ooi_short_name`` starting with ``SFLPRES``;
        - run the PREST-A, VEL3D, TMPSF and TRHPH checks;
        - activate persistence again, retrieve the dataset and compare the
          pressure values, then suspend persistence.

        :return: the AND of all checks; returns early when a lookup yields
            nothing.
        """
        passing = True
        # for RS03AXBS-MJ03A-06-PRESTA301 (PREST-A) there are a few listed data products
        # Parsed, Engineering
        # SFLPRES-0 SFLPRES-1
        # Check for the two data products and make sure they have the proper parameters
        data_products, _ = self.RR.find_resources_ext(
            alt_id_ns='PRE',
            alt_id='RS03AXBS-MJ03A-06-PRESTA301_SFLPRES_L0_DPID',
            id_only=True)
        passing &= self.assertTrue(len(data_products) == 1)
        if not data_products:
            return passing

        data_product_id = data_products[0]

        stream_defs, _ = self.RR.find_objects(data_product_id,
                                              PRED.hasStreamDefinition,
                                              id_only=False)
        passing &= self.assertTrue(len(stream_defs) == 1)
        if not stream_defs:
            return passing

        # Assert that the stream definition has the correct reference designator
        stream_def = stream_defs[0]
        passing &= self.assertEquals(
            stream_def.stream_configuration['reference_designator'],
            'RS03AXBS-MJ03A-06-PRESTA301')

        # Get the pdict and make sure that the parameters corresponding to the available fields
        # begin with the appropriate data product identifier

        pdict_ids, _ = self.RR.find_objects(stream_def,
                                            PRED.hasParameterDictionary,
                                            id_only=True)
        passing &= self.assertEquals(len(pdict_ids), 1)
        if not pdict_ids:
            return passing

        pdict_id = pdict_ids[0]

        pdict = DatasetManagementService.get_parameter_dictionary(pdict_id)
        available_params = [
            pdict.get_context(i) for i in pdict.keys()
            if i in stream_def.available_fields
        ]
        for p in available_params:
            if p.name == 'time':  # Ignore the domain parameter
                continue
            passing &= self.assertTrue(p.ooi_short_name.startswith('SFLPRES'))

        # Per-instrument publish/retrieve checks.
        passing &= self.check_presta_instrument_data_products(
            'RS01SLBS-MJ01A-06-PRESTA101')
        passing &= self.check_vel3d_instrument_data_products(
            'RS01SLBS-MJ01A-12-VEL3DB101')
        passing &= self.check_presta_instrument_data_products(
            'RS03AXBS-MJ03A-06-PRESTA301')
        passing &= self.check_vel3d_instrument_data_products(
            'RS03AXBS-MJ03A-12-VEL3DB301')
        passing &= self.check_tempsf_instrument_data_product(
            'RS03ASHS-MJ03B-07-TMPSFA301')
        passing &= self.check_vel3d_instrument_data_products(
            'RS03INT2-MJ03D-12-VEL3DB304')
        passing &= self.check_trhph_instrument_data_products(
            'RS03INT1-MJ03C-10-TRHPHA301')

        self.data_product_management.activate_data_product_persistence(
            data_product_id)
        dataset_id = self.RR2.find_dataset_id_of_data_product_using_has_dataset(
            data_product_id)
        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        # Fold these results into ``passing`` as well; previously they were
        # dropped and could not influence the returned result.
        passing &= self.assert_array_almost_equal(rdt['seafloor_pressure'],
                                                  [10.2504], 4)
        passing &= self.assert_array_almost_equal(rdt['absolute_pressure'],
                                                  [14.8670], 4)
        self.data_product_management.suspend_data_product_persistence(
            data_product_id)  # Should do nothing and not raise anything

        return passing

    def check_glider(self):
        """
        Check the glider GP05MOAS-GL001 assembly defined by the OOI preload
        (3 child devices expected) and exercise a second deployment for it.

        Returns the AND of every verification step's result.
        """
        ok = True

        # The platform device must have exactly three child devices.
        glider_device = self.retrieve_ooi_asset('GP05MOAS-GL001_PD')
        child_device_ids, _ = self.RR.find_objects(
            subject=glider_device._id,
            predicate=PRED.hasDevice,
            id_only=True)
        ok &= self.assertEquals(len(child_device_ids), 3)

        # Move the preloaded deployment to DEPLOYED.
        preloaded_deployment = self.retrieve_ooi_asset('GP05MOAS-GL001_DEP')
        ok &= self.transition_lcs_then_verify(
            resource_id=preloaded_deployment._id,
            new_lcs_state=LCE.DEPLOY,
            verify='DEPLOYED')

        # Activation/deactivation of the preloaded deployment is disabled:
        #self.OMS.activate_deployment(preloaded_deployment._id)
        #self.OMS.deactivate_deployment(preloaded_deployment._id)

        # Create a new, initially unassigned Deployment resource X ...
        x_deploy_id = self.create_basic_deployment(
            name='X_Deployment',
            description='new Deployment resource X without any assignment')

        # ... assign it to the glider's site ...
        glider_site = self.retrieve_ooi_asset('GP05MOAS-GL001')
        self.OMS.assign_site_to_deployment(glider_site._id, x_deploy_id)

        # ... and to the glider's platform device (looked up again).
        glider_device = self.retrieve_ooi_asset('GP05MOAS-GL001_PD')
        self.OMS.assign_device_to_deployment(glider_device._id, x_deploy_id)

        # No explicit transition to PLANNED (disabled; the original note
        # questions whether it is already planned):
        #self.transition_lcs_then_verify(resource_id=x_deploy_id, new_lcs_state=LCE.PLAN, verify='PLANNED')

        # Move the second deployment to DEPLOYED and log it.
        ok &= self.transition_lcs_then_verify(resource_id=x_deploy_id,
                                              new_lcs_state=LCE.DEPLOY,
                                              verify='DEPLOYED')
        self.dump_deployment(x_deploy_id)

        # Activation/deactivation of the second deployment is disabled:
        #self.OMS.activate_deployment(x_deploy_id)
        #self.OMS.deactivate_deployment(x_deploy_id)
        return ok

    def check_cg_assembly(self):
        """
        Placeholder for the CG assembly checks.

        None of the planned steps is implemented yet, so this always
        returns True.
        """
        # Planned steps, in order:
        #  - Set several CE01ISSM-RI002-* instrument devices to DEVELOPED state
        #  - Assemble several CE01ISSM-RI002-* instruments to a CG CE01ISSM-RI002 component platform
        #  - Set several CE01ISSM-RI002-* instrument devices to INTEGRATED state
        #  - Assemble CE01ISSM-RI002 platform to CG CE01ISSM-LM001 station platform
        #  - Set CE01ISSM-RI002 component device to INTEGRATED state
        #  - Set CE01ISSM-LM001 station device to INTEGRATED state
        #  - Set CE01ISSM-LM001 station device to DEPLOYED state (children maybe too?)
        #  - Set CE01ISSM-LM001 Deployment to DEPLOYED
        #  - Activate CE01ISSM-LM001 platform assembly deployment
        #  - Deactivate CE01ISSM-LM001 platform assembly deployment
        #  - Set CE01ISSM-LM001 Deployment to INTEGRATED state
        #  - Set CE01ISSM-LM001 station device to INTEGRATED state
        #  - Set CE01ISSM-RI002 component device to INTEGRATED state
        #  - Set CE01ISSM-RI002 component device to INTEGRATED state
        #  - Disassemble CE01ISSM-RI002 platform from CG CE01ISSM-LM001 station platform
        #  - Disassemble all CE01ISSM-RI002-* instruments from a CG CE01ISSM-RI002 component platform
        #  - Retire instrument one for CE01ISSM-RI002-*
        #  - Retire device one for CE01ISSM-RI002
        #  - Retire device one for CE01ISSM-LM001
        return True

    # -------------------------------------------------------------------------

    def retrieve_ooi_asset(self, alt_id='', namespace='PRE'):
        """
        Return the first resource registered under the given alternate id.

        :param alt_id: alternate id to look up.
        :param namespace: alternate id namespace (defaults to 'PRE').
        :return: the first matching resource; exactly one match is asserted.
        """
        matches, _unused = self.RR.find_resources_ext(alt_id=alt_id,
                                                      alt_id_ns=namespace)
        self.assertEquals(len(matches), 1)
        return matches[0]

    def transition_lcs_then_verify(self, resource_id, new_lcs_state, verify):
        """
        Advance a resource's lifecycle state, then re-read the resource and
        check its ``lcstate``.

        :param resource_id: id of the resource to transition.
        :param new_lcs_state: value passed to ``advance_lcs``.
        :param verify: ``lcstate`` expected after the transition.
        :return: the result of the ``assertEquals`` check.
        """
        self.RR2.advance_lcs(resource_id, new_lcs_state)
        state_after = self.RR.read(resource_id).lcstate
        return self.assertEquals(state_after, verify)

    def create_basic_deployment(self, name='', description=''):
        """
        Create a Deployment resource planned for calendar year 2013.

        The deployment gets a ``CabledNodeDeploymentContext`` and a single
        temporal-bounds constraint named 'planned' running from 2013-01-01
        to 2014-01-01.

        :param name: name of the new deployment.
        :param description: description of the new deployment.
        :return: whatever ``self.OMS.create_deployment`` returns (callers in
            this file use it as the deployment id).
        """
        window_start = IonTime(datetime.datetime(2013, 1, 1))
        window_end = IonTime(datetime.datetime(2014, 1, 1))
        planned_window = IonObject(
            OT.TemporalBounds,
            name='planned',
            start_datetime=window_start.to_string(),
            end_datetime=window_end.to_string())

        deployment = IonObject(
            RT.Deployment,
            name=name,
            description=description,
            context=IonObject(OT.CabledNodeDeploymentContext),
            constraint_list=[planned_window])
        return self.OMS.create_deployment(deployment)

    def validate_deployment_activated(self, deployment_id=''):
        """
        Check that the deployment's site and device are linked.

        Exactly one ``hasDevice`` association from the deployment's site to
        its device is expected.

        :param deployment_id: id of the deployment to inspect.
        :return: the result of the ``assertEquals`` check.
        """
        site_id, device_id = self.get_deployment_ids(deployment_id)
        links = self.RR.find_associations(
            subject=site_id,
            predicate=PRED.hasDevice,
            object=device_id)
        link_count = len(links)
        return self.assertEquals(link_count, 1)

    def validate_deployment_deactivated(self, deployment_id=''):
        """
        Check that the deployment's site and device are not linked.

        No ``hasDevice`` association from the deployment's site to its
        device is expected.

        :param deployment_id: id of the deployment to inspect.
        :return: the result of the ``assertEquals`` check.
        """
        site_id, device_id = self.get_deployment_ids(deployment_id)
        links = self.RR.find_associations(
            subject=site_id,
            predicate=PRED.hasDevice,
            object=device_id)
        link_count = len(links)
        return self.assertEquals(link_count, 0)

    def dump_deployment(self, deployment_id='', name=""):
        """
        Log (at debug level) a deployment and every resource that points to
        it through ``hasDeployment``.

        A one-line summary per resource is logged first, followed by the
        full dump of each resource.

        :param deployment_id: id of the deployment to dump.
        :param name: optional label included in the log banners.
        """
        #site_id, device_id = self.get_deployment_ids(deployment_id)
        ids_to_read, _ = self.RR.find_subjects(object=deployment_id,
                                               predicate=PRED.hasDeployment,
                                               id_only=True)
        # The deployment itself is read last, after its subjects.
        ids_to_read.append(deployment_id)
        dumped = self.RR.read_mult(ids_to_read)

        log.debug('---------   dump_deployment %s summary---------------',
                  name)
        for res in dumped:
            log.debug('%s: %s (%s)', res._get_type(), res.name, res._id)

        log.debug('---------   dump_deployment %s full dump ---------------',
                  name)
        for res in dumped:
            log.debug('resource: %s ', res)

        log.debug('---------   dump_deployment %s end  ---------------', name)

        #assocs = self.container.resource_registry.find_assoctiations(anyside=deployment_id)
#        assocs = Container.instance.resource_registry.find_assoctiations(anyside=deployment_id)
#        log.debug('---------   dump_deployment  ---------------')
#        for assoc in assocs:
#            log.debug('SUBJECT: %s      PREDICATE: %s OBJET: %s', assoc.s, assoc.p, assoc.o)
#        log.debug('---------   dump_deployment  end  ---------------')

    def get_deployment_ids(self, deployment_id=''):
        """
        Return the site id and device id attached to a deployment.

        Instrument and platform devices are collected first, then instrument
        and platform sites; exactly one device and one site are asserted.

        :param deployment_id: id of the deployment to inspect.
        :return: tuple ``(site_id, device_id)``.
        """
        device_ids = []
        for device_type in (RT.InstrumentDevice, RT.PlatformDevice):
            found, _ = self.RR.find_subjects(device_type,
                                             PRED.hasDeployment,
                                             deployment_id,
                                             id_only=True)
            device_ids.extend(found)
        self.assertEquals(1, len(device_ids))

        site_ids = []
        for site_type in (RT.InstrumentSite, RT.PlatformSite):
            found, _ = self.RR.find_subjects(site_type,
                                             PRED.hasDeployment,
                                             deployment_id,
                                             id_only=True)
            site_ids.extend(found)
        self.assertEquals(1, len(site_ids))

        return site_ids[0], device_ids[0]

    def _find_resource_in_list(self,
                               res_list,
                               attr,
                               attr_val,
                               assert_found=True):
        for res in res_list:
            v = getattr(res, attr, None)
            if v == attr_val:
                return res
        if assert_found:
            self.assertTrue(
                False,
                "Attribute %s value %s not found in list" % (attr, attr_val))
        return None

    # -------------------------------------------------------------------------

    def _get_caller(self):
        s = inspect.stack()
        return "%s:%s" % (s[2][1], s[2][2])

    @assertion_wrapper
    def assert_array_almost_equal(self, *args, **kwargs):
        """
        Forward all arguments to ``np.testing.assert_array_almost_equal``.

        NOTE(review): ``assertion_wrapper`` is defined outside this section.
        Callers in this class fold the wrapped call's result into a boolean
        (``passing &= ...``), so the decorator presumably turns the outcome
        into True/False -- confirm against its definition.
        """
        np.testing.assert_array_almost_equal(*args, **kwargs)

    @assertion_wrapper
    def assertEquals(self, *args, **kwargs):
        """
        Forward all arguments to ``IonIntegrationTestCase.assertEquals``.

        NOTE(review): ``assertion_wrapper`` is defined outside this section.
        Callers in this class fold the wrapped call's result into a boolean
        (``passing &= ...``), so the decorator presumably turns the outcome
        into True/False -- confirm against its definition.
        """
        IonIntegrationTestCase.assertEquals(self, *args, **kwargs)

    @assertion_wrapper
    def assertTrue(self, *args, **kwargs):
        """
        Forward all arguments to ``IonIntegrationTestCase.assertTrue``.

        NOTE(review): ``assertion_wrapper`` is defined outside this section.
        Callers in this class fold the wrapped call's result into a boolean
        (``passing &= ...``), so the decorator presumably turns the outcome
        into True/False -- confirm against its definition.
        """
        IonIntegrationTestCase.assertTrue(self, *args, **kwargs)
Exemplo n.º 25
0
class TestActivateRSNVel3DInstrument(IonIntegrationTestCase):
    def setUp(self):
        """Start a container, deploy ``r2deploy.yml`` and create the service clients."""
        super(TestActivateRSNVel3DInstrument, self).setUp()

        # Bring the container up and deploy the release with an empty DotDict
        # as its configuration argument.
        self._start_container()
        self.container.start_rel_from_url('res/deploy/r2deploy.yml', DotDict())

        # Every client below, except the last one, is bound to the container
        # node explicitly.
        node = self.container.node
        self.rrclient = ResourceRegistryServiceClient(node=node)
        self.damsclient = DataAcquisitionManagementServiceClient(node=node)
        self.pubsubcli = PubsubManagementServiceClient(node=node)
        self.imsclient = InstrumentManagementServiceClient(node=node)
        self.dpclient = DataProductManagementServiceClient(node=node)
        self.datasetclient = DatasetManagementServiceClient(node=node)
        self.processdispatchclient = ProcessDispatcherServiceClient(node=node)
        self.dataprocessclient = DataProcessManagementServiceClient(node=node)
        self.dataproductclient = DataProductManagementServiceClient(node=node)
        self.dataretrieverclient = DataRetrieverServiceClient(node=node)

        # Second dataset-management client, created without a node argument.
        self.dataset_management = DatasetManagementServiceClient()

    def create_logger(self, name, stream_id=''):
        """Schedule a ``StreamGranuleLogger`` process and return its process id.

        :param name: prefix for the process definition name (``<name>_logger``)
        :param stream_id: stream id placed in the process configuration
        :return: the value returned by ``schedule_process``
        """
        definition = ProcessDefinition(name=name + '_logger')
        definition.executable = {
            'module': 'ion.processes.data.stream_granule_logger',
            'class': 'StreamGranuleLogger'
        }
        definition_id = self.processdispatchclient.create_process_definition(
            process_definition=definition)

        # The logger process learns which stream to follow from its config.
        return self.processdispatchclient.schedule_process(
            process_definition_id=definition_id,
            configuration={'process': {'stream_id': stream_id}})

    @attr('LOCOINT')
    @unittest.skip('under construction')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    @patch.dict(CFG, {'endpoint': {'receive': {'timeout': 180}}})
    def test_activate_rsn_vel3d(self):
        """Walk a Vel3D instrument agent through activation and read back raw data.

        Steps, in order:
          1. Spawn the IONLoader process with the "BETA" scenario.
          2. Create the InstrumentModel, InstrumentAgent, InstrumentDevice and
             InstrumentAgentInstance resources and associate them.
          3. Create stream definitions and three data products
             (vel3d_b_sample, vel3d_b_engineering, raw), activate their
             persistence and assign them to the device.
          4. Start the agent instance and drive it through INITIALIZE,
             GO_ACTIVE and RUN, checking the agent state after each command,
             then send RESET.
          5. Retrieve the raw dataset and check that it contains 'raw'.
          6. Cancel the logger processes and delete the data products.

        The test is currently skipped unconditionally by
        ``@unittest.skip('under construction')``.
        """

        log.info(
            "--------------------------------------------------------------------------------------------------------"
        )
        # load_parameter_scenarios
        self.container.spawn_process(
            "Loader",
            "ion.processes.bootstrap.ion_loader",
            "IONLoader",
            config=dict(
                op="load",
                scenario="BETA",
                path="master",
                categories=
                "ParameterFunctions,ParameterDefs,ParameterDictionary,StreamDefinition",
                clearcols="owner_id,org_ids",
                assets="res/preload/r2_ioc/ooi_assets",
                parseooi="True",
            ))

        # Process ids of the logger processes started below; cancelled at the
        # end of the test.
        self.loggerpids = []

        # Create InstrumentModel
        instModel_obj = IonObject(RT.InstrumentModel,
                                  name='Vel3DMModel',
                                  description="Vel3DMModel")
        instModel_id = self.imsclient.create_instrument_model(instModel_obj)
        log.debug('test_activate_rsn_vel3d new InstrumentModel id = %s ',
                  instModel_id)

        # One stream configuration per stream the agent is declared with.
        raw_config = StreamConfiguration(stream_name='raw',
                                         parameter_dictionary_name='raw')
        vel3d_b_sample = StreamConfiguration(
            stream_name='vel3d_b_sample',
            parameter_dictionary_name='vel3d_b_sample')
        vel3d_b_engineering = StreamConfiguration(
            stream_name='vel3d_b_engineering',
            parameter_dictionary_name='vel3d_b_engineering')

        # NOTE(review): RSN_VEL3D_01 is not referenced again in this test; the
        # same address/port/binary values are repeated in port_agent_config
        # below.
        RSN_VEL3D_01 = {
            'DEV_ADDR': "10.180.80.6",
            'DEV_PORT': 2101,
            'DATA_PORT': 1026,
            'CMD_PORT': 1025,
            'PA_BINARY': "port_agent"
        }

        # Create InstrumentAgent
        instAgent_obj = IonObject(
            RT.InstrumentAgent,
            name='Vel3DAgent',
            description="Vel3DAgent",
            driver_uri=
            "http://sddevrepo.oceanobservatories.org/releases/nobska_mavs4_ooicore-0.0.7-py2.7.egg",
            stream_configurations=[
                raw_config, vel3d_b_sample, vel3d_b_engineering
            ])
        instAgent_id = self.imsclient.create_instrument_agent(instAgent_obj)
        log.debug('test_activate_rsn_vel3d new InstrumentAgent id = %s',
                  instAgent_id)

        self.imsclient.assign_instrument_model_to_instrument_agent(
            instModel_id, instAgent_id)

        # Create InstrumentDevice
        log.debug(
            'test_activate_rsn_vel3d: Create instrument resource to represent the Vel3D '
        )
        instDevice_obj = IonObject(RT.InstrumentDevice,
                                   name='Vel3DDevice',
                                   description="Vel3DDevice",
                                   serial_number="12345")
        instDevice_id = self.imsclient.create_instrument_device(
            instrument_device=instDevice_obj)
        self.imsclient.assign_instrument_model_to_instrument_device(
            instModel_id, instDevice_id)
        log.debug("test_activate_rsn_vel3d: new InstrumentDevice id = %s  ",
                  instDevice_id)

        # Port agent settings stored on the agent instance resource.
        port_agent_config = {
            'device_addr': '10.180.80.6',
            'device_port': 2101,
            'process_type': PortAgentProcessType.UNIX,
            'binary_path': "port_agent",
            'port_agent_addr': 'localhost',
            'command_port': 1025,
            'data_port': 1026,
            'log_level': 5,
            'type': PortAgentType.ETHERNET
        }

        instAgentInstance_obj = IonObject(RT.InstrumentAgentInstance,
                                          name='Vel3DAgentInstance',
                                          description="Vel3DAgentInstance",
                                          port_agent_config=port_agent_config,
                                          alerts=[])

        instAgentInstance_id = self.imsclient.create_instrument_agent_instance(
            instAgentInstance_obj, instAgent_id, instDevice_id)

        # Look up each parameter dictionary by name and create a stream
        # definition from it.
        parsed_sample_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'vel3d_b_sample', id_only=True)
        parsed_sample_stream_def_id = self.pubsubcli.create_stream_definition(
            name='vel3d_b_sample',
            parameter_dictionary_id=parsed_sample_pdict_id)

        parsed_eng_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'vel3d_b_engineering', id_only=True)
        parsed_eng_stream_def_id = self.pubsubcli.create_stream_definition(
            name='vel3d_b_engineering',
            parameter_dictionary_id=parsed_eng_pdict_id)

        raw_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'raw', id_only=True)
        raw_stream_def_id = self.pubsubcli.create_stream_definition(
            name='raw', parameter_dictionary_id=raw_pdict_id)

        #-------------------------------
        # Create Raw and Parsed Data Products for the device
        #-------------------------------

        dp_obj = IonObject(RT.DataProduct,
                           name='vel3d_b_sample',
                           description='vel3d_b_sample')

        sample_data_product_id = self.dpclient.create_data_product(
            data_product=dp_obj,
            stream_definition_id=parsed_sample_stream_def_id)
        log.debug('new dp_id = %s', sample_data_product_id)
        self.dpclient.activate_data_product_persistence(
            data_product_id=sample_data_product_id)

        self.damsclient.assign_data_product(
            input_resource_id=instDevice_id,
            data_product_id=sample_data_product_id)

        # Retrieve the ids of the streams associated with the sample Data Product
        stream_ids, _ = self.rrclient.find_objects(sample_data_product_id,
                                                   PRED.hasStream, None, True)
        log.debug('sample_data_product streams1 = %s', stream_ids)

        # Retrieve the ids of the datasets associated with the sample Data Product
        dataset_ids, _ = self.rrclient.find_objects(sample_data_product_id,
                                                    PRED.hasDataset,
                                                    RT.Dataset, True)
        log.debug('Data set for sample_data_product = %s', dataset_ids[0])
        self.parsed_dataset = dataset_ids[0]

        # Only the sample stream gets a logger process in this test.
        pid = self.create_logger('vel3d_b_sample', stream_ids[0])
        self.loggerpids.append(pid)

        dp_obj = IonObject(RT.DataProduct,
                           name='vel3d_b_engineering',
                           description='vel3d_b_engineering')

        eng_data_product_id = self.dpclient.create_data_product(
            data_product=dp_obj, stream_definition_id=parsed_eng_stream_def_id)
        log.debug('new dp_id = %s', eng_data_product_id)
        self.dpclient.activate_data_product_persistence(
            data_product_id=eng_data_product_id)

        self.damsclient.assign_data_product(
            input_resource_id=instDevice_id,
            data_product_id=eng_data_product_id)

        dp_obj = IonObject(RT.DataProduct,
                           name='the raw data',
                           description='raw stream test')

        data_product_id2 = self.dpclient.create_data_product(
            data_product=dp_obj, stream_definition_id=raw_stream_def_id)
        log.debug('new dp_id = %s', data_product_id2)

        self.damsclient.assign_data_product(input_resource_id=instDevice_id,
                                            data_product_id=data_product_id2)

        self.dpclient.activate_data_product_persistence(
            data_product_id=data_product_id2)

        # Retrieve the ids of the streams associated with the raw Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id2,
                                                   PRED.hasStream, None, True)
        log.debug('test_activate_rsn_vel3d Data product streams2 = %s',
                  str(stream_ids))

        # Retrieve the ids of the datasets associated with the raw Data Product
        dataset_ids, _ = self.rrclient.find_objects(data_product_id2,
                                                    PRED.hasDataset,
                                                    RT.Dataset, True)
        log.debug('test_activate_rsn_vel3d Data set for data_product_id2 = %s',
                  dataset_ids[0])
        self.raw_dataset = dataset_ids[0]

        def start_instrument_agent():
            # Runs inside a greenlet spawned just below.
            self.imsclient.start_instrument_agent_instance(
                instrument_agent_instance_id=instAgentInstance_id)

        # Run the start call in a greenlet and block until it has finished.
        gevent.joinall([gevent.spawn(start_instrument_agent)])

        #cleanup
        self.addCleanup(self.imsclient.stop_instrument_agent_instance,
                        instrument_agent_instance_id=instAgentInstance_id)

        #wait for start
        # NOTE(review): inst_agent_instance_obj is only used by the
        # commented-out trace line further down.
        inst_agent_instance_obj = self.imsclient.read_instrument_agent_instance(
            instAgentInstance_id)
        gate = AgentProcessStateGate(self.processdispatchclient.read_process,
                                     instDevice_id, ProcessStateEnum.RUNNING)
        # NOTE(review): `await` is a reserved keyword from Python 3.7 on, so
        # the attribute call below only parses on older interpreters.
        self.assertTrue(
            gate. await (30),
            "The instrument agent instance (%s) did not spawn in 30 seconds" %
            gate.process_id)

        #log.trace('Instrument agent instance obj: = %s' , str(inst_agent_instance_obj))

        # Start a resource agent client to talk with the instrument agent.
        self._ia_client = ResourceAgentClient(instDevice_id,
                                              to_name=gate.process_id,
                                              process=FakeProcess())

        def check_state(label, desired_state):
            # Log the agent's current state and assert it is the expected one.
            actual_state = self._ia_client.get_agent_state()
            log.debug("%s instrument agent is in state '%s'", label,
                      actual_state)
            self.assertEqual(desired_state, actual_state)

        log.debug("test_activate_rsn_vel3d: got ia client %s",
                  str(self._ia_client))

        check_state("just-spawned", ResourceAgentState.UNINITIALIZED)

        # INITIALIZE: expected to leave the agent INACTIVE.
        cmd = AgentCommand(command=ResourceAgentEvent.INITIALIZE)
        retval = self._ia_client.execute_agent(cmd)
        log.debug("test_activate_rsn_vel3d: initialize %s", str(retval))
        check_state("initialized", ResourceAgentState.INACTIVE)

        # GO_ACTIVE: expected to leave the agent IDLE.
        log.debug("test_activate_rsn_vel3d Sending go_active command ")
        cmd = AgentCommand(command=ResourceAgentEvent.GO_ACTIVE)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activate_rsn_vel3d: return value from go_active %s",
                  str(reply))
        check_state("activated", ResourceAgentState.IDLE)

        # The resource state is only logged here, not asserted.
        cmd = AgentCommand(command=ResourceAgentEvent.GET_RESOURCE_STATE)
        retval = self._ia_client.execute_agent(cmd)
        state = retval.result
        log.debug("current state after sending go_active command %s",
                  str(state))
        #
        # RUN: expected to leave the agent in COMMAND.
        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activate_rsn_vel3d: run %s", str(reply))
        check_state("commanded", ResourceAgentState.COMMAND)

        cmd = AgentCommand(command=ResourceAgentEvent.GET_RESOURCE_STATE)
        retval = self._ia_client.execute_agent(cmd)
        state = retval.result
        log.debug("current state after sending run command %s", str(state))

        # Disabled steps (autosample start/stop, pause/resume, clear/run),
        # kept as comments from the original test:
        #        cmd = AgentCommand(command=ProtocolEvent.START_AUTOSAMPLE)
        #        reply = self._ia_client.execute_agent(cmd)
        #        log.debug("test_activate_rsn_vel3d: run %s" , str(reply))
        #        state = self._ia_client.get_agent_state()
        #        self.assertEqual(ResourceAgentState.COMMAND, state)
        #
        #        gevent.sleep(5)
        #
        #        cmd = AgentCommand(command=ProtocolEvent.STOP_AUTOSAMPLE)
        #        reply = self._ia_client.execute_agent(cmd)
        #        log.debug("test_activate_rsn_vel3d: run %s" , str(reply))
        #        state = self._ia_client.get_agent_state()
        #        self.assertEqual(ResourceAgentState.COMMAND, state)
        #
        #        cmd = AgentCommand(command=ResourceAgentEvent.GET_RESOURCE_STATE)
        #        retval = self._ia_client.execute_agent(cmd)
        #        state = retval.result
        #        log.debug("current state after sending STOP_AUTOSAMPLE command %s" , str(state))

        #
        #        cmd = AgentCommand(command=ResourceAgentEvent.PAUSE)
        #        retval = self._ia_client.execute_agent(cmd)
        #        state = self._ia_client.get_agent_state()
        #        self.assertEqual(ResourceAgentState.STOPPED, state)
        #
        #        cmd = AgentCommand(command=ResourceAgentEvent.RESUME)
        #        retval = self._ia_client.execute_agent(cmd)
        #        state = self._ia_client.get_agent_state()
        #        self.assertEqual(ResourceAgentState.COMMAND, state)
        #
        #        cmd = AgentCommand(command=ResourceAgentEvent.CLEAR)
        #        retval = self._ia_client.execute_agent(cmd)
        #        state = self._ia_client.get_agent_state()
        #        self.assertEqual(ResourceAgentState.IDLE, state)
        #
        #        cmd = AgentCommand(command=ResourceAgentEvent.RUN)
        #        retval = self._ia_client.execute_agent(cmd)
        #        state = self._ia_client.get_agent_state()
        #        self.assertEqual(ResourceAgentState.COMMAND, state)

        # The agent state after RESET is not checked.
        log.debug("test_activate_rsn_vel3d: calling reset ")
        cmd = AgentCommand(command=ResourceAgentEvent.RESET)
        reply = self._ia_client.execute_agent(cmd)
        log.debug("test_activate_rsn_vel3d: return from reset %s", str(reply))

        #--------------------------------------------------------------------------------
        # Now get the raw data in one chunk using an RPC call to retrieve
        #--------------------------------------------------------------------------------

        replay_data_raw = self.dataretrieverclient.retrieve(self.raw_dataset)
        self.assertIsInstance(replay_data_raw, Granule)
        rdt_raw = RecordDictionaryTool.load_from_granule(replay_data_raw)
        log.debug("RDT raw: %s", str(rdt_raw.pretty_print()))

        self.assertIn('raw', rdt_raw)
        # NOTE(review): raw_vals is read but never inspected afterwards.
        raw_vals = rdt_raw['raw']

        #--------------------------------------------------------------------------------
        # Deactivate loggers
        #--------------------------------------------------------------------------------

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)

        # These deletions are not registered with addCleanup, so they only run
        # when every step above succeeded.
        self.dpclient.delete_data_product(sample_data_product_id)
        self.dpclient.delete_data_product(eng_data_product_id)
        self.dpclient.delete_data_product(data_product_id2)
Exemplo n.º 26
0
class TestDMEnd2End(IonIntegrationTestCase):
    def setUp(self):
        """Start the container, deploy ``r2deploy.yml`` and prepare clients and state."""
        self._start_container()
        self.container.start_rel_from_url("res/deploy/r2deploy.yml")

        # Plain bookkeeping used by the helpers and tests of this class.
        self.pids = []
        self.event = Event()
        self.exchange_space_name = "test_granules"
        self.exchange_point_name = "science_data"

        # Service clients.
        self.process_dispatcher = ProcessDispatcherServiceClient()
        self.pubsub_management = PubsubManagementServiceClient()
        self.resource_registry = ResourceRegistryServiceClient()
        self.dataset_management = DatasetManagementServiceClient()
        self.ingestion_management = IngestionManagementServiceClient()
        self.data_retriever = DataRetrieverServiceClient()

        # Start every test from an empty ingestion queue.
        self.purge_queues()

    def purge_queues(self):
        """Purge the ``science_granule_ingestion`` queue."""
        ingestion_queue = self.container.ex_manager.create_xn_queue("science_granule_ingestion")
        ingestion_queue.purge()

    def tearDown(self):
        """Purge the ingestion queue, cancel launched processes, clean subscriptions."""
        self.purge_queues()

        # Cancel every process recorded by launch_producer.
        for launched_pid in self.pids:
            self.process_dispatcher.cancel_process(launched_pid)

        IngestionManagementIntTest.clean_subscriptions()

    def launch_producer(self, stream_id=""):
        """Schedule a ``BetterDataProducer`` process for ``stream_id``.

        The process id returned by the dispatcher is appended to
        ``self.pids``; nothing is returned.
        """
        # Process definition for the example producer.
        definition = ProcessDefinition(name="Example Data Producer")
        definition.executable = {
            "module": "ion.processes.data.example_data_producer",
            "class": "BetterDataProducer",
        }
        definition_id = self.process_dispatcher.create_process_definition(process_definition=definition)

        # Launch it, configured with the stream id it is given.
        process_config = DotDict()
        process_config.process.stream_id = stream_id
        self.pids.append(
            self.process_dispatcher.schedule_process(
                process_definition_id=definition_id, configuration=process_config
            )
        )

    def get_ingestion_config(self):
        """Return the id of the first IngestionConfiguration in the resource registry.

        Per the original comments, the configuration is expected to have been
        created by the bootstrap service configured through r2deploy.yml.
        Raises ``IndexError`` when the registry returns no configuration.
        """
        config_ids, _ = self.resource_registry.find_resources(
            restype=RT.IngestionConfiguration, id_only=True
        )
        first_config_id = config_ids[0]
        return first_config_id

    def publish_hifi(self, stream_id, offset=0):
        """Publish one ten-sample granule on ``stream_id``.

        ``time`` holds ``offset * 10 .. offset * 10 + 9`` and ``temp`` holds
        twice those values.
        """
        publisher = SimpleStreamPublisher.new_publisher(self.container, self.exchange_point_name, stream_id)

        craft = CoverageCraft()
        time_values = np.arange(10) + (offset * 10)
        craft.rdt["time"] = time_values
        craft.rdt["temp"] = time_values * 2  # multiplication yields a new array
        publisher.publish(craft.to_granule())

    def publish_fake_data(self, stream_id):
        """Publish four consecutive granules (offsets 0 to 3) on ``stream_id``.

        Each granule is produced by ``publish_hifi``.
        """
        # range() rather than xrange(): identical for four items and also
        # defined on Python 3.
        for offset in range(4):
            self.publish_hifi(stream_id, offset)

    def get_datastore(self, dataset_id):
        """Return the SCIDATA-profile datastore named by the dataset's ``datastore_name``."""
        datastore_name = self.dataset_management.read_dataset(dataset_id).datastore_name
        return self.container.datastore_manager.get_datastore(datastore_name, DataStore.DS_PROFILE.SCIDATA)

    def validate_granule_subscription(self, msg, header):
        """Subscriber callback: check that ``msg`` is a Granule and set ``self.event``.

        A message equal to an empty dict is ignored. ``header`` is unused.
        """
        if msg != {}:
            self.assertIsInstance(msg, Granule, "Message is improperly formatted. (%s)" % type(msg))
            self.event.set()

    def wait_until_we_have_enough_granules(self, dataset_id="", granules=4, timeout=10, poll_interval=0.1):
        """Block until the dataset's view holds at least ``granules`` rows.

        :param dataset_id: id of the dataset whose datastore view is polled
        :param granules: minimum number of view rows to wait for
        :param timeout: seconds to keep polling before giving up (default 10,
            the value that used to be hard-coded)
        :param poll_interval: seconds to sleep between two polls
        :raises Timeout: when the view does not reach ``granules`` rows in time
        """
        datastore = self.get_datastore(dataset_id)
        dataset = self.dataset_management.read_dataset(dataset_id)

        deadline = time.time() + timeout
        while True:
            # As before, the deadline is checked before each query.
            if time.time() >= deadline:
                raise Timeout("Granules are not populating in time.")
            if len(datastore.query_view(dataset.view_name)) >= granules:
                return
            # Yield between polls instead of spinning, so the greenlets doing
            # the ingestion get a chance to run and the datastore is not
            # hammered with back-to-back queries.
            gevent.sleep(poll_interval)

    def create_dataset(self):
        """Create ``test_dataset`` from CoverageCraft's domains and parameters; return its id."""
        spatial_domain, temporal_domain = CoverageCraft.create_domains()
        # The service call takes the dumped form of each object.
        spatial_dump = spatial_domain.dump()
        temporal_dump = temporal_domain.dump()
        parameter_dump = CoverageCraft.create_parameters().dump()

        return self.dataset_management.create_dataset(
            "test_dataset",
            parameter_dict=parameter_dump,
            spatial_domain=spatial_dump,
            temporal_domain=temporal_dump,
        )

    def test_coverage_ingest(self):
        """Publish two 20-sample granules and check the coverage and retrieval.

        After each publish the coverage's ``time`` values must equal
        ``arange(20)`` and then ``arange(40)``; a final ``retrieve`` of the
        dataset must also yield ``arange(40)``.
        """

        def build_granule(time_values):
            # Granule with the given times and 20 random temperatures.
            craft = CoverageCraft()
            craft.rdt["time"] = time_values
            craft.rdt["temp"] = np.random.random(20) * 10
            craft.sync_with_granule()
            return craft.to_granule()

        def assert_times(craft, expected_count):
            matches = craft.rdt["time"] == np.arange(expected_count)
            self.assertTrue(matches.all())

        def assert_coverage_times(expected_count):
            # Re-read the coverage each time so the latest ingest is visible.
            craft = CoverageCraft(DatasetManagementService._get_coverage(dataset_id))
            craft.sync_rdt_with_coverage()
            assert_times(craft, expected_count)

        stream_id = self.pubsub_management.create_stream()
        dataset_id = self.create_dataset()
        # Touch the datastore before persistence is set up; presumably this
        # is the datastore-creation race workaround described in
        # test_replay_by_time.
        self.get_datastore(dataset_id)
        ingestion_config_id = self.get_ingestion_config()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id
        )

        # First granule: times 0..19.
        first_granule = build_granule(np.arange(20))
        publisher = SimpleStreamPublisher.new_publisher(self.container, self.exchange_point_name, stream_id)
        publisher.publish(first_granule)
        self.wait_until_we_have_enough_granules(dataset_id, 1)
        assert_coverage_times(20)

        # Second granule: times 20..39.
        publisher.publish(build_granule(np.arange(20) + 20))
        self.wait_until_we_have_enough_granules(dataset_id, 2)
        assert_coverage_times(40)

        # The retrieved granule must carry the same 40 time values.
        retrieved = self.data_retriever.retrieve(dataset_id)
        craft = CoverageCraft()
        craft.sync_rdt_with_granule(retrieved)
        assert_times(craft, 40)

    @attr("SMOKE")
    def test_dm_end_2_end(self):
        """Ingest data from a producer, then retrieve it by RPC and by replay.

        A producer process publishes on a new stream that is persisted into a
        new dataset. Once four granules are stored, the dataset is retrieved
        in one RPC call and then replayed over a stream; the replay passes
        when ``validate_granule_subscription`` sets ``self.event`` within ten
        seconds.
        """
        # --------------------------------------------------------------------------------
        # Set up a stream and have a mock instrument (producer) send data
        # --------------------------------------------------------------------------------
        stream_id = self.pubsub_management.create_stream()
        self.launch_producer(stream_id)

        # --------------------------------------------------------------------------------
        # Start persisting the data on the stream
        # - Get the ingestion configuration from the resource registry
        # - Create the dataset
        # - call persist_data_stream to setup the subscription for the ingestion workers
        #   on the stream that you specify which causes the data to be persisted
        # --------------------------------------------------------------------------------
        ingest_config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id
        )

        # --------------------------------------------------------------------------------
        # Now the granules are ingesting and persisted
        # --------------------------------------------------------------------------------
        self.wait_until_we_have_enough_granules(dataset_id, 4)

        # --------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to retrieve
        # --------------------------------------------------------------------------------
        replay_data = self.data_retriever.retrieve(dataset_id)
        self.assertIsInstance(replay_data, Granule)

        # --------------------------------------------------------------------------------
        # Now to try the streamed approach
        # --------------------------------------------------------------------------------
        replay_id, replay_stream_id = self.data_retriever.define_replay(dataset_id)

        # --------------------------------------------------------------------------------
        # Create the listening endpoint for the retriever to talk to
        # --------------------------------------------------------------------------------
        xp = self.container.ex_manager.create_xp(self.exchange_point_name)
        xn = self.container.ex_manager.create_xn_queue(self.exchange_space_name)
        xn.bind("%s.data" % replay_stream_id, xp)
        subscriber = SimpleStreamSubscriber.new_subscriber(
            self.container, self.exchange_space_name, self.validate_granule_subscription
        )
        subscriber.start()

        try:
            self.data_retriever.start_replay(replay_id)
            try:
                self.event.wait(10)
            except gevent.Timeout:
                # Kept for event implementations that raise on timeout; the
                # flag check below decides the outcome either way.
                pass
        finally:
            # Always stop the subscriber, even when the replay call fails.
            subscriber.stop()

        # Event.wait() returns on timeout rather than raising, so the flag
        # itself must be inspected to learn whether a granule was validated.
        # NOTE(review): assumes self.event exposes is_set(), as gevent's and
        # threading's Event classes do -- confirm the Event import.
        self.assertTrue(self.event.is_set(), "Failed to validate the data.")

    def test_replay_by_time(self):
        """Retrieve a time-bounded slice and check it holds times 0 to 5.

        Four ten-sample granules are published; the query
        ``{"start_time": 0, "end_time": 6}`` must return exactly
        ``[0, 1, 2, 3, 4, 5]`` in ``time``.
        """
        log.info("starting test...")

        # --------------------------------------------------------------------------------
        # Create the necessary configurations for the test
        # --------------------------------------------------------------------------------
        stream_id = self.pubsub_management.create_stream()
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )
        # --------------------------------------------------------------------------------
        # Create the datastore first,
        # --------------------------------------------------------------------------------
        # There is sometimes a race between the services and the process over
        # the creation of the datastore and its instance; this ensures the
        # datastore exists before the process even subscribes to data.
        self.get_datastore(dataset_id)

        self.publish_fake_data(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 2)  # I just need two

        replay_granule = self.data_retriever.retrieve(dataset_id, {"start_time": 0, "end_time": 6})

        rdt = RecordDictionaryTool.load_from_granule(replay_granule)

        comp = rdt["time"] == np.array([0, 1, 2, 3, 4, 5])

        # Diagnostic logging only: a failure here must not fail the test, but
        # it is reported rather than silently discarded, and non-Exception
        # signals such as KeyboardInterrupt are no longer swallowed.
        try:
            log.info("Compared granule: %s", replay_granule.__dict__)
            log.info("Granule tax: %s", replay_granule.taxonomy.__dict__)
        except Exception as ex:
            log.debug("Could not log granule details: %s", ex)
        self.assertTrue(comp.all())

    def test_last_granule(self):
        """``retrieve_last_granule`` must return the second of two published granules."""
        # --------------------------------------------------------------------------------
        # Create the necessary configurations for the test
        # --------------------------------------------------------------------------------
        stream_id = self.pubsub_management.create_stream()
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )
        # --------------------------------------------------------------------------------
        # Create the datastore first,
        # --------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        # Two granules: times 0..9 and 10..19.
        for offset in (0, 1):
            self.publish_hifi(stream_id, offset)

        self.wait_until_we_have_enough_granules(dataset_id, 2)  # I just need two

        last_granule = self.data_retriever.retrieve_last_granule(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(last_granule)

        # The last granule carries the second batch of times.
        matches = rdt["time"] == np.arange(10, 20)
        self.assertTrue(matches.all())

    def test_replay_with_parameters(self):
        """A query naming parameters must return only those parameters.

        The query covers times 0 to 20 and asks for ``time`` and ``temp``;
        the result must hold ``arange(20)`` in ``time`` and exactly those two
        keys.
        """
        # --------------------------------------------------------------------------------
        # Create the configurations and the dataset
        # --------------------------------------------------------------------------------
        stream_id = self.pubsub_management.create_stream()
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset()
        self.ingestion_management.persist_data_stream(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )

        # --------------------------------------------------------------------------------
        # Coerce the datastore into existence (beats race condition)
        # --------------------------------------------------------------------------------
        self.get_datastore(dataset_id)

        self.launch_producer(stream_id)
        self.wait_until_we_have_enough_granules(dataset_id, 4)

        retrieved_data = self.data_retriever.retrieve(
            dataset_id=dataset_id,
            query={"start_time": 0, "end_time": 20, "parameters": ["time", "temp"]},
        )
        rdt = RecordDictionaryTool.load_from_granule(retrieved_data)

        time_matches = np.arange(20) == rdt["time"]
        self.assertTrue(time_matches.all(), "%s" % rdt.pretty_print())

        returned_keys = set(rdt.iterkeys())
        self.assertEquals(returned_keys, set(["time", "temp"]))

    def test_repersist_data(self):
        """Data published after an unpersist/persist cycle lands in the same dataset.

        Two granules are published, persistence is switched off and on again,
        two more are published, and the retrieved ``time`` values must equal
        ``arange(40)``.
        """
        stream_id = self.pubsub_management.create_stream()
        config_id = self.get_ingestion_config()
        dataset_id = self.create_dataset()

        # The same arguments are used for both persist calls.
        persist_kwargs = dict(
            stream_id=stream_id, ingestion_configuration_id=config_id, dataset_id=dataset_id
        )

        self.ingestion_management.persist_data_stream(**persist_kwargs)
        self.get_datastore(dataset_id)
        for offset in (0, 1):
            self.publish_hifi(stream_id, offset)
        self.wait_until_we_have_enough_granules(dataset_id, 2)

        # Switch persistence off and back on for the same stream and dataset.
        self.ingestion_management.unpersist_data_stream(stream_id=stream_id, ingestion_configuration_id=config_id)
        self.ingestion_management.persist_data_stream(**persist_kwargs)
        for offset in (2, 3):
            self.publish_hifi(stream_id, offset)
        self.wait_until_we_have_enough_granules(dataset_id, 4)

        retrieved_granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(retrieved_granule)
        matches = rdt["time"] == np.arange(40)
        self.assertTrue(matches.all(), "Uh-oh: %s" % rdt["time"])