def process_execution(self, temp_vector, qc_params, bad_times):
        interval_key = uuid4().hex
        data_product_id = self.make_large_dataset(temp_vector)
        async_queue = Queue()

        def cb(event, *args, **kwargs):
            if '_'.join(event.qc_parameter.split('_')[1:]) not in qc_params:
                # Not one of the QC parameters under test; ignore it
                return
            times = event.temporal_values
            self.assertEquals(len(times), bad_times)
            async_queue.put(1)

        es = EventSubscriber(event_type=OT.ParameterQCEvent,
                             origin=data_product_id,
                             callback=cb,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)
        config = DotDict()
        config.process.interval_key = interval_key
        config.process.qc_params = qc_params
        self.sync_launch(config)

        # So now the process is started, time to throw an event at it
        ep = EventPublisher(event_type='TimerEvent')
        ep.publish_event(origin=interval_key)

        try:
            async_queue.get(timeout=120)
        except Empty:
            raise AssertionError('QC was not flagged in time')
Example #3
    def test_base_subscriber_as_catchall(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            gq.put(args[0])
            if self.count == 2:
                ar.set()

        sub = EventSubscriber(callback=cb)
        pub1 = EventPublisher(event_type="ResourceEvent")
        pub2 = EventPublisher(event_type="ContainerLifecycleEvent")

        self._listen(sub)

        pub1.publish_event(origin="some", description="1")
        pub2.publish_event(origin="other", description="2")

        ar.get(timeout=5)

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), 2)
        self.assertEquals(res[0].description, "1")
        self.assertEquals(res[1].description, "2")
Example #5
class Notifier(object):
    """Sends Process state notifications via ION events

    This object is fed into the internal PD core classes
    """
    def __init__(self):
        self.event_pub = EventPublisher()

    def notify_process(self, process):
        process_id = process.upid
        state = process.state

        ion_process_state = _PD_PROCESS_STATE_MAP.get(state)
        if not ion_process_state:
            log.debug("Received unknown process state from Process Dispatcher." +
                      " process=%s state=%s", process_id, state)
            return

        log.debug("Emitting event for process state. process=%s state=%s", process_id, ion_process_state)
        try:
            self.event_pub.publish_event(event_type="ProcessLifecycleEvent",
                origin=process_id, origin_type="DispatchedProcess",
                state=ion_process_state)
        except Exception:
            log.exception("Problem emitting event for process state. process=%s state=%s",
                process_id, ion_process_state)
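
A hedged usage sketch (not from the source): the PD core is expected to hand notify_process an object exposing upid and state; the stand-in class and state string below are hypothetical.

class _FakeProcess(object):
    # Hypothetical stand-in for a PD core process record
    upid = 'proc-123'
    state = '500-RUNNING'   # assumed to be a key of _PD_PROCESS_STATE_MAP

notifier = Notifier()
notifier.notify_process(_FakeProcess())   # publishes a ProcessLifecycleEvent if the state maps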
Example #6
    def test_pub_on_different_origins(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            gq.put(args[0])
            if self.count == 3:
                ar.set()

        sub = EventSubscriber(event_type="ResourceEvent", callback=cb)
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)

        pub.publish_event(origin="one", description="1")
        pub.publish_event(origin="two", description="2")
        pub.publish_event(origin="three", description="3")

        ar.get(timeout=5)

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), 3)
        self.assertEquals(res[0].description, "1")
        self.assertEquals(res[1].description, "2")
        self.assertEquals(res[2].description, "3")
Example #8
    def process_execution(self, temp_vector, qc_params, bad_times):
        interval_key = uuid4().hex
        data_product_id = self.make_large_dataset(temp_vector)
        async_queue = Queue()

        def cb(event, *args, **kwargs):
            times = event.temporal_values
            self.assertEquals(len(times), bad_times)
            async_queue.put(1)

        es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=data_product_id, callback=cb, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)
        config = DotDict()
        config.process.interval_key = interval_key
        config.process.qc_params = qc_params
        self.process_dispatcher.schedule_process(self.process_definition_id, process_id=self.process_id, configuration=config)  # Schedule the process; it should be coming up now

        self.sync_launch(config)

        # So now the process is started, time to throw an event at it
        ep = EventPublisher(event_type='TimerEvent')
        ep.publish_event(origin=interval_key)

        try:
            for i in xrange(24):
                async_queue.get(timeout=10)
        except Empty:
            raise AssertionError('QC was not flagged in time: %d' % i)
Example #9
def fail(x):
    '''
    The goal behind this function is to publish an event so that threads
    can synchronize with it to verify that it was run, regardless of context
    '''
    event_publisher = EventPublisher(OT.GranuleIngestionErrorEvent)
    try:
        event_publisher.publish_event(error_msg='failure')

        raise StandardError('Something you tried to do failed')
    finally:
        event_publisher.close()
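
A hedged sketch (not in the source) of the synchronization the docstring describes, reusing only the EventSubscriber and queue pattern shown in the tests above; the timeout and the error_msg assertion are illustrative.

sync_queue = Queue()
sub = EventSubscriber(event_type=OT.GranuleIngestionErrorEvent,
                      callback=lambda event, *args, **kwargs: sync_queue.put(event))
sub.start()
try:
    fail('anything')
except StandardError:
    pass                                  # the error is expected; we only want the event
flagged = sync_queue.get(timeout=10)      # blocks until the error event arrives
assert flagged.error_msg == 'failure'
sub.stop()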
Example #10
    def test_pub_on_different_subtypes(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(event, *args, **kwargs):
            self.count += 1
            gq.put(event)
            if event.description == "end":
                ar.set()

        sub = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1", callback=cb)
        sub.start()

        pub1 = EventPublisher(event_type="ResourceModifiedEvent")
        pub2 = EventPublisher(event_type="ContainerLifecycleEvent")

        pub1.publish_event(origin="two", sub_type="st2", description="2")
        pub2.publish_event(origin="three", sub_type="st1", description="3")
        pub1.publish_event(origin="one", sub_type="st1", description="1")
        pub1.publish_event(origin="four", sub_type="st1", description="end")

        ar.get(timeout=5)
        sub.stop()

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), 2)
        self.assertEquals(res[0].description, "1")
Example #12
    def test_pub_and_sub(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            gq.put(args[0])
            if self.count == 2:
                ar.set()

        sub = EventSubscriber(event_type="ResourceEvent", callback=cb, origin="specific")
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)
        pub.publish_event(origin="specific", description="hello")

        event_obj = bootstrap.IonObject('ResourceEvent', origin='specific', description='more testing')
        self.assertEqual(event_obj, pub.publish_event_object(event_obj))

        with self.assertRaises(BadRequest) as cm:
            event_obj = bootstrap.IonObject('ResourceEvent', origin='specific', description='more testing', ts_created='2423')
            pub.publish_event_object(event_obj)
        self.assertIn('The ts_created value is not a valid timestamp', cm.exception.message)

        with self.assertRaises(BadRequest) as cm:
            event_obj = bootstrap.IonObject('ResourceEvent', origin='specific', description='more testing', ts_created='1000494978462')
            pub.publish_event_object(event_obj)
        self.assertIn('This ts_created value is too old', cm.exception.message)

        with self.assertRaises(BadRequest) as cm:
            event_obj = bootstrap.IonObject('ResourceEvent', origin='specific', description='more testing')
            event_obj._id = '343434'
            pub.publish_event_object(event_obj)
        self.assertIn('The event object cannot contain a _id field', cm.exception.message)

        ar.get(timeout=5)

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), self.count)
        self.assertEquals(res[0].description, "hello")
        self.assertAlmostEquals(int(res[0].ts_created), int(get_ion_ts()), delta=5000)

        self.assertEquals(res[1].description, "more testing")
        self.assertAlmostEquals(int(res[1].ts_created), int(get_ion_ts()), delta=5000)
Example #13
    def _set_calibration_for_data_product(self, dp_obj, dev_cfg):
        from ion.util.direct_coverage_utils import DirectCoverageAccess
        from coverage_model import SparseConstantType

        log.debug(" Setting calibration for data product '%s'", dp_obj.name)
        dataset_ids, _ = self.rr.find_objects(dp_obj, PRED.hasDataset, id_only=True)
        publisher = EventPublisher(OT.InformationContentModifiedEvent)
        for dataset_id in dataset_ids:
            # Synchronize with ingestion
            with DirectCoverageAccess() as dca:
                cov = dca.get_editable_coverage(dataset_id)
                # Iterate over the calibrations
                for cal_name, contents in dev_cfg.iteritems():
                    if cal_name in cov.list_parameters() and isinstance(cov.get_parameter_context(cal_name).param_type, SparseConstantType):
                        value = float(contents['value'])
                        cov.set_parameter_values(cal_name, value)
                    else:
                        log.warn("Calibration %s not found in dataset", cal_name)
                publisher.publish_event(origin=dataset_id, description="Calibrations Updated")
        publisher.close()
        log.info(" Calibration set for data product '%s' in %s coverages", dp_obj.name, len(dataset_ids))
Example #14
    def test_subscriber_listening_for_specific_origin(self):
        ar = event.AsyncResult()
        self.count = 0
        def cb(*args, **kwargs):
            self.count += 1
            ar.set(args[0])

        sub = EventSubscriber(event_type="ResourceEvent", origin="specific", callback=cb)
        pub = EventPublisher(event_type="ResourceEvent", node=self.container.node)

        self._listen(sub)

        pub.publish_event(origin="notspecific", description="1")
        pub.publish_event(origin="notspecific", description="2")
        pub.publish_event(origin="specific", description="3")
        pub.publish_event(origin="notspecific", description="4")

        evmsg = ar.get(timeout=5)
        self.assertEquals(self.count, 1)
        self.assertEquals(evmsg.description, "3")
Example #15
    def test_subscriber_listening_for_specific_origin(self):
        ar = event.AsyncResult()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            ar.set(args[0])

        sub = EventSubscriber(event_type="ResourceEvent",
                              origin="specific",
                              callback=cb)
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)

        pub.publish_event(origin="notspecific", description="1")
        pub.publish_event(origin="notspecific", description="2")
        pub.publish_event(origin="specific", description="3")
        pub.publish_event(origin="notspecific", description="4")

        evmsg = ar.get(timeout=5)
        self.assertEquals(self.count, 1)
        self.assertEquals(evmsg.description, "3")
    def test_lookup_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition('lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        data_product = DataProduct(name='lookup data product')
        tdom, sdom = time_series_domain()
        data_product.temporal_domain = tdom.dump()
        data_product.spatial_domain = sdom.dump()

        data_product_id = self.dpsc_cli.create_data_product(data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)
        data_producer = DataProducer(name='producer')
        data_producer.producer_context = DataProcessProducerContext()
        data_producer.producer_context.configuration['qc_keys'] = ['offset_document']
        data_producer_id, _ = self.rrclient.create(data_producer)
        self.addCleanup(self.rrclient.delete, data_producer_id)
        assoc,_ = self.rrclient.create_association(subject=data_product_id, object=data_producer_id, predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, assoc)

        document_keys = self.damsclient.list_qc_references(data_product_id)
            
        self.assertEquals(document_keys, ['offset_document'])
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('offset_document', {'offset_a':2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
        dataset_id = dataset_ids[0]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()

        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(granule)

        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
        np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0]))


        svm.stored_value_cas('updated_document', {'offset_a':3.0})
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        ep.publish_event(origin=data_product_id, reference_keys=['updated_document'])

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()
        gevent.sleep(2) # Yield so that the event goes through
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt2['temp'],np.array([20.,20.]))
        np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0,23.0]))
Example #17
class QCPostProcessing(SimpleProcess):
    '''
    QC Post Processing Process

    This process gives ION clients and operators the ability to evaluate the automated quality control flags on
    various data products. It should be run periodically with overlapping spans of data to ensure complete
    dataset QC verification.

    The parameters that this process accepts as configuration are:
        - dataset_id: The dataset identifier, required.
        - start_time: Unix timestamp, defaults to 24 hours in the past
        - end_time: Unix timestamp, defaults to current time
        - qc_params: a list of qc functions to evaluate, currently supported functions are: ['glblrng_qc',
          'spketst_qc', 'stuckvl_qc'], defaults to all

    '''

    qc_suffixes = ['glblrng_qc', 'spketst_qc', 'stuckvl_qc']

    def on_start(self):
        SimpleProcess.on_start(self)
        self.data_retriever = DataRetrieverServiceProcessClient(process=self)
        self.interval_key = self.CFG.get_safe('process.interval_key', None)
        self.qc_params = self.CFG.get_safe('process.qc_params', [])
        validate_is_not_none(
            self.interval_key,
            'An interval key is necessary to launch this process')
        self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent,
                                                origin=self.interval_key,
                                                callback=self._event_callback,
                                                auto_delete=True)
        self.add_endpoint(self.event_subscriber)
        self.resource_registry = self.container.resource_registry
        self.run_interval = self.CFG.get_safe(
            'service.qc_processing.run_interval', 24)

    def _event_callback(self, *args, **kwargs):
        log.info('QC Post Processing Triggered')
        dataset_ids, _ = self.resource_registry.find_resources(
            restype=RT.Dataset, id_only=True)
        for dataset_id in dataset_ids:
            log.info('QC Post Processing for dataset %s', dataset_id)
            try:
                self.process(dataset_id)
            except BadRequest as e:
                if 'Problems reading from the coverage' in e.message:
                    log.error('Failed to read from dataset')

    def process(self, dataset_id, start_time=0, end_time=0):
        if not dataset_id:
            raise BadRequest('No dataset id specified.')
        now = time.time()
        # Look back run_interval hours, plus 1 hour of overlap
        start_time = start_time or (now - 3600 * (self.run_interval + 1))
        end_time = end_time or now

        qc_params = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes

        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        log.debug('Iterating over the data blocks')

        for st, et in self.chop(int(start_time), int(end_time)):
            log.debug('Chopping %s:%s', st, et)
            log.debug(
                "Retrieving data: data_retriever.retrieve('%s', query={'start_time': %s, 'end_time': %s})",
                dataset_id, st, et)
            granule = self.data_retriever.retrieve(dataset_id,
                                                   query={
                                                       'start_time': st,
                                                       'end_time': et
                                                   })
            log.debug('Retrieved Data')
            rdt = RecordDictionaryTool.load_from_granule(granule)
            qc_fields = [
                i for i in rdt.fields
                if any([i.endswith(j) for j in qc_params])
            ]
            log.debug('QC Fields: %s', qc_fields)
            for field in qc_fields:
                val = rdt[field]
                if val is None:
                    continue
                if not np.all(val):
                    log.debug('Found QC Alerts')
                    indexes = np.where(val == 0)
                    timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                    self.flag_qc_parameter(dataset_id, field,
                                           timestamps.tolist(), {})

    def flag_qc_parameter(self, dataset_id, parameter, temporal_values,
                          configuration):
        log.info('Flagging QC for %s', parameter)
        data_product_ids, _ = self.resource_registry.find_subjects(
            object=dataset_id,
            subject_type=RT.DataProduct,
            predicate=PRED.hasDataset,
            id_only=True)
        for data_product_id in data_product_ids:
            self.qc_publisher.publish_event(origin=data_product_id,
                                            qc_parameter=parameter,
                                            temporal_values=temporal_values,
                                            configuration=configuration)

    @classmethod
    def chop(cls, start_time, end_time):
        while start_time < end_time:
            yield (start_time, min(start_time + 3600, end_time))
            start_time = min(start_time + 3600, end_time)
        return
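
The chop classmethod yields contiguous hour-long (start, end) windows covering the requested span, with a shorter final window; a quick illustration with small hypothetical timestamps:

list(QCPostProcessing.chop(0, 7500))
# -> [(0, 3600), (3600, 7200), (7200, 7500)]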
Example #18
class ContainerManager(object):
    def __init__(self, container, handlers=DEFAULT_HANDLERS):
        self.container = container
        self.running = False
        # make sure start() completes before an event is handled,
        # and any event is either handled before stop() begins,
        # or the handler begins after stop() completes and the event is dropped
        self.lock = Lock()
        self.handlers = handlers[:]

    def start(self):
        self.container.stats_mgr = ContainerStatsManager(self.container)
        self.container.stats_mgr.start()

        ## create queue listener and publisher
        self.sender = EventPublisher(event_type="ContainerManagementResult")
        self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event)
        with self.lock:
            self.running = True
            self.receiver.start()
        log.debug('Container ready for container management requests')

    def stop(self):
        log.debug('container management stopping')
        with self.lock:
            self.receiver.stop()
            self.sender.close()
            self.running = False
        log.debug('container management stopped')

        self.container.stats_mgr.stop()

    def add_handler(self, handler):
        self.handlers.append(handler)

    def _get_handlers(self, action):
        out = []
        for handler in self.handlers:
            if handler.can_handle_request(action):
                out.append(handler)
        return out

    def _receive_event(self, event, headers):
        with self.lock:
            if not isinstance(event, ContainerManagementRequest):
                log.trace('ignoring wrong type event: %r', event)
                return
            if not self.running:
                log.warn('ignoring admin message received after shutdown: %s', event.action)
                return
            predicate = ContainerSelector.from_object(event.predicate)
            if predicate.should_handle(self.container):
                log.trace('handling admin message: %s', event.action)
                self._perform_action(event.action)
            else:
                log.trace('ignoring admin action: %s', event.action)
                if SEND_RESULT_IF_NOT_SELECTED:
                    self.sender.publish_event(origin=self.container.id, action=event.action, outcome='not selected')
                    log.debug('received action: %s, outcome: not selected', event.action)

    def _perform_action(self, action):
        handlers = self._get_handlers(action)
        if not handlers:
            log.info('action accepted but no handlers found: %s', action)
            result = 'unhandled'
            self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
            log.debug('received action: %s, outcome: %s', action, result)
        else:
            for handler in handlers:
                try:
                    result = handler.handle_request(action) or "completed"
                except Exception as e:
                    log.error("handler %r failed to perform action: %s", handler, action, exc_info=True)
                    result = e
                self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
                log.debug('performed action: %s, outcome: %s', action, result)
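
A minimal sketch of the handler protocol that _get_handlers and _perform_action rely on: any object exposing can_handle_request(action) and handle_request(action) can be registered with add_handler(). The action attributes used below are hypothetical.

class HypotheticalLogLevelHandler(object):
    def can_handle_request(self, action):
        return getattr(action, 'type_', '') == 'ChangeLogLevel'

    def handle_request(self, action):
        # ... apply the change here; a falsey return value is reported as "completed" ...
        return 'log level set to %s' % getattr(action, 'level', 'INFO')

container_manager.add_handler(HypotheticalLogLevelHandler())   # container_manager is assumed to exist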
Example #19
    def test_pub_on_different_subsubtypes(self):
        res_list = [
            DotDict(ar=event.AsyncResult(), gq=queue.Queue(), count=0)
            for i in xrange(4)
        ]

        def cb_gen(num):
            def cb(event, *args, **kwargs):
                res_list[num].count += 1
                res_list[num].gq.put(event)
                if event.description == "end":
                    res_list[num].ar.set()

            return cb

        sub0 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="st1.*",
                               callback=cb_gen(0))
        sub0.start()

        sub1 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="st1.a",
                               callback=cb_gen(1))
        sub1.start()

        sub2 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="*.a",
                               callback=cb_gen(2))
        sub2.start()

        sub3 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="st1",
                               callback=cb_gen(3))
        sub3.start()

        pub1 = EventPublisher(event_type="ResourceModifiedEvent")

        pub1.publish_event(origin="one", sub_type="st1.a", description="1")
        pub1.publish_event(origin="two", sub_type="st1", description="2")
        pub1.publish_event(origin="three", sub_type="st1.b", description="3")

        pub1.publish_event(origin="four", sub_type="st2.a", description="4")
        pub1.publish_event(origin="five", sub_type="st2", description="5")

        pub1.publish_event(origin="six", sub_type="a", description="6")
        pub1.publish_event(origin="seven", sub_type="", description="7")

        pub1.publish_event(origin="end", sub_type="st1.a", description="end")
        pub1.publish_event(origin="end", sub_type="st1", description="end")

        [res_list[i].ar.get(timeout=5) for i in xrange(3)]

        sub0.stop()
        sub1.stop()
        sub2.stop()
        sub3.stop()

        for i in xrange(4):
            res_list[i].res = []
            for x in xrange(res_list[i].count):
                res_list[i].res.append(res_list[i].gq.get(timeout=5))

        self.assertEquals(len(res_list[0].res), 3)
        self.assertEquals(res_list[0].res[0].description, "1")

        self.assertEquals(len(res_list[1].res), 2)
        self.assertEquals(res_list[1].res[0].description, "1")

        self.assertEquals(len(res_list[2].res), 3)
        self.assertEquals(res_list[2].res[0].description, "1")

        self.assertEquals(len(res_list[3].res), 2)
        self.assertEquals(res_list[3].res[0].description, "2")
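
Read together, the assertions above pin down how the sub_type wildcards route (a summary derived only from this test):

# sub_type="st1.*" -> matches "st1.a" and "st1.b"        (3 events, including one "end")
# sub_type="st1.a" -> matches only "st1.a"               (2 events)
# sub_type="*.a"   -> matches "st1.a" and "st2.a"        (3 events)
# sub_type="st1"   -> matches only the exact "st1"       (2 events)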
class ScienceGranuleIngestionWorker(TransformStreamListener,
                                    BaseIngestionWorker):
    CACHE_LIMIT = CFG.get_safe('container.ingestion_cache', 5)

    def __init__(self, *args, **kwargs):
        TransformStreamListener.__init__(self, *args, **kwargs)
        BaseIngestionWorker.__init__(self, *args, **kwargs)

        #--------------------------------------------------------------------------------
        # Ingestion Cache
        # - Datasets
        # - Coverage instances
        #--------------------------------------------------------------------------------
        self._datasets = collections.OrderedDict()
        self._coverages = collections.OrderedDict()

        self._bad_coverages = {}

        self.time_stats = Accumulator(format='%3f')
        # unique ID to identify this worker in log msgs
        self._id = uuid.uuid1()

    def on_start(self):  #pragma no cover
        #--------------------------------------------------------------------------------
        # Explicit on_start
        #--------------------------------------------------------------------------------

        # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created
        # We want explicit management of the thread and subscriber object for ingestion

        TransformStreamProcess.on_start(self)

        self.queue_name = self.CFG.get_safe('process.queue_name', self.id)
        self.subscriber = StreamSubscriber(process=self,
                                           exchange_name=self.queue_name,
                                           callback=self.receive_callback)
        self.thread_lock = RLock()

        #--------------------------------------------------------------------------------
        # Normal on_start after this point
        #--------------------------------------------------------------------------------

        BaseIngestionWorker.on_start(self)
        self._rpc_server = self.container.proc_manager._create_listening_endpoint(
            from_name=self.id, process=self)
        self.add_endpoint(self._rpc_server)

        self.event_publisher = EventPublisher(OT.DatasetModified)
        self.stored_value_manager = StoredValueManager(self.container)

        self.lookup_docs = self.CFG.get_safe('process.lookup_docs', [])
        self.input_product = self.CFG.get_safe('process.input_product', '')
        self.qc_enabled = self.CFG.get_safe('process.qc_enabled', True)
        self.ignore_gaps = self.CFG.get_safe('service.ingestion.ignore_gaps',
                                             False)
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(
            event_type=OT.ExternalReferencesUpdatedEvent,
            callback=self._add_lookups,
            auto_delete=True)
        self.add_endpoint(self.lookup_monitor)
        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        self.connection_id = ''
        self.connection_index = None

        self.start_listener()

    def on_quit(self):  #pragma no cover
        self.event_publisher.close()
        self.qc_publisher.close()
        if self.subscriber_thread:
            self.stop_listener()
        for stream, coverage in self._coverages.iteritems():
            try:
                coverage.close(timeout=5)
            except:
                log.exception('Problems closing the coverage')
        self._coverages.clear()
        TransformStreamListener.on_quit(self)
        BaseIngestionWorker.on_quit(self)

    def start_listener(self):
        # We use a lock here to prevent possible race conditions from starting multiple listeners and coverage clobbering
        with self.thread_lock:
            self.subscriber_thread = self._process.thread_manager.spawn(
                self.subscriber.listen, thread_name='%s-subscriber' % self.id)

    def stop_listener(self):
        # Avoid race conditions with coverage operations (Don't start a listener at the same time as closing one)
        with self.thread_lock:
            self.subscriber.close()
            self.subscriber_thread.join(timeout=10)
            for stream, coverage in self._coverages.iteritems():
                try:
                    coverage.close(timeout=5)
                except:
                    log.exception('Problems closing the coverage')
            self._coverages.clear()
            self.subscriber_thread = None

    def pause(self):
        if self.subscriber_thread is not None:
            self.stop_listener()

    def resume(self):
        if self.subscriber_thread is None:
            self.start_listener()

    def _add_lookups(self, event, *args, **kwargs):
        if event.origin == self.input_product:
            if isinstance(event.reference_keys, list):
                self.new_lookups.put(event.reference_keys)

    def _new_dataset(self, stream_id):
        '''
        Adds a new dataset to the internal cache of the ingestion worker
        '''
        rr_client = ResourceRegistryServiceClient()
        datasets, _ = rr_client.find_subjects(subject_type=RT.Dataset,
                                              predicate=PRED.hasStream,
                                              object=stream_id,
                                              id_only=True)
        if datasets:
            return datasets[0]
        return None

    def get_dataset(self, stream_id):
        '''
        Memoization (LRU) of _new_dataset
        '''
        try:
            result = self._datasets.pop(stream_id)
        except KeyError:
            result = self._new_dataset(stream_id)
            if result is None:
                return None
            if len(self._datasets) >= self.CACHE_LIMIT:
                self._datasets.popitem(0)
        self._datasets[stream_id] = result
        return result

    def get_coverage(self, stream_id):
        '''
        Memoization (LRU) of _get_coverage
        '''
        try:
            result = self._coverages.pop(stream_id)
        except KeyError:
            dataset_id = self.get_dataset(stream_id)
            if dataset_id is None:
                return None
            result = DatasetManagementService._get_simplex_coverage(dataset_id,
                                                                    mode='a')
            if result is None:
                return None
            if len(self._coverages) >= self.CACHE_LIMIT:
                k, coverage = self._coverages.popitem(0)
                coverage.close(timeout=5)
        self._coverages[stream_id] = result
        return result

    def gap_coverage(self, stream_id):
        try:
            old_cov = self._coverages.pop(stream_id)
            dataset_id = self.get_dataset(stream_id)
            sdom, tdom = time_series_domain()
            new_cov = DatasetManagementService._create_simplex_coverage(
                dataset_id, old_cov.parameter_dictionary, sdom, tdom,
                old_cov._persistence_layer.inline_data_writes)
            old_cov.close()
            result = new_cov
        except KeyError:
            result = self.get_coverage(stream_id)
        self._coverages[stream_id] = result
        return result

    def dataset_changed(self, dataset_id, extents, window):
        self.event_publisher.publish_event(origin=dataset_id,
                                           author=self.id,
                                           extents=extents,
                                           window=window)

    def evaluate_qc(self, rdt, dataset_id):
        if self.qc_enabled:
            for field in rdt.fields:
                if not (field.endswith('glblrng_qc')
                        or field.endswith('loclrng_qc')):
                    continue
                try:
                    values = rdt[field]
                    if values is not None:
                        if not all(values):
                            topology = np.where(values == 0)
                            timestamps = rdt[rdt.temporal_parameter][
                                topology[0]]
                            self.flag_qc_parameter(dataset_id, field,
                                                   timestamps.tolist(), {})
                except:
                    continue

    def flag_qc_parameter(self, dataset_id, parameter, temporal_values,
                          configuration):
        data_product_ids, _ = self.container.resource_registry.find_subjects(
            object=dataset_id,
            predicate=PRED.hasDataset,
            subject_type=RT.DataProduct,
            id_only=True)
        for data_product_id in data_product_ids:
            description = 'Automated Quality Control Alerted on %s' % parameter
            self.qc_publisher.publish_event(origin=data_product_id,
                                            qc_parameter=parameter,
                                            temporal_values=temporal_values,
                                            configuration=configuration,
                                            description=description)

    def update_connection_index(self, connection_id, connection_index):
        self.connection_id = connection_id
        try:
            connection_index = int(connection_index)
            self.connection_index = connection_index
        except ValueError:
            pass

    def has_gap(self, connection_id, connection_index):
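        # A gap is reported when the connection id changes, or when the incoming
        # connection index does not follow the previously seen index by exactly one;
        # missing or never-before-seen values are recorded and treated as "no gap".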
        if connection_id:
            if not self.connection_id:
                self.update_connection_index(connection_id, connection_index)
                return False
            else:
                if connection_id != self.connection_id:
                    return True
        if connection_index:
            if self.connection_index is None:
                self.update_connection_index(connection_id, connection_index)
                return False
            try:
                connection_index = int(connection_index)
                if connection_index != self.connection_index + 1:
                    return True
            except ValueError:
                pass

        return False

    def splice_coverage(self, dataset_id, coverage):
        log.info('Splicing new coverage')
        DatasetManagementService._splice_coverage(dataset_id, coverage)

    @handle_stream_exception()
    def recv_packet(self, msg, stream_route, stream_id):
        ''' receive packet for ingestion '''
        log.trace('received granule for stream %s', stream_id)

        if msg == {}:
            log.error('Received empty message from stream: %s', stream_id)
            return
        # Message validation
        if not isinstance(msg, Granule):
            log.error('Ingestion received a message that is not a granule: %s',
                      msg)
            return

        rdt = RecordDictionaryTool.load_from_granule(msg)
        if rdt is None:
            log.error('Invalid granule (no RDT) for stream %s', stream_id)
            return
        if not len(rdt):
            log.debug('Empty granule for stream %s', stream_id)
            return

        self.persist_or_timeout(stream_id, rdt)

    def persist_or_timeout(self, stream_id, rdt):
        """ retry writing coverage multiple times and eventually time out """
        done = False
        timeout = 2
        start = time.time()
        while not done:
            try:
                self.add_granule(stream_id, rdt)
                done = True
            except:
                log.exception('An issue with coverage, retrying after a bit')
                if (time.time() -
                        start) > MAX_RETRY_TIME:  # After an hour just give up
                    dataset_id = self.get_dataset(stream_id)
                    log.error(
                        "We're giving up, the coverage needs to be inspected %s",
                        DatasetManagementService._get_coverage_path(
                            dataset_id))
                    raise

                if stream_id in self._coverages:
                    log.info('Popping coverage for stream %s', stream_id)
                    self._coverages.pop(stream_id)

                gevent.sleep(timeout)
                if timeout > (60 * 5):
                    timeout = 60 * 5
                else:
                    timeout *= 2

    def expand_coverage(self, coverage, elements, stream_id):
        try:
            coverage.insert_timesteps(elements, oob=False)
        except IOError as e:
            log.error("Couldn't insert time steps for coverage: %s",
                      coverage.persistence_dir,
                      exc_info=True)
            try:
                coverage.close()
            finally:
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(e.message)

    def get_stored_values(self, lookup_value):
        if not self.new_lookups.empty():
            new_values = self.new_lookups.get()
            self.lookup_docs = new_values + self.lookup_docs
        lookup_value_document_keys = self.lookup_docs
        for key in lookup_value_document_keys:
            try:
                document = self.stored_value_manager.read_value(key)
                if lookup_value in document:
                    return document[lookup_value]
            except NotFound:
                log.warning('Specified lookup document does not exist')
        return None

    def fill_lookup_values(self, rdt):
        rdt.fetch_lookup_values()
        for field in rdt.lookup_values():
            value = self.get_stored_values(rdt.context(field).lookup_value)
            if value:
                rdt[field] = value

    def insert_sparse_values(self, coverage, rdt, stream_id):

        self.fill_lookup_values(rdt)
        for field in rdt.fields:
            if rdt[field] is None:
                continue
            if not isinstance(
                    rdt.context(field).param_type, SparseConstantType):
                # We only set sparse values before insert
                continue
            value = rdt[field]
            try:
                coverage.set_parameter_values(param_name=field, value=value)
            except ValueError as e:
                if "'lower_bound' cannot be >= 'upper_bound'" in e.message:
                    continue
                else:
                    raise
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          coverage.persistence_dir,
                          exc_info=True)
                try:
                    coverage.close()
                finally:
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(e.message)

    def insert_values(self, coverage, rdt, stream_id):
        elements = len(rdt)

        start_index = coverage.num_timesteps - elements

        for k, v in rdt.iteritems():
            if isinstance(v, SparseConstantValue):
                continue
            slice_ = slice(start_index, None)
            try:
                coverage.set_parameter_values(param_name=k,
                                              tdoa=slice_,
                                              value=v)
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          coverage.persistence_dir,
                          exc_info=True)
                try:
                    coverage.close()
                finally:
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(e.message)

        if 'ingestion_timestamp' in coverage.list_parameters():
            t_now = time.time()
            ntp_time = TimeUtils.ts_to_units(
                coverage.get_parameter_context('ingestion_timestamp').uom,
                t_now)
            coverage.set_parameter_values(param_name='ingestion_timestamp',
                                          tdoa=slice_,
                                          value=ntp_time)

    def add_granule(self, stream_id, rdt):
        ''' Appends the granule's data to the coverage and persists it. '''
        debugging = log.isEnabledFor(DEBUG)
        timer = Timer() if debugging else None
        if stream_id in self._bad_coverages:
            log.info(
                'Message attempting to be inserted into bad coverage: %s',
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))

        #--------------------------------------------------------------------------------
        # Gap Analysis
        #--------------------------------------------------------------------------------
        if not self.ignore_gaps:
            gap_found = self.has_gap(rdt.connection_id, rdt.connection_index)
            if gap_found:
                log.error(
                    'Gap Found!   New connection: (%s,%s)\tOld Connection: (%s,%s)',
                    rdt.connection_id, rdt.connection_index,
                    self.connection_id, self.connection_index)
                self.gap_coverage(stream_id)

        #--------------------------------------------------------------------------------
        # Coverage determination and appending
        #--------------------------------------------------------------------------------
        dataset_id = self.get_dataset(stream_id)
        if not dataset_id:
            log.error('No dataset could be determined on this stream: %s',
                      stream_id)
            return

        try:
            coverage = self.get_coverage(stream_id)
        except IOError as e:
            log.error(
                "Couldn't open coverage: %s",
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))
            raise CorruptionError(e.message)

        if debugging:
            path = DatasetManagementService._get_coverage_path(dataset_id)
            log.debug(
                '%s: add_granule stream %s dataset %s coverage %r file %s',
                self._id, stream_id, dataset_id, coverage, path)

        if not coverage:
            log.error(
                'Could not persist coverage from granule, coverage is None')
            return
        #--------------------------------------------------------------------------------
        # Actual persistence
        #--------------------------------------------------------------------------------

        elements = len(rdt)
        if rdt[rdt.temporal_parameter] is None:
            elements = 0

        self.insert_sparse_values(coverage, rdt, stream_id)

        if debugging:
            timer.complete_step('checks')  # lightweight ops, should be zero

        self.expand_coverage(coverage, elements, stream_id)

        if debugging:
            timer.complete_step('insert')

        self.insert_values(coverage, rdt, stream_id)

        if debugging:
            timer.complete_step('keys')

        DatasetManagementService._save_coverage(coverage)

        if debugging:
            timer.complete_step('save')

        start_index = coverage.num_timesteps - elements
        self.dataset_changed(dataset_id, coverage.num_timesteps,
                             (start_index, start_index + elements))

        if not self.ignore_gaps and gap_found:
            self.splice_coverage(dataset_id, coverage)

        self.evaluate_qc(rdt, dataset_id)

        if debugging:
            timer.complete_step('notify')
            self._add_timing_stats(timer)

        self.update_connection_index(rdt.connection_id, rdt.connection_index)

    def _add_timing_stats(self, timer):
        """ add stats from latest coverage operation to Accumulator and periodically log results """
        self.time_stats.add(timer)
        if self.time_stats.get_count() % REPORT_FREQUENCY > 0:
            return

        if log.isEnabledFor(TRACE):
            # report per step
            for step in 'checks', 'insert', 'keys', 'save', 'notify':
                log.debug('%s step %s times: %s', self._id, step,
                          self.time_stats.to_string(step))
        # report totals
        log.debug('%s total times: %s', self._id, self.time_stats)
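
The dataset and coverage caches above (get_dataset, get_coverage) implement a small LRU on top of collections.OrderedDict: pop on a hit to move the key to the most-recent end, and evict the oldest entry once the limit is reached. A standalone, pyon-free sketch of the same pattern, with a toy loader:

import collections

class LRUCache(object):
    def __init__(self, limit, loader):
        self.limit = limit
        self.loader = loader                      # called on a cache miss
        self._data = collections.OrderedDict()

    def get(self, key):
        try:
            value = self._data.pop(key)           # hit: re-inserted below as most recent
        except KeyError:
            value = self.loader(key)              # miss: load it
            if len(self._data) >= self.limit:
                self._data.popitem(last=False)    # evict the least recently used entry
        self._data[key] = value
        return value

cache = LRUCache(2, loader=lambda k: k.upper())   # toy loader for illustration
cache.get('a'); cache.get('b'); cache.get('a'); cache.get('c')
print(cache._data.keys())                         # 'b' was evicted; 'a' and 'c' remain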
class ScienceGranuleIngestionWorker(TransformStreamListener, BaseIngestionWorker):
    CACHE_LIMIT=CFG.get_safe('container.ingestion_cache',5)

    def __init__(self, *args,**kwargs):
        TransformStreamListener.__init__(self, *args, **kwargs)
        BaseIngestionWorker.__init__(self, *args, **kwargs)

        #--------------------------------------------------------------------------------
        # Ingestion Cache
        # - Datasets
        # - Coverage instances
        #--------------------------------------------------------------------------------
        self._datasets  = collections.OrderedDict()
        self._coverages = collections.OrderedDict()

        self._bad_coverages = {}

        self.time_stats = Accumulator(format='%3f')
        # unique ID to identify this worker in log msgs
        self._id = uuid.uuid1()



    def on_start(self): #pragma no cover
        #--------------------------------------------------------------------------------
        # Explicit on_start
        #--------------------------------------------------------------------------------

        # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created
        # We want explicit management of the thread and subscriber object for ingestion

        TransformStreamProcess.on_start(self)
        
        self.queue_name = self.CFG.get_safe('process.queue_name',self.id)
        self.subscriber = StreamSubscriber(process=self, exchange_name=self.queue_name, callback=self.receive_callback)
        self.thread_lock = RLock()
        
        #--------------------------------------------------------------------------------
        # Normal on_start after this point
        #--------------------------------------------------------------------------------

        BaseIngestionWorker.on_start(self)
        self._rpc_server = self.container.proc_manager._create_listening_endpoint(from_name=self.id, process=self)
        self.add_endpoint(self._rpc_server)

        self.event_publisher = EventPublisher(OT.DatasetModified)
        self.stored_value_manager = StoredValueManager(self.container)

        self.lookup_docs = self.CFG.get_safe('process.lookup_docs',[])
        self.input_product = self.CFG.get_safe('process.input_product','')
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(event_type=OT.ExternalReferencesUpdatedEvent, callback=self._add_lookups, auto_delete=True)
        self.add_endpoint(self.lookup_monitor)
        self.connection_id = ''
        self.connection_index = None
        
        self.start_listener()

    def on_quit(self): #pragma no cover
        self.event_publisher.close()
        if self.subscriber_thread:
            self.stop_listener()
        for stream, coverage in self._coverages.iteritems():
            try:
                coverage.close(timeout=5)
            except:
                log.exception('Problems closing the coverage')
        self._coverages.clear()
        TransformStreamListener.on_quit(self)
        BaseIngestionWorker.on_quit(self)

    
    def start_listener(self):
        # We use a lock here to prevent possible race conditions from starting multiple listeners and coverage clobbering
        with self.thread_lock:
            self.subscriber_thread = self._process.thread_manager.spawn(self.subscriber.listen, thread_name='%s-subscriber' % self.id)

    def stop_listener(self):
        # Avoid race conditions with coverage operations (Don't start a listener at the same time as closing one)
        with self.thread_lock:
            self.subscriber.close()
            self.subscriber_thread.join(timeout=10)
            for stream, coverage in self._coverages.iteritems():
                try:
                    coverage.close(timeout=5)
                except:
                    log.exception('Problems closing the coverage')
            self._coverages.clear()
            self.subscriber_thread = None

    def pause(self):
        if self.subscriber_thread is not None:
            self.stop_listener()


    def resume(self):
        if self.subscriber_thread is None:
            self.start_listener()


    def _add_lookups(self, event, *args, **kwargs):
        if event.origin == self.input_product:
            if isinstance(event.reference_keys, list):
                self.new_lookups.put(event.reference_keys)

    def _new_dataset(self, stream_id):
        '''
        Adds a new dataset to the internal cache of the ingestion worker
        '''
        rr_client = self.container.resource_registry
        datasets, _ = rr_client.find_subjects(subject_type=RT.Dataset,predicate=PRED.hasStream,object=stream_id,id_only=True)
        if datasets:
            return datasets[0]
        return None

    def _get_data_products(self, dataset_id):
        rr_client = self.container.resource_registry
        data_products, _ = rr_client.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct, id_only=False)
        return data_products


    #--------------------------------------------------------------------------------
    # Metadata Handlers
    #--------------------------------------------------------------------------------

    def initialize_metadata(self, dataset_id, rdt):
        '''
        Initializes a metadata document in the object store. The document
        contains information about the bounds and extents of the dataset, as
        well as other metadata to improve performance.

        '''
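        # Shape of the stored document, for illustration (hypothetical values): with
        # rdt fields 'time' = [10, 11, 12] and 'temp' = [3.0, 4.0, 5.0] it would be
        # {'bounds': {'time': (10, 12), 'temp': (3.0, 5.0)},
        #  'extents': {'time': 3, 'temp': 3},
        #  'last_values': {'time': 12, 'temp': 5.0},
        #  'size': 24}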

        object_store = self.container.object_store
        key = dataset_id
        bounds = {}
        extents = {}
        last_values = {}
        rough_size = 0
        for k,v in rdt.iteritems():
            v = v[:].flatten()
            if v.dtype.char not in ('S', 'O', 'U', 'V'):
                bounds[k] = (np.min(v), np.max(v))
                last_values[k] = v[-1]
            extents[k] = len(rdt)
            rough_size += len(rdt) * 4

        doc = {'bounds':bounds, 'extents':extents, 'last_values':last_values, 'size': rough_size}
        doc = numpy_walk(doc)
        object_store.create_doc(doc, object_id=key)
        return 

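    # A rough illustration (not part of the original source) of the document created
    # above, assuming a hypothetical dataset with 'time' and 'temp' parameters and
    # ten records:
    #
    #     {'bounds':      {'time': (0.0, 9.0),  'temp': (10.1, 12.4)},
    #      'extents':     {'time': 10,          'temp': 10},
    #      'last_values': {'time': 9.0,         'temp': 12.4},
    #      'size':        80}
    #
    # numpy_walk() is then expected to replace any remaining numpy scalars with plain
    # Python values so the document can be stored.
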
    def update_metadata(self, dataset_id, rdt):
        '''
        Updates the metadata document with the latest information available
        '''

        self.update_data_product_metadata(dataset_id, rdt)

        # Grab the document
        object_store = self.container.object_store
        key = dataset_id
        try:
            doc = object_store.read_doc(key)
        except NotFound:
            return self.initialize_metadata(dataset_id, rdt)
        # These are the fields we're interested in
        bounds = doc['bounds']
        extents = doc['extents']
        last_values = doc['last_values']
        rough_size = doc['size']
        for k,v in rdt.iteritems():
            if k not in bounds:
                continue

            v = v[:].flatten() # Get the numpy representation (dense array).
            if v.dtype.char not in ('S', 'O', 'U', 'V'):
                # Update the bounds and the last observed value
                l_min = np.min(v)
                l_max = np.max(v)
                o_min, o_max = bounds[k]
                bounds[k] = (min(l_min, o_min), max(l_max, o_max))
                last_values[k] = v[-1]
            # Increase the extents and the rough size estimate
            extents[k] = extents[k] + len(rdt)
            rough_size += len(rdt) * 4
        doc['size'] = rough_size
        # Sanitize it
        doc = numpy_walk(doc)
        object_store.update_doc(doc)

    def update_data_product_metadata(self, dataset_id, rdt):
        data_products = self._get_data_products(dataset_id)
        for data_product in data_products:
            self.update_time(data_product, rdt[rdt.temporal_parameter][:])
            self.update_geo(data_product, rdt)
            try:
                self.container.resource_registry.update(data_product)
            except: # TODO: figure out WHICH Exception gets raised here when the bounds are off
                log.error("Problem updating the data product metadata", exc_info=True)
                # Carry on :(



    def update_time(self, data_product, t):
        '''
        Updates the nominal_datetime bounds for a data product, converting from
        NTP time and accounting for out-of-order data.
        '''

        t0, t1 = self.get_datetime_bounds(data_product)
        #TODO: Account for non NTP-based timestamps
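        # 2208988800 is the number of seconds between the NTP epoch (1900-01-01)
        # and the Unix epoch (1970-01-01); subtracting it converts NTP seconds to
        # Unix seconds.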
        min_t = np.min(t) - 2208988800
        max_t = np.max(t) - 2208988800
        if t0:
            t0 = min(t0, min_t)
        else:
            t0 = min_t

        if t1:
            t1 = max(t1, max_t)
        else:
            t1 = max_t

        if t0 > t1:
            log.error("Invalid time bounds: t0 (%s) > t1 (%s); this should never happen", t0, t1)

        data_product.nominal_datetime.start_datetime = float(t0)
        data_product.nominal_datetime.end_datetime = float(t1)

    def get_datetime(self, nominal_datetime):
        '''
        Returns a floating point value for the datetime or None if it's an
        empty string
        '''
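        # Illustrative examples (values are hypothetical):
        #   get_datetime('3600.5') -> 3600.5
        #   get_datetime('')       -> None
        #   get_datetime(42)       -> 42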
        t = None
        # So normally this is a string
        if isinstance(nominal_datetime, (float, int)):
            t = nominal_datetime # simple enough
        elif isinstance(nominal_datetime, basestring):
            if nominal_datetime: # not an empty string
                # Try to convert it to a float
                try:
                    t = float(nominal_datetime)
                except ValueError:
                    pass
        return t

    def get_datetime_bounds(self, data_product):
        '''Returns the min and max for the bounds in the nominal_datetime
        attr
        '''
        
        t0 = self.get_datetime(data_product.nominal_datetime.start_datetime)
        t1 = self.get_datetime(data_product.nominal_datetime.end_datetime)
        return (t0, t1)


    def update_geo(self, data_product, rdt):
        '''
        Finds the maximum bounding box
        '''
        lat = None
        lon = None
        for p in rdt:
            if rdt._rd[p] is None:
                continue
            # TODO: Not an all encompassing list of acceptable names for lat and lon
            if p.lower() in ('lat', 'latitude', 'y_axis'):
                lat = np.asscalar(rdt[p][-1])
            elif p.lower() in ('lon', 'longitude', 'x_axis'):
                lon = np.asscalar(rdt[p][-1])
            if lat is not None and lon is not None:
                break

        if lat is not None and lon is not None:
            data_product.geospatial_bounds.geospatial_latitude_limit_north = lat
            data_product.geospatial_bounds.geospatial_latitude_limit_south = lat
            data_product.geospatial_bounds.geospatial_longitude_limit_east = lon
            data_product.geospatial_bounds.geospatial_longitude_limit_west = lon

    
    #--------------------------------------------------------------------------------
    # Cache management
    #--------------------------------------------------------------------------------

    def get_dataset(self,stream_id):
        '''
        Memoization (LRU) of _new_dataset
        '''
        try:
            result = self._datasets.pop(stream_id)
        except KeyError:
            result = self._new_dataset(stream_id)
            if result is None:
                return None
            if len(self._datasets) >= self.CACHE_LIMIT:
                self._datasets.popitem(0)
        self._datasets[stream_id] = result
        return result

    def get_coverage(self, stream_id):
        '''
        Memoization (LRU) of _get_coverage
        '''
        try:
            result = self._coverages.pop(stream_id)
        except KeyError:
            dataset_id = self.get_dataset(stream_id)
            if dataset_id is None:
                return None
            result = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a')
            if result is None:
                return None
            if len(self._coverages) >= self.CACHE_LIMIT:
                k, coverage = self._coverages.popitem(0)
                coverage.close(timeout=5)
        self._coverages[stream_id] = result
        return result


    #--------------------------------------------------------------------------------
    # Granule Parsing and Handling
    #--------------------------------------------------------------------------------


    @handle_stream_exception()
    def recv_packet(self, msg, stream_route, stream_id):
        '''
        The consumer callback to parse and manage the granule.
        The message is ACK'd once the function returns
        '''
        log.trace('received granule for stream %s', stream_id)

        if msg == {}:
            log.error('Received empty message from stream: %s', stream_id)
            return
        # Message validation
        if not isinstance(msg, Granule):
            log.error('Ingestion received a message that is not a granule: %s', msg)
            return


        rdt = RecordDictionaryTool.load_from_granule(msg)
        if rdt is None:
            log.error('Invalid granule (no RDT) for stream %s', stream_id)
            return
        if not len(rdt):
            log.debug('Empty granule for stream %s', stream_id)
            return

        self.persist_or_timeout(stream_id, rdt)

    def persist_or_timeout(self, stream_id, rdt):
        '''
        Retries parsing and persisting the granule, doubling the wait between
        attempts (2 s, 4 s, 8 s, ... capped at five minutes per wait) until
        MAX_RETRY_TIME is exceeded.
        '''
        done = False
        timeout = 2
        start = time.time()
        while not done:
            if self.parse_granule(stream_id, rdt, start, done):
                return # We're all done, everything worked

            if (time.time() - start) > MAX_RETRY_TIME: # After a while, give up
                dataset_id = self.get_dataset(stream_id)
                log.error("We're giving up, the coverage needs to be inspected %s", DatasetManagementService._get_coverage_path(dataset_id))
                raise  # Bare raise re-raises the exception last handled (caught inside parse_granule)

            if stream_id in self._coverages:
                log.info('Popping coverage for stream %s', stream_id)
                self._coverages.pop(stream_id)

            gevent.sleep(timeout)

            timeout = min(60 * 5, timeout * 2)


    def parse_granule(self, stream_id, rdt, start, done):
        try:
            self.add_granule(stream_id, rdt)
            return True
        except Exception:
            log.exception('An issue with coverage, retrying after a bit')
            return False


    def dataset_changed(self, dataset_id, window):
        self.event_publisher.publish_event(origin=dataset_id, author=self.id, window=window)

    def build_data_dict(self, rdt):
        np_dict = {}
        
        time_array = rdt[rdt.temporal_parameter]
        if time_array is None:
            raise ValueError("A granule needs a time array")
        for k,v in rdt.iteritems():
            # Sparse values are different and aren't constructed using NumpyParameterData
            if isinstance(rdt.param_type(k), SparseConstantType):
                value = v[0]
                if hasattr(value, 'dtype'):
                    value = np.asscalar(value)
                time_start = np.asscalar(time_array[0])
                np_dict[k] = ConstantOverTime(k, value, time_start=time_start, time_end=None) # From now on
                continue
            elif isinstance(rdt.param_type(k), CategoryType):
                log.warning("Category types temporarily unsupported")
                continue
            elif isinstance(rdt.param_type(k), RecordType):
                value = v
            else:
                value = v

            np_dict[k] = NumpyParameterData(k, value, time_array)

        return np_dict

    def insert_values(self, coverage, rdt, stream_id):
        
        np_dict = self.build_data_dict(rdt)

        if 'ingestion_timestamp' in coverage.list_parameters():
            timestamps = np.array([(time.time()+2208988800) for i in rdt[rdt.temporal_parameter]])
            np_dict['ingestion_timestamp'] = NumpyParameterData('ingestion_timestamp', timestamps, rdt[rdt.temporal_parameter])


        # If it's sparse only
        if self.sparse_only(rdt):
            del np_dict[rdt.temporal_parameter]
    

        try:
            coverage.set_parameter_values(np_dict)
        except IOError as e:
            log.error("Couldn't insert values for coverage: %s",
                      coverage.persistence_dir, exc_info=True)
            try:
                coverage.close()
            finally:
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(e.message)
        except KeyError as e:
            if 'has not been initialized' in e.message:
                coverage.refresh()
            raise
        except Exception:
            log.error("Unexpected error setting parameter values; rdt: %r", rdt, exc_info=True)
            raise

    
    def add_granule(self,stream_id, rdt):
        ''' Appends the granule's data to the coverage and persists it. '''
        if stream_id in self._bad_coverages:
            log.info('Attempting to insert a message into a known bad coverage: %s',
                     DatasetManagementService._get_coverage_path(self.get_dataset(stream_id)))
            
        #--------------------------------------------------------------------------------
        # Coverage determination and appending
        #--------------------------------------------------------------------------------
        dataset_id = self.get_dataset(stream_id)
        if not dataset_id:
            log.error('No dataset could be determined on this stream: %s', stream_id)
            return

        try:
            coverage = self.get_coverage(stream_id)
        except IOError as e:
            log.error("Couldn't open coverage: %s",
                      DatasetManagementService._get_coverage_path(self.get_dataset(stream_id)))
            raise CorruptionError(e.message)

        if not coverage:
            log.error('Could not persist coverage from granule, coverage is None')
            return
        #--------------------------------------------------------------------------------
        # Actual persistence
        #--------------------------------------------------------------------------------

        if rdt[rdt.temporal_parameter] is None:
            log.warning("Empty granule received")
            return

        # Parse the RDT and set the values in the coverage
        self.insert_values(coverage, rdt, stream_id)
        
        # Force the data to be flushed
        DatasetManagementService._save_coverage(coverage)

        self.update_metadata(dataset_id, rdt)

        try:
            window = rdt[rdt.temporal_parameter][[0,-1]]
            window = window.tolist()
        except (ValueError, IndexError):
            window = None
        self.dataset_changed(dataset_id, window)

    def sparse_only(self, rdt):
        '''
        A sparse-only rdt contains only a time array and sparse (constant-over-time) values, no other data
        '''
        if rdt[rdt.temporal_parameter] is None:
            return False # No time, so it's just empty

        at_least_one = False

        for key in rdt.iterkeys():
            # Skip time, that needs to be there
            if key == rdt.temporal_parameter:
                continue
            if not isinstance(rdt.param_type(key), SparseConstantType):
                return False
            else:
                at_least_one = True

        return at_least_one
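
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the worker above): get_dataset() and
# get_coverage() implement a small LRU cache by popping and re-inserting keys
# in an ordered mapping and evicting the oldest entry once CACHE_LIMIT is
# reached. The standalone class below reproduces that pattern with a plain
# collections.OrderedDict; the names SimpleLRUCache and 'limit' are
# hypothetical and exist only for this example.
# ---------------------------------------------------------------------------
from collections import OrderedDict


class SimpleLRUCache(object):
    """Minimal LRU cache: re-inserting a key marks it as most recently used."""

    def __init__(self, limit=5):
        self.limit = limit
        self._items = OrderedDict()

    def get(self, key, factory):
        try:
            # Pop so that the re-insert below moves the key to the end (most recent)
            value = self._items.pop(key)
        except KeyError:
            value = factory(key)                 # build the value on a cache miss
            if value is None:
                return None                      # do not cache failed lookups
            if len(self._items) >= self.limit:
                self._items.popitem(last=False)  # evict the least recently used entry
        self._items[key] = value
        return value

# Example usage:
#   cache = SimpleLRUCache(limit=2)
#   cache.get('stream-1', lambda key: 'dataset-for-%s' % key)
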
Example #22
class ResourceRegistry(object):
    """
    Class that uses a datastore to provide a resource registry.
    The resource registry adds knowledge of resource objects and associations.
    Resources have lifecycle state.
    Add special treatment of Attachment resources
    """
    DEFAULT_ATTACHMENT_NAME = 'resource.attachment'

    def __init__(self, datastore_manager=None, container=None):
        self.container = container or bootstrap.container_instance

        # Get an instance of datastore configured as resource registry.
        datastore_manager = datastore_manager or self.container.datastore_manager
        self.rr_store = datastore_manager.get_datastore(DataStore.DS_RESOURCES, DataStore.DS_PROFILE.RESOURCES)
        self.name = 'container_resource_registry'
        self.id = 'container_resource_registry'

        self.event_pub = EventPublisher()

    def start(self):
        pass

    def stop(self):
        self.close()

    def close(self):
        """
        Pass-through method to close the underlying datastore.
        """
        self.rr_store.close()

    # -------------------------------------------------------------------------
    # Resource object manipulation

    def create(self, object=None, actor_id=None, object_id=None, attachments=None):
        """
        Accepts object that is to be stored in the data store and tags them with additional data
        (timestamp and such) If actor_id is provided, creates hasOwner association with objects.
        If attachments are provided
        (in dict(att1=dict(data=xyz), att2=dict(data=aaa, content_type='text/plain') form)
        they get attached to the object.
        Returns a tuple containing object and revision identifiers.
        """
        if object is None:
            raise BadRequest("Object not present")
        if not isinstance(object, IonObjectBase):
            raise BadRequest("Object is not an IonObject")
        if not is_resource(object):
            raise BadRequest("Object is not a Resource")
        if "_id" in object:
            raise BadRequest("Object must not contain _id")
        if "_rev" in object:
            raise BadRequest("Object must not contain _rev")


        lcsm = get_restype_lcsm(object._get_type())
        object.lcstate = lcsm.initial_state if lcsm else LCS.DEPLOYED
        object.availability = lcsm.initial_availability if lcsm else AS.AVAILABLE
        cur_time = get_ion_ts()
        object.ts_created = cur_time
        object.ts_updated = cur_time
        if object_id is None:
            new_res_id = create_unique_resource_id()
        else:
            new_res_id = object_id
        res = self.rr_store.create(object, new_res_id, attachments=attachments)
        res_id, rev = res

        if actor_id and actor_id != 'anonymous':
            log.debug("Associate resource_id=%s with owner=%s", res_id, actor_id)
            self.create_association(res_id, PRED.hasOwner, actor_id)

        if self.container.has_capability(self.container.CCAP.EVENT_PUBLISHER):
            self.event_pub.publish_event(event_type="ResourceModifiedEvent",
                                     origin=res_id, origin_type=object._get_type(),
                                     sub_type="CREATE",
                                     mod_type=ResourceModificationType.CREATE)

        return res

    def create_mult(self, res_list):
        cur_time = get_ion_ts()
        id_list = []
        for resobj in res_list:
            lcsm = get_restype_lcsm(resobj._get_type())
            resobj.lcstate = lcsm.initial_state if lcsm else LCS.DEPLOYED
            resobj.availability = lcsm.initial_availability if lcsm else AS.AVAILABLE
            resobj.ts_created = cur_time
            resobj.ts_updated = cur_time
            id_list.append(resobj._id if "_id" in resobj else create_unique_resource_id())

        res = self.rr_store.create_mult(res_list, id_list, allow_ids=True)
        res_list = [(rid, rrv) for success, rid, rrv in res]

        # TODO: Associations with owners

        # TODO: Publish events (skipped because publishing one by one is inefficient for a large list)
#        for rid,rrv in res_list:
#            self.event_pub.publish_event(event_type="ResourceModifiedEvent",
#                origin=res_id, origin_type=object._get_type(),
#                mod_type=ResourceModificationType.CREATE)

        return res_list

    def read(self, object_id='', rev_id=''):
        if not object_id:
            raise BadRequest("The object_id parameter is an empty string")

        return self.rr_store.read(object_id, rev_id)

    def read_mult(self, object_ids=None, strict=True):
        """
        @param object_ids  a list of resource ids (can be empty)
        @param strict  a bool - if True (default), raise a NotFound in case one of the resources was not found
        Returns resource objects for given list of resource ids in the same order. If a resource object was not
        found, contains None (unless strict==True) in which case NotFound will be raised.
        """
        if object_ids is None:
            raise BadRequest("The object_ids parameter is empty")
        return self.rr_store.read_mult(object_ids, strict=strict)

    def update(self, object):
        if object is None:
            raise BadRequest("Object not present")
        if not hasattr(object, "_id") or not hasattr(object, "_rev"):
            raise BadRequest("Object does not have required '_id' or '_rev' attribute")
        # Check whether the LCS has been modified
        res_obj = self.read(object._id)

        object.ts_updated = get_ion_ts()
        if res_obj.lcstate != object.lcstate or res_obj.availability != object.availability:
            log.warn("Cannot modify %s life cycle state or availability in update current=%s/%s given=%s/%s. " +
                     "DO NOT REUSE THE SAME OBJECT IN CREATE THEN UPDATE",
                      type(res_obj).__name__, res_obj.lcstate, res_obj.availability, object.lcstate, object.availability)
            object.lcstate = res_obj.lcstate
            object.availability = res_obj.availability

        self.event_pub.publish_event(event_type="ResourceModifiedEvent",
                                     origin=object._id, origin_type=object._get_type(),
                                     sub_type="UPDATE",
                                     mod_type=ResourceModificationType.UPDATE)

        return self.rr_store.update(object)

    def delete(self, object_id='', del_associations=False):
        res_obj = self.read(object_id)
        if not res_obj:
            raise NotFound("Resource %s does not exist" % object_id)

        if not del_associations:
            self._delete_owners(object_id)

        # Update first to RETIRED to give ElasticSearch a hint
        res_obj.lcstate = LCS.RETIRED
        res_obj.availability = AS.PRIVATE
        self.rr_store.update(res_obj)

        if del_associations:
            assoc_ids = self.find_associations(anyside=object_id, id_only=True)
            self.rr_store.delete_doc_mult(assoc_ids, object_type="Association")
            #log.debug("Deleted %s associations for resource %s", len(assoc_ids), object_id)

        elif self._is_in_association(object_id):
            log.warn("Deleting object %s that still has associations" % object_id)

        res = self.rr_store.delete(object_id)

        if self.container.has_capability(self.container.CCAP.EVENT_PUBLISHER):
            self.event_pub.publish_event(event_type="ResourceModifiedEvent",
                                     origin=res_obj._id, origin_type=res_obj._get_type(),
                                     sub_type="DELETE",
                                     mod_type=ResourceModificationType.DELETE)

        return res

    def _delete_owners(self, resource_id):
        # Delete all owner users.
        owners, assocs = self.rr_store.find_objects(resource_id, PRED.hasOwner, RT.ActorIdentity, id_only=True)
        for aid in assocs:
            self.delete_association(aid)

    def retire(self, resource_id):
        """
        This is the official "delete" for resource objects: they are set to RETIRED lcstate.
        All associations are set to retired as well.
        """
        res_obj = self.read(resource_id)
        old_state = res_obj.lcstate
        old_availability = res_obj.availability
        if old_state == LCS.RETIRED:
            raise BadRequest("Resource id=%s already RETIRED" % (resource_id))

        res_obj.lcstate = LCS.RETIRED
        res_obj.availability = AS.PRIVATE
        res_obj.ts_updated = get_ion_ts()

        updres = self.rr_store.update(res_obj)
        log.debug("retire(res_id=%s). Change %s_%s to %s_%s", resource_id,
                  old_state, old_availability, res_obj.lcstate, res_obj.availability)

        assocs = self.find_associations(anyside=resource_id, id_only=False)
        for assoc in assocs:
            assoc.retired = True
        if assocs:
            self.rr_store.update_mult(assocs)
            log.debug("retire(res_id=%s). Retired %s associations", resource_id, len(assocs))

        if self.container.has_capability(self.container.CCAP.EVENT_PUBLISHER):
            self.event_pub.publish_event(event_type="ResourceLifecycleEvent",
                                     origin=res_obj._id, origin_type=res_obj.type_,
                                     sub_type="%s.%s" % (res_obj.lcstate, res_obj.availability),
                                     lcstate=res_obj.lcstate, availability=res_obj.availability,
                                     lcstate_before=old_state, availability_before=old_availability)


    def execute_lifecycle_transition(self, resource_id='', transition_event=''):
        if transition_event == LCE.RETIRE:
            return self.retire(resource_id)

        res_obj = self.read(resource_id)

        old_state = res_obj.lcstate
        old_availability = res_obj.availability
        old_lcs = lcstate(old_state, old_availability)

        restype = res_obj._get_type()
        restype_workflow = get_restype_lcsm(restype)
        if not restype_workflow:
            raise BadRequest("Resource id=%s type=%s has no lifecycle" % (resource_id, restype))

        new_state = restype_workflow.get_successor(old_lcs, transition_event)
        if not new_state:
            raise BadRequest("Resource id=%s, type=%s, lcstate=%s has no transition for event %s" % (
                resource_id, restype, old_lcs, transition_event))

        lcmat, lcav = lcsplit(new_state)
        res_obj.lcstate = lcmat
        res_obj.availability = lcav

        res_obj.ts_updated = get_ion_ts()
        self.rr_store.update(res_obj)
        log.debug("execute_lifecycle_transition(res_id=%s, event=%s). Change %s_%s to %s_%s", resource_id, transition_event,
                  old_state, old_availability, res_obj.lcstate, res_obj.availability)

        if self.container.has_capability(self.container.CCAP.EVENT_PUBLISHER):
            self.event_pub.publish_event(event_type="ResourceLifecycleEvent",
                                     origin=res_obj._id, origin_type=res_obj.type_,
                                     sub_type="%s.%s" % (res_obj.lcstate, res_obj.availability),
                                     lcstate=res_obj.lcstate, availability=res_obj.availability,
                                     lcstate_before=old_state, availability_before=old_availability,
                                     transition_event=transition_event)

        return lcstate(res_obj.lcstate, res_obj.availability)

    def set_lifecycle_state(self, resource_id='', target_lcstate=''):
        """Sets the lifecycle state (if possible) to the target state. Supports compound states"""
        if not target_lcstate:
            raise BadRequest("Bad life-cycle state %s" % target_lcstate)
        if target_lcstate.startswith('RETIRED'):
            return self.retire(resource_id)

        res_obj = self.read(resource_id)
        old_target = target_lcstate
        old_state = res_obj.lcstate
        old_availability = res_obj.availability
        old_lcs = lcstate(old_state, old_availability)
        restype = res_obj._get_type()
        restype_workflow = get_restype_lcsm(restype)
        if not restype_workflow:
            raise BadRequest("Resource id=%s type=%s has no lifecycle" % (resource_id, restype))

        if '_' in target_lcstate:    # Support compound
            target_lcmat, target_lcav = lcsplit(target_lcstate)
            if target_lcmat not in LCS:
                raise BadRequest("Unknown life-cycle state %s" % target_lcmat)
            if target_lcav and target_lcav not in AS:
                raise BadRequest("Unknown life-cycle availability %s" % target_lcav)
        elif target_lcstate in LCS:
            target_lcmat, target_lcav = target_lcstate, res_obj.availability
            target_lcstate = lcstate(target_lcmat, target_lcav)
        elif target_lcstate in AS:
            target_lcmat, target_lcav = res_obj.lcstate, target_lcstate
            target_lcstate = lcstate(target_lcmat, target_lcav)
        else:
            raise BadRequest("Unknown life-cycle state %s" % target_lcstate)

        # Check that target state is allowed
        if not target_lcstate in restype_workflow.get_successors(old_lcs).values():
            raise BadRequest("Target state %s not reachable for resource in state %s" % (target_lcstate, old_lcs))

        res_obj.lcstate = target_lcmat
        res_obj.availability = target_lcav

        res_obj.ts_updated = get_ion_ts()

        updres = self.rr_store.update(res_obj)
        log.debug("set_lifecycle_state(res_id=%s, target=%s). Change %s_%s to %s_%s", resource_id, old_target,
                  old_state, old_availability, res_obj.lcstate, res_obj.availability)

        if self.container.has_capability(self.container.CCAP.EVENT_PUBLISHER):
            self.event_pub.publish_event(event_type="ResourceLifecycleEvent",
                                     origin=res_obj._id, origin_type=res_obj.type_,
                                     sub_type="%s.%s" % (res_obj.lcstate, res_obj.availability),
                                     lcstate=res_obj.lcstate, availability=res_obj.availability,
                                     lcstate_before=old_state, availability_before=old_availability)


    # -------------------------------------------------------------------------
    # Attachment operations

    def create_attachment(self, resource_id='', attachment=None, actor_id=None):
        """
        Creates an Attachment resource from given argument and associates it with the given resource.
        @retval the resource ID for the attachment resource.
        """
        if attachment is None:
            raise BadRequest("Object not present")
        if not isinstance(attachment, Attachment):
            raise BadRequest("Object is not an Attachment")

        attachment.object_id = resource_id if resource_id else ""
        attachment.attachment_size = -1

        attachment_content = None

        if attachment.attachment_type == AttachmentType.BLOB:
            if type(attachment.content) is not str:
                raise BadRequest("Attachment content must be str")
            attachment.attachment_size = len(attachment.content)
            attachment_content = attachment.content
        elif attachment.attachment_type == AttachmentType.ASCII:
            if type(attachment.content) is not str:
                raise BadRequest("Attachment content must be str")
            attachment.attachment_size = len(attachment.content)
            attachment_content = attachment.content
        elif attachment.attachment_type == AttachmentType.OBJECT:
            raise BadRequest("AttachmentType.OBJECT is not supported currently")
        elif attachment.attachment_type == AttachmentType.REFERENCE:
            if not isinstance(attachment.content, basestring):
                raise BadRequest("Attachment content must be binary string")
            attachment.attachment_size = len(attachment.content)
            attachment_content = attachment.content
        else:
            raise BadRequest("Unknown attachment-type: %s" % attachment.attachment_type)

        attachment.content = ''
        content = dict(data=attachment_content, content_type=attachment.content_type)

        att_id, _ = self.create(attachment, attachments={self.DEFAULT_ATTACHMENT_NAME: content}, actor_id=actor_id)

        if resource_id:
            self.create_association(resource_id, PRED.hasAttachment, att_id)

        return att_id

    def read_attachment(self, attachment_id='', include_content=False):
        """
        Returns the metadata of an attachment. Unless indicated otherwise the content returned
        is only a name to the actual attachment content.
        """
        attachment = self.read(attachment_id)
        if not isinstance(attachment, Attachment):
            raise Inconsistent("Object in datastore must be Attachment, not %s" % type(attachment))

        if include_content:
            attachment.content = self.rr_store.read_attachment(attachment_id,
                                                               attachment_name=self.DEFAULT_ATTACHMENT_NAME)
            if attachment.attachment_type == AttachmentType.BLOB:
                if type(attachment.content) is not str:
                    raise BadRequest("Attachment content must be str")

        return attachment

    def delete_attachment(self, attachment_id=''):
        try:
            self.rr_store.delete_attachment(attachment_id, attachment_name=self.DEFAULT_ATTACHMENT_NAME)
        finally:
            return self.delete(attachment_id, del_associations=True)

    def find_attachments(self, resource_id='', keyword=None,
                         limit=0, descending=False, include_content=False, id_only=True):
        key = [resource_id]
        att_res = self.rr_store.find_by_view("attachment", "by_resource", start_key=key,
                                             end_key=list(key), descending=descending, limit=limit,
                                             id_only=True)

        att_ids = [att[0] for att in att_res if not keyword or keyword in att[1][2]]
        if id_only:
            return att_ids
        else:
            atts = self.rr_store.read_mult(att_ids)
            if include_content:
                for att in atts:
                    att.content = self.rr_store.read_attachment(doc=att._id, attachment_name=self.DEFAULT_ATTACHMENT_NAME)
            return atts


    # -------------------------------------------------------------------------
    # Association operations

    def create_association(self, subject=None, predicate=None, object=None, assoc_type=None):
        """
        Create an association between two IonObjects with a given predicate
        @param assoc_type  DEPRECATED
        """
        if not (subject and predicate and object):
            raise BadRequest("Association must have all elements set")

        if type(subject) is str:
            subject_id = subject
            subject = self.read(subject_id)
            subject_type = subject.type_
        else:
            if "_id" not in subject:
                raise BadRequest("Subject id not available")
            subject_id = subject._id
            subject_type = subject.type_

        if type(object) is str:
            object_id = object
            object = self.read(object_id)
            object_type = object.type_
        else:
            if "_id" not in object:
                raise BadRequest("Object id not available")
            object_id = object._id
            object_type = object.type_

        # Check that subject and object type are permitted by association definition
        try:
            pt = Predicates.get(predicate)
        except AttributeError:
            raise BadRequest("Predicate unknown %s" % predicate)
        if not subject_type in pt['domain']:
            found_st = False
            for domt in pt['domain']:
                if subject_type in getextends(domt):
                    found_st = True
                    break
            if not found_st:
                raise BadRequest("Illegal subject type %s for predicate %s" % (subject_type, predicate))
        if not object_type in pt['range']:
            found_ot = False
            for rant in pt['range']:
                if object_type in getextends(rant):
                    found_ot = True
                    break
            if not found_ot:
                raise BadRequest("Illegal object type %s for predicate %s" % (object_type, predicate))

        # Finally, ensure this isn't a duplicate
        assoc_list = self.find_associations(subject_id, predicate, object_id, id_only=False)
        if len(assoc_list) != 0:
            assoc = assoc_list[0]
            #print "**** Found associations:"
            #import pprint
            #pprint.pprint(assoc_list)
            raise BadRequest("Association between %s and %s with predicate %s already exists" % (subject_id, object_id, predicate))

        assoc = IonObject("Association",
                          s=subject_id, st=subject_type,
                          p=predicate,
                          o=object_id, ot=object_type,
                          ts=get_ion_ts())
        return self.rr_store.create(assoc, create_unique_association_id())

    def create_association_mult(self, assoc_list=None):
        """
        Create multiple associations between two IonObjects with a given predicate.
        @param assoc_list  A list of 3-tuples of (subject, predicate, object). Subject/object can be str or object
        """
        if not assoc_list:
            return []

        lookup_rid = set()
        for s, p, o in assoc_list:
            if type(s) is str:
                lookup_rid.add(s)
            if type(o) is str:
                lookup_rid.add(o)
        lookup_rid = list(lookup_rid)
        lookup_obj = self.read_mult(lookup_rid) if lookup_rid else []
        res_by_id = dict(zip(lookup_rid, lookup_obj))

        create_ts = get_ion_ts()
        new_assoc_list = []
        for s, p, o in assoc_list:
            new_s = s
            new_o = o
            if type(s) is str:
                new_s = res_by_id[s]
                if not new_s:
                    raise NotFound("Subject %s not found" % s)
            else:
                if "_id" not in s:
                    raise BadRequest("Subject id not available")
            if type(o) is str:
                new_o = res_by_id[o]
                if not new_o:
                    raise NotFound("Object %s not found" % o)
            else:
                if "_id" not in o:
                    raise BadRequest("Object id not available")

            # Check that subject and object type are permitted by association definition
            if p not in Predicates:
                raise BadRequest("Predicate unknown %s" % p)
            pt = Predicates.get(p)
            if not new_s.type_ in pt['domain']:
                found_st = False
                for domt in pt['domain']:
                    if new_s.type_ in getextends(domt):
                        found_st = True
                        break
                if not found_st:
                    raise BadRequest("Illegal subject type %s for predicate %s" % (new_s.type_, p))
            if not new_o.type_ in pt['range']:
                found_ot = False
                for rant in pt['range']:
                    if new_o.type_ in getextends(rant):
                        found_ot = True
                        break
                if not found_ot:
                    raise BadRequest("Illegal object type %s for predicate %s" % (new_o.type_, p))

            # Skip duplicate check

            assoc = IonObject("Association",
                              s=new_s._id, st=new_s.type_,
                              p=p,
                              o=new_o._id, ot=new_o.type_,
                              ts=create_ts)
            new_assoc_list.append(assoc)

        new_assoc_ids = [create_unique_association_id() for i in xrange(len(new_assoc_list))]
        return self.rr_store.create_mult(new_assoc_list, new_assoc_ids)

    def delete_association(self, association=''):
        """
        Delete an association between two IonObjects
        @param association  Association object, association id or 3-list of [subject, predicate, object]
        """
        if type(association) in (list, tuple) and len(association) == 3:
            subject, predicate, obj = association
            assoc_id_list = self.find_associations(subject=subject, predicate=predicate, object=obj, id_only=True)
            success = True
            for aid in assoc_id_list:
                success = success and self.rr_store.delete(aid, object_type="Association")
            return success
        else:
            return self.rr_store.delete(association, object_type="Association")

    def _is_in_association(self, obj_id):
        if not obj_id:
            raise BadRequest("Must provide object id")

        assoc_ids = self.find_associations(anyside=obj_id, id_only=True, limit=1)
        if assoc_ids:
            log.debug("_is_in_association(%s): Object has associations: %s", obj_id, assoc_ids)
            return True

        return False

    def read_association(self, association_id=None):
        if not association_id:
            raise BadRequest("Missing association_id parameter")

        return self.rr_store.read(association_id, object_type="Association")


    # -------------------------------------------------------------------------
    # Resource find operations

    def read_object(self, subject="", predicate="", object_type="", assoc="", id_only=False):
        if assoc:
            if type(assoc) is str:
                assoc = self.read_association(assoc)
            return assoc.o if id_only else self.read(assoc.o)
        else:
            obj_list, assoc_list = self.find_objects(subject=subject, predicate=predicate, object_type=object_type, id_only=True)
            if not obj_list:
                raise NotFound("No object found for subject=%s, predicate=%s, object_type=%s" % (subject, predicate, object_type))
            elif len(obj_list) > 1:
                raise Inconsistent("More than one object found for subject=%s, predicate=%s, object_type=%s: count=%s" % (
                    subject, predicate, object_type, len(obj_list)))
            return obj_list[0] if id_only else self.read(obj_list[0])

    def read_subject(self, subject_type="", predicate="", object="", assoc="", id_only=False):
        if assoc:
            if type(assoc) is str:
                assoc = self.read_association(assoc)
            return assoc.s if id_only else self.read(assoc.s)
        else:
            sub_list, assoc_list = self.find_subjects(subject_type=subject_type, predicate=predicate, object=object, id_only=True)
            if not sub_list:
                raise NotFound("No subject found for subject_type=%s, predicate=%s, object=%s" % (subject_type, predicate, object))
            elif len(sub_list) > 1:
                raise Inconsistent("More than one subject found for subject_type=%s, predicate=%s, object=%s: count=%s" % (
                    subject_type, predicate, object, len(sub_list)))
            return sub_list[0] if id_only else self.read(sub_list[0])

    def find_objects(self, subject="", predicate="", object_type="", id_only=False, limit=None, skip=None, descending=None):
        return self.rr_store.find_objects(subject, predicate, object_type, id_only=id_only, limit=limit, skip=skip, descending=descending)

    def find_subjects(self, subject_type="", predicate="", object="", id_only=False, limit=None, skip=None, descending=None):
        return self.rr_store.find_subjects(subject_type, predicate, object, id_only=id_only, limit=limit, skip=skip, descending=descending)

    def find_associations(self, subject="", predicate="", object="", assoc_type=None, id_only=False, anyside=None, limit=None, skip=None, descending=None):
        return self.rr_store.find_associations(subject, predicate, object, assoc_type, id_only=id_only, anyside=anyside, limit=limit, skip=skip, descending=descending)

    def find_objects_mult(self, subjects=[], id_only=False):
        return self.rr_store.find_objects_mult(subjects=subjects, id_only=id_only)

    def find_subjects_mult(self, objects=[], id_only=False):
        return self.rr_store.find_subjects_mult(objects=objects, id_only=id_only)

    def get_association(self, subject="", predicate="", object="", assoc_type=None, id_only=False):
        assoc = self.rr_store.find_associations(subject, predicate, object, id_only=id_only)
        if not assoc:
            raise NotFound("Association for subject/predicate/object/type %s/%s/%s not found" % (
                subject, predicate, object))
        elif len(assoc) > 1:
            raise Inconsistent("Duplicate associations found for subject/predicate/object/type %s/%s/%s" % (
                subject, predicate, object))
        return assoc[0]

    def find_resources(self, restype="", lcstate="", name="", id_only=False):
        return self.rr_store.find_resources(restype, lcstate, name, id_only=id_only)

    def find_resources_ext(self, restype="", lcstate="", name="",
                           keyword=None, nested_type=None,
                           attr_name=None, attr_value=None, alt_id="", alt_id_ns="",
                           limit=None, skip=None, descending=None, id_only=False):
        return self.rr_store.find_resources_ext(restype=restype, lcstate=lcstate, name=name,
            keyword=keyword, nested_type=nested_type,
            attr_name=attr_name, attr_value=attr_value, alt_id=alt_id, alt_id_ns=alt_id_ns,
            limit=limit, skip=skip, descending=descending,
            id_only=id_only)


    # -------------------------------------------------------------------------
    # Extended resource framework operations

    def get_resource_extension(self, resource_id='', resource_extension='', computed_resource_type=None, ext_associations=None, ext_exclude=None, **kwargs ):
        """Returns any ExtendedResource object containing additional related information derived from associations

        @param resource_id    str
        @param resource_extension    str
        @param ext_associations    dict
        @param ext_exclude    list
        @retval extended_resource    ExtendedResource
        @throws BadRequest    A parameter is missing
        @throws NotFound    An object with the specified resource_id does not exist
        """
        if not resource_id:
            raise BadRequest("The resource_id parameter is empty")

        if not resource_extension:
            raise BadRequest("The resource_extension parameter is not set")

        extended_resource_handler = ExtendedResourceContainer(self, self)

        #Handle differently if the resource_id parameter is a list of ids
        if resource_id.find('[') > -1:
            res_input = eval(resource_id)
            extended_resource_list = extended_resource_handler.create_extended_resource_container_list(extended_resource_type=resource_extension,
                resource_id_list=res_input, computed_resource_type=computed_resource_type, ext_associations=ext_associations, ext_exclude=ext_exclude, **kwargs)
            return extended_resource_list

        extended_resource = extended_resource_handler.create_extended_resource_container(extended_resource_type=resource_extension,
            resource_id=resource_id, computed_resource_type=computed_resource_type, ext_associations=ext_associations, ext_exclude=ext_exclude, **kwargs)

        return extended_resource

    def prepare_resource_support(self, resource_type='', resource_id=''):
        """Returns a structured dict with information to help create/update a resource

        @param resource_type    str
        @param resource_id    str
        @retval resource_data    GenericPrepareSupport
        @throws BadRequest    A parameter is missing
        @throws NotFound    An object with the specified resource_id does not exist
        """

        if not resource_type:
            raise BadRequest("The resource_type parameter is required")

        extended_resource_handler = ExtendedResourceContainer(self, self)

        resource_data = extended_resource_handler.create_prepare_resource_support(resource_id=resource_id, prepare_resource_type=OT.GenericPrepareSupport, origin_resource_type=resource_type)

        # Fill out service request information for creating a resource of this type
        extended_resource_handler.set_service_requests(resource_data.create_request, 'resource_registry',
            'create', { "object":  "$(object)" })

        # Fill out service request information for updating a resource of this type
        extended_resource_handler.set_service_requests(resource_data.update_request, 'resource_registry',
            'update', { "object":  "$(object)" })

        return resource_data


    #This is a method used for testing - do not remove
    def get_user_id_test(self, resource_id, user_id=None):
        return user_id
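
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of ResourceRegistry above): the predicate
# domain/range validation in create_association() and create_association_mult()
# reduces to "the type of the resource must either be one of the predicate's
# domain/range types, or extend one of them". The helper below is hypothetical
# and only mirrors that logic; the getextends callable is assumed to return the
# types that extend a given type, as it is used in the code above.
# ---------------------------------------------------------------------------
def type_allowed_for_predicate(resource_type, allowed_types, getextends):
    """Return True if resource_type is in allowed_types or extends one of them."""
    if resource_type in allowed_types:
        return True
    for allowed in allowed_types:
        if resource_type in getextends(allowed):
            return True
    return False

# Example usage (hypothetical names):
#   pt = Predicates.get(predicate)
#   if not type_allowed_for_predicate(subject_type, pt['domain'], getextends):
#       raise BadRequest("Illegal subject type %s for predicate %s" % (subject_type, predicate))
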
Example #23
    def test_pub_and_sub(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            gq.put(args[0])
            if self.count == 2:
                ar.set()

        sub = EventSubscriber(event_type="ResourceEvent",
                              callback=cb,
                              origin="specific")
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)
        pub.publish_event(origin="specific", description="hello")

        event_obj = bootstrap.IonObject('ResourceEvent',
                                        origin='specific',
                                        description='more testing')
        self.assertEqual(event_obj, pub.publish_event_object(event_obj))

        with self.assertRaises(BadRequest) as cm:
            event_obj = bootstrap.IonObject('ResourceEvent',
                                            origin='specific',
                                            description='more testing',
                                            ts_created='2423')
            pub.publish_event_object(event_obj)
        self.assertIn('The ts_created value is not a valid timestamp',
                      cm.exception.message)

        with self.assertRaises(BadRequest) as cm:
            event_obj = bootstrap.IonObject('ResourceEvent',
                                            origin='specific',
                                            description='more testing',
                                            ts_created='1000494978462')
            pub.publish_event_object(event_obj)
        self.assertIn('This ts_created value is too old', cm.exception.message)

        with self.assertRaises(BadRequest) as cm:
            event_obj = bootstrap.IonObject('ResourceEvent',
                                            origin='specific',
                                            description='more testing')
            event_obj._id = '343434'
            pub.publish_event_object(event_obj)
        self.assertIn('The event object cannot contain a _id field',
                      cm.exception.message)

        ar.get(timeout=5)

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), self.count)
        self.assertEquals(res[0].description, "hello")
        self.assertAlmostEquals(int(res[0].ts_created),
                                int(get_ion_ts()),
                                delta=5000)

        self.assertEquals(res[1].description, "more testing")
        self.assertAlmostEquals(int(res[1].ts_created),
                                int(get_ion_ts()),
                                delta=5000)
Example #24
class Directory(object):
    """
    Frontend to a directory functionality backed by a datastore.
    A directory is a system wide datastore backend tree of entries with attributes and child entries.
    Entries can be identified by a path. The root is '/'.
    Every Org can have its own directory. The default directory is for the root Org (ION).
    """

    def __init__(self, orgname=None, datastore_manager=None, container=None):
        self.container = container or bootstrap.container_instance
        # Get an instance of datastore configured as directory.
        datastore_manager = datastore_manager or self.container.datastore_manager
        self.dir_store = datastore_manager.get_datastore(DataStore.DS_DIRECTORY, DataStore.DS_PROFILE.DIRECTORY)

        self.orgname = orgname or CFG.system.root_org
        self.is_root = (self.orgname == CFG.system.root_org)
        self.events_enabled = CFG.get_safe("service.directory.publish_events") is True   # Publish change events?

        self.event_pub = None
        self.event_sub = None

    def start(self):
        if self.events_enabled:
            # init change event publisher
            self.event_pub = EventPublisher()

            # Register to receive directory changes
            # self.event_sub = EventSubscriber(event_type="ContainerConfigModifiedEvent",
            #                                  origin="Directory",
            #                                  callback=self.receive_directory_change_event)

        # Create directory root entry (for current org) if not existing
        self.register("/", "DIR", sys_name=bootstrap.get_sys_name(), create_only=True)

    def stop(self):
        self.close()

    def close(self):
        """
        Close directory and all resources including datastore and event listener.
        """
        if self.event_sub:
            self.event_sub.deactivate()
        self.dir_store.close()

    # -------------------------------------------------------------------------
    # Directory register, lookup and find

    def lookup(self, parent, key=None, return_entry=False):
        """
        Read directory entry by key and parent node.
        @param return_entry  If True, returns DirEntry object if found, otherwise DirEntry attributes dict
        @retval Either current DirEntry attributes dict or DirEntry object or None if not found.
        """
        path = self._get_path(parent, key) if key else parent
        direntry = self._read_by_path(path)
        if return_entry:
            return direntry
        else:
            return direntry.attributes if direntry else None

    def lookup_mult(self, parent, keys=None, return_entry=False):
        """
        Read several directory entries by keys from the same parent node.
        @param return_entry  If True, returns a list of DirEntry objects, otherwise a list of DirEntry attributes dicts
        @retval  List with one entry per key: the DirEntry attributes dict or DirEntry object, or None if not found
        """
        direntry_list = self._read_by_path(parent, mult_keys=keys)
        if return_entry:
            return direntry_list
        else:
            return [direntry.attributes if direntry else None for direntry in direntry_list]

    def register(self, parent, key, create_only=False, return_entry=False, ensure_parents=True, **kwargs):
        """
        Add/replace an entry within directory, below a parent node or "/" root.
        Note: Replaces (not merges) the attribute values of the entry if existing.
        Register gives up when a concurrent write is detected, meaning that the other writer wins.
        @param create_only  If True, does not change an already existing entry
        @param return_entry  If True, returns DirEntry object of prior entry, otherwise DirEntry attributes dict
        @param ensure_parents  If True, make sure that parent nodes exist
        @retval  DirEntry if previously existing
        """
        if not (parent and key):
            raise BadRequest("Illegal arguments")
        if not type(parent) is str or not parent.startswith("/"):
            raise BadRequest("Illegal arguments: parent")

        dn = self._get_path(parent, key)
        log.debug("Directory.register(%s): %s", dn, kwargs)

        entry_old = None
        cur_time = get_ion_ts()
        # Must read existing entry by path to make sure to not create path twice
        direntry = self._read_by_path(dn)
        if direntry and create_only:
            # We only wanted to make sure entry exists. Do not change
            # NOTE: It is ambiguous to the caller whether we ran into this situation. Seems OK.
            return direntry if return_entry else direntry.attributes
        elif direntry:
            old_rev, old_ts, old_attr = direntry._rev, direntry.ts_updated, direntry.attributes
            direntry.attributes = kwargs
            direntry.ts_updated = cur_time
            try:
                self.dir_store.update(direntry)

                if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER):
                    self.event_pub.publish_event(event_type="DirectoryModifiedEvent",
                                                 origin=self.orgname + ".DIR", origin_type="DIR",
                                                 key=key, parent=parent, org=self.orgname,
                                                 sub_type="REGISTER." + parent[1:].replace("/", "."),
                                                 mod_type=DirectoryModificationType.UPDATE)
            except Conflict:
                # Concurrent update - we accept that we finished the race second and give up
                log.warn("Concurrent update to %s detected. We lost: %s", dn, kwargs)

            if return_entry:
                # Reset object back to prior state
                direntry.attributes = old_attr
                direntry.ts_updated = old_ts
                direntry._rev = old_rev
                entry_old = direntry
            else:
                entry_old = old_attr
        else:
            direntry = self._create_dir_entry(parent, key, attributes=kwargs, ts=cur_time)
            if ensure_parents:
                self._ensure_parents_exist([direntry])
            try:
                self.dir_store.create(direntry, create_unique_directory_id())
                if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER):
                    self.event_pub.publish_event(event_type="DirectoryModifiedEvent",
                                                 origin=self.orgname + ".DIR", origin_type="DIR",
                                                 key=key, parent=parent, org=self.orgname,
                                                 sub_type="REGISTER." + parent[1:].replace("/", "."),
                                                 mod_type=DirectoryModificationType.CREATE)
            except BadRequest as ex:
                if not ex.message.startswith("DirEntry already exists"):
                    raise
                # Concurrent create - we accept that we finished the race second and give up
                log.warn("Concurrent create of %s detected. We lost: %s", dn, kwargs)

        return entry_old
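
# A minimal usage sketch for register()/lookup() as defined above (not part of the
# original example). `directory` is assumed to be a started Directory instance; the
# parent/key/attribute names are illustrative.
def example_register_and_lookup(directory):
    # Create or replace /services/my_service with the given attributes
    prior = directory.register("/services", "my_service", host="10.0.0.1", port=5672)
    if prior is not None:
        log.debug("Replaced existing entry with attributes: %s", prior)

    # Read the current attributes back (None if the entry does not exist)
    attrs = directory.lookup("/services", "my_service")
    assert attrs["port"] == 5672

    # create_only=True leaves an existing entry untouched
    directory.register("/services", "my_service", create_only=True, port=9999)
    assert directory.lookup("/services", "my_service")["port"] == 5672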

    def register_safe(self, parent, key, **kwargs):
        """
        Convenience wrapper around register() that logs and swallows any error instead of raising it.
        """
        try:
            return self.register(parent, key, **kwargs)
        except Exception as ex:
            log.exception("Error registering path=%s/%s, args=%s", parent, key, kwargs)

    def register_mult(self, entries):
        """
        Registers multiple directory entries efficiently in one datastore access.
        Note: this fails if any of the entries already exists, so it is suitable for create only.
        """
        if type(entries) not in (list, tuple):
            raise BadRequest("Bad entries type")
        de_list = []
        cur_time = get_ion_ts()
        for parent, key, attrs in entries:
            direntry = self._create_dir_entry(parent, key, attributes=attrs, ts=cur_time)
            de_list.append(direntry)
        pe_list = self._ensure_parents_exist(de_list, create=False)
        de_list.extend(pe_list)
        deid_list = [create_unique_directory_id() for i in xrange(len(de_list))]
        self.dir_store.create_mult(de_list, deid_list)

        if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER):
            for de in de_list:
                self.event_pub.publish_event(event_type="DirectoryModifiedEvent",
                                             origin=self.orgname + ".DIR", origin_type="DIR",
                                             key=de.key, parent=de.parent, org=self.orgname,
                                             sub_type="REGISTER." + de.parent[1:].replace("/", "."),
                                             mod_type=DirectoryModificationType.CREATE)
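
# Sketch (illustrative names): bulk-register several entries in one datastore call via
# register_mult(); entries are (parent, key, attributes) tuples and must not exist yet.
def example_register_mult(directory):
    entries = [
        ("/agents", "agent_1", {"resource_id": "dev_1"}),
        ("/agents", "agent_2", {"resource_id": "dev_2"}),
    ]
    directory.register_mult(entries)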

    def unregister(self, parent, key=None, return_entry=False):
        """
        Remove an entry from the directory.
        Returns the attributes of the deleted DirEntry (or the DirEntry itself if return_entry is True), or None if not found.
        """
        path = self._get_path(parent, key) if key else parent
        log.debug("Removing content at path %s" % path)

        direntry = self._read_by_path(path)
        if direntry:
            self.dir_store.delete(direntry)
            if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER):
                self.event_pub.publish_event(event_type="DirectoryModifiedEvent",
                                             origin=self.orgname + ".DIR", origin_type="DIR",
                                             key=key, parent=parent, org=self.orgname,
                                             sub_type="UNREGISTER." + parent[1:].replace("/", "."),
                                             mod_type=DirectoryModificationType.DELETE)

        if direntry and not return_entry:
            return direntry.attributes
        else:
            return direntry

    def unregister_safe(self, parent, key):
        try:
            return self.unregister(parent, key)
        except Exception as ex:
            log.exception("Error unregistering path=%s/%s", parent, key)

    def find_child_entries(self, parent='/', direct_only=True, **kwargs):
        """
        Return all child entries (ordered by path) for the given parent path.
        Does not return the parent itself. Optionally also returns nested child entries.
        Additional kwargs are applied to constrain the search results (limit, descending, skip).
        @param parent  Path to parent (must start with "/")
        @param direct_only  If False, also includes nested child entries (children of children)
        @retval  A list of DirEntry objects for the matches
        """
        if not type(parent) is str or not parent.startswith("/"):
            raise BadRequest("Illegal argument parent: %s" % parent)
        if direct_only:
            start_key = [self.orgname, parent, 0]
            end_key = [self.orgname, parent]
            res = self.dir_store.find_by_view('directory', 'by_parent',
                start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)
        else:
            path = parent[1:].split("/")
            start_key = [self.orgname, path, 0]
            end_key = [self.orgname, list(path) + ["ZZZZZZ"]]
            res = self.dir_store.find_by_view('directory', 'by_path',
                start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)

        match = [value for docid, indexkey, value in res]
        return match

    def find_by_key(self, key=None, parent='/', **kwargs):
        """
        Returns a list of DirEntry for each directory entry that matches the given key name.
        If a parent is provided, only this parent and its subtree are searched.
        The returned entries all live in the same org's directory but may have different parents.
        """
        if key is None:
            raise BadRequest("Illegal arguments")
        if parent is None:
            raise BadRequest("Illegal arguments")
        start_key = [self.orgname, key, parent]
        end_key = [self.orgname, key, parent + "ZZZZZZ"]
        res = self.dir_store.find_by_view('directory', 'by_key',
            start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)

        match = [value for docid, indexkey, value in res]
        return match

    def find_by_value(self, subtree='/', attribute=None, value=None, **kwargs):
        """
        Returns a list of DirEntry with entries that have an attribute with the given value.
        """
        if attribute is None:
            raise BadRequest("Illegal arguments")
        if subtree is None:
            raise BadRequest("Illegal arguments")
        start_key = [self.orgname, attribute, value, subtree]
        end_key = [self.orgname, attribute, value, subtree + "ZZZZZZ"]
        res = self.dir_store.find_by_view('directory', 'by_attribute',
                        start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)

        match = [value for docid, indexkey, value in res]
        return match
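
# Sketch of the three query styles above (illustrative paths/attributes). All of them
# return DirEntry objects, so attribute values are reached via entry.attributes.
def example_directory_queries(directory):
    direct_children = directory.find_child_entries("/agents")
    all_descendants = directory.find_child_entries("/agents", direct_only=False)

    # Every entry named "agent_1" anywhere below /agents
    by_key = directory.find_by_key("agent_1", parent="/agents")

    # Every entry whose "resource_id" attribute equals "dev_1"
    by_value = directory.find_by_value("/", attribute="resource_id", value="dev_1")
    return direct_children, all_descendants, by_key, by_value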

    def remove_child_entries(self, parent, delete_parent=False):
        pass

    # -------------------------------------------------------------------------
    #  Concurrency Control

    def acquire_lock(self, key, timeout=LOCK_EXPIRES_DEFAULT, lock_holder=None, lock_info=None):
        """
        Attempts to atomically acquire a lock with the given key and namespace.
        If holder is given and holder already has the lock, renew.
        Checks for expired locks.
        @param timeout  Int value of millis until lock expiration or 0 for no expiration
        @param lock_holder  Str value identifying lock holder for subsequent exclusive access
        @param lock_info  Dict value for additional attributes describing lock
        @retval  bool - could lock be acquired?
        """
        if not key:
            raise BadRequest("Missing argument: key")
        if "/" in key:
            raise BadRequest("Invalid argument value: key")

        lock_attrs = {LOCK_EXPIRES_ATTR: get_ion_ts_millis() + timeout if timeout else 0,
                      LOCK_HOLDER_ATTR: lock_holder or ""}
        if lock_info:
            lock_attrs.update(lock_info)
        expires = int(lock_attrs[LOCK_EXPIRES_ATTR])  # Check type just to be sure
        if expires and get_ion_ts_millis() > expires:
            raise BadRequest("Invalid lock expiration value: %s", expires)

        direntry = self._create_dir_entry(LOCK_DIR_PATH, key, attributes=lock_attrs)
        lock_result = False
        try:
            # This is an atomic operation. It relies on the unique key constraint of the directory service
            self.dir_store.create(direntry, create_unique_directory_id())
            lock_result = True
        except BadRequest as ex:
            if ex.message.startswith("DirEntry already exists"):
                de_old = self.lookup(LOCK_DIR_PATH, key, return_entry=True)
                if de_old:
                    if self._is_lock_expired(de_old):
                        # Lock is expired: remove, try to relock
                        # Note: even as holder, it's safer to reacquire in this case than renew
                        log.warn("Removing expired lock: %s/%s", de_old.parent, de_old.key)
                        try:
                            # This is safe, because if the lock was deleted + recreated in the meantime, it has a different id
                            self._delete_lock(de_old)
                            # Try recreate - may fail again due to concurrency
                            self.dir_store.create(direntry, create_unique_directory_id())
                            lock_result = True
                        except Exception:
                            log.exception("Error releasing/reacquiring expired lock %s", de_old.key)
                    elif lock_holder and de_old.attributes[LOCK_HOLDER_ATTR] == lock_holder:
                        # Holder currently holds the lock: renew
                        log.info("Renewing lock %s/%s for holder %s", de_old.parent, de_old.key, lock_holder)
                        de_old.attributes = lock_attrs
                        try:
                            self.dir_store.update(de_old)
                            lock_result = True
                        except Exception:
                            log.exception("Error renewing expired lock %s", de_old.key)
                # We do nothing if we could not find the lock now...
            else:
                raise

        log.debug("Directory.acquire_lock(%s): %s -> %s", key, lock_attrs, lock_result)

        return lock_result

    def is_locked(self, key):
        if not key:
            raise BadRequest("Missing argument: key")
        if "/" in key:
            raise BadRequest("Invalid argument value: key")

        lock_entry = self.lookup(LOCK_DIR_PATH, key, return_entry=True)
        return lock_entry and not self._is_lock_expired(lock_entry)

    def release_lock(self, key):
        """
        Releases lock identified by key.
        Raises NotFound if lock does not exist.
        """
        if not key:
            raise BadRequest("Missing argument: key")
        if "/" in key:
            raise BadRequest("Invalid argument value: key")

        log.debug("Directory.release_lock(%s)", key)

        dir_entry = self.lookup(LOCK_DIR_PATH, key, return_entry=True)
        if dir_entry:
            self._delete_lock(dir_entry)
        else:
            raise NotFound("Lock %s not found" % key)

    def release_expired_locks(self):
        """Removes all expired locks
        """
        de_list = self.find_child_entries(LOCK_DIR_PATH, direct_only=True)
        for de in de_list:
            if self._is_lock_expired(de):
                log.warn("Removing expired lock %s/%s", de.parent, de.key)
                try:
                    # This is safe, because if lock was deleted + recreated in the meantime, it has different id
                    self._delete_lock(de)
                except Exception:
                    log.exception("Error releasing expired lock %s", de.key)

    def _is_lock_expired(self, lock_entry):
        if not lock_entry:
            raise BadRequest("No lock entry provided")
        return 0 < lock_entry.attributes[LOCK_EXPIRES_ATTR] <= get_ion_ts_millis()

    def _delete_lock(self, lock_entry):
        lock_entry_id = lock_entry._id
        self.dir_store.delete(lock_entry_id)
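
# Sketch of the lock protocol above (illustrative key/holder names). In this variant the
# timeout is given in milliseconds, per the acquire_lock() docstring.
def example_locking(directory):
    if not directory.acquire_lock("ingest", timeout=30000, lock_holder="worker_1"):
        # Another process holds a non-expired lock
        return directory.is_locked("ingest")
    try:
        pass  # ... work that requires exclusive access ...
    finally:
        directory.release_lock("ingest")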

    # -------------------------------------------------------------------------
    # Internal functions

    def receive_directory_change_event(self, event_msg, headers):
        # @TODO add support to fold updated config into container config
        pass


    def _get_path(self, parent, key):
        """
        Returns the qualified directory path for a directory entry.
        """
        if parent == "/":
            return parent + key
        elif parent.startswith("/"):
            return parent + "/" + key
        else:
            raise BadRequest("Illegal parent: %s" % parent)

    def _get_key(self, path):
        """
        Returns the key from a qualified directory path
        """
        parent, key = path.rsplit("/", 1)
        return key

    def _create_dir_entry(self, parent, key, orgname=None, ts=None, attributes=None):
        """
        Standard way to create a DirEntry object.
        """
        orgname = orgname or self.orgname
        ts = ts or get_ion_ts()
        attributes = attributes if attributes is not None else {}
        parent = parent or "/"
        de = DirEntry(org=orgname, parent=parent, key=key, attributes=attributes, ts_created=ts, ts_updated=ts)
        return de

    def _read_by_path(self, path, orgname=None, mult_keys=None):
        """
        Given a qualified path, find entry in directory and return DirEntry object or None if not found.
        """
        if path is None:
            raise BadRequest("Illegal arguments")
        orgname = orgname or self.orgname
        if mult_keys:
            parent = path or "/"
            key = mult_keys
        else:
            parent, key = path.rsplit("/", 1)
            parent = parent or "/"
        find_key = [orgname, key, parent]
        view_res = self.dir_store.find_by_view('directory', 'by_key', key=find_key, id_only=True, convert_doc=True)

        match = [doc for docid, index, doc in view_res]
        if mult_keys:
            entries_by_key = {doc.key: doc for doc in match}
            entries = [entries_by_key.get(key, None) for key in mult_keys]
            return entries
        else:
            if len(match) > 1:
                log.error("More than one directory entry found for key %s" % path)
                return match[0]
            elif match:
                return match[0]
            return None

    def _get_unique_parents(self, entry_list):
        """Returns a sorted, unique list of parents of DirEntries (excluding the root /)"""
        if entry_list and type(entry_list) not in (list, tuple):
            entry_list = [entry_list]
        parents = set()
        for entry in entry_list:
            parents.add(entry.parent)
        if "/" in parents:
            parents.remove("/")
        return sorted(parents)

    def _ensure_parents_exist(self, entry_list, create=True):
        parents_list = self._get_unique_parents(entry_list)
        pe_list = []
        try:
            for parent in parents_list:
                pe = self.lookup(parent)
                if pe is None:
                    pp, pk = parent.rsplit("/", 1)
                    direntry = self._create_dir_entry(parent=pp, key=pk)
                    pe_list.append(direntry)
                    if create:
                        try:
                            self.dir_store.create(direntry, create_unique_directory_id())
                        except BadRequest as ex:
                            if not ex.message.startswith("DirEntry already exists"):
                                raise
                            # Else: Concurrent create
        except Exception as ex:
            log.warn("_ensure_parents_exist(): Error creating directory parents", exc_info=True)
        return pe_list

    def _cleanup_outdated_entries(self, dir_entries, common="key"):
        """
        This function takes all DirEntry from the list and removes all but the most recent one
        by ts_updated timestamp. It returns the most recent DirEntry and removes the others by
        direct datastore operations. If there are multiple entries with most recent timestamp, the
        first encountered is kept and the others non-deterministically removed.
        Note: This operation can be called for DirEntries without common keys, e.g. for all
        entries registering an agent for a device.
        """
        if not dir_entries:
            return
        newest_entry = dir_entries[0]
        try:
            for de in dir_entries:
                if int(de.ts_updated) > int(newest_entry.ts_updated):
                    newest_entry = de

            remove_list = [de for de in dir_entries if de is not newest_entry]

            log.info("Attempting to cleanup these directory entries: %s" % remove_list)
            for de in remove_list:
                try:
                    self.dir_store.delete(de)
                except Exception as ex:
                    log.warn("Removal of outdated %s directory entry failed: %s" % (common, de))
            log.info("Cleanup of %s old %s directory entries succeeded" % (len(remove_list), common))

        except Exception as ex:
            log.warn("Cleanup of multiple directory entries for %s failed: %s" % (
                common, str(ex)))

        return newest_entry
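
# Sketch (illustrative key): deduplicate multiple registrations of the same logical entry
# using the helper above - only the entry with the newest ts_updated survives.
def example_dedupe(directory):
    entries = directory.find_by_key("agent_1", parent="/agents")
    newest = directory._cleanup_outdated_entries(entries, common="agent_1")
    return newest
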
Example #25
class SimpleResourceAgent(BaseSimpleResourceAgent):
    """
    A resource agent is an ION process of type "agent" that exposes the standard
    resource agent service interface.
    """

    process_type = "agent"

    # Override in subclass to publish specific types of events
    COMMAND_EVENT_TYPE = "ResourceCommandEvent"
    # Override in subclass to set specific origin type
    ORIGIN_TYPE = "Resource"

    def __init__(self, *args, **kwargs):
        super(SimpleResourceAgent, self).__init__(*args, **kwargs)

        # The ID of the AgentInstance subtype resource object
        self.agent_id = None
        # The ID of the AgentDefinition subtype resource object
        self.agent_def_id = None
        # The ID of the target resource object, e.g. a device id
        self.resource_id = None
        # The resource type of the target resource object, e.g. InstrumentDevice or PlatformDevice.
        # Must be set by the implementing class.
        self.resource_type = None

    def _on_init(self):
        log.debug("Resource Agent initializing. name=%s, resource_id=%s" % (self._proc_name, self.resource_id))
        self._event_publisher = EventPublisher()

    def _on_quit(self):
        pass


    ##############################################################
    # Governance interfaces and helpers
    ##############################################################

    def _get_process_org_governance_name(self):
        '''
        Look for the org_name associated with this process, default to System root
        '''
        if hasattr(self,'org_governance_name'):
            org_governance_name = self.org_governance_name
            log.debug("Getting org_governance_name from process: " + org_governance_name)
        else:
            org_governance_name = self.container.governance_controller.system_root_org_name
            log.debug("Getting org_governance_name from container: " + org_governance_name)

        return org_governance_name


    def negotiate(self, resource_id="", sap_in=None):
        pass

    def execute(self, resource_id="", command=None):
        return self._execute("rcmd_", command)

    def execute_agent(self, resource_id="", command=None):
        return self._execute("acmd_", command)

    def _execute(self, cprefix, command):
        if not command:
            raise iex.BadRequest("execute argument 'command' not present")
        if not command.command:
            raise iex.BadRequest("command not set")

        cmd_res = IonObject("AgentCommandResult", command_id=command.command_id, command=command.command)
        cmd_func = getattr(self, cprefix + str(command.command), None)
        if cmd_func:
            cmd_res.ts_execute = get_ion_ts()
            try:
                res = cmd_func(*command.args, **command.kwargs)
                cmd_res.status = 0
                cmd_res.result = res
            except iex.IonException as ex:
                # TODO: Distinguish application vs. uncaught exception
                cmd_res.status = getattr(ex, 'status_code', -1)
                cmd_res.result = str(ex)
                log.warn("Agent command %s failed with trace=%s" % (command.command, traceback.format_exc()))
        else:
            log.info("Agent command not supported: %s" % (command.command))
            ex = iex.NotFound("Command not supported: %s" % command.command)
            cmd_res.status = iex.NotFound.status_code
            cmd_res.result = str(ex)

        sub_type = "%s.%s" % (command.command, cmd_res.status)
        event_data = self._post_execute_event_hook(event_type=self.COMMAND_EVENT_TYPE,
            origin=self.resource_id, origin_type=self.ORIGIN_TYPE,
            sub_type=sub_type, command=str(command.command),
            result=str(cmd_res.result))
        post_event = self._event_publisher.publish_event(**event_data)

        return cmd_res

    def _post_execute_event_hook(self, **kwargs):
        """
        Hook to add additional values to the event to be published.
        @param kwargs  The filled-out event fields (of type COMMAND_EVENT_TYPE) as keyword arguments
        @retval  dict of event fields passed to publish_event
        """
        return kwargs

    def get_capabilities(self, resource_id="", capability_types=[]):
        capability_types = capability_types or ["CONV_TYPE", "AGT_CMD", "AGT_PAR", "RES_CMD", "RES_PAR"]
        cap_list = []
        if "CONV_TYPE" in capability_types:
            cap_list.extend([("CONV_TYPE", cap) for cap in self._get_agent_conv_types()])
        if "AGT_CMD" in capability_types:
            cap_list.extend([("AGT_CMD", cap) for cap in self._get_agent_commands()])
        if "AGT_PAR" in capability_types:
            cap_list.extend([("AGT_PAR", cap) for cap in self._get_agent_params()])
        if "RES_CMD" in capability_types:
            cap_list.extend([("RES_CMD", cap) for cap in self._get_resource_commands()])
        if "RES_PAR" in capability_types:
            cap_list.extend([("RES_PAR", cap) for cap in self._get_resource_params()])
        return cap_list

    def set_param(self, resource_id="", name='', value=''):
        if not hasattr(self, "rpar_%s" % name):
            raise iex.NotFound('Resource parameter not existing: %s' % name)
        pvalue = getattr(self, "rpar_%s" % name)
        setattr(self, "rpar_%s" % name, value)
        return pvalue

    def get_param(self, resource_id="", name=''):
        try:
            return getattr(self, "rpar_%s" % name)
        except AttributeError:
            raise iex.NotFound('Resource parameter not found: %s' % name)

    def set_agent_param(self, resource_id="", name='', value=''):
        if not hasattr(self, "apar_%s" % name):
            raise iex.NotFound('Agent parameter not existing: %s' % name)
        pvalue = getattr(self, "apar_%s" % name)
        setattr(self, "apar_%s" % name, value)
        return pvalue

    def get_agent_param(self, resource_id="", name=''):
        try:
            return getattr(self, "apar_%s" % name)
        except AttributeError:
            raise iex.NotFound('Agent parameter not found: %s' % name)

    def _get_agent_conv_types(self):
        return []

    def _get_agent_params(self):
        return self._get_names(self, "apar_")

    def _get_agent_commands(self):
        return self._get_names(self, "acmd_")

    def _get_resource_params(self):
        return self._get_names(self, "rpar_")

    def _get_resource_commands(self):
        return self._get_names(self, "rcmd_")

    def _get_names(self, obj, prefix):
        return [name[len(prefix):] for name in dir(obj) if name.startswith(prefix)]
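
# Sketch of the naming convention SimpleResourceAgent relies on: resource commands are
# methods prefixed with "rcmd_", agent commands with "acmd_", and parameters are
# attributes prefixed with "rpar_"/"apar_". The subclass and values below are
# illustrative, not part of the original example.
class ExampleDeviceAgent(SimpleResourceAgent):
    apar_debug_level = "info"    # read/written via get_agent_param/set_agent_param("debug_level")
    rpar_sample_rate = 1.0       # read/written via get_param/set_param("sample_rate")

    def rcmd_sample(self, count=1):
        # dispatched by execute() for an AgentCommand whose command == "sample"
        return {"samples": count}

    def acmd_reset(self):
        # dispatched by execute_agent() for command == "reset"
        return "ok"
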
Example #26
class Directory(object):
    """
    Frontend to a directory functionality backed by a datastore.
    A directory is a system-wide, datastore-backed tree of entries with attributes and child entries.
    Entries can be identified by a path. The root is '/'.
    Every Org can have its own directory. The default directory is for the root Org (ION).
    """

    def __init__(self, orgname=None, datastore_manager=None, container=None):
        self.container = container or bootstrap.container_instance
        # Get an instance of datastore configured as directory.
        datastore_manager = datastore_manager or self.container.datastore_manager
        self.dir_store = datastore_manager.get_datastore(DataStore.DS_DIRECTORY, DataStore.DS_PROFILE.DIRECTORY)

        self.orgname = orgname or CFG.system.root_org
        self.is_root = (self.orgname == CFG.system.root_org)
        self.events_enabled = CFG.get_safe("service.directory.publish_events") is True   # Publish change events?

        self.event_pub = None
        self.event_sub = None

    def start(self):
        if self.events_enabled:
            # init change event publisher
            self.event_pub = EventPublisher()

            # Register to receive directory changes
            # self.event_sub = EventSubscriber(event_type="ContainerConfigModifiedEvent",
            #                                  origin="Directory",
            #                                  callback=self.receive_directory_change_event)

        # Create directory root entry (for current org) if not existing
        self.register("/", "DIR", sys_name=bootstrap.get_sys_name(), create_only=True)

    def stop(self):
        self.close()

    def close(self):
        """
        Close directory and all resources including datastore and event listener.
        """
        if self.event_sub:
            self.event_sub.deactivate()
        self.dir_store.close()
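
# Sketch: creating and starting a Directory for the root Org. A bootstrapped container is
# assumed (bootstrap.container_instance is used when no container is passed in).
def example_directory_setup():
    directory = Directory()    # defaults to CFG.system.root_org
    directory.start()          # creates the "/" root entry ("DIR") if missing
    try:
        return directory.lookup("/", "DIR")
    finally:
        directory.stop()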

    # -------------------------------------------------------------------------
    # Directory register, lookup and find

    def lookup(self, parent, key=None, return_entry=False):
        """
        Read directory entry by key and parent node.
        @param return_entry  If True, returns DirEntry object if found, otherwise DirEntry attributes dict
        @retval Either current DirEntry attributes dict or DirEntry object or None if not found.
        """
        path = self._get_path(parent, key) if key else parent
        direntry = self._read_by_path(path)
        if return_entry:
            return direntry
        else:
            return direntry.attributes if direntry else None

    def lookup_mult(self, parent, keys=None, return_entry=False):
        """
        Read several directory entries by keys from the same parent node.
        @param return_entry  If True, returns DirEntry object if found, otherwise DirEntry attributes dict
        @retval Either list of current DirEntry attributes dict or DirEntry object or None if not found.
        """
        direntry_list = self._read_by_path(parent, mult_keys=keys)
        if return_entry:
            return direntry_list
        else:
            return [direntry.attributes if direntry else None for direntry in direntry_list]
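
# Sketch for lookup_mult() (illustrative keys): read several keys under one parent with a
# single datastore query; missing keys come back as None, in the same order as requested.
def example_lookup_mult(directory):
    keys = ["agent_1", "agent_2", "missing"]
    attrs_list = directory.lookup_mult("/agents", keys=keys)
    return {k: a for k, a in zip(keys, attrs_list) if a is not None}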

    def register(self, parent, key, create_only=False, return_entry=False, ensure_parents=True, **kwargs):
        """
        Add or replace an entry within the directory, below a parent node or the "/" root.
        Note: Replaces (does not merge) the attribute values of an existing entry.
        If a concurrent write is detected, register gives up (logging a warning) and the other writer wins.
        @param create_only  If True, does not change an already existing entry
        @param return_entry  If True, returns the DirEntry object of the prior entry, otherwise its attributes dict
        @param ensure_parents  If True, make sure that parent nodes exist
        @retval  Prior DirEntry (or its attributes dict), or None if no entry existed before
        """
        if not (parent and key):
            raise BadRequest("Illegal arguments")
        if not type(parent) is str or not parent.startswith("/"):
            raise BadRequest("Illegal arguments: parent")

        dn = self._get_path(parent, key)
        log.debug("Directory.register(%s): %s", dn, kwargs)

        entry_old = None
        cur_time = get_ion_ts()
        # Must read existing entry by path to make sure to not create path twice
        direntry = self._read_by_path(dn)
        if direntry and create_only:
            # Entry already exists and create_only was requested: leave it unchanged
            # NOTE: The caller cannot tell that the entry already existed. That seems acceptable here.
            return direntry if return_entry else direntry.attributes
        elif direntry:
            old_rev, old_ts, old_attr = direntry._rev, direntry.ts_updated, direntry.attributes
            direntry.attributes = kwargs
            direntry.ts_updated = cur_time
            try:
                self.dir_store.update(direntry)

                if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER):
                    self.event_pub.publish_event(event_type="DirectoryModifiedEvent",
                                                 origin=self.orgname + ".DIR", origin_type="DIR",
                                                 key=key, parent=parent, org=self.orgname,
                                                 sub_type="REGISTER." + parent[1:].replace("/", "."),
                                                 mod_type=DirectoryModificationType.UPDATE)
            except Conflict:
                # Concurrent update - we accept that we finished the race second and give up
                log.warn("Concurrent update to %s detected. We lost: %s", dn, kwargs)

            if return_entry:
                # Reset object back to prior state
                direntry.attributes = old_attr
                direntry.ts_updated = old_ts
                direntry._rev = old_rev
                entry_old = direntry
            else:
                entry_old = old_attr
        else:
            direntry = self._create_dir_entry(parent, key, attributes=kwargs, ts=cur_time)
            if ensure_parents:
                self._ensure_parents_exist([direntry])
            try:
                self.dir_store.create(direntry, create_unique_directory_id())
                if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER):
                    self.event_pub.publish_event(event_type="DirectoryModifiedEvent",
                                                 origin=self.orgname + ".DIR", origin_type="DIR",
                                                 key=key, parent=parent, org=self.orgname,
                                                 sub_type="REGISTER." + parent[1:].replace("/", "."),
                                                 mod_type=DirectoryModificationType.CREATE)
            except BadRequest as ex:
                if not ex.message.startswith("DirEntry already exists"):
                    raise
                # Concurrent create - we accept that we finished the race second and give up
                log.warn("Concurrent create of %s detected. We lost: %s", dn, kwargs)

        return entry_old

    def register_safe(self, parent, key, **kwargs):
        """
        Convenience wrapper around register() that logs and swallows any error instead of raising it.
        """
        try:
            return self.register(parent, key, **kwargs)
        except Exception as ex:
            log.exception("Error registering path=%s/%s, args=%s", parent, key, kwargs)

    def register_mult(self, entries):
        """
        Registers multiple directory entries efficiently in one datastore access.
        Note: this fails if any of the entries already exists, so it is suitable for create only.
        """
        if type(entries) not in (list, tuple):
            raise BadRequest("Bad entries type")
        de_list = []
        cur_time = get_ion_ts()
        for parent, key, attrs in entries:
            direntry = self._create_dir_entry(parent, key, attributes=attrs, ts=cur_time)
            de_list.append(direntry)
        pe_list = self._ensure_parents_exist(de_list, create=False)
        de_list.extend(pe_list)
        deid_list = [create_unique_directory_id() for i in xrange(len(de_list))]
        self.dir_store.create_mult(de_list, deid_list)

        if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER):
            for de in de_list:
                self.event_pub.publish_event(event_type="DirectoryModifiedEvent",
                                             origin=self.orgname + ".DIR", origin_type="DIR",
                                             key=de.key, parent=de.parent, org=self.orgname,
                                             sub_type="REGISTER." + de.parent[1:].replace("/", "."),
                                             mod_type=DirectoryModificationType.CREATE)

    def unregister(self, parent, key=None, return_entry=False):
        """
        Remove an entry from the directory.
        Returns the attributes of the deleted DirEntry (or the DirEntry itself if return_entry is True), or None if not found.
        """
        path = self._get_path(parent, key) if key else parent
        log.debug("Removing content at path %s" % path)

        direntry = self._read_by_path(path)
        if direntry:
            self.dir_store.delete(direntry)
            if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER):
                self.event_pub.publish_event(event_type="DirectoryModifiedEvent",
                                             origin=self.orgname + ".DIR", origin_type="DIR",
                                             key=key, parent=parent, org=self.orgname,
                                             sub_type="UNREGISTER." + parent[1:].replace("/", "."),
                                             mod_type=DirectoryModificationType.DELETE)

        if direntry and not return_entry:
            return direntry.attributes
        else:
            return direntry

    def unregister_safe(self, parent, key):
        try:
            return self.unregister(parent, key)
        except Exception as ex:
            log.exception("Error unregistering path=%s/%s", parent, key)

    def find_child_entries(self, parent='/', direct_only=True, **kwargs):
        """
        Return all child entries (ordered by path) for the given parent path.
        Does not return the parent itself. Optionally also returns nested child entries.
        Additional kwargs are applied to constrain the search results (limit, descending, skip).
        @param parent  Path to parent (must start with "/")
        @param direct_only  If False, also includes nested child entries (children of children)
        @retval  A list of DirEntry objects for the matches
        """
        if not type(parent) is str or not parent.startswith("/"):
            raise BadRequest("Illegal argument parent: %s" % parent)
        if direct_only:
            start_key = [self.orgname, parent, 0]
            end_key = [self.orgname, parent]
            res = self.dir_store.find_by_view('directory', 'by_parent',
                start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)
        else:
            path = parent[1:].split("/")
            start_key = [self.orgname, path, 0]
            end_key = [self.orgname, list(path) + ["ZZZZZZ"]]
            res = self.dir_store.find_by_view('directory', 'by_path',
                start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)

        match = [value for docid, indexkey, value in res]
        return match

    def find_by_key(self, key=None, parent='/', **kwargs):
        """
        Returns a list of DirEntry for each directory entry that matches the given key name.
        If a parent is provided, only this parent and its subtree are searched.
        The returned entries all live in the same org's directory but may have different parents.
        """
        if key is None:
            raise BadRequest("Illegal arguments")
        if parent is None:
            raise BadRequest("Illegal arguments")
        start_key = [self.orgname, key, parent]
        end_key = [self.orgname, key, parent + "ZZZZZZ"]
        res = self.dir_store.find_by_view('directory', 'by_key',
            start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)

        match = [value for docid, indexkey, value in res]
        return match

    def find_by_value(self, subtree='/', attribute=None, value=None, **kwargs):
        """
        Returns a list of DirEntry with entries that have an attribute with the given value.
        """
        if attribute is None:
            raise BadRequest("Illegal arguments")
        if subtree is None:
            raise BadRequest("Illegal arguments")
        start_key = [self.orgname, attribute, value, subtree]
        end_key = [self.orgname, attribute, value, subtree + "ZZZZZZ"]
        res = self.dir_store.find_by_view('directory', 'by_attribute',
                        start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)

        match = [value for docid, indexkey, value in res]
        return match

    def remove_child_entries(self, parent, delete_parent=False):
        pass

    # -------------------------------------------------------------------------
    #  Concurrency Control

    def acquire_lock(self, key, timeout=LOCK_EXPIRES_DEFAULT, lock_holder=None, lock_info=None):
        """
        Attempts to atomically acquire a lock with the given key and namespace.
        If holder is given and holder already has the lock, renew.
        Checks for expired locks.
        @param timeout  Secs until lock expiration or 0 for no expiration
        @param lock_holder  Str value identifying lock holder for subsequent exclusive access
        @param lock_info  Dict value for additional attributes describing lock
        @retval  bool - could lock be acquired?
        """
        if not key:
            raise BadRequest("Missing argument: key")
        if "/" in key:
            raise BadRequest("Invalid argument value: key")

        lock_attrs = {LOCK_EXPIRES_ATTR: get_ion_ts_millis() + int(1000*timeout) if timeout else 0,
                      LOCK_HOLDER_ATTR: lock_holder or ""}
        if lock_info:
            lock_attrs.update(lock_info)
        expires = int(lock_attrs[LOCK_EXPIRES_ATTR])  # Check type just to be sure
        if expires and get_ion_ts_millis() > expires:
            raise BadRequest("Invalid lock expiration value: %s", expires)

        direntry = self._create_dir_entry(LOCK_DIR_PATH, key, attributes=lock_attrs)
        lock_result = False
        try:
            # This is an atomic operation. It relies on the unique key constraint of the directory service
            self.dir_store.create(direntry, create_unique_directory_id())
            lock_result = True
        except BadRequest as ex:
            if ex.message.startswith("DirEntry already exists"):
                de_old = self.lookup(LOCK_DIR_PATH, key, return_entry=True)
                if de_old:
                    if self._is_lock_expired(de_old):
                        # Lock is expired: remove, try to relock
                        # Note: even as holder, it's safer to reacquire in this case than renew
                        log.warn("Removing expired lock: %s/%s", de_old.parent, de_old.key)
                        try:
                            # This is safe, because if the lock was deleted + recreated in the meantime, it has a different id
                            self._delete_lock(de_old)
                            # Try recreate - may fail again due to concurrency
                            self.dir_store.create(direntry, create_unique_directory_id())
                            lock_result = True
                        except BadRequest as ex:
                            if not ex.message.startswith("DirEntry already exists"):
                                log.exception("Error releasing/reacquiring expired lock %s", de_old.key)
                        except Exception:
                            log.exception("Error releasing/reacquiring expired lock %s", de_old.key)
                    elif lock_holder and de_old.attributes[LOCK_HOLDER_ATTR] == lock_holder:
                        # Holder currently holds the lock: renew
                        log.debug("Renewing lock %s/%s for holder %s", de_old.parent, de_old.key, lock_holder)
                        de_old.attributes = lock_attrs
                        try:
                            self.dir_store.update(de_old)
                            lock_result = True
                        except Exception:
                            log.exception("Error renewing expired lock %s", de_old.key)
                # We do nothing if we could not find the lock now...
            else:
                raise

        log.debug("Directory.acquire_lock(%s): %s -> %s", key, lock_attrs, lock_result)

        return lock_result

    def is_locked(self, key):
        if not key:
            raise BadRequest("Missing argument: key")
        if "/" in key:
            raise BadRequest("Invalid argument value: key")

        lock_entry = self.lookup(LOCK_DIR_PATH, key, return_entry=True)
        return lock_entry and not self._is_lock_expired(lock_entry)

    def release_lock(self, key, lock_holder=None):
        """
        Releases lock identified by key.
        Raises NotFound if lock does not exist.
        """
        if not key:
            raise BadRequest("Missing argument: key")
        if "/" in key:
            raise BadRequest("Invalid argument value: key")

        log.debug("Directory.release_lock(%s)", key)

        dir_entry = self.lookup(LOCK_DIR_PATH, key, return_entry=True)
        if dir_entry:
            if lock_holder and dir_entry.attributes[LOCK_HOLDER_ATTR] != lock_holder:
                raise BadRequest("Cannot release lock - not currently lock holder")
            self._delete_lock(dir_entry)
        else:
            raise NotFound("Lock %s not found" % key)

    def release_expired_locks(self):
        """Removes all expired locks
        """
        de_list = self.find_child_entries(LOCK_DIR_PATH, direct_only=True)
        for de in de_list:
            if self._is_lock_expired(de):
                log.warn("Removing expired lock %s/%s", de.parent, de.key)
                try:
                    # This is safe, because if lock was deleted + recreated in the meantime, it has different id
                    self._delete_lock(de)
                except Exception:
                    log.exception("Error releasing expired lock %s", de.key)

    def _is_lock_expired(self, lock_entry):
        if not lock_entry:
            raise BadRequest("No lock entry provided")
        return 0 < lock_entry.attributes[LOCK_EXPIRES_ATTR] <= get_ion_ts_millis()

    def _delete_lock(self, lock_entry):
        lock_entry_id = lock_entry._id
        self.dir_store.delete(lock_entry_id)
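
# Sketch for this variant of the lock API (illustrative names): the timeout is given in
# seconds here, a holder renews by calling acquire_lock() again with the same lock_holder,
# and release_lock() refuses to release on behalf of a different holder.
def example_lock_renewal(directory):
    if directory.acquire_lock("scheduler", timeout=10, lock_holder="proc_A"):
        # Re-acquiring with the same holder before expiry renews the lease
        directory.acquire_lock("scheduler", timeout=10, lock_holder="proc_A")
        directory.release_lock("scheduler", lock_holder="proc_A")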

    # -------------------------------------------------------------------------
    # Internal functions

    def receive_directory_change_event(self, event_msg, headers):
        # @TODO add support to fold updated config into container config
        pass


    def _get_path(self, parent, key):
        """
        Returns the qualified directory path for a directory entry.
        """
        if parent == "/":
            return parent + key
        elif parent.startswith("/"):
            return parent + "/" + key
        else:
            raise BadRequest("Illegal parent: %s" % parent)

    def _get_key(self, path):
        """
        Returns the key from a qualified directory path
        """
        parent, key = path.rsplit("/", 1)
        return key

    def _create_dir_entry(self, parent, key, orgname=None, ts=None, attributes=None):
        """
        Standard way to create a DirEntry object.
        """
        orgname = orgname or self.orgname
        ts = ts or get_ion_ts()
        attributes = attributes if attributes is not None else {}
        parent = parent or "/"
        de = DirEntry(org=orgname, parent=parent, key=key, attributes=attributes, ts_created=ts, ts_updated=ts)
        return de

    def _read_by_path(self, path, orgname=None, mult_keys=None):
        """
        Given a qualified path, find entry in directory and return DirEntry object or None if not found.
        """
        if path is None:
            raise BadRequest("Illegal arguments")
        orgname = orgname or self.orgname
        if mult_keys:
            parent = path or "/"
            key = mult_keys
        else:
            parent, key = path.rsplit("/", 1)
            parent = parent or "/"
        find_key = [orgname, key, parent]
        view_res = self.dir_store.find_by_view('directory', 'by_key', key=find_key, id_only=True, convert_doc=True)

        match = [doc for docid, index, doc in view_res]
        if mult_keys:
            entries_by_key = {doc.key: doc for doc in match}
            entries = [entries_by_key.get(key, None) for key in mult_keys]
            return entries
        else:
            if len(match) > 1:
                log.error("More than one directory entry found for key %s" % path)
                return match[0]
            elif match:
                return match[0]
            return None

    def _get_unique_parents(self, entry_list):
        """Returns a sorted, unique list of parents of DirEntries (excluding the root /)"""
        if entry_list and type(entry_list) not in (list, tuple):
            entry_list = [entry_list]
        parents = set()
        for entry in entry_list:
            parents.add(entry.parent)
        if "/" in parents:
            parents.remove("/")
        return sorted(parents)

    def _ensure_parents_exist(self, entry_list, create=True):
        parents_list = self._get_unique_parents(entry_list)
        pe_list = []
        try:
            for parent in parents_list:
                pe = self.lookup(parent)
                if pe is None:
                    pp, pk = parent.rsplit("/", 1)
                    direntry = self._create_dir_entry(parent=pp, key=pk)
                    pe_list.append(direntry)
                    if create:
                        try:
                            self.dir_store.create(direntry, create_unique_directory_id())
                        except BadRequest as ex:
                            if not ex.message.startswith("DirEntry already exists"):
                                raise
                            # Else: Concurrent create
        except Exception as ex:
            log.warn("_ensure_parents_exist(): Error creating directory parents", exc_info=True)
        return pe_list

    def _cleanup_outdated_entries(self, dir_entries, common="key"):
        """
        This function takes all DirEntry from the list and removes all but the most recent one
        by ts_updated timestamp. It returns the most recent DirEntry and removes the others by
        direct datastore operations. If there are multiple entries with most recent timestamp, the
        first encountered is kept and the others non-deterministically removed.
        Note: This operation can be called for DirEntries without common keys, e.g. for all
        entries registering an agent for a device.
        """
        if not dir_entries:
            return
        newest_entry = dir_entries[0]
        try:
            for de in dir_entries:
                if int(de.ts_updated) > int(newest_entry.ts_updated):
                    newest_entry = de

            remove_list = [de for de in dir_entries if de is not newest_entry]

            log.info("Attempting to cleanup these directory entries: %s" % remove_list)
            for de in remove_list:
                try:
                    self.dir_store.delete(de)
                except Exception as ex:
                    log.warn("Removal of outdated %s directory entry failed: %s" % (common, de))
            log.info("Cleanup of %s old %s directory entries succeeded" % (len(remove_list), common))

        except Exception as ex:
            log.warn("Cleanup of multiple directory entries for %s failed: %s" % (
                common, str(ex)))

        return newest_entry
Example #27
    def test_lookup_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition(
            'lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        data_product = DataProduct(name='lookup data product')
        tdom, sdom = time_series_domain()
        data_product.temporal_domain = tdom.dump()
        data_product.spatial_domain = sdom.dump()

        data_product_id = self.dpsc_cli.create_data_product(
            data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)
        data_producer = DataProducer(name='producer')
        data_producer.producer_context = DataProcessProducerContext()
        data_producer.producer_context.configuration['qc_keys'] = [
            'offset_document'
        ]
        data_producer_id, _ = self.rrclient.create(data_producer)
        self.addCleanup(self.rrclient.delete, data_producer_id)
        assoc, _ = self.rrclient.create_association(
            subject=data_product_id,
            object=data_producer_id,
            predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, assoc)

        document_keys = self.damsclient.list_qc_references(data_product_id)

        self.assertEquals(document_keys, ['offset_document'])
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('offset_document', {'offset_a': 2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        dataset_id = dataset_ids[0]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()

        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                   predicate=PRED.hasStream,
                                                   id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(granule)

        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
        np.testing.assert_array_almost_equal(rdt2['calibrated'],
                                             np.array([22.0]))

        svm.stored_value_cas('updated_document', {'offset_a': 3.0})
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        ep.publish_event(origin=data_product_id,
                         reference_keys=['updated_document'])

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()
        gevent.sleep(2)  # Yield so that the event goes through
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt2['temp'], np.array([20., 20.]))
        np.testing.assert_array_almost_equal(rdt2['calibrated'],
                                             np.array([22.0, 23.0]))
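
# Sketch of the update pattern exercised by the test above: write a new version of the
# lookup document with StoredValueManager, then publish an ExternalReferencesUpdatedEvent
# against the data product so subsequent granules are processed with the new values.
# The document key and offset are illustrative.
def example_update_lookup(container, data_product_id):
    svm = StoredValueManager(container)
    svm.stored_value_cas('offset_document', {'offset_a': 4.0})
    EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent).publish_event(
        origin=data_product_id, reference_keys=['offset_document'])
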
Example #28
    def test_pub_on_different_subsubtypes(self):
        res_list = [DotDict(ar=event.AsyncResult(), gq=queue.Queue(), count=0) for i in xrange(4)]

        def cb_gen(num):
            def cb(event, *args, **kwargs):
                res_list[num].count += 1
                res_list[num].gq.put(event)
                if event.description == "end":
                    res_list[num].ar.set()
            return cb

        sub0 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1.*", callback=cb_gen(0))
        sub0.start()

        sub1 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1.a", callback=cb_gen(1))
        sub1.start()

        sub2 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="*.a", callback=cb_gen(2))
        sub2.start()

        sub3 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1", callback=cb_gen(3))
        sub3.start()

        pub1 = EventPublisher(event_type="ResourceModifiedEvent")

        pub1.publish_event(origin="one", sub_type="st1.a", description="1")
        pub1.publish_event(origin="two", sub_type="st1", description="2")
        pub1.publish_event(origin="three", sub_type="st1.b", description="3")

        pub1.publish_event(origin="four", sub_type="st2.a", description="4")
        pub1.publish_event(origin="five", sub_type="st2", description="5")

        pub1.publish_event(origin="six", sub_type="a", description="6")
        pub1.publish_event(origin="seven", sub_type="", description="7")

        pub1.publish_event(origin="end", sub_type="st1.a", description="end")
        pub1.publish_event(origin="end", sub_type="st1", description="end")

        [res_list[i].ar.get(timeout=5) for i in xrange(3)]

        sub0.stop()
        sub1.stop()
        sub2.stop()
        sub3.stop()

        for i in xrange(4):
            res_list[i].res = []
            for x in xrange(res_list[i].count):
                res_list[i].res.append(res_list[i].gq.get(timeout=5))

        self.assertEquals(len(res_list[0].res), 3)
        self.assertEquals(res_list[0].res[0].description, "1")

        self.assertEquals(len(res_list[1].res), 2)
        self.assertEquals(res_list[1].res[0].description, "1")

        self.assertEquals(len(res_list[2].res), 3)
        self.assertEquals(res_list[2].res[0].description, "1")

        self.assertEquals(len(res_list[3].res), 2)
        self.assertEquals(res_list[3].res[0].description, "2")
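
# For reference, the deliveries asserted above. The sub_type matching appears to follow
# AMQP topic semantics, where "*" matches exactly one dot-separated token:
#   sub0 "st1.*" -> descriptions "1", "3", "end"   (st1.a, st1.b, st1.a)
#   sub1 "st1.a" -> descriptions "1", "end"        (exact matches only)
#   sub2 "*.a"   -> descriptions "1", "4", "end"   (st1.a, st2.a, st1.a; bare "a" is not matched)
#   sub3 "st1"   -> descriptions "2", "end"        (exact matches only)
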
Example #29
class ContainerManager(object):
    def __init__(self, container, handlers=DEFAULT_HANDLERS):
        self.container = container
        self.running = False
        # Ensure start() completes before any event is handled, and that each event is
        # either handled before stop() begins, or its handler runs after stop() has
        # completed and the event is dropped
        self.lock = Lock()
        self.handlers = handlers[:]

    def start(self):
        # Install the container tracer (could be its own ...)
        self.container_tracer = ContainerTracer()
        self.container_tracer.start_tracing()
        self.container.tracer = CallTracer
        self.container.tracer.configure(CFG.get_safe("container.tracer", {}))

        ## create queue listener and publisher
        self.sender = EventPublisher(event_type="ContainerManagementResult")
        self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event)
        with self.lock:
            self.running = True
            self.receiver.start()
        log.info('ready for container management requests')

    def stop(self):
        log.debug('container management stopping')
        with self.lock:
            self.receiver.stop()
            self.sender.close()
            self.running = False
        log.debug('container management stopped')

        self.container_tracer.stop_tracing()

    def add_handler(self, handler):
        self.handlers.append(handler)

    def _get_handlers(self, action):
        out = []
        for handler in self.handlers:
            if handler.can_handle_request(action):
                out.append(handler)
        return out

    def _receive_event(self, event, headers):
        with self.lock:
            if not isinstance(event, ContainerManagementRequest):
                log.trace('ignoring wrong type event: %r', event)
                return
            if not self.running:
                log.warn('ignoring admin message received after shutdown: %s', event.action)
                return
            predicate = ContainerSelector.from_object(event.predicate)
            if predicate.should_handle(self.container):
                log.trace('handling admin message: %s', event.action)
                self._perform_action(event.action)
            else:
                log.trace('ignoring admin action: %s', event.action)
                if SEND_RESULT_IF_NOT_SELECTED:
                    self.sender.publish_event(origin=self.container.id, action=event.action, outcome='not selected')
                    log.debug('received action: %s, outcome: not selected', event.action)

    def _perform_action(self, action):
        handlers = self._get_handlers(action)
        if not handlers:
            log.info('action accepted but no handlers found: %s', action)
            result = 'unhandled'
            self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
            log.debug('received action: %s, outcome: %s', action, result)
        else:
            for handler in handlers:
                try:
                    result = handler.handle_request(action) or "completed"
                except Exception as e:
                    log.error("handler %r failed to perform action: %s", handler, action, exc_info=True)
                    result = e
                self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
                log.debug('performed action: %s, outcome: %s', action, result)
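
ContainerManager above only duck-types its handlers: anything exposing can_handle_request(action) and handle_request(action) can be registered through add_handler(). Below is a minimal sketch of such a handler; the class name and its echo behaviour are illustrative only and not part of the source.

class EchoActionHandler(object):
    """Hypothetical handler showing the interface ContainerManager expects."""

    def can_handle_request(self, action):
        # A real handler would inspect the action type/content here.
        return True

    def handle_request(self, action):
        # The returned string becomes the published outcome; returning None
        # makes ContainerManager fall back to "completed".
        return 'echoed %r' % (action,)

# Registration sketch (assumes an already constructed manager):
#   manager.add_handler(EchoActionHandler())
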
class ScienceGranuleIngestionWorker(TransformStreamListener, BaseIngestionWorker):
    CACHE_LIMIT=CFG.get_safe('container.ingestion_cache',5)

    def __init__(self, *args,**kwargs):
        TransformStreamListener.__init__(self, *args, **kwargs)
        BaseIngestionWorker.__init__(self, *args, **kwargs)

        #--------------------------------------------------------------------------------
        # Ingestion Cache
        # - Datasets
        # - Coverage instances
        #--------------------------------------------------------------------------------
        self._datasets  = collections.OrderedDict()
        self._coverages = collections.OrderedDict()

        self._bad_coverages = {}

        self.time_stats = Accumulator(format='%3f')
        # unique ID to identify this worker in log msgs
        self._id = uuid.uuid1()



    def on_start(self): #pragma no cover
        #--------------------------------------------------------------------------------
        # Explicit on_start
        #--------------------------------------------------------------------------------

        # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created
        # We want explicit management of the thread and subscriber object for ingestion

        TransformStreamProcess.on_start(self)
        
        self.queue_name = self.CFG.get_safe('process.queue_name',self.id)
        self.subscriber = StreamSubscriber(process=self, exchange_name=self.queue_name, callback=self.receive_callback)
        self.thread_lock = RLock()
        
        #--------------------------------------------------------------------------------
        # Normal on_start after this point
        #--------------------------------------------------------------------------------

        BaseIngestionWorker.on_start(self)
        self._rpc_server = self.container.proc_manager._create_listening_endpoint(from_name=self.id, process=self)
        self.add_endpoint(self._rpc_server)

        self.event_publisher = EventPublisher(OT.DatasetModified)
        self.stored_value_manager = StoredValueManager(self.container)

        self.lookup_docs = self.CFG.get_safe('process.lookup_docs',[])
        self.input_product = self.CFG.get_safe('process.input_product','')
        self.qc_enabled = self.CFG.get_safe('process.qc_enabled', True)
        self.ignore_gaps = self.CFG.get_safe('service.ingestion.ignore_gaps', True)
        if not self.ignore_gaps:
            log.warning("Gap handling is not supported in release 2")
        self.ignore_gaps = True
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(event_type=OT.ExternalReferencesUpdatedEvent, callback=self._add_lookups, auto_delete=True)
        self.add_endpoint(self.lookup_monitor)
        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        self.connection_id = ''
        self.connection_index = None
        
        self.start_listener()

    def on_quit(self): #pragma no cover
        self.event_publisher.close()
        self.qc_publisher.close()
        if self.subscriber_thread:
            self.stop_listener()
        for stream, coverage in self._coverages.iteritems():
            try:
                coverage.close(timeout=5)
            except:
                log.exception('Problems closing the coverage')
        self._coverages.clear()
        TransformStreamListener.on_quit(self)
        BaseIngestionWorker.on_quit(self)

    
    def start_listener(self):
        # We use a lock here to prevent possible race conditions from starting multiple listeners and coverage clobbering
        with self.thread_lock:
            self.subscriber_thread = self._process.thread_manager.spawn(self.subscriber.listen, thread_name='%s-subscriber' % self.id)

    def stop_listener(self):
        # Avoid race conditions with coverage operations (Don't start a listener at the same time as closing one)
        with self.thread_lock:
            self.subscriber.close()
            self.subscriber_thread.join(timeout=10)
            for stream, coverage in self._coverages.iteritems():
                try:
                    coverage.close(timeout=5)
                except:
                    log.exception('Problems closing the coverage')
            self._coverages.clear()
            self.subscriber_thread = None

    def pause(self):
        if self.subscriber_thread is not None:
            self.stop_listener()


    def resume(self):
        if self.subscriber_thread is None:
            self.start_listener()


    def _add_lookups(self, event, *args, **kwargs):
        if event.origin == self.input_product:
            if isinstance(event.reference_keys, list):
                self.new_lookups.put(event.reference_keys)

    def _new_dataset(self, stream_id):
        '''
        Adds a new dataset to the internal cache of the ingestion worker
        '''
        rr_client = self.container.resource_registry
        datasets, _ = rr_client.find_subjects(subject_type=RT.Dataset,predicate=PRED.hasStream,object=stream_id,id_only=True)
        if datasets:
            return datasets[0]
        return None

    def _get_data_products(self, dataset_id):
        rr_client = self.container.resource_registry
        data_products, _ = rr_client.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct, id_only=False)
        return data_products


    def initialize_metadata(self, dataset_id, rdt):
        '''
        Initializes a metadata document in the object store. The document
        contains information about the bounds and extents of the dataset as
        well as other metadata to improve performance.

        '''

        object_store = self.container.object_store
        key = dataset_id
        bounds = {}
        extents = {}
        last_values = {}
        rough_size = 0
        for k,v in rdt.iteritems():
            v = v[:].flatten()
            if v.dtype.char not in ('S', 'O', 'U', 'V'):
                bounds[k] = (np.min(v), np.max(v))
                last_values[k] = v[-1]
            extents[k] = len(rdt)
            rough_size += len(rdt) * 4

        doc = {'bounds':bounds, 'extents':extents, 'last_values':last_values, 'size': rough_size}
        doc = numpy_walk(doc)
        object_store.create_doc(doc, object_id=key)
        return 

    def update_metadata(self, dataset_id, rdt):
        '''
        Updates the metadata document with the latest information available
        '''

        self.update_data_product_metadata(dataset_id, rdt)

        # Grab the document
        object_store = self.container.object_store
        key = dataset_id
        try:
            doc = object_store.read_doc(key)
        except NotFound:
            return self.initialize_metadata(dataset_id, rdt)
        # These are the fields we're interested in
        bounds = doc['bounds']
        extents = doc['extents']
        last_values = doc['last_values']
        rough_size = doc['size']
        for k,v in rdt.iteritems():
            if k not in bounds:
                continue

            v = v[:].flatten() # Get the numpy representation (dense array).
            if v.dtype.char not in ('S', 'O', 'U', 'V'):
                l_min = np.min(v)
                l_max = np.max(v)
                o_min, o_max = bounds[k]
                bounds[k] = (min(l_min, o_min), max(l_max, o_max))
                last_values[k] = v[-1]
            # Bounds and last values were refreshed above; now grow the extents
            extents[k] = extents[k] + len(rdt)

            rough_size += len(rdt) * 4
            doc['size'] = rough_size
        # Sanitize it
        doc = numpy_walk(doc)
        object_store.update_doc(doc)

    def update_data_product_metadata(self, dataset_id, rdt):
        data_products = self._get_data_products(dataset_id)
        for data_product in data_products:
            self.update_time(data_product, rdt[rdt.temporal_parameter][:])
            self.update_geo(data_product, rdt)
            self.container.resource_registry.update(data_product)

    def update_time(self, data_product, t):
        #TODO: Account for non NTP-based timestamps
        t_min = np.min(t)
        t_min -= 2208988800
        t_max = np.max(t)
        t_max -= 2208988800

        if not data_product.nominal_datetime.start_datetime:
            data_product.nominal_datetime.start_datetime = t_min
        data_product.nominal_datetime.end_datetime = t_max

    def update_geo(self, data_product, rdt):
        lat = None
        lon = None
        for p in rdt:
            if rdt._rd[p] is None:
                continue
            # TODO: Not an all encompassing list of acceptable names for lat and lon
            if p.lower() in ('lat', 'latitude', 'y_axis'):
                lat = np.asscalar(rdt[p][-1])
            elif p.lower() in ('lon', 'longitude', 'x_axis'):
                lon = np.asscalar(rdt[p][-1])
            if lat and lon:
                break

        if lat and lon:
            data_product.geospatial_bounds.geospatial_latitude_limit_north = lat
            data_product.geospatial_bounds.geospatial_latitude_limit_south = lat
            data_product.geospatial_bounds.geospatial_longitude_limit_east = lon
            data_product.geospatial_bounds.geospatial_longitude_limit_west = lon


    
    def get_dataset(self,stream_id):
        '''
        Memoization (LRU) of _new_dataset
        '''
        try:
            result = self._datasets.pop(stream_id)
        except KeyError:
            result = self._new_dataset(stream_id)
            if result is None:
                return None
            if len(self._datasets) >= self.CACHE_LIMIT:
                self._datasets.popitem(0)
        self._datasets[stream_id] = result
        return result

    def get_coverage(self, stream_id):
        '''
        Memoization (LRU) of _get_coverage
        '''
        try:
            result = self._coverages.pop(stream_id)
        except KeyError:
            dataset_id = self.get_dataset(stream_id)
            if dataset_id is None:
                return None
            result = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a')
            if result is None:
                return None
            if len(self._coverages) >= self.CACHE_LIMIT:
                k, coverage = self._coverages.popitem(0)
                coverage.close(timeout=5)
        self._coverages[stream_id] = result
        return result

    def gap_coverage(self,stream_id):
        try:
            old_cov = self._coverages.pop(stream_id)
            dataset_id = self.get_dataset(stream_id)
            sdom, tdom = time_series_domain()
            new_cov = DatasetManagementService._create_simplex_coverage(dataset_id, old_cov.parameter_dictionary, sdom, tdom, old_cov._persistence_layer.inline_data_writes)
            old_cov.close()
            result = new_cov
        except KeyError:
            result = self.get_coverage(stream_id)
        self._coverages[stream_id] = result
        return result


    def dataset_changed(self, dataset_id, extents, window):
        self.event_publisher.publish_event(origin=dataset_id, author=self.id, extents=extents, window=window)

    def evaluate_qc(self, rdt, dataset_id):
        if self.qc_enabled:
            for field in rdt.fields:
                if not (field.endswith('glblrng_qc') or field.endswith('loclrng_qc')):
                    continue
                try:
                    values = rdt[field]
                    if values is not None:
                        if not all(values):
                            topology = np.where(values==0)
                            timestamps = rdt[rdt.temporal_parameter][topology[0]]
                            self.flag_qc_parameter(dataset_id, field, timestamps.tolist(), {})
                except:
                    continue
    def flag_qc_parameter(self, dataset_id, parameter, temporal_values, configuration):
        data_product_ids, _ = self.container.resource_registry.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct, id_only=True)
        for data_product_id in data_product_ids:
            description = 'Automated Quality Control Alerted on %s' % parameter
            self.qc_publisher.publish_event(origin=data_product_id, qc_parameter=parameter, temporal_values=temporal_values, configuration=configuration, description=description)

    def update_connection_index(self, connection_id, connection_index):
        self.connection_id = connection_id
        try:
            connection_index = int(connection_index)
            self.connection_index = connection_index
        except ValueError:
            pass

    def has_gap(self, connection_id, connection_index):
        if connection_id:
            if not self.connection_id:
                self.update_connection_index(connection_id, connection_index)
                return False
            else:
                if connection_id != self.connection_id:
                    return True
        if connection_index:
            if self.connection_index is None:
                self.update_connection_index(connection_id, connection_index)
                return False
            try:
                connection_index = int(connection_index)
                if connection_index != self.connection_index+1:
                    return True
            except ValueError:
                pass

        return False

    def splice_coverage(self, dataset_id, coverage):
        log.info('Splicing new coverage')
        DatasetManagementService._splice_coverage(dataset_id, coverage)

    @handle_stream_exception()
    def recv_packet(self, msg, stream_route, stream_id):
        ''' receive packet for ingestion '''
        log.trace('received granule for stream %s', stream_id)

        if msg == {}:
            log.error('Received empty message from stream: %s', stream_id)
            return
        # Message validation
        if not isinstance(msg, Granule):
            log.error('Ingestion received a message that is not a granule: %s', msg)
            return


        rdt = RecordDictionaryTool.load_from_granule(msg)
        if rdt is None:
            log.error('Invalid granule (no RDT) for stream %s', stream_id)
            return
        if not len(rdt):
            log.debug('Empty granule for stream %s', stream_id)
            return

        self.persist_or_timeout(stream_id, rdt)

    def persist_or_timeout(self, stream_id, rdt):
        """ retry writing coverage multiple times and eventually time out """
        done = False
        timeout = 2
        start = time.time()
        while not done:
            try:
                self.add_granule(stream_id, rdt)
                done = True
            except:
                log.exception('An issue with coverage, retrying after a bit')
                if (time.time() - start) > MAX_RETRY_TIME: # After an hour just give up
                    dataset_id = self.get_dataset(stream_id)
                    log.error("We're giving up, the coverage needs to be inspected %s", DatasetManagementService._get_coverage_path(dataset_id))
                    raise

                if stream_id in self._coverages:
                    log.info('Popping coverage for stream %s', stream_id)
                    self._coverages.pop(stream_id)

                gevent.sleep(timeout)
                if timeout > (60 * 5):
                    timeout = 60 * 5
                else:
                    timeout *= 2


    def expand_coverage(self, coverage, elements, stream_id):
        try:
            coverage.insert_timesteps(elements, oob=False)
        except IOError as e:
            log.error("Couldn't insert time steps for coverage: %s",
                      coverage.persistence_dir, exc_info=True)
            try:
                coverage.close()
            finally:
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(e.message)
    
    def get_stored_values(self, lookup_value):
        if not self.new_lookups.empty():
            new_values = self.new_lookups.get()
            self.lookup_docs = new_values + self.lookup_docs
        lookup_value_document_keys = self.lookup_docs
        for key in lookup_value_document_keys:
            try:
                document = self.stored_value_manager.read_value(key)
                if lookup_value in document:
                    return document[lookup_value] 
            except NotFound:
                log.warning('Specified lookup document does not exist')
        return None


    def fill_lookup_values(self, rdt):
        rdt.fetch_lookup_values()
        for field in rdt.lookup_values():
            value = self.get_stored_values(rdt.context(field).lookup_value)
            if value:
                rdt[field] = value

    def insert_sparse_values(self, coverage, rdt, stream_id):

        self.fill_lookup_values(rdt)
        for field in rdt.fields:
            if rdt._rd[field] is None:
                continue
            if not isinstance(rdt.context(field).param_type, SparseConstantType):
                # We only set sparse values before insert
                continue 
            value = rdt[field]
            try:
                coverage.set_parameter_values(param_name=field, value=value)
            except ValueError as e:
                if "'lower_bound' cannot be >= 'upper_bound'" in e.message:
                    continue
                else:
                    raise
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          coverage.persistence_dir, exc_info=True)
                try:
                    coverage.close()
                finally:
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(e.message)

    def insert_values(self, coverage, rdt, stream_id):
        elements = len(rdt)

        start_index = coverage.num_timesteps - elements

        for k,v in rdt.iteritems():
            if isinstance(v, SparseConstantValue):
                continue
            slice_ = slice(start_index, None)
            try:
                coverage.set_parameter_values(param_name=k, tdoa=slice_, value=v)
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          coverage.persistence_dir, exc_info=True)
                try:
                    coverage.close()
                finally:
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(e.message)
            except IndexError as e:
                log.error("Value set: %s", v[:])
                data_products, _ = self.container.resource_registry.find_subjects(object=stream_id, predicate=PRED.hasStream, subject_type=RT.DataProduct)
                for data_product in data_products:
                    log.exception("Index exception with %s, trying to insert %s into coverage with shape %s", 
                                  data_product.name,
                                  k,
                                  v.shape)

    
        if 'ingestion_timestamp' in coverage.list_parameters():
            t_now = time.time()
            ntp_time = TimeUtils.ts_to_units(coverage.get_parameter_context('ingestion_timestamp').uom, t_now)
            coverage.set_parameter_values(param_name='ingestion_timestamp', tdoa=slice_, value=ntp_time)
    
    def add_granule(self,stream_id, rdt):
        ''' Appends the granule's data to the coverage and persists it. '''
        debugging = log.isEnabledFor(DEBUG)
        timer = Timer() if debugging else None
        if stream_id in self._bad_coverages:
            log.info('Message attempting to be inserted into bad coverage: %s',
                     DatasetManagementService._get_coverage_path(self.get_dataset(stream_id)))
            
        #--------------------------------------------------------------------------------
        # Gap Analysis
        #--------------------------------------------------------------------------------
        if not self.ignore_gaps:
            gap_found = self.has_gap(rdt.connection_id, rdt.connection_index)
            if gap_found:
                log.warning('Gap Found!   New connection: (%s,%s)\tOld Connection: (%s,%s)', rdt.connection_id, rdt.connection_index, self.connection_id, self.connection_index)
                self.gap_coverage(stream_id)



        #--------------------------------------------------------------------------------
        # Coverage determination and appending
        #--------------------------------------------------------------------------------
        dataset_id = self.get_dataset(stream_id)
        if not dataset_id:
            log.error('No dataset could be determined on this stream: %s', stream_id)
            return

        try:
            coverage = self.get_coverage(stream_id)
        except IOError as e:
            log.error("Couldn't open coverage: %s",
                      DatasetManagementService._get_coverage_path(self.get_dataset(stream_id)))
            raise CorruptionError(e.message)

        if debugging:
            path = DatasetManagementService._get_coverage_path(dataset_id)
            log.debug('%s: add_granule stream %s dataset %s coverage %r file %s',
                      self._id, stream_id, dataset_id, coverage, path)

        if not coverage:
            log.error('Could not persist coverage from granule, coverage is None')
            return
        #--------------------------------------------------------------------------------
        # Actual persistence
        #--------------------------------------------------------------------------------

        elements = len(rdt)
        if rdt[rdt.temporal_parameter] is None:
            elements = 0 

        self.insert_sparse_values(coverage,rdt,stream_id)
        
        if debugging:
            timer.complete_step('checks') # lightweight ops, should be zero
        
        self.expand_coverage(coverage, elements, stream_id)
        
        if debugging:
            timer.complete_step('insert')

        self.insert_values(coverage, rdt, stream_id)
        
        if debugging:
            timer.complete_step('keys')
        
        DatasetManagementService._save_coverage(coverage)
        
        if debugging:
            timer.complete_step('save')
        
        start_index = coverage.num_timesteps - elements

        if not self.ignore_gaps and gap_found:
            self.splice_coverage(dataset_id, coverage)

        self.evaluate_qc(rdt, dataset_id)
        
        if debugging:
            timer.complete_step('notify')
            self._add_timing_stats(timer)

        self.update_connection_index(rdt.connection_id, rdt.connection_index)

        self.update_metadata(dataset_id, rdt)
        self.dataset_changed(dataset_id,coverage.num_timesteps,(start_index,start_index+elements))

    def _add_timing_stats(self, timer):
        """ add stats from latest coverage operation to Accumulator and periodically log results """
        self.time_stats.add(timer)
        if self.time_stats.get_count() % REPORT_FREQUENCY>0:
            return

        if log.isEnabledFor(TRACE):
            # report per step
            for step in 'checks', 'insert', 'keys', 'save', 'notify':
                log.debug('%s step %s times: %s', self._id, step, self.time_stats.to_string(step))
        # report totals
        log.debug('%s total times: %s', self._id, self.time_stats)
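
The gap logic in has_gap()/update_connection_index() above boils down to: a gap exists when the connection id changes, or when the new connection index is not exactly one past the previously seen index. A stand-alone sketch of that rule follows; the function and argument names are local to the sketch and not part of the worker's API.

def detect_gap(last_id, last_index, new_id, new_index):
    # Connection id changed (and one was already known) -> gap.
    if new_id and last_id and new_id != last_id:
        return True
    # Connection index present but not consecutive -> gap.
    if new_index and last_index is not None:
        try:
            return int(new_index) != last_index + 1
        except ValueError:
            return False
    return False

# detect_gap('conn-a', 4, 'conn-a', 5)  -> False (consecutive)
# detect_gap('conn-a', 4, 'conn-a', 6)  -> True  (index skipped)
# detect_gap('conn-a', 4, 'conn-b', 5)  -> True  (new connection id)
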
Example #31
class QCPostProcessing(SimpleProcess):
    '''
    QC Post Processing Process

    This process gives ION clients and operators the ability to evaluate the automated quality control flags on
    various data products. It should be run periodically with overlapping spans of data to ensure complete
    dataset QC verification.

    The parameters that this process accepts as configuration are:
        - dataset_id: The dataset identifier, required.
        - start_time: Unix timestamp, defaults to 24 hours in the past
        - end_time: Unix timestamp, defaults to current time
        - qc_params: a list of qc functions to evaluate, currently supported functions are: ['glblrng_qc',
          'spketst_qc', 'stuckvl_qc'], defaults to all

    '''

    qc_suffixes = ['glblrng_qc', 'spketst_qc', 'stuckvl_qc']
    def on_start(self):
        SimpleProcess.on_start(self)
        self.data_retriever = DataRetrieverServiceProcessClient(process=self)
        self.interval_key = self.CFG.get_safe('process.interval_key',None)
        self.qc_params    = self.CFG.get_safe('process.qc_params',[])
        validate_is_not_none(self.interval_key, 'An interval key is necessary to launch this process')
        self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent, origin=self.interval_key, callback=self._event_callback, auto_delete=True)
        self.add_endpoint(self.event_subscriber)
        self.resource_registry = self.container.resource_registry
        self.run_interval = self.CFG.get_safe('service.qc_processing.run_interval', 24)
    
    def _event_callback(self, *args, **kwargs):
        log.info('QC Post Processing Triggered')
        dataset_ids, _ = self.resource_registry.find_resources(restype=RT.Dataset, id_only=True)
        for dataset_id in dataset_ids:
            log.info('QC Post Processing for dataset %s', dataset_id)
            try:
                self.process(dataset_id)
            except BadRequest as e:
                if 'Problems reading from the coverage' in e.message:
                    log.error('Failed to read from dataset %s', dataset_id, exc_info=True)

    def process(self, dataset_id, start_time=0, end_time=0):
        if not dataset_id:
            raise BadRequest('No dataset id specified.')
        now = time.time()
        start_time = start_time or (now - (3600*(self.run_interval+1))) # Every N hours with 1 hour of overlap
        end_time   = end_time or now
        
        qc_params  = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes
        
        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        log.debug('Iterating over the data blocks')

        for st,et in self.chop(int(start_time),int(end_time)):
            log.debug('Chopping %s:%s', st, et)
            log.debug("Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s')", dataset_id, st, et)
            try:
                granule = self.data_retriever.retrieve(dataset_id, query={'start_time':st, 'end_time':et})
            except BadRequest:
                data_products, _ = self.container.resource_registry.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct)
                for data_product in data_products:
                    log.exception('Failed to perform QC Post Processing on %s', data_product.name)
                    log.error('Calculated Start Time: %s', st)
                    log.error('Calculated End Time:   %s', et)
                raise
            log.debug('Retrieved Data')
            rdt = RecordDictionaryTool.load_from_granule(granule)
            qc_fields = [i for i in rdt.fields if any([i.endswith(j) for j in qc_params])]
            log.debug('QC Fields: %s', qc_fields)
            for field in qc_fields:
                val = rdt[field]
                if val is None:
                    continue
                if not np.all(val):
                    log.debug('Found QC Alerts')
                    indexes = np.where(val==0)
                    timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                    self.flag_qc_parameter(dataset_id, field, timestamps.tolist(),{})



    def flag_qc_parameter(self, dataset_id, parameter, temporal_values, configuration):
        log.info('Flagging QC for %s', parameter)
        data_product_ids, _ = self.resource_registry.find_subjects(object=dataset_id, subject_type=RT.DataProduct, predicate=PRED.hasDataset, id_only=True)
        for data_product_id in data_product_ids:
            self.qc_publisher.publish_event(origin=data_product_id, qc_parameter=parameter, temporal_values=temporal_values, configuration=configuration)

    @classmethod
    def chop(cls, start_time, end_time):
        while start_time < end_time:
            yield (start_time, min(start_time+3600, end_time))
            start_time = min(start_time+3600, end_time)
        return
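
chop() above walks a time range in one-hour blocks, clipping the final block to end_time. Here is a stand-alone sketch of the same behaviour; the step parameter is added for illustration only (the original hard-codes 3600 seconds).

def chop(start_time, end_time, step=3600):
    # Yield consecutive (start, end) windows, the last one clipped to end_time.
    while start_time < end_time:
        yield (start_time, min(start_time + step, end_time))
        start_time = min(start_time + step, end_time)

assert list(chop(0, 7500)) == [(0, 3600), (3600, 7200), (7200, 7500)]
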
Example #32
class ScienceGranuleIngestionWorker(TransformStreamListener,
                                    BaseIngestionWorker):
    CACHE_LIMIT = CFG.get_safe('container.ingestion_cache', 5)

    def __init__(self, *args, **kwargs):
        TransformStreamListener.__init__(self, *args, **kwargs)
        BaseIngestionWorker.__init__(self, *args, **kwargs)

        #--------------------------------------------------------------------------------
        # Ingestion Cache
        # - Datasets
        # - Coverage instances
        #--------------------------------------------------------------------------------
        self._datasets = collections.OrderedDict()
        self._coverages = collections.OrderedDict()

        self._bad_coverages = {}

        self.time_stats = Accumulator(format='%3f')
        # unique ID to identify this worker in log msgs
        self._id = uuid.uuid1()

    def on_start(self):  #pragma no cover
        #--------------------------------------------------------------------------------
        # Explicit on_start
        #--------------------------------------------------------------------------------

        # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created
        # We want explicit management of the thread and subscriber object for ingestion

        TransformStreamProcess.on_start(self)

        self.queue_name = self.CFG.get_safe('process.queue_name', self.id)
        self.subscriber = StreamSubscriber(process=self,
                                           exchange_name=self.queue_name,
                                           callback=self.receive_callback)
        self.thread_lock = RLock()

        #--------------------------------------------------------------------------------
        # Normal on_start after this point
        #--------------------------------------------------------------------------------

        BaseIngestionWorker.on_start(self)
        self._rpc_server = self.container.proc_manager._create_listening_endpoint(
            from_name=self.id, process=self)
        self.add_endpoint(self._rpc_server)

        self.event_publisher = EventPublisher(OT.DatasetModified)
        self.stored_value_manager = StoredValueManager(self.container)

        self.lookup_docs = self.CFG.get_safe('process.lookup_docs', [])
        self.input_product = self.CFG.get_safe('process.input_product', '')
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(
            event_type=OT.ExternalReferencesUpdatedEvent,
            callback=self._add_lookups,
            auto_delete=True)
        self.add_endpoint(self.lookup_monitor)
        self.connection_id = ''
        self.connection_index = None

        self.start_listener()

    def on_quit(self):  #pragma no cover
        self.event_publisher.close()
        if self.subscriber_thread:
            self.stop_listener()
        for stream, coverage in self._coverages.iteritems():
            try:
                coverage.close(timeout=5)
            except:
                log.exception('Problems closing the coverage')
        self._coverages.clear()
        TransformStreamListener.on_quit(self)
        BaseIngestionWorker.on_quit(self)

    def start_listener(self):
        # We use a lock here to prevent possible race conditions from starting multiple listeners and coverage clobbering
        with self.thread_lock:
            self.subscriber_thread = self._process.thread_manager.spawn(
                self.subscriber.listen, thread_name='%s-subscriber' % self.id)

    def stop_listener(self):
        # Avoid race conditions with coverage operations (Don't start a listener at the same time as closing one)
        with self.thread_lock:
            self.subscriber.close()
            self.subscriber_thread.join(timeout=10)
            for stream, coverage in self._coverages.iteritems():
                try:
                    coverage.close(timeout=5)
                except:
                    log.exception('Problems closing the coverage')
            self._coverages.clear()
            self.subscriber_thread = None

    def pause(self):
        if self.subscriber_thread is not None:
            self.stop_listener()

    def resume(self):
        if self.subscriber_thread is None:
            self.start_listener()

    def _add_lookups(self, event, *args, **kwargs):
        if event.origin == self.input_product:
            if isinstance(event.reference_keys, list):
                self.new_lookups.put(event.reference_keys)

    def _new_dataset(self, stream_id):
        '''
        Adds a new dataset to the internal cache of the ingestion worker
        '''
        rr_client = self.container.resource_registry
        datasets, _ = rr_client.find_subjects(subject_type=RT.Dataset,
                                              predicate=PRED.hasStream,
                                              object=stream_id,
                                              id_only=True)
        if datasets:
            return datasets[0]
        return None

    def _get_data_products(self, dataset_id):
        rr_client = self.container.resource_registry
        data_products, _ = rr_client.find_subjects(object=dataset_id,
                                                   predicate=PRED.hasDataset,
                                                   subject_type=RT.DataProduct,
                                                   id_only=False)
        return data_products

    #--------------------------------------------------------------------------------
    # Metadata Handlers
    #--------------------------------------------------------------------------------

    def initialize_metadata(self, dataset_id, rdt):
        '''
        Initializes a metadata document in the object store. The document
        contains information about the bounds and extents of the dataset as
        well as other metadata to improve performance.

        '''

        object_store = self.container.object_store
        key = dataset_id
        bounds = {}
        extents = {}
        last_values = {}
        rough_size = 0
        for k, v in rdt.iteritems():
            v = v[:].flatten()
            if v.dtype.char not in ('S', 'O', 'U', 'V'):
                bounds[k] = (np.min(v), np.max(v))
                last_values[k] = v[-1]
            extents[k] = len(rdt)
            rough_size += len(rdt) * 4

        doc = {
            'bounds': bounds,
            'extents': extents,
            'last_values': last_values,
            'size': rough_size
        }
        doc = numpy_walk(doc)
        object_store.create_doc(doc, object_id=key)
        return

    def update_metadata(self, dataset_id, rdt):
        '''
        Updates the metadata document with the latest information available
        '''

        self.update_data_product_metadata(dataset_id, rdt)

        # Grab the document
        object_store = self.container.object_store
        key = dataset_id
        try:
            doc = object_store.read_doc(key)
        except NotFound:
            return self.initialize_metadata(dataset_id, rdt)
        # These are the fields we're interested in
        bounds = doc['bounds']
        extents = doc['extents']
        last_values = doc['last_values']
        rough_size = doc['size']
        for k, v in rdt.iteritems():
            if k not in bounds:
                continue

            v = v[:].flatten()  # Get the numpy representation (dense array).
            if v.dtype.char not in ('S', 'O', 'U', 'V'):
                l_min = np.min(v)
                l_max = np.max(v)
                o_min, o_max = bounds[k]
                bounds[k] = (min(l_min, o_min), max(l_max, o_max))
                last_values[k] = v[-1]
            # Bounds and last values were refreshed above; now grow the extents
            extents[k] = extents[k] + len(rdt)

            rough_size += len(rdt) * 4
            doc['size'] = rough_size
        # Sanitize it
        doc = numpy_walk(doc)
        object_store.update_doc(doc)

    def update_data_product_metadata(self, dataset_id, rdt):
        data_products = self._get_data_products(dataset_id)
        for data_product in data_products:
            self.update_time(data_product, rdt[rdt.temporal_parameter][:])
            self.update_geo(data_product, rdt)
            try:
                self.container.resource_registry.update(data_product)
            except:  # TODO: figure out WHICH Exception gets raised here when the bounds are off
                log.error("Problem updating the data product metadata",
                          exc_info=True)
                # Carry on :(

    def update_time(self, data_product, t):
        '''
        Sets the nominal_datetime for a data product correctly
        Accounts for things like NTP and out of order data
        '''

        t0, t1 = self.get_datetime_bounds(data_product)
        #TODO: Account for non NTP-based timestamps
        min_t = np.min(t) - 2208988800
        max_t = np.max(t) - 2208988800
        if t0:
            t0 = min(t0, min_t)
        else:
            t0 = min_t

        if t1:
            t1 = max(t1, max_t)
        else:
            t1 = max_t

        if t0 > t1:
            log.error("This should never happen but t0 > t1")

        data_product.nominal_datetime.start_datetime = float(t0)
        data_product.nominal_datetime.end_datetime = float(t1)

    def get_datetime(self, nominal_datetime):
        '''
        Returns a floating point value for the datetime or None if it's an
        empty string
        '''
        t = None
        # So normally this is a string
        if isinstance(nominal_datetime, (float, int)):
            t = nominal_datetime  # simple enough
        elif isinstance(nominal_datetime, basestring):
            if nominal_datetime:  # not an empty string
                # Try to convert it to a float
                try:
                    t = float(nominal_datetime)
                except ValueError:
                    pass
        return t

    def get_datetime_bounds(self, data_product):
        '''Returns the min and max for the bounds in the nominal_datetime
        attr
        '''

        t0 = self.get_datetime(data_product.nominal_datetime.start_datetime)
        t1 = self.get_datetime(data_product.nominal_datetime.end_datetime)
        return (t0, t1)

    def update_geo(self, data_product, rdt):
        '''
        Finds the maximum bounding box
        '''
        lat = None
        lon = None
        for p in rdt:
            if rdt._rd[p] is None:
                continue
            # TODO: Not an all encompassing list of acceptable names for lat and lon
            if p.lower() in ('lat', 'latitude', 'y_axis'):
                lat = np.asscalar(rdt[p][-1])
            elif p.lower() in ('lon', 'longitude', 'x_axis'):
                lon = np.asscalar(rdt[p][-1])
            if lat and lon:
                break

        if lat and lon:
            data_product.geospatial_bounds.geospatial_latitude_limit_north = lat
            data_product.geospatial_bounds.geospatial_latitude_limit_south = lat
            data_product.geospatial_bounds.geospatial_longitude_limit_east = lon
            data_product.geospatial_bounds.geospatial_longitude_limit_west = lon

    #--------------------------------------------------------------------------------
    # Cache management
    #--------------------------------------------------------------------------------

    def get_dataset(self, stream_id):
        '''
        Memoization (LRU) of _new_dataset
        '''
        try:
            result = self._datasets.pop(stream_id)
        except KeyError:
            result = self._new_dataset(stream_id)
            if result is None:
                return None
            if len(self._datasets) >= self.CACHE_LIMIT:
                self._datasets.popitem(0)
        self._datasets[stream_id] = result
        return result

    def get_coverage(self, stream_id):
        '''
        Memoization (LRU) of _get_coverage
        '''
        try:
            result = self._coverages.pop(stream_id)
        except KeyError:
            dataset_id = self.get_dataset(stream_id)
            if dataset_id is None:
                return None
            result = DatasetManagementService._get_simplex_coverage(dataset_id,
                                                                    mode='a')
            if result is None:
                return None
            if len(self._coverages) >= self.CACHE_LIMIT:
                k, coverage = self._coverages.popitem(0)
                coverage.close(timeout=5)
        self._coverages[stream_id] = result
        return result

    #--------------------------------------------------------------------------------
    # Granule Parsing and Handling
    #--------------------------------------------------------------------------------

    @handle_stream_exception()
    def recv_packet(self, msg, stream_route, stream_id):
        '''
        The consumer callback to parse and manage the granule.
        The message is ACK'd once the function returns
        '''
        log.trace('received granule for stream %s', stream_id)

        if msg == {}:
            log.error('Received empty message from stream: %s', stream_id)
            return
        # Message validation
        if not isinstance(msg, Granule):
            log.error('Ingestion received a message that is not a granule: %s',
                      msg)
            return

        rdt = RecordDictionaryTool.load_from_granule(msg)
        if rdt is None:
            log.error('Invalid granule (no RDT) for stream %s', stream_id)
            return
        if not len(rdt):
            log.debug('Empty granule for stream %s', stream_id)
            return

        self.persist_or_timeout(stream_id, rdt)

    def persist_or_timeout(self, stream_id, rdt):
        '''
        Retries parsing and storing a granule, waiting an increasing amount of
        time (capped at five minutes) between attempts until MAX_RETRY_TIME elapses.
        '''
        done = False
        timeout = 2
        start = time.time()
        while not done:
            if self.parse_granule(stream_id, rdt, start, done):
                return  # We're all done, everything worked

            if (time.time() -
                    start) > MAX_RETRY_TIME:  # After a while, give up
                dataset_id = self.get_dataset(stream_id)
                log.error(
                    "We're giving up, the coverage needs to be inspected %s",
                    DatasetManagementService._get_coverage_path(dataset_id))
                raise

            if stream_id in self._coverages:
                log.info('Popping coverage for stream %s', stream_id)
                self._coverages.pop(stream_id)

            gevent.sleep(timeout)

            timeout = min(60 * 5, timeout * 2)

    def parse_granule(self, stream_id, rdt, start, done):
        try:
            self.add_granule(stream_id, rdt)
            return True
        except Exception as e:
            log.exception('An issue with coverage, retrying after a bit')
            return False
        return True  # unreachable; kept for clarity

    def dataset_changed(self, dataset_id, window):
        self.event_publisher.publish_event(origin=dataset_id,
                                           author=self.id,
                                           window=window)

    def build_data_dict(self, rdt):
        np_dict = {}

        time_array = rdt[rdt.temporal_parameter]
        if time_array is None:
            raise ValueError("A granule needs a time array")
        for k, v in rdt.iteritems():
            # Sparse values are different and aren't constructed using NumpyParameterData
            if isinstance(rdt.param_type(k), SparseConstantType):
                value = v[0]
                if hasattr(value, 'dtype'):
                    value = np.asscalar(value)
                time_start = np.asscalar(time_array[0])
                np_dict[k] = ConstantOverTime(k,
                                              value,
                                              time_start=time_start,
                                              time_end=None)  # From now on
                continue
            elif isinstance(rdt.param_type(k), CategoryType):
                log.warning("Category types temporarily unsupported")
                continue
            elif isinstance(rdt.param_type(k), RecordType):
                value = v
            else:
                value = v

            try:
                if k == 'temp_sample':
                    print repr(value)
                np_dict[k] = NumpyParameterData(k, value, time_array)
            except:
                raise

        return np_dict

    def insert_values(self, coverage, rdt, stream_id):

        np_dict = self.build_data_dict(rdt)

        if 'ingestion_timestamp' in coverage.list_parameters():
            timestamps = np.array([(time.time() + 2208988800)
                                   for i in rdt[rdt.temporal_parameter]])
            np_dict['ingestion_timestamp'] = NumpyParameterData(
                'ingestion_timestamp', timestamps, rdt[rdt.temporal_parameter])

        # If it's sparse only
        if self.sparse_only(rdt):
            del np_dict[rdt.temporal_parameter]

        try:
            coverage.set_parameter_values(np_dict)
        except IOError as e:
            log.error("Couldn't insert values for coverage: %s",
                      coverage.persistence_dir,
                      exc_info=True)
            try:
                coverage.close()
            finally:
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(e.message)
        except KeyError as e:
            if 'has not been initialized' in e.message:
                coverage.refresh()
            raise
        except Exception as e:
            print repr(rdt)
            raise

    def add_granule(self, stream_id, rdt):
        ''' Appends the granule's data to the coverage and persists it. '''
        if stream_id in self._bad_coverages:
            log.info(
                'Message attempting to be inserted into bad coverage: %s',
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))

        #--------------------------------------------------------------------------------
        # Coverage determination and appending
        #--------------------------------------------------------------------------------
        dataset_id = self.get_dataset(stream_id)
        if not dataset_id:
            log.error('No dataset could be determined on this stream: %s',
                      stream_id)
            return

        try:
            coverage = self.get_coverage(stream_id)
        except IOError as e:
            log.error(
                "Couldn't open coverage: %s",
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))
            raise CorruptionError(e.message)

        if not coverage:
            log.error(
                'Could not persist coverage from granule, coverage is None')
            return
        #--------------------------------------------------------------------------------
        # Actual persistence
        #--------------------------------------------------------------------------------

        if rdt[rdt.temporal_parameter] is None:
            log.warning("Empty granule received")
            return

        # Parse the RDT and set the values in the coverage
        self.insert_values(coverage, rdt, stream_id)

        # Force the data to be flushed
        DatasetManagementService._save_coverage(coverage)

        self.update_metadata(dataset_id, rdt)

        try:
            window = rdt[rdt.temporal_parameter][[0, -1]]
            window = window.tolist()
        except (ValueError, IndexError):
            window = None
        self.dataset_changed(dataset_id, window)

    def sparse_only(self, rdt):
        '''
        A sparse only rdt will have only a time array AND sparse values, no other data
        '''
        if rdt[rdt.temporal_parameter] is None:
            return False  # No time, so it's just empty

        at_least_one = False

        for key in rdt.iterkeys():
            # Skip time, that needs to be there
            if key == rdt.temporal_parameter:
                continue
            if not isinstance(rdt.param_type(key), SparseConstantType):
                return False
            else:
                at_least_one = True

        return at_least_one
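
get_dataset() and get_coverage() above implement a small LRU with an OrderedDict: a hit is popped and re-inserted so it moves to the most-recent end, and the oldest entry is evicted (popitem(0), i.e. last=False) once the cache reaches CACHE_LIMIT. A stand-alone sketch of the same pattern follows; the class and parameter names are illustrative only. Note that, unlike this sketch, get_coverage() also closes an evicted coverage before discarding it.

import collections

class SmallLRU(object):
    def __init__(self, limit, loader):
        self.limit = limit            # maximum number of cached entries
        self.loader = loader          # callable invoked on a cache miss
        self._cache = collections.OrderedDict()

    def get(self, key):
        try:
            value = self._cache.pop(key)          # hit: pop so the reinsert refreshes recency
        except KeyError:
            value = self.loader(key)              # miss: load it
            if value is None:
                return None                       # don't cache failed lookups
            if len(self._cache) >= self.limit:
                self._cache.popitem(last=False)   # evict the least recently used entry
        self._cache[key] = value                  # (re)insert at the most-recent end
        return value

# e.g. SmallLRU(limit=2, loader=lambda k: k.upper()).get('stream_id')  -> 'STREAM_ID'
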
class ScienceGranuleIngestionWorker(TransformStreamListener):
    CACHE_LIMIT=CFG.get_safe('container.ingestion_cache',5)

    def __init__(self, *args,**kwargs):
        super(ScienceGranuleIngestionWorker, self).__init__(*args, **kwargs)
        #--------------------------------------------------------------------------------
        # Ingestion Cache
        # - Datasets
        # - Coverage instances
        #--------------------------------------------------------------------------------
        self._datasets  = collections.OrderedDict()
        self._coverages = collections.OrderedDict()

        self._bad_coverages = {}

        self.time_stats = Accumulator(format='%3f')
        # unique ID to identify this worker in log msgs
        self._id = uuid.uuid1()

    def on_start(self): #pragma no cover
        super(ScienceGranuleIngestionWorker,self).on_start()
        self.event_publisher = EventPublisher(OT.DatasetModified)
        self.stored_value_manager = StoredValueManager(self.container)

        self.lookup_docs = self.CFG.get_safe('process.lookup_docs',[])
        self.input_product = self.CFG.get_safe('process.input_product','')
        self.qc_enabled = self.CFG.get_safe('process.qc_enabled', True)
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(event_type=OT.ExternalReferencesUpdatedEvent, callback=self._add_lookups, auto_delete=True)
        self.lookup_monitor.start()
        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        self.connection_id = ''
        self.connection_index = None


    def on_quit(self): #pragma no cover
        super(ScienceGranuleIngestionWorker, self).on_quit()
        for stream, coverage in self._coverages.iteritems():
            try:
                coverage.close(timeout=5)
            except:
                log.exception('Problems closing the coverage')
    
    def _add_lookups(self, event, *args, **kwargs):
        if event.origin == self.input_product:
            if isinstance(event.reference_keys, list):
                self.new_lookups.put(event.reference_keys)

    def _new_dataset(self, stream_id):
        '''
        Adds a new dataset to the internal cache of the ingestion worker
        '''
        rr_client = ResourceRegistryServiceClient()
        datasets, _ = rr_client.find_subjects(subject_type=RT.Dataset,predicate=PRED.hasStream,object=stream_id,id_only=True)
        if datasets:
            return datasets[0]
        return None
    
    def get_dataset(self,stream_id):
        '''
        Memoization (LRU) of _new_dataset
        '''
        try:
            result = self._datasets.pop(stream_id)
        except KeyError:
            result = self._new_dataset(stream_id)
            if result is None:
                return None
            if len(self._datasets) >= self.CACHE_LIMIT:
                self._datasets.popitem(0)
        self._datasets[stream_id] = result
        return result

    def get_coverage(self, stream_id):
        '''
        Memoization (LRU) of _get_coverage
        '''
        try:
            result = self._coverages.pop(stream_id)
        except KeyError:
            dataset_id = self.get_dataset(stream_id)
            if dataset_id is None:
                return None
            result = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a')
            if result is None:
                return None
            if len(self._coverages) >= self.CACHE_LIMIT:
                k, coverage = self._coverages.popitem(0)
                coverage.close(timeout=5)
        self._coverages[stream_id] = result
        return result

    def gap_coverage(self,stream_id):
        try:
            old_cov = self._coverages.pop(stream_id)
            dataset_id = self.get_dataset(stream_id)
            sdom, tdom = time_series_domain()
            new_cov = DatasetManagementService._create_simplex_coverage(dataset_id, old_cov.parameter_dictionary, sdom, tdom, old_cov._persistence_layer.inline_data_writes)
            old_cov.close()
            result = new_cov
        except KeyError:
            result = self.get_coverage(stream_id)
        self._coverages[stream_id] = result
        return result


    def dataset_changed(self, dataset_id, extents, window):
        self.event_publisher.publish_event(origin=dataset_id, author=self.id, extents=extents, window=window)

    def evaluate_qc(self, rdt, dataset_id):
        if self.qc_enabled:
            for field in rdt.fields:
                if not field.endswith('_qc'):
                    continue
                try:
                    values = rdt[field]
                    if values is not None:
                        if not all(values):
                            topology = np.where(values == 0)  # indices of failing (0) QC flags
                            first_occurrence = topology[0][0]
                            ts = rdt[rdt.temporal_parameter][first_occurrence]
                            self.flag_qc_parameter(dataset_id, field, ts, {})
                except:
                    continue
    def flag_qc_parameter(self, dataset_id, parameter, temporal_value, configuration):
        self.qc_publisher.publish_event(origin=dataset_id, qc_parameter=parameter, temporal_value=temporal_value, configuration=configuration)

    def update_connection_index(self, connection_id, connection_index):
        self.connection_id = connection_id
        try:
            connection_index = int(connection_index)
            self.connection_index = connection_index
        except ValueError:
            pass

    def has_gap(self, connection_id, connection_index):
        if connection_id:
            if not self.connection_id:
                self.update_connection_index(connection_id, connection_index)
                return False
            else:
                if connection_id != self.connection_id:
                    return True
        if connection_index:
            if self.connection_index is None:
                self.update_connection_index(connection_id, connection_index)
                return False
            try:
                connection_index = int(connection_index)
                if connection_index != self.connection_index+1:
                    return True
            except ValueError:
                pass

        return False
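    # Worked example of the gap check above, with hypothetical values: if the
    # worker last saw connection ('conn-a', 5), then a granule tagged
    # ('conn-a', 6) is contiguous and returns False, ('conn-a', 8) skips an index
    # and returns True, and ('conn-b', 0) changes connection id and also returns
    # True.  The very first granule (no prior connection_id) only records its
    # connection and returns False.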

    def splice_coverage(self, dataset_id, coverage):
        log.info('Splicing new coverage')
        DatasetManagementService._splice_coverage(dataset_id, coverage)

    @handle_stream_exception()
    def recv_packet(self, msg, stream_route, stream_id):
        ''' receive packet for ingestion '''
        log.trace('received granule for stream %s', stream_id)

        if msg == {}:
            log.error('Received empty message from stream: %s', stream_id)
            return
        # Message validation
        if not isinstance(msg, Granule):
            log.error('Ingestion received a message that is not a granule: %s', msg)
            return


        rdt = RecordDictionaryTool.load_from_granule(msg)
        if rdt is None:
            log.error('Invalid granule (no RDT) for stream %s', stream_id)
            return
        if not len(rdt):
            log.debug('Empty granule for stream %s', stream_id)
            return

        self.persist_or_timeout(stream_id, rdt)

    def persist_or_timeout(self, stream_id, rdt):
        """ retry writing coverage multiple times and eventually time out """
        done = False
        timeout = 2
        start = time.time()
        while not done:
            try:
                self.add_granule(stream_id, rdt)
                done = True
            except Exception:
                log.exception('An issue with coverage, retrying after a bit')
                if (time.time() - start) > MAX_RETRY_TIME: # After an hour just give up
                    dataset_id = self.get_dataset(stream_id)
                    log.error("We're giving up, the coverage needs to be inspected %s", DatasetManagementService._get_coverage_path(dataset_id))
                    raise

                if stream_id in self._coverages:
                    log.info('Popping coverage for stream %s', stream_id)
                    self._coverages.pop(stream_id)

                gevent.sleep(timeout)
                # Exponential backoff on the retry interval, capped at five minutes
                timeout = min(timeout * 2, 60 * 5)
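        # The loop above is a capped exponential backoff: sleeps of roughly
        # 2, 4, 8, ... seconds, clamped at five minutes, until add_granule()
        # succeeds or MAX_RETRY_TIME is exceeded.  The same policy in isolation
        # (attempt() and start are illustrative names, not part of this module):
        #
        #     delay = 2
        #     while True:
        #         try:
        #             attempt()
        #             break
        #         except Exception:
        #             if (time.time() - start) > MAX_RETRY_TIME:
        #                 raise
        #             gevent.sleep(delay)
        #             delay = min(delay * 2, 60 * 5)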


    def expand_coverage(self, coverage, elements, stream_id):
        try:
            coverage.insert_timesteps(elements, oob=False)
        except IOError as e:
            log.error("Couldn't insert time steps for coverage: %s",
                      coverage.persistence_dir, exc_info=True)
            try:
                coverage.close()
            finally:
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(e.message)
    
    def get_stored_values(self, lookup_value):
        if not self.new_lookups.empty():
            new_values = self.new_lookups.get()
            self.lookup_docs = new_values + self.lookup_docs
        lookup_value_document_keys = self.lookup_docs
        for key in lookup_value_document_keys:
            try:
                document = self.stored_value_manager.read_value(key)
                if lookup_value in document:
                    return document[lookup_value] 
            except NotFound:
                log.warning('Specified lookup document does not exist')
        return None


    def fill_lookup_values(self, rdt):
        rdt.fetch_lookup_values()
        for field in rdt.lookup_values():
            value = self.get_stored_values(rdt.context(field).lookup_value)
            if value:
                rdt[field] = value

    def insert_sparse_values(self, coverage, rdt, stream_id):

        self.fill_lookup_values(rdt)
        for field in rdt._lookup_values():
            if rdt[field] is None:
                continue
            if not isinstance(rdt.context(field).param_type, SparseConstantType):
                # We only set sparse values before insert
                continue 
            value = rdt[field]
            try:
                coverage.set_parameter_values(param_name=field, value=value)
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          coverage.persistence_dir, exc_info=True)
                try:
                    coverage.close()
                finally:
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(e.message)

    def insert_values(self, coverage, rdt, stream_id):
        elements = len(rdt)

        start_index = coverage.num_timesteps - elements
        # All parameters for this granule are written into the same tail slice
        slice_ = slice(start_index, None)

        for k, v in rdt.iteritems():
            if isinstance(v, SparseConstantValue):
                continue
            try:
                coverage.set_parameter_values(param_name=k, tdoa=slice_, value=v)
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          coverage.persistence_dir, exc_info=True)
                try:
                    coverage.close()
                finally:
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(e.message)

        if 'ingestion_timestamp' in coverage.list_parameters():
            t_now = time.time()
            ntp_time = TimeUtils.ts_to_units(coverage.get_parameter_context('ingestion_timestamp').uom, t_now)
            coverage.set_parameter_values(param_name='ingestion_timestamp', tdoa=slice_, value=ntp_time)
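    # Alignment note for insert_values(): expand_coverage() has already grown the
    # coverage by len(rdt) timesteps, so the new samples occupy the final len(rdt)
    # slots.  For example, with num_timesteps == 110 after inserting 10 elements,
    # start_index == 100 and slice(100, None) addresses exactly the newly added
    # region.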
    
    def add_granule(self,stream_id, rdt):
        ''' Appends the granule's data to the coverage and persists it. '''
        debugging = log.isEnabledFor(DEBUG)
        timer = Timer() if debugging else None
        if stream_id in self._bad_coverages:
            log.info('Attempting to insert a message into known-bad coverage: %s',
                     DatasetManagementService._get_coverage_path(self.get_dataset(stream_id)))
            
        #--------------------------------------------------------------------------------
        # Gap Analysis
        #--------------------------------------------------------------------------------
        gap_found = self.has_gap(rdt.connection_id, rdt.connection_index)
        if gap_found:
            log.error('Gap Found!   New connection: (%s,%s)\tOld Connection: (%s,%s)', rdt.connection_id, rdt.connection_index, self.connection_id, self.connection_index)
            self.gap_coverage(stream_id)



        #--------------------------------------------------------------------------------
        # Coverage determination and appending
        #--------------------------------------------------------------------------------
        dataset_id = self.get_dataset(stream_id)
        if not dataset_id:
            log.error('No dataset could be determined on this stream: %s', stream_id)
            return

        try:
            coverage = self.get_coverage(stream_id)
        except IOError as e:
            log.error("Couldn't open coverage: %s",
                      DatasetManagementService._get_coverage_path(self.get_dataset(stream_id)))
            raise CorruptionError(e.message)

        if debugging:
            path = DatasetManagementService._get_coverage_path(dataset_id)
            log.debug('%s: add_granule stream %s dataset %s coverage %r file %s',
                      self._id, stream_id, dataset_id, coverage, path)

        if not coverage:
            log.error('Could not persist coverage from granule, coverage is None')
            return
        #--------------------------------------------------------------------------------
        # Actual persistence
        #--------------------------------------------------------------------------------

        elements = len(rdt)

        self.insert_sparse_values(coverage,rdt,stream_id)
        
        if debugging:
            timer.complete_step('checks') # lightweight ops, should be zero
        
        self.expand_coverage(coverage, elements, stream_id)
        
        if debugging:
            timer.complete_step('insert')

        self.insert_values(coverage, rdt, stream_id)
        
        if debugging:
            timer.complete_step('keys')
        
        DatasetManagementService._save_coverage(coverage)
        
        if debugging:
            timer.complete_step('save')
        
        start_index = coverage.num_timesteps - elements
        self.dataset_changed(dataset_id,coverage.num_timesteps,(start_index,start_index+elements))

        if gap_found:
            self.splice_coverage(dataset_id, coverage)

        self.evaluate_qc(rdt, dataset_id)
        
        if debugging:
            timer.complete_step('notify')
            self._add_timing_stats(timer)

        self.update_connection_index(rdt.connection_id, rdt.connection_index)

    def _add_timing_stats(self, timer):
        """ add stats from latest coverage operation to Accumulator and periodically log results """
        self.time_stats.add(timer)
        if self.time_stats.get_count() % REPORT_FREQUENCY>0:
            return

        if log.isEnabledFor(TRACE):
            # report per step
            for step in 'checks', 'insert', 'keys', 'save', 'notify':
                log.debug('%s step %s times: %s', self._id, step, self.time_stats.to_string(step))
        # report totals
        log.debug('%s total times: %s', self._id, self.time_stats)
Example #34
0
class PDLocalBackend(object):
    """Scheduling backend to PD that manages processes in the local container

    This implementation is the default and is used in single-container
    deployments where there is no CEI launch to leverage.
    """

    # We attempt to make the local backend act a bit more like the real thing.
    # Process spawn requests are asynchronous (not completed by the time the
    # operation returns). Therefore, callers need to listen for events to find
    # the success or failure of the process launch. To make races here more
    # detectable, we introduce an artificial delay between when
    # schedule_process() returns and when the process is actually launched.
    SPAWN_DELAY = 0
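    # Hypothetical usage sketch: a test that wants to exercise the race described
    # above could raise the delay and then wait for the backend's state event
    # before using the process (process_id and on_state are placeholders):
    #
    #     PDLocalBackend.SPAWN_DELAY = 1  # seconds
    #     sub = EventSubscriber(event_type="ProcessLifecycleEvent",
    #                           origin=process_id, callback=on_state)
    #     sub.start()
    #
    # None of this is part of the backend itself; it only illustrates why spawn
    # requests must be treated as asynchronous.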

    def __init__(self, container):
        self.container = container
        self.event_pub = EventPublisher()
        self._processes = []

        self._spawn_greenlets = set()

        # use the container RR instance -- talks directly to db
        self.rr = container.resource_registry

    def initialize(self):
        pass

    def shutdown(self):
        if self._spawn_greenlets:
            try:
                gevent.killall(list(self._spawn_greenlets), block=True)
            except Exception:
                log.warn("Ignoring error while killing spawn greenlets", exc_info=True)
            self._spawn_greenlets.clear()

    def set_system_boot(self, system_boot):
        pass

    def create_definition(self, definition, definition_id=None):
        pd_id, version = self.rr.create(definition, object_id=definition_id)
        return pd_id

    def read_definition(self, definition_id):
        return self.rr.read(definition_id)

    def read_definition_by_name(self, definition_name):
        raise ServerError("reading process definitions by name not supported by this backend")

    def update_definition(self, definition, definition_id):
        raise ServerError("updating process definitions not supported by this backend")

    def delete_definition(self, definition_id):
        return self.rr.delete(definition_id)

    def create(self, process_id, definition_id):
        if not self._get_process(process_id):
            self._add_process(process_id, {}, ProcessStateEnum.REQUESTED)
        return process_id

    def schedule(self, process_id, definition_id, schedule, configuration, name):

        definition = self.read_definition(definition_id)
        process = self._get_process(process_id)

        # in order for this local backend to behave more like the real thing,
        # we introduce an artificial delay in spawn requests. This helps flush
        # out races where callers try to use a process before it is necessarily
        # running.

        if self.SPAWN_DELAY:
            glet = gevent.spawn_later(self.SPAWN_DELAY, self._inner_spawn,
                process_id, name, definition, schedule, configuration)
            self._spawn_greenlets.add(glet)

            if process:
                process.process_configuration = configuration
            else:
                self._add_process(process_id, configuration, None)

        else:
            if process:
                process.process_configuration = configuration
            else:
                self._add_process(process_id, configuration, None)
            self._inner_spawn(process_id, name, definition, schedule, configuration)

        return process_id

    def _inner_spawn(self, process_id, process_name, definition, schedule, configuration):

        name = process_name
        module = definition.module
        cls = definition.class_name

        self.event_pub.publish_event(event_type="ProcessLifecycleEvent",
            origin=process_id, origin_type="DispatchedProcess",
            state=ProcessStateEnum.PENDING)

        # Spawn the process
        pid = self.container.spawn_process(name=name, module=module, cls=cls,
            config=configuration, process_id=process_id)
        log.debug('PD: Spawned Process (%s)', pid)

        # update state on the existing process
        process = self._get_process(process_id)
        process.process_state = ProcessStateEnum.RUNNING

        self.event_pub.publish_event(event_type="ProcessLifecycleEvent",
            origin=process_id, origin_type="DispatchedProcess",
            state=ProcessStateEnum.RUNNING)

        if self.SPAWN_DELAY:
            glet = gevent.getcurrent()
            if glet:
                self._spawn_greenlets.discard(glet)

        return pid

    def cancel(self, process_id):
        process = self._get_process(process_id)
        if process:
            try:
                self.container.proc_manager.terminate_process(process_id)
                log.debug('PD: Terminated Process (%s)', process_id)
            except BadRequest as e:
                log.warn("PD: Failed to terminate process %s in container. already dead?: %s",
                    process_id, str(e))
            process.process_state = ProcessStateEnum.TERMINATED

            try:
                self.event_pub.publish_event(event_type="ProcessLifecycleEvent",
                    origin=process_id, origin_type="DispatchedProcess",
                    state=ProcessStateEnum.TERMINATED)
            except BadRequest as e:
                log.warn(e)
Example #35
0
class ProcManager(object):
    def __init__(self, container):
        self.container = container

        # Define the callables that can be added to Container public API, and add
        self.container_api = [self.spawn_process, self.terminate_process]
        for call in self.container_api:
            setattr(self.container, call.__name__, call)

        self.proc_id_pool = IDPool()

        # Registry of running processes
        self.procs = {}
        self.procs_by_name = {}   # BAD: This is not correct if procs have the same name

        # mapping of greenlets we spawn to process_instances for error handling
        self._spawned_proc_to_process = {}

        # Effective execution engine config (after merging in child process overrides)
        self.ee_cfg = self._get_execution_engine_config()

        # Process dispatcher (if configured/enabled and not a child container process)
        self.pd_cfg = CFG.get_safe("service.process_dispatcher") or {}
        self.pd_enabled = self.pd_cfg.get("enabled", False) is True and not self.ee_cfg["container"]["is_child"]
        self.pd_core = None

        self.event_pub = EventPublisher()
        self.publish_events = CFG.get_safe("container.process.publish_events") is True

        # Passive manager for simple threads/greenlets, to keep them registered (these are not OS threads)
        # Note that each ION process has its own thread manager, so this is for container level threads
        self.thread_manager = ThreadManager(heartbeat_secs=None, failure_notify_callback=None)

        # Active supervisor for ION processes
        self.proc_sup = IonProcessThreadManager(heartbeat_secs=CFG.get_safe("container.timeout.heartbeat"),
                                                failure_notify_callback=self._spawned_proc_failed)

        # list of callbacks for process state changes
        self._proc_state_change_callbacks = []

    def start(self):
        log.debug("ProcManager starting ...")

        if self.pd_enabled:
            self._start_process_dispatcher()

        self.proc_sup.start()

        if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
            # Register container as resource object
            cc_obj = self._get_capability_container_object()
            self.cc_id, _ = self.container.resource_registry.create(cc_obj)

            # Create an association to an Org object if not the root ION org and only if found
            if CFG.get_safe("container.org_name") != CFG.get_safe("system.root_org"):
                org, _ = self.container.resource_registry.find_resources(
                        restype=RT.Org, name=CFG.get_safe("container.org_name"), id_only=True)
                if org:
                    self.container.resource_registry.create_association(org[0], PRED.hasResource, self.cc_id)  # TODO - replace with proper association

        log.debug("ProcManager started, OK.")

    def stop(self):
        log.debug("ProcManager stopping ...")

        # Call quit on procs to give them ability to clean up in reverse order
        procs_list = sorted(self.procs.values(), key=lambda proc: proc._proc_start_time, reverse=True)
        for proc in procs_list:
            try:
                self.terminate_process(proc.id)
            except Exception as ex:
                log.warn("Failed to terminate process (%s): %s", proc.id, ex)

        # TODO: Have a choice of shutdown behaviors for waiting on children, timeouts, etc
        self.proc_sup.shutdown(CFG.get_safe("container.timeout.shutdown"))

        if self.procs:
            log.warn("ProcManager procs not empty: %s", self.procs)
        if self.procs_by_name:
            log.warn("ProcManager procs_by_name not empty: %s", self.procs_by_name)

        # Remove Resource registration
        if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
            try:
                self.container.resource_registry.delete(self.cc_id, del_associations=True)
            except NotFound:
                # already gone, this is ok
                pass

        if self.pd_enabled:
            self._stop_process_dispatcher()

        log.debug("ProcManager stopped, OK.")

    def _get_execution_engine_config(self):
        ee_base_cfg = CFG.get_safe("container.execution_engine") or {}
        if ee_base_cfg.get("type", None) != "scioncc":
            raise ContainerConfigError("Execution engine config invalid: %s" % ee_base_cfg)

        ee_cfg = deepcopy(ee_base_cfg)

        # If we are a child process, merge in child config override
        proc_name = multiprocessing.current_process().name
        ee_cfg["container"] = dict(child_proc_name=proc_name, is_child=False)
        child_cfgs = ee_base_cfg.get("child_configs", None) or {}
        if proc_name.startswith("Container-child-"):
            ee_cfg["container"]["is_child"] = True
            if proc_name in child_cfgs:
                log.info("Applying execution engine config override for child: %s", proc_name)
                dict_merge(ee_cfg, child_cfgs[proc_name], inplace=True)
            else:
                for cfg_name, ch_cfg in child_cfgs.iteritems():
                    pattern = ch_cfg.get("name_pattern", None)
                    if pattern and re.match(pattern, proc_name):
                        log.info("Applying execution engine config override %s for child: %s", cfg_name, proc_name)
                        dict_merge(ee_cfg, ch_cfg, inplace=True)
                        break

        ee_cfg.pop("child_configs", None)
        return ee_cfg
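    # Illustrative shape of the config consumed above (the heartbeat_secs key is
    # invented for the example): a child container process named
    # "Container-child-2" would match the name_pattern entry and have the
    # override merged into its effective engine config.
    #
    #     CFG.container.execution_engine == {
    #         "type": "scioncc",
    #         "child_configs": {
    #             "workers": {"name_pattern": r"Container-child-\d+",
    #                         "heartbeat_secs": 30},
    #         },
    #     }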

    def _get_capability_container_object(self):
        container_info = dict(proc_name=multiprocessing.current_process().name,
                              process_id=os.getpid(),
                              parent_process_id=os.getppid(),
                              hostname=socket.gethostname(),
                              host=socket.gethostbyname(socket.gethostname()),
                              platform=sys.platform,
                              argv=sys.argv,
                              python_version=sys.version,
                              cwd=os.getcwd(),
                              start_time=self.container.start_time,
                              )
        # Other possibilities: username, os package versions, IP address
        host_info = {k: v for (k, v) in zip(("os_sysname", "os_nodename", "os_release", "os_version", "os_machine"), os.uname())}
        container_info.update(host_info)
        container_info["env"] = {k: str(v) for (k,v) in os.environ.iteritems()}
        container_info["python_path"] = sys.path
        cc_obj = CapabilityContainer(name=self.container.id, version=self.container.version,
                                     cc_agent=self.container.name,
                                     container_info=container_info,
                                     execution_engine_config=self.ee_cfg)
        return cc_obj

    # -----------------------------------------------------------------

    def spawn_process(self, name=None, module=None, cls=None, config=None, process_id=None):
        """
        Spawn a process within the container. Processes can be of different type.
        """
        if process_id and not is_valid_identifier(process_id, ws_sub='_'):
            raise BadRequest("Given process_id %s is not a valid identifier" % process_id)

        # PROCESS ID. Generate a new process id if not provided
        # TODO: Ensure it is system-wide unique
        process_id = process_id or "%s.%s" % (self.container.id, self.proc_id_pool.get_id())
        log.debug("ProcManager.spawn_process(name=%s, module.cls=%s.%s, config=%s) as pid=%s", name, module, cls, config, process_id)

        # CONFIG
        process_cfg = self._create_process_config(config)

        try:
            service_cls = named_any("%s.%s" % (module, cls))
        except AttributeError:
            # Narrow down the error: if the module itself fails to import this
            # raises a clearer ImportError; otherwise re-raise the missing-class error
            import importlib
            importlib.import_module(module)
            raise

        # PROCESS TYPE. Determines basic process context (messaging, service interface)
        process_type = get_safe(process_cfg, "process.type") or getattr(service_cls, "process_type", PROCTYPE_SERVICE)

        process_start_mode = get_safe(config, "process.start_mode")

        process_instance = None

        # Signal that a process is spawning. We don't have the instance yet, so pass
        # the module.class string instead (more precise than the name).
        # Note: this uses a str as first argument instead of a process instance
        self._call_proc_state_changed("%s.%s" % (module, cls), ProcessStateEnum.PENDING)

        try:
            # Additional attributes to set with the process instance
            proc_attr = {"_proc_type": process_type,
                         "_proc_spawn_cfg": config
                         }

            # SPAWN.  Determined by type
            if process_type == PROCTYPE_SERVICE:
                process_instance = self._spawn_service_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_STREAMPROC:
                process_instance = self._spawn_stream_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_AGENT:
                process_instance = self._spawn_agent_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_STANDALONE:
                process_instance = self._spawn_standalone_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_IMMEDIATE:
                process_instance = self._spawn_immediate_process(process_id, name, module, cls, process_cfg, proc_attr)

            elif process_type == PROCTYPE_SIMPLE:
                process_instance = self._spawn_simple_process(process_id, name, module, cls, process_cfg, proc_attr)

            else:
                raise BadRequest("Unknown process type: %s" % process_type)

            # REGISTER.
            self._register_process(process_instance, name)

            process_instance.errcause = "OK"
            log.info("ProcManager.spawn_process: %s.%s -> pid=%s OK", module, cls, process_id)

            if process_type == PROCTYPE_IMMEDIATE:
                log.debug('Terminating immediate process: %s', process_instance.id)
                self.terminate_process(process_instance.id)

                # Terminate process also triggers TERMINATING/TERMINATED
                self._call_proc_state_changed(process_instance, ProcessStateEnum.EXITED)

            else:
                # Update local policies for the new process
                if self.container.has_capability(self.container.CCAP.GOVERNANCE_CONTROLLER):
                    self.container.governance_controller.update_process_policies(
                                process_instance, safe_mode=True, force_update=False)

            return process_instance.id

        except IonProcessError:
            errcause = process_instance.errcause if process_instance else "instantiating process"
            log.exception("Error spawning %s %s process (process_id: %s): %s", name, process_type, process_id, errcause)
            return None

        except Exception:
            errcause = process_instance.errcause if process_instance else "instantiating process"
            log.exception("Error spawning %s %s process (process_id: %s): %s", name, process_type, process_id, errcause)

            # trigger failed notification - catches problems in init/start
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)

            raise
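    # Hedged usage sketch (module and class names are placeholders): spawning a
    # process from application code goes through the container-level API exposed
    # in __init__ above:
    #
    #     pid = container.spawn_process(name="my_worker",
    #                                   module="mypkg.workers",
    #                                   cls="MyWorker",
    #                                   config={"process": {"type": PROCTYPE_SIMPLE}})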

    def _create_process_config(self, config):
        """ Prepare the config for the new process. Clone system config and apply process overrides.
        Support including config by reference of a resource attribute or object from object store.
        """
        process_cfg = deepcopy(CFG)
        if config:
            # Use provided config. Must be dict or DotDict
            if not isinstance(config, DotDict):
                config = DotDict(config)
            if config.get_safe("process.config_ref"):
                # Use a reference
                config_ref = config.get_safe("process.config_ref")
                log.info("Enhancing new process spawn config from ref=%s" % config_ref)
                matches = re.match(r'^([A-Za-z]+):([A-Za-z0-9_\.]+)/(.*)$', config_ref)
                if matches:
                    ref_type, ref_id, ref_ext = matches.groups()
                    if ref_type == "resources":
                        if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                            try:
                                obj = self.container.resource_registry.read(ref_id)
                                if obj and hasattr(obj, ref_ext):
                                    ref_config = getattr(obj, ref_ext)
                                    if isinstance(ref_config, dict):
                                        dict_merge(process_cfg, ref_config, inplace=True)
                                    else:
                                        raise BadRequest("config_ref %s exists but not dict" % config_ref)
                                else:
                                    raise BadRequest("config_ref %s - attribute not found" % config_ref)
                            except NotFound as nf:
                                log.warn("config_ref %s - object not found" % config_ref)
                                raise
                        else:
                            log.error("Container missing RESOURCE_REGISTRY capability to resolve process config ref %s" % config_ref)
                    elif ref_type == "objects":
                        if self.container.has_capability(self.container.CCAP.OBJECT_STORE):
                            try:
                                obj = self.container.object_store.read_doc(ref_id)
                                ref_config = obj
                                if ref_ext:
                                    ref_config = get_safe(obj, ref_ext, None)
                                    if ref_config is None:
                                        raise BadRequest("config_ref %s - attribute not found" % config_ref)

                                if isinstance(ref_config, dict):
                                    dict_merge(process_cfg, ref_config, inplace=True)
                                else:
                                    raise BadRequest("config_ref %s exists but not dict" % config_ref)
                            except NotFound as nf:
                                log.warn("config_ref %s - object not found" % config_ref)
                                raise
                        else:
                            log.error("Container missing OBJECT_STORE capability to resolve process config ref %s" % config_ref)
                    else:
                        raise BadRequest("Unknown reference type in: %s" % config_ref)

            dict_merge(process_cfg, config, inplace=True)
            if self.container.spawn_args:
                # Override config with spawn args
                dict_merge(process_cfg, self.container.spawn_args, inplace=True)

        #log.debug("spawn_process() pid=%s process_cfg=%s", process_id, process_cfg)
        return process_cfg
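    # Examples of the two config_ref forms parsed above (ids and attribute names
    # are placeholders):
    #
    #     "resources:1a2b3c/process_config"    reads resource 1a2b3c from the
    #                                          resource registry and merges its
    #                                          process_config dict attribute
    #     "objects:cfg_doc_01/spawn.overrides" reads document cfg_doc_01 from the
    #                                          object store and merges the nested
    #                                          spawn.overrides dict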

    def list_local_processes(self, process_type=''):
        """ Returns a list of the running ION processes in the container or filtered by the process_type
        """
        if not process_type:
            return self.procs.values()

        return [p for p in self.procs.itervalues() if p.process_type == process_type]

    def get_a_local_process(self, proc_name=''):
        """ Returns a running ION process in the container for the specified process name
        """
        for p in self.procs.itervalues():
            if p.name == proc_name:
                return p

            if p.process_type == PROCTYPE_AGENT and p.resource_type == proc_name:
                return p

        return None

    def get_local_service_processes(self, service_name=''):
        """ Returns a list of running ION processes in the container for the specified service name
        """
        proc_list = [p for p in self.procs.itervalues() if p.process_type == PROCTYPE_SERVICE and p.name == service_name]
        return proc_list

    def is_local_service_process(self, service_name):
        local_services = self.list_local_processes(PROCTYPE_SERVICE)
        for p in local_services:
            if p.name == service_name:
                return True

        return False

    def is_local_agent_process(self, resource_type):
        local_agents = self.list_local_processes(PROCTYPE_AGENT)
        for p in local_agents:
            if p.resource_type == resource_type:
                return True
        return False

    def _spawned_proc_failed(self, gproc):
        log.error("ProcManager._spawned_proc_failed: %s, %s", gproc, gproc.exception)

        prc = self._spawned_proc_to_process.get(gproc, None)

        # stop the rest of the process
        if prc is not None:
            try:
                self.terminate_process(prc.id, False)
            except Exception as e:
                log.warn("Problem while stopping rest of failed process %s: %s", prc, e)
            finally:
                self._call_proc_state_changed(prc, ProcessStateEnum.FAILED)
        else:
            log.warn("No ION process found for failed proc manager child: %s", gproc)

        # Stop the container if this was the last process
        if not self.procs and CFG.get_safe("container.process.exit_once_empty", False):
            self.container.fail_fast("Terminating container after last process (%s) failed: %s" % (gproc, gproc.exception))

    def add_proc_state_changed_callback(self, cb):
        """
        Adds a callback to be called when a process' state changes.

        The callback should take three parameters: the process, the state, and the container.
        """
        self._proc_state_change_callbacks.append(cb)

    def remove_proc_state_changed_callback(self, cb):
        """
        Removes a callback from the process state change callback list.

        If the callback is not registered, this method does nothing.
        """
        if cb in self._proc_state_change_callbacks:
            self._proc_state_change_callbacks.remove(cb)

    def _call_proc_state_changed(self, svc, state):
        """
        Internal method to call all registered process state change callbacks.
        """
        #log.debug("Proc State Changed (%s): %s", ProcessStateEnum._str_map.get(state, state), svc)
        for cb in self._proc_state_change_callbacks:
            cb(svc, state, self.container)

        # Trigger event
        if self.publish_events:
            self._publish_process_event(svc, state)

    def _create_listening_endpoint(self, **kwargs):
        """
        Creates a listening endpoint for spawning processes.

        This method exists to be able to override the type created via configuration.
        In most cases it will create a ProcessRPCServer.
        """
        eptypestr = CFG.get_safe('container.messaging.endpoint.proc_listening_type', None)
        if eptypestr is not None:
            module, cls     = eptypestr.rsplit('.', 1)
            mod             = __import__(module, fromlist=[cls])
            eptype          = getattr(mod, cls)
            ep              = eptype(**kwargs)
        else:
            ep = ProcessRPCServer(**kwargs)
        return ep
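    # The endpoint type above is driven purely by configuration.  A deployment
    # that wants a custom listener would set the key
    # container.messaging.endpoint.proc_listening_type to a dotted class path,
    # e.g. "mypkg.endpoints.TracingRPCServer" (an illustrative name); the class
    # is imported and instantiated with the same kwargs a ProcessRPCServer would
    # receive.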

    def _add_process_publishers(self, process_instance, config):
        # Add publishers if declared...
        publish_streams = get_safe(config, "process.publish_streams")
        pub_names = self._set_publisher_endpoints(process_instance, publish_streams)
        return pub_names

    # -----------------------------------------------------------------
    # PROCESS TYPE: service
    # - has service listen binding/queue and RPC interface
    def _spawn_service_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as a service worker.
        Attach to service queue with service definition, attach to service pid
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)

        listen_name = get_safe(config, "process.listen_name") or process_instance.name
        listen_name_xo = self.container.create_service_xn(listen_name)

        log.debug("Service Process (%s) listen_name: %s", name, listen_name)
        process_instance._proc_listen_name = listen_name

        # Service RPC endpoint
        rsvc1 = self._create_listening_endpoint(node=self.container.node,
                                                from_name=listen_name_xo,
                                                process=process_instance)

        # Start an ION process with the right kind of endpoint factory
        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[rsvc1],
                                   proc_name=process_instance._proc_name)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_service_process for %s" % ",".join((str(listen_name), process_instance.id)))

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: stream process
    # - has stream listen binding/queue
    # - has publishers if declared
    def _spawn_stream_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as a data stream process.
        Attach to subscription queue with process function.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)
        listeners = []

        # Stream listener
        listen_name = get_safe(config, "process.listen_name") or name
        log.debug("Stream Process (%s) listen_name: %s", name, listen_name)
        process_instance._proc_listen_name = listen_name

        process_instance.stream_subscriber = StreamSubscriber(process=process_instance, exchange_name=listen_name,
                                                              callback=process_instance.call_process)
        listeners.append(process_instance.stream_subscriber)

        pub_names = self._add_process_publishers(process_instance, config)

        # Private PID listener
        # pid_listener_xo = self.container.create_process_xn(process_instance.id)
        # rsvc = self._create_listening_endpoint(node=self.container.node,
        #                                        from_name=pid_listener_xo,
        #                                        process=process_instance)
        # listeners.append(rsvc)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=listeners,
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_stream_process for %s" % process_instance._proc_name)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: agent
    # - has resource ID (or if non-existent PID) listen binding/queue
    # - has RPC interface
    def _spawn_agent_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as agent process.
        Attach to service pid.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)
        if not isinstance(process_instance, ResourceAgent):
            raise ContainerConfigError("Agent process must extend ResourceAgent")
        listeners = []

        # Set the resource ID if we get it through the config
        resource_id = get_safe(process_instance.CFG, "agent.resource_id")
        if resource_id:
            process_instance.resource_id = resource_id

            # Resource ID listener
            resource_id_xo = self.container.create_process_xn(resource_id)

            alistener = self._create_listening_endpoint(node=self.container.node,
                                                        from_name=resource_id_xo,
                                                        process=process_instance)

            listeners.append(alistener)

        else:
            # Private PID listener
            pid_listener_xo = self.container.create_process_xn(process_instance.id)
            rsvc = self._create_listening_endpoint(node=self.container.node,
                                                   from_name=pid_listener_xo,
                                                   process=process_instance)

            listeners.append(rsvc)

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=listeners,
                                   proc_name=process_instance._proc_name)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_agent_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        # Now call the on_init of the agent.
        self._process_init(process_instance)

        if not process_instance.resource_id:
            log.warn("New agent pid=%s has no resource_id set" % process_id)

        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        if not process_instance.resource_id:
            log.warn("Agent process id=%s does not define resource_id!!" % process_instance.id)

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: standalone
    # - has PID binding/queue with RPC interface
    # - has publishers if declared
    def _spawn_standalone_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as standalone process.
        Attach to service pid.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)

        # Private PID listener
        pid_listener_xo = self.container.create_process_xn(process_instance.id, auto_delete=True)
        rsvc = self._create_listening_endpoint(node=self.container.node,
                                               from_name=pid_listener_xo,
                                               process=process_instance)

        pub_names = self._add_process_publishers(process_instance, config)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[rsvc],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_standalone_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        try:
            proc.start_listeners()
        except IonProcessError:
            self._process_quit(process_instance)
            self._call_proc_state_changed(process_instance, ProcessStateEnum.FAILED)
            raise

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: simple
    # - has publishers if declared
    def _spawn_simple_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as simple process.
        No attachments.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)

        pub_names = self._add_process_publishers(process_instance, config)

        # cleanup method to delete process queue (@TODO: leaks a bit here - should use XOs)
        def cleanup(*args):
            for name in pub_names:
                p = getattr(process_instance, name)
                p.close()

        proc = self.proc_sup.spawn(name=process_instance.id,
                                   service=process_instance,
                                   listeners=[],
                                   proc_name=process_instance._proc_name,
                                   cleanup_method=cleanup)
        proc.proc._glname = "ION Proc %s" % process_instance._proc_name
        self.proc_sup.ensure_ready(proc, "_spawn_simple_process for %s" % process_instance.id)

        # map gproc to process_instance
        self._spawned_proc_to_process[proc.proc] = process_instance

        # set service's reference to process
        process_instance._process = proc

        self._process_init(process_instance)
        self._process_start(process_instance)

        return process_instance

    # -----------------------------------------------------------------
    # PROCESS TYPE: immediate
    # - will not be registered
    # - will be terminated right after start
    def _spawn_immediate_process(self, process_id, name, module, cls, config, proc_attr):
        """
        Spawn a process acting as immediate one off process.
        No messaging attachments.
        """
        process_instance = self._create_app_instance(process_id, name, module, cls, config, proc_attr)
        self._process_init(process_instance)
        self._process_start(process_instance)
        return process_instance

    # -----------------------------------------------------------------

    def _create_app_instance(self, process_id, name, module, cls, config, proc_attr):
        """
        Creates an instance of a BaseService, representing the app logic of a ION process.
        This is independent of the process type service, agent, standalone, etc.
        """
        # APP INSTANCE.
        app_instance = for_name(module, cls)
        if not isinstance(app_instance, BaseService):
            raise ContainerConfigError("Instantiated service not a BaseService %r" % app_instance)

        # Set BaseService instance common attributes
        app_instance.errcause = ""
        app_instance.id = process_id
        app_instance.container = self.container
        app_instance.CFG = config
        app_instance._proc_name = name
        app_instance._proc_start_time = time.time()
        for att, att_val in proc_attr.iteritems():
            setattr(app_instance, att, att_val)

        # Unless the process has been started as part of another Org, default to the container Org or the ION Org
        if 'org_governance_name' in config:
            app_instance.org_governance_name = config['org_governance_name']
        else:
            app_instance.org_governance_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))

        # Add process state management, if applicable
        self._add_process_state(app_instance)

        # Check dependencies (RPC clients)
        self._check_process_dependencies(app_instance)

        return app_instance

    def _add_process_state(self, process_instance):
        """ Add stateful process operations, if applicable
        """
        # Only applies if the process implements stateful interface
        if hasattr(process_instance, "_flush_state"):
            def _flush_state():
                with process_instance._state_lock:
                    state_obj = process_instance.container.state_repository.put_state(process_instance.id, process_instance._proc_state,
                                                                                      state_obj=process_instance._proc_state_obj)
                    state_obj.state = None   # Make sure memory footprint is low for larger states
                    process_instance._proc_state_obj = state_obj
                    process_instance._proc_state_changed = False

            def _load_state():
                if not hasattr(process_instance, "_proc_state"):
                    process_instance._proc_state = {}
                try:
                    with process_instance._state_lock:
                        new_state, state_obj = process_instance.container.state_repository.get_state(process_instance.id)
                        process_instance._proc_state.clear()
                        process_instance._proc_state.update(new_state)
                        process_instance._proc_state_obj = state_obj
                        process_instance._proc_state_changed = False
                except NotFound as nf:
                    log.debug("No persisted state available for process %s", process_instance.id)
                except Exception as ex:
                    log.warn("Process %s load state failed: %s", process_instance.id, str(ex))
            process_instance._flush_state = _flush_state
            process_instance._load_state = _load_state
            process_instance._state_lock = RLock()
            process_instance._proc_state = {}
            process_instance._proc_state_obj = None
            process_instance._proc_state_changed = False

            # PROCESS RESTART: Need to check whether this process had persisted state.
            # Note: This could happen anytime during a system run, not just on RESTART boot
            log.debug("Loading persisted state for process %s", process_instance.id)
            process_instance._load_state()
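    # Sketch of how a stateful process would use the hooks injected above
    # (illustrative only; timestamp is a placeholder value):
    #
    #     with self._state_lock:
    #         self._proc_state["last_seen"] = timestamp
    #         self._proc_state_changed = True
    #     self._flush_state()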

    def _check_process_dependencies(self, app_instance):
        app_instance.errcause = "setting service dependencies"
        log.debug("spawn_process dependencies: %s", app_instance.dependencies)
        # TODO: Service dependency != process dependency
        for dependency in app_instance.dependencies:
            client = getattr(app_instance.clients, dependency)
            assert client, "Client for dependency not found: %s" % dependency

            # @TODO: should be in a start_client in RPCClient chain
            client.process = app_instance
            client.node = self.container.node

            # Ensure that dep actually exists and is running?

    def _process_init(self, process_instance):
        """ Initialize the process, primarily by calling on_init()
        """
        process_instance.errcause = "initializing service"
        process_instance.init()

    def _process_start(self, process_instance):
        """ Start the process, primarily by calling on_start()
        """
        # Should this be after spawn_process?
        # Should we check for timeout?
        process_instance.errcause = "starting service"
        process_instance.start()

    def _process_quit(self, process_instance):
        """ Common method to handle process stopping.
        """
        process_instance.errcause = "quitting process"

        # Give the process notice to quit doing stuff.
        process_instance.quit()

        # Terminate IonProcessThread (may not have one, i.e. simple process)
        # @TODO: move this into process' on_quit()
        if getattr(process_instance, '_process', None) is not None and process_instance._process:
            process_instance._process.notify_stop()
            process_instance._process.stop()

    def _set_publisher_endpoints(self, process_instance, publisher_streams=None):
        """ Creates and attaches named stream publishers
        """
        publisher_streams = publisher_streams or {}
        names = []

        for name, stream_id in publisher_streams.iteritems():
            # problem is here
            pub = StreamPublisher(process=process_instance, stream_id=stream_id)

            setattr(process_instance, name, pub)
            names.append(name)

        return names
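    # Example of the publish_streams mapping consumed above (the stream id is a
    # placeholder): each key becomes an attribute on the process instance holding
    # a StreamPublisher bound to that stream.
    #
    #     config = {"process": {"publish_streams": {"output": "stream_id_1234"}}}
    #
    # After spawning, the process code can publish through that attribute, e.g.
    # something like self.output.publish(granule).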

    def _register_process(self, process_instance, name):
        """
        Performs all actions related to registering the new process in the system.
        Also performs process type specific registration, such as for services and agents
        """
        # Add process instance to container's process dict
        if name in self.procs_by_name:
            log.warn("Process name already registered in container: %s" % name)
        self.procs_by_name[name] = process_instance
        self.procs[process_instance.id] = process_instance

        # Add Process to resource registry
        process_instance.errcause = "registering"

        if process_instance._proc_type != PROCTYPE_IMMEDIATE:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                proc_obj = Process(name=process_instance.id, label=name,
                                   process_type=process_instance._proc_type,
                                   service_name=getattr(process_instance, "name", None) or "",
                                   process_state=ProcessStateEnum.RUNNING)
                proc_id, _ = self.container.resource_registry.create(proc_obj)
                process_instance._proc_res_id = proc_id

                # Associate process with container resource
                self.container.resource_registry.create_association(self.cc_id, PRED.hasProcess, proc_id)
        else:
            process_instance._proc_res_id = None

        # Process type specific registration
        if process_instance._proc_type == PROCTYPE_SERVICE:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                # Registration of SERVICE process: in resource registry
                service_list, _ = self.container.resource_registry.find_resources(
                        restype=RT.Service, name=process_instance.name, id_only=True)
                if service_list:
                    process_instance._proc_svc_id = service_list[0]
                    if len(service_list) > 1:
                        log.warn("More than 1 Service resource found with name %s: %s", process_instance.name, service_list)
                else:
                    # We are starting the first process of a service instance
                    # TODO: This should be created by the HA Service agent in the future
                    svc_obj = Service(name=process_instance.name, exchange_name=process_instance._proc_listen_name,
                                      state=ServiceStateEnum.READY)
                    process_instance._proc_svc_id, _ = self.container.resource_registry.create(svc_obj)

                    # Create association to service definition resource
                    svcdef_list, _ = self.container.resource_registry.find_resources(
                            restype=RT.ServiceDefinition, name=process_instance.name, id_only=True)
                    if svcdef_list:
                        if len(svcdef_list) > 1:
                            log.warn("More than 1 ServiceDefinition resource found with name %s: %s", process_instance.name, svcdef_list)
                        self.container.resource_registry.create_association(
                                process_instance._proc_svc_id, PRED.hasServiceDefinition, svcdef_list[0])
                    else:
                        log.error("Cannot find ServiceDefinition resource for %s", process_instance.name)

                self.container.resource_registry.create_association(
                        process_instance._proc_svc_id, PRED.hasProcess, proc_id)

        elif process_instance._proc_type == PROCTYPE_AGENT:
            if self.container.has_capability(self.container.CCAP.DIRECTORY):
                # Registration of AGENT process: in Directory
                caps = process_instance.get_capabilities()
                self.container.directory.register("/Agents", process_instance.id,
                        **dict(name=process_instance._proc_name,
                               container=process_instance.container.id,
                               resource_id=process_instance.resource_id,
                               agent_id=process_instance.agent_id,
                               def_id=process_instance.agent_def_id,
                               capabilities=caps))

        self._call_proc_state_changed(process_instance, ProcessStateEnum.RUNNING)
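
    # Summary comment (added for clarity, not in the original source): at this point the process
    # is tracked in the container (procs / procs_by_name), registered as a Process resource in the
    # resource registry (for non-IMMEDIATE types), linked to a Service resource and its
    # ServiceDefinition for SERVICE processes, and listed under /Agents in the Directory for AGENT
    # processes; the RUNNING state change above is the final step of registration.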

    def terminate_process(self, process_id, do_notifications=True):
        """
        Terminates a process and all its resources. Termination is graceful, with a timeout.

        @param  process_id          The id of the process to terminate. Must exist in the container's
                                    list of processes, otherwise BadRequest is raised.
        @param  do_notifications    If True, emits process state change events for TERMINATING and TERMINATED.
                                    If False, suppresses these state changes. Used when the process already
                                    reached EXITED or FAILED.
        """
        process_instance = self.procs.get(process_id, None)
        if not process_instance:
            raise BadRequest("Cannot terminate. Process id='%s' unknown on container id='%s'" % (
                                        process_id, self.container.id))

        log.info("ProcManager.terminate_process: %s -> pid=%s", process_instance._proc_name, process_id)

        if do_notifications:
            self._call_proc_state_changed(process_instance, ProcessStateEnum.TERMINATING)

        self._process_quit(process_instance)

        self._unregister_process(process_id, process_instance)

        if do_notifications:
            self._call_proc_state_changed(process_instance, ProcessStateEnum.TERMINATED)
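
    # Minimal usage sketch (illustration only, assuming the container exposes this manager as
    # container.proc_manager; "pid" is a hypothetical id returned by an earlier spawn call):
    #
    #     try:
    #         container.proc_manager.terminate_process(pid)
    #     except BadRequest:
    #         log.warn("Process %s was not known to this container", pid)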

    def _unregister_process(self, process_id, process_instance):
        # Remove process registration in resource registry
        if process_instance._proc_res_id:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                try:
                    self.container.resource_registry.delete(process_instance._proc_res_id, del_associations=True)
                except NotFound:
                    # OK if already gone
                    pass
                except Exception:
                    log.exception("Error deleting Process resource %s", process_instance._proc_res_id)

        # Cleanup for specific process types
        if process_instance._proc_type == PROCTYPE_SERVICE:
            if self.container.has_capability(self.container.CCAP.RESOURCE_REGISTRY):
                # Check if this is the last process for this service and do auto delete service resources here
                svcproc_list, _ = self.container.resource_registry.find_objects(
                        process_instance._proc_svc_id, PRED.hasProcess, RT.Process, id_only=True)
                if not svcproc_list:
                    try:
                        self.container.resource_registry.delete(process_instance._proc_svc_id, del_associations=True)
                    except NotFound:
                        # OK if already gone
                        pass
                    except Exception:
                        log.exception("Error deleting Service resource %s", process_instance._proc_svc_id)

        elif process_instance._proc_type == PROCTYPE_AGENT:
            if self.container.has_capability(self.container.CCAP.DIRECTORY):
                self.container.directory.unregister_safe("/Agents", process_instance.id)

        # Remove internal registration in container
        del self.procs[process_id]
        if process_instance._proc_name in self.procs_by_name:
            del self.procs_by_name[process_instance._proc_name]
        else:
            log.warn("Process name %s not in local registry", process_instance.name)


    def _publish_process_event(self, proc_inst, state):
        sub_type = ProcessStateEnum._str_map.get(state, state)
        if isinstance(proc_inst, basestring):
            # This is a PENDING process that only has a name and no process id yet,
            # so no event is published. The disabled call is kept for reference:
            # self.event_pub.publish_event(event_type=OT.ProcessLifecycleEvent,
            #         origin=proc_inst, origin_type=RT.Process, sub_type=sub_type,
            #         state=state,
            #         container_id=self.container.id,
            #         process_type="", process_name=proc_inst,
            #         process_resource_id="", service_name="")
            pass
        else:
            try:
                self.event_pub.publish_event(event_type=OT.ProcessLifecycleEvent,
                        origin=getattr(proc_inst, "id", "PD"), origin_type=RT.Process, sub_type=sub_type,
                        state=state,
                        container_id=self.container.id,
                        process_type=getattr(proc_inst, "_proc_type", ""),
                        process_name=getattr(proc_inst, "_proc_name", ""),
                        process_resource_id=getattr(proc_inst, "_proc_res_id", ""),
                        service_name=getattr(proc_inst, "name", ""))
            except Exception:
                log.exception("Could not publish process event")
    # -----------------------------------------------------------------

    def _start_process_dispatcher(self):
        from ion.core.process.pd_core import ProcessDispatcher
        self.pd_core = ProcessDispatcher(container=self.container, config=self.pd_cfg)
        self.pd_core.start()

    def _stop_process_dispatcher(self):
        if self.pd_core:
            self.pd_core.stop()
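
    # Note (descriptive comment, added): the check on self.pd_core above makes stopping a no-op
    # when the process dispatcher was never started for this container (pd_core is presumably
    # initialized to None elsewhere, which is not shown in this excerpt).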
Пример #36
    def Xtest_pub_with_event_repo(self):
        pub = EventPublisher(event_type="ResourceEvent", node=self.container.node)
        pub.publish_event(origin="specifics", description="hallo")

        evs = self.container.event_repository.find_events(origin='specifics')
        self.assertEquals(len(evs), 1)
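
    # Note (added): the leading "X" in the method name keeps this test out of default
    # unittest/nose discovery (which collects methods matching the test_* pattern),
    # effectively disabling it.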