def process_execution(self, temp_vector, qc_params, bad_times):
        interval_key = uuid4().hex
        data_product_id = self.make_large_dataset(temp_vector)
        async_queue = Queue()

        def cb(event, *args, **kwargs):
            if '_'.join(event.qc_parameter.split('_')[1:]) not in qc_params:
                # Not a QC parameter we care about; ignore it
                return
            times = event.temporal_values
            self.assertEquals(len(times), bad_times)
            async_queue.put(1)

        es = EventSubscriber(event_type=OT.ParameterQCEvent,
                             origin=data_product_id,
                             callback=cb,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)
        config = DotDict()
        config.process.interval_key = interval_key
        config.process.qc_params = qc_params
        self.sync_launch(config)

        # So now the process is started, time to throw an event at it
        ep = EventPublisher(event_type='TimerEvent')
        ep.publish_event(origin=interval_key)

        try:
            async_queue.get(timeout=120)
        except Empty:
            raise AssertionError('QC was not flagged in time')
Example #2
    def on_start(self):
        self.data_source_subscriber = EventSubscriber(
            event_type=OT.ResourceModifiedEvent,
            origin_type=RT.DataSource,
            callback=self._register_data_source)
        self.provider_subscriber = EventSubscriber(
            event_type=OT.ResourceModifiedEvent,
            origin_type=RT.ExternalDataProvider,
            callback=self._register_provider)
        self.data_source_subscriber.start()
        self.provider_subscriber.start()

        self.rr = self.container.resource_registry

        self.using_eoi_services = CFG.get_safe('eoi.meta.use_eoi_services',
                                               False)
        self.server = CFG.get_safe(
            'eoi.importer_service.server', "localhost") + ":" + str(
                CFG.get_safe('eoi.importer_service.port', 8844))

        log.info("Using geoservices=" + str(self.using_eoi_services))
        if not self.using_eoi_services:
            log.warn("not using geoservices...")

        self.importer_service_available = self.check_for_importer_service()
        if not self.importer_service_available:
            log.warn("not using importer service...")
Example #3
    def process_execution(self, temp_vector, qc_params, bad_times):
        interval_key = uuid4().hex
        data_product_id = self.make_large_dataset(temp_vector)
        async_queue = Queue()

        def cb(event, *args, **kwargs):
            if '_'.join(event.qc_parameter.split('_')[1:]) not in qc_params:
                # Not a QC parameter we care about; ignore it
                return
            times = event.temporal_values
            self.assertEquals(len(times), bad_times)
            async_queue.put(1)


        es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=data_product_id, callback=cb, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)
        config = DotDict()
        config.process.interval_key = interval_key
        config.process.qc_params = qc_params
        self.sync_launch(config)

        # So now the process is started, time to throw an event at it
        ep = EventPublisher(event_type='TimerEvent')
        ep.publish_event(origin=interval_key)

        try:
            async_queue.get(timeout=120)
        except Empty:
            raise AssertionError('QC was not flagged in time')
Example #4
    def check_localrange(self):
        log.info('check_localrange')
        TestQCFunctions.check_localrange(self)
        self.init_check()

        flagged = Event()

        def cb(event, *args, **kwargs):
            times = event.temporal_values
            if not event.qc_parameter == 'tempwat_loclrng_qc':
                return
            np.testing.assert_array_equal(
                times,
                np.array([
                    3580144708.7555027, 3580144709.7555027, 3580144710.7555027,
                    3580144711.7555027, 3580144712.7555027
                ]))
            flagged.set()

        event_subscriber = EventSubscriber(event_type=OT.ParameterQCEvent,
                                           origin=self.dp_id,
                                           callback=cb,
                                           auto_delete=True)
        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
        self.dataset_monitor.event.wait(10)
        rdt = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(self.dataset_id))
        np.testing.assert_array_almost_equal(rdt['tempwat_loclrng_qc'],
                                             [1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
        self.assertTrue(flagged.wait(10))
Example #5
    def check_global_range(self):
        TestQCFunctions.check_global_range(self)
        self.init_check()

        flagged = Event()

        def cb(event, *args, **kwargs):
            times = event.temporal_values
            self.assertEquals(times, [0.0, 7.0])
            flagged.set()

        event_subscriber = EventSubscriber(event_type=OT.ParameterQCEvent,
                                           origin=self.dp_id,
                                           callback=cb,
                                           auto_delete=True)
        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
        self.dataset_monitor.event.wait(10)

        rdt = RecordDictionaryTool.load_from_granule(
            self.data_retriever.retrieve(self.dataset_id))
        np.testing.assert_array_almost_equal(rdt['tempwat_glblrng_qc'],
                                             [0, 1, 1, 1, 1, 1, 1, 0])
        self.assertTrue(flagged.wait(10))
Example #6
    def test_pub_on_different_subtypes(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(event, *args, **kwargs):
            self.count += 1
            gq.put(event)
            if event.description == "end":
                ar.set()

        sub = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1", callback=cb)
        sub.start()

        pub1 = EventPublisher(event_type="ResourceModifiedEvent")
        pub2 = EventPublisher(event_type="ContainerLifecycleEvent")

        pub1.publish_event(origin="two", sub_type="st2", description="2")
        pub2.publish_event(origin="three", sub_type="st1", description="3")
        pub1.publish_event(origin="one", sub_type="st1", description="1")
        pub1.publish_event(origin="four", sub_type="st1", description="end")

        ar.get(timeout=5)
        sub.stop()

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), 2)
        self.assertEquals(res[0].description, "1")
Example #7
    def process_execution(self, temp_vector, qc_params, bad_times):
        interval_key = uuid4().hex
        data_product_id = self.make_large_dataset(temp_vector)
        async_queue = Queue()

        def cb(event, *args, **kwargs):
            times = event.temporal_values
            self.assertEquals(len(times), bad_times)
            async_queue.put(1)

        es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=data_product_id, callback=cb, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)
        config = DotDict()
        config.process.interval_key = interval_key
        config.process.qc_params = qc_params
        self.process_dispatcher.schedule_process(self.process_definition_id, process_id=self.process_id, configuration=config)  # Schedule the process; startup completes asynchronously

        self.sync_launch(config)

        # So now the process is started, time to throw an event at it
        ep = EventPublisher(event_type='TimerEvent')
        ep.publish_event(origin=interval_key)

        try:
            for i in xrange(24):
                async_queue.get(timeout=10)
        except Empty:
            raise AssertionError('QC was not flagged in time: %d' % i)
Example #8
    def test_qc_interval_integration(self):

        # 1 need to make a dataset that only has one discrete qc violation
        # 2 Launch the process
        # 3 Setup the scheduler to run it say three times
        # 4 Get the Events and verify the data
    
        #-------------------------------------------------------------------------------- 
        # Make a dataset that has only one discrete qc violation
        #-------------------------------------------------------------------------------- 

        dp_id, dataset_id, stream_def_id = self.make_data_product()
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)
        for rdt in self.populate_vectors(stream_def_id, 1, lambda x : [41] + [39] * (x-1)):
            ph.publish_rdt_to_data_product(dp_id, rdt)
        self.assertTrue(monitor.event.wait(10)) # Give it 10 seconds to populate


        #--------------------------------------------------------------------------------
        # Launch the process
        #--------------------------------------------------------------------------------

        interval_key = uuid4().hex
        config = DotDict()
        config.process.interval_key = interval_key
        config.process.qc_params = ['glblrng_qc'] # The others are tested in other tests for completeness
        self.sync_launch(config)

        async_queue = Queue()
        def callback(event, *args, **kwargs):
            times = event.temporal_values
            self.assertEquals(len(times), 1)
            async_queue.put(1)
        es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=dp_id, callback=callback, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        #--------------------------------------------------------------------------------
        # Setup the scheduler
        #--------------------------------------------------------------------------------


        timer_id = self.scheduler_service.create_interval_timer(start_time=time.time(),
                end_time=time.time()+13,
                interval=5,
                event_origin=interval_key)


        #--------------------------------------------------------------------------------
        # Get the events and verify them
        #--------------------------------------------------------------------------------

        try:
            for i in xrange(2):
                async_queue.get(timeout=10)
        except Empty:
            raise AssertionError('QC Events not raised')
Example #9
    def test_derived_data_product(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

        tdom, sdom = time_series_domain()

        dp = DataProduct(name='Instrument DP', temporal_domain=tdom.dump(), spatial_domain=sdom.dump())
        dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)


        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]
        
        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(name='TEMPWAT stream def', parameter_dictionary_id=pdict_id, available_fields=['time','temp'])
        tempwat_dp = DataProduct(name='TEMPWAT')
        tempwat_dp_id = self.dpsc_cli.create_data_product(tempwat_dp, stream_definition_id=simple_stream_def_id, parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
        self.dpsc_cli.activate_data_product_persistence(tempwat_dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, tempwat_dp_id)
        # Check that the streams associated with the data product are persisted
        stream_ids, _ =  self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id,route)
        
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id, PRED.hasDataset, id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time','temp']))
Example #10
    def test_event_subscriber_auto_delete(self):
        mocknode = Mock()
        ev = EventSubscriber(event_type="ProcessLifecycleEvent", callback=lambda *a,**kw: None, auto_delete=sentinel.auto_delete, node=mocknode)
        self.assertEquals(ev._auto_delete, sentinel.auto_delete)

        # we don't want to have to patch out everything here, so call initialize directly, which calls create_channel for us
        ev._setup_listener = Mock()
        ev.initialize(sentinel.binding)

        self.assertEquals(ev._chan.queue_auto_delete, sentinel.auto_delete)
Example #11
    def start(self):
        self.container.stats_mgr = ContainerStatsManager(self.container)
        self.container.stats_mgr.start()

        ## create queue listener and publisher
        self.sender = EventPublisher(event_type="ContainerManagementResult")
        self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event)
        with self.lock:
            self.running = True
            self.receiver.start()
        log.debug('Container ready for container management requests')
Example #12
    def test_event_subscriber_auto_delete(self):
        mocknode = Mock()
        ev = EventSubscriber(event_type="ProcessLifecycleEvent",
                             callback=lambda *a, **kw: None,
                             auto_delete=sentinel.auto_delete,
                             node=mocknode)
        self.assertEquals(ev._auto_delete, sentinel.auto_delete)

        # we don't want to have to patch out everything here, so call initialize directly, which calls create_channel for us
        ev._setup_listener = Mock()
        ev.initialize(sentinel.binding)

        self.assertEquals(ev._chan.queue_auto_delete, sentinel.auto_delete)
Example #13
 def on_start(self):
     '''
     Process initialization
     '''
     self._thread = self._process.thread_manager.spawn(self.thread_loop)
     self._event_subscriber = EventSubscriber(
         event_type=OT.ResetQCEvent,
         callback=self.receive_event,
         auto_delete=True)  # TODO Correct event types
     self._event_subscriber.start()
     self.timeout = self.CFG.get_safe('endpoint.receive.timeout', 10)
     self.resource_registry = self.container.resource_registry
     self.event_queue = Queue()
Example #14
    def on_start(self):
        # Persister thread
        self._persist_greenlet = spawn(self._persister_loop,
                                       self.persist_interval)
        log.debug(
            'EventPersister persist greenlet started in "%s" (interval %s)',
            self.__class__.__name__, self.persist_interval)

        # Event subscription
        self.event_sub = EventSubscriber(pattern=EventSubscriber.ALL_EVENTS,
                                         callback=self._on_event,
                                         queue_name="event_persister",
                                         auto_delete=False)

        self.event_sub.start()
Example #15
 def on_start(self):
     TransformDataProcess.on_start(self)
     self.pubsub_management = PubsubManagementServiceProcessClient(
         process=self)
     self.stored_values = StoredValueManager(self.container)
     self.input_data_product_ids = self.CFG.get_safe(
         'process.input_products', [])
     self.output_data_product_ids = self.CFG.get_safe(
         'process.output_products', [])
     self.lookup_docs = self.CFG.get_safe('process.lookup_docs', [])
     self.new_lookups = Queue()
     self.lookup_monitor = EventSubscriber(
         event_type=OT.ExternalReferencesUpdatedEvent,
         callback=self._add_lookups,
         auto_delete=True)
     self.lookup_monitor.start()
Example #16
    def test_base_subscriber_as_catchall(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            gq.put(args[0])
            if self.count == 2:
                ar.set()

        sub = EventSubscriber(callback=cb)
        pub1 = EventPublisher(event_type="ResourceEvent")
        pub2 = EventPublisher(event_type="ContainerLifecycleEvent")

        self._listen(sub)

        pub1.publish_event(origin="some", description="1")
        pub2.publish_event(origin="other", description="2")

        ar.get(timeout=5)

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), 2)
        self.assertEquals(res[0].description, "1")
        self.assertEquals(res[1].description, "2")
Example #17
    def test_pub_sub_exception_event_origin(self):
        #test origin
        ar = event.AsyncResult()

        self.count = 0

        def cb(*args, **kwargs):
            self.count = self.count + 1
            ar.set(args[0])

        sub = EventSubscriber(event_type="ExceptionEvent",
                              callback=cb,
                              origin="specific")
        self._listen(sub)

        @handle_stream_exception("specific")
        def _test_origin():
            raise CorruptionError()

        _test_origin()

        exception_event = ar.get(timeout=5)

        self.assertEquals(self.count, 1)
        self.assertEquals(exception_event.exception_type,
                          "<class 'pyon.core.exception.CorruptionError'>")
        self.assertEquals(exception_event.origin, "specific")
Example #18
    def on_start(self):  # pragma: no cover
        #--------------------------------------------------------------------------------
        # Explicit on_start
        #--------------------------------------------------------------------------------

        # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created
        # We want explicit management of the thread and subscriber object for ingestion

        TransformStreamProcess.on_start(self)
        
        self.queue_name = self.CFG.get_safe('process.queue_name',self.id)
        self.subscriber = StreamSubscriber(process=self, exchange_name=self.queue_name, callback=self.receive_callback)
        self.thread_lock = RLock()
        
        #--------------------------------------------------------------------------------
        # Normal on_start after this point
        #--------------------------------------------------------------------------------

        BaseIngestionWorker.on_start(self)
        self._rpc_server = self.container.proc_manager._create_listening_endpoint(from_name=self.id, process=self)
        self.add_endpoint(self._rpc_server)

        self.event_publisher = EventPublisher(OT.DatasetModified)
        self.stored_value_manager = StoredValueManager(self.container)

        self.lookup_docs = self.CFG.get_safe('process.lookup_docs',[])
        self.input_product = self.CFG.get_safe('process.input_product','')
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(event_type=OT.ExternalReferencesUpdatedEvent, callback=self._add_lookups, auto_delete=True)
        self.add_endpoint(self.lookup_monitor)
        self.connection_id = ''
        self.connection_index = None
        
        self.start_listener()
Example #19
    def start(self):
        log.debug("GovernanceController starting ...")
        self._CFG = CFG

        self.enabled = CFG.get_safe('interceptor.interceptors.governance.config.enabled', False)
        if not self.enabled:
            log.warn("GovernanceInterceptor disabled by configuration")
        self.policy_event_subscriber = None

        # Containers default to not Org Boundary and ION Root Org
        self._is_container_org_boundary = CFG.get_safe('container.org_boundary', False)
        self._container_org_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))
        self._container_org_id = None
        self._system_root_org_name = CFG.get_safe('system.root_org', 'ION')

        self._is_root_org_container = (self._container_org_name == self._system_root_org_name)

        self.system_actor_id = None
        self.system_actor_user_header = None

        self.rr_client = ResourceRegistryServiceProcessClient(process=self.container)
        self.policy_client = PolicyManagementServiceProcessClient(process=self.container)

        if self.enabled:
            config = CFG.get_safe('interceptor.interceptors.governance.config')
            self.initialize_from_config(config)

            self.policy_event_subscriber = EventSubscriber(event_type=OT.PolicyEvent, callback=self.policy_event_callback)
            self.policy_event_subscriber.start()

            self._policy_snapshot = self._get_policy_snapshot()
            self._log_policy_update("start_governance_ctrl", message="Container start")
Example #20
    def start(self):

        log.debug("GovernanceController starting ...")

        self._CFG = CFG

        self.enabled = CFG.get_safe('interceptor.interceptors.governance.config.enabled', False)

        log.info("GovernanceInterceptor enabled: %s" % str(self.enabled))

        self.policy_event_subscriber = None

        #containers default to not Org Boundary and ION Root Org
        self._is_container_org_boundary = CFG.get_safe('container.org_boundary',False)
        self._container_org_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))
        self._container_org_id = None
        self._system_root_org_name = CFG.get_safe('system.root_org', 'ION')

        self._is_root_org_container = (self._container_org_name == self._system_root_org_name)

        self.system_actor_id = None
        self.system_actor_user_header = None

        if self.enabled:

            config = CFG.get_safe('interceptor.interceptors.governance.config')

            self.initialize_from_config(config)

            self.policy_event_subscriber = EventSubscriber(event_type=OT.PolicyEvent, callback=self.policy_event_callback)
            self.policy_event_subscriber.start()

            self.rr_client = ResourceRegistryServiceProcessClient(node=self.container.node, process=self.container)
            self.policy_client = PolicyManagementServiceProcessClient(node=self.container.node, process=self.container)
Example #21
    def test_pub_on_different_origins(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            gq.put(args[0])
            if self.count == 3:
                ar.set()

        sub = EventSubscriber(event_type="ResourceEvent", callback=cb)
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)

        pub.publish_event(origin="one", description="1")
        pub.publish_event(origin="two", description="2")
        pub.publish_event(origin="three", description="3")

        ar.get(timeout=5)

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), 3)
        self.assertEquals(res[0].description, "1")
        self.assertEquals(res[1].description, "2")
        self.assertEquals(res[2].description, "3")
Example #22
 def start(self):
     ## create queue listener and publisher
     self.sender = EventPublisher(event_type="ContainerManagementResult")
     self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event)
     with self.lock:
         self.running = True
         self.receiver.start()
     log.info('ready for container management requests')
Example #23
    def test_global_range_test(self):
        TestQCFunctions.test_global_range_test(self)

        flagged = Event()
        def cb(event, *args, **kwargs):
            times = event.temporal_values
            self.assertEquals(times,[0.0, 7.0])
            flagged.set()

        event_subscriber = EventSubscriber(event_type=OT.ParameterQCEvent,origin=self.dataset_id, callback=cb, auto_delete=True)
        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
        self.dataset_monitor.event.wait(10)

        rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(self.dataset_id))
        np.testing.assert_array_almost_equal(rdt['tempwat_glblrng_qc'], [0, 1, 1, 1, 1, 1, 1, 0])
        self.assertTrue(flagged.wait(10))
Example #24
 def on_start(self):
     '''
     Process initialization
     '''
     self._thread = self._process.thread_manager.spawn(self.thread_loop)
     self._event_subscriber = EventSubscriber(event_type=OT.ResetQCEvent, callback=self.receive_event, auto_delete=True) # TODO Correct event types
     self._event_subscriber.start()
     self.timeout = self.CFG.get_safe('endpoint.receive.timeout', 10)
     self.resource_registry = self.container.resource_registry
     self.event_queue = Queue()
Example #25
 def on_start(self):
     TransformDataProcess.on_start(self)
     self.pubsub_management = PubsubManagementServiceProcessClient(process=self)
     self.stored_values = StoredValueManager(self.container)
     self.input_data_product_ids = self.CFG.get_safe('process.input_products', [])
     self.output_data_product_ids = self.CFG.get_safe('process.output_products', [])
     self.lookup_docs = self.CFG.get_safe('process.lookup_docs',[])
     self.new_lookups = Queue()
     self.lookup_monitor = EventSubscriber(event_type=OT.ExternalReferencesUpdatedEvent,callback=self._add_lookups, auto_delete=True)
     self.lookup_monitor.start()
Example #26
    def make_large_dataset(self, temp_vector):

        monitor_queue = Queue()
        # Make 27 hours of data
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        data_product_id, dataset_id, stream_def_id = self.make_data_product()
        es = EventSubscriber(event_type=OT.DatasetModified, origin=dataset_id, auto_delete=True, callback = lambda *args, **kwargs : monitor_queue.put(1))
        es.start()
        self.addCleanup(es.stop)
        for rdt in self.populate_vectors(stream_def_id, 3, temp_vector):
            ph.publish_rdt_to_data_product(data_product_id, rdt)

        try:
            for i in xrange(3):
                monitor_queue.get(timeout=10)
        except Empty:
            raise AssertionError('Failed to populate dataset in time')

            
        return data_product_id
Example #27
    def on_start(self):
        # Persister thread
        self._persist_greenlet = spawn(self._persister_loop, self.persist_interval)
        log.debug('EventPersister persist greenlet started in "%s" (interval %s)', self.__class__.__name__, self.persist_interval)

        # Event subscription
        self.event_sub = EventSubscriber(pattern=EventSubscriber.ALL_EVENTS,
                                         callback=self._on_event,
                                         queue_name="event_persister")

        self.event_sub.start()
Example #28
    def start(self):
        log.debug("GovernanceController starting ...")
        self._CFG = CFG

        self.enabled = CFG.get_safe(
            'interceptor.interceptors.governance.config.enabled', False)
        if not self.enabled:
            log.warn("GovernanceInterceptor disabled by configuration")
        self.policy_event_subscriber = None

        # Containers default to not Org Boundary and ION Root Org
        self._is_container_org_boundary = CFG.get_safe(
            'container.org_boundary', False)
        self._container_org_name = CFG.get_safe(
            'container.org_name', CFG.get_safe('system.root_org', 'ION'))
        self._container_org_id = None
        self._system_root_org_name = CFG.get_safe('system.root_org', 'ION')

        self._is_root_org_container = (
            self._container_org_name == self._system_root_org_name)

        self.system_actor_id = None
        self.system_actor_user_header = None

        self.rr_client = ResourceRegistryServiceProcessClient(
            process=self.container)
        self.policy_client = PolicyManagementServiceProcessClient(
            process=self.container)

        if self.enabled:
            config = CFG.get_safe('interceptor.interceptors.governance.config')
            self.initialize_from_config(config)

            self.policy_event_subscriber = EventSubscriber(
                event_type=OT.PolicyEvent, callback=self.policy_event_callback)
            self.policy_event_subscriber.start()

            self._policy_snapshot = self._get_policy_snapshot()
            self._log_policy_update("start_governance_ctrl",
                                    message="Container start")
Example #29
    def test_pub_on_different_subtypes(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(event, *args, **kwargs):
            self.count += 1
            gq.put(event)
            if event.description == "end":
                ar.set()

        sub = EventSubscriber(event_type="ResourceModifiedEvent",
                              sub_type="st1",
                              callback=cb)
        sub.start()

        pub1 = EventPublisher(event_type="ResourceModifiedEvent")
        pub2 = EventPublisher(event_type="ContainerLifecycleEvent")

        pub1.publish_event(origin="two", sub_type="st2", description="2")
        pub2.publish_event(origin="three", sub_type="st1", description="3")
        pub1.publish_event(origin="one", sub_type="st1", description="1")
        pub1.publish_event(origin="four", sub_type="st1", description="end")

        ar.get(timeout=5)
        sub.stop()

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), 2)
        self.assertEquals(res[0].description, "1")
Example #30
    def __init__(self, read_process_fn=None, process_id='', desired_state=None, *args, **kwargs):

        if not process_id:
            raise BadRequest("ProcessStateGate trying to wait on invalid process (id = '%s')" % process_id)

        EventSubscriber.__init__(self, *args,
                                 callback=self.trigger_cb,
                                 event_type="ProcessLifecycleEvent",
                                 origin=process_id,
                                 origin_type="DispatchedProcess",
                                 **kwargs)

        self.desired_state = desired_state
        self.process_id = process_id
        self.read_process_fn = read_process_fn
        self.last_chance = None
        self.first_chance = None

        _ = ProcessStateEnum._str_map[self.desired_state] # make sure state exists
        log.info("ProcessStateGate is going to wait on process '%s' for state '%s'",
                self.process_id,
                ProcessStateEnum._str_map[self.desired_state])
Example #31
    def on_start(self):
        self.data_source_subscriber = EventSubscriber(event_type=OT.ResourceModifiedEvent,
                                                      origin_type=RT.DataSource,
                                                      callback=self._register_data_source)
        self.provider_subscriber = EventSubscriber(event_type=OT.ResourceModifiedEvent,
                                                      origin_type=RT.ExternalDataProvider,
                                                      callback=self._register_provider)
        self.data_source_subscriber.start()
        self.provider_subscriber.start()

        self.rr = self.container.resource_registry

        self.using_eoi_services = CFG.get_safe('eoi.meta.use_eoi_services', False)
        self.server = CFG.get_safe('eoi.importer_service.server', "localhost")+":"+str(CFG.get_safe('eoi.importer_service.port', 8844))

        log.info("Using geoservices="+str(self.using_eoi_services))
        if not self.using_eoi_services:
            log.warn("not using geoservices...") 

        self.importer_service_available = self.check_for_importer_service()
        if not self.importer_service_available:
            log.warn("not using importer service...")  
Example #32
    def check_localrange(self):
        log.info('check_localrange')
        TestQCFunctions.check_localrange(self)
        self.init_check()

        flagged = Event()
        def cb(event, *args, **kwargs):
            times = event.temporal_values
            if not event.qc_parameter == 'tempwat_loclrng_qc':
                return
            np.testing.assert_array_equal( times, np.array([ 3580144708.7555027, 3580144709.7555027, 3580144710.7555027, 3580144711.7555027, 3580144712.7555027]))
            flagged.set()

        event_subscriber = EventSubscriber(event_type = OT.ParameterQCEvent, origin=self.dp_id, callback=cb, auto_delete=True)
        event_subscriber.start()
        self.addCleanup(event_subscriber.stop)

        self.ph.publish_rdt_to_data_product(self.dp_id, self.rdt)
        self.assertTrue(self.dataset_monitor.wait())
        rdt = RecordDictionaryTool.load_from_granule(self.data_retriever.retrieve(self.dataset_id))
        np.testing.assert_array_almost_equal(rdt['tempwat_loclrng_qc'], [1 ,1 ,1 ,1 ,1 ,0 ,0 ,0 ,0 ,0])
        self.assertTrue(flagged.wait(10))
Example #33
    def start(self):
        # Install the container tracer (could be its own component)
        self.container_tracer = ContainerTracer()
        self.container_tracer.start_tracing()
        self.container.tracer = CallTracer
        self.container.tracer.configure(CFG.get_safe("container.tracer", {}))

        ## create queue listener and publisher
        self.sender = EventPublisher(event_type="ContainerManagementResult")
        self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event)
        with self.lock:
            self.running = True
            self.receiver.start()
        log.info('ready for container management requests')
Example #34
    def on_start(self):  # pragma: no cover
        super(ScienceGranuleIngestionWorker,self).on_start()
        self.event_publisher = EventPublisher(OT.DatasetModified)
        self.stored_value_manager = StoredValueManager(self.container)

        self.lookup_docs = self.CFG.get_safe('process.lookup_docs',[])
        self.input_product = self.CFG.get_safe('process.input_product','')
        self.qc_enabled = self.CFG.get_safe('process.qc_enabled', True)
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(event_type=OT.ExternalReferencesUpdatedEvent, callback=self._add_lookups, auto_delete=True)
        self.lookup_monitor.start()
        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        self.connection_id = ''
        self.connection_index = None
Example #35
    def make_large_dataset(self, temp_vector):

        monitor_queue = Queue()
        # Make 27 hours of data
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        data_product_id, dataset_id, stream_def_id = self.make_data_product()
        es = EventSubscriber(
            event_type=OT.DatasetModified,
            origin=dataset_id,
            auto_delete=True,
            callback=lambda *args, **kwargs: monitor_queue.put(1))
        es.start()
        self.addCleanup(es.stop)
        for rdt in self.populate_vectors(stream_def_id, 3, temp_vector):
            ph.publish_rdt_to_data_product(data_product_id, rdt)

        try:
            for i in xrange(3):
                monitor_queue.get(timeout=10)
        except Empty:
            raise AssertionError('Failed to populate dataset in time')

        return data_product_id
Example #36
    def test_pub_sub_exception_event(self):
        ar = event.AsyncResult()

        gq = queue.Queue()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            gq.put(args[0])
            if self.count == 3:
                ar.set()

        #test file system error event
        sub = EventSubscriber(event_type="ExceptionEvent",
                              callback=cb,
                              origin="stream_exception")
        self._listen(sub)

        @handle_stream_exception()
        def _raise_filesystem_error():
            raise FilesystemError()

        _raise_filesystem_error()

        @handle_stream_exception()
        def _raise_streaming_error():
            raise StreamingError()

        _raise_streaming_error()

        @handle_stream_exception()
        def _raise_corruption_error():
            raise CorruptionError()

        _raise_corruption_error()

        ar.get(timeout=5)
        res = []
        for i in xrange(self.count):
            exception_event = gq.get(timeout=5)
            res.append(exception_event)

        self.assertEquals(res[0].exception_type,
                          "<class 'pyon.core.exception.FilesystemError'>")
        self.assertEquals(res[1].exception_type,
                          "<class 'pyon.core.exception.StreamingError'>")
        self.assertEquals(res[2].exception_type,
                          "<class 'pyon.core.exception.CorruptionError'>")
        self.assertEquals(res[2].origin, "stream_exception")
Example #37
 def on_start(self):
     SimpleProcess.on_start(self)
     self.data_retriever = DataRetrieverServiceProcessClient(process=self)
     self.interval_key = self.CFG.get_safe('process.interval_key', None)
     self.qc_params = self.CFG.get_safe('process.qc_params', [])
     validate_is_not_none(
         self.interval_key,
         'An interval key is necessary to launch this process')
     self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent,
                                             origin=self.interval_key,
                                             callback=self._event_callback,
                                             auto_delete=True)
     self.add_endpoint(self.event_subscriber)
     self.resource_registry = self.container.resource_registry
     self.run_interval = self.CFG.get_safe(
         'service.qc_processing.run_interval', 24)
Example #38
    def start(self):
        # Create directory root entry (for current org) if not existing
        if CFG.system.auto_bootstrap:
            root_de = self.register("/", "DIR", sys_name=bootstrap.get_sys_name())
            if root_de is None:
                # We created this directory just now
                pass

        if self.events_enabled:
            # init change event publisher
            self.event_pub = EventPublisher()

            # Register to receive directory changes
            self.event_sub = EventSubscriber(event_type="ContainerConfigModifiedEvent",
                                             origin="Directory",
                                             callback=self.receive_directory_change_event)
Example #39
    def test_subscriber_listening_for_specific_origin(self):
        ar = event.AsyncResult()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            ar.set(args[0])

        sub = EventSubscriber(event_type="ResourceEvent",
                              origin="specific",
                              callback=cb)
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)

        pub.publish_event(origin="notspecific", description="1")
        pub.publish_event(origin="notspecific", description="2")
        pub.publish_event(origin="specific", description="3")
        pub.publish_event(origin="notspecific", description="4")

        evmsg = ar.get(timeout=5)
        self.assertEquals(self.count, 1)
        self.assertEquals(evmsg.description, "3")
Example #40
    def test_pub_on_different_subsubtypes(self):
        res_list = [
            DotDict(ar=event.AsyncResult(), gq=queue.Queue(), count=0)
            for i in xrange(4)
        ]

        def cb_gen(num):
            def cb(event, *args, **kwargs):
                res_list[num].count += 1
                res_list[num].gq.put(event)
                if event.description == "end":
                    res_list[num].ar.set()

            return cb

        sub0 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="st1.*",
                               callback=cb_gen(0))
        sub0.start()

        sub1 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="st1.a",
                               callback=cb_gen(1))
        sub1.start()

        sub2 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="*.a",
                               callback=cb_gen(2))
        sub2.start()

        sub3 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="st1",
                               callback=cb_gen(3))
        sub3.start()

        pub1 = EventPublisher(event_type="ResourceModifiedEvent")

        pub1.publish_event(origin="one", sub_type="st1.a", description="1")
        pub1.publish_event(origin="two", sub_type="st1", description="2")
        pub1.publish_event(origin="three", sub_type="st1.b", description="3")

        pub1.publish_event(origin="four", sub_type="st2.a", description="4")
        pub1.publish_event(origin="five", sub_type="st2", description="5")

        pub1.publish_event(origin="six", sub_type="a", description="6")
        pub1.publish_event(origin="seven", sub_type="", description="7")

        pub1.publish_event(origin="end", sub_type="st1.a", description="end")
        pub1.publish_event(origin="end", sub_type="st1", description="end")

        [res_list[i].ar.get(timeout=5) for i in xrange(3)]

        sub0.stop()
        sub1.stop()
        sub2.stop()
        sub3.stop()

        for i in xrange(4):
            res_list[i].res = []
            for x in xrange(res_list[i].count):
                res_list[i].res.append(res_list[i].gq.get(timeout=5))

        self.assertEquals(len(res_list[0].res), 3)
        self.assertEquals(res_list[0].res[0].description, "1")

        self.assertEquals(len(res_list[1].res), 2)
        self.assertEquals(res_list[1].res[0].description, "1")

        self.assertEquals(len(res_list[2].res), 3)
        self.assertEquals(res_list[2].res[0].description, "1")

        self.assertEquals(len(res_list[3].res), 2)
        self.assertEquals(res_list[3].res[0].description, "2")
Example #41
class TransformPrime(TransformDataProcess):
    binding = ['output']
    '''
    Transforms which have an incoming stream and an outgoing stream.

    Parameters:
      process.stream_id      Outgoing stream identifier.
      process.exchange_point Route's exchange point.
      process.routing_key    Route's routing key.
      process.queue_name     Name of the queue to listen on.
      process.routes         streams,actor for each route {(stream_input_id, stream_output_id):actor} 
    Either the stream_id or both the exchange_point and routing_key need to be provided.
    '''
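    # Illustrative sketch (not part of the original source): one plausible shape
    # for the process configuration described above. Note that recv_packet()
    # below walks process.routes as a nested mapping
    # {stream_in_id: {stream_out_id: actor}}, whereas the docstring shows a
    # tuple-keyed form; the ids and actor module here are hypothetical.
    #
    #   config.process.queue_name = 'transform_prime_queue'
    #   config.process.routes = {
    #       '<stream_in_id>': {
    #           '<stream_out_id>': None,  # None -> _execute_transform()
    #           '<other_out_id>': {'module': 'my_pkg.actors', 'class': 'MyActor'},
    #       }
    #   }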
    def on_start(self):
        TransformDataProcess.on_start(self)
        self.pubsub_management = PubsubManagementServiceProcessClient(
            process=self)
        self.stored_values = StoredValueManager(self.container)
        self.input_data_product_ids = self.CFG.get_safe(
            'process.input_products', [])
        self.output_data_product_ids = self.CFG.get_safe(
            'process.output_products', [])
        self.lookup_docs = self.CFG.get_safe('process.lookup_docs', [])
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(
            event_type=OT.ExternalReferencesUpdatedEvent,
            callback=self._add_lookups,
            auto_delete=True)
        self.lookup_monitor.start()

    def on_quit(self):
        self.lookup_monitor.stop()
        TransformDataProcess.on_quit(self)

    def _add_lookups(self, event, *args, **kwargs):
        if event.origin in self.input_data_product_ids + self.output_data_product_ids:
            if isinstance(event.reference_keys, list):
                self.new_lookups.put(event.reference_keys)

    @memoize_lru(100)
    def read_stream_def(self, stream_id):
        return self.pubsub_management.read_stream_definition(
            stream_id=stream_id)

    def recv_packet(self, msg, stream_route, stream_id):
        process_routes = self.CFG.get_safe('process.routes', {})
        for stream_in_id, routes in process_routes.iteritems():
            if stream_id == stream_in_id:
                for stream_out_id, actor in routes.iteritems():
                    if actor is None:
                        rdt_out = self._execute_transform(
                            msg, (stream_in_id, stream_out_id))
                        self.publish(rdt_out.to_granule(), stream_out_id)
                    else:
                        outgoing = self._execute_actor(
                            msg, actor, (stream_in_id, stream_out_id))
                        self.publish(outgoing, stream_out_id)

    def publish(self, msg, stream_out_id):
        publisher = getattr(self, stream_out_id)
        publisher.publish(msg)

    def _load_actor(self, actor):
        '''
        Returns callable execute method if it exists, otherwise it raises a BadRequest
        '''
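        # Expected shape of `actor`, inferred from the lookups below (values are
        # illustrative only): {'module': 'my_pkg.actors', 'class': 'MyActor'}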
        try:
            module = __import__(actor['module'], fromlist=[''])
        except ImportError:
            log.exception('Actor could not be loaded')
            raise
        try:
            cls = getattr(module, actor['class'])
        except AttributeError:
            log.exception('Module %s does not have class %s', repr(module),
                          actor['class'])
            raise
        try:
            execute = getattr(cls, 'execute')
        except AttributeError:
            log.exception('Actor class does not contain execute method')
            raise
        return execute

    def _execute_actor(self, msg, actor, streams):
        stream_in_id, stream_out_id = streams
        stream_def_out = self.read_stream_def(stream_out_id)
        params = self.CFG.get_safe('process.params', {})
        config = self.CFG.get_safe('process')
        #do the stuff with the actor
        params['stream_def'] = stream_def_out._id
        executor = self._load_actor(actor)
        try:
            rdt_out = executor(msg, None, config, params, None)
        except:
            log.exception('Error running actor for %s', self.id)
            raise
        return rdt_out

    def _merge_pdicts(self, pdict1, pdict2):
        incoming_pdict = ParameterDictionary.load(pdict1)
        outgoing_pdict = ParameterDictionary.load(pdict2)

        merged_pdict = ParameterDictionary()
        for k, v in incoming_pdict.iteritems():
            ordinal, v = v
            if k not in merged_pdict:
                merged_pdict.add_context(v)
        for k, v in outgoing_pdict.iteritems():
            ordinal, v = v
            if k not in merged_pdict:
                merged_pdict.add_context(v)
        return merged_pdict

    def _merge_rdt(self, stream_def_in, stream_def_out):
        incoming_pdict_dump = stream_def_in.parameter_dictionary
        outgoing_pdict_dump = stream_def_out.parameter_dictionary

        merged_pdict = self._merge_pdicts(incoming_pdict_dump,
                                          outgoing_pdict_dump)
        rdt_temp = RecordDictionaryTool(param_dictionary=merged_pdict)
        return rdt_temp

    def _get_lookup_value(self, lookup_value):
        if not self.new_lookups.empty():
            new_values = self.new_lookups.get()
            self.lookup_docs = new_values + self.lookup_docs

        lookup_value_document_keys = self.lookup_docs
        for key in lookup_value_document_keys:
            try:
                document = self.stored_values.read_value(key)
                if lookup_value in document:
                    return document[lookup_value]
            except NotFound:
                log.warning('Specified lookup document does not exist')

        return None

    def _execute_transform(self, msg, streams):
        stream_in_id, stream_out_id = streams
        stream_def_in = self.read_stream_def(stream_in_id)
        stream_def_out = self.read_stream_def(stream_out_id)

        rdt_temp = self._merge_rdt(stream_def_in, stream_def_out)

        rdt_in = RecordDictionaryTool.load_from_granule(msg)
        for field in rdt_temp.fields:
            if not isinstance(
                    rdt_temp._pdict.get_context(field).param_type,
                    ParameterFunctionType):
                try:
                    rdt_temp[field] = rdt_in[field]
                except KeyError:
                    pass

        rdt_temp.fetch_lookup_values()

        for lookup_field in rdt_temp.lookup_values():
            s = lookup_field
            stored_value = self._get_lookup_value(
                rdt_temp.context(s).lookup_value)
            if stored_value is not None:
                rdt_temp[s] = stored_value

        for field in rdt_temp.fields:
            if isinstance(
                    rdt_temp._pdict.get_context(field).param_type,
                    ParameterFunctionType):
                rdt_temp[field] = rdt_temp[field]

        rdt_out = RecordDictionaryTool(stream_definition_id=stream_def_out._id)

        for field in rdt_out.fields:
            rdt_out[field] = rdt_temp[field]

        return rdt_out
Example #42
class ContainerManager(object):
    def __init__(self, container, handlers=DEFAULT_HANDLERS):
        self.container = container
        self.running = False
        # make sure start() completes before an event is handled,
        # and any event is either handled before stop() begins,
        # or the handler begins after stop() completes and the event is dropped
        self.lock = Lock()
        self.handlers = handlers[:]

    def start(self):
        self.container.stats_mgr = ContainerStatsManager(self.container)
        self.container.stats_mgr.start()

        ## create queue listener and publisher
        self.sender = EventPublisher(event_type="ContainerManagementResult")
        self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event)
        with self.lock:
            self.running = True
            self.receiver.start()
        log.debug('Container ready for container management requests')

    def stop(self):
        log.debug('container management stopping')
        with self.lock:
            self.receiver.stop()
            self.sender.close()
            self.running = False
        log.debug('container management stopped')

        self.container.stats_mgr.stop()

    def add_handler(self, handler):
        self.handlers.append(handler)

    def _get_handlers(self, action):
        out = []
        for handler in self.handlers:
            if handler.can_handle_request(action):
                out.append(handler)
        return out

    def _receive_event(self, event, headers):
        with self.lock:
            if not isinstance(event, ContainerManagementRequest):
                log.trace('ignoring wrong type event: %r', event)
                return
            if not self.running:
                log.warn('ignoring admin message received after shutdown: %s', event.action)
                return
            predicate = ContainerSelector.from_object(event.predicate)
            if predicate.should_handle(self.container):
                log.trace('handling admin message: %s', event.action)
                self._perform_action(event.action)
            else:
                log.trace('ignoring admin action: %s', event.action)
                if SEND_RESULT_IF_NOT_SELECTED:
                    self.sender.publish_event(origin=self.container.id, action=event.action, outcome='not selected')
                    log.debug('received action: %s, outcome: not selected', event.action)

    def _perform_action(self, action):
        handlers = self._get_handlers(action)
        if not handlers:
            log.info('action accepted but no handlers found: %s', action)
            result = 'unhandled'
            self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
            log.debug('received action: %s, outcome: %s', action, result)
        else:
            for handler in handlers:
                try:
                    result = handler.handle_request(action) or "completed"
                except Exception as e:
                    log.error("handler %r failed to perform action: %s", handler, action, exc_info=True)
                    result = e
                self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
                log.debug('performed action: %s, outcome: %s', action, result)
Example #43
    def test_derived_data_product(self):
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='ctd parsed', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition,
                        ctd_stream_def_id)

        dp = DataProduct(name='Instrument DP')
        dp_id = self.dpsc_cli.create_data_product(
            dp, stream_definition_id=ctd_stream_def_id)
        self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset  does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Make the derived data product
        simple_stream_def_id = self.pubsubcli.create_stream_definition(
            name='TEMPWAT stream def',
            parameter_dictionary_id=pdict_id,
            available_fields=['time', 'temp'])
        tempwat_dp = DataProduct(name='TEMPWAT',
                                 category=DataProductTypeEnum.DERIVED)
        tempwat_dp_id = self.dpsc_cli.create_data_product(
            tempwat_dp,
            stream_definition_id=simple_stream_def_id,
            parent_data_product_id=dp_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
        # Check that the streams associated with the data product are persisted with
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)
        rdt['pressure'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id,
                                                            PRED.hasDataset,
                                                            id_only=True)
        tempwat_dataset_id = tempwat_dataset_ids[0]
        granule = self.data_retriever.retrieve(
            tempwat_dataset_id, delivery_format=simple_stream_def_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['time'], np.arange(20))
        self.assertEquals(set(rdt.fields), set(['time', 'temp']))
Example #44
    def test_activate_suspend_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(
            name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects

        dp_obj = IonObject(RT.DataProduct,
                           name='DP1',
                           description='some new dp')

        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product(
            data_product=dp_obj, stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Subscribe to persist events
        #------------------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def info_event_received(message, headers):
            queue.put(message)

        es = EventSubscriber(event_type=OT.InformationContentStatusEvent,
                             callback=info_event_received,
                             origin=dp_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)

        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset does not exist" %
                           str(dp_id))
        dataset_id = dataset_ids[0]

        # Check that the streams associated with the data product are persisted
        stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream,
                                                   RT.Stream, True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id, route)

        dataset_modified = Event()

        def cb(*args, **kwargs):
            dataset_modified.set()

        es = EventSubscriber(event_type=OT.DatasetModified,
                             callback=cb,
                             origin=dataset_id,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to the data retriever
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug(
            "The data retriever was able to replay the dataset attached to the data product "
            "we wanted persisted. The dataset could only be retrieved because it was persisted, "
            "so this demonstrates that L4-CI-SA-RQ-267 is satisfied: "
            "'Data product management shall persist data products'"
        )

        data_product_object = self.rrclient.read(dp_id)
        self.assertEquals(data_product_object.name, 'DP1')
        self.assertEquals(data_product_object.description, 'some new dp')

        log.debug(
            "Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
            "Attributes passed at creation for the data product object, name='%s', description='%s', "
            "match those of the object from the resource registry, name='%s', description='%s'" %
            (dp_obj.name, dp_obj.description, data_product_object.name,
             data_product_object.description))

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)

        dataset_modified.clear()

        rdt['time'] = np.arange(20, 40)

        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))

        dataset_ids, _ = self.rrclient.find_objects(dp_id,
                                                    PRED.hasDataset,
                                                    id_only=True)
        self.assertEquals(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)
        # now try to get the deleted dp object

        with self.assertRaises(NotFound):
            dp_obj = self.rrclient.read(dp_id)

        info_event_counter = 0
        runtime = 0
        starttime = time.time()
        caught_events = []

        # Check that the four InformationContentStatusEvents were received
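        # (presumably one event per persistence state change above:
        #  activate, suspend, activate, suspend/force-delete)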
        while info_event_counter < 4 and runtime < 60:
            a = queue.get(timeout=60)
            caught_events.append(a)
            info_event_counter += 1
            runtime = time.time() - starttime

        self.assertEquals(info_event_counter, 4)
Example #45
class QCProcessor(SimpleProcess):
    def __init__(self):
        self.event = Event() # Synchronizes the thread
        self.timeout = 10

    def on_start(self):
        '''
        Process initialization
        '''
        self._thread = self._process.thread_manager.spawn(self.thread_loop)
        self._event_subscriber = EventSubscriber(event_type=OT.ResetQCEvent, callback=self.receive_event, auto_delete=True) # TODO Correct event types
        self._event_subscriber.start()
        self.timeout = self.CFG.get_safe('endpoint.receive.timeout', 10)
        self.resource_registry = self.container.resource_registry
        self.event_queue = Queue()

    def on_quit(self):
        '''
        Stop and cleanup the thread
        '''
        self._event_subscriber.stop()
        self.suspend()

    def receive_event(self, event, *args, **kwargs):
        log.error("Adding event to the event queue")
        self.event_queue.put(event)

    def thread_loop(self):
        '''
        Asynchronous event-loop
        '''
        threading.current_thread().name = '%s-qc-processor' % self.id
        while not self.event.wait(1):
            try:
                self.qc_processing_loop()
            except:
                log.error("Error in QC Processing Loop", exc_info=True)
            try:
                self.event_processing_loop()
            except:
                log.error("Error in QC Event Loop", exc_info=True)

    def qc_processing_loop(self):
        '''
        Iterates through available data products and evaluates QC
        '''
        data_products, _ = self.container.resource_registry.find_resources(restype=RT.DataProduct, id_only=False)
        for data_product in data_products:
            # Get the reference designator
            try:
                rd = self.get_reference_designator(data_product._id)
            except BadRequest:
                continue
            parameters = self.get_parameters(data_product)
            # Create a mapping of inputs to QC
            qc_mapping = {}

            # Creates a dictionary { data_product_name : parameter_name }
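            # e.g. (hypothetical) a parameter named 'temp' with ooi_short_name 'TEMPWAT_L1'
            # yields qc_mapping['TEMPWAT'] = 'temp'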
            for p in parameters:
                if p.ooi_short_name:
                    sname = p.ooi_short_name
                    g = re.match(r'([a-zA-Z-_]+)(_L[0-9])', sname)
                    if g:
                        sname = g.groups()[0]
                    qc_mapping[sname] = p.name

            for p in parameters:
                # for each parameter, if the name ends in _qc run the qc
                if p.name.endswith('_qc'):
                    self.run_qc(data_product,rd, p, qc_mapping)

            # Break early if we can
            if self.event.is_set(): 
                break

    def event_processing_loop(self):
        '''
        Processes the events in the event queue
        '''
        log.error("Processing event queue")
        self.event_queue.put(StopIteration)
        for event in self.event_queue:
            log.error("My event's reference designator: %s", event.origin)

    def suspend(self):
        '''
        Stops the event loop
        '''
        self.event.set()
        self._thread.join(self.timeout)
        log.info("QC Thread Suspended")


    def get_reference_designator(self, data_product_id=''):
        '''
        Returns the reference designator for a data product if it has one
        '''
        # First try to get the parent data product
        data_product_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasDataProductParent, id_only=True)
        if data_product_ids:
            return self.get_reference_designator(data_product_ids[0])

        device_ids, _ = self.resource_registry.find_subjects(object=data_product_id, predicate=PRED.hasOutputProduct, subject_type=RT.InstrumentDevice, id_only=True)
        if not device_ids: 
            raise BadRequest("No instrument device associated with this data product")
        device_id = device_ids[0]

        sites, _ = self.resource_registry.find_subjects(object=device_id, predicate=PRED.hasDevice, subject_type=RT.InstrumentSite, id_only=False)
        if not sites:
            raise BadRequest("No site is associated with this data product")
        site = sites[0]
        rd = site.reference_designator
        return rd

    def run_qc(self, data_product, reference_designator, parameter, qc_mapping):
        '''
        Determines which algorithm the parameter should run, then evaluates the QC
        '''

        # We key off of the OOI Short Name
        # DATAPRD_ALGRTHM_QC
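        # e.g. (hypothetical) 'TEMPWAT_GLBLRNG_QC' -> dp_ident='TEMPWAT', alg='GLBLRNG', qc='QC'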
        dp_ident, alg, qc = parameter.ooi_short_name.split('_')
        if dp_ident not in qc_mapping:
            return # No input!
        input_name = qc_mapping[dp_ident]

        try:
            doc = self.container.object_store.read_doc(reference_designator)
        except NotFound:
            return # NO QC lookups found
        if dp_ident not in doc:
            log.critical("Data product %s not in doc", dp_ident)
            return # No data product of this listing in the RD's entry
        # Lookup table has the rows for the QC inputs
        lookup_table = doc[dp_ident]

        # An instance of the coverage is loaded if we need to run an algorithm
        dataset_id = self.get_dataset(data_product)
        coverage = self.get_coverage(dataset_id)
        if not coverage.num_timesteps: # No data = no qc
            coverage.close()
            return

        try:
            # Get the lookup table info then run
            if alg.lower() == 'glblrng':
                row = self.recent_row(lookup_table['global_range'])
                min_value = row['min_value']
                max_value = row['max_value']
                self.process_glblrng(coverage, parameter, input_name, min_value, max_value)

            elif alg.lower() == 'stuckvl':
                row = self.recent_row(lookup_table['stuck_value'])
                resolution = row['resolution']
                N = row['consecutive_values']
                self.process_stuck_value(coverage, parameter,input_name, resolution, N)

            elif alg.lower() == 'trndtst':
                row = self.recent_row(lookup_table['trend_test'])
                ord_n = row['polynomial_order']
                nstd = row['standard_deviation']
                self.process_trend_test(coverage, parameter, input_name, ord_n, nstd)

            elif alg.lower() == 'spketst':
                row = self.recent_row(lookup_table['spike_test'])
                acc = row['accuracy']
                N = row['range_multiplier']
                L = row['window_length']
                self.process_spike_test(coverage, parameter, input_name, acc, N, L)

            elif alg.lower() == "gradtst":
                row = self.recent_row(lookup_table["gradient_test"])
                ddatdx = row["ddatdx"]
                mindx = row["mindx"]
                startdat = row["startdat"]
                if isinstance(startdat, basestring) and not startdat:
                    startdat = np.nan
                if isinstance(mindx, basestring) and not mindx:
                    mindx = np.nan
                toldat = row["toldat"]
                self.process_gradient_test(coverage, parameter, input_name, ddatdx, mindx, startdat, toldat)

            elif alg.lower() == 'loclrng':
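                # Local range QC is not wired up in this dispatch yet; see
                # process_local_range_test below (the lookup-table row format for
                # datlim/datlimz is not defined in this snippet).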
                pass

        except KeyError: # No lookup table
            self.set_error(coverage, parameter)


        finally:
            coverage.close()

    def set_error(self, coverage, parameter):
        log.error("setting coverage parameter %s to -99", parameter.name)

    def process_glblrng(self, coverage, parameter, input_name, min_value, max_value):
        '''
        Evaluates the global range QC for all samples whose QC flag equals -88 (not yet evaluated)
        '''
        log.error("input name: %s", input_name)
        log.info("Num timesteps: %s", coverage.num_timesteps)

        # Get all of the QC values, and find where -88 is set (uninitialized)
        qc_array = coverage.get_parameter_values(parameter.name)
        indexes = np.where( qc_array == -88 )[0]

        # Build the output arrays, keeping track of the timestamps the flagged values correspond to
        time_array = coverage.get_parameter_values(coverage.temporal_parameter_name)[indexes]
        value_array = coverage.get_parameter_values(input_name)[indexes]

        from ion_functions.qc.qc_functions import dataqc_globalrangetest
        qc = dataqc_globalrangetest(value_array, [min_value, max_value])
        return_dictionary = {
                coverage.temporal_parameter_name : time_array,
                parameter.name : qc
        }
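        # NOTE: return_dictionary is assembled here (and in the other process_* methods)
        # but is not written back to the coverage in this snippet; presumably a later
        # step persists the QC flags.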


    def process_stuck_value(self, coverage, parameter, input_name, resolution, N):
        '''
        Evaluates the stuck value QC for all samples whose QC flag equals -88 (not yet evaluated)
        '''
        # Get all of the QC values and find out where -88 is set
        qc_array = coverage.get_parameter_values(parameter.name)
        indexes = np.where(qc_array == -88)[0]

        # Horribly inefficient...
        from ion_functions.qc.qc_functions import dataqc_stuckvaluetest_wrapper
        value_array = coverage.get_parameter_values(input_name)
        qc_array = dataqc_stuckvaluetest_wrapper(value_array, resolution, N)
        qc_array = qc_array[indexes]
        time_array = coverage.get_parameter_values(coverage.temporal_parameter_name)[indexes]

        return_dictionary = {
                coverage.temporal_parameter_name : time_array,
                parameter.name : qc_array
        }


    def process_trend_test(self, coverage, parameter, input_name, ord_n, nstd):
        '''
        Evaluates the trend test QC for all samples whose QC flag equals -88 (not yet evaluated)
        '''
        # Get all of the QC values and find out where -88 is set
        qc_array = coverage.get_parameter_values(parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_polytrendtest_wrapper
        time_array = coverage.get_parameter_values(coverage.temporal_parameter_name)
        value_array = coverage.get_parameter_values(input_name)

        qc_array = dataqc_polytrendtest_wrapper(value_array, time_array, ord_n, nstd)
        qc_array = qc_array[indexes]
        return_dictionary = {
                coverage.temporal_parameter_name : time_array,
                parameter.name : qc_array
        }

    def process_spike_test(self, coverage, parameter, input_name, acc, N, L):
        '''
        Evaluates the spike test QC for all samples whose QC flag equals -88 (not yet evaluated)
        '''
        # Get all of the QC values and find out where -88 is set
        qc_array = coverage.get_parameter_values(parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_spiketest_wrapper
        value_array = coverage.get_parameter_values(input_name)
        qc_array = dataqc_spiketest_wrapper(value_array, acc, N, L)
        qc_array = qc_array[indexes]
        time_array = coverage.get_parameter_values(coverage.temporal_parameter_name)[indexes]
        return_dictionary = {
                coverage.temporal_parameter_name : time_array,
                parameter.name : qc_array
        }

    def process_gradient_test(self, coverage, parameter, input_name, ddatdx, mindx, startdat, toldat):
        qc_array = coverage.get_parameter_values(parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_gradienttest_wrapper
        value_array = coverage.get_parameter_values(input_name)
        time_array = coverage.get_parameter_values(coverage.temporal_parameter_name)
        
        qc_array = dataqc_gradienttest_wrapper(value_array, time_array, ddatdx, mindx, startdat, toldat)

        return_dictionary = {
                coverage.temporal_parameter_name : time_array[indexes],
                parameter.name : qc_array[indexes]
        }


    def process_local_range_test(self, coverage, parameter, input_name, datlim, datlimz):
        qc_array = coverage.get_parameter_values(parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_localrangetest
        value_array = coverage.get_parameter_values(input_name)
        time_array = coverage.get_parameter_values(coverage.temporal_parameter_name)
        # The z parameter name needs to come from the lookup table column headings,
        # and the multi-axis case is not handled here yet.
        z_parameter_name = None
        z_array = coverage.get_parameter_values(z_parameter_name)

        qc_array = dataqc_localrangetest(value_array, z_array, datlim, datlimz)
        return_dictionary = {
                coverage.temporal_parameter_name : time_array[indexes],
                parameter.name : qc_array[indexes]
        }





    def get_dataset(self, data_product):
        dataset_ids, _ = self.resource_registry.find_objects(data_product, PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise BadRequest("No Dataset")
        dataset_id = dataset_ids[0]
        return dataset_id

    def get_coverage(self, dataset_id):
        cov = DatasetManagementService._get_coverage(dataset_id, mode='r+')
        return cov

    def recent_row(self, rows):
        '''
        Determines the most recent data based on the timestamp
        '''
        most_recent = None
        ts = 0
        for row in rows:
            if row['ts_created'] > ts:
                most_recent = row
                ts = row['ts_created']
        return most_recent


    def get_parameters(self, data_product):
        '''
        Returns the relevant parameter contexts of the data product
        '''

        # DataProduct -> StreamDefinition
        stream_defs, _ = self.resource_registry.find_objects(data_product._id, PRED.hasStreamDefinition, id_only=False)
        stream_def = stream_defs[0]

        # StreamDefinition -> ParameterDictionary
        pdict_ids, _ = self.resource_registry.find_objects(stream_def._id, PRED.hasParameterDictionary, id_only=True)
        pdict_id = pdict_ids[0]

        # ParameterDictionary -> ParameterContext
        pctxts, _ = self.resource_registry.find_objects(pdict_id, PRED.hasParameterContext, id_only=False)
        relevant = [ctx for ctx in pctxts if not stream_def.available_fields or (stream_def.available_fields and ctx.name in stream_def.available_fields)]
        return relevant
Example #46
class EOIRegistrationProcess(SimpleProcess):

    def on_start(self):
        self.data_source_subscriber = EventSubscriber(event_type=OT.ResourceModifiedEvent,
                                                      origin_type=RT.DataSource,
                                                      callback=self._register_data_source)
        self.provider_subscriber = EventSubscriber(event_type=OT.ResourceModifiedEvent,
                                                      origin_type=RT.ExternalDataProvider,
                                                      callback=self._register_provider)
        self.data_source_subscriber.start()
        self.provider_subscriber.start()

        self.rr = self.container.resource_registry

        self.using_eoi_services = CFG.get_safe('eoi.meta.use_eoi_services', False)
        self.server = CFG.get_safe('eoi.importer_service.server', "localhost")+":"+str(CFG.get_safe('eoi.importer_service.port', 8844))

        log.info("Using geoservices="+str(self.using_eoi_services))
        if not self.using_eoi_services:
            log.warn("not using geoservices...") 

        self.importer_service_available = self.check_for_importer_service()
        if not self.importer_service_available:
            log.warn("not using importer service...")  

    def check_for_importer_service(self):
        '''
        only runs at startup, used to determine whether the importer service is available
        '''
        try:
            r = requests.get(self.server+'/service=alive&name=ooi&id=ooi')
            log.info("importer service available, status code: %s", str(r.status_code))
            #alive service returned ok
            if r.status_code == 200:
                return True
            else:
                return False
        except Exception as e:
            # the alive endpoint could not be reached, so the importer service is unavailable
            log.warn("importer service is not available... %s", e)
            return False    


    def _register_data_source(self, event, *args, **kwargs):        
        '''
        used to create a harvester
        '''
        if self.importer_service_available:
            obj = self.rr.read(event.origin)        
            data_fields = []
            for attrname, value in vars(obj).iteritems():
                # generate the param list to pass to the importer service using field names
                if attrname != "contact":
                    f = attrname.replace("_", "") + "=" + str(obj[attrname])
                    data_fields.append(f)

            param_list = '&'.join(data_fields)
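            # e.g. (hypothetical) param_list = 'name=mysource&lcstate=DEPLOYED&...'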

            request_string = self.server+'/service='+CREATE_HARVESTER+"&"+param_list            
            r = requests.get(request_string)


    def _register_provider(self, event, *args, **kwargs):
        if self.importer_service_available:
            #print "provider id:", event.origin
            pass
            

    def on_quit(self):
        self.data_source_subscriber.stop()
        self.provider_subscriber.stop()
Example #47
class GovernanceController(object):
    """
    This is a singleton object which handles governance functionality in the container.
    """

    def __init__(self,container):
        log.debug('GovernanceController.__init__()')
        self.container = container
        self.enabled = False
        self.interceptor_by_name_dict = dict()
        self.interceptor_order = []
        self.policy_decision_point_manager = None
        self.governance_dispatcher = None

        # Holds a list per service operation of policy methods to check before the op in a process is allowed to be called
        self._service_op_preconditions = dict()

        self._is_container_org_boundary = False
        self._container_org_name = None
        self._container_org_id = None

        # For policy debugging purposes. Keeps a list of most recent policy updates for later readout
        self._policy_update_log = []
        self._policy_snapshot = None

    def start(self):

        log.debug("GovernanceController starting ...")

        self._CFG = CFG

        self.enabled = CFG.get_safe('interceptor.interceptors.governance.config.enabled', False)

        log.info("GovernanceInterceptor enabled: %s" % str(self.enabled))

        self.policy_event_subscriber = None

        # Containers default to not being an Org boundary and to the ION root Org
        self._is_container_org_boundary = CFG.get_safe('container.org_boundary',False)
        self._container_org_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))
        self._container_org_id = None
        self._system_root_org_name = CFG.get_safe('system.root_org', 'ION')

        self._is_root_org_container = (self._container_org_name == self._system_root_org_name)

        self.system_actor_id = None
        self.system_actor_user_header = None

        if self.enabled:

            config = CFG.get_safe('interceptor.interceptors.governance.config')

            self.initialize_from_config(config)

            self.policy_event_subscriber = EventSubscriber(event_type=OT.PolicyEvent, callback=self.policy_event_callback)
            self.policy_event_subscriber.start()

            self.rr_client = ResourceRegistryServiceProcessClient(node=self.container.node, process=self.container)
            self.policy_client = PolicyManagementServiceProcessClient(node=self.container.node, process=self.container)

            self._policy_snapshot = self._get_policy_snapshot()
            self._log_policy_update("start_governance_ctrl", message="Container start")

    def initialize_from_config(self, config):

        self.governance_dispatcher = GovernanceDispatcher()

        self.policy_decision_point_manager = PolicyDecisionPointManager(self)

        if 'interceptor_order' in config:
            self.interceptor_order = config['interceptor_order']

        if 'governance_interceptors' in config:
            gov_ints = config['governance_interceptors']

            for name in gov_ints:
                interceptor_def = gov_ints[name]

                # Instantiate and put in by_name array
                parts = interceptor_def["class"].split('.')
                modpath = ".".join(parts[:-1])
                classname = parts[-1]
                module = __import__(modpath, fromlist=[classname])
                classobj = getattr(module, classname)
                classinst = classobj()

                # Put in by_name_dict for possible re-use
                self.interceptor_by_name_dict[name] = classinst

    def stop(self):
        log.debug("GovernanceController stopping ...")

        if self.policy_event_subscriber is not None:
            self.policy_event_subscriber.stop()


    @property
    def is_container_org_boundary(self):
        return self._is_container_org_boundary

    @property
    def container_org_name(self):
        return self._container_org_name

    @property
    def system_root_org_name(self):
        return self._system_root_org_name

    @property
    def is_root_org_container(self):
        return self._is_root_org_container

    @property
    def CFG(self):
        return self._CFG


    @property
    def rr(self):
        """
        Returns the active resource registry instance or client.

        Used to directly contact the resource registry via the container if available,
        otherwise the messaging client to the RR service is returned.
        """
        if self.container.has_capability('RESOURCE_REGISTRY'):
            return self.container.resource_registry

        return self.rr_client


    def get_container_org_boundary_id(self):
        """
        Returns the permanent org identifier configured for this container
        @return:
        """

        if not self._is_container_org_boundary:
            return None

        if self._container_org_id is None:
            orgs, _ = self.rr.find_resources(restype=RT.Org)
            for org in orgs:
                if org.org_governance_name == self._container_org_name:
                    self._container_org_id = org._id
                    break

        return self._container_org_id

    def process_incoming_message(self,invocation):
        """
        The GovernanceController hook into the incoming message interceptor stack
        @param invocation:
        @return:
        """
        self.process_message(invocation, self.interceptor_order,'incoming' )
        return self.governance_dispatcher.handle_incoming_message(invocation)

    def process_outgoing_message(self,invocation):
        """
        The GovernanceController hook into the outgoing message interceptor stack
        @param invocation:
        @return:
        """
        self.process_message(invocation, reversed(self.interceptor_order),'outgoing')
        return self.governance_dispatcher.handle_outgoing_message(invocation)

    def process_message(self,invocation,interceptor_list, method):
        """
        The GovernanceController hook to iterate over the interceptors to call each one and evaluate the annotations
        to see what actions should be done.
        @TODO - may want to make this more dynamic instead of hard coded for the moment.
        @param invocation:
        @param interceptor_list:
        @param method:
        @return:
        """
        for int_name in interceptor_list:
            class_inst = self.interceptor_by_name_dict[int_name]
            getattr(class_inst, method)(invocation)

            # Stop processing the message if an interceptor found an issue with it.
            annotations = invocation.message_annotations
            if (annotations.get(GovernanceDispatcher.CONVERSATION__STATUS_ANNOTATION) == GovernanceDispatcher.STATUS_REJECT or
                    annotations.get(GovernanceDispatcher.POLICY__STATUS_ANNOTATION) == GovernanceDispatcher.STATUS_REJECT):
                break

        return invocation



    # Manage all of the policies in the container

    def policy_event_callback(self, policy_event, *args, **kwargs):
        """
        The generic policy event call back for dispatching policy related events
        """
        # Check here so the system actor info gets set after the system actor is created
        if self.system_actor_id is None:
            system_actor = get_system_actor()
            if system_actor is not None:
                self.system_actor_id = system_actor._id
                self.system_actor_user_header = get_system_actor_header()

        log.info("Policy event callback received: %s" % policy_event)

        if policy_event.type_ == OT.ResourcePolicyEvent:
            self.resource_policy_event_callback(policy_event, *args, **kwargs)
        elif policy_event.type_ == OT.RelatedResourcePolicyEvent:
            self.resource_policy_event_callback(policy_event, *args, **kwargs)
        elif policy_event.type_ == OT.ServicePolicyEvent:
            self.service_policy_event_callback(policy_event, *args, **kwargs)

        self._log_policy_update("policy_event_callback",
                                message="Event processed",
                                event=policy_event)

    def resource_policy_event_callback(self, resource_policy_event, *args, **kwargs):
        """
        The ResourcePolicyEvent handler
        """
        log.debug('Resource policy event received: %s', str(resource_policy_event.__dict__))

        policy_id = resource_policy_event.origin
        resource_id = resource_policy_event.resource_id
        delete_policy = True if resource_policy_event.sub_type == 'DeletePolicy' else False

        self.update_resource_access_policy(resource_id, delete_policy)

    def service_policy_event_callback(self, service_policy_event, *args, **kwargs):
        """
        The ServicePolicyEvent handler

        @param args:
        @param kwargs:
        @return:
        """
        log.debug('Service policy event received: %s', str(service_policy_event.__dict__))

        policy_id = service_policy_event.origin
        service_name = service_policy_event.service_name
        service_op = service_policy_event.op
        delete_policy = True if service_policy_event.sub_type == 'DeletePolicy' else False

        if service_name:
            if self.container.proc_manager.is_local_service_process(service_name):
                self.update_service_access_policy(service_name, service_op, delete_policy=delete_policy)
            elif self.container.proc_manager.is_local_agent_process(service_name):
                self.update_service_access_policy(service_name, service_op, delete_policy=delete_policy)

        else:
            self.update_common_service_access_policy()


    def reset_policy_cache(self):
        """
        The function to empty and reload the container's policy caches

        @return:
        """
        log.info('Resetting policy cache')

        #First remove all cached policies and precondition functions that are not hard-wired
        self._reset_container_policy_caches()

        #Then load the common service access policies since they are shared across services
        self.update_common_service_access_policy()

        #Now iterate over the processes running in the container and reload their policies
        proc_list = self.container.proc_manager.list_local_processes()
        for proc in proc_list:
            self.update_container_policies(proc)

        self._log_policy_update("reset_policy_cache")


    def _reset_container_policy_caches(self):
        self.policy_decision_point_manager.clear_policy_cache()
        self.unregister_all_process_policy_preconditions()

    def _get_policy_snapshot(self):
        policy_snap = {}
        policy_snap["snap_ts"] = get_ion_ts()

        policies = self.get_active_policies()
        common_list = []
        policy_snap["common_pdp"] = common_list
        for rule in policies.get("common_service_access", {}).policy.rules:
            rule_dict = dict(id=rule.id, description=rule.description, effect=rule.effect.value)
            common_list.append(rule_dict)

        service_dict = {}
        policy_snap["service_pdp"] = service_dict
        for (svc_name, sp) in policies.get("service_access", {}).iteritems():
            for rule in sp.policy.rules:
                if svc_name not in service_dict:
                    service_dict[svc_name] = []
                rule_dict = dict(id=rule.id, description=rule.description, effect=rule.effect.value)
                service_dict[svc_name].append(rule_dict)

        service_pre_dict = {}
        policy_snap["service_precondition"] = service_pre_dict
        for (svc_name, sp) in policies.get("service_operation", {}).iteritems():
            for op, f in sp.iteritems():
                if svc_name not in service_pre_dict:
                    service_pre_dict[svc_name] = []
                service_pre_dict[svc_name].append(op)

        resource_dict = {}
        policy_snap["resource_pdp"] = resource_dict
        for (res_name, sp) in policies.get("resource_access", {}).iteritems():
            for rule in sp.policy.rules:
                if res_name not in resource_dict:
                    resource_dict[res_name] = []
                rule_dict = dict(id=rule.id, description=rule.description, effect=rule.effect.value)
                resource_dict[res_name].append(rule_dict)

        return policy_snap

    def _log_policy_update(self, update_type=None, message=None, event=None, process=None):
        policy_update_dict = {}
        policy_update_dict["update_ts"] = get_ion_ts()
        policy_update_dict["update_type"] = update_type or ""
        policy_update_dict["message"] = message or ""
        if event:
            policy_update_dict["event._id"] = getattr(event, "_id", "")
            policy_update_dict["event.ts_created"] = getattr(event, "ts_created", "")
            policy_update_dict["event.type_"] = getattr(event, "type_", "")
            policy_update_dict["event.sub_type"] = getattr(event, "sub_type", "")
        if process:
            policy_update_dict["proc._proc_name"] = getattr(process, "_proc_name", "")
            policy_update_dict["proc.name"] = getattr(process, "name", "")
            policy_update_dict["proc._proc_listen_name"] = getattr(process, "_proc_listen_name", "")
            policy_update_dict["proc.resource_type"] = getattr(process, "resource_type", "")
            policy_update_dict["proc.resource_id"] = getattr(process, "resource_id", "")
        any_change = False   # Change can only be detected in the number/names of policies, not their content
        snapshot = self._policy_snapshot
        policy_now = self._get_policy_snapshot()
        # Comparison of snapshot to current policy
        try:
            def compare_policy(pol_cur, pol_snap, key, res):
                pol_cur_set = {d["id"] if isinstance(d, dict) else d for d in pol_cur}
                pol_snap_set = {d["id"] if isinstance(d, dict) else d for d in pol_snap}
                if pol_cur_set != pol_snap_set:
                    policy_update_dict["snap.%s.%s.added" % (key, res)] = pol_cur_set - pol_snap_set
                    policy_update_dict["snap.%s.%s.removed" % (key, res)] = pol_snap_set - pol_cur_set
                    log.debug("Policy changed for %s.%s: %s vs %s" % (key, res, pol_cur_set, pol_snap_set))
                    return True
                return False
            policy_update_dict["snap.snap_ts"] = snapshot["snap_ts"]
            for key in ("common_pdp", "service_pdp", "service_precondition", "resource_pdp"):
                pol_snap = snapshot[key]
                pol_cur = policy_now[key]
                if isinstance(pol_cur, dict):
                    for res in pol_cur.keys():
                        pol_list = pol_cur[res]
                        snap_list = pol_snap.get(res, [])
                        any_change = compare_policy(pol_list, snap_list, key, res) or any_change
                elif isinstance(pol_cur, list):
                    any_change = compare_policy(pol_cur, pol_snap, key, "common") or any_change

            policy_update_dict["snap.policy_changed"] = str(any_change)
        except Exception as ex:
            log.warn("Cannot compare current policy to prior snapshot", exc_info=True)

        self._policy_update_log.append(policy_update_dict)
        self._policy_update_log = self._policy_update_log[-100:]
        self._policy_snapshot = policy_now

        log.info("Policy update logged. Type=%s, message=%s, changed=%s" % (update_type, message, any_change))

    def update_container_policies(self, process_instance, safe_mode=False):
        """
        Load any applicable process policies. To be called by the container proc manager after
        registering a new process.
        @param process_instance  The ION process for which to load policy
        @param safe_mode  If True, will not attempt to read policy if Policy MS not available
        """

        # This method can be called before policy management service is available during system startup
        if safe_mode and not self._is_policy_management_service_available():
            if not is_testing() and (process_instance.name not in (
                "resource_registry", "system_management", "directory", "identity_management") and
                process_instance._proc_name != "event_persister"):
                # We are in the early phases of bootstrapping
                log.warn("update_container_policies(%s) - No update. Policy MS not available" % process_instance._proc_name)

            self._log_policy_update("update_container_policies",
                                    message="No update. Policy MS not available",
                                    process=process_instance)
            return

        # Check here so the system actor info gets set after the system actor is created
        if self.system_actor_id is None:
            system_actor = get_system_actor()
            if system_actor is not None:
                self.system_actor_id = system_actor._id
                self.system_actor_user_header = get_system_actor_header()

        if process_instance._proc_type == SERVICE_PROCESS_TYPE:
            # look to load any existing policies for this service

            self.update_service_access_policy(process_instance._proc_listen_name)

        elif process_instance._proc_type == AGENT_PROCESS_TYPE:
            # look to load any existing policies for this agent service
            if process_instance.resource_type is None:
                self.update_service_access_policy(process_instance.name)
            else:
                self.update_service_access_policy(process_instance.resource_type)

            if process_instance.resource_id:
                # look to load any existing policies for this resource
                self.update_resource_access_policy(process_instance.resource_id)

        self._log_policy_update("update_container_policies",
                                message="Updated",
                                process=process_instance)


    def update_resource_access_policy(self, resource_id, delete_policy=False):

        if self.policy_decision_point_manager is not None:

            try:
                policy_rules = self.policy_client.get_active_resource_access_policy_rules(resource_id, headers=self.system_actor_user_header)
                self.policy_decision_point_manager.load_resource_policy_rules(resource_id, policy_rules)

            except Exception as e:
                #If the resource does not exist, just ignore it - but log a warning.
                log.warn("The resource %s is not found or there was an error applying access policy: %s" % ( resource_id, e.message))
Example #48
class EventPersister(StandaloneProcess):

    def on_init(self):
        # Time in between event persists
        self.persist_interval = float(self.CFG.get_safe("process.event_persister.persist_interval", 1.0))

        self.persist_blacklist = self.CFG.get_safe("process.event_persister.persist_blacklist", {})

        self._event_type_blacklist = [entry['event_type'] for entry in self.persist_blacklist if entry.get('event_type', None) and len(entry) == 1]
        self._complex_blacklist = [entry for entry in self.persist_blacklist if not (entry.get('event_type', None) and len(entry) == 1)]
        if self._complex_blacklist:
            log.warn("EventPersister does not yet support complex blacklist expressions: %s", self._complex_blacklist)

        # Holds received events FIFO in a synchronized queue
        self.event_queue = Queue()

        # Temporarily holds the list of events to persist while the datastore operation has not yet completed.
        # Events to persist remain here if the datastore operation occasionally fails.
        self.events_to_persist = None

        # Number of unsuccessful attempts to persist in a row
        self.failure_count = 0

        # bookkeeping for greenlet
        self._persist_greenlet = None
        self._terminate_persist = Event() # when set, exits the persister greenlet

        # The event subscriber
        self.event_sub = None

        # Registered event process plugins
        self.process_plugins = {}
        for plugin_name, plugin_cls, plugin_args in PROCESS_PLUGINS:
            try:
                plugin = named_any(plugin_cls)(**plugin_args)
                self.process_plugins[plugin_name]= plugin
                log.info("Loaded event processing plugin %s (%s)", plugin_name, plugin_cls)
            except Exception as ex:
                log.error("Cannot instantiate event processing plugin %s (%s): %s", plugin_name, plugin_cls, ex)


    def on_start(self):
        # Persister thread
        self._persist_greenlet = spawn(self._persister_loop, self.persist_interval)
        log.debug('EventPersister persist greenlet started in "%s" (interval %s)', self.__class__.__name__, self.persist_interval)

        # Event subscription
        self.event_sub = EventSubscriber(pattern=EventSubscriber.ALL_EVENTS,
                                         callback=self._on_event,
                                         queue_name="event_persister")

        self.event_sub.start()

    def on_quit(self):
        # Stop event subscriber
        self.event_sub.stop()

        # tell the trigger greenlet we're done
        self._terminate_persist.set()

        # wait on the greenlets to finish cleanly
        self._persist_greenlet.join(timeout=5)

    def _on_event(self, event, *args, **kwargs):
        self.event_queue.put(event)

    def _in_blacklist(self, event):
        if event.type_ in self._event_type_blacklist:
            return True
        if event.base_types:
            for base_type in event.base_types:
                if base_type in self._event_type_blacklist:
                    return True
            # TODO: Complex event blacklist
        return False

    def _persister_loop(self, persist_interval):
        log.debug('Starting event persister thread with persist_interval=%s', persist_interval)

        # Event.wait returns False on timeout (and True when set in on_quit), so we use this to both exit cleanly and do our timeout in a loop
        while not self._terminate_persist.wait(timeout=persist_interval):
            try:
                # leftover events_to_persist indicate previous attempt did not succeed
                if self.events_to_persist and self.failure_count > 2:
                    bad_events = []
                    log.warn("Attempting to persist %s events individually" % (len(self.events_to_persist)))
                    for event in self.events_to_persist:
                        try:
                            self.container.event_repository.put_event(event)
                        except Exception:
                            bad_events.append(event)

                    if len(self.events_to_persist) != len(bad_events):
                        log.warn("Succeeded to persist some of the events - rest must be bad")
                        self._log_events(bad_events)
                    elif bad_events:
                        log.error("Discarding %s events after %s attempts!!" % (len(bad_events), self.failure_count))
                        self._log_events(bad_events)

                    self.events_to_persist = None
                    self.failure_count = 0

                elif self.events_to_persist:
                    # There was an error last time and we need to retry
                    log.info("Retry persisting %s events" % len(self.events_to_persist))
                    self._persist_events(self.events_to_persist)
                    self.events_to_persist = None

                # process ALL events (not retried on failure like persisting is)
                events_to_process = [self.event_queue.get() for x in xrange(self.event_queue.qsize())]
                # only persist events not in blacklist
                self.events_to_persist = [x for x in events_to_process if not self._in_blacklist(x)]

                try:
                    self._persist_events(self.events_to_persist)
                finally:
                    self._process_events(events_to_process)
                self.events_to_persist = None
                self.failure_count = 0
            except Exception as ex:
                # Note: Persisting events may fail occasionally during test runs (when the "events" datastore is force
                # deleted and recreated). We'll log and keep retrying forever.
                log.exception("Failed to persist %s received events. Will retry next cycle" % len(self.events_to_persist))
                self.failure_count += 1
                self._log_events(self.events_to_persist)

    def _persist_events(self, event_list):
        if event_list:
            self.container.event_repository.put_events(event_list)

    def _process_events(self, event_list):
        for plugin_name, plugin in self.process_plugins.iteritems():
            try:
                plugin.process_events(event_list)
            except Exception as ex:
                log.exception("Error processing events in plugin %s", plugin_name)

    def _log_events(self, events):
        events_str = pprint.pformat([event.__dict__ for event in events]) if events else ""
        log.warn("EVENTS:\n%s", events_str)
Example #49
class GovernanceController(object):
    """
    This is a singleton object which handles governance functionality in the container.
    """

    def __init__(self,container):
        log.debug('GovernanceController.__init__()')
        self.container = container
        self.enabled = False
        self.interceptor_by_name_dict = dict()
        self.interceptor_order = []
        self.policy_decision_point_manager = None
        self.governance_dispatcher = None

        # Holds a list per service operation of policy methods to check before the op in a process is allowed to be called
        self._service_op_preconditions = dict()

        self._is_container_org_boundary = False
        self._container_org_name = None
        self._container_org_id = None

    def start(self):

        log.debug("GovernanceController starting ...")

        self._CFG = CFG

        self.enabled = CFG.get_safe('interceptor.interceptors.governance.config.enabled', False)

        log.info("GovernanceInterceptor enabled: %s" % str(self.enabled))

        self.policy_event_subscriber = None

        # Containers default to not being an Org boundary and to the ION root Org
        self._is_container_org_boundary = CFG.get_safe('container.org_boundary',False)
        self._container_org_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))
        self._container_org_id = None
        self._system_root_org_name = CFG.get_safe('system.root_org', 'ION')

        self._is_root_org_container = (self._container_org_name == self._system_root_org_name)

        self.system_actor_id = None
        self.system_actor_user_header = None

        if self.enabled:

            config = CFG.get_safe('interceptor.interceptors.governance.config')

            self.initialize_from_config(config)

            self.policy_event_subscriber = EventSubscriber(event_type=OT.PolicyEvent, callback=self.policy_event_callback)
            self.policy_event_subscriber.start()

            self.rr_client = ResourceRegistryServiceProcessClient(node=self.container.node, process=self.container)
            self.policy_client = PolicyManagementServiceProcessClient(node=self.container.node, process=self.container)

    def initialize_from_config(self, config):

        self.governance_dispatcher = GovernanceDispatcher()

        self.policy_decision_point_manager = PolicyDecisionPointManager(self)

        if 'interceptor_order' in config:
            self.interceptor_order = config['interceptor_order']

        if 'governance_interceptors' in config:
            gov_ints = config['governance_interceptors']

            for name in gov_ints:
                interceptor_def = gov_ints[name]

                # Instantiate and put in by_name array
                parts = interceptor_def["class"].split('.')
                modpath = ".".join(parts[:-1])
                classname = parts[-1]
                module = __import__(modpath, fromlist=[classname])
                classobj = getattr(module, classname)
                classinst = classobj()

                # Put in by_name_dict for possible re-use
                self.interceptor_by_name_dict[name] = classinst

    def stop(self):
        log.debug("GovernanceController stopping ...")

        if self.policy_event_subscriber is not None:
            self.policy_event_subscriber.stop()


    @property
    def is_container_org_boundary(self):
        return self._is_container_org_boundary

    @property
    def container_org_name(self):
        return self._container_org_name

    @property
    def system_root_org_name(self):
        return self._system_root_org_name

    @property
    def is_root_org_container(self):
        return self._is_root_org_container

    @property
    def CFG(self):
        return self._CFG


    @property
    def rr(self):
        """
        Returns the active resource registry instance or client.

        Used to directly contact the resource registry via the container if available,
        otherwise the messaging client to the RR service is returned.
        """
        if self.container.has_capability('RESOURCE_REGISTRY'):
            return self.container.resource_registry

        return self.rr_client


    def get_container_org_boundary_id(self):
        """
        Returns the permanent org identifier configured for this container
        @return:
        """

        if not self._is_container_org_boundary:
            return None

        if self._container_org_id is None:
            orgs, _ = self.rr.find_resources(restype=RT.Org)
            for org in orgs:
                if org.org_governance_name == self._container_org_name:
                    self._container_org_id = org._id
                    break

        return self._container_org_id

    def process_incoming_message(self,invocation):
        """
        The GovernanceController hook into the incoming message interceptor stack
        @param invocation:
        @return:
        """
        self.process_message(invocation, self.interceptor_order,'incoming' )
        return self.governance_dispatcher.handle_incoming_message(invocation)

    def process_outgoing_message(self,invocation):
        """
        The GovernanceController hook into the outgoing message interceptor stack
        @param invocation:
        @return:
        """
        self.process_message(invocation, reversed(self.interceptor_order),'outgoing')
        return self.governance_dispatcher.handle_outgoing_message(invocation)

    def process_message(self,invocation,interceptor_list, method):
        """
        The GovernanceController hook to iterate over the interceptors to call each one and evaluate the annotations
        to see what actions should be done.
        @TODO - may want to make this more dynamic instead of hard coded for the moment.
        @param invocation:
        @param interceptor_list:
        @param method:
        @return:
        """
        for int_name in interceptor_list:
            class_inst = self.interceptor_by_name_dict[int_name]
            getattr(class_inst, method)(invocation)

            #Stop processing message if an issue with the message was found by an interceptor.
            if invocation.message_annotations.get(GovernanceDispatcher.CONVERSATION__STATUS_ANNOTATION) == GovernanceDispatcher.STATUS_REJECT or \
               invocation.message_annotations.get(GovernanceDispatcher.POLICY__STATUS_ANNOTATION) == GovernanceDispatcher.STATUS_REJECT:
                break

        return invocation



    #Manage all of the policies in the container

    def policy_event_callback(self, *args, **kwargs):
        """
        The generic policy event call back for dispatching policy related events

        @param args:
        @param kwargs:
        @return:
        """
        #Set the system actor info here, once the system actor has been created
        if self.system_actor_id is None:
            system_actor = get_system_actor()
            if system_actor is not None:
                self.system_actor_id = system_actor._id
                self.system_actor_user_header = get_system_actor_header()

        policy_event = args[0]
        if policy_event.type_ == OT.ResourcePolicyEvent:
            self.resource_policy_event_callback(*args, **kwargs)
        elif policy_event.type_ == OT.RelatedResourcePolicyEvent:
            self.resource_policy_event_callback(*args, **kwargs)
        elif policy_event.type_ == OT.ServicePolicyEvent:
            self.service_policy_event_callback(*args, **kwargs)
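        # Added illustration (not from the original source): this callback is normally
        # driven by a PolicyEvent subtype published elsewhere in the system, roughly:
        #   EventPublisher(event_type=OT.ServicePolicyEvent).publish_event(
        #       origin=policy_id, service_name='resource_registry', op='', sub_type='UpdatePolicy')
        # Field names mirror those read by the handlers below; the values are hypothetical.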

    def resource_policy_event_callback(self, *args, **kwargs):
        """
        The ResourcePolicyEvent handler

        @param args:
        @param kwargs:
        @return:
        """
        resource_policy_event = args[0]
        log.debug('Resource policy event received: %s', str(resource_policy_event.__dict__))

        policy_id = resource_policy_event.origin
        resource_id = resource_policy_event.resource_id
        delete_policy = True if resource_policy_event.sub_type == 'DeletePolicy' else False

        self.update_resource_access_policy(resource_id, delete_policy)

    def service_policy_event_callback(self, *args, **kwargs):
        """
        The ServicePolicyEvent handler

        @param args:
        @param kwargs:
        @return:
        """
        service_policy_event = args[0]
        log.debug('Service policy event received: %s', str(service_policy_event.__dict__))

        policy_id = service_policy_event.origin
        service_name = service_policy_event.service_name
        service_op = service_policy_event.op
        delete_policy = True if service_policy_event.sub_type == 'DeletePolicy' else False

        if service_name:
            if self.container.proc_manager.is_local_service_process(service_name):
                self.update_service_access_policy(service_name, service_op, delete_policy=delete_policy)
            elif self.container.proc_manager.is_local_agent_process(service_name):
                self.update_service_access_policy(service_name, service_op, delete_policy=delete_policy)

        else:
            self.update_common_service_access_policy()



    def reset_policy_cache(self):
        """
        The function to empty and reload the container's policy caches

        @return:
        """
        log.info('Resetting policy cache')

        #First remove all cached policies and precondition functions that are not hard-wired
        self._reset_container_policy_caches()

        #Then load the common service access policies since they are shared across services
        self.update_common_service_access_policy()

        #Now iterate over the processes running in the container and reload their policies
        proc_list = self.container.proc_manager.list_local_processes()
        for proc in proc_list:
            self.update_container_policies(proc)


    def _reset_container_policy_caches(self):
        self.policy_decision_point_manager.clear_policy_cache()
        self.unregister_all_process_policy_preconditions()

    def update_container_policies(self, process_instance, safe_mode=False):
        """
        This must be called after registering a new process to load any applicable policies

        @param process_instance:
        @return:
        """

        #This method can be called before policy management service is available during system startup
        if safe_mode and not self._is_policy_management_service_available():
            if not is_testing():
                log.warn("Requested update_container_policies() but ignore - Policy MS not available")
            return

        #Set the system actor info here, once the system actor has been created
        if self.system_actor_id is None:
            system_actor = get_system_actor()
            if system_actor is not None:
                self.system_actor_id = system_actor._id
                self.system_actor_user_header = get_system_actor_header()

        if process_instance._proc_type == SERVICE_PROCESS_TYPE:

            # look to load any existing policies for this service

            self.update_service_access_policy(process_instance._proc_listen_name)

        elif process_instance._proc_type == AGENT_PROCESS_TYPE:

            # look to load any existing policies for this agent service
            if process_instance.resource_type is None:
                self.update_service_access_policy(process_instance.name)
            else:
                self.update_service_access_policy(process_instance.resource_type)

            if process_instance.resource_id:
                # look to load any existing policies for this resource
                self.update_resource_access_policy(process_instance.resource_id)


    def update_resource_access_policy(self, resource_id, delete_policy=False):

        if self.policy_decision_point_manager is not None:

            try:
                policy_rules = self.policy_client.get_active_resource_access_policy_rules(resource_id, headers=self.system_actor_user_header)
                self.policy_decision_point_manager.load_resource_policy_rules(resource_id, policy_rules)

            except Exception as e:
                #If the resource does not exist, just ignore it - but log a warning.
                log.warn("The resource %s is not found or there was an error applying access policy: %s" % ( resource_id, e.message))
Example #50
class ContainerManager(object):
    def __init__(self, container, handlers=DEFAULT_HANDLERS):
        self.container = container
        self.running = False
        # make sure start() completes before an event is handled,
        # and any event is either handled before stop() begins,
        # or the handler begins after stop() completes and the event is dropped
        self.lock = Lock()
        self.handlers = handlers[:]

    def start(self):
        # Install the container tracer (could be its own component)
        self.container_tracer = ContainerTracer()
        self.container_tracer.start_tracing()
        self.container.tracer = CallTracer
        self.container.tracer.configure(CFG.get_safe("container.tracer", {}))

        ## create queue listener and publisher
        self.sender = EventPublisher(event_type="ContainerManagementResult")
        self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event)
        with self.lock:
            self.running = True
            self.receiver.start()
        log.info('ready for container management requests')

    def stop(self):
        log.debug('container management stopping')
        with self.lock:
            self.receiver.stop()
            self.sender.close()
            self.running = False
        log.debug('container management stopped')

        self.container_tracer.stop_tracing()

    def add_handler(self, handler):
        self.handlers.append(handler)

    def _get_handlers(self, action):
        out = []
        for handler in self.handlers:
            if handler.can_handle_request(action):
                out.append(handler)
        return out

    def _receive_event(self, event, headers):
        with self.lock:
            if not isinstance(event, ContainerManagementRequest):
                log.trace('ignoring wrong type event: %r', event)
                return
            if not self.running:
                log.warn('ignoring admin message received after shutdown: %s', event.action)
                return
            predicate = ContainerSelector.from_object(event.predicate)
            if predicate.should_handle(self.container):
                log.trace('handling admin message: %s', event.action)
                self._perform_action(event.action)
            else:
                log.trace('ignoring admin action: %s', event.action)
                if SEND_RESULT_IF_NOT_SELECTED:
                    self.sender.publish_event(origin=self.container.id, action=event.action, outcome='not selected')
                    log.debug('received action: %s, outcome: not selected', event.action)

    def _perform_action(self, action):
        handlers = self._get_handlers(action)
        if not handlers:
            log.info('action accepted but no handlers found: %s', action)
            result = 'unhandled'
            self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
            log.debug('received action: %s, outcome: %s', action, result)
        else:
            for handler in handlers:
                try:
                    result = handler.handle_request(action) or "completed"
                except Exception as e:
                    log.error("handler %r failed to perform action: %s", handler, action, exc_info=True)
                    result = e
                self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
                log.debug('performed action: %s, outcome: %s', action, result)
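
# Added illustration (not from the original source): a minimal sketch of the handler
# protocol ContainerManager relies on - can_handle_request(action) selects handlers
# and handle_request(action) does the work, with its return value published as the
# outcome. The class name is hypothetical.
class EchoActionHandler(object):

    def can_handle_request(self, action):
        # Accept every admin action in this sketch; a real handler would inspect it
        return True

    def handle_request(self, action):
        log.info('EchoActionHandler received action: %s', action)
        return 'echoed'

# Such a handler would typically be added before start(), e.g.:
#   manager = ContainerManager(container)
#   manager.add_handler(EchoActionHandler())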
Example #51
class GovernanceController(object):
    """
    This is a singleton object which handles governance functionality in the container.
    Registers event callback for PolicyEvent to update local policies on change.
    """
    def __init__(self, container):
        log.debug('GovernanceController.__init__()')
        self.container = container
        self.enabled = False
        self.interceptor_by_name_dict = {}
        self.interceptor_order = []
        self.policy_decision_point_manager = None
        self.governance_dispatcher = None

        # Holds a list per service operation of policy methods to be called before operation is invoked
        self._service_op_preconditions = {}
        # Holds a list per process operation of policy methods to be called before operation is invoked
        self._process_op_preconditions = {}

        self._is_container_org_boundary = False
        self._container_org_name = None
        self._container_org_id = None

        # For policy debugging purposes. Keeps a list of most recent policy updates for later readout
        self._policy_update_log = []
        self._policy_snapshot = None

    def start(self):
        log.debug("GovernanceController starting ...")
        self._CFG = CFG

        self.enabled = CFG.get_safe(
            'interceptor.interceptors.governance.config.enabled', False)
        if not self.enabled:
            log.warn("GovernanceInterceptor disabled by configuration")
        self.policy_event_subscriber = None

        # Containers default to not Org Boundary and ION Root Org
        self._is_container_org_boundary = CFG.get_safe(
            'container.org_boundary', False)
        self._container_org_name = CFG.get_safe(
            'container.org_name', CFG.get_safe('system.root_org', 'ION'))
        self._container_org_id = None
        self._system_root_org_name = CFG.get_safe('system.root_org', 'ION')

        self._is_root_org_container = (
            self._container_org_name == self._system_root_org_name)

        self.system_actor_id = None
        self.system_actor_user_header = None

        self.rr_client = ResourceRegistryServiceProcessClient(
            process=self.container)
        self.policy_client = PolicyManagementServiceProcessClient(
            process=self.container)

        if self.enabled:
            config = CFG.get_safe('interceptor.interceptors.governance.config')
            self.initialize_from_config(config)

            self.policy_event_subscriber = EventSubscriber(
                event_type=OT.PolicyEvent, callback=self.policy_event_callback)
            self.policy_event_subscriber.start()

            self._policy_snapshot = self._get_policy_snapshot()
            self._log_policy_update("start_governance_ctrl",
                                    message="Container start")

    def initialize_from_config(self, config):
        self.governance_dispatcher = GovernanceDispatcher()
        self.policy_decision_point_manager = PolicyDecisionPointManager(self)

        self.interceptor_order = config.get('interceptor_order', None) or []
        gov_ints = config.get('governance_interceptors', None) or {}
        for name in gov_ints:
            interceptor_def = gov_ints[name]
            classobj = named_any(interceptor_def["class"])
            classinst = classobj()
            self.interceptor_by_name_dict[name] = classinst

    def _ensure_system_actor(self):
        """Make sure we have a handle for the system actor"""
        if self.system_actor_id is None:
            system_actor = get_system_actor()
            if system_actor is not None:
                self.system_actor_id = system_actor._id
                self.system_actor_user_header = get_system_actor_header(
                    system_actor)

    def stop(self):
        log.debug("GovernanceController stopping ...")

        if self.policy_event_subscriber is not None:
            self.policy_event_subscriber.stop()

    @property
    def is_container_org_boundary(self):
        return self._is_container_org_boundary

    @property
    def container_org_name(self):
        return self._container_org_name

    @property
    def system_root_org_name(self):
        return self._system_root_org_name

    @property
    def is_root_org_container(self):
        return self._is_root_org_container

    @property
    def CFG(self):
        return self._CFG

    @property
    def rr(self):
        """Returns the active resource registry instance if available in the container or service client.
        """
        if self.container.has_capability('RESOURCE_REGISTRY'):
            return self.container.resource_registry
        return self.rr_client

    def get_container_org_boundary_id(self):
        """Returns the permanent org identifier configured for this container
        """
        if not self._is_container_org_boundary:
            return None

        if self._container_org_id is None:
            org_ids, _ = self.rr.find_resources_ext(
                restype=RT.Org,
                attr_name="org_governance_name",
                attr_value=self._container_org_name,
                id_only=True)
            if org_ids:
                self._container_org_id = org_ids[0]

        return self._container_org_id

    # --- Interceptor management

    def process_incoming_message(self, invocation):
        """The GovernanceController hook into the incoming message interceptor stack
        """
        self.process_message(invocation, self.interceptor_order,
                             Invocation.PATH_IN)
        return self.governance_dispatcher.handle_incoming_message(invocation)

    def process_outgoing_message(self, invocation):
        """The GovernanceController hook into the outgoing message interceptor stack
        """
        self.process_message(invocation, reversed(self.interceptor_order),
                             Invocation.PATH_OUT)
        return self.governance_dispatcher.handle_outgoing_message(invocation)

    def process_message(self, invocation, interceptor_list, method):
        """
        The GovernanceController hook to iterate over the interceptors to call each one and
        evaluate the annotations to see what actions should be done.
        """
        for int_name in interceptor_list:
            interceptor_obj = self.interceptor_by_name_dict[int_name]
            interceptor_func = getattr(interceptor_obj, method)
            # Invoke interceptor function for designated path
            interceptor_func(invocation)

            # Stop processing message if an issue with the message was found by an interceptor
            if invocation.message_annotations.get(GovernanceDispatcher.CONVERSATION__STATUS_ANNOTATION, None) == GovernanceDispatcher.STATUS_REJECT or \
               invocation.message_annotations.get(GovernanceDispatcher.POLICY__STATUS_ANNOTATION, None) == GovernanceDispatcher.STATUS_REJECT:
                break

        return invocation

    # --- Container policy management

    def policy_event_callback(self, policy_event, *args, **kwargs):
        """Generic policy event handler for dispatching policy related events.
        """
        self._ensure_system_actor()

        log.info("Received policy event: %s", policy_event)

        if policy_event.type_ == OT.ResourcePolicyEvent:
            self.resource_policy_event_callback(policy_event, *args, **kwargs)
        elif policy_event.type_ == OT.RelatedResourcePolicyEvent:
            self.resource_policy_event_callback(policy_event, *args, **kwargs)
        elif policy_event.type_ == OT.ServicePolicyEvent:
            self.service_policy_event_callback(policy_event, *args, **kwargs)

        self._log_policy_update("policy_event_callback",
                                message="Event processed",
                                event=policy_event)

    def service_policy_event_callback(self, service_policy_event, *args,
                                      **kwargs):
        """The ServicePolicyEvent handler
        """
        log.debug('Service policy event: %s',
                  str(service_policy_event.__dict__))

        policy_id = service_policy_event.origin
        service_name = service_policy_event.service_name
        service_op = service_policy_event.op
        delete_policy = True if service_policy_event.sub_type == 'DeletePolicy' else False

        if service_name:
            if self.container.proc_manager.is_local_service_process(
                    service_name):
                self.update_service_access_policy(service_name,
                                                  service_op,
                                                  delete_policy=delete_policy)
            elif self.container.proc_manager.is_local_agent_process(
                    service_name):
                self.update_service_access_policy(service_name,
                                                  service_op,
                                                  delete_policy=delete_policy)

        else:
            self.update_common_service_access_policy()

    def resource_policy_event_callback(self, resource_policy_event, *args,
                                       **kwargs):
        """The ResourcePolicyEvent handler
        """
        log.debug('Resource policy event: %s',
                  str(resource_policy_event.__dict__))

        policy_id = resource_policy_event.origin
        resource_id = resource_policy_event.resource_id
        delete_policy = True if resource_policy_event.sub_type == 'DeletePolicy' else False

        self.update_resource_access_policy(resource_id, delete_policy)

    def reset_policy_cache(self):
        """Empty and reload the container's policy caches.
        Reload by getting policy for each of the container's processes and common policy.
        """
        log.info('Resetting policy cache')

        # First remove all cached policies and operation precondition functions
        self._clear_container_policy_caches()

        # Load the common service access policies since they are shared across services
        self.update_common_service_access_policy()

        # Iterate over the processes running in the container and reload their policies
        proc_list = self.container.proc_manager.list_local_processes()
        for proc in proc_list:
            self.update_process_policies(proc, force_update=False)

        self._log_policy_update("reset_policy_cache")

    def _clear_container_policy_caches(self):
        self.policy_decision_point_manager.clear_policy_cache()
        self.unregister_all_process_policy_preconditions()

    def update_process_policies(self,
                                process_instance,
                                safe_mode=False,
                                force_update=True):
        """
        Load any applicable process policies for a container process.
        To be called when spawning a new process, or when policy is reset.
        @param process_instance  The ION process for which to load policy
        @param safe_mode  If True, will not attempt to read policy if Policy MS not available
        """
        # NOTE: During restart, we rely on the bootstrap code to remove registration of Policy MS
        if safe_mode and not self._is_policy_management_service_available():
            if not is_testing() and (process_instance.name not in {
                    "resource_registry", "system_management", "directory",
                    "identity_management"
            } and process_instance._proc_name != "event_persister"):
                # We are in the early phases of bootstrapping
                log.warn(
                    "update_process_policies(%s) - No update. Policy MS not available",
                    process_instance._proc_name)

            self._log_policy_update(
                "update_process_policies",
                message="No update. Policy MS not available",
                process=process_instance)
            return

        self._ensure_system_actor()

        if process_instance._proc_type == PROCTYPE_SERVICE:
            self.update_service_access_policy(
                process_instance._proc_listen_name, force_update=force_update)

        elif process_instance._proc_type == PROCTYPE_AGENT:
            # Load any existing policies for this agent with type or name
            if process_instance.resource_type is None:
                self.update_service_access_policy(process_instance.name,
                                                  force_update=force_update)
            else:
                self.update_service_access_policy(
                    process_instance.resource_type, force_update=force_update)

            if process_instance.resource_id:
                # Load any existing policies for this resource
                self.update_resource_access_policy(
                    process_instance.resource_id, force_update=force_update)

        self._log_policy_update("update_process_policies",
                                message="Checked",
                                process=process_instance)

    def update_common_service_access_policy(self, delete_policy=False):
        """Update policy common to all services"""
        if self.policy_decision_point_manager is None:
            return

        try:
            rules = self.policy_client.get_active_service_access_policy_rules(
                service_name='',
                org_name=self._container_org_name,
                headers=self.system_actor_user_header)
            self.policy_decision_point_manager.set_common_service_policy_rules(
                rules)

        except Exception as e:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn("There was an error applying access policy: %s" %
                     e.message)

    def update_service_access_policy(self,
                                     service_name,
                                     service_op='',
                                     delete_policy=False,
                                     force_update=True):
        """Update policy for a service"""
        if self.policy_decision_point_manager is None:
            return
        if not force_update and not service_op and self.policy_decision_point_manager.has_service_policy(
                service_name):
            log.info("Skipping update of service %s policy - already cached",
                     service_name)
            return

        try:
            if service_op:
                policies = self.policy_client.get_active_service_operation_preconditions(
                    service_name=service_name,
                    op=service_op,
                    org_name=self._container_org_name,
                    headers=self.system_actor_user_header)
            else:
                policies = self.policy_client.get_active_service_access_policy_rules(
                    service_name=service_name,
                    org_name=self._container_org_name,
                    headers=self.system_actor_user_header)

            # First update any access policy rules
            svc_access_policy = [
                p for p in policies
                if p.policy_type in (PolicyTypeEnum.COMMON_SERVICE_ACCESS,
                                     PolicyTypeEnum.SERVICE_ACCESS)
            ]
            self.policy_decision_point_manager.set_service_policy_rules(
                service_name, svc_access_policy)

            # Next update any precondition policies
            svc_preconditions = [
                p for p in policies
                if p.policy_type == PolicyTypeEnum.SERVICE_OP_PRECOND
            ]

            # There can be several local processes for a service
            procs = self.container.proc_manager.get_local_service_processes(
                service_name)
            for proc in procs:
                if svc_preconditions:
                    for op_pre_policy in svc_preconditions:
                        for pre_check in op_pre_policy.preconditions:
                            self.unregister_process_operation_precondition(
                                proc, op_pre_policy.op, pre_check)
                            if not delete_policy:
                                self.register_process_operation_precondition(
                                    proc, op_pre_policy.op, pre_check)
                else:
                    # Unregister all, just in case
                    self.unregister_all_process_operation_precondition(
                        proc, service_op)

        except Exception as ex:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn("Error applying access policy for service %s: %s" %
                     (service_name, ex.message))

    def update_resource_access_policy(self,
                                      resource_id,
                                      delete_policy=False,
                                      force_update=True):
        """Update policy for a resource (such as a device fronted by an agent process)"""
        if self.policy_decision_point_manager is None:
            return
        if self.policy_decision_point_manager.has_resource_policy(resource_id):
            return

        try:
            policy_list = self.policy_client.get_active_resource_access_policy_rules(
                resource_id, headers=self.system_actor_user_header)
            self.policy_decision_point_manager.set_resource_policy_rules(
                resource_id, policy_list)

        except Exception as e:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn(
                "There was an error applying access policy for resource %s: %s",
                resource_id, e.message)

    def update_process_access_policy(self,
                                     process_key,
                                     service_op='',
                                     delete_policy=False,
                                     force_update=True):
        pass
        # procs, op_preconditions = [], None
        # try:
        #     # There can be several local processes for a service all with different names
        #     procs = self.container.proc_manager.get_local_service_processes(service_name)
        #     if procs:
        #         op_preconditions = self.policy_client.get_active_service_operation_preconditions(
        #                 service_name=service_name, op=service_op, org_name=self._container_org_name,
        #                 headers=self.system_actor_user_header)
        # except Exception as ex:
        #     # If the resource does not exist, just ignore it - but log a warning.
        #     log.warn("Error applying precondition access policy for service %s: %s" % (service_name, ex.message))
        #
        # for proc in procs:
        #     try:
        #         if op_preconditions:
        #             for op in op_preconditions:
        #                 for pre in op.preconditions:
        #                     self.unregister_process_operation_precondition(proc, op.op, pre)
        #                     if not delete_policy:
        #                         self.register_process_operation_precondition(proc, op.op, pre)
        #         else:
        #             # Unregister all, just in case
        #             self.unregister_all_process_operation_precondition(proc, service_op)
        #     except Exception as ex:
        #         # If the resource does not exist, just ignore it - but log a warning.
        #         log.warn("Error applying precondition access policy for process %s of service %s: %s" % (proc, service_name, ex.message))

    def get_active_policies(self):
        container_policies = dict()
        container_policies[
            'common_service_access'] = self.policy_decision_point_manager.load_common_service_pdp
        container_policies['service_access'] = {
            k: v
            for (k, v) in self.policy_decision_point_manager.
            service_policy_decision_point.iteritems() if v is not None
        }
        container_policies['resource_access'] = {
            k: v
            for (k, v) in self.policy_decision_point_manager.
            resource_policy_decision_point.iteritems() if v is not None
        }
        container_policies['service_operation'] = dict(
            self._service_op_preconditions)

        #log.info(container_policies)
        return container_policies

    def _is_policy_management_service_available(self):
        """
        Method to verify that the Policy Management Service is running in the system. If the container cannot connect to
        the RR, assume it is a remote container and do not try to access the Policy Management Service.
        """
        policy_service = get_service_registry().is_service_available(
            'policy_management', True)
        if policy_service:
            return True
        return False

    def _get_policy_snapshot(self):
        """Debugging helper that snapshot copies the current container's policy state.
        """
        policy_snap = {}
        policy_snap["snap_ts"] = get_ion_ts()

        policies = self.get_active_policies()
        common_list = []
        policy_snap["common_pdp"] = common_list
        for rule in policies.get("common_service_access", {}).policy.rules:
            rule_dict = dict(id=rule.id,
                             description=rule.description,
                             effect=rule.effect.value)
            common_list.append(rule_dict)

        service_dict = {}
        policy_snap["service_pdp"] = service_dict
        for (svc_name, sp) in policies.get("service_access", {}).iteritems():
            for rule in sp.policy.rules:
                if svc_name not in service_dict:
                    service_dict[svc_name] = []
                rule_dict = dict(id=rule.id,
                                 description=rule.description,
                                 effect=rule.effect.value)
                service_dict[svc_name].append(rule_dict)

        service_pre_dict = {}
        policy_snap["service_precondition"] = service_pre_dict
        for (svc_name, sp) in policies.get("service_operation",
                                           {}).iteritems():
            for op, f in sp.iteritems():
                if svc_name not in service_pre_dict:
                    service_pre_dict[svc_name] = []
                service_pre_dict[svc_name].append(op)

        resource_dict = {}
        policy_snap["resource_pdp"] = resource_dict
        for (res_name, sp) in policies.get("resource_access", {}).iteritems():
            for rule in sp.policy.rules:
                if res_name not in resource_dict:
                    resource_dict[res_name] = []
                rule_dict = dict(id=rule.id,
                                 description=rule.description,
                                 effect=rule.effect.value)
                resource_dict[res_name].append(rule_dict)

        return policy_snap

    def _log_policy_update(self,
                           update_type=None,
                           message=None,
                           event=None,
                           process=None):
        policy_update_dict = {}
        policy_update_dict["update_ts"] = get_ion_ts()
        policy_update_dict["update_type"] = update_type or ""
        policy_update_dict["message"] = message or ""
        if event:
            policy_update_dict["event._id"] = getattr(event, "_id", "")
            policy_update_dict["event.ts_created"] = getattr(
                event, "ts_created", "")
            policy_update_dict["event.type_"] = getattr(event, "type_", "")
            policy_update_dict["event.sub_type"] = getattr(
                event, "sub_type", "")
        if process:
            policy_update_dict["proc._proc_name"] = getattr(
                process, "_proc_name", "")
            policy_update_dict["proc.name"] = getattr(process, "name", "")
            policy_update_dict["proc._proc_listen_name"] = getattr(
                process, "_proc_listen_name", "")
            policy_update_dict["proc.resource_type"] = getattr(
                process, "resource_type", "")
            policy_update_dict["proc.resource_id"] = getattr(
                process, "resource_id", "")
        any_change = False  # Change can only be detected in the number/names of policies, not their content
        snapshot = self._policy_snapshot
        policy_now = self._get_policy_snapshot()
        # Comparison of snapshot to current policy
        try:

            def compare_policy(pol_cur, pol_snap, key, res):
                pol_cur_set = {
                    d["id"] if isinstance(d, dict) else d
                    for d in pol_cur
                }
                pol_snap_set = {
                    d["id"] if isinstance(d, dict) else d
                    for d in pol_snap
                }
                if pol_cur_set != pol_snap_set:
                    policy_update_dict["snap.%s.%s.added" %
                                       (key, res)] = pol_cur_set - pol_snap_set
                    policy_update_dict["snap.%s.%s.removed" %
                                       (key, res)] = pol_snap_set - pol_cur_set
                    log.debug("Policy changed for %s.%s: %s vs %s" %
                              (key, res, pol_cur_set, pol_snap_set))
                    return True
                return False

            policy_update_dict["snap.snap_ts"] = snapshot["snap_ts"]
            for key in ("common_pdp", "service_pdp", "service_precondition",
                        "resource_pdp"):
                pol_snap = snapshot[key]
                pol_cur = policy_now[key]
                if isinstance(pol_cur, dict):
                    for res in pol_cur.keys():
                        pol_list = pol_cur[res]
                        snap_list = pol_snap.get(res, [])
                        any_change = compare_policy(pol_list, snap_list, key,
                                                    res) or any_change
                elif isinstance(pol_cur, list):
                    any_change = compare_policy(pol_cur, pol_snap, key,
                                                "common") or any_change

            policy_update_dict["snap.policy_changed"] = str(any_change)
        except Exception as ex:
            log.warn("Cannot compare current policy to prior snapshot",
                     exc_info=True)

        self._policy_update_log.append(policy_update_dict)
        self._policy_update_log = self._policy_update_log[-100:]
        self._policy_snapshot = policy_now

        if any_change:
            log.debug("Container policy changed. Cause: %s/%s" %
                      (update_type, message))
        else:
            log.debug("Container policy checked but no change. Cause: %s/%s" %
                      (update_type, message))

    # --- Methods for managing operation specific preconditions

    def get_process_operation_dict(self, process_name, auto_add=True):
        if process_name in self._service_op_preconditions:
            return self._service_op_preconditions[process_name]

        if auto_add:
            self._service_op_preconditions[process_name] = dict()
            return self._service_op_preconditions[process_name]

        return None

    def register_process_operation_precondition(self, process, operation,
                                                precondition):
        """
        This method is used to register process operation precondition functions
        with the governance controller. The endpoint code will call check_process_operation_preconditions()
        below before calling the business logic operation and if any of
        the precondition functions return False, then the request is denied as Unauthorized.

        At some point, this should be refactored to be another interceptor, but at the operation level.
        """
        if not hasattr(process, operation):
            raise NotFound(
                "The operation %s does not exist for the %s process" %
                (operation, process.name))

        if type(precondition
                ) == types.MethodType and precondition.im_self != process:
            raise NotFound("The method %s does not exist for the %s process." %
                           (str(precondition), process.name))

        process_op_conditions = self.get_process_operation_dict(process.name)
        if operation in process_op_conditions:
            process_op_conditions[operation].append(precondition)
        else:
            preconditions = list()
            preconditions.append(precondition)
            process_op_conditions[operation] = preconditions

    def unregister_all_process_operation_precondition(self, process,
                                                      operation):
        """
        This method removes all precondition functions registered with an operation on a process.
        Care should be taken with this call, as it can remove "hard wired" preconditions that are
        directly registered by processes in a container.
        """
        process_op_conditions = self.get_process_operation_dict(process.name,
                                                                auto_add=False)
        if process_op_conditions is not None and operation in process_op_conditions:
            del process_op_conditions[operation]

    def unregister_process_operation_precondition(self, process, operation,
                                                  precondition):
        """
        This method removes a specific precondition function registered with an operation on a process.
        Care should be taken with this call, as it can remove "hard wired" preconditions that are
        directly registered by processes in a container.
        """
        #Just skip this if the operation is not passed in.
        if operation is None:
            return

        if not hasattr(process, operation):
            raise NotFound(
                "The operation %s does not exist for the %s service" %
                (operation, process.name))

        process_op_conditions = self.get_process_operation_dict(process.name,
                                                                auto_add=False)
        if process_op_conditions is not None and operation in process_op_conditions:
            preconditions = process_op_conditions[operation]
            preconditions[:] = [
                pre for pre in preconditions if not pre == precondition
            ]
            if not preconditions:
                del process_op_conditions[operation]

    def unregister_all_process_policy_preconditions(self):
        """
        This method removes all precondition functions registered with an operation on a process.
        It will not remove "hard wired" preconditions that are directly registered by processes in a container.
        """
        for proc in self._service_op_preconditions:
            process_op_conditions = self.get_process_operation_dict(
                proc, auto_add=False)
            if process_op_conditions is not None:
                for op in process_op_conditions:
                    preconditions = process_op_conditions[op]
                    preconditions[:] = [
                        pre for pre in preconditions
                        if type(pre) == types.FunctionType
                    ]

    def check_process_operation_preconditions(self, process, msg, headers):
        """
        This method is called by the ION endpoint to execute any process operation preconditions functions before
        allowing the operation to be called.
        """
        operation = headers.get('op', None)
        if operation is None:
            return

        process_op_conditions = self.get_process_operation_dict(process.name,
                                                                auto_add=False)
        if process_op_conditions is not None and operation in process_op_conditions:
            preconditions = process_op_conditions[operation]
            for precond in reversed(preconditions):
                if type(precond) in (types.MethodType, types.FunctionType):
                    # Handle precondition which are built-in functions
                    try:
                        ret_val, ret_message = precond(msg, headers)
                    except Exception as e:
                        # TODO - Catching all exceptions and logging as errors; don't want to stop processing for this right now
                        log.error(
                            'Error executing precondition function %s for operation %s: %s - the precondition will be ignored.'
                            % (precond.__name__, operation, e.message))
                        ret_val = True
                        ret_message = ''

                    if not ret_val:
                        raise Unauthorized(ret_message)

                elif isinstance(precond, basestring):
                    try:
                        # See if this is method within the endpoint process, if so call it
                        method = getattr(process, precond, None)
                        if method:
                            ret_val, ret_message = method(msg, headers)
                        else:
                            # It is not a method in the process, so try to execute as a simple python function
                            exec precond
                            pref = locals()["precondition_func"]
                            ret_val, ret_message = pref(process, msg, headers)

                    except Exception as e:
                        # TODO - Catching all exceptions and logging as errors; don't want to stop processing for this right now
                        log.error(
                            'Error executing precondition function %s for operation %s: %s - the precondition will be ignored.'
                            % (precond, operation, e.message))
                        ret_val = True
                        ret_message = ''

                    if not ret_val:
                        raise Unauthorized(ret_message)
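
# Added illustration (not from the original source): a minimal sketch of an operation
# precondition as consumed by check_process_operation_preconditions() above - a
# callable taking (msg, headers) and returning (allowed, message); a False result
# raises Unauthorized. The 'ion-actor-id' header key and the operation name in the
# commented registration call are assumptions.
def deny_anonymous_precondition(msg, headers):
    actor_id = headers.get('ion-actor-id', 'anonymous')
    if actor_id == 'anonymous':
        return False, 'Operation not allowed for anonymous callers'
    return True, ''

# Registration against a live process would look roughly like:
#   gov_controller.register_process_operation_precondition(
#       process, 'read_resource', deny_anonymous_precondition)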
Example #52
class GovernanceController(object):
    """
    This is a singleton object which handles governance functionality in the container.
    Registers event callback for PolicyEvent to update local policies on change.
    """

    def __init__(self, container):
        log.debug('GovernanceController.__init__()')
        self.container = container
        self.enabled = False
        self.interceptor_by_name_dict = {}
        self.interceptor_order = []
        self.policy_decision_point_manager = None
        self.governance_dispatcher = None

        # Holds a list per service operation of policy methods to be called before operation is invoked
        self._service_op_preconditions = {}
        # Holds a list per process operation of policy methods to be called before operation is invoked
        self._process_op_preconditions = {}

        self._is_container_org_boundary = False
        self._container_org_name = None
        self._container_org_id = None

        # For policy debugging purposes. Keeps a list of most recent policy updates for later readout
        self._policy_update_log = []
        self._policy_snapshot = None

    def start(self):
        log.debug("GovernanceController starting ...")
        self._CFG = CFG

        self.enabled = CFG.get_safe('interceptor.interceptors.governance.config.enabled', False)
        if not self.enabled:
            log.warn("GovernanceInterceptor disabled by configuration")
        self.policy_event_subscriber = None

        # Containers default to not Org Boundary and ION Root Org
        self._is_container_org_boundary = CFG.get_safe('container.org_boundary', False)
        self._container_org_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))
        self._container_org_id = None
        self._system_root_org_name = CFG.get_safe('system.root_org', 'ION')

        self._is_root_org_container = (self._container_org_name == self._system_root_org_name)

        self.system_actor_id = None
        self.system_actor_user_header = None

        self.rr_client = ResourceRegistryServiceProcessClient(process=self.container)
        self.policy_client = PolicyManagementServiceProcessClient(process=self.container)

        if self.enabled:
            config = CFG.get_safe('interceptor.interceptors.governance.config')
            self.initialize_from_config(config)

            self.policy_event_subscriber = EventSubscriber(event_type=OT.PolicyEvent, callback=self.policy_event_callback)
            self.policy_event_subscriber.start()

            self._policy_snapshot = self._get_policy_snapshot()
            self._log_policy_update("start_governance_ctrl", message="Container start")

    def initialize_from_config(self, config):
        self.governance_dispatcher = GovernanceDispatcher()
        self.policy_decision_point_manager = PolicyDecisionPointManager(self)

        self.interceptor_order = config.get('interceptor_order', None) or []
        gov_ints = config.get('governance_interceptors', None) or {}
        for name in gov_ints:
            interceptor_def = gov_ints[name]
            classobj = named_any(interceptor_def["class"])
            classinst = classobj()
            self.interceptor_by_name_dict[name] = classinst

    def _ensure_system_actor(self):
        """Make sure we have a handle for the system actor"""
        if self.system_actor_id is None:
            system_actor = get_system_actor()
            if system_actor is not None:
                self.system_actor_id = system_actor._id
                self.system_actor_user_header = get_system_actor_header(system_actor)

    def stop(self):
        log.debug("GovernanceController stopping ...")

        if self.policy_event_subscriber is not None:
            self.policy_event_subscriber.stop()

    @property
    def is_container_org_boundary(self):
        return self._is_container_org_boundary

    @property
    def container_org_name(self):
        return self._container_org_name

    @property
    def system_root_org_name(self):
        return self._system_root_org_name

    @property
    def is_root_org_container(self):
        return self._is_root_org_container

    @property
    def CFG(self):
        return self._CFG


    @property
    def rr(self):
        """Returns the active resource registry instance if available in the container or service client.
        """
        if self.container.has_capability('RESOURCE_REGISTRY'):
            return self.container.resource_registry
        return self.rr_client


    def get_container_org_boundary_id(self):
        """Returns the permanent org identifier configured for this container
        """
        if not self._is_container_org_boundary:
            return None

        if self._container_org_id is None:
            org_ids, _ = self.rr.find_resources_ext(restype=RT.Org, attr_name="org_governance_name",
                                                    attr_value=self._container_org_name, id_only=True)
            if org_ids:
                self._container_org_id = org_ids[0]

        return self._container_org_id

    # --- Interceptor management

    def process_incoming_message(self, invocation):
        """The GovernanceController hook into the incoming message interceptor stack
        """
        self.process_message(invocation, self.interceptor_order, Invocation.PATH_IN)
        return self.governance_dispatcher.handle_incoming_message(invocation)

    def process_outgoing_message(self, invocation):
        """The GovernanceController hook into the outgoing message interceptor stack
        """
        self.process_message(invocation, reversed(self.interceptor_order), Invocation.PATH_OUT)
        return self.governance_dispatcher.handle_outgoing_message(invocation)

    def process_message(self, invocation, interceptor_list, method):
        """
        The GovernanceController hook to iterate over the interceptors to call each one and
        evaluate the annotations to see what actions should be done.
        """
        for int_name in interceptor_list:
            interceptor_obj = self.interceptor_by_name_dict[int_name]
            interceptor_func = getattr(interceptor_obj, method)
            # Invoke interceptor function for designated path
            interceptor_func(invocation)

            # Stop processing message if an issue with the message was found by an interceptor
            if invocation.message_annotations.get(GovernanceDispatcher.CONVERSATION__STATUS_ANNOTATION, None) == GovernanceDispatcher.STATUS_REJECT or \
               invocation.message_annotations.get(GovernanceDispatcher.POLICY__STATUS_ANNOTATION, None) == GovernanceDispatcher.STATUS_REJECT:
                break

        return invocation

    # --- Container policy management

    def policy_event_callback(self, policy_event, *args, **kwargs):
        """Generic policy event handler for dispatching policy related events.
        """
        self._ensure_system_actor()

        log.info("Received policy event: %s", policy_event)

        if policy_event.type_ == OT.ResourcePolicyEvent:
            self.resource_policy_event_callback(policy_event, *args, **kwargs)
        elif policy_event.type_ == OT.RelatedResourcePolicyEvent:
            self.resource_policy_event_callback(policy_event, *args, **kwargs)
        elif policy_event.type_ == OT.ServicePolicyEvent:
            self.service_policy_event_callback(policy_event, *args, **kwargs)

        self._log_policy_update("policy_event_callback",
                                message="Event processed",
                                event=policy_event)

    def service_policy_event_callback(self, service_policy_event, *args, **kwargs):
        """The ServicePolicyEvent handler
        """
        log.debug('Service policy event: %s', str(service_policy_event.__dict__))

        policy_id = service_policy_event.origin
        service_name = service_policy_event.service_name
        service_op = service_policy_event.op
        delete_policy = True if service_policy_event.sub_type == 'DeletePolicy' else False

        if service_name:
            if self.container.proc_manager.is_local_service_process(service_name):
                self.update_service_access_policy(service_name, service_op, delete_policy=delete_policy)
            elif self.container.proc_manager.is_local_agent_process(service_name):
                self.update_service_access_policy(service_name, service_op, delete_policy=delete_policy)

        else:
            self.update_common_service_access_policy()

    def resource_policy_event_callback(self, resource_policy_event, *args, **kwargs):
        """The ResourcePolicyEvent handler
        """
        log.debug('Resource policy event: %s', str(resource_policy_event.__dict__))

        policy_id = resource_policy_event.origin
        resource_id = resource_policy_event.resource_id
        delete_policy = True if resource_policy_event.sub_type == 'DeletePolicy' else False

        self.update_resource_access_policy(resource_id, delete_policy)

    def reset_policy_cache(self):
        """Empty and reload the container's policy caches.
        Reload by getting policy for each of the container's processes and common policy.
        """
        log.info('Resetting policy cache')

        # First remove all cached policies and operation precondition functions
        self._clear_container_policy_caches()

        # Load the common service access policies since they are shared across services
        self.update_common_service_access_policy()

        # Iterate over the processes running in the container and reload their policies
        proc_list = self.container.proc_manager.list_local_processes()
        for proc in proc_list:
            self.update_process_policies(proc, force_update=False)

        self._log_policy_update("reset_policy_cache")

    def _clear_container_policy_caches(self):
        self.policy_decision_point_manager.clear_policy_cache()
        self.unregister_all_process_policy_preconditions()

    def update_process_policies(self, process_instance, safe_mode=False, force_update=True):
        """
        Load any applicable process policies for a container process.
        To be called when spawning a new process, or when policy is reset.
        @param process_instance  The ION process for which to load policy
        @param safe_mode  If True, will not attempt to read policy if Policy MS not available
        """
        # NOTE: During restart, we rely on the bootstrap code to remove registration of Policy MS
        if safe_mode and not self._is_policy_management_service_available():
            if not is_testing() and (process_instance.name not in {"resource_registry", "system_management",
                    "directory", "identity_management"} and process_instance._proc_name != "event_persister"):
                # We are in the early phases of bootstrapping
                log.warn("update_process_policies(%s) - No update. Policy MS not available", process_instance._proc_name)

            self._log_policy_update("update_process_policies",
                                    message="No update. Policy MS not available",
                                    process=process_instance)
            return

        self._ensure_system_actor()

        if process_instance._proc_type == PROCTYPE_SERVICE:
            self.update_service_access_policy(process_instance._proc_listen_name, force_update=force_update)

        elif process_instance._proc_type == PROCTYPE_AGENT:
            # Load any existing policies for this agent with type or name
            if process_instance.resource_type is None:
                self.update_service_access_policy(process_instance.name, force_update=force_update)
            else:
                self.update_service_access_policy(process_instance.resource_type, force_update=force_update)

            if process_instance.resource_id:
                # Load any existing policies for this resource
                self.update_resource_access_policy(process_instance.resource_id, force_update=force_update)

        self._log_policy_update("update_process_policies",
                                message="Checked",
                                process=process_instance)

    def update_common_service_access_policy(self, delete_policy=False):
        """Update policy common to all services"""
        if self.policy_decision_point_manager is None:
            return

        try:
            rules = self.policy_client.get_active_service_access_policy_rules(
                    service_name='', org_name=self._container_org_name,
                    headers=self.system_actor_user_header)
            self.policy_decision_point_manager.set_common_service_policy_rules(rules)

        except Exception as e:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn("There was an error applying access policy: %s" % e.message)

    def update_service_access_policy(self, service_name, service_op='', delete_policy=False, force_update=True):
        """Update policy for a service"""
        if self.policy_decision_point_manager is None:
            return
        if not force_update and not service_op and self.policy_decision_point_manager.has_service_policy(service_name):
            log.info("Skipping update of service %s policy - already cached", service_name)
            return

        try:
            if service_op:
                policies = self.policy_client.get_active_service_operation_preconditions(
                        service_name=service_name, op=service_op, org_name=self._container_org_name,
                        headers=self.system_actor_user_header)
            else:
                policies = self.policy_client.get_active_service_access_policy_rules(
                        service_name=service_name, org_name=self._container_org_name,
                        headers=self.system_actor_user_header)

            # First update any access policy rules
            svc_access_policy = [p for p in policies
                                 if p.policy_type in (PolicyTypeEnum.COMMON_SERVICE_ACCESS, PolicyTypeEnum.SERVICE_ACCESS)]
            self.policy_decision_point_manager.set_service_policy_rules(service_name, svc_access_policy)

            # Next update any precondition policies
            svc_preconditions = [p for p in policies
                                 if p.policy_type == PolicyTypeEnum.SERVICE_OP_PRECOND]

            # There can be several local processes for a service
            procs = self.container.proc_manager.get_local_service_processes(service_name)
            for proc in procs:
                if svc_preconditions:
                    for op_pre_policy in svc_preconditions:
                        for pre_check in op_pre_policy.preconditions:
                            self.unregister_process_operation_precondition(proc, op_pre_policy.op, pre_check)
                            if not delete_policy:
                                self.register_process_operation_precondition(proc, op_pre_policy.op, pre_check)
                else:
                    # Unregister all, just in case
                    self.unregister_all_process_operation_precondition(proc, service_op)

        except Exception as ex:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn("Error applying access policy for service %s: %s" % (service_name, ex.message))

    def update_resource_access_policy(self, resource_id, delete_policy=False, force_update=True):
        """Update policy for a resource (such as a device fronted by an agent process)"""
        if self.policy_decision_point_manager is None:
            return
        if not force_update and self.policy_decision_point_manager.has_resource_policy(resource_id):
            return

        try:
            policy_list = self.policy_client.get_active_resource_access_policy_rules(
                    resource_id, headers=self.system_actor_user_header)
            self.policy_decision_point_manager.set_resource_policy_rules(resource_id, policy_list)

        except Exception as e:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn("There was an error applying access policy for resource %s: %s", resource_id, e.message)

    def update_process_access_policy(self, process_key, service_op='', delete_policy=False, force_update=True):
        """Update policy for a process (not implemented; draft kept as commented-out code below)"""
        pass
        # procs, op_preconditions = [], None
        # try:
        #     # There can be several local processes for a service all with different names
        #     procs = self.container.proc_manager.get_local_service_processes(service_name)
        #     if procs:
        #         op_preconditions = self.policy_client.get_active_service_operation_preconditions(
        #                 service_name=service_name, op=service_op, org_name=self._container_org_name,
        #                 headers=self.system_actor_user_header)
        # except Exception as ex:
        #     # If the resource does not exist, just ignore it - but log a warning.
        #     log.warn("Error applying precondition access policy for service %s: %s" % (service_name, ex.message))
        #
        # for proc in procs:
        #     try:
        #         if op_preconditions:
        #             for op in op_preconditions:
        #                 for pre in op.preconditions:
        #                     self.unregister_process_operation_precondition(proc, op.op, pre)
        #                     if not delete_policy:
        #                         self.register_process_operation_precondition(proc, op.op, pre)
        #         else:
        #             # Unregister all, just in case
        #             self.unregister_all_process_operation_precondition(proc, service_op)
        #     except Exception as ex:
        #         # If the resource does not exist, just ignore it - but log a warning.
        #         log.warn("Error applying precondition access policy for process %s of service %s: %s" % (proc, service_name, ex.message))


    def get_active_policies(self):
        container_policies = dict()
        container_policies['common_service_access'] = self.policy_decision_point_manager.load_common_service_pdp
        container_policies['service_access'] = {k: v for (k, v) in self.policy_decision_point_manager.service_policy_decision_point.iteritems() if v is not None}
        container_policies['resource_access'] = {k: v for (k, v) in self.policy_decision_point_manager.resource_policy_decision_point.iteritems() if v is not None}
        container_policies['service_operation'] = dict(self._service_op_preconditions)

        #log.info(container_policies)
        return container_policies

    def _is_policy_management_service_available(self):
        """
        Verify whether the Policy Management Service is running in the system. If the container cannot
        connect to the RR, assume it is a remote container and do not try to access the Policy Management Service.
        """
        return bool(get_service_registry().is_service_available('policy_management', True))

    def _get_policy_snapshot(self):
        """Debugging helper that snapshot copies the current container's policy state.
        """
        policy_snap = {}
        policy_snap["snap_ts"] = get_ion_ts()

        policies = self.get_active_policies()
        common_list = []
        policy_snap["common_pdp"] = common_list
        for rule in policies.get("common_service_access", {}).policy.rules:
            rule_dict = dict(id=rule.id, description=rule.description, effect=rule.effect.value)
            common_list.append(rule_dict)

        service_dict = {}
        policy_snap["service_pdp"] = service_dict
        for (svc_name, sp) in policies.get("service_access", {}).iteritems():
            for rule in sp.policy.rules:
                if svc_name not in service_dict:
                    service_dict[svc_name] = []
                rule_dict = dict(id=rule.id, description=rule.description, effect=rule.effect.value)
                service_dict[svc_name].append(rule_dict)

        service_pre_dict = {}
        policy_snap["service_precondition"] = service_pre_dict
        for (svc_name, sp) in policies.get("service_operation", {}).iteritems():
            for op, f in sp.iteritems():
                if svc_name not in service_pre_dict:
                    service_pre_dict[svc_name] = []
                service_pre_dict[svc_name].append(op)

        resource_dict = {}
        policy_snap["resource_pdp"] = resource_dict
        for (res_name, sp) in policies.get("resource_access", {}).iteritems():
            for rule in sp.policy.rules:
                if res_name not in resource_dict:
                    resource_dict[res_name] = []
                rule_dict = dict(id=rule.id, description=rule.description, effect=rule.effect.value)
                resource_dict[res_name].append(rule_dict)

        return policy_snap

    def _log_policy_update(self, update_type=None, message=None, event=None, process=None):
        policy_update_dict = {}
        policy_update_dict["update_ts"] = get_ion_ts()
        policy_update_dict["update_type"] = update_type or ""
        policy_update_dict["message"] = message or ""
        if event:
            policy_update_dict["event._id"] = getattr(event, "_id", "")
            policy_update_dict["event.ts_created"] = getattr(event, "ts_created", "")
            policy_update_dict["event.type_"] = getattr(event, "type_", "")
            policy_update_dict["event.sub_type"] = getattr(event, "sub_type", "")
        if process:
            policy_update_dict["proc._proc_name"] = getattr(process, "_proc_name", "")
            policy_update_dict["proc.name"] = getattr(process, "name", "")
            policy_update_dict["proc._proc_listen_name"] = getattr(process, "_proc_listen_name", "")
            policy_update_dict["proc.resource_type"] = getattr(process, "resource_type", "")
            policy_update_dict["proc.resource_id"] = getattr(process, "resource_id", "")
        any_change = False   # Change can only be detected in the number/names of policies, not their content
        snapshot = self._policy_snapshot
        policy_now = self._get_policy_snapshot()
        # Comparison of snapshot to current policy
        try:
            def compare_policy(pol_cur, pol_snap, key, res):
                pol_cur_set = {d["id"] if isinstance(d, dict) else d for d in pol_cur}
                pol_snap_set = {d["id"] if isinstance(d, dict) else d for d in pol_snap}
                if pol_cur_set != pol_snap_set:
                    policy_update_dict["snap.%s.%s.added" % (key, res)] = pol_cur_set - pol_snap_set
                    policy_update_dict["snap.%s.%s.removed" % (key, res)] = pol_snap_set - pol_cur_set
                    log.debug("Policy changed for %s.%s: %s vs %s" % (key, res, pol_cur_set, pol_snap_set))
                    return True
                return False
            policy_update_dict["snap.snap_ts"] = snapshot["snap_ts"]
            for key in ("common_pdp", "service_pdp", "service_precondition", "resource_pdp"):
                pol_snap = snapshot[key]
                pol_cur = policy_now[key]
                if isinstance(pol_cur, dict):
                    for res in pol_cur.keys():
                        pol_list = pol_cur[res]
                        snap_list = pol_snap.get(res, [])
                        any_change = compare_policy(pol_list, snap_list, key, res) or any_change
                elif isinstance(pol_cur, list):
                    any_change = compare_policy(pol_cur, pol_snap, key, "common") or any_change

            policy_update_dict["snap.policy_changed"] = str(any_change)
        except Exception as ex:
            log.warn("Cannot compare current policy to prior snapshot", exc_info=True)

        self._policy_update_log.append(policy_update_dict)
        self._policy_update_log = self._policy_update_log[-100:]
        self._policy_snapshot = policy_now

        if any_change:
            log.debug("Container policy changed. Cause: %s/%s" % (update_type, message))
        else:
            log.debug("Container policy checked but no change. Cause: %s/%s" % (update_type, message))

    # --- Methods for managing operation specific preconditions

    def get_process_operation_dict(self, process_name, auto_add=True):
        if process_name in self._service_op_preconditions:
            return self._service_op_preconditions[process_name]

        if auto_add:
            self._service_op_preconditions[process_name] = dict()
            return self._service_op_preconditions[process_name]

        return None

    def register_process_operation_precondition(self, process, operation, precondition):
        """
        This method is used to register process operation precondition functions
        with the governance controller. The endpoint code will call check_process_operation_preconditions()
        below before calling the business logic operation and if any of
        the precondition functions return False, then the request is denied as Unauthorized.

        At some point, this should be refactored to be handled by another interceptor at the operation level.
        """
        if not hasattr(process, operation):
            raise NotFound("The operation %s does not exist for the %s process" % (operation, process.name))

        if type(precondition) == types.MethodType and precondition.im_self != process:
            raise NotFound("The method %s does not exist for the %s process." % (str(precondition), process.name))

        process_op_conditions = self.get_process_operation_dict(process.name)
        if operation in process_op_conditions:
            process_op_conditions[operation].append(precondition)
        else:
            preconditions = list()
            preconditions.append(precondition)
            process_op_conditions[operation] = preconditions

    def unregister_all_process_operation_precondition(self, process, operation):
        """
        This method removes all precondition functions registered with an operation on a process.
        Care should be taken with this call, as it can remove "hard wired" preconditions that are
        directly registered by processes in a container.
        """
        process_op_conditions = self.get_process_operation_dict(process.name, auto_add=False)
        if process_op_conditions is not None and operation in process_op_conditions:
            del process_op_conditions[operation]

    def unregister_process_operation_precondition(self, process, operation, precondition):
        """
        This method removes a specific precondition function registered with an operation on a process.
        Care should be taken with this call, as it can remove "hard wired" preconditions that are
        directly registered by processes in a container.
        """
        # Just skip this if the operation is not passed in.
        if operation is None:
            return

        if not hasattr(process, operation):
            raise NotFound("The operation %s does not exist for the %s service" % (operation, process.name))

        process_op_conditions = self.get_process_operation_dict(process.name, auto_add=False)
        if process_op_conditions is not None and operation in process_op_conditions:
            preconditions = process_op_conditions[operation]
            preconditions[:] = [pre for pre in preconditions if pre != precondition]
            if not preconditions:
                del process_op_conditions[operation]

    def unregister_all_process_policy_preconditions(self):
        """
        This method removes all precondition functions registered with an operation on a process.
        It will not remove "hard wired" preconditions that are directly registered by processes in a container.
        """
        for proc in self._service_op_preconditions:
            process_op_conditions = self.get_process_operation_dict(proc, auto_add=False)
            if process_op_conditions is not None:
                for op in process_op_conditions:
                    preconditions = process_op_conditions[op]
                    preconditions[:] = [pre for pre in preconditions if type(pre) == types.FunctionType]

    def check_process_operation_preconditions(self, process, msg, headers):
        """
        This method is called by the ION endpoint to execute any process operation preconditions functions before
        allowing the operation to be called.
        """
        operation = headers.get('op', None)
        if operation is None:
            return

        process_op_conditions = self.get_process_operation_dict(process.name, auto_add=False)
        if process_op_conditions is not None and operation in process_op_conditions:
            preconditions = process_op_conditions[operation]
            for precond in reversed(preconditions):
                if type(precond) in (types.MethodType, types.FunctionType):
                    # Handle precondition which are built-in functions
                    try:
                        ret_val, ret_message = precond(msg, headers)
                    except Exception as e:
                        # TODO - Catching all exceptions and logging as errors; don't want to stop processing for this right now
                        log.error('Error executing precondition function %s for operation %s: %s - the precondition will be ignored.' %
                                  (precond.__name__, operation, e.message))
                        ret_val = True
                        ret_message = ''

                    if not ret_val:
                        raise Unauthorized(ret_message)

                elif isinstance(precond, basestring):
                    try:
                        # See if this is method within the endpoint process, if so call it
                        method = getattr(process, precond, None)
                        if method:
                            ret_val, ret_message = method(msg, headers)
                        else:
                            # It is not a method in the process, so try to execute as a simple python function
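                            # Illustrative (assumed) shape of such a string precondition: the policy
                            # text is expected to define a function named 'precondition_func', e.g.
                            #     def precondition_func(process, msg, headers):
                            #         return True, ''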
                            exec precond
                            pref = locals()["precondition_func"]
                            ret_val, ret_message = pref(process, msg, headers)

                    except Exception as e:
                        # TODO - Catching all exceptions and logging as errors; don't want to stop processing for this right now
                        log.error('Error executing precondition %s for operation %s: %s - the precondition will be ignored.' %
                                  (precond, operation, e.message))
                        ret_val = True
                        ret_message = ''

                    if not ret_val:
                        raise Unauthorized(ret_message)
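

# A minimal, hypothetical sketch (not part of the controller above) of a function-type
# precondition as check_process_operation_preconditions() calls it: it receives the
# operation's message and headers and returns (allowed, denial_message).
# The 'ion-actor-id' header name is an assumption used for illustration only.
def example_deny_anonymous_precondition(msg, headers):
    # Deny the call when no authenticated actor id is present in the headers
    actor_id = headers.get('ion-actor-id', 'anonymous')
    if actor_id == 'anonymous':
        return False, 'Anonymous callers are not authorized for this operation'
    return True, ''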
Example #53
    def test_qc_interval_integration(self):

        # 1 need to make a dataset that only has one discrete qc violation
        # 2 Launch the process
        # 3 Setup the scheduler to run it say three times
        # 4 Get the Events and verify the data

        #--------------------------------------------------------------------------------
        # Make a dataset that has only one discrete qc violation
        #--------------------------------------------------------------------------------

        dp_id, dataset_id, stream_def_id = self.make_data_product()
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        monitor = DatasetMonitor(dataset_id)
        self.addCleanup(monitor.stop)
        for rdt in self.populate_vectors(stream_def_id, 1,
                                         lambda x: [41] + [39] * (x - 1)):
            ph.publish_rdt_to_data_product(dp_id, rdt)
        self.assertTrue(
            monitor.event.wait(10))  # Give it 10 seconds to populate

        #--------------------------------------------------------------------------------
        # Launch the process
        #--------------------------------------------------------------------------------

        interval_key = uuid4().hex
        config = DotDict()
        config.process.interval_key = interval_key
        config.process.qc_params = [
            'glblrng_qc'
        ]  # The others are tested in other tests for completeness
        self.sync_launch(config)

        async_queue = Queue()

        def callback(event, *args, **kwargs):
            times = event.temporal_values
            self.assertEquals(len(times), 1)
            async_queue.put(1)

        es = EventSubscriber(event_type=OT.ParameterQCEvent,
                             origin=dp_id,
                             callback=callback,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        #--------------------------------------------------------------------------------
        # Setup the scheduler
        #--------------------------------------------------------------------------------

        timer_id = self.scheduler_service.create_interval_timer(
            start_time=time.time(),
            end_time=time.time() + 13,
            interval=5,
            event_origin=interval_key)

        #--------------------------------------------------------------------------------
        # Get the events and verify them
        #--------------------------------------------------------------------------------

        try:
            for i in xrange(2):
                async_queue.get(timeout=10)
        except Empty:
            raise AssertionError('QC Events not raised')
Example #54
class QCProcessor(SimpleProcess):
    def __init__(self):
        self.event = Event()  # Synchronizes the thread
        self.timeout = 10

    def on_start(self):
        '''
        Process initialization
        '''
        self._thread = self._process.thread_manager.spawn(self.thread_loop)
        self._event_subscriber = EventSubscriber(
            event_type=OT.ResetQCEvent,
            callback=self.receive_event,
            auto_delete=True)  # TODO Correct event types
        self._event_subscriber.start()
        self.timeout = self.CFG.get_safe('endpoint.receive.timeout', 10)
        self.resource_registry = self.container.resource_registry
        self.event_queue = Queue()

    def on_quit(self):
        '''
        Stop and cleanup the thread
        '''
        self._event_subscriber.stop()
        self.suspend()

    def receive_event(self, event, *args, **kwargs):
        log.error("Adding event to the event queue")
        self.event_queue.put(event)

    def thread_loop(self):
        '''
        Asynchronous event-loop
        '''
        threading.current_thread().name = '%s-qc-processor' % self.id
        while not self.event.wait(1):
            try:
                self.qc_processing_loop()
            except:
                log.error("Error in QC Processing Loop", exc_info=True)
            try:
                self.event_processing_loop()
            except:
                log.error("Error in QC Event Loop", exc_info=True)

    def qc_processing_loop(self):
        '''
        Iterates through available data products and evaluates QC
        '''
        data_products, _ = self.container.resource_registry.find_resources(
            restype=RT.DataProduct, id_only=False)
        for data_product in data_products:
            # Get the reference designator
            try:
                rd = self.get_reference_designator(data_product._id)
            except BadRequest:
                continue
            parameters = self.get_parameters(data_product)
            # Create a mapping of inputs to QC
            qc_mapping = {}

            # Creates a dictionary { data_product_name : parameter_name }
            for p in parameters:
                if p.ooi_short_name:
                    sname = p.ooi_short_name
                    g = re.match(r'([a-zA-Z-_]+)(_L[0-9])', sname)
                    if g:
                        sname = g.groups()[0]
                    qc_mapping[sname] = p.name
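            # e.g. (illustrative) a parameter named 'temp' with ooi_short_name 'TEMPWAT_L1'
            # yields qc_mapping['TEMPWAT'] = 'temp' - the _L<n> level suffix is stripped above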

            for p in parameters:
                # for each parameter, if the name ends in _qc run the qc
                if p.name.endswith('_qc'):
                    self.run_qc(data_product, rd, p, qc_mapping, parameters)

            # Break early if we can
            if self.event.is_set():
                break

    def event_processing_loop(self):
        '''
        Processes the events in the event queue
        '''
        log.error("Processing event queue")
        self.event_queue.put(StopIteration)
        for event in self.event_queue:
            log.error("My event's reference designator: %s", event.origin)

    def suspend(self):
        '''
        Stops the event loop
        '''
        self.event.set()
        self._thread.join(self.timeout)
        log.info("QC Thread Suspended")

    def get_reference_designator(self, data_product_id=''):
        '''
        Returns the reference designator for a data product if it has one
        '''
        # First try to get the parent data product
        data_product_ids, _ = self.resource_registry.find_objects(
            subject=data_product_id,
            predicate=PRED.hasDataProductParent,
            id_only=True)
        if data_product_ids:
            return self.get_reference_designator(data_product_ids[0])

        device_ids, _ = self.resource_registry.find_subjects(
            object=data_product_id,
            predicate=PRED.hasOutputProduct,
            subject_type=RT.InstrumentDevice,
            id_only=True)
        if not device_ids:
            raise BadRequest(
                "No instrument device associated with this data product")
        device_id = device_ids[0]

        sites, _ = self.resource_registry.find_subjects(
            object=device_id,
            predicate=PRED.hasDevice,
            subject_type=RT.InstrumentSite,
            id_only=False)
        if not sites:
            raise BadRequest("No site is associated with this data product")
        site = sites[0]
        rd = site.reference_designator
        return rd

    def calibrated_candidates(self, data_product, parameter, qc_mapping,
                              parameters):
        '''
        Returns a list of potential candidate parameter names to use as the input parameter
        '''

        # 1st Priority is *b_interp
        # 2nd Priority is *b_pd
        # 3rd Priority is input_name
        parameters = {p.name: p for p in parameters}

        dp_ident, alg, qc = parameter.ooi_short_name.split('_')
        input_name = qc_mapping[dp_ident]  # input_name is the third priority

        sname = parameters[
            input_name].ooi_short_name  # should be something like tempwat_l1

        interp = sname.lower() + 'b_interp'
        pd = sname.lower() + 'b_pd'

        print "1st priority:", interp  # 1st priority
        print "2nd priority:", pd  # 2nd priority
        print "3rd priority:", input_name  # 3rd priority

        if interp in parameters:
            return interp
        elif pd in parameters:
            return pd
        else:
            return input_name

    def run_qc(self, data_product, reference_designator, parameter, qc_mapping,
               parameters):
        '''
        Determines which algorithm the parameter should run, then evaluates the QC

        data_product         - Data Product Resource
        reference_designator - reference designator string
        parameter            - parameter context resource
        qc_mapping           - a dictionary of { data_product_name : parameter_name }
        '''

        # We key off of the OOI Short Name
        # DATAPRD_ALGRTHM_QC
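        # e.g. (illustrative) 'TEMPWAT_GLBLRNG_QC' -> dp_ident='TEMPWAT', alg='GLBLRNG', qc='QC'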
        dp_ident, alg, qc = parameter.ooi_short_name.split('_')
        if dp_ident not in qc_mapping:
            return  # No input!
        input_name = self.calibrated_candidates(data_product, parameter,
                                                qc_mapping, parameters)

        try:
            doc = self.container.object_store.read_doc(reference_designator)
        except NotFound:
            return  # NO QC lookups found
        if dp_ident not in doc:
            log.critical("Data product %s not in doc", dp_ident)
            return  # No data product of this listing in the RD's entry
        # Lookup table has the rows for the QC inputs
        lookup_table = doc[dp_ident]

        # An instance of the coverage is loaded if we need to run an algorithm
        dataset_id = self.get_dataset(data_product)
        coverage = self.get_coverage(dataset_id)
        if not coverage.num_timesteps:  # No data = no qc
            coverage.close()
            return

        try:
            # Get the lookup table info then run
            if alg.lower() == 'glblrng':
                row = self.recent_row(lookup_table['global_range'])
                min_value = row['min_value']
                max_value = row['max_value']
                self.process_glblrng(coverage, parameter, input_name,
                                     min_value, max_value)

            elif alg.lower() == 'stuckvl':
                row = self.recent_row(lookup_table['stuck_value'])
                resolution = row['resolution']
                N = row['consecutive_values']
                self.process_stuck_value(coverage, parameter, input_name,
                                         resolution, N)

            elif alg.lower() == 'trndtst':
                row = self.recent_row(lookup_table['trend_test'])
                ord_n = row['polynomial_order']
                nstd = row['standard_deviation']
                self.process_trend_test(coverage, parameter, input_name, ord_n,
                                        nstd)

            elif alg.lower() == 'spketst':
                row = self.recent_row(lookup_table['spike_test'])
                acc = row['accuracy']
                N = row['range_multiplier']
                L = row['window_length']
                self.process_spike_test(coverage, parameter, input_name, acc,
                                        N, L)

            elif alg.lower() == "gradtst":
                row = self.recent_row(lookup_table["gradient_test"])
                ddatdx = row["ddatdx"]
                mindx = row["mindx"]
                startdat = row["startdat"]
                if isinstance(startdat, basestring) and not startdat:
                    startdat = np.nan
                if isinstance(mindx, basestring) and not mindx:
                    mindx = np.nan
                toldat = row["toldat"]
                self.process_gradient_test(coverage, parameter, input_name,
                                           ddatdx, mindx, startdat, toldat)

            elif alg.lower() == 'loclrng':
                row = self.recent_row(lookup_table["local_range"])
                table = row['table']
                dims = []
                datlimz = []
                for key in table.iterkeys():
                    # Skip the datlims
                    if 'datlim' in key:
                        continue
                    dims.append(key)
                    datlimz.append(table[key])

                datlimz = np.column_stack(datlimz)
                datlim = np.column_stack([table['datlim1'], table['datlim2']])
                self.process_local_range_test(coverage, parameter, input_name,
                                              datlim, datlimz, dims)

        except KeyError:  # No lookup table
            self.set_error(coverage, parameter)

        finally:
            coverage.close()

    def set_error(self, coverage, parameter):
        log.error("setting coverage parameter %s to -99", parameter.name)

    def get_parameter_values(self, coverage, name):
        array = coverage.get_parameter_values(
            [name], fill_empty_params=True).get_data()[name]
        return array

    def process_glblrng(self, coverage, parameter, input_name, min_value,
                        max_value):
        '''
        Evaluates the QC for global range for all data values that equal -88 (not yet evaluated)
        '''
        log.error("input name: %s", input_name)
        log.info("Num timesteps: %s", coverage.num_timesteps)

        # Get all of the QC values, and find where -88 is set (uninitialized)
        qc_array = self.get_parameter_values(coverage, parameter.name)
        indexes = np.where(qc_array == -88)[0]

        # Now build a variable, but I need to keep track of the time where the data goes
        time_array = self.get_parameter_values(
            coverage, coverage.temporal_parameter_name)[indexes]
        value_array = self.get_parameter_values(coverage, input_name)[indexes]

        from ion_functions.qc.qc_functions import dataqc_globalrangetest
        qc = dataqc_globalrangetest(value_array, [min_value, max_value])
        return_dictionary = {
            coverage.temporal_parameter_name: time_array,
            parameter.name: qc
        }
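        # NOTE: return_dictionary is built but not used; writing the computed flags back to the
        # coverage appears to be left unimplemented here (the same holds for the process_* methods below).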

    def process_stuck_value(self, coverage, parameter, input_name, resolution,
                            N):
        '''
        Evaluates the QC for stuck value for all data values that equal -88 (not yet evaluated)
        '''
        # Get all of the QC values and find where -88 is set
        qc_array = self.get_parameter_values(coverage, parameter.name)
        indexes = np.where(qc_array == -88)[0]

        # Horribly inefficient...
        from ion_functions.qc.qc_functions import dataqc_stuckvaluetest_wrapper
        value_array = self.get_parameter_values(coverage, input_name)[indexes]
        # value_array is already restricted to the -88 indexes, so the wrapper output aligns with time_array
        qc_array = dataqc_stuckvaluetest_wrapper(value_array, resolution, N)
        time_array = self.get_parameter_values(
            coverage, coverage.temporal_parameter_name)[indexes]

        return_dictionary = {
            coverage.temporal_parameter_name: time_array,
            parameter.name: qc_array
        }

    def process_trend_test(self, coverage, parameter, input_name, ord_n, nstd):
        '''
        Evaluates the QC for trend test for all data values that equal -88 (not yet evaluated)
        '''
        # Get all of the QC values and find where -88 is set
        qc_array = self.get_parameter_values(coverage, parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_polytrendtest_wrapper
        time_array = self.get_parameter_values(
            coverage, coverage.temporal_parameter_name)[indexes]
        value_array = self.get_parameter_values(coverage, input_name)[indexes]

        # value_array and time_array are already restricted to the -88 indexes
        qc_array = dataqc_polytrendtest_wrapper(value_array, time_array, ord_n, nstd)
        return_dictionary = {
            coverage.temporal_parameter_name: time_array,
            parameter.name: qc_array
        }

    def process_spike_test(self, coverage, parameter, input_name, acc, N, L):
        '''
        Evaluates the QC for spike test for all data values that equal -88 (not yet evaluated)
        '''
        # Get all of the QC values and find where -88 is set
        qc_array = self.get_parameter_values(coverage, parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_spiketest_wrapper
        value_array = self.get_parameter_values(coverage, input_name)[indexes]
        # value_array is already restricted to the -88 indexes
        qc_array = dataqc_spiketest_wrapper(value_array, acc, N, L)
        time_array = self.get_parameter_values(
            coverage, coverage.temporal_parameter_name)[indexes]
        return_dictionary = {
            coverage.temporal_parameter_name: time_array,
            parameter.name: qc_array
        }

    def process_gradient_test(self, coverage, parameter, input_name, ddatdx,
                              mindx, startdat, toldat):
        qc_array = self.get_parameter_values(coverage, parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_gradienttest_wrapper
        value_array = self.get_parameter_values(coverage, input_name)[indexes]
        time_array = self.get_parameter_values(
            coverage, coverage.temporal_parameter_name)[indexes]

        qc_array = dataqc_gradienttest_wrapper(value_array, time_array, ddatdx,
                                               mindx, startdat, toldat)

        return_dictionary = {
            coverage.temporal_parameter_name: time_array,
            parameter.name: qc_array
        }

    def process_local_range_test(self, coverage, parameter, input_name, datlim,
                                 datlimz, dims):
        return  # Not ready
        qc_array = self.get_parameter_values(coverage, parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_localrangetest_wrapper
        # dat
        value_array = self.get_parameter_values(coverage, input_name)[indexes]
        time_array = self.get_parameter_values(
            coverage, coverage.temporal_parameter_name)[indexes]

        # datlim is an argument and comes from the lookup table
        # datlimz is an argument and comes from the lookup table
        # dims is an argument and is created using the column headings
        # pval_callback, well as for that...
        # TODO: slice_ is the window of the site data product, but for
        # now we'll just use a global slice
        slice_ = slice(None)

        def parameter_callback(param_name):
            return coverage.get_parameter_values(param_name, slice_)

        qc_array = dataqc_localrangetest_wrapper(value_array, datlim, datlimz,
                                                 dims, parameter_callback)
        return_dictionary = {
            coverage.temporal_parameter_name: time_array,
            parameter.name: qc_array
        }
        log.error("Here's what it would look like\n%s", return_dictionary)

    def get_dataset(self, data_product):
        dataset_ids, _ = self.resource_registry.find_objects(data_product,
                                                             PRED.hasDataset,
                                                             id_only=True)
        if not dataset_ids:
            raise BadRequest("No Dataset")
        dataset_id = dataset_ids[0]
        return dataset_id

    def get_coverage(self, dataset_id):
        cov = DatasetManagementService._get_coverage(dataset_id, mode='r+')
        return cov

    def recent_row(self, rows):
        '''
        Determines the most recent data based on the timestamp
        '''
        most_recent = None
        ts = 0
        for row in rows:
            if row['ts_created'] > ts:
                most_recent = row
                ts = row['ts_created']
        return most_recent

    def get_parameters(self, data_product):
        '''
        Returns the relevant parameter contexts of the data product
        '''

        # DataProduct -> StreamDefinition
        stream_defs, _ = self.resource_registry.find_objects(
            data_product._id, PRED.hasStreamDefinition, id_only=False)
        stream_def = stream_defs[0]

        # StreamDefinition -> ParameterDictionary
        pdict_ids, _ = self.resource_registry.find_objects(
            stream_def._id, PRED.hasParameterDictionary, id_only=True)
        pdict_id = pdict_ids[0]

        # ParameterDictionary -> ParameterContext
        pctxts, _ = self.resource_registry.find_objects(
            pdict_id, PRED.hasParameterContext, id_only=False)
        relevant = [
            ctx for ctx in pctxts if not stream_def.available_fields or (
                stream_def.available_fields
                and ctx.name in stream_def.available_fields)
        ]
        return relevant
Example #55
class Directory(object):
    """
    Frontend to directory functionality backed by the resource registry, providing a directory lookup mechanism.
    Terms:
      directory: instance of a Directory, representing entries within one Org. A tree of entries.
      path: parent+key (= qualified name of an entry). All paths start with '/'
      entry: node in the directory tree with a name (key) and parent path, holding arbitrary attributes
      key: local name of an entry
    """

    def __init__(self, orgname=None, datastore_manager=None, events_enabled=False, container=None):
        self.container = container or bootstrap.container_instance
        # Get an instance of datastore configured as directory.
        datastore_manager = datastore_manager or self.container.datastore_manager
        self.dir_store = datastore_manager.get_datastore(DataStore.DS_DIRECTORY)

        self.orgname = orgname or CFG.system.root_org
        self.is_root = (self.orgname == CFG.system.root_org)

        self.events_enabled = events_enabled
        self.event_pub = None
        self.event_sub = None


    def start(self):
        # Create directory root entry (for current org) if not existing
        if CFG.system.auto_bootstrap:
            root_de = self.register("/", "DIR", sys_name=bootstrap.get_sys_name())
            if root_de is None:
                # We created this directory just now
                pass

        if self.events_enabled:
            # init change event publisher
            self.event_pub = EventPublisher()

            # Register to receive directory changes
            self.event_sub = EventSubscriber(event_type="ContainerConfigModifiedEvent",
                                             origin="Directory",
                                             callback=self.receive_directory_change_event)

    def stop(self):
        self.close()

    def close(self):
        """
        Close directory and all resources including datastore and event listener.
        """
        if self.event_sub:
            self.event_sub.deactivate()
        self.dir_store.close()

    def _get_path(self, parent, key):
        """
        Returns the qualified directory path for a directory entry.
        """
        if parent == "/":
            return parent + key
        elif parent.startswith("/"):
            return parent + "/" + key
        else:
            raise BadRequest("Illegal parent: %s" % parent)

    def _get_key(self, path):
        """
        Returns the key from a qualified directory path
        """
        parent, key = path.rsplit("/", 1)
        return key

    def _create_dir_entry(self, parent, key, orgname=None, ts=None, attributes=None):
        """
        Standard way to create a DirEntry object (without persisting it)
        """
        orgname = orgname or self.orgname
        ts = ts or get_ion_ts()
        attributes = attributes if attributes is not None else {}
        parent = parent or "/"
        de = DirEntry(org=orgname, parent=parent, key=key, attributes=attributes, ts_created=ts, ts_updated=ts)
        return de

    def _read_by_path(self, path, orgname=None):
        """
        Given a qualified path, find entry in directory and return DirEntry
        object or None if not found.
        A side effect is to clean any but the most recent entries found for this path.
        """
        if path is None:
            raise BadRequest("Illegal arguments")
        orgname = orgname or self.orgname
        parent, key = path.rsplit("/", 1)
        parent = parent or "/"
        find_key = [orgname, key, parent]
        view_res = self.dir_store.find_by_view('directory', 'by_key', key=find_key, id_only=True, convert_doc=True)

        match = [doc for docid, index, doc in view_res]
        if len(match) > 1:
            log.warn("More than one directory entry found for key %s" % path)
            recent_match = self._cleanup_outdated_entries(match, "path=%s" % path)
            return recent_match
        elif match:
            return match[0]
        return None

    def _cleanup_outdated_entries(self, dir_entries, common="key"):
        """
        This function takes all DirEntry from the list and removes all but the most recent one
        by ts_updated timestamp. It returns the most recent DirEntry and removes the others by
        direct datastore operations.
        """
        if not dir_entries:
            return
        newest_entry = dir_entries[0]
        try:
            remove_list = []
            for de in dir_entries:
                if int(de.ts_updated) > int(newest_entry.ts_updated):
                    remove_list.append(newest_entry)
                    newest_entry = de
                elif de is not newest_entry:
                    remove_list.append(de)

            log.info("Attempting to cleanup these directory entries: %s" % remove_list)
            for de in remove_list:
                try:
                    self.dir_store.delete(de)
                except Exception as ex:
                    log.warn("Removal of outdated %s directory entry failed: %s" % (common, de))
            log.info("Cleanup of %s old %s directory entries succeeded" % (len(remove_list), common))

        except Exception as ex:
            log.warn("Cleanup of multiple directory entries for %s failed: %s" % (
                common, str(ex)))

        return newest_entry

    def lookup(self, parent, key=None, return_entry=False):
        """
        Read entry residing in directory at parent node level.
        """
        path = self._get_path(parent, key) if key else parent
        direntry = self._read_by_path(path)
        if return_entry:
            return direntry
        else:
            return direntry.attributes if direntry else None

    def _get_unique_parents(self, entry_list):
        """Returns a sorted, unique list of parents of DirEntries (excluding the root /)"""
        if entry_list and type(entry_list) not in (list, tuple):
            entry_list = [entry_list]
        parents = set()
        for entry in entry_list:
            parents.add(entry.parent)
        if "/" in parents:
            parents.remove("/")
        return sorted(parents)

    def _ensure_parents_exist(self, entry_list, create=True):
        parents_list = self._get_unique_parents(entry_list)
        pe_list = []
        try:
            for parent in parents_list:
                pe = self.lookup(parent)
                if pe is None:
                    pp, pk = parent.rsplit("/", 1)
                    direntry = self._create_dir_entry(parent=pp, key=pk)
                    pe_list.append(direntry)
                    if create:
                        self.dir_store.create(direntry, create_unique_directory_id())
        except Exception as ex:
            log.warn("_ensure_parents_exist(): Error creating directory parents", exc_info=True)
        return pe_list

    def register(self, parent, key, create_only=False, **kwargs):
        """
        Add/replace an entry within directory, below a parent node or "/".
        Note: Replaces (not merges) the attribute values of the entry if existing
        @param create_only  If True, does not change an existing entry
        @retval  DirEntry if previously existing
        """
        if not (parent and key):
            raise BadRequest("Illegal arguments")
        if not type(parent) is str or not parent.startswith("/"):
            raise BadRequest("Illegal arguments: parent")

        dn = self._get_path(parent, key)
        log.debug("Directory.register(%s): %s", dn, kwargs)

        entry_old = None
        cur_time = get_ion_ts()
        # Must read existing entry by path to make sure to not create path twice
        direntry = self._read_by_path(dn)
        if direntry and create_only:
            # We only wanted to make sure entry exists. Do not change
            return direntry
        elif direntry:
            entry_old = direntry.attributes
            direntry.attributes = kwargs
            direntry.ts_updated = cur_time
            # TODO: This may fail because of concurrent update
            self.dir_store.update(direntry)
        else:
            direntry = self._create_dir_entry(parent, key, attributes=kwargs, ts=cur_time)
            self._ensure_parents_exist([direntry])
            self.dir_store.create(direntry, create_unique_directory_id())

        return entry_old

    def register_safe(self, parent, key, **kwargs):
        """
        Use this method to protect caller from any form of directory register error
        """
        try:
            return self.register(parent, key, **kwargs)
        except Exception as ex:
            log.exception("Error registering key=%s/%s, args=%s" % (parent, key, kwargs))

    def register_mult(self, entries):
        """
        Registers multiple directory entries efficiently in one datastore access.
        Note: this fails if entries already exist, so it works for create only.
        """
        if type(entries) not in (list, tuple):
            raise BadRequest("Bad entries type")
        de_list = []
        cur_time = get_ion_ts()
        for parent, key, attrs in entries:
            direntry = self._create_dir_entry(parent, key, attributes=attrs, ts=cur_time)
            de_list.append(direntry)
        pe_list = self._ensure_parents_exist(de_list, create=False)
        de_list.extend(pe_list)
        deid_list = [create_unique_directory_id() for i in xrange(len(de_list))]
        self.dir_store.create_mult(de_list, deid_list)

    def unregister(self, parent, key=None, return_entry=False):
        """
        Remove entry from directory.
        Returns attributes of deleted DirEntry
        """
        path = self._get_path(parent, key) if key else parent
        log.debug("Removing content at path %s" % path)

        direntry = self._read_by_path(path)
        if direntry:
            self.dir_store.delete(direntry)

        if direntry and not return_entry:
            return direntry.attributes
        else:
            return direntry

    def unregister_safe(self, parent, key):
        try:
            return self.unregister(parent, key)
        except Exception as ex:
            log.exception("Error unregistering key=%s/%s" % (parent, key))

    def find_child_entries(self, parent='/', direct_only=True, **kwargs):
        """
        Return all child entries (ordered by path) for the given parent path.
        Does not return the parent itself. Optionally returns child of child entries.
        Additional kwargs are applied to constrain the search results (limit, descending, skip).
        @param parent  Path to parent (must start with "/")
        @param direct_only  If False, includes child of child entries
        @retval  A list of DirEntry objects for the matches
        """
        if not type(parent) is str or not parent.startswith("/"):
            raise BadRequest("Illegal argument parent: %s" % parent)
        if direct_only:
            start_key = [self.orgname, parent, 0]
            end_key = [self.orgname, parent]
            res = self.dir_store.find_by_view('directory', 'by_parent',
                start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)
        else:
            path = parent[1:].split("/")
            start_key = [self.orgname, path, 0]
            end_key = [self.orgname, list(path) + ["ZZZZZZ"]]
            res = self.dir_store.find_by_view('directory', 'by_path',
                start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)

        match = [doc for docid, indexkey, doc in res]
        return match

    def find_by_key(self, key=None, parent='/', **kwargs):
        """
        Returns a list of DirEntry for each directory entry that matches the given key name.
        If a parent is provided, only checks in this parent and all subtree.
        These entries are in the same org's directory but have different parents.
        """
        if key is None:
            raise BadRequest("Illegal arguments")
        if parent is None:
            raise BadRequest("Illegal arguments")
        start_key = [self.orgname, key, parent]
        end_key = [self.orgname, key, parent + "ZZZZZZ"]
        res = self.dir_store.find_by_view('directory', 'by_key',
            start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)

        match = [doc for docid, indexkey, doc in res]
        return match

    def find_by_value(self, subtree='/', attribute=None, value=None, **kwargs):
        """
        Returns a list of DirEntry with entries that have an attribute with the given value.
        """
        if attribute is None:
            raise BadRequest("Illegal arguments")
        if subtree is None:
            raise BadRequest("Illegal arguments")
        start_key = [self.orgname, attribute, value, subtree]
        end_key = [self.orgname, attribute, value, subtree + "ZZZZZZ"]
        res = self.dir_store.find_by_view('directory', 'by_attribute',
                        start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)

        match = [doc for docid, indexkey, doc in res]
        return match

    def remove_child_entries(self, parent, delete_parent=False):
        pass

    # ------------------------------------------
    # Specific directory entry methods
    # ------------------------------------------
    # Internal methods
    def _assert_existence(self, parent, key, **kwargs):
        """
        Make sure an entry is in the directory.
        @retval True if entry existed
        """
        dn = self._get_path(parent, key)
        direntry = self._safe_read(dn)
        existed = bool(direntry)
        if not direntry:
            cur_time = get_ion_ts()
            parent_dn = self._get_path(parent)
            direntry = DirEntry(parent=parent_dn, key=key, attributes=kwargs, ts_created=cur_time, ts_updated=cur_time)
            # TODO: This may fail because of concurrent create
            self.dir_store.create(direntry, dn)
        return existed

    def receive_directory_change_event(self, event_msg, headers):
        # @TODO add support to fold updated config into container config
        pass
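

# A hypothetical usage sketch (illustrative names only) of the Directory API above:
# entries are registered below a parent path and read back by parent + key.
def example_directory_usage(directory):
    # Register an entry /services/my_service with arbitrary attributes (replaces, does not merge)
    directory.register("/services", "my_service", endpoint="my_service_queue", state="active")
    # Read the attributes back
    attrs = directory.lookup("/services/my_service")
    # List the direct children of /services
    children = directory.find_child_entries("/services")
    return attrs, children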
Example #56
class EventPersister(SimpleProcess):
    def on_init(self):
        # Time in between event persists
        self.persist_interval = float(
            self.CFG.get_safe("process.event_persister.persist_interval", 1.0))

        self.persist_blacklist = self.CFG.get_safe(
            "process.event_persister.persist_blacklist", {})

        self._event_type_blacklist = [
            entry['event_type'] for entry in self.persist_blacklist
            if entry.get('event_type', None) and len(entry) == 1
        ]
        self._complex_blacklist = [
            entry for entry in self.persist_blacklist
            if not (entry.get('event_type', None) and len(entry) == 1)
        ]
        if self._complex_blacklist:
            log.warn(
                "EventPersister does not yet support complex blacklist expressions: %s",
                self._complex_blacklist)
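        # The blacklist is assumed to be a list of dicts, e.g. (illustrative values only):
        #   [{'event_type': 'TimerEvent'},
        #    {'event_type': 'ResourceModifiedEvent', 'sub_type': 'UPDATE'}]
        # Single-key entries act as plain event-type filters; anything else counts as a
        # "complex" expression, which is not yet supported (see the warning above).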

        # Holds received events FIFO in synchronized queue
        self.event_queue = Queue()

        # Temporarily holds the list of events to persist while the datastore operation is not yet completed.
        # This is where events to persist will remain if the datastore operation occasionally fails.
        self.events_to_persist = None

        # Number of unsuccessful consecutive attempts to persist during loop
        self.failure_count = 0

        # bookkeeping for greenlet
        self._persist_greenlet = None
        self._terminate_persist = Event()  # when set, exits the persister greenlet

        # The event subscriber
        self.event_sub = None

        process_plugin_defs = self.CFG.get_safe(
            "process.event_persister.process_plugins", {}) or {}

        # Registered event process plugins
        self.process_plugins = {}
        for plugin_name, plugin_cls, plugin_args in process_plugin_defs:
            try:
                plugin = named_any(plugin_cls)(**plugin_args)
                self.process_plugins[plugin_name] = plugin
                log.info("Loaded event processing plugin %s (%s)", plugin_name,
                         plugin_cls)
            except Exception as ex:
                log.error(
                    "Cannot instantiate event processing plugin %s (%s): %s",
                    plugin_name, plugin_cls, ex)

    def on_start(self):
        # Persister thread
        self._persist_greenlet = spawn(self._persister_loop,
                                       self.persist_interval)
        log.debug(
            'EventPersister persist greenlet started in "%s" (interval %s)',
            self.__class__.__name__, self.persist_interval)

        # Event subscription
        self.event_sub = EventSubscriber(pattern=EventSubscriber.ALL_EVENTS,
                                         callback=self._on_event,
                                         queue_name="event_persister",
                                         auto_delete=False)

        self.event_sub.start()

    def on_quit(self):
        # Stop event subscriber
        self.event_sub.stop()

        # tell the trigger greenlet we're done
        self._terminate_persist.set()

        # wait on the greenlets to finish cleanly
        self._persist_greenlet.join(timeout=5)

        # Check if there are still unsaved events in the queue and persist them
        leftover_events = self.event_queue.qsize()
        if leftover_events:
            log.info(
                "Storing {} events during event_persister shutdown".format(
                    leftover_events))
            events_to_process = [
                self.event_queue.get() for x in xrange(leftover_events)
            ]
            events_to_persist = [
                x for x in events_to_process if not self._in_blacklist(x)
            ]
            try:
                self._persist_events(events_to_persist)
            except Exception:
                log.exception("Could not persist all events")

    def _on_event(self, event, *args, **kwargs):
        self.event_queue.put(event)

    def _in_blacklist(self, event):
        if event.type_ in self._event_type_blacklist:
            return True
        if event.base_types:
            for base_type in event.base_types:
                if base_type in self._event_type_blacklist:
                    return True
            # TODO: Complex event blacklist
        return False

    def _persister_loop(self, persist_interval):
        log.debug('Starting event persister thread with persist_interval=%s',
                  persist_interval)

        # Event.wait returns False on timeout (and True when set in on_quit), so we use this to both exit cleanly and do our timeout in a loop
        while not self._terminate_persist.wait(timeout=persist_interval):
            try:
                # leftover events_to_persist indicate previous attempt did not succeed
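                # After more than 2 consecutive failed bulk writes, fall back to writing
                # the leftover events one by one; events that still fail are logged and
                # dropped so the persister can make progress again.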
                if self.events_to_persist and self.failure_count > 2:
                    bad_events = []
                    log.warn("Attempting to persist %s events individually" %
                             (len(self.events_to_persist)))
                    for event in self.events_to_persist:
                        try:
                            self.container.event_repository.put_event(event)
                        except Exception:
                            bad_events.append(event)

                    if len(self.events_to_persist) != len(bad_events):
                        log.warn(
                            "Succeeded in persisting some of the events - the rest must be bad"
                        )
                        self._log_events(bad_events)
                    elif bad_events:
                        log.error("Discarding %s events after %s attempts!!" %
                                  (len(bad_events), self.failure_count))
                        self._log_events(bad_events)

                    self.events_to_persist = None
                    self.failure_count = 0

                elif self.events_to_persist:
                    # There was an error last time and we need to retry
                    log.info("Retry persisting %s events" %
                             len(self.events_to_persist))
                    self._persist_events(self.events_to_persist)
                    self.events_to_persist = None

                # process ALL events (not retried on failure like persisting is)
                events_to_process = [
                    self.event_queue.get()
                    for x in xrange(self.event_queue.qsize())
                ]
                # only persist events not in blacklist
                self.events_to_persist = [
                    x for x in events_to_process if not self._in_blacklist(x)
                ]

                try:
                    self._persist_events(self.events_to_persist)
                finally:
                    self._process_events(events_to_process)
                self.events_to_persist = None
                self.failure_count = 0
            except Exception as ex:
                # Note: Persisting events may fail occasionally during test runs (when the "events" datastore is force
                # deleted and recreated). We'll log and keep retrying forever.
                log.exception(
                    "Failed to persist %s received events. Will retry next cycle"
                    % len(self.events_to_persist))
                self.failure_count += 1
                self._log_events(self.events_to_persist)

    def _persist_events(self, event_list):
        if event_list:
            self.container.event_repository.put_events(event_list)

    def _process_events(self, event_list):
        for plugin_name, plugin in self.process_plugins.iteritems():
            try:
                plugin.process_events(event_list)
            except Exception as ex:
                log.exception("Error processing events in plugin %s",
                              plugin_name)

    def _log_events(self, events):
        events_str = pprint.pformat([event.__dict__
                                     for event in events]) if events else ""
        log.warn("EVENTS:\n%s", events_str)

class ScienceGranuleIngestionWorker(TransformStreamListener):
    CACHE_LIMIT=CFG.get_safe('container.ingestion_cache',5)

    def __init__(self, *args,**kwargs):
        super(ScienceGranuleIngestionWorker, self).__init__(*args, **kwargs)
        #--------------------------------------------------------------------------------
        # Ingestion Cache
        # - Datasets
        # - Coverage instances
        #--------------------------------------------------------------------------------
        self._datasets  = collections.OrderedDict()
        self._coverages = collections.OrderedDict()

        self._bad_coverages = {}

        self.time_stats = Accumulator(format='%3f')
        # unique ID to identify this worker in log msgs
        self._id = uuid.uuid1()

    def on_start(self): # pragma: no cover
        super(ScienceGranuleIngestionWorker,self).on_start()
        self.event_publisher = EventPublisher(OT.DatasetModified)
        self.stored_value_manager = StoredValueManager(self.container)

        self.lookup_docs = self.CFG.get_safe('process.lookup_docs',[])
        self.input_product = self.CFG.get_safe('process.input_product','')
        self.qc_enabled = self.CFG.get_safe('process.qc_enabled', True)
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(event_type=OT.ExternalReferencesUpdatedEvent, callback=self._add_lookups, auto_delete=True)
        self.lookup_monitor.start()
        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        self.connection_id = ''
        self.connection_index = None


    def on_quit(self): # pragma: no cover
        super(ScienceGranuleIngestionWorker, self).on_quit()
        for stream, coverage in self._coverages.iteritems():
            try:
                coverage.close(timeout=5)
            except:
                log.exception('Problems closing the coverage')
    
    def _add_lookups(self, event, *args, **kwargs):
        if event.origin == self.input_product:
            if isinstance(event.reference_keys, list):
                self.new_lookups.put(event.reference_keys)

    def _new_dataset(self, stream_id):
        '''
        Adds a new dataset to the internal cache of the ingestion worker
        '''
        rr_client = ResourceRegistryServiceClient()
        datasets, _ = rr_client.find_subjects(subject_type=RT.Dataset,predicate=PRED.hasStream,object=stream_id,id_only=True)
        if datasets:
            return datasets[0]
        return None
    
    def get_dataset(self,stream_id):
        '''
        Memoization (LRU) of _new_dataset
        '''
        try:
            result = self._datasets.pop(stream_id)
        except KeyError:
            result = self._new_dataset(stream_id)
            if result is None:
                return None
            if len(self._datasets) >= self.CACHE_LIMIT:
                self._datasets.popitem(0)
        self._datasets[stream_id] = result
        return result

    def get_coverage(self, stream_id):
        '''
        Memoization (LRU) of the coverage lookup (_get_simplex_coverage)
        '''
        try:
            result = self._coverages.pop(stream_id)
        except KeyError:
            dataset_id = self.get_dataset(stream_id)
            if dataset_id is None:
                return None
            result = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a')
            if result is None:
                return None
            if len(self._coverages) >= self.CACHE_LIMIT:
                k, coverage = self._coverages.popitem(0)
                coverage.close(timeout=5)
        self._coverages[stream_id] = result
        return result

    def gap_coverage(self,stream_id):
        try:
            old_cov = self._coverages.pop(stream_id)
            dataset_id = self.get_dataset(stream_id)
            sdom, tdom = time_series_domain()
            new_cov = DatasetManagementService._create_simplex_coverage(dataset_id, old_cov.parameter_dictionary, sdom, tdom, old_cov._persistence_layer.inline_data_writes)
            old_cov.close()
            result = new_cov
        except KeyError:
            result = self.get_coverage(stream_id)
        self._coverages[stream_id] = result
        return result


    def dataset_changed(self, dataset_id, extents, window):
        self.event_publisher.publish_event(origin=dataset_id, author=self.id, extents=extents, window=window)

    def evaluate_qc(self, rdt, dataset_id):
        if self.qc_enabled:
            for field in rdt.fields:
                if not field.endswith('_qc'):
                    continue
                try:
                    values = rdt[field]
                    if values is not None:
                        if not all(values):
                            topology = np.nonzero(values)
                            first_occurrence = topology[0][0]
                            ts = rdt[rdt.temporal_parameter][first_occurrence]
                            self.flag_qc_parameter(dataset_id, field, ts, {})
                except:
                    continue

    def flag_qc_parameter(self, dataset_id, parameter, temporal_value, configuration):
        self.qc_publisher.publish_event(origin=dataset_id, qc_parameter=parameter, temporal_value=temporal_value, configuration=configuration)

    def update_connection_index(self, connection_id, connection_index):
        self.connection_id = connection_id
        try:
            connection_index = int(connection_index)
            self.connection_index = connection_index
        except ValueError:
            pass

    def has_gap(self, connection_id, connection_index):
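        # A gap is flagged when the connection id changes, or when the (integer)
        # connection index is not exactly one greater than the last index seen;
        # unparseable indexes and the very first packet are not treated as gaps.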
        if connection_id:
            if not self.connection_id:
                self.update_connection_index(connection_id, connection_index)
                return False
            else:
                if connection_id != self.connection_id:
                    return True
        if connection_index:
            if self.connection_index is None:
                self.update_connection_index(connection_id, connection_index)
                return False
            try:
                connection_index = int(connection_index)
                if connection_index != self.connection_index+1:
                    return True
            except ValueError:
                pass

        return False

    def splice_coverage(self, dataset_id, coverage):
        log.info('Splicing new coverage')
        DatasetManagementService._splice_coverage(dataset_id, coverage)

    @handle_stream_exception()
    def recv_packet(self, msg, stream_route, stream_id):
        ''' receive packet for ingestion '''
        log.trace('received granule for stream %s', stream_id)

        if msg == {}:
            log.error('Received empty message from stream: %s', stream_id)
            return
        # Message validation
        if not isinstance(msg, Granule):
            log.error('Ingestion received a message that is not a granule: %s', msg)
            return


        rdt = RecordDictionaryTool.load_from_granule(msg)
        if rdt is None:
            log.error('Invalid granule (no RDT) for stream %s', stream_id)
            return
        if not len(rdt):
            log.debug('Empty granule for stream %s', stream_id)
            return

        self.persist_or_timeout(stream_id, rdt)

    def persist_or_timeout(self, stream_id, rdt):
        """ retry writing coverage multiple times and eventually time out """
        done = False
        timeout = 2
        start = time.time()
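        # Retries add_granule with an exponentially growing sleep (starting at 2 s,
        # capped around 5 minutes between attempts); once MAX_RETRY_TIME has elapsed
        # the exception is re-raised and the coverage path is logged for inspection.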
        while not done:
            try:
                self.add_granule(stream_id, rdt)
                done = True
            except:
                log.exception('An issue with coverage, retrying after a bit')
                if (time.time() - start) > MAX_RETRY_TIME: # After an hour just give up
                    dataset_id = self.get_dataset(stream_id)
                    log.error("We're giving up, the coverage needs to be inspected %s", DatasetManagementService._get_coverage_path(dataset_id))
                    raise

                if stream_id in self._coverages:
                    log.info('Popping coverage for stream %s', stream_id)
                    self._coverages.pop(stream_id)

                gevent.sleep(timeout)
                if timeout > (60 * 5):
                    timeout = 60 * 5
                else:
                    timeout *= 2


    def expand_coverage(self, coverage, elements, stream_id):
        try:
            coverage.insert_timesteps(elements, oob=False)
        except IOError as e:
            log.error("Couldn't insert time steps for coverage: %s",
                      coverage.persistence_dir, exc_info=True)
            try:
                coverage.close()
            finally:
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(e.message)
    
    def get_stored_values(self, lookup_value):
        if not self.new_lookups.empty():
            new_values = self.new_lookups.get()
            self.lookup_docs = new_values + self.lookup_docs
        lookup_value_document_keys = self.lookup_docs
        for key in lookup_value_document_keys:
            try:
                document = self.stored_value_manager.read_value(key)
                if lookup_value in document:
                    return document[lookup_value] 
            except NotFound:
                log.warning('Specified lookup document does not exist')
        return None


    def fill_lookup_values(self, rdt):
        rdt.fetch_lookup_values()
        for field in rdt.lookup_values():
            value = self.get_stored_values(rdt.context(field).lookup_value)
            if value:
                rdt[field] = value

    def insert_sparse_values(self, coverage, rdt, stream_id):

        self.fill_lookup_values(rdt)
        for field in rdt._lookup_values():
            if rdt[field] is None:
                continue
            if not isinstance(rdt.context(field).param_type, SparseConstantType):
                # We only set sparse values before insert
                continue 
            value = rdt[field]
            try:
                coverage.set_parameter_values(param_name=field, value=value)
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          coverage.persistence_dir, exc_info=True)
                try:
                    coverage.close()
                finally:
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(e.message)

    def insert_values(self, coverage, rdt, stream_id):
        elements = len(rdt)

        start_index = coverage.num_timesteps - elements
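        # expand_coverage() already appended `elements` timesteps, so every non-sparse
        # parameter is written into the trailing slice [start_index:] of the coverage.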

        for k,v in rdt.iteritems():
            if isinstance(v, SparseConstantValue):
                continue
            slice_ = slice(start_index, None)
            try:
                coverage.set_parameter_values(param_name=k, tdoa=slice_, value=v)
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          coverage.persistence_dir, exc_info=True)
                try:
                    coverage.close()
                finally:
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(e.message)
    
        if 'ingestion_timestamp' in coverage.list_parameters():
            t_now = time.time()
            ntp_time = TimeUtils.ts_to_units(coverage.get_parameter_context('ingestion_timestamp').uom, t_now)
            coverage.set_parameter_values(param_name='ingestion_timestamp', tdoa=slice_, value=ntp_time)
    
    def add_granule(self,stream_id, rdt):
        ''' Appends the granule's data to the coverage and persists it. '''
        debugging = log.isEnabledFor(DEBUG)
        timer = Timer() if debugging else None
        if stream_id in self._bad_coverages:
            log.info('Message attempting to be inserted into bad coverage: %s',
                     DatasetManagementService._get_coverage_path(self.get_dataset(stream_id)))
            
        #--------------------------------------------------------------------------------
        # Gap Analysis
        #--------------------------------------------------------------------------------
        gap_found = self.has_gap(rdt.connection_id, rdt.connection_index)
        if gap_found:
            log.error('Gap Found!   New connection: (%s,%s)\tOld Connection: (%s,%s)', rdt.connection_id, rdt.connection_index, self.connection_id, self.connection_index)
            self.gap_coverage(stream_id)



        #--------------------------------------------------------------------------------
        # Coverage determination and appending
        #--------------------------------------------------------------------------------
        dataset_id = self.get_dataset(stream_id)
        if not dataset_id:
            log.error('No dataset could be determined on this stream: %s', stream_id)
            return

        try:
            coverage = self.get_coverage(stream_id)
        except IOError as e:
            log.error("Couldn't open coverage: %s",
                      DatasetManagementService._get_coverage_path(self.get_dataset(stream_id)))
            raise CorruptionError(e.message)

        if debugging:
            path = DatasetManagementService._get_coverage_path(dataset_id)
            log.debug('%s: add_granule stream %s dataset %s coverage %r file %s',
                      self._id, stream_id, dataset_id, coverage, path)

        if not coverage:
            log.error('Could not persist coverage from granule, coverage is None')
            return
        #--------------------------------------------------------------------------------
        # Actual persistence
        #--------------------------------------------------------------------------------

        elements = len(rdt)

        self.insert_sparse_values(coverage,rdt,stream_id)
        
        if debugging:
            timer.complete_step('checks') # lightweight ops, should be zero
        
        self.expand_coverage(coverage, elements, stream_id)
        
        if debugging:
            timer.complete_step('insert')

        self.insert_values(coverage, rdt, stream_id)
        
        if debugging:
            timer.complete_step('keys')
        
        DatasetManagementService._save_coverage(coverage)
        
        if debugging:
            timer.complete_step('save')
        
        start_index = coverage.num_timesteps - elements
        self.dataset_changed(dataset_id,coverage.num_timesteps,(start_index,start_index+elements))

        if gap_found:
            self.splice_coverage(dataset_id, coverage)

        self.evaluate_qc(rdt, dataset_id)
        
        if debugging:
            timer.complete_step('notify')
            self._add_timing_stats(timer)

        self.update_connection_index(rdt.connection_id, rdt.connection_index)

    def _add_timing_stats(self, timer):
        """ add stats from latest coverage operation to Accumulator and periodically log results """
        self.time_stats.add(timer)
        if self.time_stats.get_count() % REPORT_FREQUENCY>0:
            return

        if log.isEnabledFor(TRACE):
            # report per step
            for step in 'checks', 'insert', 'keys', 'save', 'notify':
                log.debug('%s step %s times: %s', self._id, step, self.time_stats.to_string(step))
        # report totals
        log.debug('%s total times: %s', self._id, self.time_stats)
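
The get_dataset and get_coverage methods above implement a small LRU cache with an OrderedDict: pop the key to refresh its recency, re-insert it at the tail, and evict the oldest entry when the cache is full. A minimal standalone sketch of that pattern (the class name and fetch callable are illustrative only, not part of the ingestion worker):

import collections


class LRUCacheSketch(object):
    """Minimal sketch of the OrderedDict-based LRU used by get_dataset/get_coverage."""

    def __init__(self, fetch, limit=5):
        self._cache = collections.OrderedDict()
        self._fetch = fetch    # hypothetical loader, e.g. a resource registry lookup
        self._limit = limit

    def get(self, key):
        try:
            value = self._cache.pop(key)           # hit: remove so re-insert refreshes recency
        except KeyError:
            value = self._fetch(key)               # miss: load from the backing store
            if value is None:
                return None
            if len(self._cache) >= self._limit:
                self._cache.popitem(last=False)    # evict the least recently used entry
        self._cache[key] = value                   # (re)insert as most recently used
        return value


# usage sketch
cache = LRUCacheSketch(fetch=lambda k: k.upper())
assert cache.get('stream_1') == 'STREAM_1'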
Пример #58
0
    def test_pub_and_sub(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            gq.put(args[0])
            if self.count == 2:
                ar.set()

        sub = EventSubscriber(event_type="ResourceEvent",
                              callback=cb,
                              origin="specific")
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)
        pub.publish_event(origin="specific", description="hello")

        event_obj = bootstrap.IonObject('ResourceEvent',
                                        origin='specific',
                                        description='more testing')
        self.assertEqual(event_obj, pub.publish_event_object(event_obj))

        with self.assertRaises(BadRequest) as cm:
            event_obj = bootstrap.IonObject('ResourceEvent',
                                            origin='specific',
                                            description='more testing',
                                            ts_created='2423')
            pub.publish_event_object(event_obj)
        self.assertIn('The ts_created value is not a valid timestamp',
                      cm.exception.message)

        with self.assertRaises(BadRequest) as cm:
            event_obj = bootstrap.IonObject('ResourceEvent',
                                            origin='specific',
                                            description='more testing',
                                            ts_created='1000494978462')
            pub.publish_event_object(event_obj)
        self.assertIn('This ts_created value is too old', cm.exception.message)

        with self.assertRaises(BadRequest) as cm:
            event_obj = bootstrap.IonObject('ResourceEvent',
                                            origin='specific',
                                            description='more testing')
            event_obj._id = '343434'
            pub.publish_event_object(event_obj)
        self.assertIn('The event object cannot contain a _id field',
                      cm.exception.message)

        ar.get(timeout=5)

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), self.count)
        self.assertEquals(res[0].description, "hello")
        self.assertAlmostEquals(int(res[0].ts_created),
                                int(get_ion_ts()),
                                delta=5000)

        self.assertEquals(res[1].description, "more testing")
        self.assertAlmostEquals(int(res[1].ts_created),
                                int(get_ion_ts()),
                                delta=5000)

    def test_activate_suspend_data_product(self):

        #------------------------------------------------------------------------------------------------
        # create a stream definition for the data from the ctd simulator
        #------------------------------------------------------------------------------------------------
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
        ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id)
        log.debug("Created stream def id %s" % ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # test creating a new data product w/o a stream definition
        #------------------------------------------------------------------------------------------------
        # Construct temporal and spatial Coordinate Reference System objects

        dp_obj = IonObject(RT.DataProduct,
            name='DP1',
            description='some new dp')

        log.debug("Created an IonObject for a data product: %s" % dp_obj)

        #------------------------------------------------------------------------------------------------
        # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
        #------------------------------------------------------------------------------------------------

        dp_id = self.dpsc_cli.create_data_product(data_product= dp_obj,
            stream_definition_id=ctd_stream_def_id)

        #------------------------------------------------------------------------------------------------
        # Subscribe to persist events
        #------------------------------------------------------------------------------------------------
        queue = gevent.queue.Queue()

        def info_event_received(message, headers):
            queue.put(message)

        es = EventSubscriber(event_type=OT.InformationContentStatusEvent, callback=info_event_received, origin=dp_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)


        #------------------------------------------------------------------------------------------------
        # test activate and suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.activate_data_product_persistence(dp_id)
        
        dp_obj = self.dpsc_cli.read_data_product(dp_id)
        self.assertIsNotNone(dp_obj)

        dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise NotFound("Data Product %s dataset does not exist" % str(dp_id))
        dataset_id = dataset_ids[0]


        # Check that the streams associated with the data product are persisted
        stream_ids, _ =  self.rrclient.find_objects(dp_id,PRED.hasStream,RT.Stream,True)
        for stream_id in stream_ids:
            self.assertTrue(self.ingestclient.is_persisted(stream_id))

        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
        rdt['time'] = np.arange(20)
        rdt['temp'] = np.arange(20)

        publisher = StandaloneStreamPublisher(stream_id,route)
        
        dataset_modified = Event()
        def cb(*args, **kwargs):
            dataset_modified.set()
        es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
        es.start()
        self.addCleanup(es.stop)

        publisher.publish(rdt.to_granule())

        self.assertTrue(dataset_modified.wait(30))

        #--------------------------------------------------------------------------------
        # Now get the data in one chunk using an RPC call to the data retriever
        #--------------------------------------------------------------------------------

        replay_data = self.data_retriever.retrieve(dataset_ids[0])
        self.assertIsInstance(replay_data, Granule)

        log.debug("The data retriever was able to replay the dataset that was attached to the data product "
                  "we wanted to be persisted. Therefore the data product was indeed persisted with "
                  "otherwise we could not have retrieved its dataset using the data retriever. Therefore "
                  "this demonstration shows that L4-CI-SA-RQ-267 is satisfied: 'Data product management shall persist data products'")

        data_product_object = self.rrclient.read(dp_id)
        self.assertEquals(data_product_object.name,'DP1')
        self.assertEquals(data_product_object.description,'some new dp')

        log.debug("Towards L4-CI-SA-RQ-308: 'Data product management shall persist data product metadata'. "
                  " Attributes in create for the data product obj, name= '%s', description='%s', match those of object from the "
                  "resource registry, name='%s', desc='%s'" % (dp_obj.name, dp_obj.description,data_product_object.name,
                                                           data_product_object.description))

        #------------------------------------------------------------------------------------------------
        # test suspend data product persistence
        #------------------------------------------------------------------------------------------------
        self.dpsc_cli.suspend_data_product_persistence(dp_id)


        dataset_modified.clear()

        rdt['time'] = np.arange(20,40)

        publisher.publish(rdt.to_granule())
        self.assertFalse(dataset_modified.wait(2))

        self.dpsc_cli.activate_data_product_persistence(dp_id)
        dataset_modified.clear()

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_modified.wait(30))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_almost_equal(rdt['time'], np.arange(40))


        dataset_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasDataset, id_only=True)
        self.assertEquals(len(dataset_ids), 1)

        self.dpsc_cli.suspend_data_product_persistence(dp_id)
        self.dpsc_cli.force_delete_data_product(dp_id)
        # now try to get the deleted dp object

        with self.assertRaises(NotFound):
            dp_obj = self.rrclient.read(dp_id)


        info_event_counter = 0
        runtime = 0
        starttime = time.time()
        caught_events = []

        # check that the four InformationContentStatusEvents were received
        while info_event_counter < 4 and runtime < 60:
            a = queue.get(timeout=60)
            caught_events.append(a)
            info_event_counter += 1
            runtime = time.time() - starttime

        self.assertEquals(info_event_counter, 4)
Пример #60
0
class EOIRegistrationProcess(SimpleProcess):
    def on_start(self):
        self.data_source_subscriber = EventSubscriber(
            event_type=OT.ResourceModifiedEvent,
            origin_type=RT.DataSource,
            callback=self._register_data_source)
        self.provider_subscriber = EventSubscriber(
            event_type=OT.ResourceModifiedEvent,
            origin_type=RT.ExternalDataProvider,
            callback=self._register_provider)
        self.data_source_subscriber.start()
        self.provider_subscriber.start()

        self.rr = self.container.resource_registry

        self.using_eoi_services = CFG.get_safe('eoi.meta.use_eoi_services',
                                               False)
        self.server = CFG.get_safe(
            'eoi.importer_service.server', "localhost") + ":" + str(
                CFG.get_safe('eoi.importer_service.port', 8844))

        log.info("Using geoservices=" + str(self.using_eoi_services))
        if not self.using_eoi_services:
            log.warn("not using geoservices...")

        self.importer_service_available = self.check_for_importer_service()
        if not self.importer_service_available:
            log.warn("not using importer service...")

    def check_for_importer_service(self):
        '''
        Only runs on start; used to determine whether the importer service is available.
        '''
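        # A 200 from the importer's "alive" endpoint means the service is reachable;
        # any other status code or a raised exception is treated as unavailable.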
        try:
            r = requests.get(self.server + '/service=alive&name=ooi&id=ooi')
            log.info("importer service available, status code: %s",
                     str(r.status_code))
            #alive service returned ok
            if r.status_code == 200:
                return True
            else:
                return False
        except Exception as e:
            # the importer service is not available
            log.warn("importer service is not available: %s", e)
            return False

    def _register_data_source(self, event, *args, **kwargs):
        '''
        used to create a harvester
        '''
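        # Builds an '&'-joined field=value query from the DataSource resource's
        # attributes (skipping 'contact') and asks the importer service to create
        # a harvester for it.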
        if self.importer_service_available:
            obj = self.rr.read(event.origin)
            data_fields = []
            for attrname, value in vars(obj).iteritems():
                # generate the param list to pass to the importer service using field names
                if attrname != "contact":
                    f = attrname.replace("_", "") + "=" + str(value)
                    data_fields.append(f)

            param_list = '&'.join(data_fields)

            request_string = self.server + '/service=' + CREATE_HARVESTER + "&" + param_list
            r = requests.get(request_string)

    def _register_provider(self, event, *args, **kwargs):
        if self.importer_service_available:
            #print "provider id:", event.origin
            pass

    def on_quit(self):
        self.data_source_subscriber.stop()
        self.provider_subscriber.stop()