def _register_service(self):
    definition = self.process_definition
    existing_services, _ = self.container.resource_registry.find_resources(
        restype="Service", name=definition.name)

    if len(existing_services) > 0:
        if len(existing_services) > 1:
            log.warning("There is more than one service object for %s. Using the first one" % definition.name)
        service_id = existing_services[0]._id
    else:
        svc_obj = Service(name=definition.name, exchange_name=definition.name)
        service_id, _ = self.container.resource_registry.create(svc_obj)

    svcdefs, _ = self.container.resource_registry.find_resources(
        restype="ServiceDefinition", name=definition.name)

    if svcdefs:
        try:
            self.container.resource_registry.create_association(
                service_id, "hasServiceDefinition", svcdefs[0]._id)
        except BadRequest:
            log.warn("Failed to associate %s Service and ServiceDefinition. It probably exists.",
                     definition.name)
    else:
        log.error("Cannot find ServiceDefinition resource for %s", definition.name)

    return service_id, definition.name
def _prepare_incremental(self):
    """
    Look in the resource registry for any resources that have a preload ID on them so that
    they can be referenced under this preload ID during this load run.
    """
    log.debug("Loading prior preloaded resources for reference")

    access_args = create_access_args("SUPERUSER", ["SUPERUSER"])
    res_objs, res_keys = self.rr.find_resources_ext(alt_id_ns="PRE", id_only=False,
                                                    access_args=access_args)
    res_preload_ids = [key['alt_id'] for key in res_keys]
    res_ids = [obj._id for obj in res_objs]

    log.debug("Found %s previously preloaded resources", len(res_objs))

    res_assocs = self.rr.find_associations(predicate="*", id_only=False)
    for assoc in res_assocs:
        self.resource_assocs.setdefault(assoc["p"], []).append(assoc)

    log.debug("Found %s existing associations", len(res_assocs))

    existing_resources = dict(zip(res_preload_ids, res_objs))
    if len(existing_resources) != len(res_objs):
        log.error("Stored preload IDs are NOT UNIQUE!!! This causes random links to existing resources")

    res_id_mapping = dict(zip(res_preload_ids, res_ids))
    self.resource_ids.update(res_id_mapping)
    res_obj_mapping = dict(zip(res_preload_ids, res_objs))
    self.resource_objs.update(res_obj_mapping)
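# A minimal standalone sketch (hypothetical names) of the uniqueness check above:
# dict(zip(keys, values)) silently drops duplicate keys, so comparing lengths
# before and after exposes non-unique preload IDs.
def check_unique_ids(preload_ids, resources):
    mapping = dict(zip(preload_ids, resources))
    if len(mapping) != len(resources):
        dupes = sorted(set(k for k in preload_ids if preload_ids.count(k) > 1))
        raise ValueError("Duplicate preload IDs: %s" % dupes)
    return mapping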
def validate_driver_configuration(self, driver_config):
    """
    Driver config must include 'oms_uri' entry.
    """
    if 'oms_uri' not in driver_config:
        log.error("'oms_uri' not present in driver_config = %s", driver_config)
        raise PlatformDriverException(msg="driver_config does not indicate 'oms_uri'")
def make_event_data(alarm_def):
    """
    Build the event payload for an alarm definition, choosing the event type
    from the alarm's status and type.
    """
    event_data = {
        'name': alarm_def.name,
        'message': alarm_def.message,
        'expr': alarm_def.expr,
        'stream_name': alarm_def.stream_name,
        'value_id': alarm_def.value_id,
        'value': alarm_def.current_val
    }

    if alarm_def.status:
        event_data['event_type'] = 'StreamAllClearAlarmEvent'
        event_data['message'] = 'Alarm is cleared.'
    elif alarm_def.type == StreamAlarmType.WARNING:
        event_data['event_type'] = 'StreamWarningAlarmEvent'
    elif alarm_def.type == StreamAlarmType.ALERT:
        event_data['event_type'] = 'StreamAlertAlarmEvent'
    else:
        log.error('Unknown alarm type.')
        return None

    return event_data
def assert_state_change(self, target_agent_state, timeout=10):
    """
    Verify the agent and resource states change as expected within the timeout.
    Fail if the state doesn't change to the expected state.
    @param target_agent_state: state we expect the agent to be in
    @param timeout: how long to wait for the driver to change states
    """
    to = gevent.Timeout(timeout)
    to.start()
    done = False
    agent_state = None

    try:
        while not done:
            agent_state = self._dsa_client.get_agent_state()
            log.error("Current agent state: %s", agent_state)

            if agent_state == target_agent_state:
                log.debug("Current state match: %s", agent_state)
                done = True

            if not done:
                log.debug("state mismatch, waiting for state to transition.")
                gevent.sleep(1)
    except Timeout:
        log.error("Failed to transition agent state to %s, current state: %s",
                  target_agent_state, agent_state)
        self.fail("Failed to transition state.")
    finally:
        to.cancel()
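# Standalone sketch of the polling pattern above, assuming gevent: a started
# gevent.Timeout raises itself in the waiting greenlet once `timeout` elapses,
# bounding an otherwise endless poll loop. `get_state` and `target` are stand-ins.
import gevent

def wait_for_state(get_state, target, timeout=10, poll_period=1):
    timer = gevent.Timeout(timeout)
    timer.start()
    try:
        while get_state() != target:
            gevent.sleep(poll_period)
        return True
    except gevent.Timeout:
        return False
    finally:
        timer.cancel()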
def on_start(self):
    if not self.CFG.get_safe('system.elasticsearch', False):
        text = 'Can not initialize indexes without ElasticSearch enabled. Please enable system.elasticsearch.'
        log.error(text)
        raise BadRequest(text)

    self.sysname = get_sys_name().lower()

    self.es_host = self.CFG.get_safe('server.elasticsearch.host', 'localhost')
    self.es_port = self.CFG.get_safe('server.elasticsearch.port', '9200')

    self.index_shards = self.CFG.get_safe('server.elasticsearch.shards', 5)
    self.index_replicas = self.CFG.get_safe('server.elasticsearch.replicas', 1)

    self.river_shards = self.CFG.get_safe('server.elasticsearch.river_shards', 5)
    self.river_replicas = self.CFG.get_safe('server.elasticsearch.river_replicas', 1)

    self.es = ep.ElasticSearch(host=self.es_host, port=self.es_port, timeout=10)

    op = self.CFG.get('op', None)
    if op == 'index_bootstrap':
        self.index_bootstrap()
    elif op == 'clean_bootstrap':
        self.clean_bootstrap()
    else:
        raise BadRequest('Operation Unknown')
def insert_values(self, coverage, rdt, stream_id):
    elements = len(rdt)
    start_index = coverage.num_timesteps - elements

    for k, v in rdt.iteritems():
        if isinstance(v, SparseConstantValue):
            continue
        slice_ = slice(start_index, None)
        try:
            coverage.set_parameter_values(param_name=k, tdoa=slice_, value=v)
        except IOError as e:
            log.error("Couldn't insert values for coverage: %s",
                      coverage.persistence_dir, exc_info=True)
            try:
                coverage.close()
            finally:
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(e.message)

    if 'ingestion_timestamp' in coverage.list_parameters():
        t_now = time.time()
        ntp_time = TimeUtils.ts_to_units(
            coverage.get_parameter_context('ingestion_timestamp').uom, t_now)
        coverage.set_parameter_values(param_name='ingestion_timestamp',
                                      tdoa=slice_, value=ntp_time)
def insert_sparse_values(self, coverage, rdt, stream_id):
    self.fill_lookup_values(rdt)
    for field in rdt.fields:
        if rdt[field] is None:
            continue
        if not isinstance(rdt.context(field).param_type, SparseConstantType):
            # We only set sparse values before insert
            continue
        value = rdt[field]
        try:
            coverage.set_parameter_values(param_name=field, value=value)
        except ValueError as e:
            if "'lower_bound' cannot be >= 'upper_bound'" in e.message:
                continue
            else:
                raise
        except IOError as e:
            log.error("Couldn't insert values for coverage: %s",
                      coverage.persistence_dir, exc_info=True)
            try:
                coverage.close()
            finally:
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(e.message)
def _unregister_service(self):
    if not self.service_id:
        log.error("No service id. Cannot unregister service")
        return

    self.container.resource_registry.delete(self.service_id, del_associations=True)
def _process_gateway_request(resource_id, operation, json_request, requester):
    if requester is not None:
        json_request["agentRequest"]["requester"] = requester

    decoder = IonObjectSerializer()
    decoded_msg = decoder.serialize(json_request)
    payload = simplejson.dumps(decoded_msg)

    response = _agent_gateway_request(resource_id + '/' + operation, payload)

    if GATEWAY_ERROR in response['data']:
        log.error(response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE])
        #raise BadRequest(response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE])
        ex_cls = response['data'][GATEWAY_ERROR][GATEWAY_ERROR_EXCEPTION]
        ex_msg = response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE]
        if hasattr(pyex, ex_cls):
            raise getattr(pyex, ex_cls)(ex_msg)
        else:
            raise Exception(ex_msg)

    try:
        if "type_" in response['data'][GATEWAY_RESPONSE]:
            del response['data'][GATEWAY_RESPONSE]["type_"]
    except Exception:
        pass
def _construct_stream_and_publisher(self, stream_name, stream_config):
    if log.isEnabledFor(logging.TRACE):  # pragma: no cover
        log.trace("%r: _construct_stream_and_publisher: "
                  "stream_name:%r, stream_config:\n%s",
                  self._platform_id, stream_name, self._pp.pformat(stream_config))

    decoder = IonObjectDeserializer(obj_registry=get_obj_registry())

    if 'stream_def_dict' not in stream_config:
        # should not happen: PlatformAgent._validate_configuration validates this.
        log.error("'stream_def_dict' key not in configuration for stream %r" % stream_name)
        return

    stream_def_dict = stream_config['stream_def_dict']
    stream_def_dict['type_'] = 'StreamDefinition'
    stream_def_obj = decoder.deserialize(stream_def_dict)
    self._stream_defs[stream_name] = stream_def_obj

    routing_key = stream_config['routing_key']
    stream_id = stream_config['stream_id']
    exchange_point = stream_config['exchange_point']
    parameter_dictionary = stream_def_dict['parameter_dictionary']
    log.debug("%r: got parameter_dictionary from stream_def_dict", self._platform_id)

    self._data_streams[stream_name] = stream_id
    self._param_dicts[stream_name] = ParameterDictionary.load(parameter_dictionary)
    stream_route = StreamRoute(exchange_point=exchange_point, routing_key=routing_key)
    publisher = self._create_publisher(stream_id, stream_route)
    self._data_publishers[stream_name] = publisher

    log.debug("%r: created publisher for stream_name=%r", self._platform_id, stream_name)
def _set_result_set(self, result_set):
    """
    Take data from the yaml file and store it in internal objects for
    verifying data. Raise an exception on error.
    """
    log.trace("Parsing result set header: %s", result_set)

    self._result_set_header = result_set.get("header")
    if not self._result_set_header:
        raise IOError("Missing result set header")
    log.trace("Header: %s", self._result_set_header)

    if self._result_set_header.get("stream_name") is None:
        raise IOError("header.stream_name not defined")

    self._result_set_data = {}
    data = result_set.get("data")
    if not data:
        raise IOError("Missing result set data")

    for granule in data:
        index = granule.get("_index")
        if index is None:
            log.error("Granule definition missing _index: %s", granule)
            raise IOError("Granule definition missing _index")

        if self._result_set_data.get(index) is not None:
            log.error("Duplicate granule definition for _index %s: %s", index, granule)
            raise IOError("Duplicate definition found for index: %s" % index)

        self._result_set_data[index] = granule

    log.trace("Result set data: %s", self._result_set_data)
def _initialize_queue_resource(self):
    """
    Retrieve the resource and restore the remote queue.
    If it does not exist, create a new one.
    """
    listen_name = self.CFG.process.listen_name
    objs, ids = self.clients.resource_registry.find_resources(name=listen_name)

    # If no persisted queue exists, create one.
    if len(objs) == 0:
        createtime = time.time()
        obj = IonObject("RemoteCommandQueue", name=listen_name,
                        updated=createtime, created=createtime)

        # Persist object and read it back.
        obj_id, obj_rev = self.clients.resource_registry.create(obj)
        obj = self.clients.resource_registry.read(obj_id)

        log.debug("Created persistent queue for name=%s", listen_name)

    # If one exists, restore it here.
    elif len(objs) == 1:
        obj = objs[0]
        obj_id = ids[0]
        for command in obj.queue:
            self._tx_dict[command.command_id] = command
            self._client.enqueue(command)

        log.debug("Restored remote queue for name=%s: len=%i updated=%f.",
                  listen_name, len(obj.queue), obj.updated)

    # Error: multiple queues with same name.
    else:
        log.error("%i > 1 remote command queues found for name=%s",
                  len(objs), listen_name)
def on_init(self):
    if not EEAgentCore:
        msg = "EEAgentCore isn't available. Use autolaunch.cfg buildout"
        log.error(msg)
        self.heartbeat_thread = None
        return
    log.debug("ExecutionEngineAgent Pyon on_init")

    launch_type_name = self.CFG.eeagent.launch_type.name
    if not launch_type_name:
        # TODO: Fail fast here?
        log.error("No launch_type.name specified")

    self._factory = get_exe_factory(launch_type_name, self.CFG,
                                    pyon_container=self.container, log=log)

    # TODO: Allow other core class?
    self.core = EEAgentCore(self.CFG, self._factory, log)

    interval = self.CFG.eeagent.get('heartbeat', DEFAULT_HEARTBEAT)
    if interval > 0:
        self.heartbeater = HeartBeater(self.CFG, self._factory, log=log)
        self.heartbeater.poll()
        self.heartbeat_thread = looping_call(0.1, self.heartbeater.poll)
    else:
        self.heartbeat_thread = None
def _create_mission_scheduler(self, mission_id, mission_yml):
    """
    @param mission_id
    @param mission_yml
    """
    log.debug('[mm] _create_mission_scheduler: mission_id=%r', mission_id)

    mission_loader = MissionLoader(self._agent)
    mission_loader.load_mission(mission_id, mission_yml)
    self._mission_entries = mission_loader.mission_entries

    log.debug('[mm] _create_mission_scheduler: _ia_clients=\n%s',
              self._agent._pp.pformat(self._agent._ia_clients))

    # get instrument IDs and clients for the valid running instruments:
    instruments = {}
    for (instrument_id, obj) in self._agent._ia_clients.iteritems():
        if isinstance(obj, dict):
            # it's a valid instrument.
            if instrument_id != obj.resource_id:
                log.error('[mm] _create_mission_scheduler: instrument_id=%s, '
                          'resource_id=%s', instrument_id, obj.resource_id)
            instruments[obj.resource_id] = obj.ia_client

    mission_scheduler = MissionScheduler(self._agent, instruments,
                                         self._mission_entries)
    log.debug('[mm] _create_mission_scheduler: MissionScheduler created. entries=%s',
              self._mission_entries)
    return mission_scheduler
def _get_dsa_client(self, instrument_device, dsa_instance):
    """
    Launch the agent and return a client
    """
    fake_process = FakeProcess()
    fake_process.container = self.container

    clients = DataAcquisitionManagementServiceDependentClients(fake_process)
    config_builder = ExternalDatasetAgentConfigurationBuilder(clients)

    try:
        config_builder.set_agent_instance_object(dsa_instance)
        self.agent_config = config_builder.prepare()
        log.trace("Using dataset agent configuration: %s",
                  pprint.pformat(self.agent_config))
    except Exception as e:
        log.error('failed to launch: %s', e, exc_info=True)
        raise ServerError('failed to launch')

    dispatcher = ProcessDispatcherServiceClient()
    launcher = AgentLauncher(dispatcher)

    log.debug("Launching agent process!")

    process_id = launcher.launch(self.agent_config,
                                 config_builder._get_process_definition()._id)
    if not process_id:
        raise ServerError("Launched external dataset agent instance but no process_id")

    config_builder.record_launch_parameters(self.agent_config)

    launcher.await_launch(10.0)

    return ResourceAgentClient(instrument_device._id, process=FakeProcess())
def check_lifecycle_policy(self, process, message, headers):
    try:
        gov_values = GovernanceHeaderValues(headers=headers, process=process)
        resource_id = gov_values.resource_id
    except Inconsistent as ex:
        log.error("unable to retrieve governance header")
        return False, ex.message
def find_instrument_agents(requester=None):
    instrument_agent_find_request = {
        "serviceRequest": {
            "serviceName": "instrument_management",
            "serviceOp": "find_instrument_agents",
            "expiry": 0,
            "params": {}
        }
    }

    if requester is not None:
        instrument_agent_find_request["serviceRequest"]["requester"] = requester

    response = gateway_request('instrument_management/find_instrument_agents',
                               simplejson.dumps(instrument_agent_find_request))

    if GATEWAY_ERROR in response['data']:
        log.error(response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE])
        return response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE]

    response_data = response['data'][GATEWAY_RESPONSE]

    log.info('Number of Instrument Agent objects: %s', len(response_data))
    for res in response_data:
        log.debug(res)

    return response_data
def create_interval_timer(self, start_time="", interval=0, end_time="",
                          event_origin="", event_subtype=""):
    start_time = time.time() if start_time == "now" else start_time
    if not start_time or not end_time:
        raise BadRequest("create_interval_timer: start_time and end_time must be set")
    start_time = float(start_time)
    end_time = float(end_time)
    if end_time != -1 and (time.time() >= end_time):
        log.error("SchedulerService.create_interval_timer: end_time must be -1 or in the future")
        raise BadRequest("SchedulerService.create_interval_timer: end_time must be -1 or in the future")

    if not event_origin:
        log.error("SchedulerService.create_interval_timer: event_origin is not set")
        raise BadRequest("SchedulerService.create_interval_timer: event_origin is not set")

    log.debug("SchedulerService:create_interval_timer start_time: %s interval: %s "
              "end_time: %s event_origin: %s", start_time, interval, end_time, event_origin)

    interval_timer = IonObject("IntervalTimer",
                               {"start_time": start_time,
                                "interval": interval,
                                "end_time": end_time,
                                "event_origin": event_origin,
                                "event_subtype": event_subtype})
    se = IonObject(RT.SchedulerEntry, {"entry": interval_timer})
    return self.create_timer(se)
def insert_values(self, coverage, rdt, stream_id):
    elements = len(rdt)
    start_index = coverage.num_timesteps - elements

    for k, v in rdt.iteritems():
        if isinstance(v, SparseConstantValue):
            continue
        slice_ = slice(start_index, None)
        try:
            coverage.set_parameter_values(param_name=k, tdoa=slice_, value=v)
        except IOError as e:
            log.error("Couldn't insert values for coverage: %s",
                      coverage.persistence_dir, exc_info=True)
            try:
                coverage.close()
            finally:
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(e.message)
        except IndexError as e:
            log.error("Value set: %s", v[:])
            data_products, _ = self.container.resource_registry.find_subjects(
                object=stream_id, predicate=PRED.hasStream, subject_type=RT.DataProduct)
            for data_product in data_products:
                log.exception("Index exception with %s, trying to insert %s into "
                              "coverage with shape %s", data_product.name, k, v.shape)

    if 'ingestion_timestamp' in coverage.list_parameters():
        t_now = time.time()
        ntp_time = TimeUtils.ts_to_units(
            coverage.get_parameter_context('ingestion_timestamp').uom, t_now)
        coverage.set_parameter_values(param_name='ingestion_timestamp',
                                      tdoa=slice_, value=ntp_time)
def load_datastore(cls, path=None, ds_name=None, ignore_errors=True):
    if CFG.system.mockdb:
        log.warn("Cannot load into MockDB")
        return

    path = path or "res/preload/default"
    if not os.path.exists(path):
        log.warn("Load path not found: %s" % path)
        return
    if not os.path.isdir(path):
        log.error("Path is not a directory: %s" % path)
        return

    if ds_name:
        # Here we expect path to contain YML files for given datastore
        log.info("DatastoreLoader: LOAD datastore=%s" % ds_name)
        cls._load_datastore(path, ds_name, ignore_errors)
    else:
        # Here we expect path to have subdirs that are named according to logical
        # datastores, e.g. "resources"
        log.info("DatastoreLoader: LOAD ALL DATASTORES")
        for fn in os.listdir(path):
            fp = os.path.join(path, fn)
            if not os.path.isdir(fp):
                log.warn("Item %s is not a directory" % fp)
                continue
            cls._load_datastore(fp, fn, ignore_errors)
def update_time(self, data_product, t):
    '''
    Sets the nominal_datetime for a data product correctly
    Accounts for things like NTP and out of order data
    '''
    t0, t1 = self.get_datetime_bounds(data_product)
    #TODO: Account for non NTP-based timestamps
    min_t = np.min(t) - 2208988800
    max_t = np.max(t) - 2208988800

    if t0:
        t0 = min(t0, min_t)
    else:
        t0 = min_t

    if t1:
        t1 = max(t1, max_t)
    else:
        t1 = max_t

    if t0 > t1:
        log.error("This should never happen but t0 > t1")

    data_product.nominal_datetime.start_datetime = float(t0)
    data_product.nominal_datetime.end_datetime = float(t1)
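# Standalone sketch of the epoch arithmetic above: NTP timestamps count seconds
# from 1900-01-01 while Unix time counts from 1970-01-01, a fixed difference of
# 2208988800 seconds (leap seconds ignored here).
NTP_UNIX_OFFSET = 2208988800

def ntp_to_unix(ntp_seconds):
    return ntp_seconds - NTP_UNIX_OFFSET

def unix_to_ntp(unix_seconds):
    return unix_seconds + NTP_UNIX_OFFSET

# e.g. ntp_to_unix(2208988800) -> 0, i.e. 1970-01-01 00:00:00 UTC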
def persist_or_timeout(self, stream_id, rdt):
    """ retry writing coverage multiple times and eventually time out """
    done = False
    timeout = 2
    start = time.time()
    while not done:
        try:
            self.add_granule(stream_id, rdt)
            done = True
        except:
            log.exception('An issue with coverage, retrying after a bit')
            if (time.time() - start) > MAX_RETRY_TIME:  # After an hour just give up
                dataset_id = self.get_dataset(stream_id)
                log.error("We're giving up, the coverage needs to be inspected %s",
                          DatasetManagementService._get_coverage_path(dataset_id))
                raise

            if stream_id in self._coverages:
                log.info('Popping coverage for stream %s', stream_id)
                self._coverages.pop(stream_id)

            gevent.sleep(timeout)
            if timeout > (60 * 5):
                timeout = 60 * 5
            else:
                timeout *= 2
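# Minimal standalone sketch of the capped exponential backoff used above:
# retry an operation, doubling the delay up to a ceiling, and re-raise once
# the total retry budget is spent. Names and defaults are illustrative.
import time

def retry_with_backoff(operation, max_retry_time=3600, initial_delay=2, max_delay=300):
    start = time.time()
    delay = initial_delay
    while True:
        try:
            return operation()
        except Exception:
            if (time.time() - start) > max_retry_time:
                raise  # budget spent; surface the last failure to the caller
            time.sleep(delay)
            delay = min(delay * 2, max_delay)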
def persist_or_timeout(self, stream_id, rdt):
    '''
    A loop that tries to parse and store a granule, waiting an increasing
    amount of time (capped at five minutes) between attempts until the total
    retry budget (MAX_RETRY_TIME) runs out.
    '''
    done = False
    timeout = 2
    start = time.time()
    while not done:
        if self.parse_granule(stream_id, rdt, start, done):
            return  # We're all done, everything worked

        if (time.time() - start) > MAX_RETRY_TIME:  # After a while, give up
            dataset_id = self.get_dataset(stream_id)
            log.error("We're giving up, the coverage needs to be inspected %s",
                      DatasetManagementService._get_coverage_path(dataset_id))
            raise

        if stream_id in self._coverages:
            log.info('Popping coverage for stream %s', stream_id)
            self._coverages.pop(stream_id)

        gevent.sleep(timeout)
        timeout = min(60 * 5, timeout * 2)
def insert_values(self, coverage, rdt, stream_id):
    np_dict = self.build_data_dict(rdt)

    if 'ingestion_timestamp' in coverage.list_parameters():
        timestamps = np.array([(time.time() + 2208988800) for i in rdt[rdt.temporal_parameter]])
        np_dict['ingestion_timestamp'] = NumpyParameterData('ingestion_timestamp',
                                                            timestamps,
                                                            rdt[rdt.temporal_parameter])

    # If it's sparse only
    if self.sparse_only(rdt):
        del np_dict[rdt.temporal_parameter]

    try:
        coverage.set_parameter_values(np_dict)
    except IOError as e:
        log.error("Couldn't insert values for coverage: %s",
                  coverage.persistence_dir, exc_info=True)
        try:
            coverage.close()
        finally:
            self._bad_coverages[stream_id] = 1
            raise CorruptionError(e.message)
    except KeyError as e:
        if 'has not been initialized' in e.message:
            coverage.refresh()
        raise
    except Exception as e:
        print repr(rdt)
        raise
def eval_alarm(alarm_def, x):
    """
    Evaluate the alarm expression against the new value x and, if the alarm
    status changed, build the corresponding event data.
    """
    alarm_def.current_val = x
    old_status = alarm_def.status
    alarm_def.status = eval(alarm_def.expr)

    event_data = None

    if old_status != alarm_def.status:
        event_data = {
            'name': alarm_def.name,
            'message': alarm_def.message,
            'expr': alarm_def.expr,
            'stream_name': alarm_def.stream_name,
            'value_id': alarm_def.value_id,
            'value': x
        }

        if not alarm_def.status:
            event_data['event_type'] = 'StreamAllClearAlarmEvent'
            event_data['message'] = 'The alarm %s has cleared.' % alarm_def.name
        elif alarm_def.type == StreamAlarmType.WARNING:
            event_data['event_type'] = 'StreamWarningAlarmEvent'
        elif alarm_def.type == StreamAlarmType.ALERT:
            event_data['event_type'] = 'StreamAlertAlarmEvent'
        else:
            log.error('Unknown alarm type.')
            event_data = None

    return (alarm_def, event_data)
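# Standalone sketch of the eval-based status check above: the alarm expression
# is a Python string that references the incoming value as `x`. SimpleAlarm is
# an illustrative stand-in, not the real alarm object; note that eval() trusts
# the expression source, so real code should restrict the evaluation namespace.
class SimpleAlarm(object):
    def __init__(self, expr):
        self.expr = expr
        self.status = False

def eval_status(alarm, x):
    return eval(alarm.expr, {"__builtins__": {}}, {"x": x})

alarm = SimpleAlarm("x > 10")
assert eval_status(alarm, 5) is False
assert eval_status(alarm, 15) is True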
def _construct_packet_factories(self):
    """
    Construct packet factories from packet_config member of the driver_config.
    @retval None
    """
    packet_config = self._dvr_config['packet_config']

    for (name, val) in packet_config.iteritems():
        if val:
            mod = val[0]
            cls = val[1]
            import_str = 'from %s import %s' % (mod, cls)
            ctor_str = 'ctor = %s' % cls

            try:
                exec import_str
                exec ctor_str
            except Exception:
                log.error('Instrument agent %s had error creating packet factories '
                          'from %s.%s', self._proc_name, mod, cls)
            else:
                self._packet_factories[name] = ctor
                log.info('Instrument agent %s created packet factory for stream %s',
                         self._proc_name, name)
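# A hedged alternative sketch: the exec-based import above can also be written
# with importlib, avoiding executable strings entirely. This is a suggested
# variant, not the module's actual implementation.
import importlib

def load_factory(module_name, class_name):
    module = importlib.import_module(module_name)
    return getattr(module, class_name)

# e.g. ctor = load_factory('collections', 'OrderedDict'); ctor() then builds one.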
def verify(self, granules):
    """
    Verify granules passed in against the result set read in the ctor.

    Ensure:
      - Verify granules as a set
      - Verify individual granule data

    Store verification result in the object and return success or failure.
    @param granules: list of granules to verify.
    @return True if verification successful, False otherwise
    """
    self._clear_report()
    result = True

    granule_data = self._extract_granule_data(granules)

    if self._verify_set(granule_data):
        result = self._verify_granules(granule_data)
    else:
        result = False

    if not result:
        log.error("Failed verification: \n%s", self.report())

    return result
def __init__(self, agent):
    self._agent = agent

    self._platform_id = agent._platform_id
    self.resource_id = agent.resource_id
    self._pp = agent._pp
    self.CFG = agent.CFG

    # Dictionaries used for data publishing.
    self._data_streams = {}
    self._param_dicts = {}
    self._stream_defs = {}
    self._data_publishers = {}

    self._connection_ID = None
    self._connection_index = {}

    # Set of parameter names received in event notification but not
    # configured. Allows the corresponding warning to be logged only once.
    self._unconfigured_params = set()

    stream_info = self.CFG.get('stream_config', None)
    if stream_info is None:
        # should not happen: PlatformAgent._validate_configuration validates this.
        log.error("%r: No stream_config given in CFG", self._platform_id)
        return

    for stream_name, stream_config in stream_info.iteritems():
        self._construct_stream_and_publisher(stream_name, stream_config)

    log.debug("%r: PlatformAgentStreamPublisher complete", self._platform_id)
def _verify_set(self, granules):
    """
    Verify the granules as a set match what we expect:
      - All granules are of the expected type
      - Check granule count
      - Connection IDs change as expected
    """
    errors = []

    if len(self._result_set_data) != len(granules):
        errors.append("result set records != granules to verify (%d != %d)"
                      % (len(self._result_set_data), len(granules)))

    for granule in granules:
        if not self._verify_granule_type(granule):
            log.error("granule type mismatch: %s", granule)
            errors.append('granule type mismatch')

    if len(errors):
        self._add_to_report("Header verification failure")
        self._add_to_report(errors, 1)
        return False

    return True
def _update_queue_resource(self):
    """
    Retrieve and update the resource that persists the remote command queue.
    """
    listen_name = self.CFG.process.listen_name

    while True:
        objs, ids = self.clients.resource_registry.find_resources(name=listen_name)
        if len(objs) != 1:
            log.error('Incorrect number of persistent queues for %s.', listen_name)
            return
        obj = objs[0]
        obj_id = ids[0]
        obj.queue = copy.deepcopy(self._client._queue)
        obj.updated = time.time()
        try:
            self.clients.resource_registry.update(obj)
            log.debug('Updated queue for name=%s: len=%i updated=%f.',
                      listen_name, len(obj.queue), obj.updated)
            break
        except Conflict:
            log.debug('Conflict with queue resource version, rereading.')
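# Standalone sketch of the optimistic-concurrency retry above: reread the
# document and reapply the change whenever another writer bumped its revision.
# `store` and ConflictError are illustrative stand-ins for the resource
# registry and its conflict exception.
class ConflictError(Exception):
    pass

def update_with_retry(store, doc_id, mutate):
    while True:
        doc = store.read(doc_id)   # fetch the latest revision
        mutate(doc)                # apply our changes on top of it
        try:
            store.update(doc)      # rejected if our revision is stale
            return doc
        except ConflictError:
            continue               # lost the race; reread and retry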
def on_init(self):
    if not EEAgentCore:
        msg = "EEAgentCore isn't available. Use autolaunch.cfg buildout"
        log.error(msg)
        self.heartbeat_thread = None
        return
    log.debug("ExecutionEngineAgent Pyon on_init")

    launch_type_name = self.CFG.eeagent.launch_type.name
    if not launch_type_name:
        # TODO: Fail fast here?
        log.error("No launch_type.name specified")

    self._factory = get_exe_factory(launch_type_name, self.CFG,
                                    pyon_container=self.container, log=log)

    # TODO: Allow other core class?
    self.core = EEAgentCore(self.CFG, self._factory, log)

    interval = float(self.CFG.eeagent.get('heartbeat', DEFAULT_HEARTBEAT))
    if interval > 0:
        self.heartbeater = HeartBeater(self.CFG, self._factory,
                                       self.resource_id, self, log=log)
        self.heartbeater.poll()
        self.heartbeat_thread, self._heartbeat_thread_event = looping_call(
            0.1, self.heartbeater.poll)
    else:
        self.heartbeat_thread = None
        self._heartbeat_thread_event = None
def create_interval_timer(self, start_time="", interval=0, end_time="",
                          event_origin="", event_subtype=""):
    if (end_time != -1 and (self.__now_posix(self.__now()) >= end_time)) or not event_origin:
        log.error("SchedulerService.create_interval_timer: end_time is in the past "
                  "or event_origin is not set")
        raise BadRequest("end_time is in the past or event_origin is not set")
    if start_time == "now":
        start_time = self.__now_posix(self.__now())

    log.debug("SchedulerService:create_interval_timer start_time: %s interval: %s "
              "end_time: %s event_origin: %s", start_time, interval, end_time, event_origin)
    interval_timer = IonObject("IntervalTimer",
                               {"start_time": start_time,
                                "interval": interval,
                                "end_time": end_time,
                                "event_origin": event_origin,
                                "event_subtype": event_subtype})
    se = IonObject(RT.SchedulerEntry, {"entry": interval_timer})
    return self.create_timer(se)
def add_granule(self, stream_id, granule):
    '''
    Appends the granule's data to the coverage and persists it.
    '''
    #--------------------------------------------------------------------------------
    # Coverage determination and appending
    #--------------------------------------------------------------------------------
    dataset_id = self.get_dataset(stream_id)
    if not dataset_id:
        log.error('No dataset could be determined on this stream: %s', stream_id)
        return
    coverage = self.get_coverage(stream_id)
    if not coverage:
        log.error('Could not persist coverage from granule, coverage is None')
        return
    #--------------------------------------------------------------------------------
    # Actual persistence
    #--------------------------------------------------------------------------------
    rdt = RecordDictionaryTool.load_from_granule(granule)
    elements = len(rdt)
    if not elements:
        return
    coverage.insert_timesteps(elements)
    start_index = coverage.num_timesteps - elements

    for k, v in rdt.iteritems():
        if k == 'image_obj':
            log.trace('%s:', k)
        else:
            log.trace('%s: %s', k, v)

        slice_ = slice(start_index, None)
        coverage.set_parameter_values(param_name=k, tdoa=slice_, value=v)

    coverage.flush()
def on_start(self):
    global DEBUG
    if self.CFG.system.force_clean and not self.CFG.system.testing and not DEBUG:
        text = "system.force_clean=True. ION Preload does not support this"
        log.error(text)
        raise iex.BadRequest(text)

    op = self.CFG.get("op", None)
    path = self.CFG.get("path", None)
    scenario = self.CFG.get("scenario", None)
    DEBUG = self.CFG.get("debug", False)
    self.loadooi = self.CFG.get("loadooi", False)
    self.loadui = self.CFG.get("loadui", False)

    log.info("IONLoader: {op=%s, path=%s, scenario=%s}" % (op, path, scenario))
    if op:
        if op == "load":
            self.load_ion(path, scenario)
        elif op == "loadooi":
            self.extract_ooi_assets(path)
        elif op == "loadui":
            self.load_ui(path)
        else:
            raise iex.BadRequest("Operation unknown")
    else:
        raise iex.BadRequest("No operation specified")
def persist(self, dataset_granule):  # pragma: no cover
    '''
    Persists the dataset metadata
    '''
    #--------------------------------------------------------------------------------
    # There's a potential that the datastore could have been deleted while ingestion
    # is still running. Essentially this refreshes the state.
    #--------------------------------------------------------------------------------
    try:
        self.db.create_doc(dataset_granule)
        return
    except ResourceNotFound:
        log.error('The datastore was removed while ingesting (retrying)')
        self.db = self.container.datastore_manager.get_datastore(
            self.datastore_name, DataStore.DS_PROFILE.SCIDATA)

    #--------------------------------------------------------------------------------
    # The first call to create_doc attached an _id to the dictionary, which causes an
    # error to be raised on retry. To make this more resilient, we ensure the
    # dictionary does not carry any of these excess keys.
    #--------------------------------------------------------------------------------
    try:
        if '_id' in dataset_granule:
            del dataset_granule['_id']
        if '_rev' in dataset_granule:
            del dataset_granule['_rev']
        self.db.create_doc(dataset_granule)
    except ResourceNotFound as e:
        log.error(e.message)  # Oh well, I tried
def _get_agent_instance_id(self, resource_id):
    dsaids, _ = self.rr.find_objects(subject=resource_id,
                                     predicate=PRED.hasAgentInstance,
                                     object_type=RT.ExternalDatasetAgentInstance,
                                     id_only=True)
    iaids, _ = self.rr.find_objects(subject=resource_id,
                                    predicate=PRED.hasAgentInstance,
                                    object_type=RT.InstrumentAgentInstance,
                                    id_only=True)
    paids, _ = self.rr.find_objects(subject=resource_id,
                                    predicate=PRED.hasAgentInstance,
                                    object_type=RT.PlatformAgentInstance,
                                    id_only=True)
    aids = dsaids + iaids + paids
    if len(aids) > 1:
        log.error("Multiple agent instances found")
        raise BadRequest("Failed to identify agent instance")
    if len(aids) == 0:
        log.error("Agent instance not found")
        raise BadRequest("Failed to identify agent instance")
    log.info("Found agent instance ID: %s", aids[0])
    return aids[0]
def aparam_set_mission(self, yaml_filename):
    """
    Specifies mission to be executed.
    @param yaml_filename  Mission definition
    """
    log.debug('[mm] aparam_set_mission: yaml_filename=%s', yaml_filename)

    mission_loader = MissionLoader()
    mission_loader.load_mission_file(yaml_filename)
    self._agent.aparam_mission = mission_loader.mission_entries

    log.debug('[mm] aparam_set_mission: _ia_clients=\n%s',
              self._agent._pp.pformat(self._agent._ia_clients))

    # get instrument IDs and clients for the valid running instruments:
    instruments = {}
    for (instrument_id, obj) in self._agent._ia_clients.iteritems():
        if isinstance(obj, dict):
            # it's a valid instrument.
            if instrument_id != obj.resource_id:
                log.error('[mm] aparam_set_mission: instrument_id=%s, '
                          'resource_id=%s', instrument_id, obj.resource_id)
            instruments[obj.resource_id] = obj.ia_client

    self.mission_scheduler = MissionScheduler(self._agent, instruments,
                                              self._agent.aparam_mission)
    log.debug('[mm] aparam_set_mission: MissionScheduler created. entries=%s',
              self._agent.aparam_mission)
def build_error_response(e):
    if hasattr(e, 'get_stacks'):
        # Process potentially multiple stacks.
        full_error = ''
        for i in range(len(e.get_stacks())):
            full_error += e.get_stacks()[i][0] + "\n"
            if i == 0:
                full_error += ''.join(traceback.format_exception(*sys.exc_info()))
            else:
                for ln in e.get_stacks()[i][1]:
                    full_error += str(ln) + "\n"

        exec_name = e.__class__.__name__
    else:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        exec_name = exc_type.__name__
        full_error = ''.join(traceback.format_exception(*sys.exc_info()))

    if service_gateway_instance.log_errors:
        log.error(full_error)

    result = {
        GATEWAY_ERROR_EXCEPTION: exec_name,
        GATEWAY_ERROR_MESSAGE: str(e.message),
        GATEWAY_ERROR_TRACE: full_error
    }

    if RETURN_MIMETYPE_PARAM in request.args:
        return_mimetype = str(request.args[RETURN_MIMETYPE_PARAM])
        return service_gateway_app.response_class(result, mimetype=return_mimetype)

    return json_response({'data': {GATEWAY_ERROR: result}})
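# Standalone sketch of the traceback capture used above: inside an except block,
# sys.exc_info() still describes the active exception, and
# traceback.format_exception returns a list of lines meant to be joined.
import sys
import traceback

def format_current_exception():
    exc_type, exc_obj, exc_tb = sys.exc_info()
    return exc_type.__name__, ''.join(traceback.format_exception(exc_type, exc_obj, exc_tb))

try:
    1 / 0
except ZeroDivisionError:
    name, text = format_current_exception()
    # name == 'ZeroDivisionError'; text is the fully formatted traceback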
def dead_man_timeout(self, stream_id, callback, *args, **kwargs):
    done = False
    timeout = 2
    start = time.time()
    while not done:
        try:
            callback(*args, **kwargs)
            done = True
        except:
            log.exception("An issue with coverage, retrying after a bit")
            if (time.time() - start) > 3600:  # After an hour just give up
                dataset_id = self.get_dataset(stream_id)
                log.error("We're giving up, the coverage needs to be inspected %s",
                          DatasetManagementService._get_coverage_path(dataset_id))
                raise

            if stream_id in self._coverages:
                log.info("Popping coverage for stream %s", stream_id)
                self._coverages.pop(stream_id)

            gevent.sleep(timeout)
            if timeout > (60 * 5):
                timeout = 60 * 5
            else:
                timeout *= 2
def on_init(self):
    # Time in between event persists
    self.persist_interval = float(self.CFG.get_safe("process.event_persister.persist_interval", 1.0))

    self.persist_blacklist = self.CFG.get_safe("process.event_persister.persist_blacklist", {})

    self._event_type_blacklist = [entry['event_type'] for entry in self.persist_blacklist
                                  if entry.get('event_type', None) and len(entry) == 1]
    self._complex_blacklist = [entry for entry in self.persist_blacklist
                               if not (entry.get('event_type', None) and len(entry) == 1)]
    if self._complex_blacklist:
        log.warn("EventPersister does not yet support complex blacklist expressions: %s",
                 self._complex_blacklist)

    # Time in between view refreshes
    self.refresh_interval = float(self.CFG.get_safe("process.event_persister.refresh_interval", 60.0))

    # Holds received events FIFO in a synchronized queue
    self.event_queue = Queue()

    # Temporarily holds the list of events to persist while a datastore operation
    # is not yet completed. This is where events to persist remain if a datastore
    # operation fails occasionally.
    self.events_to_persist = None

    # Number of unsuccessful attempts to persist in a row
    self.failure_count = 0

    # Bookkeeping for the greenlets
    self._persist_greenlet = None
    self._terminate_persist = Event()  # when set, exits the persister greenlet

    self._refresh_greenlet = None
    self._terminate_refresh = Event()  # when set, exits the refresher greenlet

    # The event subscriber
    self.event_sub = None

    # Registered event process plugins
    self.process_plugins = {}
    for plugin_name, plugin_cls, plugin_args in PROCESS_PLUGINS:
        try:
            plugin = named_any(plugin_cls)(**plugin_args)
            self.process_plugins[plugin_name] = plugin
            log.info("Loaded event processing plugin %s (%s)", plugin_name, plugin_cls)
        except Exception as ex:
            log.error("Cannot instantiate event processing plugin %s (%s): %s",
                      plugin_name, plugin_cls, ex)
def update_data_product_metadata(self, dataset_id, rdt):
    data_products = self._get_data_products(dataset_id)
    for data_product in data_products:
        self.update_time(data_product, rdt[rdt.temporal_parameter][:])
        self.update_geo(data_product, rdt)
        try:
            self.container.resource_registry.update(data_product)
        except:  # TODO: figure out WHICH Exception gets raised here when the bounds are off
            log.error("Problem updating the data product metadata", exc_info=True)
def _call_target(self, target, value=None, resource_id=None, res_type=None,
                 cmd_args=None, cmd_kwargs=None):
    """
    Makes a call to a specified function. Function specification can be of varying type.
    """
    try:
        if not target:
            return None
        match = re.match(r"(func|serviceop):([\w.]+)\s*\(\s*([\w,$\s]*)\s*\)\s*(?:->\s*([\w\.]+))?\s*$",
                         target)
        if match:
            func_type, func_name, func_args, res_path = match.groups()
            func = None
            if func_type == "func":
                if func_name.startswith("self."):
                    func = getattr(self, func_name[5:])
                else:
                    func = named_any(func_name)
            elif func_type == "serviceop":
                svc_name, svc_op = func_name.split('.', 1)
                try:
                    svc_client_cls = get_service_registry().get_service_by_name(svc_name).client
                except Exception:
                    log.error("No service client found for service: %s", svc_name)
                else:
                    svc_client = svc_client_cls(process=self)
                    func = getattr(svc_client, svc_op)

            if not func:
                return None

            args = self._get_call_args(func_args, resource_id, res_type, value, cmd_args)
            kwargs = {} if not cmd_kwargs else cmd_kwargs

            func_res = func(*args, **kwargs)
            log.info("Function %s result: %s", func, func_res)

            if res_path and isinstance(func_res, dict):
                func_res = get_safe(func_res, res_path, None)

            return func_res
        else:
            log.error("Unknown call target expression: %s", target)
    except Exception:
        log.exception("_call_target exception")
        return None
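# Standalone sketch of the target grammar parsed above: strings such as
# "func:pkg.mod.fn(arg1, arg2) -> result.path" split into a call type, a dotted
# function name, an argument list, and an optional result path. The pattern is
# the same one used above; the matched names are hypothetical.
import re

TARGET_RE = re.compile(r"(func|serviceop):([\w.]+)\s*\(\s*([\w,$\s]*)\s*\)\s*(?:->\s*([\w\.]+))?\s*$")

m = TARGET_RE.match("func:mypkg.handlers.on_event($value, res_id) -> result.status")
assert m.groups() == ("func", "mypkg.handlers.on_event", "$value, res_id", "result.status")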
def wrapper(*args, **kwargs):
    stack = extract_stack(limit=4)
    try:
        func(*args, **kwargs)
        return True
    except AssertionError as e:
        log.error('\n%s\n%s\n%s', sep_bar, ''.join(format_list(stack[:-1])), e.message)
        return False
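# Standalone sketch of the decorator this wrapper plausibly belongs to: it turns
# an assertion-raising check into a boolean and logs the captured call site on
# failure. The decorator name and `sep_bar` are illustrative assumptions.
import functools
import logging
from traceback import extract_stack, format_list

log = logging.getLogger(__name__)
sep_bar = '-' * 60

def assert_to_bool(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        stack = extract_stack(limit=4)
        try:
            func(*args, **kwargs)
            return True
        except AssertionError as e:
            log.error('\n%s\n%s\n%s', sep_bar, ''.join(format_list(stack[:-1])), e)
            return False
    return wrapper

@assert_to_bool
def check_positive(x):
    assert x > 0, "x must be positive"

# check_positive(5) -> True; check_positive(-1) -> False (and logs the call site)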