def _start_data_subscribers(self, count, raw_count):
    """
    Start subscribers for the parsed and raw streams, and set up async results
    that fire when the expected number of samples have arrived on each.
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # Create streams and subscriptions for each stream named in driver.
    self._data_subscribers = []
    self._samples_received = []
    self._raw_samples_received = []
    self._async_sample_result = AsyncResult()
    self._async_raw_sample_result = AsyncResult()

    # A callback for processing subscribed-to data.
    def recv_data(message, stream_route, stream_id):
        log.info('Received parsed data on %s (%s,%s)', stream_id,
                 stream_route.exchange_point, stream_route.routing_key)
        self._samples_received.append(message)
        if len(self._samples_received) == count:
            self._async_sample_result.set()

    def recv_raw_data(message, stream_route, stream_id):
        log.info('Received raw data on %s (%s,%s)', stream_id,
                 stream_route.exchange_point, stream_route.routing_key)
        self._raw_samples_received.append(message)
        if len(self._raw_samples_received) == raw_count:
            self._async_raw_sample_result.set()

    from pyon.util.containers import create_unique_identifier

    stream_name = 'parsed'
    parsed_config = self._stream_config[stream_name]
    stream_id = parsed_config['stream_id']
    exchange_name = create_unique_identifier("%s_queue" % stream_name)
    self._purge_queue(exchange_name)
    sub = StandaloneStreamSubscriber(exchange_name, recv_data)
    sub.start()
    self._data_subscribers.append(sub)
    sub_id = pubsub_client.create_subscription(name=exchange_name, stream_ids=[stream_id])
    pubsub_client.activate_subscription(sub_id)
    # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
    sub.subscription_id = sub_id

    stream_name = 'raw'
    raw_config = self._stream_config[stream_name]
    stream_id = raw_config['stream_id']
    exchange_name = create_unique_identifier("%s_queue" % stream_name)
    self._purge_queue(exchange_name)
    sub = StandaloneStreamSubscriber(exchange_name, recv_raw_data)
    sub.start()
    self._data_subscribers.append(sub)
    sub_id = pubsub_client.create_subscription(name=exchange_name, stream_ids=[stream_id])
    pubsub_client.activate_subscription(sub_id)
    # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
    sub.subscription_id = sub_id
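A minimal usage sketch for the subscriber helper above, as it might appear in a test. The sample counts, the 120-second timeout, and the _start_sampling helper are illustrative assumptions, not part of the original snippet.

def test_streaming_sketch(self):
    # Hypothetical test flow: counts, timeout, and _start_sampling are assumed.
    self._start_data_subscribers(count=3, raw_count=3)
    self._start_sampling()  # assumed helper that makes the driver publish granules

    # Block until both callbacks have seen the expected number of samples.
    self._async_sample_result.get(timeout=120)
    self._async_raw_sample_result.get(timeout=120)

    self.assertEqual(len(self._samples_received), 3)
    self.assertEqual(len(self._raw_samples_received), 3)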
def create_stream_definition(self, name='', parameter_dictionary=None, parameter_dictionary_id='',
                             stream_type='', description='', available_fields=None,
                             stream_configuration=None):
    parameter_dictionary = parameter_dictionary or {}
    stream_configuration = stream_configuration or {}
    existing = self.clients.resource_registry.find_resources(restype=RT.StreamDefinition, name=name, id_only=True)[0]
    if name and existing:
        if parameter_dictionary_id:
            pdict_ids, _ = self.clients.resource_registry.find_objects(subject=existing[0], predicate=PRED.hasParameterDictionary, id_only=True)
            if pdict_ids and parameter_dictionary_id == pdict_ids[0]:
                return existing[0]
            else:
                raise Conflict('StreamDefinition with the specified name already exists. (%s)' % name)

        stream_def = self.read_stream_definition(existing[0])
        if self._compare_pdicts(parameter_dictionary, stream_def.parameter_dictionary):
            return existing[0]

        raise Conflict('StreamDefinition with the specified name already exists. (%s)' % name)

    name = name or create_unique_identifier()

    stream_definition = StreamDefinition(parameter_dictionary=parameter_dictionary, stream_type=stream_type,
                                         name=name, description=description,
                                         available_fields=available_fields,
                                         stream_configuration=stream_configuration)
    stream_definition_id, _ = self.clients.resource_registry.create(stream_definition)

    if parameter_dictionary_id:
        self._associate_pdict_with_definition(parameter_dictionary_id, stream_definition_id)

    return stream_definition_id
def create_event_process_definition(self, version='', module='', class_name='', uri='', arguments=None,
                                    event_types=None, sub_types=None, origin_types=None):
    """
    Create a resource which defines the processing of events.

    @param version str
    @param module str
    @param class_name str
    @param uri str
    @param arguments list
    @return procdef_id str
    """
    # Create the event process detail object
    event_process_definition_detail = EventProcessDefinitionDetail()
    event_process_definition_detail.event_types = event_types
    event_process_definition_detail.sub_types = sub_types
    event_process_definition_detail.origin_types = origin_types

    # Create the process definition
    process_definition = ProcessDefinition(name=create_unique_identifier('event_process'))
    process_definition.executable = {
        'module': module,
        'class': class_name,
        'url': uri
    }
    process_definition.version = version
    process_definition.arguments = arguments
    process_definition.definition = event_process_definition_detail

    procdef_id = self.clients.process_dispatcher.create_process_definition(process_definition=process_definition)
    return procdef_id
def create_stream(self, name='', exchange_point='', topic_ids=None, credentials=None,
                  stream_definition_id='', description='', stream_name='', stream_type=''):

    # Argument Validation
    if name and self.clients.resource_registry.find_resources(restype=RT.Stream, name=name, id_only=True)[0]:
        raise Conflict("The named stream '%s' already exists on XP '%s'" % (name, exchange_point))
    validate_true(exchange_point, 'An exchange point must be specified')

    exchange_point_id = None
    if re.match(r'[0-9a-f]{32}', exchange_point):  # It's a uuid
        xp_obj = self.clients.exchange_management.read_exchange_point(exchange_point)
        exchange_point_id = exchange_point
        exchange_point = xp_obj.name
    else:
        self.container.ex_manager.create_xp(exchange_point)
        xp_objs, _ = self.clients.resource_registry.find_resources(restype=RT.ExchangePoint, name=exchange_point, id_only=True)
        if not xp_objs:
            raise BadRequest('failed to create an ExchangePoint: ' + exchange_point)
        exchange_point_id = xp_objs[0]

    topic_ids = topic_ids or []

    if not name:
        name = create_unique_identifier()

    # Get topic names and topics
    topic_names = []
    associated_topics = []
    for topic_id in topic_ids:
        topic = self.read_topic(topic_id)
        if topic.exchange_point == exchange_point:
            topic_names.append(self._sanitize(topic.name))
            associated_topics.append(topic_id)
        else:
            log.warning('Attempted to attach stream %s to topic %s with different exchange points', name, topic.name)

    stream = Stream(name=name, description=description)
    routing_key = '.'.join([self._sanitize(name)] + topic_names + ['stream'])
    if len(routing_key) > 255:
        raise BadRequest('There are too many topics for this.')

    stream.stream_route.exchange_point = exchange_point
    stream.stream_route.routing_key = routing_key
    #@todo: validate credentials
    stream.stream_route.credentials = credentials
    stream.stream_name = stream_name
    stream.stream_type = stream_type

    stream_id, rev = self.clients.resource_registry.create(stream)

    self._associate_stream_with_exchange_point(stream_id, exchange_point_id)

    if stream_definition_id:  #@Todo: what if the stream has no definition?!
        self._associate_stream_with_definition(stream_id, stream_definition_id)

    for topic_id in associated_topics:
        self._associate_topic_with_stream(topic_id, stream_id)

    log.info('Stream %s: %s', name, routing_key)

    return stream_id, stream.stream_route
def create_subscription(self, name='', stream_ids=None, exchange_points=None, topic_ids=None,
                        exchange_name='', credentials=None, description=''):
    stream_ids = stream_ids or []
    exchange_points = exchange_points or []
    topic_ids = topic_ids or []

    exchange_name = exchange_name or name
    validate_true(exchange_name, 'Clients must provide an exchange name')
    log.info('Creating Subscription %s for %s <- %s', name, exchange_name, stream_ids or exchange_points or topic_ids)

    if not name:
        name = create_unique_identifier()

    if stream_ids:
        validate_is_instance(stream_ids, list, 'stream ids must be in list format')
    if exchange_points:
        validate_is_instance(exchange_points, list, 'exchange points must be in list format')
    if topic_ids:
        validate_is_instance(topic_ids, list, 'topic ids must be in list format')

    subscription = Subscription(name=name, description=description)
    subscription.exchange_points = exchange_points
    subscription.exchange_name = exchange_name

    subscription_id, rev = self.clients.resource_registry.create(subscription)
    self.container.ex_manager.create_xn_queue(exchange_name)

    xn_ids, _ = self.clients.resource_registry.find_resources(restype=RT.ExchangeName, name=exchange_name, id_only=True)
    if xn_ids:
        xn_id = xn_ids[0]
        self.clients.resource_registry.create_association(xn_id, PRED.hasSubscription, subscription_id)

    #---------------------------------
    # Associations
    #---------------------------------
    for stream_id in stream_ids:
        self._associate_stream_with_subscription(stream_id, subscription_id)
    for topic_id in topic_ids:
        self._associate_topic_with_subscription(topic_id, subscription_id)

    return subscription_id
def create_topic(self, name='', exchange_point='', parent_topic_id='', description=''):
    validate_true(exchange_point, 'An exchange point must be provided for the topic')
    name = name or create_unique_identifier()
    topic = Topic(name=name, description=description, exchange_point=exchange_point)
    if parent_topic_id:
        parent_topic = self.read_topic(parent_topic_id)
        validate_equal(exchange_point, parent_topic.exchange_point, 'Can not make a sub-topic with a different exchange point')
    topic_id, rev = self.clients.resource_registry.create(topic)

    if parent_topic_id:
        self._associate_topic_with_topic(parent_topic_id, topic_id)
    return topic_id
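A short usage sketch for create_topic: sub-topics must share their parent's exchange point, which validate_equal enforces. The client handle and names here are illustrative assumptions.

# Illustrative only: build a two-level topic tree on one exchange point.
root_id = pubsub.create_topic(name='science_data', exchange_point='xp1')   # 'pubsub' is an assumed client handle
child_id = pubsub.create_topic(name='ctd', exchange_point='xp1', parent_topic_id=root_id)
# Passing a different exchange_point for the child would fail the validate_equal check above.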
def create_subscription(self, name='', stream_ids=None, exchange_points=None, topic_ids=None,
                        exchange_name='', credentials=None, description='', data_product_ids=None):
    # Use None instead of a mutable default argument for data_product_ids.
    stream_ids = stream_ids or []
    exchange_points = exchange_points or []
    topic_ids = topic_ids or []
    data_product_ids = data_product_ids or []

    exchange_name = exchange_name or name
    validate_true(exchange_name, 'Clients must provide an exchange name')
    log.info('Creating Subscription %s for %s <- %s', name, exchange_name, stream_ids or exchange_points or topic_ids)

    if not name:
        name = create_unique_identifier()

    if stream_ids:
        validate_is_instance(stream_ids, list, 'stream ids must be in list format')
    if exchange_points:
        validate_is_instance(exchange_points, list, 'exchange points must be in list format')
    if topic_ids:
        validate_is_instance(topic_ids, list, 'topic ids must be in list format')

    subscription = Subscription(name=name, description=description)
    subscription.exchange_points = exchange_points
    subscription.exchange_name = exchange_name

    subscription_id, rev = self.clients.resource_registry.create(subscription)
    self.container.ex_manager.create_xn_queue(exchange_name)

    xn_ids, _ = self.clients.resource_registry.find_resources(restype=RT.ExchangeName, name=exchange_name, id_only=True)
    if xn_ids:
        xn_id = xn_ids[0]
        self.clients.resource_registry.create_association(xn_id, PRED.hasSubscription, subscription_id)

    #---------------------------------
    # Associations
    #---------------------------------
    for stream_id in stream_ids:
        self._associate_stream_with_subscription(stream_id, subscription_id)
    for topic_id in topic_ids:
        self._associate_topic_with_subscription(topic_id, subscription_id)
    for data_product_id in data_product_ids:
        self._associate_data_product_with_subscription(data_product_id, subscription_id)

    return subscription_id
def _start_data_subscribers(self, count):
    """
    Start a subscriber for the parsed stream only, and set up an async result
    that fires when the expected number of samples have arrived.
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # Create streams and subscriptions for each stream named in driver.
    self._data_subscribers = []
    self._samples_received = []
    self._async_sample_result = AsyncResult()

    # A callback for processing subscribed-to data.
    def recv_data(message, stream_route, stream_id):
        log.info('Received message on %s (%s,%s)', stream_id,
                 stream_route.exchange_point, stream_route.routing_key)
        self._samples_received.append(message)
        if len(self._samples_received) == count:
            self._async_sample_result.set()

    for (stream_name, stream_config) in self._stream_config.iteritems():
        if stream_name == 'parsed':
            # Create subscription for parsed stream only.
            stream_id = stream_config['stream_id']

            from pyon.util.containers import create_unique_identifier
            exchange_name = create_unique_identifier("%s_queue" % stream_name)
            self._purge_queue(exchange_name)
            sub = StandaloneStreamSubscriber(exchange_name, recv_data)
            sub.start()
            self._data_subscribers.append(sub)
            log.debug('stream_id: %s', stream_id)
            sub_id = pubsub_client.create_subscription(name=exchange_name, stream_ids=[stream_id])
            pubsub_client.activate_subscription(sub_id)
            # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
            sub.subscription_id = sub_id
def _start_data_subscribers(self, count):
    """
    Start a subscriber for each configured stream, and set up an async result
    that fires when the expected number of samples have arrived.
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    # Create streams and subscriptions for each stream named in driver.
    self._data_subscribers = []
    self._samples_received = []
    self._async_sample_result = AsyncResult()

    # A callback for processing subscribed-to data.
    def recv_data(message, stream_route, stream_id):
        log.info('Received message on %s (%s,%s)', stream_id,
                 stream_route.exchange_point, stream_route.routing_key)
        self._samples_received.append(message)
        if len(self._samples_received) == count:
            self._async_sample_result.set()

    for (stream_name, stream_config) in self._stream_config.iteritems():
        stream_id = stream_config['stream_id']

        # Create subscriptions for each stream.
        from pyon.util.containers import create_unique_identifier
        exchange_name = create_unique_identifier("%s_queue" % stream_name)
        self._purge_queue(exchange_name)
        sub = StandaloneStreamSubscriber(exchange_name, recv_data)
        sub.start()
        self._data_subscribers.append(sub)
        log.debug('stream_id: %s', stream_id)
        sub_id = pubsub_client.create_subscription(name=exchange_name, stream_ids=[stream_id])
        pubsub_client.activate_subscription(sub_id)
        # Bind the subscription to the standalone subscriber (easier cleanup, not good in real practice)
        sub.subscription_id = sub_id
def create_subscription(self, name='', stream_ids=None, exchange_points=None, topic_ids=None,
                        exchange_name='', credentials=None, description=''):
    stream_ids = stream_ids or []
    exchange_points = exchange_points or []
    topic_ids = topic_ids or []

    exchange_name = exchange_name or name
    validate_true(exchange_name, 'Clients must provide an exchange name')
    log.info('Creating Subscription %s for %s <- %s', name, exchange_name, stream_ids or exchange_points or topic_ids)

    if not name:
        name = create_unique_identifier()

    if stream_ids:
        validate_is_instance(stream_ids, list, 'stream ids must be in list format')
    if exchange_points:
        validate_is_instance(exchange_points, list, 'exchange points must be in list format')
    if topic_ids:
        validate_is_instance(topic_ids, list, 'topic ids must be in list format')

    subscription = Subscription(name=name, description=description)
    subscription.exchange_points = exchange_points
    subscription.exchange_name = exchange_name

    subscription_id, rev = self.clients.resource_registry.create(subscription)

    #---------------------------------
    # Associations
    #---------------------------------
    for stream_id in stream_ids:
        self._associate_stream_with_subscription(stream_id, subscription_id)
    for topic_id in topic_ids:
        self._associate_topic_with_subscription(topic_id, subscription_id)

    return subscription_id
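The subscription variants above separate creation from activation. A sketch of the typical client sequence, with an assumed client handle and illustrative names:

# Illustrative client-side sequence; 'pubsub' is an assumed service client handle.
sub_id = pubsub.create_subscription(name='ctd_sub', stream_ids=[stream_id], exchange_name='ctd_queue')
pubsub.activate_subscription(sub_id)    # messages start flowing only after activation
# ... consume from the 'ctd_queue' exchange name ...
pubsub.deactivate_subscription(sub_id)  # assumed counterpart call for teardown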
def create_data_process(self, data_process_definition_id='', in_data_product_ids=None,
                        out_data_products=None, configuration=None):
    """
    @param data_process_definition_id: Object with definition of the transform to apply to the input data product
    @param in_data_product_ids: IDs of the input data products
    @param out_data_products: dict mapping binding names to output data product IDs
    @retval data_process_id: ID of the newly created data process object
    """
    inform = "Input Data Product: " + str(in_data_product_ids) + \
             "\nTransformed by: " + str(data_process_definition_id) + \
             "\nTo create output Product: " + str(out_data_products)
    log.debug("DataProcessManagementService:create_data_process()\n" + inform)

    if configuration is None:
        configuration = {}
    if in_data_product_ids is None:
        in_data_product_ids = []
    if out_data_products is None:
        out_data_products = {}

    # Create and store a new DataProcess with the resource registry
    log.debug("DataProcessManagementService:create_data_process - Create and store a new DataProcess with the resource registry")
    data_process_def_obj = self.read_data_process_definition(data_process_definition_id)

    data_process_name = create_unique_identifier("process_" + data_process_def_obj.name)
    self.data_process = IonObject(RT.DataProcess, name=data_process_name)
    data_process_id, version = self.clients.resource_registry.create(self.data_process)
    log.debug("DataProcessManagementService:create_data_process - data_process_id: %s", str(data_process_id))

    # Associate with dataProcessDefinition
    aid = self.clients.resource_registry.create_association(data_process_id, PRED.hasProcessDefinition, data_process_definition_id)
    log.debug("DataProcessManagementService:create_data_process assoc to data process definition: %s", str(aid))

    # Register the data process instance as a data producer with DataAcquisitionMgmtSvc
    data_producer_id = self.clients.data_acquisition_management.register_process(data_process_id)
    log.debug("DataProcessManagementService:create_data_process register process with DataAcquisitionMgmtSvc: data_producer_id: %s (L4-CI-SA-RQ-181)", str(data_producer_id))

    # Register each output data product with DAMS to create DataProducer links
    output_stream_dict = {}
    if not out_data_products:
        raise BadRequest("Data Process must have output product(s) specified %s" % str(data_process_definition_id))
    for name, out_data_product_id in out_data_products.iteritems():

        # check that the product is not already associated with a producer
        producer_ids, _ = self.clients.resource_registry.find_objects(out_data_product_id, PRED.hasDataProducer, RT.DataProducer, True)
        if producer_ids:
            raise BadRequest("Data Product should not already be associated to a DataProducer %s hasDataProducer %s" %
                             (str(data_process_id), str(producer_ids[0])))

        # Assign each output Data Product to this producer resource
        out_data_product_obj = self.clients.resource_registry.read(out_data_product_id)
        if not out_data_product_obj:
            raise NotFound("Output Data Product %s does not exist" % out_data_product_id)

        # Associate with DataProcess: register as an output product for this process
        log.debug("DataProcessManagementService:create_data_process link data process %s and output data product: %s (L4-CI-SA-RQ-260)",
                  str(data_process_id), str(out_data_product_id))
        self.clients.data_acquisition_management.assign_data_product(data_process_id, out_data_product_id)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.clients.resource_registry.find_objects(out_data_product_id, PRED.hasStream, RT.Stream, True)
        log.debug("DataProcessManagementService:create_data_process retrieve out data prod streams: %s", str(stream_ids))
        if not stream_ids:
            raise NotFound("No Stream created for output Data Product " + str(out_data_product_id))
        if len(stream_ids) != 1:
            raise BadRequest("Data Product should only have ONE stream at this time " + str(out_data_product_id))
        output_stream_dict[name] = stream_ids[0]
        log.debug("DataProcessManagementService:create_data_process - out_stream_id: %s", str(output_stream_dict[name]))

    # check if data process definition has lookup tables attached that should be passed to the transform as part of configuration
    self._find_lookup_tables(data_process_definition_id, configuration)

    #Todo: currently this is handled explicitly after creating the data product; that code then calls DAMS:assign_data_product
    log.debug("DataProcessManagementService:create_data_process associate data process workflows with source data products %s hasInputProducts %s (L4-CI-SA-RQ-260)",
              str(data_process_id), str(in_data_product_ids))
    for in_data_product_id in in_data_product_ids:
        self.clients.resource_registry.create_association(data_process_id, PRED.hasInputProduct, in_data_product_id)

        # check if in data product is attached to an instrument; check InstrumentDevice and InstrumentModel for lookup table attachments
        instdevice_ids, _ = self.clients.resource_registry.find_subjects(RT.InstrumentDevice, PRED.hasOutputProduct, in_data_product_id, True)
        for instdevice_id in instdevice_ids:
            log.debug("DataProcessManagementService:create_data_process instrument device_id assoc to the input data product of this data process: %s (L4-CI-SA-RQ-231)",
                      str(instdevice_id))
            self._find_lookup_tables(instdevice_id, configuration)
            instmodel_ids, _ = self.clients.resource_registry.find_objects(instdevice_id, PRED.hasModel, RT.InstrumentModel, True)
            for instmodel_id in instmodel_ids:
                log.debug("DataProcessManagementService:create_data_process instmodel_id assoc to the instDevice: %s", str(instmodel_id))
                self._find_lookup_tables(instmodel_id, configuration)

    # Create subscription from in_data_product, which should already be associated with a stream via the Data Producer
    in_stream_ids = []
    # get the streams associated with the IN data products
    for in_data_product_id in in_data_product_ids:
        log.debug("DataProcessManagementService:create_data_process - get the stream associated with this IN data product")
        stream_ids, _ = self.clients.resource_registry.find_objects(in_data_product_id, PRED.hasStream, RT.Stream, True)
        if not stream_ids:
            raise NotFound("No Stream created for this IN Data Product " + str(in_data_product_id))
        if len(stream_ids) != 1:
            raise BadRequest("IN Data Product should only have ONE stream at this time " + str(in_data_product_id))
        log.debug("DataProcessManagementService:create_data_process - IN data product: %s in_stream_id: %s",
                  str(in_data_product_id), str(stream_ids[0]))
        in_stream_ids.append(stream_ids[0])

    # create a subscription to the input streams
    log.debug("DataProcessManagementService:create_data_process - create a subscription to the input stream")
    query = StreamQuery(stream_ids=in_stream_ids)
    input_subscription_id = self.clients.pubsub_management.create_subscription(query=query, exchange_name=data_process_name)
    log.debug("DataProcessManagementService:create_data_process - input_subscription_id: %s", str(input_subscription_id))

    # add the subscription id to the resource for clean up later
    data_process_obj = self.clients.resource_registry.read(data_process_id)
    data_process_obj.input_subscription_id = input_subscription_id
    self.clients.resource_registry.update(data_process_obj)

    procdef_ids, _ = self.clients.resource_registry.find_objects(data_process_definition_id, PRED.hasProcessDefinition, RT.ProcessDefinition, id_only=True)
    if not procdef_ids:
        raise BadRequest("Cannot find associated ProcessDefinition for DataProcessDefinition id=%s" % data_process_definition_id)
    process_definition_id = procdef_ids[0]

    # Launch the transform process
    log.debug("DataProcessManagementService:create_data_process - Launch the transform process:")
    log.debug("  input_subscription_id: %s", str(input_subscription_id))
    log.debug("  out_stream_id: %s", str(output_stream_dict))
    log.debug("  process_definition_id: %s", str(process_definition_id))
    log.debug("  data_process_id: %s", str(data_process_id))
    transform_id = self.clients.transform_management.create_transform(name=data_process_id,
                                                                      description=data_process_id,
                                                                      in_subscription_id=input_subscription_id,
                                                                      out_streams=output_stream_dict,
                                                                      process_definition_id=process_definition_id,
                                                                      configuration=configuration)
    log.debug("DataProcessManagementService:create_data_process - transform_id: %s", str(transform_id))

    self.clients.resource_registry.create_association(data_process_id, PRED.hasTransform, transform_id)

    # TODO: Flesh out details of transform mgmt svc schedule method
    # self.clients.transform_management_service.schedule_transform(transform_id)

    return data_process_id
def test_create_unique_identifier(self):
    id = create_unique_identifier('abc123')
    self.assertIn('abc123', id)
    id = create_unique_identifier()
    self.assertNotIn('abc123', id)
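The test above pins down the contract of create_unique_identifier: an optional prefix that appears in the result plus a random component. A minimal sketch consistent with that contract, assuming a uuid4-based suffix; this is not necessarily the actual pyon implementation.

import uuid

def create_unique_identifier(prefix=''):
    # Random hex suffix provides uniqueness; the prefix, when given, stays visible in the result.
    uid = uuid.uuid4().hex
    return '%s_%s' % (prefix, uid) if prefix else uid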
def create_stream(self, name='', exchange_point='', topic_ids=None, credentials=None,
                  stream_definition_id='', description=''):

    # Argument Validation
    if name and self.clients.resource_registry.find_resources(restype=RT.Stream, name=name, id_only=True)[0]:
        raise Conflict('The named stream already exists')
    validate_true(exchange_point, 'An exchange point must be specified')

    exchange_point_id = None
    try:
        xp_obj = self.clients.exchange_management.read_exchange_point(exchange_point)
        exchange_point_id = exchange_point
        exchange_point = xp_obj.name
    except NotFound:
        self.container.ex_manager.create_xp(exchange_point)
        xp_objs, _ = self.clients.resource_registry.find_resources(restype=RT.ExchangePoint, name=exchange_point, id_only=True)
        if not xp_objs:
            raise BadRequest('failed to create an ExchangePoint: ' + exchange_point)
        exchange_point_id = xp_objs[0]

    topic_ids = topic_ids or []

    if not name:
        name = create_unique_identifier()

    # Get topic names and topics
    topic_names = []
    associated_topics = []
    for topic_id in topic_ids:
        topic = self.read_topic(topic_id)
        if topic.exchange_point == exchange_point:
            topic_names.append(self._sanitize(topic.name))
            associated_topics.append(topic_id)
        else:
            log.warning('Attempted to attach stream %s to topic %s with different exchange points', name, topic.name)

    stream = Stream(name=name, description=description)
    routing_key = '.'.join([self._sanitize(name)] + topic_names + ['stream'])
    if len(routing_key) > 255:
        raise BadRequest('There are too many topics for this.')

    stream.stream_route.exchange_point = exchange_point
    stream.stream_route.routing_key = routing_key
    #@todo: validate credentials
    stream.stream_route.credentials = credentials

    stream_id, rev = self.clients.resource_registry.create(stream)

    self._associate_stream_with_exchange_point(stream_id, exchange_point_id)

    if stream_definition_id:  #@Todo: what if the stream has no definition?!
        self._associate_stream_with_definition(stream_id, stream_definition_id)

    for topic_id in associated_topics:
        self._associate_topic_with_stream(topic_id, stream_id)

    log.info('Stream %s: %s', name, routing_key)

    return stream_id, stream.stream_route
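A usage sketch for create_stream: the returned (stream_id, stream_route) pair is what a publisher needs. StandaloneStreamPublisher is assumed to mirror the StandaloneStreamSubscriber used elsewhere in this section; the ids and names are illustrative.

# Illustrative: create a stream, then publish to it.
stream_id, route = pubsub.create_stream(name='ctd_parsed',
                                        exchange_point='science_data',
                                        stream_definition_id=stream_def_id)
publisher = StandaloneStreamPublisher(stream_id, route)  # assumed helper, analogous to the subscriber above
publisher.publish(granule)  # 'granule' is an assumed, previously built payload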
def initiate_realtime_visualization(self, data_product_id='', visualization_parameters=None, callback=""):
    """Initial request required to start a realtime chart for the specified data product. Returns a user specific token associated
    with the request that will be required for subsequent requests when polling data.

    Note : The in_product_type is a temporary fix for specifying the type of data product to the service

    @param data_product_id str
    @param in_product_type str
    @param query str
    @param callback str
    @throws NotFound Throws if specified data product id or its visualization product does not exist
    """
    query = None
    if visualization_parameters:
        if visualization_parameters.has_key('query'):
            query = visualization_parameters['query']

    # Perform a look up to check and see if the DP is indeed a realtime GDT stream
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")
    data_product = self.clients.resource_registry.read(data_product_id)
    if not data_product:
        raise NotFound("Data product %s does not exist" % data_product_id)

    data_product_stream_id = None
    workflow_def_id = None

    # Check to see if the workflow definition already exists
    workflow_def_ids, _ = self.clients.resource_registry.find_resources(restype=RT.WorkflowDefinition, name='Realtime_Google_DT', id_only=True)
    if len(workflow_def_ids) > 0:
        workflow_def_id = workflow_def_ids[0]
    else:
        workflow_def_id = self._create_google_dt_workflow_def()

    # Create and start the workflow. Takes about 4 seconds.
    workflow_id, workflow_product_id = self.clients.workflow_management.create_data_process_workflow(workflow_def_id, data_product_id, timeout=20)

    # detect the output data product of the workflow
    workflow_dp_ids, _ = self.clients.resource_registry.find_objects(workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
    if len(workflow_dp_ids) != 1:
        raise ValueError("Workflow Data Product ids representing output DP must contain exactly one entry")

    # find associated stream id with the output
    workflow_output_stream_ids, _ = self.clients.resource_registry.find_objects(workflow_dp_ids[len(workflow_dp_ids) - 1], PRED.hasStream, None, True)
    data_product_stream_id = workflow_output_stream_ids

    # Create a queue to collect the stream granules - idempotency saves the day!
    query_token = create_unique_identifier('user_queue')
    xq = self.container.ex_manager.create_xn_queue(query_token)

    subscription_id = self.clients.pubsub_management.create_subscription(
        stream_ids=data_product_stream_id,
        exchange_name=query_token,
        name=query_token
    )

    # after the queue has been created it is safe to activate the subscription
    self.clients.pubsub_management.activate_subscription(subscription_id)

    if callback == "":
        return query_token
    else:
        return callback + "(\"" + query_token + "\")"
def template_tst_deployment_context(self, context=None):
    """
    Creates a minimal deployment: 1 instrument, 1 site.  A deployment context must be provided.
    """
    c = self.client
    c2 = DotDict()
    c2.resource_registry = self.client.RR

    log.info("Create an instrument model")
    instrument_model_id = self.perform_fcruf_script(RT.InstrumentModel, "instrument_model", self.client.IMS)

    log.info("Create an instrument device")
    instrument_device_id = self.perform_fcruf_script(RT.InstrumentDevice, "instrument_device", self.client.IMS)

    log.info("Create instrument site")
    instrument_site_id = self.perform_fcruf_script(RT.InstrumentSite, "instrument_site", self.client.OMS)

    log.info("Associate instrument model with instrument site")
    self.perform_association_script(c.OMS.assign_instrument_model_to_instrument_site,
                                    self.RR2.find_instrument_sites_by_instrument_model_using_has_model,
                                    self.RR2.find_instrument_models_of_instrument_site_using_has_model,
                                    instrument_site_id,
                                    instrument_model_id)

    log.info("Associate instrument model with instrument device")
    self.perform_association_script(c.IMS.assign_instrument_model_to_instrument_device,
                                    c.IMS.find_instrument_device_by_instrument_model,
                                    c.IMS.find_instrument_model_by_instrument_device,
                                    instrument_device_id,
                                    instrument_model_id)

    log.info("Create a stream definition for the data from the ctd simulator")
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    ctd_stream_def_id = self.client.PSMS.create_stream_definition(name='Simulated CTD data', parameter_dictionary_id=pdict_id)

    log.info("Create an IonObject for a data product")
    dp_obj = self.create_data_product_obj()
    dp_obj.name = create_unique_identifier('Inst Data Product')
    inst_data_product_id = c.DPMS.create_data_product(dp_obj, ctd_stream_def_id)

    # assign data products appropriately
    c.DAMS.assign_data_product(input_resource_id=instrument_device_id,
                               data_product_id=inst_data_product_id)

    deployment_obj = any_old(RT.Deployment, dict(context=context))
    deployment_id = c.OMS.create_deployment(deployment_obj)

    c.OMS.assign_site_to_deployment(instrument_site_id, deployment_id)
    c.OMS.assign_device_to_deployment(instrument_device_id, deployment_id)

    c.OMS.activate_deployment(deployment_id, True)

    # cleanup
    self.RR2.pluck(instrument_model_id)
    self.RR2.pluck(deployment_id)
    self.RR2.pluck(instrument_device_id)
    c.IMS.force_delete_instrument_model(instrument_model_id)
    c.IMS.force_delete_instrument_device(instrument_device_id)
    c.OMS.force_delete_instrument_site(instrument_site_id)
    c.OMS.force_delete_deployment(deployment_id)
def create_data_process(self, data_process_definition_id='', in_data_product_ids=None,
                        out_data_products=None, configuration=None):
    """
    @param data_process_definition_id: Object with definition of the process to apply to the input data product
    @param in_data_product_ids: IDs of the input data products
    @param out_data_products: dict mapping binding names to output data product IDs
    @retval data_process_id: ID of the newly created data process object
    """

    inform = "Input Data Product: " + str(in_data_product_ids) + \
             "\nTransformed by: " + str(data_process_definition_id) + \
             "\nTo create output Product: " + str(out_data_products) + "\n"
    log.debug("DataProcessManagementService:create_data_process() method called with parameters:\n" + inform)

    #---------------------------------------------------------------------------------------
    # Initialize
    #---------------------------------------------------------------------------------------
    configuration = configuration or DotDict()

    validate_is_not_none(out_data_products, "No output data products passed in")

    #---------------------------------------------------------------------------------------
    # Read the data process definition
    #---------------------------------------------------------------------------------------
    data_process_definition = self.read_data_process_definition(data_process_definition_id)

    #---------------------------------------------------------------------------------------
    # Read the output bindings from the definition
    #---------------------------------------------------------------------------------------
    output_bindings = data_process_definition.output_bindings

    #---------------------------------------------------------------------------------------
    # Find the process definition associated with this data process definition.
    # From the process definition, we can get the module and class to run....
    #---------------------------------------------------------------------------------------
    procdef_ids, _ = self.clients.resource_registry.find_objects(data_process_definition_id,
                                                                 PRED.hasProcessDefinition,
                                                                 RT.ProcessDefinition,
                                                                 id_only=True)
    if not procdef_ids:
        raise BadRequest("Cannot find associated ProcessDefinition for DataProcessDefinition id=%s" % data_process_definition_id)
    process_definition_id = procdef_ids[0]

    #---------------------------------------------------------------------------------------
    # Create a data process object and register it
    #---------------------------------------------------------------------------------------

    # get the name of the data process and create an IonObject for it
    data_process_name = create_unique_identifier("process_" + data_process_definition.name)
    data_process_obj = IonObject(RT.DataProcess, name=data_process_name)

    # register the data process
    data_process_id, version = self.clients.resource_registry.create(data_process_obj)
    data_process_obj = self.clients.resource_registry.read(data_process_id)

    #---------------------------------------------------------------------------------------
    # Make the necessary associations, registering
    #---------------------------------------------------------------------------------------
    #todo check if this assoc is needed?
    # Associate the data process with the data process definition
    self.clients.resource_registry.create_association(data_process_id, PRED.hasProcessDefinition, data_process_definition_id)

    # Register the data process instance as a data producer with DataAcquisitionMgmtSvc
    data_producer_id = self.clients.data_acquisition_management.register_process(data_process_id)
    log.debug("DataProcessManagementService:create_data_process register process "
              "with DataAcquisitionMgmtSvc: data_producer_id: %s (L4-CI-SA-RQ-181)", str(data_producer_id))

    #---------------------------------------------------------------------------------------
    # Register each output data product with DAMS to create DataProducer links
    #---------------------------------------------------------------------------------------
    output_stream_dict = {}

    if not out_data_products:
        raise BadRequest("Data Process must have output product(s) specified %s" % str(data_process_definition_id))

    for binding, output_data_product_id in out_data_products.iteritems():

        # check that the product is not already associated with a producer
        producer_ids, _ = self.clients.resource_registry.find_objects(output_data_product_id, PRED.hasDataProducer, RT.DataProducer, True)
        if producer_ids:
            raise BadRequest("Data Product should not already be associated to a DataProducer %s hasDataProducer %s" %
                             (str(data_process_id), str(producer_ids[0])))

        # Assign each output Data Product to this producer resource
        output_data_product_obj = self.clients.resource_registry.read(output_data_product_id)
        if not output_data_product_obj:
            raise NotFound("Output Data Product %s does not exist" % output_data_product_id)

        # Associate with DataProcess: register as an output product for this process
        log.debug("Link data process %s and output data product: %s (L4-CI-SA-RQ-260)",
                  str(data_process_id), str(output_data_product_id))
        self.clients.data_acquisition_management.assign_data_product(input_resource_id=data_process_id,
                                                                     data_product_id=output_data_product_id)

        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.clients.resource_registry.find_objects(output_data_product_id, PRED.hasStream, RT.Stream, True)
        if not stream_ids:
            raise NotFound("No Stream created for output Data Product " + str(output_data_product_id))
        if len(stream_ids) != 1:
            raise BadRequest("Data Product should only have ONE stream at this time " + str(output_data_product_id))
        output_stream_dict[binding] = stream_ids[0]

    #------------------------------------------------------------------------------------------------------------------------------------------
    # Check for attached objects and put them into the configuration
    #------------------------------------------------------------------------------------------------------------------------------------------

    # check for attachments in data process definition
    configuration = self._find_lookup_tables(data_process_definition_id, configuration)
    input_stream_ids = []

    if in_data_product_ids:
        for in_data_product_id in in_data_product_ids:

            self.clients.resource_registry.create_association(data_process_id, PRED.hasInputProduct, in_data_product_id)
            log.debug("Associate data process workflows with source data products %s "
                      "hasInputProducts %s (L4-CI-SA-RQ-260)", str(data_process_id), str(in_data_product_ids))

            # check if in data product is attached to an instrument; check InstrumentDevice and InstrumentModel for lookup table attachments
            instdevice_ids, _ = self.clients.resource_registry.find_subjects(RT.InstrumentDevice, PRED.hasOutputProduct,
                                                                             in_data_product_id, True)
            for instdevice_id in instdevice_ids:
                log.debug("Instrument device_id assoc to the input data product of this data process: %s (L4-CI-SA-RQ-231)",
                          str(instdevice_id))

                # check for attachments in instrument device
                configuration = self._find_lookup_tables(instdevice_id, configuration)
                instmodel_ids, _ = self.clients.resource_registry.find_objects(instdevice_id, PRED.hasModel, RT.InstrumentModel, True)

                for instmodel_id in instmodel_ids:
                    # check for attachments in instrument model
                    configuration = self._find_lookup_tables(instmodel_id, configuration)

        #------------------------------------------------------------------------------------------------------------------------------------------
        # Get the input stream from the input_data_product, which should already be associated with a stream via the Data Producer
        #------------------------------------------------------------------------------------------------------------------------------------------
        input_stream_ids = self._get_input_stream_ids(in_data_product_ids)

    #------------------------------------------------------------------------------------------------------------------------------------------
    # Create subscription to the input stream
    #------------------------------------------------------------------------------------------------------------------------------------------
    input_subscription_id = self.clients.pubsub_management.create_subscription(name=data_process_name, stream_ids=input_stream_ids)

    #------------------------------------------------------------------------------------------------------------------------------------------
    # Add the subscription id to the data process
    #------------------------------------------------------------------------------------------------------------------------------------------
    data_process_obj.input_subscription_id = input_subscription_id

    log.info("Launching the process")
    debug_str = "\n\tQueue Name: %s\n\tOutput Streams: %s\n\tProcess Definition ID: %s\n\tConfiguration: %s" % \
                (data_process_name, output_stream_dict, process_definition_id, configuration)
    log.debug(debug_str)

    pid = self._launch_process(queue_name=data_process_name,
                               out_streams=output_stream_dict,
                               process_definition_id=process_definition_id,
                               configuration=configuration)

    data_process_obj.process_id = pid
    self.clients.resource_registry.update(data_process_obj)
    return data_process_id
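A sketch of how a caller wires up the newer create_data_process variant above: map each output binding to a data product, create the process, then activate it so the input subscription starts consuming. The ids here are illustrative assumptions.

# Illustrative wiring; the ids come from earlier create_* calls.
out_products = {'output': l1_data_product_id}  # binding name -> output data product id
data_process_id = dpms.create_data_process(data_process_definition_id=dpd_id,
                                           in_data_product_ids=[l0_data_product_id],
                                           out_data_products=out_products)
dpms.activate_data_process(data_process_id)  # starts the process consuming its input subscription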
def create_data_process_workflow(self, workflow_definition_id='', input_data_product_id='',
                                 persist_workflow_data_product=True, output_data_product_name='',
                                 configuration=None):
    """Instantiates a Data Process Workflow specified by a Workflow Definition resource and an input data product id.
    Returns the id of the workflow and the data product id for the final output product.

    @param workflow_definition_id str
    @param input_data_product_id str
    @param persist_workflow_data_product bool
    @param output_data_product_name str
    @param configuration IngestionConfiguration
    @retval workflow_id str
    @retval output_data_product_id str
    @throws BadRequest if any of the required parameters are not set
    @throws NotFound object with specified id does not exist
    """
    # Avoid a mutable default argument for configuration.
    configuration = configuration or {}

    if not workflow_definition_id:
        raise BadRequest("The workflow_definition_id parameter is missing")
    workflow_definition = self.clients.resource_registry.read(workflow_definition_id)
    if not workflow_definition:
        raise NotFound("WorkflowDefinition %s does not exist" % workflow_definition_id)

    if not input_data_product_id:
        raise BadRequest("The input_data_product_id parameter is missing")
    input_data_product = self.clients.resource_registry.read(input_data_product_id)
    if not input_data_product:
        raise NotFound("The input data product %s does not exist" % input_data_product_id)

    if output_data_product_name:
        workflow_name = '%s_%s' % (workflow_definition.name, output_data_product_name)
    else:
        workflow_name = create_unique_identifier('workflow_%s' % (workflow_definition.name))

    # Create Workflow object and associations to track the instantiation of a workflow definition.
    workflow = IonObject(RT.Workflow, name=workflow_name,
                         persist_process_output_data=persist_workflow_data_product,
                         output_data_product_name=output_data_product_name,
                         configuration=configuration)
    workflow_id, _ = self.clients.resource_registry.create(workflow)
    self.clients.resource_registry.create_association(workflow_id, PRED.hasDefinition, workflow_definition_id)
    self.clients.resource_registry.create_association(workflow_id, PRED.hasInputProduct, input_data_product_id)

    # Setup the input data product id as the initial input product stream
    data_process_input_dp_id = input_data_product_id

    output_data_products = {}
    output_data_product_id = None  # Overall product id to return

    # Iterate through the workflow steps to setup the data processes and connect them together.
    for wf_step in workflow_definition.workflow_steps:
        log.debug("wf_step.data_process_definition_id: %s", wf_step.data_process_definition_id)

        data_process_definition = self.clients.resource_registry.read(wf_step.data_process_definition_id)

        for binding, stream_definition_id in data_process_definition.output_bindings.iteritems():

            #--------------------------------------------------------------------------------
            # Create an output data product for each binding/stream definition
            #--------------------------------------------------------------------------------

            # Handle the last step differently as it is the output of the workflow.
            if wf_step == workflow_definition.workflow_steps[-1] and output_data_product_name:
                data_product_name = output_data_product_name
            else:
                data_product_name = create_unique_identifier('%s_%s_%s' % (workflow_name, data_process_definition.name, binding))

            tdom, sdom = time_series_domain()

            data_product_obj = IonObject(RT.DataProduct,
                                         name=data_product_name,
                                         description=data_process_definition.description,
                                         temporal_domain=tdom.dump(),
                                         spatial_domain=sdom.dump())

            data_product_id = self.clients.data_product_management.create_data_product(data_product_obj,
                                                                                       stream_definition_id=stream_definition_id)

            # Persist if necessary
            if wf_step == workflow_definition.workflow_steps[-1] and persist_workflow_data_product:
                self.clients.data_product_management.activate_data_product_persistence(data_product_id=data_product_id)
            else:
                if wf_step.persist_process_output_data:
                    self.clients.data_product_management.activate_data_product_persistence(data_product_id=data_product_id)

            # Associate the intermediate data products with the workflow
            self.clients.resource_registry.create_association(workflow_id, PRED.hasDataProduct, data_product_id)

            output_data_products[binding] = data_product_id

        # May have to merge configuration blocks where the workflow entries will override the configuration in a step
        if configuration:
            process_config = dict(wf_step.configuration.items() + configuration.items())
        else:
            process_config = wf_step.configuration

        data_process_id = self.clients.data_process_management.create_data_process(data_process_definition._id,
                                                                                   [data_process_input_dp_id],
                                                                                   output_data_products,
                                                                                   configuration=process_config)
        self.clients.data_process_management.activate_data_process(data_process_id)

        # Track the data process with an association to the workflow
        self.clients.resource_registry.create_association(workflow_id, PRED.hasDataProcess, data_process_id)

        # The last iteration of the loop leaves the output product id of the final step
        output_data_product_id = output_data_products.values()[0]

        # Save the id of the output data stream for input to the next process in the workflow.
        data_process_input_dp_id = output_data_products.values()[0]

    # Track the output data product with an association
    self.clients.resource_registry.create_association(workflow_id, PRED.hasOutputProduct, output_data_product_id)

    return workflow_id, output_data_product_id
def create_data_process_workflow(self, workflow_definition_id='', input_data_product_id=''):
    """Instantiates a Data Process Workflow specified by a Workflow Definition resource and an input data product id.
    Returns the id of the workflow and the data product id for the final output product.

    @param workflow_definition_id str
    @param input_data_product_id str
    @retval workflow_id str
    @retval output_data_product_id str
    @throws BadRequest if any of the required parameters are not set
    @throws NotFound object with specified id does not exist
    """
    if not workflow_definition_id:
        raise BadRequest("The workflow_definition_id parameter is missing")
    workflow_definition = self.clients.resource_registry.read(workflow_definition_id)
    if not workflow_definition:
        raise NotFound("WorkflowDefinition %s does not exist" % workflow_definition_id)

    if not input_data_product_id:
        raise BadRequest("The input_data_product_id parameter is missing")
    input_data_product = self.clients.resource_registry.read(input_data_product_id)
    if not input_data_product:
        raise NotFound("The input data product %s does not exist" % input_data_product_id)

    # Create Workflow object and associations to track the instantiation of a workflow definition.
    workflow = IonObject(RT.Workflow, name=workflow_definition.name)
    workflow_id, _ = self.clients.resource_registry.create(workflow)
    self.clients.resource_registry.create_association(workflow_id, PRED.hasDefinition, workflow_definition_id)
    self.clients.resource_registry.create_association(workflow_id, PRED.hasInputProduct, input_data_product_id)

    # Setup the input data product id as the initial input product stream
    data_process_input_dp_id = input_data_product_id

    output_data_product_id = None

    # Iterate through the workflow steps to setup the data processes and connect them together.
    for wf_step in workflow_definition.workflow_steps:
        log.debug("wf_step.data_process_definition_id: %s", wf_step.data_process_definition_id)

        data_process_definition = self.clients.resource_registry.read(wf_step.data_process_definition_id)

        # Find the link between the output Stream Definition resource and the Data Process Definition resource
        stream_ids, _ = self.clients.resource_registry.find_objects(data_process_definition._id,
                                                                    PRED.hasStreamDefinition,
                                                                    RT.StreamDefinition,
                                                                    id_only=True)
        if not stream_ids:
            raise Inconsistent("The data process definition %s is missing an association to an output stream definition" % data_process_definition._id)
        process_output_stream_def_id = stream_ids[0]

        # If an output name has been specified then use it for the final output product name
        if wf_step.output_data_product_name != '':
            data_product_name = wf_step.output_data_product_name
        else:
            # Concatenate the names of the workflow and data process definition for the name of the data product
            # output, plus a unique identifier for multiple instances of a workflow definition.
            data_product_name = create_unique_identifier(workflow_definition.name + '_' + data_process_definition.name)

        # Create the output data product of the transform
        transform_dp_obj = IonObject(RT.DataProduct, name=data_product_name, description=data_process_definition.description)
        transform_dp_id = self.clients.data_product_management.create_data_product(transform_dp_obj, process_output_stream_def_id)
        if wf_step.persist_process_output_data:
            self.clients.data_product_management.activate_data_product_persistence(data_product_id=transform_dp_id,
                                                                                   persist_data=wf_step.persist_process_output_data,
                                                                                   persist_metadata=wf_step.persist_process_output_data)

        # Associate the intermediate data products with the workflow
        self.clients.resource_registry.create_association(workflow_id, PRED.hasDataProduct, transform_dp_id)

        # Create the transform data process
        log.debug("create data_process and start it")
        data_process_id = self.clients.data_process_management.create_data_process(data_process_definition._id,
                                                                                   [data_process_input_dp_id],
                                                                                   {'output': transform_dp_id},
                                                                                   configuration=wf_step.configuration)
        self.clients.data_process_management.activate_data_process(data_process_id)

        # Track the data process with an association to the workflow
        self.clients.resource_registry.create_association(workflow_id, PRED.hasDataProcess, data_process_id)

        # The last iteration of the loop leaves the output product id of the final step
        output_data_product_id = transform_dp_id

        # Save the id of the output data stream for input to the next process in the workflow.
        data_process_input_dp_id = transform_dp_id

    # Track the output data product with an association
    self.clients.resource_registry.create_association(workflow_id, PRED.hasOutputProduct, output_data_product_id)

    return workflow_id, output_data_product_id
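Both workflow variants above thread each step's output product into the next step's input. A hypothetical distillation of that chaining pattern in isolation; make_output_product and start_process stand in for the create/activate calls and are not real service methods:

# Hypothetical distillation of the chaining loop used by both variants above.
current_input_id = input_data_product_id
for step in workflow_definition.workflow_steps:
    output_id = make_output_product(step)                          # stands in for the data product creation calls
    process_id = start_process(step, current_input_id, output_id)  # stands in for create + activate data process
    current_input_id = output_id                                   # the next step consumes this step's output
output_data_product_id = current_input_id                          # the last output is the workflow's product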
def initiate_realtime_visualization_data(self, data_product_id='', visualization_parameters=None):
    """Initial request required to start a realtime chart for the specified data product. Returns a user specific token associated
    with the request that will be required for subsequent requests when polling data.

    @param data_product_id    str
    @param visualization_parameters    str  JSON-encoded dict; may contain a 'query' entry
    @throws NotFound    Throws if specified data product id or its visualization product does not exist
    """
    log.debug("initiate_realtime_visualization Vis worker: %s ", self.id)

    # Load the visualization_parameters, which are passed as a JSON string, into their dict representation
    query = None
    vp = None
    if visualization_parameters:
        vp = simplejson.loads(visualization_parameters)
        if vp and 'query' in vp:
            query = vp['query']

    # Perform a look up to check and see if the DP is indeed a realtime GDT stream
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")
    data_product = self.clients.resource_registry.read(data_product_id)
    if not data_product:
        raise NotFound("Data product %s does not exist" % data_product_id)

    # Look for a workflow which is already executing for this data product
    workflow_ids, _ = self.clients.resource_registry.find_subjects(subject_type=RT.Workflow,
                                                                   predicate=PRED.hasInputProduct,
                                                                   object=data_product_id)

    # If an existing workflow is not found then create one
    if len(workflow_ids) == 0:
        # TODO - Add this workflow definition to preload data
        # Check to see if the workflow definition already exists
        workflow_def_ids, _ = self.clients.resource_registry.find_resources(restype=RT.WorkflowDefinition,
                                                                            name='Realtime_HighCharts',
                                                                            id_only=True)
        if len(workflow_def_ids) > 0:
            workflow_def_id = workflow_def_ids[0]
        else:
            raise NotFound("Workflow definition named 'Realtime_HighCharts' does not exist")

        # Create and start the workflow
        workflow_id, workflow_product_id = self.clients.workflow_management.create_data_process_workflow(
            workflow_definition_id=workflow_def_id,
            input_data_product_id=data_product_id,
            persist_workflow_data_product=PERSIST_REALTIME_DATA_PRODUCTS,
            timeout=self.create_workflow_timeout)
    else:
        workflow_id = workflow_ids[0]._id

        # Detect the output data product of the existing workflow
        workflow_dp_ids, _ = self.clients.resource_registry.find_objects(subject=workflow_id,
                                                                         predicate=PRED.hasOutputProduct,
                                                                         object_type=RT.DataProduct,
                                                                         id_only=True)
        if len(workflow_dp_ids) < 1:
            log.error("Could not find output data product for existing workflow: %s", workflow_id)
            raise NotFound("Could not find output data product for existing workflow: %s" % workflow_id)

        workflow_product_id = workflow_dp_ids[0]

    # Find the stream id associated with the output
    workflow_output_stream_ids, _ = self.clients.resource_registry.find_objects(subject=workflow_product_id,
                                                                                predicate=PRED.hasStream,
                                                                                object_type=None,
                                                                                id_only=True)

    # Create a queue to collect the stream granules - idempotency saves the day!
    query_token = create_unique_identifier(USER_VISUALIZATION_QUEUE)
    xq = self.container.ex_manager.create_xn_queue(query_token)

    subscription_id = self.clients.pubsub_management.create_subscription(
        stream_ids=workflow_output_stream_ids,
        exchange_name=query_token,
        name=query_token)

    # After the queue has been created it is safe to activate the subscription
    self.clients.pubsub_management.activate_subscription(subscription_id)

    ret_dict = {'rt_query_token': query_token}
    return simplejson.dumps(ret_dict)
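# A sketch of a caller consuming the JSON response returned above. simplejson is the
# same module the service uses; vis_client and dp_id are hypothetical handles standing
# in for a service client and an existing data product id.
import simplejson

resp = vis_client.initiate_realtime_visualization_data(data_product_id=dp_id)
rt_query_token = simplejson.loads(resp)['rt_query_token']
# The token names the per-user queue created above and is passed back to the
# service on each subsequent polling request (not shown here).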
def get_data_product_kml(self, visualization_parameters=None):
    kml_content = ""
    ui_server = "http://localhost:3000"  # This server hosts the UI and is used for creating all embedded links within KML
    starting_altitude = "20000"
    observatory_icon_file = "/static/img/Observatory-icon1.png"

    if visualization_parameters:
        if "ui_server" in visualization_parameters:
            ui_server = visualization_parameters["ui_server"]

    # First step: discover all data products in the system
    dps, _ = self.clients.resource_registry.find_resources(RT.DataProduct, None, None, False)

    # Start creating the KML in memory
    # Common KML tags
    kml_content += "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
    kml_content += "<kml xmlns=\"http://www.opengis.net/kml/2.2\">\n"
    kml_content += "<Document>\n"
    kml_content += "<name>DataProduct geo-reference information, ID: " + str(create_unique_identifier('google_flush_key')) + "</name>\n"

    # Define line styles, used for polygons
    kml_content += "<Style id=\"yellowLine\">\n<LineStyle>\n<color>ff61f2f2</color>\n<width>4</width>\n</LineStyle>\n</Style>"

    # Embed the icon images, each as a separate style
    kml_content += "<Style id=\"observatory-icon\">\n<IconStyle>\n<Icon>\n<href>"
    kml_content += ui_server + observatory_icon_file
    kml_content += "</href>\n</Icon>\n</IconStyle>\n</Style>\n"

    # Several data products effectively come from the same geo-location, so cluster them by location
    dp_cluster = {}
    for dp in dps:
        _lat_center = _lon_center = 0.0
        bounds = dp.geospatial_bounds
        if bounds is None:
            continue

        # Approximate placemark position in the middle of the geospatial bounds
        _lon_center = bounds.geospatial_longitude_limit_west + (bounds.geospatial_longitude_limit_east - bounds.geospatial_longitude_limit_west) / 2.0
        _lat_center = bounds.geospatial_latitude_limit_south + (bounds.geospatial_latitude_limit_north - bounds.geospatial_latitude_limit_south) / 2.0

        # Generate a key from lon-lat info. Truncate floats to one digit after the decimal to keep things sane.
        key = ("%.1f" % _lon_center) + "," + ("%.1f" % _lat_center)

        # All co-located data products are put in a list
        if key not in dp_cluster:
            dp_cluster[key] = []
        dp_cluster[key].append(dp)

    # Enter all data products into KML placemarks
    for set_key in dp_cluster:
        # Create only one placemark per set but populate information from all entries within the set
        _lat_center = _lon_center = 0.0
        bounds = dp_cluster[set_key][0].geospatial_bounds
        if bounds is None:
            continue

        # Approximate placemark position in the middle of the geospatial bounds
        _lon_center = bounds.geospatial_longitude_limit_west + (bounds.geospatial_longitude_limit_east - bounds.geospatial_longitude_limit_west) / 2.0
        _lat_center = bounds.geospatial_latitude_limit_south + (bounds.geospatial_latitude_limit_north - bounds.geospatial_latitude_limit_south) / 2.0

        # Start Placemark tag for point
        kml_content += "<Placemark>\n"

        # Name of placemark
        kml_content += "<name>"
        kml_content += "Data Products at : " + str(_lat_center) + "," + str(_lon_center)
        kml_content += "</name>\n"

        # Style/icon for placemark
        kml_content += "<styleUrl>#observatory-icon</styleUrl>\n"

        # Description: insert HTML as a table containing info about the data products
        kml_content += "<description>\n<![CDATA[\n"
        html_description = "<table border='1' align='center'>\n"
        html_description += "<tr> <td><b>Data Product</b></td> <td> <b>ID</b> </td> <td> <b>Preview</b> </td> </tr>\n"
        for dp in dp_cluster[set_key]:
            html_description += "<tr>"
            html_description += "<td>" + dp.name + "</td>"
            html_description += "<td><a class=\"external\" href=\"" + ui_server + "/DataProduct/face/" + str(dp._id) + "/\">" + str(dp._id) + "</a> </td> "
            html_description += "<td> </td>"
            html_description += "</tr>"

        # Close the table
        html_description += "</table>"
        kml_content += html_description
        kml_content += "\n]]>\n</description>\n"

        # Point information
        kml_content += "<Point>\n<coordinates>" + str(_lon_center) + "," + str(_lat_center) + "," + starting_altitude + "</coordinates>\n</Point>\n"

        # Close Placemark
        kml_content += "</Placemark>\n"

    kml_content += "</Document>\n"
    kml_content += "</kml>\n"

    return kml_content
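# A quick well-formedness check for the generated KML, suitable for a unit test.
# Only the standard library is assumed; kml_string would be the return value of
# get_data_product_kml above.
import xml.etree.ElementTree as ET

def assert_kml_well_formed(kml_string):
    # fromstring raises xml.etree.ElementTree.ParseError on malformed XML,
    # which would catch unbalanced tags in the hand-built string above.
    root = ET.fromstring(kml_string)
    # The KML 2.2 namespace is declared on the root element written above.
    assert root.tag == '{http://www.opengis.net/kml/2.2}kml'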
def initiate_realtime_visualization(self, data_product_id='', visualization_parameters=None, callback=""):
    """Initial request required to start a realtime chart for the specified data product. Returns a user specific token associated
    with the request that will be required for subsequent requests when polling data.

    @param data_product_id    str
    @param visualization_parameters    dict  may contain a 'query' entry
    @param callback    str
    @throws NotFound    Throws if specified data product id or its visualization product does not exist
    """
    query = None
    if visualization_parameters and 'query' in visualization_parameters:
        query = visualization_parameters['query']

    # Perform a look up to check and see if the DP is indeed a realtime GDT stream
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")
    data_product = self.clients.resource_registry.read(data_product_id)
    if not data_product:
        raise NotFound("Data product %s does not exist" % data_product_id)

    data_product_stream_id = None
    workflow_def_id = None

    # Check to see if the workflow definition already exists
    workflow_def_ids, _ = self.clients.resource_registry.find_resources(restype=RT.WorkflowDefinition,
                                                                        name='Realtime_Google_DT',
                                                                        id_only=True)
    if len(workflow_def_ids) > 0:
        workflow_def_id = workflow_def_ids[0]
    else:
        workflow_def_id = self._create_google_dt_workflow_def()

    # Create and start the workflow
    workflow_id, workflow_product_id = self.clients.workflow_management.create_data_process_workflow(
        workflow_def_id, data_product_id, timeout=20)

    # Detect the output data product of the workflow
    workflow_dp_ids, _ = self.clients.resource_registry.find_objects(workflow_id, PRED.hasDataProduct,
                                                                     RT.DataProduct, True)
    if len(workflow_dp_ids) != 1:
        raise ValueError("Workflow Data Product ids representing output DP must contain exactly one entry")

    # Find the stream id associated with the output
    workflow_output_stream_ids, _ = self.clients.resource_registry.find_objects(workflow_dp_ids[-1],
                                                                                PRED.hasStream, None, True)
    data_product_stream_id = workflow_output_stream_ids

    # Create a queue to collect the stream granules - idempotency saves the day!
    query_token = create_unique_identifier('user_queue')
    xq = self.container.ex_manager.create_xn_queue(query_token)

    subscription_id = self.clients.pubsub_management.create_subscription(
        stream_ids=data_product_stream_id,
        exchange_name=query_token,
        name=query_token)

    # After the queue has been created it is safe to activate the subscription
    self.clients.pubsub_management.activate_subscription(subscription_id)

    if callback == "":
        return query_token
    else:
        return callback + "(\"" + query_token + "\")"
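# The callback parameter gives a JSONP-style wrapper: the token comes back as a
# string of JavaScript invoking the named client-side function. A standalone
# illustration with hypothetical values:
query_token = "user_queue_1a2b3c"   # hypothetical token
callback = "renderChart"            # hypothetical client-side function name
print(callback + "(\"" + query_token + "\")")   # prints: renderChart("user_queue_1a2b3c")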
def test_create_unique_identifier(self):
    # Avoid shadowing the builtin id()
    uid = create_unique_identifier("abc123")
    self.assertIn("abc123", uid)

    uid = create_unique_identifier()
    self.assertNotIn("abc123", uid)
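# The test above pins down the contract of create_unique_identifier: a supplied
# prefix appears in the result, and the result is otherwise unique. A minimal
# conforming sketch, assuming a uuid-based suffix (the real implementation lives
# in pyon.util.containers and may differ):
import uuid

def create_unique_identifier(classname=None):
    # Join an optional prefix with a random hex suffix.
    parts = [classname] if classname else []
    parts.append(uuid.uuid4().hex)
    return '_'.join(parts)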
def initiate_realtime_visualization(self, data_product_id='', in_product_type='', visualization_parameters=None, callback=""):
    """Initial request required to start a realtime chart for the specified data product. Returns a user specific token associated
    with the request that will be required for subsequent requests when polling data.
    Note: the in_product_type is a temporary fix for specifying the type of data product to the service.

    @param data_product_id    str
    @param in_product_type    str
    @param visualization_parameters    dict  may contain a 'query' entry
    @param callback    str
    @throws NotFound    Throws if specified data product id or its visualization product does not exist
    """
    # The default for visualization_parameters is None rather than {} to avoid a mutable default argument.
    query = None
    if visualization_parameters and 'query' in visualization_parameters:
        query = visualization_parameters['query']

    # Perform a look up to check and see if the DP is indeed a realtime GDT stream
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")
    data_product = self.clients.resource_registry.read(data_product_id)
    if not data_product:
        raise NotFound("Data product %s does not exist" % data_product_id)

    data_product_stream_id = None
    workflow_def_id = None

    # If no in_product_type was specified, assume the data product passed represents a pure data stream
    # and needs to be converted to some form of visualization -- Google DataTable by default, unless specified
    # in the future by additional parameters. Create the appropriate workflow to do the conversion.
    if in_product_type == '':
        # Check to see if the workflow definition already exists
        workflow_def_ids, _ = self.rrclient.find_resources(restype=RT.WorkflowDefinition,
                                                           name='Realtime_Google_DT', id_only=True)
        if len(workflow_def_ids) > 0:
            workflow_def_id = workflow_def_ids[0]
        else:
            workflow_def_id = self._create_google_dt_workflow_def()

        # Create and start the workflow
        workflow_id, workflow_product_id = self.workflowclient.create_data_process_workflow(
            workflow_def_id, data_product_id, timeout=20)

        # Detect the output data product of the workflow
        workflow_dp_ids, _ = self.rrclient.find_objects(workflow_id, PRED.hasDataProduct, RT.DataProduct, True)
        if len(workflow_dp_ids) != 1:
            raise ValueError("Workflow Data Product ids representing output DP must contain exactly one entry")

        # Find the stream id associated with the output
        workflow_output_stream_ids, _ = self.rrclient.find_objects(workflow_dp_ids[-1], PRED.hasStream, None, True)
        data_product_stream_id = workflow_output_stream_ids

    # TODO check automatically whether this is a realtime GDT stream
    if in_product_type == 'google_dt':
        # Retrieve the id of the OUTPUT stream from the out Data Product
        stream_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasStream, None, True)
        if not stream_ids:
            raise Inconsistent("Could not find Stream Id for Data Product %s" % data_product_id)
        data_product_stream_id = stream_ids

    # Create a queue to collect the stream granules - idempotency saves the day!
    query_token = create_unique_identifier('user_queue')
    xq = self.container.ex_manager.create_xn_queue(query_token)

    subscription_id = self.pubsubclient.create_subscription(
        query=StreamQuery(data_product_stream_id),
        exchange_name=query_token,
        exchange_point='science_data',
        name=query_token)

    # After the queue has been created it is safe to activate the subscription
    self.clients.pubsub_management.activate_subscription(subscription_id)

    if callback == "":
        return query_token
    else:
        return callback + "(\"" + query_token + "\")"
def template_tst_deployment_context(self, context=None):
    """
    Creates a minimal deployment: 1 instrument, 1 site. A deployment context must be provided.
    """
    c = self.client
    c2 = DotDict()
    c2.resource_registry = self.client.RR

    log.info("Create an instrument model")
    instrument_model_id = self.perform_fcruf_script(RT.InstrumentModel,
                                                    "instrument_model",
                                                    self.client.IMS)

    log.info("Create an instrument device")
    instrument_device_id = self.perform_fcruf_script(RT.InstrumentDevice,
                                                     "instrument_device",
                                                     self.client.IMS)

    log.info("Create an instrument site")
    instrument_site_id = self.perform_fcruf_script(RT.InstrumentSite,
                                                   "instrument_site",
                                                   self.client.OMS)

    log.info("Associate instrument model with instrument site")
    self.perform_association_script(c.OMS.assign_instrument_model_to_instrument_site,
                                    self.RR2.find_instrument_sites_by_instrument_model_using_has_model,
                                    self.RR2.find_instrument_models_of_instrument_site_using_has_model,
                                    instrument_site_id,
                                    instrument_model_id)

    log.info("Associate instrument model with instrument device")
    self.perform_association_script(c.IMS.assign_instrument_model_to_instrument_device,
                                    c.IMS.find_instrument_device_by_instrument_model,
                                    c.IMS.find_instrument_model_by_instrument_device,
                                    instrument_device_id,
                                    instrument_model_id)

    log.info("Create a stream definition for the data from the ctd simulator")
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    ctd_stream_def_id = self.client.PSMS.create_stream_definition(name='Simulated CTD data',
                                                                  parameter_dictionary_id=pdict_id)

    log.info("Create an IonObject for a data product")
    dp_obj = self.create_data_product_obj()
    dp_obj.name = create_unique_identifier('Inst Data Product')
    inst_data_product_id = c.DPMS.create_data_product(dp_obj, ctd_stream_def_id)

    # Assign data products appropriately
    c.DAMS.assign_data_product(input_resource_id=instrument_device_id,
                               data_product_id=inst_data_product_id)

    deployment_obj = any_old(RT.Deployment, dict(context=context))
    deployment_id = c.OMS.create_deployment(deployment_obj)

    c.OMS.assign_site_to_deployment(instrument_site_id, deployment_id)
    c.OMS.assign_device_to_deployment(instrument_device_id, deployment_id)

    c.OMS.activate_deployment(deployment_id, True)

    # Cleanup
    self.RR2.pluck(instrument_model_id)
    self.RR2.pluck(deployment_id)
    self.RR2.pluck(instrument_device_id)

    c.IMS.force_delete_instrument_model(instrument_model_id)
    c.IMS.force_delete_instrument_device(instrument_device_id)
    c.OMS.force_delete_instrument_site(instrument_site_id)
    c.OMS.force_delete_deployment(deployment_id)