def test_out_of_band_retrieve(self):
    # Set up the environment
    stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(stream_id, dataset_id)

    # Fill the dataset
    self.publish_fake_data(stream_id, route)
    self.wait_until_we_have_enough_granules(dataset_id, 40)

    # Retrieve the data out-of-band and verify the time axis
    granule = DataRetrieverService.retrieve_oob(dataset_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    self.assertTrue((rdt['time'] == np.arange(40)).all())
def test_retrieve_cache(self):
    DataRetrieverService._refresh_interval = 1
    datasets = [self.make_simple_dataset() for i in xrange(10)]
    for stream_id, route, stream_def_id, dataset_id in datasets:
        coverage = DatasetManagementService._get_simplex_coverage(dataset_id)
        coverage.insert_timesteps(10)
        coverage.set_parameter_values('time', np.arange(10))
        coverage.set_parameter_values('temp', np.arange(10))

    # Verify cache hit and refresh
    dataset_ids = [i[3] for i in datasets]
    self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)
    DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
    cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
    # Verify that it was hit and it's now in there
    self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

    gevent.sleep(DataRetrieverService._refresh_interval + 0.2)
    DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache again after the interval
    cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
    self.assertTrue(age2 != age)

    # Touch every dataset so the first entry falls out of the bounded cache
    for dataset_id in dataset_ids:
        DataRetrieverService._get_coverage(dataset_id)
    self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)

    stream_id, route, stream_def, dataset_id = datasets[0]
    self.start_ingestion(stream_id, dataset_id)
    DataRetrieverService._get_coverage(dataset_id)
    self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

    # With a long refresh interval, new data arriving should still evict the entry
    DataRetrieverService._refresh_interval = 100
    self.publish_hifi(stream_id, route, 1)
    self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

    event = gevent.event.Event()
    with gevent.Timeout(20):
        while not event.wait(0.1):
            if dataset_id not in DataRetrieverService._retrieve_cache:
                event.set()
    self.assertTrue(event.is_set())
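# The test above exercises three cache behaviors: entries are stored as
# (coverage, timestamp) pairs, a stale entry is re-stamped once
# _refresh_interval seconds have elapsed, and touching more datasets than
# the cache holds evicts the oldest entry. Below is a minimal sketch of
# such a bounded TTL/LRU cache; the names and structure are illustrative
# assumptions, not the actual DataRetrieverService internals.
import time
from collections import OrderedDict

class BoundedTTLCache(object):
    """Illustrative LRU cache of (value, timestamp) pairs with a refresh interval."""

    def __init__(self, maxsize=5, refresh_interval=1):
        self.maxsize = maxsize
        self.refresh_interval = refresh_interval
        self._cache = OrderedDict()  # key -> (value, timestamp)

    def get(self, key, loader):
        now = time.time()
        if key in self._cache:
            value, ts = self._cache.pop(key)
            if now - ts < self.refresh_interval:
                self._cache[key] = (value, ts)  # fresh: keep the old timestamp
                return value
        value = loader(key)  # miss or stale: reload and re-stamp
        self._cache[key] = (value, now)
        while len(self._cache) > self.maxsize:
            self._cache.popitem(last=False)  # evict the least recently used entry
        return value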
class DataRetrieverServiceTest(PyonTestCase):
    def setUp(self):
        mock_clients = self._create_service_mock('data_retriever')
        self.data_retriever_service = DataRetrieverService()
        self.data_retriever_service.clients = mock_clients
        self.mock_rr_create = self.data_retriever_service.clients.resource_registry.create
        self.mock_rr_create_assoc = self.data_retriever_service.clients.resource_registry.create_association
        self.mock_rr_read = self.data_retriever_service.clients.resource_registry.read
        self.mock_rr_update = self.data_retriever_service.clients.resource_registry.update
        self.mock_rr_delete = self.data_retriever_service.clients.resource_registry.delete
        self.mock_rr_delete_assoc = self.data_retriever_service.clients.resource_registry.delete_association
        self.mock_rr_find_assoc = self.data_retriever_service.clients.resource_registry.find_associations
        self.mock_ps_create_stream = self.data_retriever_service.clients.pubsub_management.create_stream
        self.mock_ps_create_stream_definition = self.data_retriever_service.clients.pubsub_management.create_stream_definition
        self.data_retriever_service.container = DotDict({
            'id': '123',
            'spawn_process': Mock(),
            'proc_manager': DotDict({
                'terminate_process': Mock(),
                'procs': []
            }),
            'datastore_manager': DotDict({'get_datastore': Mock()})
        })
        self.datastore = DotDict({'query_view': Mock()})
        self.data_retriever_service.container.datastore_manager.get_datastore.return_value = self.datastore
        self.mock_cc_spawn = self.data_retriever_service.container.spawn_process
        self.mock_cc_terminate = self.data_retriever_service.container.proc_manager.terminate_process
        self.mock_pd_schedule = self.data_retriever_service.clients.process_dispatcher.schedule_process
        self.mock_pd_cancel = self.data_retriever_service.clients.process_dispatcher.cancel_process
        self.mock_ds_read = self.data_retriever_service.clients.dataset_management.read_dataset
        self.data_retriever_service.process_definition = ProcessDefinition()
        self.data_retriever_service.process_definition.executable['module'] = 'ion.processes.data.replay_process'
        self.data_retriever_service.process_definition.executable['class'] = 'ReplayProcess'
        self.data_retriever_service.process_definition_id = 'mock_procdef_id'

    @unittest.skip('Can we mock the datastore manager?')
    def test_define_replay(self):
        # mocks
        self.mock_ps_create_stream.return_value = '12345'
        self.mock_rr_create.return_value = ('replay_id', 'garbage')
        self.mock_ds_read.return_value = DotDict({
            'datastore_name': 'unittest',
            'view_name': 'garbage',
            'primary_view_key': 'primary key'
        })
        document = DotDict({'stream_resource_id': '0'})
        self.mock_pd_schedule.return_value = 'process_id'
        self.datastore.query_view.return_value = [{'doc': document}]
        config = {'process': {
            'query': 'myquery',
            'datastore_name': 'unittest',
            'view_name': 'garbage',
            'key_id': 'primary key',
            'delivery_format': None,
            'publish_streams': {'output': '12345'}
        }}

        # execution
        r, s = self.data_retriever_service.define_replay(dataset_id='dataset_id', query='myquery')

        # assertions
        self.assertTrue(self.mock_ps_create_stream_definition.called)
        self.assertTrue(self.mock_ps_create_stream.called)
        self.assertTrue(self.mock_rr_create.called)
        self.mock_rr_create_assoc.assert_called_with('replay_id', PRED.hasStream, '12345', None)
        self.assertTrue(self.mock_pd_schedule.called)
        self.assertTrue(self.mock_rr_update.called)
        self.assertEquals(r, 'replay_id')
        self.assertEquals(s, '12345')

    def test_define_replay_no_data(self):
        # mocks
        self.mock_ps_create_stream.return_value = '12345'
        self.mock_rr_create.return_value = ('replay_id', 'garbage')
        self.mock_ds_read.return_value = DotDict({
            'datastore_name': 'unittest',
            'view_name': 'garbage',
            'primary_view_key': 'primary key'
        })
        document = DotDict({'stream_resource_id': '0'})
        self.mock_pd_schedule.return_value = 'process_id'
        self.datastore.query_view.return_value = []  # Raises index error
        config = {'process': {
            'query': 'myquery',
            'datastore_name': 'unittest',
            'view_name': 'garbage',
            'key_id': 'primary key',
            'delivery_format': None,
            'publish_streams': {'output': '12345'}
        }}

        with self.assertRaises(NotFound):
            self.data_retriever_service.define_replay(dataset_id='dataset_id', query='myquery')

    @unittest.skip('Can\'t do unit test here')
    def test_start_replay(self):
        pass

    def test_cancel_replay(self):
        # mocks
        self.mock_rr_find_assoc.return_value = [1, 2, 3]
        replay = Replay()
        replay.process_id = '1'
        self.mock_rr_read.return_value = replay

        # execution
        self.data_retriever_service.cancel_replay('replay_id')

        # assertions
        self.assertEquals(self.mock_rr_delete_assoc.call_count, 3)
        self.mock_rr_delete.assert_called_with('replay_id')
        self.mock_pd_cancel.assert_called_with('1')
def _get_highcharts_data(self, data_product_id='', visualization_parameters=None):
    """Retrieves the data for the specified data product.

    @param data_product_id            str
    @param visualization_parameters   str
    @retval jsonp_visualization_data  str
    @throws NotFound    object with specified id, query does not exist
    """
    # An empty list is returned in case there is no data in the coverage
    empty_hc = []

    # error check
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")

    use_direct_access = False
    if visualization_parameters == {}:
        visualization_parameters = None

    # Extract the parameters. Definitely init first
    query = None
    if visualization_parameters:
        query = {}
        # Error check and damage control. Definitely need time
        if 'parameters' in visualization_parameters and len(visualization_parameters['parameters']) > 0:
            if 'time' not in visualization_parameters['parameters']:
                visualization_parameters['parameters'].append('time')
            query['parameters'] = visualization_parameters['parameters']

        # The times passed from the UI are system times, so convert them to NTP
        if 'start_time' in visualization_parameters:
            query['start_time'] = int(visualization_parameters['start_time'])
        if 'end_time' in visualization_parameters:
            query['end_time'] = int(visualization_parameters['end_time'])

        # stride time
        if 'stride_time' in visualization_parameters:
            try:
                query['stride_time'] = int(visualization_parameters['stride_time'])
            except TypeError:
                # There are some (rare) situations where the AJAX request has 'null' in the request
                # Example:
                # {"query_type":"google_dt","parameters":[],"start_time":-2208988800,"end_time":-2208988800,"stride_time":null,"use_direct_access":0}
                query['stride_time'] = 1
        else:
            query['stride_time'] = 1

        # direct access parameter
        if 'use_direct_access' in visualization_parameters:
            use_direct_access = (int(visualization_parameters['use_direct_access']) == 1)

    # Get the dataset_id and objects associated with the data product.
    # Need them to do the data retrieval
    ds_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.Dataset, True)
    if not ds_ids:
        raise NotFound("Could not find dataset associated with data product")
    stream_def_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasStreamDefinition, id_only=True)
    if not stream_def_ids:
        raise NotFound('Could not find stream definition associated with data product')
    stream_def_id = stream_def_ids[0]

    if use_direct_access:
        retrieved_granule = DataRetrieverService.retrieve_oob(ds_ids[0], query=query, delivery_format=stream_def_id)
    else:
        retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query, delivery_format=stream_def_id)

    # If there is no data, return an empty list
    if retrieved_granule is None:
        return simplejson.dumps(empty_hc)

    # Send the granule through the transform to get the HighCharts data
    hc_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('highcharts', id_only=True)
    hc_stream_def = self.clients.pubsub_management.create_stream_definition('HighCharts_out', parameter_dictionary_id=hc_pdict_id)
    hc_data_granule = VizTransformHighChartsAlgorithm.execute(retrieved_granule, params=hc_stream_def, config=visualization_parameters)

    if hc_data_granule is None:
        return simplejson.dumps(empty_hc)

    hc_rdt = RecordDictionaryTool.load_from_granule(hc_data_granule)
    # Now go through this redundant step of converting hc_data into a non-numpy version
    hc_data_np = get_safe(hc_rdt, "hc_data")[0]
    hc_data = []

    for series in hc_data_np:
        s = {}
        for key in series:
            if key == "data":
                s["data"] = series["data"].tolist()
                continue
            s[key] = series[key]
        hc_data.append(s)

    # Return the JSON version of the series list
    return json.dumps(hc_data)
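# For reference, the JSON this method returns is an array of series
# dictionaries; only "data" is converted from a numpy array to a plain
# list, all other series keys pass through unchanged. A hypothetical
# single-series example of the resulting structure:
example_hc_data = [
    {"name": "temp", "data": [10.0, 10.5, 11.0]},
]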
def _get_google_dt(self, data_product_id='', visualization_parameters=None):
    """Retrieves the data for the specified data product.

    @param data_product_id            str
    @param visualization_parameters   str
    @retval jsonp_visualization_data  str
    @throws NotFound    object with specified id, query does not exist
    """
    # An empty DataTable is returned in case there is no data in the coverage
    empty_gdt = gviz_api.DataTable([('time', 'datetime', 'time')])

    # error check
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")

    use_direct_access = False
    if visualization_parameters == {}:
        visualization_parameters = None

    # Extract the parameters. Definitely init first
    query = None
    if visualization_parameters:
        query = {}
        # Error check and damage control. Definitely need time
        if 'parameters' in visualization_parameters and len(visualization_parameters['parameters']) > 0:
            if 'time' not in visualization_parameters['parameters']:
                visualization_parameters['parameters'].append('time')
            query['parameters'] = visualization_parameters['parameters']

        # The times passed from the UI are system times, so convert them to NTP
        if 'start_time' in visualization_parameters:
            query['start_time'] = int(visualization_parameters['start_time'])
        if 'end_time' in visualization_parameters:
            query['end_time'] = int(visualization_parameters['end_time'])

        # stride time
        if 'stride_time' in visualization_parameters:
            try:
                query['stride_time'] = int(visualization_parameters['stride_time'])
            except TypeError:
                # There are some (rare) situations where the AJAX request has 'null' in the request
                # Example:
                # {"query_type":"google_dt","parameters":[],"start_time":-2208988800,"end_time":-2208988800,"stride_time":null,"use_direct_access":0}
                query['stride_time'] = 1
        else:
            query['stride_time'] = 1

        # direct access parameter
        if 'use_direct_access' in visualization_parameters:
            use_direct_access = (int(visualization_parameters['use_direct_access']) == 1)

    # Get the dataset_id and objects associated with the data product.
    # Need them to do the data retrieval
    ds_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.Dataset, True)
    if not ds_ids:
        raise NotFound("Could not find dataset associated with data product")
    stream_def_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasStreamDefinition, id_only=True)
    if not stream_def_ids:
        raise NotFound('Could not find stream definition associated with data product')
    stream_def_id = stream_def_ids[0]

    if use_direct_access:
        retrieved_granule = DataRetrieverService.retrieve_oob(ds_ids[0], query=query, delivery_format=stream_def_id)
    else:
        retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query, delivery_format=stream_def_id)

    # If there is no data, return an empty table
    if retrieved_granule is None:
        return empty_gdt.ToJSon()

    # Send the granule through the transform to get the Google DataTable
    gdt_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('google_dt', id_only=True)
    gdt_stream_def = self.clients.pubsub_management.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)
    gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(retrieved_granule, params=gdt_stream_def, config=visualization_parameters)

    if gdt_data_granule is None:
        return empty_gdt.ToJSon()

    gdt_rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
    gdt_components = get_safe(gdt_rdt, 'google_dt_components')
    gdt_component = gdt_components[0]
    temp_gdt_description = gdt_component["data_description"]
    temp_gdt_content = gdt_component["data_content"]

    # Adjust the 'float' time to datetime in the content
    gdt_description = [('time', 'datetime', 'time')]
    gdt_content = []

    for idx in range(1, len(temp_gdt_description)):
        temp_arr = temp_gdt_description[idx]
        if temp_arr is not None and temp_arr[0] != 'time':
            if len(temp_arr) == 3:
                gdt_description.append((temp_arr[0], temp_arr[1], temp_arr[2]))
            if len(temp_arr) == 4:
                gdt_description.append((temp_arr[0], temp_arr[1], temp_arr[2], temp_arr[3]))

    for tempTuple in temp_gdt_content:
        # Sometimes there are inexplicable empty tuples in the content. Drop them
        if tempTuple == [] or len(tempTuple) == 0:
            continue
        varTuple = []
        varTuple.append(datetime.fromtimestamp(tempTuple[0]))
        for idx in range(1, len(tempTuple)):
            varTuple.append(tempTuple[idx])
        gdt_content.append(varTuple)

    # Now generate the Google DataTable out of the description and content
    gdt = gviz_api.DataTable(gdt_description)
    gdt.LoadData(gdt_content)

    # Return the JSON version of the table
    return gdt.ToJSon()
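# A standalone demonstration of the gviz_api calls used above; the
# description tuples are (column_id, type, label). The 'temp' column and
# the sample values are illustrative assumptions:
from datetime import datetime
import gviz_api

description = [('time', 'datetime', 'time'), ('temp', 'number', 'temp')]
content = [
    [datetime.fromtimestamp(0), 10.0],
    [datetime.fromtimestamp(1), 10.5],
]
gdt = gviz_api.DataTable(description)
gdt.LoadData(content)
print gdt.ToJSon()  # JSON string consumable by Google Charts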
def retrieve_stream(dataset_id='', query=None):
    return DataRetrieverService.retrieve_oob(dataset_id, query)
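# Usage sketch: given a populated dataset, the granule returned by
# retrieve_stream unpacks with RecordDictionaryTool, as elsewhere in this
# module. The query keys mirror those built by the visualization helpers;
# the dataset_id is assumed to already exist:
granule = retrieve_stream(dataset_id, query={'start_time': 0, 'end_time': 2})
rdt = RecordDictionaryTool.load_from_granule(granule)
time_values = rdt['time']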
def get_visualization_data(self, data_product_id='', visualization_parameters=None, callback='', tqx=""):
    """Retrieves the data for the specified data product.

    @param data_product_id            str
    @param visualization_parameters   str
    @param callback                   str
    @retval jsonp_visualization_data  str
    @throws NotFound    object with specified id, query does not exist
    """
    # error check
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")

    use_direct_access = False
    if visualization_parameters == {}:
        visualization_parameters = None

    reqId = 0
    # If a reqId was passed in tqx, extract it
    if tqx:
        tqx_param_list = tqx.split(";")
        for param in tqx_param_list:
            key, value = param.split(":")
            if key == 'reqId':
                reqId = value

    # Extract the parameters. Definitely init first
    query = None
    if visualization_parameters:
        query = {}
        # Error check and damage control. Definitely need time
        if 'parameters' in visualization_parameters and len(visualization_parameters['parameters']) > 0:
            if 'time' not in visualization_parameters['parameters']:
                visualization_parameters['parameters'].append('time')
            query['parameters'] = visualization_parameters['parameters']

        # The times passed from the UI are system times, so convert them to NTP
        if 'start_time' in visualization_parameters:
            #query['start_time'] = int(ntplib.system_to_ntp_time(float(visualization_parameters['start_time'])))
            query['start_time'] = int(visualization_parameters['start_time'])
        if 'end_time' in visualization_parameters:
            #query['end_time'] = int(ntplib.system_to_ntp_time(float(visualization_parameters['end_time'])))
            query['end_time'] = int(visualization_parameters['end_time'])

        # stride time
        if 'stride_time' in visualization_parameters:
            query['stride_time'] = int(visualization_parameters['stride_time'])
        else:
            query['stride_time'] = 1

        # direct access parameter
        if 'use_direct_access' in visualization_parameters:
            use_direct_access = (int(visualization_parameters['use_direct_access']) == 1)

    # Get the dataset_id associated with the data product.
    # Need it to do the data retrieval
    ds_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.Dataset, True)
    if not ds_ids:
        raise NotFound("Could not find dataset associated with data product")

    if use_direct_access:
        retrieved_granule = DataRetrieverService.retrieve_oob(ds_ids[0], query=query)
    else:
        retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query)

    if retrieved_granule is None:
        return None

    # Send the granule through the transform to get the Google DataTable
    gdt_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('google_dt', id_only=True)
    gdt_stream_def = self.clients.pubsub_management.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)
    gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(retrieved_granule, params=gdt_stream_def, config=visualization_parameters)

    if gdt_data_granule is None:
        return None

    gdt_rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
    gdt_components = get_safe(gdt_rdt, 'google_dt_components')
    gdt_component = gdt_components[0]
    temp_gdt_description = gdt_component["data_description"]
    temp_gdt_content = gdt_component["data_content"]

    # Adjust the 'float' time to datetime in the content
    gdt_description = [('time', 'datetime', 'time')]
    gdt_content = []
    for idx in range(1, len(temp_gdt_description)):
        temp_arr = temp_gdt_description[idx]
        if temp_arr is not None and temp_arr[0] != 'time':
            gdt_description.append((temp_arr[0], temp_arr[1], temp_arr[2]))

    for tempTuple in temp_gdt_content:
        # Sometimes there are inexplicable empty tuples in the content. Drop them
        if tempTuple == [] or len(tempTuple) == 0:
            continue
        varTuple = []
        varTuple.append(datetime.fromtimestamp(tempTuple[0]))
        for idx in range(1, len(tempTuple)):
            # Some silly numpy format won't go away, so numbers need to be cast to floats
            if gdt_description[idx][1] == 'number':
                if tempTuple[idx] is None:
                    varTuple.append(0.0)
                else:
                    # Precision hardcoded for now. Needs to be on a per-parameter basis
                    varTuple.append(round(float(tempTuple[idx]), 5))
            else:
                varTuple.append(tempTuple[idx])
        gdt_content.append(varTuple)

    # Now generate the Google DataTable out of the description and content
    gdt = gviz_api.DataTable(gdt_description)
    gdt.LoadData(gdt_content)

    # Return the JSON version of the table, wrapped in the callback if one was given
    if callback == '':
        return gdt.ToJSonResponse(req_id=reqId)
    else:
        return callback + "(\"" + gdt.ToJSonResponse(req_id=reqId) + "\")"
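# The tqx argument follows the Google Visualization request format of
# semicolon-separated key:value pairs. An illustrative string and the
# reqId extraction it drives:
tqx = "reqId:7;out:json"
reqId = 0
for param in tqx.split(";"):
    key, value = param.split(":")
    if key == 'reqId':
        reqId = value  # kept as a string, as in the method above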
def test_transform_data(self):
    module = __name__
    cls = 'FakeTransform'
    retval = DataRetrieverService._transform_data(0, module, cls)
    self.assertEquals(retval, 1)
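# This test presumes a FakeTransform class in this module that turns the
# input 0 into the expected 1. The real fixture is not shown here; a
# minimal sketch consistent with the assertion, assuming _transform_data
# resolves the class from the module and calls an execute() hook on the
# data, could be:
class FakeTransform(object):
    @staticmethod
    def execute(data):
        return data + 1  # maps the input 0 to the expected 1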