def _trigger_func(self, stream_id):
    #@todo - add lots of comments in here
    self.last_time = 0  # assumed initialization; mirrors the other _trigger_func variant below and avoids an AttributeError on first use

    while True:
        length = 10

        #Explicitly make these numpy arrays...
        c = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])
        t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)])
        p = numpy.array([random.lognormvariate(1, 2) for i in xrange(length)])
        lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)])
        lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])
        tvar = numpy.array([self.last_time + i for i in xrange(1, length + 1)])

        self.last_time = max(tvar)

        rdt = RecordDictionaryTool(taxonomy=tx)

        # This is an example of using groups; it is not a normative statement about how to use groups
        rdt0 = RecordDictionaryTool(taxonomy=tx)
        rdt0['temp'] = t
        rdt0['cond'] = c
        rdt0['pres'] = p

        #add a value sequence of raw bytes - not sure the type below is correct?
        with open('/dev/urandom', 'r') as rand:
            rdt0['raw_fixed'] = numpy.array([rand.read(32) for i in xrange(length)], dtype='a32')

        #add a value sequence of raw bytes - not sure the type below is correct?
        with open('/dev/urandom', 'r') as rand:
            rdt0['raw_blob'] = numpy.array([rand.read(random.randint(1, 40)) for i in xrange(length)], dtype=object)

        rdt1 = RecordDictionaryTool(taxonomy=tx)
        rdt1['time'] = tvar
        rdt1['lat'] = lat
        rdt1['lon'] = lon

        rdt['group1'] = rdt1
        rdt['group0'] = rdt0

        log.info("logging published Record Dictionary:\n %s", rdt.pretty_print())

        g = build_granule(data_producer_id='Bobs Potatoes', taxonomy=tx, record_dictionary=rdt)

        log.info('Sending %d values!' % length)
        self.publisher.publish(g)

        time.sleep(2.0)

def test_build_granule_and_load_from_granule(self):

    #Define a taxonomy and add sets. add_taxonomy_set takes one or more names and assigns them to one handle
    tx = TaxyTool()
    tx.add_taxonomy_set('temp', 'long_temp_name')
    tx.add_taxonomy_set('cond', 'long_cond_name')
    tx.add_taxonomy_set('pres', 'long_pres_name')
    tx.add_taxonomy_set('rdt')
    # map is {<local name>: <granule name or path>}

    #Use RecordDictionaryTool to create a record dictionary. Send in the taxonomy so the Tool knows what to expect
    rdt = RecordDictionaryTool(taxonomy=tx)

    #Create some arrays and fill them with random values
    temp_array = numpy.random.standard_normal(100)
    cond_array = numpy.random.standard_normal(100)
    pres_array = numpy.random.standard_normal(100)

    #Use the RecordDictionaryTool to add the values. This also would work if you used long_temp_name, etc.
    rdt['temp'] = temp_array
    rdt['cond'] = cond_array
    rdt['pres'] = pres_array

    #You can also add in another RecordDictionaryTool, providing the taxonomies are the same.
    rdt2 = RecordDictionaryTool(taxonomy=tx)
    rdt2['temp'] = temp_array
    rdt['rdt'] = rdt2

    g = build_granule(data_producer_id='john', taxonomy=tx, record_dictionary=rdt)

    l_tx = TaxyTool.load_from_granule(g)
    l_rd = RecordDictionaryTool.load_from_granule(g)

    # Make sure we got back the same Taxonomy Object
    self.assertEquals(l_tx._t, tx._t)
    self.assertEquals(l_tx.get_handles('temp'), tx.get_handles('temp'))
    self.assertEquals(l_tx.get_handles('testing_2'), tx.get_handles('testing_2'))

    # Now test the record dictionary object
    self.assertEquals(l_rd._rd, rdt._rd)
    self.assertEquals(l_rd._tx._t, rdt._tx._t)

    for k, v in l_rd.iteritems():
        self.assertIn(k, rdt)
        if isinstance(v, numpy.ndarray):
            self.assertTrue((v == rdt[k]).all())
        else:
            self.assertEquals(v._rd, rdt[k]._rd)

def setUp(self):
    self._tx = TaxyTool()
    self._tx.add_taxonomy_set('temp', 'long_temp_name')
    self._tx.add_taxonomy_set('cond', 'long_cond_name')
    self._tx.add_taxonomy_set('pres', 'long_pres_name')
    self._tx.add_taxonomy_set('rdt')
    self._tx.add_taxonomy_set('rdt2')
    # map is {<local name>: <granule name or path>}

    self._rdt = RecordDictionaryTool(taxonomy=self._tx)

def granule_example(nsdict):
    """
    Usage:
    from examples.granule import granule_example
    granule_example(locals())

    tx, g, rdt, rdt2... etc, are now local variables in your shell!
    """

    #Define a taxonomy and add sets. add_taxonomy_set takes one or more names and assigns them to one handle
    tx = TaxyTool()
    tx.add_taxonomy_set('temp', 'long_temp_name')
    tx.add_taxonomy_set('cond', 'long_cond_name')
    tx.add_taxonomy_set('pres', 'long_pres_name')
    tx.add_taxonomy_set('rdt')
    # map is {<local name>: <granule name or path>}

    #Use RecordDictionaryTool to create a record dictionary. Send in the taxonomy so the Tool knows what to expect
    rdt = RecordDictionaryTool(taxonomy=tx)

    #Create some arrays and fill them with random values
    temp_array = numpy.random.standard_normal(100)
    cond_array = numpy.random.standard_normal(100)
    pres_array = numpy.random.standard_normal(100)

    #Use the RecordDictionaryTool to add the values. This also would work if you used long_temp_name, etc.
    rdt['temp'] = temp_array
    rdt['cond'] = cond_array
    rdt['pres'] = pres_array

    #You can also add in another RecordDictionaryTool, providing the taxonomies are the same.
    rdt2 = RecordDictionaryTool(taxonomy=tx)
    rdt2['temp'] = temp_array
    rdt['rdt'] = rdt2

    #you can get a string representation of the RecordDictionaryTool
    print rdt
    print rdt.pretty_print()

    #Determine the length of the RecordDictionary using the len function
    print len(rdt)

    #Delete an item in the RecordDictionary
    del rdt['temp']

    g = build_granule(data_producer_id='john', taxonomy=tx, record_dictionary=rdt)

    nsdict.update(locals())

def process(self, packet):
    if not isinstance(packet, Granule):
        log.warn('Invalid packet received: Type "%s"' % type(packet))
        return

    rd_in = RecordDictionaryTool.load_from_granule(packet)
    tt = TaxyTool.load_from_granule(packet)

    rd_out = RecordDictionaryTool(tt)
    for nickname, v_sequence in rd_in.iteritems():
        rd_out[nickname] = self.shift(v_sequence)

    g_out = build_granule(data_producer_id='dp_id', taxonomy=tt, record_dictionary=rd_out)
    self.publish(g_out)

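# The shift() helper used by process() above is not included in this snippet.
# A minimal sketch of what it could look like, assuming a simple element-wise
# offset applied to each numpy value sequence (hypothetical; the real transform
# may shift the data differently):
def shift(self, v_sequence):
    # displace every sample by a fixed amount; numpy broadcasts the scalar
    return v_sequence + 1.0
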
def _get_data(cls, config):
    parser = get_safe(config, 'parser', None)
    ext_dset_res = get_safe(config, 'external_dataset_res', None)
    if ext_dset_res and parser:
        #CBM: Not in use yet...
        # t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
        # x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
        # y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
        # z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
        # var_lst = ext_dset_res.dataset_description.parameters['variables']

        max_rec = get_safe(config, 'max_records', 1)
        dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
        tx_yml = get_safe(config, 'taxonomy')
        ttool = TaxyTool.load(tx_yml)  #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool

        cnt = cls._calc_iter_cnt(len(parser.sensor_map), max_rec)
        for x in xrange(cnt):
            rdt = RecordDictionaryTool(taxonomy=ttool)

            for name in parser.sensor_map:
                d = parser.data_map[name][x * max_rec:(x + 1) * max_rec]
                rdt[name] = d

            g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
            yield g
    else:
        log.warn('No parser object found in config')

def test_update(self):
    # Update this granule with the content of another. Assert that the taxonomies are the same...
    pres_array = numpy.random.standard_normal(100)
    self._rdt['pres'] = pres_array

    rdt2 = RecordDictionaryTool(taxonomy=self._tx)
    temp_array = numpy.random.standard_normal(100)
    cond_array = numpy.random.standard_normal(100)
    rdt2['temp'] = temp_array
    rdt2['cond'] = cond_array

    self._rdt.update(rdt2)

    self.assertIn('pres', self._rdt)
    self.assertIn('temp', self._rdt)
    self.assertIn('cond', self._rdt)

    self.assertTrue((self._rdt['pres'] == pres_array).all())
    self.assertTrue((self._rdt['cond'] == cond_array).all())
    self.assertTrue((self._rdt['temp'] == temp_array).all())

    self.assertEquals(len(self._rdt), 3)

def message_received(granule, h):
    rdt = RecordDictionaryTool.load_from_granule(granule)
    log.warn('Logging Record Dictionary received in logger subscription \n%s', rdt.pretty_print())

def _validate_google_dt_results(self, results_stream_def, results):
    cc = self.container
    assertions = self.assertTrue

    for g in results:
        if isinstance(g, Granule):
            tx = TaxyTool.load_from_granule(g)
            rdt = RecordDictionaryTool.load_from_granule(g)
            #log.warn(tx.pretty_print())
            #log.warn(rdt.pretty_print())

            gdt_component = rdt['google_dt_components'][0]

            assertions(gdt_component['viz_product_type'] == 'google_realtime_dt')
            gdt_description = gdt_component['data_table_description']
            gdt_content = gdt_component['data_table_content']

            assertions(gdt_description[0][0] == 'time')
            assertions(len(gdt_description) > 1)
            assertions(len(gdt_content) >= 0)

    return

def _trigger_func(self, stream_id):

    self.last_time = 0

    #@todo - add lots of comments in here
    while True:
        length = 10

        #Explicitly make these numpy arrays...
        c = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])
        t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)])
        p = numpy.array([random.lognormvariate(1, 2) for i in xrange(length)])
        lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)])
        lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])
        tvar = numpy.array([self.last_time + i for i in xrange(1, length + 1)])

        self.last_time = max(tvar)

        rdt = RecordDictionaryTool(taxonomy=self._tx)

        rdt['temp'] = ExampleDataProducer_algorithm.execute(t)
        rdt['cond'] = ExampleDataProducer_algorithm.execute(c)
        rdt['pres'] = ExampleDataProducer_algorithm.execute(p)
        rdt['time'] = tvar
        rdt['lat'] = lat
        rdt['lon'] = lon

        log.info("logging published Record Dictionary:\n %s", rdt.pretty_print())

        g = build_granule(data_producer_id=stream_id, taxonomy=self._tx, record_dictionary=rdt)

        log.info('Sending %d values!' % length)
        if isinstance(g, Granule):
            self.publish(g, stream_id)

        time.sleep(2.0)

def combine_granules(granule_a, granule_b):
    """
    This is a method that combines granules in a very naive manner
    """
    validate_is_instance(granule_a, Granule, "granule_a is not a proper Granule")
    validate_is_instance(granule_b, Granule, "granule_b is not a proper Granule")

    tt_a = TaxyTool.load_from_granule(granule_a)
    tt_b = TaxyTool.load_from_granule(granule_b)
    if tt_a != tt_b:
        raise BadRequest("Can't combine the two granules, they do not have the same taxonomy.")

    rdt_new = RecordDictionaryTool(tt_a)
    rdt_a = RecordDictionaryTool.load_from_granule(granule_a)
    rdt_b = RecordDictionaryTool.load_from_granule(granule_b)

    for k in rdt_a.iterkeys():
        rdt_new[k] = np.append(rdt_a[k], rdt_b[k])
    return build_granule(granule_a.data_producer_id, tt_a, rdt_new)

def test_combine_granule(self):
    tt = TaxyTool()
    tt.add_taxonomy_set('a')

    rdt = RecordDictionaryTool(tt)
    rdt['a'] = numpy.array([1, 2, 3])
    granule1 = build_granule('test', tt, rdt)

    rdt = RecordDictionaryTool(tt)
    rdt['a'] = numpy.array([4, 5, 6])
    granule2 = build_granule('test', tt, rdt)

    granule3 = combine_granules(granule1, granule2)

    rdt = RecordDictionaryTool.load_from_granule(granule3)
    self.assertTrue(numpy.allclose(rdt['a'], numpy.array([1, 2, 3, 4, 5, 6])))

def test_init(self):
    # initialize with a taxonomy tool
    rdt = RecordDictionaryTool(taxonomy=self._tx)
    self.assertIsInstance(rdt._tx, TaxyTool)

    # initialize with a taxonomy object
    rdt = RecordDictionaryTool(taxonomy=self._tx._t)
    self.assertIsInstance(rdt._tx, TaxyTool)

    # initialize with garbage input
    self.assertRaises(TypeError, RecordDictionaryTool, ['foo', 'barr'])

    # initialize with a valid shape
    rdt = RecordDictionaryTool(taxonomy=self._tx, shape=(5, 2))
    self.assertEquals(rdt._shp, (5, 2))

    rdt = RecordDictionaryTool(taxonomy=self._tx, shape=(5,))
    self.assertEquals(rdt._shp, (5,))

    rdt = RecordDictionaryTool(taxonomy=self._tx, shape=5)
    self.assertEquals(rdt._shp, (5,))

    # initialize with no length
    rdt = RecordDictionaryTool(taxonomy=self._tx)
    self.assertEquals(rdt._shp, None)

    # initialize with an invalid shape argument
    self.assertRaises(TypeError, RecordDictionaryTool, self._tx, 'not an int')

def test_pretty_print(self):
    temp_array = numpy.random.standard_normal(100)
    cond_array = numpy.random.standard_normal(100)
    pres_array = numpy.random.standard_normal(100)

    self._rdt['temp'] = temp_array
    self._rdt['cond'] = cond_array
    self._rdt['pres'] = pres_array

    rdt = RecordDictionaryTool(taxonomy=self._tx)
    rdt['rdt'] = temp_array
    self._rdt['rdt'] = rdt

    self.assertGreater(len(self._rdt.pretty_print()), 0)

def launch_benchmark(transform_number=1, primer=1, message_length=4):
    import gevent
    from gevent.greenlet import Greenlet
    from pyon.util.containers import DotDict
    from pyon.net.transport import NameTrio
    from pyon.net.endpoint import Publisher
    import numpy
    from pyon.ion.granule.record_dictionary import RecordDictionaryTool
    from pyon.ion.granule.taxonomy import TaxyTool
    from pyon.ion.granule.granule import build_granule

    tt = TaxyTool()
    tt.add_taxonomy_set('a')

    import uuid
    num = transform_number
    msg_len = message_length
    transforms = list()
    pids = 1
    TransformBenchTesting.message_length = message_length
    cc = Container.instance
    pub = Publisher(to_name=NameTrio(get_sys_name(), str(uuid.uuid4())[0:6]))
    for i in xrange(num):
        tbt = cc.proc_manager._create_service_instance(
            str(pids), 'tbt', 'prototype.transforms.linear', 'TransformInPlaceNewGranule',
            DotDict({'process': {'name': 'tbt%d' % pids, 'transform_id': pids}}))
        tbt.init()
        tbt.start()
        gevent.sleep(0.2)
        for i in xrange(primer):
            rd = RecordDictionaryTool(tt, message_length)
            rd['a'] = numpy.arange(message_length)
            gran = build_granule(data_producer_id='dp_id', taxonomy=tt, record_dictionary=rd)
            pub.publish(gran)

        g = Greenlet(tbt.perf)
        g.start()
        transforms.append(tbt)
        pids += 1

def get_visualization_image(self, data_product_id='', visualization_parameters={}, callback=''):

    # Error check
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")

    query = None
    if visualization_parameters:
        if visualization_parameters.has_key('query'):
            query = visualization_parameters['query']

    # get the dataset_id associated with the data_product. Need it to do the data retrieval
    ds_ids, _ = self.rrclient.find_objects(data_product_id, PRED.hasDataset, RT.DataSet, True)
    if ds_ids == None or len(ds_ids) == 0:
        return None

    # Ideally just need the latest granule to figure out the list of images
    #replay_granule = self.data_retriever.retrieve(ds_ids[0],{'start_time':0,'end_time':2})
    retrieved_granule = self.data_retriever.retrieve(ds_ids[0])
    if retrieved_granule == None:
        return None

    mpl_transform = VizTransformMatplotlibGraphs()
    mpl_data_granule = mpl_transform.execute(retrieved_granule)

    #mpl_data_granule = self.data_retriever.retrieve(ds_ids[0], module="ion.processes.data.transforms.viz.matplotlib_graphs", cls="VizTransformMatplotlibGraphs")
    #if not mpl_data_granule:
    #    return None

    mpl_rdt = RecordDictionaryTool.load_from_granule(mpl_data_granule)
    temp_mpl_graph_list = get_safe(mpl_rdt, "matplotlib_graphs")
    mpl_graph = temp_mpl_graph_list[0]

    # restructure the mpl graphs in to a simpler dict that will be passed through
    ret_dict = {}
    ret_dict['content_type'] = mpl_graph['content_type']
    ret_dict['image_name'] = mpl_graph['image_name']
    # reason for encoding as base64 string is otherwise message pack complains about the bit stream
    ret_dict['image_obj'] = base64.encodestring(mpl_graph['image_obj'])
    #ret_dict['image_obj'] = mpl_graph['image_obj']

    if callback == '':
        return ret_dict
    else:
        return callback + "(" + simplejson.dumps(ret_dict) + ")"

def _get_data(cls, config):
    """
    A generator that retrieves config['constraints']['count'] number of sequential Fibonacci numbers
    @param config Dict of configuration parameters - must contain ['constraints']['count']
    """
    cnt = get_safe(config, 'constraints.count', 1)
    max_rec = get_safe(config, 'max_records', 1)
    dprod_id = get_safe(config, 'data_producer_id')
    tx_yml = get_safe(config, 'taxonomy')
    ttool = TaxyTool.load(tx_yml)

    def fibGenerator():
        """
        A Fibonacci sequence generator
        """
        count = 0
        ret = []
        a, b = 1, 1
        while 1:
            count += 1
            ret.append(a)
            if count == max_rec:
                yield np.array(ret)
                ret = []
                count = 0
            a, b = b, a + b

    gen = fibGenerator()
    cnt = cls._calc_iter_cnt(cnt, max_rec)
    for i in xrange(cnt):
        rdt = RecordDictionaryTool(taxonomy=ttool)
        d = gen.next()
        rdt['data'] = d
        g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
        yield g

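# A hypothetical driver for the generator above (a sketch, not part of the
# source). 'tx_yml' is assumed to be the serialized YAML of a TaxyTool that
# defines the 'data' nickname, and SomeDataHandler stands in for whatever
# class owns _get_data; the config keys mirror the get_safe lookups above:
config = {
    'constraints': {'count': 10},    # how many Fibonacci numbers to produce
    'max_records': 5,                # samples packed into each granule
    'data_producer_id': 'fib_producer',
    'taxonomy': tx_yml,
}
for g in SomeDataHandler._get_data(config):
    rdt = RecordDictionaryTool.load_from_granule(g)
    log.info('Received Fibonacci batch: %s', rdt['data'])
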
def _get_data(cls, config):
    """
    Retrieves a random sample of length config['constraints']['array_len'] and yields it in granules of at most config['max_records'] values
    @param config Dict of configuration parameters - must contain ['constraints']['array_len']
    """
    array_len = get_safe(config, 'constraints.array_len', 1)

    max_rec = get_safe(config, 'max_records', 1)
    dprod_id = get_safe(config, 'data_producer_id')
    tx_yml = get_safe(config, 'taxonomy')
    ttool = TaxyTool.load(tx_yml)

    arr = npr.random_sample(array_len)
    log.debug('Array to send using max_rec={0}: {1}'.format(max_rec, arr))
    cnt = cls._calc_iter_cnt(arr.size, max_rec)
    for x in xrange(cnt):
        rdt = RecordDictionaryTool(taxonomy=ttool)
        d = arr[x * max_rec:(x + 1) * max_rec]
        rdt['data'] = d
        g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
        yield g

def _validate_mpl_graphs_results(self, results_stream_def, results):
    cc = self.container
    assertions = self.assertTrue

    for g in results:
        if isinstance(g, Granule):
            tx = TaxyTool.load_from_granule(g)
            rdt = RecordDictionaryTool.load_from_granule(g)
            #log.warn(tx.pretty_print())
            #log.warn(rdt.pretty_print())

            graphs = rdt['matplotlib_graphs']
            for graph in graphs:
                assertions(graph['viz_product_type'] == 'matplotlib_graphs')
                # check to see if the list (numpy array) contains actual images
                assertions(imghdr.what(graph['image_name'], graph['image_obj']) == 'png')

    return

def execute(self, granule):
    log.debug('(Google DT transform): Received Viz Data Packet')

    self.dataDescription = []
    self.dataTableContent = []
    element_count_id = 0
    expected_range = []

    # NOTE : Detect somehow that this is a replay stream with a set number of expected granules. Based on this
    # calculate the number of expected records and set the self.realtime_window_size bigger or equal to this
    # number.

    psd = PointSupplementStreamParser(stream_definition=self.incoming_stream_def, stream_granule=granule)
    vardict = {}
    arrLen = None
    for varname in psd.list_field_names():
        vardict[varname] = psd.get_values(varname)
        arrLen = len(vardict[varname])

    # init the dataTable
    # create data description from the variables in the message
    self.dataDescription = [('time', 'datetime', 'time')]

    # split the data string to extract variable names
    for varname in psd.list_field_names():
        if varname == 'time':
            continue
        self.dataDescription.append((varname, 'number', varname))

    # Add the records to the datatable
    for i in xrange(arrLen):
        varTuple = []

        for varname, _, _ in self.dataDescription:
            val = float(vardict[varname][i])
            if varname == 'time':
                #varTuple.append(datetime.fromtimestamp(val))
                varTuple.append(val)
            else:
                varTuple.append(val)

        # Append the tuples to the data table
        self.dataTableContent.append(varTuple)

        # Maintain a sliding window for realtime transform processes
        if len(self.dataTableContent) > self.realtime_window_size:
            # always pop the first element till window size is what we want
            while len(self.dataTableContent) > self.realtime_window_size:
                self.dataTableContent.pop(0)

    """ To Do : Do we need to figure out the how many granules have been received for a replay stream ??

    if not self.realtime_flag:
        # This is the historical view part. Make a note of how many records were received
        in_data_stream_id = self.incoming_stream_def.data_stream_id
        element_count_id = self.incoming_stream_def.identifiables[in_data_stream_id].element_count_id
        # From each granule you can check the constraint on the number of records expected
        expected_range = granule.identifiables[element_count_id].constraint.intervals[0]
        # The number of records in a given packet is:
        self.total_num_of_records_recvd += packet.identifiables[element_count_id].value
    """

    # define an output container of data
    # submit the partial datatable to the viz service
    rdt = RecordDictionaryTool(taxonomy=tx)

    # submit resulting table back using the out stream publisher. The data_product_id is the input dp_id
    # responsible for the incoming data
    msg = {"viz_product_type": "google_realtime_dt",
           "data_product_id": "FAKE_DATAPRODUCT_ID_0000",
           "data_table_description": self.dataDescription,
           "data_table_content": self.dataTableContent}

    rdt['google_dt_components'] = numpy.array([msg])

    log.debug('Google DT transform: Sending a granule')
    out_granule = build_granule(data_producer_id='google_dt_transform', taxonomy=tx, record_dictionary=rdt)

    #self.publish(out_granule)

    # clear the tuple for future use
    self.varTuple[:] = []

    return out_granule

def _get_data(cls, config):
    """
    Retrieves the temporal slice config['constraints']['temporal_slice'] of data from the external dataset
    @param config Dict of configuration parameters - must contain ['constraints']['temporal_slice']
    """
    ext_dset_res = get_safe(config, 'external_dataset_res', None)

    # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
    ds = get_safe(config, 'dataset_object')

    if ext_dset_res and ds:
        t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
        x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
        y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
        z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
        var_lst = ext_dset_res.dataset_description.parameters['variables']

        t_slice = get_safe(config, 'constraints.temporal_slice', (slice(0, 1)))
        #TODO: Using 'eval' here is BAD - need to find a less sketchy way to pass constraints
        if isinstance(t_slice, str):
            t_slice = eval(t_slice)

        lon = ds.variables[x_vname][:]
        lat = ds.variables[y_vname][:]
        z = ds.variables[z_vname][:]

        t_arr = ds.variables[t_vname][t_slice]
        data_arrays = {}
        for varn in var_lst:
            data_arrays[varn] = ds.variables[varn][t_slice]

        max_rec = get_safe(config, 'max_records', 1)
        dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
        tx_yml = get_safe(config, 'taxonomy')
        ttool = TaxyTool.load(tx_yml)  #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool

        cnt = cls._calc_iter_cnt(t_arr.size, max_rec)
        for x in xrange(cnt):
            ta = t_arr[x * max_rec:(x + 1) * max_rec]

            # Make a 'master' RecDict
            rdt = RecordDictionaryTool(taxonomy=ttool)

            # Make a 'coordinate' RecDict
            rdt_c = RecordDictionaryTool(taxonomy=ttool)

            # Make a 'data' RecDict
            rdt_d = RecordDictionaryTool(taxonomy=ttool)

            # Assign values to the coordinate RecDict
            rdt_c[x_vname] = lon
            rdt_c[y_vname] = lat
            rdt_c[z_vname] = z

            # Assign values to the data RecDict
            rdt_d[t_vname] = ta
            for key, arr in data_arrays.iteritems():
                d = arr[x * max_rec:(x + 1) * max_rec]
                rdt_d[key] = d

            # Add the coordinate and data RecDicts to the master RecDict
            rdt['coords'] = rdt_c
            rdt['data'] = rdt_d

            # Build and return a granule
            # CBM: ttool must be passed
            g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
            yield g

        ds.close()

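# A hypothetical consumer of the granules yielded above (a sketch, not part of
# the source). The 'coords' and 'data' keys mirror the nested
# RecordDictionaryTool layout built in _get_data; ExternalDatasetHandler stands
# in for whatever class owns the method, and config is assumed prepared:
for g in ExternalDatasetHandler._get_data(config):
    rdt = RecordDictionaryTool.load_from_granule(g)
    coords = rdt['coords']   # nested RecDict holding the lon/lat/z arrays
    data = rdt['data']       # nested RecDict holding time and variable slices
    log.debug('coords:\n%s\ndata:\n%s', coords.pretty_print(), data.pretty_print())
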
class RecordDictionaryToolTestCase(unittest.TestCase):

    def setUp(self):
        self._tx = TaxyTool()
        self._tx.add_taxonomy_set('temp', 'long_temp_name')
        self._tx.add_taxonomy_set('cond', 'long_cond_name')
        self._tx.add_taxonomy_set('pres', 'long_pres_name')
        self._tx.add_taxonomy_set('rdt')
        self._tx.add_taxonomy_set('rdt2')
        # map is {<local name>: <granule name or path>}

        self._rdt = RecordDictionaryTool(taxonomy=self._tx)

    def test_init(self):
        # initialize with a taxonomy tool
        rdt = RecordDictionaryTool(taxonomy=self._tx)
        self.assertIsInstance(rdt._tx, TaxyTool)

        # initialize with a taxonomy object
        rdt = RecordDictionaryTool(taxonomy=self._tx._t)
        self.assertIsInstance(rdt._tx, TaxyTool)

        # initialize with garbage input
        self.assertRaises(TypeError, RecordDictionaryTool, ['foo', 'barr'])

        # initialize with a valid shape
        rdt = RecordDictionaryTool(taxonomy=self._tx, shape=(5, 2))
        self.assertEquals(rdt._shp, (5, 2))

        rdt = RecordDictionaryTool(taxonomy=self._tx, shape=(5,))
        self.assertEquals(rdt._shp, (5,))

        rdt = RecordDictionaryTool(taxonomy=self._tx, shape=5)
        self.assertEquals(rdt._shp, (5,))

        # initialize with no length
        rdt = RecordDictionaryTool(taxonomy=self._tx)
        self.assertEquals(rdt._shp, None)

        # initialize with an invalid shape argument
        self.assertRaises(TypeError, RecordDictionaryTool, self._tx, 'not an int')

    def test_set_and_get(self):
        #make sure you can set and get items in the granule by name in the taxonomy
        temp_array = numpy.random.standard_normal(100)
        cond_array = numpy.random.standard_normal(100)
        pres_array = numpy.random.standard_normal(100)

        self.assertRaises(KeyError, self._rdt.__setitem__, 'long_temp_name', temp_array)
        self.assertRaises(KeyError, self._rdt.__setitem__, 'nonsense', temp_array)

        self._rdt['temp'] = temp_array
        self._rdt['cond'] = cond_array
        self._rdt['pres'] = pres_array

        self.assertTrue(numpy.allclose(self._rdt['temp'], temp_array))
        self.assertTrue(numpy.allclose(self._rdt['cond'], cond_array))
        self.assertTrue(numpy.allclose(self._rdt['pres'], pres_array))

        #want to check to make sure a KeyError is raised when a non-nickname key is used, but it's not working correctly
        self.assertRaises(KeyError, self._rdt.__getitem__, 'long_temp_name')
        self.assertRaises(KeyError, self._rdt.__getitem__, 'nonsense!')

        taxy_tool_obj = self._tx
        rdt = RecordDictionaryTool(taxonomy=taxy_tool_obj)
        rdt['temp'] = temp_array
        self._rdt['rdt'] = rdt

        # Now test when the Record Dictionary Tool is created with the Taxonomy object rather than the TaxyTool
        # This can fail if the == method for TaxyTool is implemented incorrectly
        taxonomy_ion_obj = self._tx._t
        rdt2 = RecordDictionaryTool(taxonomy=taxonomy_ion_obj)
        rdt2['temp'] = temp_array
        self._rdt['rdt2'] = rdt2

        # Now test bad values... list not numpy array...
        with self.assertRaises(TypeError) as te:
            rdt2['temp'] = [1, 2, 3]

        self.assertEquals(
            te.exception.message,
            '''Invalid type "<type 'list'>" in Record Dictionary Tool setitem with name "temp". Valid types are numpy.ndarray and RecordDictionaryTool''')

        # Now test numpy scalar array...
        with self.assertRaises(TypeError) as te:
            rdt2['temp'] = numpy.float32(3.14159)

        self.assertEquals(
            te.exception.message,
            '''Invalid type "<type 'numpy.float32'>" in Record Dictionary Tool setitem with name "temp". Valid types are numpy.ndarray and RecordDictionaryTool''')

        # Now test rank zero array...
        with self.assertRaises(ValueError) as te:
            rdt2['temp'] = numpy.array(22.5)

        self.assertEquals(
            te.exception.message,
            '''The rank of a value sequence array in a record dictionary must be greater than zero. Got name "temp" with rank "0"''')

        # Test set invalid shape
        pres_array = numpy.random.standard_normal(90)
        with self.assertRaises(ValueError) as te:
            rdt2['pres'] = pres_array

        self.assertEquals(
            te.exception.message,
            '''Invalid array shape "(90,)" for name "pres"; Record dictionary defined shape is "(100,)"''')

        # make a new RDT for testing higher rank objects...
        taxy_tool_obj = self._tx
        rdt = RecordDictionaryTool(taxonomy=taxy_tool_obj)

        # Now test rank 2 array...
        rdt['temp'] = numpy.array([[22.5, ], ])
        self.assertTrue((rdt['temp'] == numpy.array([[22.5, ], ])).all())

        # Now test rank 2 array...
        rdt['cond'] = numpy.array([[28.5, ], ])
        self.assertTrue((rdt['cond'] == numpy.array([[28.5, ], ])).all())

    def test_iteration(self):
        #Test all four iteration methods for items in the granule
        temp_array = numpy.random.standard_normal(100)
        cond_array = numpy.random.standard_normal(100)
        pres_array = numpy.random.standard_normal(100)

        self._rdt['temp'] = temp_array
        self._rdt['cond'] = cond_array
        self._rdt['pres'] = pres_array

        for k, v in self._rdt.iteritems():
            if k == 'temp':
                self.assertTrue(numpy.allclose(temp_array, v))
            elif k == 'cond':
                self.assertTrue(numpy.allclose(cond_array, v))
            elif k == 'pres':
                self.assertTrue(numpy.allclose(pres_array, v))
            else:
                self.assertTrue(False)

        for k in self._rdt.iterkeys():
            self.assertTrue(k == 'temp' or k == 'cond' or k == 'pres')

        for v in self._rdt.itervalues():
            self.assertTrue(numpy.allclose(temp_array, v) or numpy.allclose(cond_array, v) or numpy.allclose(pres_array, v))

        for k in self._rdt:
            self.assertTrue(k == 'temp' or k == 'cond' or k == 'pres')

    def test_update(self):
        # Update this granule with the content of another. Assert that the taxonomies are the same...
        pres_array = numpy.random.standard_normal(100)
        self._rdt['pres'] = pres_array

        rdt2 = RecordDictionaryTool(taxonomy=self._tx)
        temp_array = numpy.random.standard_normal(100)
        cond_array = numpy.random.standard_normal(100)
        rdt2['temp'] = temp_array
        rdt2['cond'] = cond_array

        self._rdt.update(rdt2)

        self.assertIn('pres', self._rdt)
        self.assertIn('temp', self._rdt)
        self.assertIn('cond', self._rdt)

        self.assertTrue((self._rdt['pres'] == pres_array).all())
        self.assertTrue((self._rdt['cond'] == cond_array).all())
        self.assertTrue((self._rdt['temp'] == temp_array).all())

        self.assertEquals(len(self._rdt), 3)

    def test_len(self):
        temp_array = numpy.random.standard_normal(100)
        cond_array = numpy.random.standard_normal(100)
        pres_array = numpy.random.standard_normal(100)

        self._rdt['temp'] = temp_array
        self._rdt['cond'] = cond_array
        self._rdt['pres'] = pres_array

        self.assertEquals(len(self._rdt), 3)

    def test_repr(self):
        # Come up with a reasonable string representation of the granule for debug purposes only
        temp_array = numpy.random.standard_normal(100)
        cond_array = numpy.random.standard_normal(100)
        pres_array = numpy.random.standard_normal(100)

        self._rdt['temp'] = temp_array
        self._rdt['cond'] = cond_array
        self._rdt['pres'] = pres_array

        self.assertTrue(len(repr(self._rdt)) > 0)

    def test_delete(self):
        temp_array = numpy.random.standard_normal(100)
        cond_array = numpy.random.standard_normal(100)
        pres_array = numpy.random.standard_normal(100)

        self._rdt['temp'] = temp_array
        self._rdt['cond'] = cond_array
        self._rdt['pres'] = pres_array

        self.assertIn('pres', self._rdt)
        self.assertIn('temp', self._rdt)
        self.assertIn('cond', self._rdt)

        del self._rdt['pres']

        self.assertNotIn('pres', self._rdt)
        self.assertIn('temp', self._rdt)
        self.assertIn('cond', self._rdt)

    def test_contains(self):
        # foobar isn't even in the taxonomy!
        self.assertNotIn('foobar', self._rdt)

        # Temp is in the taxonomy but not the record dictionary
        self.assertNotIn('temp', self._rdt)

        # Now put in some data and make sure it works...
        temp_array = numpy.random.standard_normal(100)
        self._rdt['temp'] = temp_array

        self.assertIn('temp', self._rdt)

    def test_pretty_print(self):
        temp_array = numpy.random.standard_normal(100)
        cond_array = numpy.random.standard_normal(100)
        pres_array = numpy.random.standard_normal(100)

        self._rdt['temp'] = temp_array
        self._rdt['cond'] = cond_array
        self._rdt['pres'] = pres_array

        rdt = RecordDictionaryTool(taxonomy=self._tx)
        rdt['rdt'] = temp_array
        self._rdt['rdt'] = rdt

        self.assertGreater(len(self._rdt.pretty_print()), 0)

def _process_visualization_message(self, messages):

    gdt_description = None
    gdt_content = []
    viz_product_type = ''

    for message in messages:

        message_data = message.body

        if isinstance(message_data, Granule):

            tx = TaxyTool.load_from_granule(message_data)
            rdt = RecordDictionaryTool.load_from_granule(message_data)

            gdt_components = get_safe(rdt, 'google_dt_components')

            # IF this granule does not contain google dt, skip
            if gdt_components is None:
                continue

            gdt_component = gdt_components[0]
            viz_product_type = gdt_component['viz_product_type']

            # Process Google DataTable messages
            if viz_product_type == 'google_dt':

                # If the data description is being put together for the first time,
                # switch the time format from float to datetime
                if gdt_description == None:
                    temp_gdt_description = gdt_component['data_description']
                    gdt_description = [('time', 'datetime', 'time')]

                    for idx in range(1, len(temp_gdt_description)):
                        # for some weird reason need to force convert to tuples
                        temp_arr = temp_gdt_description[idx]
                        if temp_arr != None:
                            gdt_description.append((temp_arr[0], temp_arr[1], temp_arr[2]))

                # append all content to one big array
                temp_gdt_content = gdt_component['data_content']
                for tempTuple in temp_gdt_content:
                    # sometimes there are inexplicable empty tuples in the content. Drop them
                    if tempTuple == [] or len(tempTuple) == 0:
                        continue

                    varTuple = []
                    varTuple.append(datetime.fromtimestamp(tempTuple[0]))
                    for idx in range(1, len(tempTuple)):
                        varTuple.append(tempTuple[idx])

                    gdt_content.append(varTuple)

            #TODO - what to do if this is not a valid visualization message?

    # Now that all the messages have been parsed, any last processing should be done here
    if viz_product_type == "google_dt":

        # Using the description and content, build the google data table
        gdt = gviz_api.DataTable(gdt_description)
        gdt.LoadData(gdt_content)

        return gdt.ToJSonResponse()

    return None

def test_set_and_get(self):
    #make sure you can set and get items in the granule by name in the taxonomy
    temp_array = numpy.random.standard_normal(100)
    cond_array = numpy.random.standard_normal(100)
    pres_array = numpy.random.standard_normal(100)

    self.assertRaises(KeyError, self._rdt.__setitem__, 'long_temp_name', temp_array)
    self.assertRaises(KeyError, self._rdt.__setitem__, 'nonsense', temp_array)

    self._rdt['temp'] = temp_array
    self._rdt['cond'] = cond_array
    self._rdt['pres'] = pres_array

    self.assertTrue(numpy.allclose(self._rdt['temp'], temp_array))
    self.assertTrue(numpy.allclose(self._rdt['cond'], cond_array))
    self.assertTrue(numpy.allclose(self._rdt['pres'], pres_array))

    #want to check to make sure a KeyError is raised when a non-nickname key is used, but it's not working correctly
    self.assertRaises(KeyError, self._rdt.__getitem__, 'long_temp_name')
    self.assertRaises(KeyError, self._rdt.__getitem__, 'nonsense!')

    taxy_tool_obj = self._tx
    rdt = RecordDictionaryTool(taxonomy=taxy_tool_obj)
    rdt['temp'] = temp_array
    self._rdt['rdt'] = rdt

    # Now test when the Record Dictionary Tool is created with the Taxonomy object rather than the TaxyTool
    # This can fail if the == method for TaxyTool is implemented incorrectly
    taxonomy_ion_obj = self._tx._t
    rdt2 = RecordDictionaryTool(taxonomy=taxonomy_ion_obj)
    rdt2['temp'] = temp_array
    self._rdt['rdt2'] = rdt2

    # Now test bad values... list not numpy array...
    with self.assertRaises(TypeError) as te:
        rdt2['temp'] = [1, 2, 3]

    self.assertEquals(
        te.exception.message,
        '''Invalid type "<type 'list'>" in Record Dictionary Tool setitem with name "temp". Valid types are numpy.ndarray and RecordDictionaryTool''')

    # Now test numpy scalar array...
    with self.assertRaises(TypeError) as te:
        rdt2['temp'] = numpy.float32(3.14159)

    self.assertEquals(
        te.exception.message,
        '''Invalid type "<type 'numpy.float32'>" in Record Dictionary Tool setitem with name "temp". Valid types are numpy.ndarray and RecordDictionaryTool''')

    # Now test rank zero array...
    with self.assertRaises(ValueError) as te:
        rdt2['temp'] = numpy.array(22.5)

    self.assertEquals(
        te.exception.message,
        '''The rank of a value sequence array in a record dictionary must be greater than zero. Got name "temp" with rank "0"''')

    # Test set invalid shape
    pres_array = numpy.random.standard_normal(90)
    with self.assertRaises(ValueError) as te:
        rdt2['pres'] = pres_array

    self.assertEquals(
        te.exception.message,
        '''Invalid array shape "(90,)" for name "pres"; Record dictionary defined shape is "(100,)"''')

    # make a new RDT for testing higher rank objects...
    taxy_tool_obj = self._tx
    rdt = RecordDictionaryTool(taxonomy=taxy_tool_obj)

    # Now test rank 2 array...
    rdt['temp'] = numpy.array([[22.5, ], ])
    self.assertTrue((rdt['temp'] == numpy.array([[22.5, ], ])).all())

    # Now test rank 2 array...
    rdt['cond'] = numpy.array([[28.5, ], ])
    self.assertTrue((rdt['cond'] == numpy.array([[28.5, ], ])).all())

def get_visualization_data(self, data_product_id='', visualization_parameters={}, callback=''):
    """Retrieves the data for the specified DP and sends a token back which can be checked in
    a non-blocking fashion till data is ready

    @param data_product_id    str
    @param visualization_parameters    str
    @param callback    str
    @retval jsonp_visualization_data    str
    @throws NotFound    object with specified id, query does not exist
    """

    # error check
    if not data_product_id:
        raise BadRequest("The data_product_id parameter is missing")

    gdt = None
    query = None
    if visualization_parameters:
        if visualization_parameters.has_key('query'):
            query = visualization_parameters['query']

    # get the dataset_id associated with the data_product. Need it to do the data retrieval
    ds_ids, _ = self.rrclient.find_objects(data_product_id, PRED.hasDataset, RT.DataSet, True)
    if ds_ids == None or len(ds_ids) == 0:
        return None

    # Ideally just need the latest granule to figure out the list of images
    #replay_granule = self.data_retriever.retrieve(ds_ids[0],{'start_time':0,'end_time':2})
    retrieved_granule = self.data_retriever.retrieve(ds_ids[0])
    if retrieved_granule == None:
        return None

    # send the granule through the transform to get the google datatable
    gdt_transform = VizTransformGoogleDT()
    gdt_data_granule = gdt_transform.execute(retrieved_granule)

    #gdt_data_granule = self.data_retriever.retrieve(ds_ids[0], module="ion.processes.data.transforms.viz.google_dt", cls="VizTransformGoogleDT")
    #if not gdt_data_granule:
    #    return None

    gdt_rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
    gdt_components = get_safe(gdt_rdt, "google_dt_components")
    temp_gdt_description = gdt_components[0]["data_description"]
    temp_gdt_content = gdt_components[0]["data_content"]

    # adjust the 'float' time to datetime in the content
    gdt_description = [('time', 'datetime', 'time')]
    gdt_content = []

    for idx in range(1, len(temp_gdt_description)):
        gdt_description.append(temp_gdt_description[idx])

    for tempTuple in temp_gdt_content:
        # sometimes there are inexplicable empty tuples in the content. Drop them
        if tempTuple == [] or len(tempTuple) == 0:
            continue

        varTuple = []
        varTuple.append(datetime.fromtimestamp(tempTuple[0]))
        for idx in range(1, len(tempTuple)):
            varTuple.append(tempTuple[idx])

        gdt_content.append(varTuple)

    # now generate the Google datatable out of the description and content
    gdt = gviz_api.DataTable(gdt_description)
    gdt.LoadData(gdt_content)

    # return the json version of the table
    if callback == '':
        return gdt.ToJSonResponse()
    else:
        return callback + "(\"" + gdt.ToJSonResponse() + "\")"