def _get_data(cls, config):
    """
    Iterable function that acquires data from a source iteratively based on constraints provided by config
    Passed into BaseDataHandler._publish_data and iterated to publish samples.
    @param config dict containing configuration parameters, may include constraints, formatters, etc
    @retval an iterable that returns well-formed Granule objects on each iteration
    """
    new_flst = get_safe(config, 'constraints.new_files', [])
    hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE)

    for f in new_flst:
        try:
            parser = SlocumParser(f[0], hdr_cnt)
            #CBM: Not in use yet...
            #ext_dset_res = get_safe(config, 'external_dataset_res', None)
            #t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
            #x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
            #y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
            #z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
            #var_lst = ext_dset_res.dataset_description.parameters['variables']

            max_rec = get_safe(config, 'max_records', 1)
            dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
            #tx_yml = get_safe(config, 'taxonomy')
            #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool
            pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

            # Iterate over the number of parsed records, not the number of sensors
            cnt = calculate_iteration_count(len(parser.data_map[parser.data_map.keys()[0]]), max_rec)
            for x in xrange(cnt):
                #rdt = RecordDictionaryTool(taxonomy=ttool)
                rdt = RecordDictionaryTool(param_dictionary=pdict)
                for name in parser.sensor_map:
                    d = parser.data_map[name][x * max_rec:(x + 1) * max_rec]
                    rdt[name] = d
                #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
                yield g
        except SlocumParseException as spe:
            # TODO: Decide what to do here, raise an exception or carry on
            log.error('Error parsing data file \'{0}\': {1}'.format(f, spe))
def _get_data(cls, config):
    """
    Iterable function that acquires data from an external dataset based on constraints provided by config
    Passed into BaseDataHandler._publish_data and iterated to publish samples.
    @param config dict containing configuration parameters - must contain 'external_dataset_res' and 'dataset_object'
    @retval an iterable that returns well-formed Granule objects on each iteration
    """
    ext_dset_res = get_safe(config, 'external_dataset_res', None)

    # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
    ds = get_safe(config, 'dataset_object')

    if ext_dset_res and ds:
        t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
        x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
        y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
        z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
        var_lst = ext_dset_res.dataset_description.parameters['variables']

        t_slice = get_safe(config, 'constraints.temporal_slice', (slice(0, 1)))
        #TODO: Using 'eval' here is BAD - need to find a less sketchy way to pass constraints
        if isinstance(t_slice, str):
            t_slice = eval(t_slice)

        lon = ds.variables[x_vname][:]
        lat = ds.variables[y_vname][:]
        z = ds.variables[z_vname][:]

        t_arr = ds.variables[t_vname][t_slice]
        data_arrays = {}
        for varn in var_lst:
            data_arrays[varn] = ds.variables[varn][t_slice]

        max_rec = get_safe(config, 'max_records', 1)
        #dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
        stream_def = get_safe(config, 'stream_def')

        cnt = calculate_iteration_count(t_arr.size, max_rec)
        for x in xrange(cnt):
            ta = t_arr[x * max_rec:(x + 1) * max_rec]

            # Make a 'master' RecDict
            rdt = RecordDictionaryTool(stream_definition_id=stream_def)

            # Assign coordinate values to the RecDict
            rdt[x_vname] = lon
            rdt[y_vname] = lat
            rdt[z_vname] = z

            # Assign data values to the RecDict
            rdt[t_vname] = ta
            for key, arr in data_arrays.iteritems():
                d = arr[x * max_rec:(x + 1) * max_rec]
                rdt[key] = d

            g = rdt.to_granule()
            yield g

        ds.close()
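The TODO above leaves eval in place for turning a 'constraints.temporal_slice' string back into a slice object. One less sketchy option is to accept only literal slice syntax; the helper below is a minimal sketch of that idea, not part of the handler codebase (parse_slice_str and its regex are hypothetical names):

import re

# Matches 'slice(start, stop)' or 'slice(start, stop, step)' with int or None fields
_SLICE_RE = re.compile(r'^slice\(\s*(-?\d+|None)\s*,\s*(-?\d+|None)\s*(?:,\s*(-?\d+|None)\s*)?\)$')

def parse_slice_str(s):
    """Parse a slice literal such as 'slice(0, 10)' without eval; raise ValueError on anything else."""
    m = _SLICE_RE.match(s.strip())
    if not m:
        raise ValueError('not a valid slice literal: %r' % s)
    start, stop, step = [None if g in (None, 'None') else int(g) for g in m.groups()]
    return slice(start, stop, step)

# In _get_data this would replace the eval call: t_slice = parse_slice_str(t_slice)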
def _get_data(cls, config):
    """
    Iterable function that acquires data from a source iteratively based on constraints provided by config
    Passed into BaseDataHandler._publish_data and iterated to publish samples.
    @param config dict containing configuration parameters, may include constraints, formatters, etc
    @retval an iterable that returns well-formed Granule objects on each iteration
    """
    new_flst = get_safe(config, 'constraints.new_files', [])
    hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE)

    for f in new_flst:
        try:
            parser = SlocumParser(f[0], hdr_cnt)
            #CBM: Not in use yet...
            #ext_dset_res = get_safe(config, 'external_dataset_res', None)
            #t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
            #x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
            #y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
            #z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
            #var_lst = ext_dset_res.dataset_description.parameters['variables']

            max_rec = get_safe(config, 'max_records', 1)
            dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
            stream_def = get_safe(config, 'stream_def')

            cnt = calculate_iteration_count(len(parser.data_map[parser.data_map.keys()[0]]), max_rec)
            for x in xrange(cnt):
                #rdt = RecordDictionaryTool(taxonomy=ttool)
                rdt = RecordDictionaryTool(stream_definition_id=stream_def)
                for name in parser.sensor_map:
                    d = parser.data_map[name][x * max_rec:(x + 1) * max_rec]
                    rdt[name] = d
                #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                g = rdt.to_granule()
                yield g
        except SlocumParseException:
            # TODO: Decide what to do here, raise an exception or carry on
            log.error('Error parsing data file: \'{0}\''.format(f))
def _get_data(cls, config):
    """
    Retrieves a random sample array of length config['constraints']['array_len'] and yields it in chunks of at most config['max_records'] records
    @param config Dict of configuration parameters - must contain ['constraints']['array_len']
    """
    array_len = get_safe(config, 'constraints.array_len', 1)

    max_rec = get_safe(config, 'max_records', 1)
    dprod_id = get_safe(config, 'data_producer_id')
    #tx_yml = get_safe(config, 'taxonomy')
    #ttool = TaxyTool.load(tx_yml)
    pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

    arr = npr.random_sample(array_len)
    log.debug('Array to send using max_rec={0}: {1}'.format(max_rec, arr))
    cnt = calculate_iteration_count(arr.size, max_rec)
    for x in xrange(cnt):
        rdt = RecordDictionaryTool(param_dictionary=pdict)
        d = arr[x * max_rec:(x + 1) * max_rec]
        rdt['dummy'] = d
        g = rdt.to_granule()
        yield g
def _get_data(cls, config):
    """
    Retrieves a random sample array of length config['constraints']['array_len'] and yields it in chunks of at most config['max_records'] records
    @param config Dict of configuration parameters - must contain ['constraints']['array_len']
    """
    array_len = get_safe(config, 'constraints.array_len', 1)

    max_rec = get_safe(config, 'max_records', 1)
    #dprod_id = get_safe(config, 'data_producer_id')
    stream_def = get_safe(config, 'stream_def')

    arr = npr.random_sample(array_len)
    #log.debug('Array to send using max_rec={0}: {1}'.format(max_rec, arr))
    cnt = calculate_iteration_count(arr.size, max_rec)
    for x in xrange(cnt):
        rdt = RecordDictionaryTool(stream_definition_id=stream_def)
        d = arr[x * max_rec:(x + 1) * max_rec]
        rdt['dummy'] = d
        g = rdt.to_granule()
        yield g
def _get_data(cls, config):
    """
    A generator that retrieves config['constraints']['count'] number of sequential Fibonacci numbers
    @param config Dict of configuration parameters - must contain ['constraints']['count']
    """
    cnt = get_safe(config, 'constraints.count', 1)

    max_rec = get_safe(config, 'max_records', 1)
    dprod_id = get_safe(config, 'data_producer_id')
    #tx_yml = get_safe(config, 'taxonomy')
    #ttool = TaxyTool.load(tx_yml)
    pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

    def fibGenerator():
        """
        A Fibonacci sequence generator
        """
        count = 0
        ret = []
        a, b = 1, 1
        while 1:
            count += 1
            ret.append(a)
            if count == max_rec:
                yield np.array(ret)
                ret = []
                count = 0
            a, b = b, a + b

    gen = fibGenerator()
    cnt = calculate_iteration_count(cnt, max_rec)
    for i in xrange(cnt):
        rdt = RecordDictionaryTool(param_dictionary=pdict)
        d = gen.next()
        rdt['data'] = d
        g = rdt.to_granule()
        yield g
def _get_data(cls, config):
    """
    A generator that retrieves config['constraints']['count'] number of sequential Fibonacci numbers
    @param config Dict of configuration parameters - must contain ['constraints']['count']
    """
    cnt = get_safe(config, 'constraints.count', 1)

    max_rec = get_safe(config, 'max_records', 1)
    #dprod_id = get_safe(config, 'data_producer_id')
    stream_def = get_safe(config, 'stream_def')

    def fibGenerator():
        """
        A Fibonacci sequence generator
        """
        count = 0
        ret = []
        a, b = 1, 1
        while 1:
            count += 1
            ret.append(a)
            if count == max_rec:
                yield np.array(ret)
                ret = []
                count = 0
            a, b = b, a + b

    gen = fibGenerator()
    cnt = calculate_iteration_count(cnt, max_rec)
    for i in xrange(cnt):
        rdt = RecordDictionaryTool(stream_definition_id=stream_def)
        d = gen.next()
        rdt['data'] = d
        g = rdt.to_granule()
        yield g
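For eyeballing what the Fibonacci handlers above actually emit, here is a framework-free sketch of the same chunking logic, assuming only numpy (fib_chunks is a made-up name, and the ceil-division mirrors what calculate_iteration_count's tests assert). Note that, like fibGenerator, it always fills whole max_rec-sized chunks, so the final chunk can run past count:

import numpy as np

def fib_chunks(count, max_rec):
    """Yield the Fibonacci sequence in max_rec-sized numpy arrays covering at least count values."""
    a, b = 1, 1
    ret = []
    n_chunks = count // max_rec + (1 if count % max_rec else 0)
    for _ in range(n_chunks):
        while len(ret) < max_rec:
            ret.append(a)
            a, b = b, a + b
        yield np.array(ret)
        ret = []

# list(fib_chunks(9, 4)) -> [array([1, 1, 2, 3]), array([5, 8, 13, 21]), array([34, 55, 89, 144])]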
def _get_data(cls, config):
    """
    Iterable function that acquires data from a source iteratively based on constraints provided by config
    Passed into BaseDataHandler._publish_data and iterated to publish samples.
    @param config dict containing configuration parameters, may include constraints, formatters, etc
    @retval an iterable that returns well-formed Granule objects on each iteration
    """
    new_flst = get_safe(config, 'constraints.new_files', [])
    parser_mod = get_safe(config, 'parser_mod', '')
    parser_cls = get_safe(config, 'parser_cls', '')

    module = __import__(parser_mod, fromlist=[parser_cls])
    classobj = getattr(module, parser_cls)

    for f in new_flst:
        try:
            try:
                #find the new data check index in config
                index = -1
                for ndc in config['set_new_data_check']:
                    if ndc[0] == f[0]:
                        index = config['set_new_data_check'].index(ndc)
                        break
            except Exception:
                log.error('File name not found in attachment')

            parser = classobj(f[0], f[3])

            max_rec = get_safe(config, 'max_records', 1)
            dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
            stream_def = get_safe(config, 'stream_def')

            cnt = calculate_iteration_count(parser.record_count, max_rec)
            file_pos = -1
            for x in xrange(cnt):
                rdt = RecordDictionaryTool(stream_definition_id=stream_def)
                all_data = {}
                for name in parser.sensor_names:
                    all_data[name] = []
                for y in xrange(max_rec):
                    data_map, file_pos = parser.read_next_data()
                    if len(data_map.items()):
                        for name in parser.sensor_names:
                            all_data[name].append(data_map[name])
                for name in parser.sensor_names:
                    rdt[name] = all_data[name]
                g = rdt.to_granule()

                #update new data check with the latest file position
                if 'set_new_data_check' in config and index > -1:
                    config['set_new_data_check'][index] = (f[0], f[1], f[2], file_pos)

                yield g
            parser.close()
        except HYPMException as ex:
            # TODO: Decide what to do here, raise an exception or carry on
            log.error('Error parsing data file \'{0}\': {1}'.format(f, ex))
def _get_data(cls, config):
    """
    Iterable function that acquires data from an external dataset based on constraints provided by config
    Passed into BaseDataHandler._publish_data and iterated to publish samples.
    @param config dict containing configuration parameters - must contain 'external_dataset_res' and 'dataset_object'
    @retval an iterable that returns well-formed Granule objects on each iteration
    """
    ext_dset_res = get_safe(config, 'external_dataset_res', None)

    # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
    ds = get_safe(config, 'dataset_object')

    if ext_dset_res and ds:
        t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
        x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
        y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
        z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
        var_lst = ext_dset_res.dataset_description.parameters['variables']

        t_slice = get_safe(config, 'constraints.temporal_slice', (slice(0, 1)))
        #TODO: Using 'eval' here is BAD - need to find a less sketchy way to pass constraints
        if isinstance(t_slice, str):
            t_slice = eval(t_slice)

        lon = ds.variables[x_vname][:]
        lat = ds.variables[y_vname][:]
        z = ds.variables[z_vname][:]

        t_arr = ds.variables[t_vname][t_slice]
        data_arrays = {}
        for varn in var_lst:
            data_arrays[varn] = ds.variables[varn][t_slice]

        max_rec = get_safe(config, 'max_records', 1)
        dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
        #tx_yml = get_safe(config, 'taxonomy')
        #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool
        pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

        cnt = calculate_iteration_count(t_arr.size, max_rec)
        for x in xrange(cnt):
            ta = t_arr[x * max_rec:(x + 1) * max_rec]

            # Make a 'master' RecDict
            #rdt = RecordDictionaryTool(taxonomy=ttool)
            rdt = RecordDictionaryTool(param_dictionary=pdict)

            # Make a 'coordinate' RecDict
            #rdt_c = RecordDictionaryTool(taxonomy=ttool)
            #rdt_c = RecordDictionaryTool(param_dictionary=pdict)
            # Make a 'data' RecDict
            #rdt_d = RecordDictionaryTool(taxonomy=ttool)
            #rdt_d = RecordDictionaryTool(param_dictionary=pdict)

            # Assign values to the coordinate RecDict
            rdt[x_vname] = lon
            rdt[y_vname] = lat
            rdt[z_vname] = z

            # Assign values to the data RecDict
            rdt[t_vname] = ta
            for key, arr in data_arrays.iteritems():
                d = arr[x * max_rec:(x + 1) * max_rec]
                rdt[key] = d

            # Add the coordinate and data RecDicts to the master RecDict
            #rdt['coords'] = rdt_c
            #rdt['data'] = rdt_d

            # Build and return a granule
            # CBM: ttool must be passed
            #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
            g = build_granule(data_producer_id=dprod_id, record_dictionary=rdt, param_dictionary=pdict)
            yield g

        ds.close()
def test_calculate_iteration_count_not_even(self):
    total_recs = 101
    max_rec = 10
    self.assertEqual(calculate_iteration_count(total_recs=total_recs, max_rec=max_rec), 11)
def test_calculate_iteration_count(self):
    total_recs = 100
    max_rec = 10
    self.assertEqual(calculate_iteration_count(total_recs=total_recs, max_rec=max_rec), 10)
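The two tests above pin calculate_iteration_count down as ceiling division; a minimal sketch consistent with them (the real helper may be written differently):

def calculate_iteration_count(total_recs, max_rec):
    """Number of max_rec-sized iterations needed to cover total_recs records (ceiling division)."""
    cnt = total_recs // max_rec
    if total_recs % max_rec > 0:
        cnt += 1
    return cnt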
def _get_data(cls, config):
    """
    Iterable function that acquires data from a source iteratively based on constraints provided by config
    Passed into BaseDataHandler._publish_data and iterated to publish samples.
    @param config dict containing configuration parameters, may include constraints, formatters, etc
    @retval an iterable that returns well-formed Granule objects on each iteration
    """
    new_flst = get_safe(config, 'constraints.new_files', [])
    parser_mod = get_safe(config, 'parser_mod', '')
    parser_cls = get_safe(config, 'parser_cls', '')

    module = __import__(parser_mod, fromlist=[parser_cls])
    classobj = getattr(module, parser_cls)

    for f in new_flst:
        try:
            try:
                #find the new data check index in config
                index = -1
                for ndc in config['set_new_data_check']:
                    if ndc[0] == f[0]:
                        index = config['set_new_data_check'].index(ndc)
                        break
            except Exception:
                log.error('File name not found in attachment')

            parser = classobj(f[0], f[3])

            max_rec = get_safe(config, 'max_records', 1)
            dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
            stream_def = get_safe(config, 'stream_def')

            cnt = calculate_iteration_count(parser.record_count, max_rec)
            file_pos = -1
            for x in xrange(cnt):
                rdt = RecordDictionaryTool(stream_definition_id=stream_def)
                all_data = {}
                for name in parser.sensor_names:
                    all_data[name] = []
                for y in xrange(max_rec):
                    data_map, file_pos = parser.read_next_data()
                    if len(data_map.items()):
                        for name in parser.sensor_names:
                            all_data[name].append(data_map[name])
                for name in parser.sensor_names:
                    try:
                        rdt[name] = all_data[name]
                    except Exception:
                        log.error('failed to set rdt[%s], all_data=%r', name, all_data)
                        raise
                g = rdt.to_granule()

                #update new data check with the latest file position
                if 'set_new_data_check' in config and index > -1:
                    config['set_new_data_check'][index] = (f[0], f[1], f[2], file_pos)

                yield g
            parser.close()
        except HYPMException as ex:
            # TODO: Decide what to do here, raise an exception or carry on
            log.error('Error parsing data file \'{0}\': {1}'.format(f, ex))
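The two HYPM-style handlers above only touch the injected parser through four members: record_count, sensor_names, read_next_data() and close(). A hypothetical in-memory stub honouring that contract can drive _get_data in tests without real data files; its constructor and internals are made up (the real parser_cls takes a file url and a start position, per classobj(f[0], f[3])):

class StubParser(object):
    """In-memory stand-in exposing the parser surface used by _get_data."""

    def __init__(self, records):
        # records: list of {sensor_name: value} dicts, one per record
        self._records = list(records)
        self.record_count = len(self._records)
        self.sensor_names = sorted(self._records[0].keys()) if self._records else []
        self._pos = 0

    def read_next_data(self):
        """Return (data_map, file_pos); an empty map signals no more data."""
        if not self._records:
            return {}, self._pos
        self._pos += 1
        return self._records.pop(0), self._pos

    def close(self):
        pass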