        # Record, for each retrieval, where it lives in the L1B file (indexes are stored 1-based)
        if has_frames:
            retrieval_index[frame_idx, snd_idx] = ret_idx + 1
            frame_index[ret_idx] = frame_idx + 1
        else:
            retrieval_index[snd_idx] = ret_idx + 1
            sounding_index[ret_idx] = snd_idx + 1

    out_retrieval_index_ds = splice_file.create_dataset(RET_INDEX_DS, data=retrieval_index)
    copy_attrs(l1b_sounding_ids, out_retrieval_index_ds)

    if has_frames:
        out_frame_index_ds = splice_file.create_dataset(OUT_FRAME_INDEX_DS, data=frame_index)
        copy_attrs(ret_sounding_ids, out_frame_index_ds)

        out_sounding_index_ds = splice_file.create_dataset(OUT_SOUNDING_INDEX_DS, data=sounding_index)
        copy_attrs(ret_sounding_ids, out_sounding_index_ds)
else:
    # This else pairs with the earlier check that RET_INDEX_DS is not already present in the output file
    print("Dataset %s already exists in %s" % (RET_INDEX_DS, args.splice_filename), file=sys.stderr)

# Add datasets that may be missing because they are specific to, say, a ground type,
# but the aggregator never encountered any soundings that carried them
for ds_name, ds_type in ADD_EMPTY_DATASETS:
    if splice_file.get(ds_name, None) is None:
        new_ds = splice_file.create_dataset(ds_name, data=numpy.empty(ret_sounding_ids.shape, dtype=ds_type))
        new_ds[:] = FILL_VALUE.get(ds_type, FILL_VALUE[float])

l1b_file.close()
splice_file.close()
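# --- Hedged sketch (assumption, not from the original file): copy_attrs is called
# above but never defined in this excerpt. A minimal helper consistent with those
# call sites might look like the following; the project's real implementation may
# differ (e.g. in how it handles variable-length string attributes).
def copy_attrs(src_dataset, dst_dataset):
    "Copy every HDF5 attribute from src_dataset onto dst_dataset."
    for attr_name, attr_value in src_dataset.attrs.items():
        # Leave any attribute the destination already defines untouched
        if attr_name not in dst_dataset.attrs:
            dst_dataset.attrs.create(attr_name, attr_value)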
def create_output_dataset(self, dataset_info, splice_size=None):
    """Duplicates a dataset from the input file into the output hdf object as it
    exists, except for its dimensions"""

    self.logger.debug("Creating new output dataset: %s" % dataset_info.out_name)

    dst_shape, max_shape, dst_shape_names = dataset_info.output_dataset_shape(splice_size, self.multi_source_types)

    # Split the name into group and dataset parts and create the group if needed,
    # then create the desired dataset inside it
    ds_name_clean = dataset_info.out_name.lstrip('/').rstrip('/')
    if ds_name_clean.find("/") > 0:
        dst_group, dst_name = ds_name_clean.split('/', 1)
        out_group_obj = self.dest_obj.require_group(dst_group)
    else:
        dst_group = ""
        dst_name = ds_name_clean
        out_group_obj = self.dest_obj

    # Fill the new dataset with the correct fill value based on its type
    if dataset_info.out_type != numpy.object_ and dataset_info.out_type.type != numpy.string_:
        fill_type = dataset_info.out_type.type
        if fill_type in FILL_VALUE:
            dataset_fill = FILL_VALUE[fill_type]
        else:
            self.logger.warning("Could not find specific fill value for dataset: %s of type %s" % (dst_name, fill_type))
            dataset_fill = None
    else:
        # Use the default fill for string types
        fill_type = None
        dataset_fill = None

    self.logger.debug("Creating new dataset: %s/%s sized: %s with fill type: %s and value: %s" %
                      (dst_group, dst_name, dst_shape, fill_type, dataset_fill))

    try:
        out_dataset_obj = out_group_obj.create_dataset(dst_name, shape=dst_shape, dtype=dataset_info.out_type,
                                                       maxshape=max_shape, compression="gzip", compression_opts=2,
                                                       fillvalue=dataset_fill)
    except RuntimeError as exc:
        raise RuntimeError("Error creating dataset %s/%s: %s" % (dst_group, dst_name, exc))

    # Now copy attributes from the original dataset.
    # Just copy from the first file for now; leave the loop in place in case
    # multiple files are needed later
    for curr_file in dataset_info.inp_filenames[0:1]:
        with closing(h5py.File(curr_file, 'r')) as curr_hdf_obj:
            curr_dataset_obj = curr_hdf_obj[dataset_info.inp_name]
            for attr_name, attr_value in curr_dataset_obj.attrs.items():
                # Skip if already copied from an earlier file; assuming all files
                # have the same attributes for now, we just collect all uniquely
                # named ones
                if attr_name in out_dataset_obj.attrs.keys():
                    continue

                # If the dtype of the attribute is an object dtype, assume
                # it is a variable length string
                if hasattr(attr_value, "dtype") and attr_value.dtype.kind == "O":
                    self.logger.debug('Copying variable length string attribute: "%s" with value: "%s"' % (attr_name, attr_value[0]))
                    vlen_dt = h5py.special_dtype(vlen=str)
                    out_dataset_obj.attrs.create(attr_name, attr_value, dtype=vlen_dt)
                else:
                    self.logger.debug('Copying attribute: "%s" with value: "%s"' % (attr_name, attr_value))
                    out_dataset_obj.attrs.create(attr_name, attr_value)

    # Add extra shape information for the dataset, overwriting any existing Shape
    # attribute, because we may have reshaped the data
    if dst_shape_names:
        out_dataset_obj.attrs["Shape"] = numpy.array(["_".join(dst_shape_names) + "_Array"])

    return out_dataset_obj
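# --- Hedged, self-contained demo (assumption, not part of the splicer): a
# standalone snippet showing the variable-length string attribute technique used
# in create_output_dataset with plain h5py objects. The file name
# "attr_copy_demo.h5" and the "Units" attribute are illustrative only.
#
# import h5py
# import numpy
#
# with h5py.File("attr_copy_demo.h5", "w") as demo:
#     src = demo.create_dataset("src", data=numpy.zeros(3))
#     vlen_dt = h5py.special_dtype(vlen=str)
#     # Store a one-element array attribute of variable-length strings
#     src.attrs.create("Units", numpy.array(["ppm"], dtype=object), dtype=vlen_dt)
#
#     dst = demo.create_dataset("dst", shape=(3,), dtype=numpy.float64)
#     for attr_name, attr_value in src.attrs.items():
#         # Object-dtype attribute arrays are treated as variable-length strings,
#         # mirroring the kind == "O" branch in create_output_dataset above
#         if hasattr(attr_value, "dtype") and attr_value.dtype.kind == "O":
#             dst.attrs.create(attr_name, attr_value, dtype=vlen_dt)
#         else:
#             dst.attrs.create(attr_name, attr_value)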