if has_frames:
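                # 2-D case: index by (frame, sounding); values are stored 1-based,
                # leaving 0 for soundings that have no retrieval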
                retrieval_index[frame_idx, snd_idx] = ret_idx + 1
                frame_index[ret_idx] = frame_idx + 1
            else:
                retrieval_index[snd_idx] = ret_idx + 1

            sounding_index[ret_idx] = snd_idx + 1

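    # Write the index maps into the spliced file, carrying attributes over
    # from the sounding-id datasets they were derived from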
    out_retrieval_index_ds = splice_file.create_dataset(RET_INDEX_DS, data=retrieval_index)
    copy_attrs(l1b_sounding_ids, out_retrieval_index_ds)

    if has_frames:
        out_frame_index_ds = splice_file.create_dataset(OUT_FRAME_INDEX_DS, data=frame_index)
        copy_attrs(ret_sounding_ids, out_frame_index_ds)

    out_sounding_index_ds = splice_file.create_dataset(OUT_SOUNDING_INDEX_DS, data=sounding_index)
    copy_attrs(ret_sounding_ids, out_sounding_index_ds)
else:
    print("Dataset %s already exists in %s" % (RET_INDEX_DS, args.splice_filename), file=sys.stderr)

# Add datasets that may be missing because they are specific to, say, a ground
# type, and the aggregator never encountered any soundings containing them
for ds_name, ds_type in ADD_EMPTY_DATASETS:
    if splice_file.get(ds_name, None) is None:
        new_ds = splice_file.create_dataset(ds_name, data=numpy.empty(ret_sounding_ids.shape, dtype=ds_type))
        new_ds[:] = FILL_VALUE.get(ds_type, FILL_VALUE[float])

l1b_file.close()
splice_file.close()
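
# The listings above and below reference a FILL_VALUE table keyed by numpy
# scalar type, with the float entry used as the default; its definition is not
# shown in this listing. A minimal sketch under that assumption (the concrete
# fill values here are illustrative, not taken from the source):
import numpy

FILL_VALUE = {
    float:         -999999.0,
    numpy.float32: numpy.float32(-999999.0),
    numpy.float64: -999999.0,
    numpy.int16:   numpy.int16(-9999),
    numpy.int32:   numpy.int32(-9999),
    numpy.int64:   numpy.int64(-9999),
}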
Example #2
    def create_output_dataset(self, dataset_info, splice_size=None):
        """Duplicates a dataset from the input file into the output hdf object as it exists
        except for its dimensions"""

        self.logger.debug("Creating new output dataset: %s" % dataset_info.out_name)

        dst_shape, max_shape, dst_shape_names = dataset_info.output_dataset_shape(splice_size, self.multi_source_types) 

        # Split the name into group and dataset parts, creating a group if
        # needed, then create the desired dataset in the new or existing group
        ds_name_clean = dataset_info.out_name.strip('/')
        if ds_name_clean.find("/") > 0:
            dst_group, dst_name = ds_name_clean.split('/', 1)
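            # require_group returns the group if it already exists, otherwise creates it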
            out_group_obj = self.dest_obj.require_group(dst_group) 
        else:
            dst_group = ""
            dst_name = ds_name_clean
            out_group_obj = self.dest_obj

        # Fill new dataset with the correct fill value based on type
        if dataset_info.out_type != object and dataset_info.out_type.type != numpy.bytes_:
            fill_type = dataset_info.out_type.type

            if fill_type in FILL_VALUE:
                dataset_fill = FILL_VALUE[fill_type]
            else:
                self.logger.warning("Could not find specific fill value for dataset: %s of type %s" % (dst_name, fill_type))
                dataset_fill = None
        else:
            # Use default fill for string types
            fill_type = None
            dataset_fill = None

        self.logger.debug("Creating new dataset: %s/%s sized: %s with fill type: %s and value: %s" % (dst_group, dst_name, dst_shape, fill_type, dataset_fill))
        try:
            out_dataset_obj = out_group_obj.create_dataset(dst_name, shape=dst_shape, dtype=dataset_info.out_type, maxshape=max_shape, compression="gzip", compression_opts=2, fillvalue=dataset_fill)
        except RuntimeError as exc:
            raise RuntimeError("Error creating dataset %s/%s: %s" % (dst_group/dst_name, exc))

        # Now create copied attributes from original dataset
        # Just copy from first for now, leave code to do multiple if needed
        for curr_file in dataset_info.inp_filenames[0:1]:
            with closing(h5py.File(curr_file, 'r')) as curr_hdf_obj:
                curr_dataset_obj = curr_hdf_obj[dataset_info.inp_name]
                for attr_name, attr_value in curr_dataset_obj.attrs.items():
                    # Skip attributes already copied from an earlier file;
                    # assuming all files have the same attributes for now,
                    # we just collect the uniquely named ones
                    if attr_name in out_dataset_obj.attrs:
                        continue
            
                    # If the dtype of the attribute is an object dtype, then
                    # assume it's a variable-length string
                    if hasattr(attr_value, "dtype") and attr_value.dtype.kind == "O":
                        self.logger.debug('Copying variable length string attribute: "%s" with value: "%s"' % (attr_name, attr_value[0]))
                        vlen_dt = h5py.special_dtype(vlen=str)
                        out_dataset_obj.attrs.create(attr_name, attr_value, dtype=vlen_dt)
                    else:
                        self.logger.debug('Copying attribute: "%s" with value: "%s"' % (attr_name, attr_value))
                        out_dataset_obj.attrs.create(attr_name, attr_value)

        # Add extra information for the dataset, overwriting any existing shape
        # attribute because we may have reshaped the data
        if dst_shape_names:
            out_dataset_obj.attrs["Shape"] = numpy.array(["_".join(dst_shape_names) + "_Array"]) 

        return out_dataset_obj
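
# A rough usage sketch (not from the source): create_output_dataset expects a
# dataset_info object exposing inp_name, out_name, out_type, inp_filenames and
# an output_dataset_shape() method. The stub and call below are illustrative
# assumptions about that interface, made only to show the flow concretely.
class DatasetInfoStub:
    def __init__(self, inp_name, out_name, out_type, inp_filenames):
        self.inp_name = inp_name
        self.out_name = out_name
        self.out_type = numpy.dtype(out_type)
        self.inp_filenames = inp_filenames

    def output_dataset_shape(self, splice_size, multi_source_types):
        # Grow along a single sounding axis; the shape name is illustrative
        return (splice_size,), (None,), ["Retrieval"]

# info = DatasetInfoStub("/RetrievalResults/xco2", "/RetrievalResults/xco2",
#                        numpy.float64, ["l2_input.h5"])
# out_ds = splicer.create_output_dataset(info, splice_size=1000)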