def save_fields(cls, fname, flds, **kwargs):
    """Save fields and their coordinates into a numpy ``.npz`` archive.

    The archive contains one array per coordinate axis (keyed by the axis
    name), the list of axis names under ``cls._KEY_CRDS``, one array per
    field (keyed by the field name), and for every entry of
    ``cls._KEY_FLDS`` the list of field names that have that centering.

    Parameters:
        fname: output file name; ``~`` and environment variables are
            expanded. A trailing ".npz" is optional.
        flds: a dict of ``{name: field}``, a list / tuple of fields
            (keyed by ``fld.name``), or a list / tuple of
            ``(name, field)`` pairs
        kwargs: accepted for interface compatibility, unused here

    Raises:
        ValueError: if flds is empty
        KeyError: if a field's centering is not listed in
            ``cls._KEY_FLDS``
    """
    # explicit check instead of an assert so it survives `python -O`
    if len(flds) == 0:
        raise ValueError("save_fields requires at least one field")

    fname = os.path.expanduser(os.path.expandvars(fname))

    # normalize flds to an ordered {name: field} mapping
    if isinstance(flds, (list, tuple)):
        if isinstance(flds[0], (list, tuple)):
            flds = OrderedDict(flds)
        else:
            flds = OrderedDict([(fld.name, fld) for fld in flds])

    fld_dict = {}

    # setup crds
    # FIXME: all coordinates are saved as non-uniform, the proper
    #        way to do this is to have let coordinate format its own
    #        hdf5 / xdmf / numpy binary output
    fld0 = next(iter(flds.values()))
    clist = fld0.crds.get_clist(full_arrays=True)
    axis_names = []
    for axis_name, crdarr in clist:
        fld_dict[axis_name] = crdarr
        axis_names.append(axis_name)
    fld_dict[cls._KEY_CRDS] = np.array(axis_names)

    # setup fields
    # Map the lowercased centering to (archive key, field names) so the
    # archive key is found no matter how _KEY_FLDS spells its keys.
    # (explicit loop: dict comprehensions are invalid in Python 2.6)
    by_center = {}
    for center, archive_key in cls._KEY_FLDS.items():
        by_center[center.lower()] = (archive_key, [])

    for name, fld in flds.items():
        by_center[fld.center.lower()][1].append(name)
        fld_dict[name] = fld.data

    for archive_key, names_lst in by_center.values():
        fld_dict[archive_key] = np.array(names_lst)

    # np.savez appends ".npz" itself
    if fname.endswith(".npz"):
        fname = fname[:-4]
    np.savez(fname, **fld_dict)
def save_fields(cls, fname, flds, complevel=0, compression='gzip',
                compression_opts=None, **kwargs):
    """Save fields to an hdf5 file and write a companion xdmf file.

    Coordinates are written under ``cls._CRDS_GROUP`` as "x", "y", "z"
    (missing axes are a single 0.0), and each field is written
    transposed under the group that ``cls._FLD_GROUPS`` gives for its
    centering. The xdmf file has the same base name as fname with an
    ".xdmf" extension and refers to the hdf5 file by a relative path.

    Parameters:
        fname: name of the hdf5 file; ``~`` and environment variables
            are expanded
        flds: a dict of ``{name: field}``, a list / tuple of fields
            (keyed by ``fld.name``), or a list / tuple of
            ``(name, field)`` pairs
        complevel: for gzip, used as compression_opts when that is not
            given; for any other filter a truthy value just switches
            compression on
        compression: name of the h5py compression filter
        compression_opts: passed to h5py as is; giving any value other
            than None enables compression
        kwargs: accepted for interface compatibility, unused here

    Raises:
        ValueError: if flds is empty
    """
    # FIXME: this is only good for writing cartesian rectilnear flds
    # FIXME: axes are renamed if flds[0] is 1D or 2D
    # explicit check instead of an assert so it survives `python -O`
    if len(flds) == 0:
        raise ValueError("save_fields requires at least one field")

    fname = os.path.expanduser(os.path.expandvars(fname))

    if complevel and compression == 'gzip' and compression_opts is None:
        compression_opts = complevel

    if compression_opts is not None:
        do_compression = True
    else:
        # filters other than gzip (lzf, szip) have no level, so a truthy
        # complevel can only mean "turn the filter on"
        do_compression = (bool(complevel)
                          and compression not in (None, 'gzip'))

    # normalize flds to an ordered {name: field} mapping
    if isinstance(flds, (list, tuple)):
        if isinstance(flds[0], (list, tuple)):
            flds = OrderedDict(flds)
        else:
            flds = OrderedDict([(fld.name, fld) for fld in flds])

    # FIXME: all coordinates are saved as non-uniform, the proper
    #        way to do this is to have let coordinate format its own
    #        hdf5 / xdmf / numpy binary output
    fld0 = next(iter(flds.values()))
    clist = fld0.crds.get_clist(full_arrays=True)
    crd_arrs = [np.array([0.0])] * 3
    crd_names = ["x", "y", "z"]
    for i, c in enumerate(clist):
        crd_arrs[i] = c[1]
    crd_shape = [len(arr) for arr in crd_arrs]
    fld_time = fld0.time

    # write arrays to the hdf5 file
    with h5py.File(fname, 'w') as f:
        for axis_name, arr in zip(crd_names, crd_arrs):
            loc = cls._CRDS_GROUP + '/' + axis_name
            if do_compression:
                f.create_dataset(loc, data=arr, compression=compression,
                                 compression_opts=compression_opts)
            else:
                f[loc] = arr

        for name, fld in flds.items():
            loc = cls._FLD_GROUPS[fld.center.lower()] + '/' + name
            # xdmf files use kji ordering
            if do_compression:
                f.create_dataset(loc, data=fld.data.T,
                                 compression=compression,
                                 compression_opts=compression_opts)
            else:
                f[loc] = fld.data.T

        # big bad openggcm time_str hack to put basetime into hdf5 file;
        # the first field that has a basetime wins
        for fld in flds.values():
            try:
                tfmt = "%Y:%m:%d:%H:%M:%S.%f"
                sec_td = viscid.as_timedelta64(fld.time, 's')
                dtime = viscid.as_datetime(fld.basetime + sec_td).strftime(tfmt)
                epoch = viscid.readers.openggcm.GGCM_EPOCH
                ts = viscid.as_timedelta(fld.basetime - epoch).total_seconds()
                ts += fld.time
                timestr = "time= {0} {1:.16e} {2} 300c".format(fld.time, ts,
                                                               dtime)
                f.create_group('openggcm')
                # np.string_ was removed in NumPy 2.0; np.bytes_ is the
                # same type under its surviving name
                f['openggcm'].attrs['time_str'] = np.bytes_(timestr)
                break
            except viscid.NoBasetimeError:
                pass

    # now write an xdmf file
    xdmf_fname = os.path.splitext(fname)[0] + ".xdmf"
    relh5fname = "./" + os.path.basename(fname)
    with open(xdmf_fname, 'w') as f:
        xloc = cls._CRDS_GROUP + '/' + crd_names[0]
        yloc = cls._CRDS_GROUP + '/' + crd_names[1]
        zloc = cls._CRDS_GROUP + '/' + crd_names[2]
        # dimensions are listed in reverse (z y x) order
        dim_str = " ".join([str(n) for n in crd_shape][::-1])
        f.write(cls._XDMF_TEMPLATE_BEGIN.format(time=fld_time))
        s = cls._XDMF_TEMPLATE_RECTILINEAR_GRID_BEGIN.format(
            grid_name="vgrid", crd_dims=dim_str, h5fname=relh5fname,
            xdim=crd_shape[0], ydim=crd_shape[1], zdim=crd_shape[2],
            xloc=xloc, yloc=yloc, zloc=zloc)
        f.write(s)

        # record the coordinate system of the first field that has one
        for fld in flds.values():
            _crd_system = viscid.as_crd_system(fld, None)
            if _crd_system:
                f.write(cls._XDMF_INFO_TEMPLATE.format(name="crd_system",
                                                       value=_crd_system))
                break

        for name, fld in flds.items():
            fld = fld.as_flat().T
            # strip the bit width, i.e. "float32" -> "Float"
            dt = fld.dtype.name.rstrip("0123456789").title()
            precision = fld.dtype.itemsize
            fld_dim_str = " ".join([str(n) for n in fld.shape])
            loc = cls._FLD_GROUPS[fld.center.lower()] + '/' + name
            s = cls._XDMF_TEMPLATE_ATTRIBUTE.format(
                fld_name=name, fld_type=fld.fldtype,
                center=fld.center.title(), dtype=dt, precision=precision,
                fld_dims=fld_dim_str, h5fname=relh5fname, fld_loc=loc)
            f.write(s)

        f.write(cls._XDMF_TEMPLATE_GRID_END)
        f.write(cls._XDMF_TEMPLATE_END)
class Bucket(object):
    """ This is basically a glorified dict

    It's a convenient dict-like object if you want lots of
    keys (handles) for a given value (item).

    NOTE:
        You can add non-hashable items, but this is poorly tested.
        When adding / removing non-hashable items (items, not handles)
        the comparison is done using the object's id. This is
        fundamentally different than using an object's __hash__, but
        it should be fairly transparent.
    """
    _ordered = False

    _ref_count = None  # keys are hashable items, values are # of times item was added
    _hash_lookup = None  # keys are hashable items, values are actual items
    _handles = None  # keys are hashable items, values are list of handles
    _items = None  # keys are handles, values are actual items

    # if index handle, set_item adds this number as a handle and increments it
    # this is useful for hiding loads that are not user initiated, such as
    # an xdmf file loading an h5 file under the covers
    _int_counter = None

    def __init__(self, ordered=False):
        """Make an empty bucket; if ordered, OrderedDicts are used"""
        self._ordered = ordered

        self._set_empty_dicts()
        self._int_counter = 0

    def _set_empty_dicts(self):
        """(Re)create all lookup dicts, dropping whatever was stored"""
        dict_type = OrderedDict if self._ordered else dict
        self._ref_count = dict_type()
        self._hash_lookup = dict_type()
        self._handles = dict_type()
        self._items = dict_type()

    @staticmethod
    def _make_hashable(item):
        """Return item if it's hashable, else a string built from its
        type and id
        """
        try:
            hash(item)
        except TypeError:
            return "<{0} @ {1}>".format(type(item), hex(id(item)))
        return item

    def items(self):
        """Generate (list of handles, item) pairs"""
        for hashable_item, item in self._hash_lookup.items():
            yield self._handles[hashable_item], item

    def keys(self):
        """Return the handle lists, one list per item"""
        return self._handles.values()

    def values(self):
        """Return the stored items"""
        return self._hash_lookup.values()

    def set_item(self, handles, item, index_handle=True, _add_ref=False):
        """Add item to the bucket under the given handles

        If the item is already in the bucket, the new handles are added
        to it. A handle that currently belongs to a different item is
        taken away from that item (and an error is logged); if that was
        the other item's last handle, the other item is removed.

        Parameters:
            handles: list of hashable handles, or None. The list is
                not modified.
            item: the thing to store
            index_handle: if true and the item is new, the next integer
                from an internal counter is included as a handle,
                making the bucket indexable like a list
            _add_ref: if true and the item is already in the bucket,
                increment its reference count

        Raises:
            TypeError: if handles is neither None nor a list
        """
        if handles is None:
            handles = []
        if not isinstance(handles, list):
            raise TypeError("handle must by of list type")
        # work on a copy so the caller's list is not changed when the
        # index handle is appended below
        handles = list(handles)

        # make sure we have a hashable "item" for doing reverse
        # lookups of handles using an item
        hashable_item = self._make_hashable(item)
        if hashable_item not in self._hash_lookup:
            if index_handle:
                handles.append(self._int_counter)
                self._int_counter += 1

        handles_added = []
        for h in handles:
            try:
                hash(h)
            except TypeError:
                logger.error("A bucket says handle '{0}' is not hashable, "
                             "ignoring it".format(h))
                continue

            # check if we're stealing a handle from another item
            if h in self._items:
                if item is self._items[h]:
                    # this item already owns the handle
                    continue

                logger.error("The handle '{0}' is being hijacked! Memory leak "
                             "could ensue.".format(h))
                # remove handle from old item, since this check is here,
                # there shouldn't be 2 items with the same handle in the
                # items dict
                old_item = self._items[h]
                old_hashable_item = self._make_hashable(old_item)
                self._handles[old_hashable_item].remove(h)
                if not self._handles[old_hashable_item]:
                    self.remove_item(old_item)

            self._items[h] = item
            handles_added.append(h)

        try:
            self._handles[hashable_item] += handles_added
            if _add_ref:
                self._ref_count[hashable_item] += 1
        except KeyError:
            # first time we see this item
            if not handles_added:
                logger.error("No valid handles given, item '{0}' not added to "
                             "bucket".format(hashable_item))
            else:
                self._handles[hashable_item] = handles_added
                self._hash_lookup[hashable_item] = item
                self._ref_count[hashable_item] = 1

        return None

    def _remove_item(self, item):
        """Remove item and all its handles, no matter its ref count

        Raises:
            KeyError: if item is not found
        """
        hashable_item = self._make_hashable(item)
        for h in self._handles[hashable_item]:
            del self._items[h]
        del self._hash_lookup[hashable_item]
        del self._handles[hashable_item]
        del self._ref_count[hashable_item]

    def _remove_item_by_handle(self, handle):
        """Like _remove_item, but the item is looked up by handle"""
        self._remove_item(self._items[handle])

    def remove_item(self, item):
        """Remove item, raises KeyError if item is not found"""
        self._remove_item(item)

    def remove_item_by_handle(self, handle):
        """ remove item by handle, raises KeyError if handle is not found """
        self.remove_item(self._items[handle])

    def remove_reference(self, item, _ref_count=1):
        """Lower an item's reference count; remove it on reaching 0

        Parameters:
            item: an item in the bucket, or failing that, a handle
            _ref_count: how much to subtract from the reference count.
                When item turns out to be a handle, a falsy value sets
                the count straight to 0.
        """
        hashable_item = self._make_hashable(item)
        try:
            self._ref_count[hashable_item] -= _ref_count
        except KeyError:
            # not an item we know about, so treat it as a handle
            item = self[item]
            hashable_item = self._make_hashable(item)
            if _ref_count:
                self._ref_count[hashable_item] -= _ref_count
            else:
                self._ref_count[hashable_item] = 0

        # FIXME: unload_all_files breaks this assert check... probably a bug
        # assert self._ref_count[hashable_item] >= 0, \
        #     "problem with bucket ref counting {0}".format(hashable_item)
        if self._ref_count[hashable_item] <= 0:
            self._remove_item(item)

    def remove_all_items(self):
        """ unload all items """
        self._set_empty_dicts()

    def items_as_list(self):
        """Return the stored items as a new list"""
        return list(self._hash_lookup.values())

    def get_primary_handles(self):
        """Return a list of the first handles for all items"""
        return [handles[0] for handles in self._handles.values()]

    def handle_string(self, prefix=""):
        """ return string representation of handles and items """
        lines = []
        for item, handles in self._handles.items():
            hands = [repr(h) for h in handles]
            lines.append("{0}handles: {1}\n".format(prefix, ", ".join(hands)))
            lines.append("{0} item: {1}\n".format(prefix, str(item)))
        return "".join(lines)

    def print_tree(self, prefix=""):
        """Print the result of handle_string"""
        print(self.handle_string(prefix=prefix), end='')

    def __getitem__(self, handle):
        return self._items[handle]

    def __setitem__(self, key, value):
        # a list / tuple key means many handles for the same value
        if isinstance(key, (list, tuple)):
            key = list(key)
        elif key is not None:
            key = [key]
        self.set_item(key, value)

    def __delitem__(self, handle):
        try:
            self.remove_item_by_handle(handle)
        except (KeyError, TypeError):
            # maybe we are asking to remove an item explicitly
            self.remove_item(handle)

    def __iter__(self):
        return iter(self.values())

    def contains_item(self, item):
        """True if item (not a handle) is stored in the bucket"""
        hashable_item = self._make_hashable(item)
        return hashable_item in self._handles

    def contains_handle(self, handle):
        """True if handle is in use; unhashable handles give False"""
        try:
            return handle in self._items
        except TypeError:
            return False

    def __contains__(self, handle):
        return self.contains_handle(handle) or self.contains_item(handle)

    def __len__(self):
        return len(self._hash_lookup)

    def __str__(self):
        return self.handle_string()
def load_files(self, fnames, index_handle=True, file_type=None,
               _add_ref=False, **kwargs):
    """Load files, and add them to the bucket

    File names are expanded with slice_globbed_filenames, sorted by
    file type, and grouped by the file type's group_fnames. A group
    whose collective name is already a handle in the bucket is reused
    instead of being loaded again.

    A file that fails to load with an IOError is logged and skipped:
    it is neither stored in the bucket nor included in the result.

    Parameters:
        fnames: a file name or a list of file names (can contain glob
            patterns)
        index_handle: passed on to set_item
        file_type: a class that is a subclass of VFile, if given,
            use this file type, don't use the autodetect mechanism
        _add_ref: passed on to set_item
        kwargs: passed to file constructor

    Returns:
        A list of VFile instances. The length may not be the same
        as the length of fnames, and the order may not be the same
        in order to accommodate globs and file grouping.

    Raises:
        RuntimeError: if a file's type can not be determined
        ValueError: re-raised if the file constructor raises it
    """
    orig_fnames = fnames

    if not isinstance(fnames, (list, tuple)):
        fnames = [fnames]

    # expand glob patterns
    globbed_fnames = []
    for fname in fnames:
        slglob = slice_globbed_filenames(fname)
        if isinstance(slglob, string_types):
            slglob = [slglob]
        globbed_fnames += slglob
    fnames = globbed_fnames

    # detect file types, keeping the order in which types first appear
    types_detected = OrderedDict()
    for fname in fnames:
        if file_type is None:
            _ftype = VFile.detect_type(fname)
        else:
            _ftype = file_type

        if not _ftype:
            raise RuntimeError("Can't determine type "
                               "for {0}".format(fname))
        types_detected.setdefault(_ftype, []).append(fname)

    # see if the file's already been loaded, or load it, and add it
    # to the bucket and all that good stuff
    file_lst = []
    for ftype, names in types_detected.items():
        # group all file names of a given type
        groups = ftype.group_fnames(names)

        # iterate all the groups and add them
        for group in groups:
            f = None
            handle_name = ftype.collective_name(group)

            try:
                f = self[handle_name]
            except KeyError:
                try:
                    f = ftype(group, parent_bucket=self, **kwargs)
                    f.handle_name = handle_name
                except IOError as e:
                    s = " IOError on file: {0}\n".format(handle_name)
                    s += " File Type: {0}\n".format(ftype)
                    s += " {0}".format(str(e))
                    logger.warning(s)
                except ValueError as e:
                    # ... why am i explicitly catching ValueErrors?
                    # i'm probably breaking something by re-raising
                    # this exception, but i didn't document what :(
                    s = " ValueError on file load: {0}\n".format(handle_name)
                    s += " File Type: {0}\n".format(ftype)
                    s += " {0}".format(str(e))
                    logger.warning(s)
                    # re-raise the last exception
                    raise

            if f is None:
                # the load failed; storing None under handle_name would
                # make later loads of this file return None too
                continue

            self.set_item([handle_name], f, index_handle=index_handle,
                          _add_ref=_add_ref)
            file_lst.append(f)

    if len(file_lst) == 0:
        logger.warning("No files loaded for '{0}', is the path "
                       "correct?".format(orig_fnames))
    return file_lst
def save_fields(cls, fname, flds, **kwargs):
    """Save fields to an hdf5 file and write a companion xdmf file.

    Coordinates are written under ``cls._CRDS_GROUP`` as "x", "y", "z"
    (missing axes are a single 0.0), and each field is written
    transposed under the group that ``cls._FLD_GROUPS`` gives for its
    centering. The xdmf file has the same base name as fname with an
    ".xdmf" extension and refers to the hdf5 file by a relative path.

    Parameters:
        fname: name of the hdf5 file; ``~`` and environment variables
            are expanded
        flds: a dict of ``{name: field}``, a list / tuple of fields
            (keyed by ``fld.name``), or a list / tuple of
            ``(name, field)`` pairs
        kwargs: accepted for interface compatibility, unused here

    Raises:
        ValueError: if flds is empty
    """
    # FIXME: this is only good for writing cartesian rectilnear flds
    # FIXME: axes are renamed if flds[0] is 1D or 2D
    # explicit check instead of an assert so it survives `python -O`
    if len(flds) == 0:
        raise ValueError("save_fields requires at least one field")

    fname = os.path.expanduser(os.path.expandvars(fname))

    # normalize flds to an ordered {name: field} mapping
    if isinstance(flds, (list, tuple)):
        if isinstance(flds[0], (list, tuple)):
            flds = OrderedDict(flds)
        else:
            flds = OrderedDict([(fld.name, fld) for fld in flds])

    # FIXME: all coordinates are saved as non-uniform, the proper
    #        way to do this is to have let coordinate format its own
    #        hdf5 / xdmf / numpy binary output
    fld0 = next(iter(flds.values()))
    clist = fld0.crds.get_clist(full_arrays=True)
    crd_arrs = [np.array([0.0])] * 3
    crd_names = ["x", "y", "z"]
    for i, c in enumerate(clist):
        crd_arrs[i] = c[1]
    crd_shape = [len(arr) for arr in crd_arrs]
    fld_time = fld0.time

    # write arrays to the hdf5 file
    with h5py.File(fname, 'w') as f:
        for axis_name, arr in zip(crd_names, crd_arrs):
            loc = cls._CRDS_GROUP + '/' + axis_name
            f[loc] = arr

        for name, fld in flds.items():
            loc = cls._FLD_GROUPS[fld.center.lower()] + '/' + name
            # xdmf files use kji ordering
            f[loc] = fld.data.T

        # big bad openggcm time_str hack to put basetime into hdf5 file;
        # the first field that has a basetime wins
        for fld in flds.values():
            try:
                tfmt = "%Y:%m:%d:%H:%M:%S.%f"
                sec_td = viscid.as_timedelta64(fld.time, 's')
                dtime = viscid.as_datetime(fld.basetime + sec_td).strftime(tfmt)
                epoch = viscid.readers.openggcm.GGCM_EPOCH
                ts = viscid.as_timedelta(fld.basetime - epoch).total_seconds()
                ts += fld.time
                timestr = "time= {0} {1:.16e} {2} 300c".format(fld.time, ts,
                                                               dtime)
                f.create_group('openggcm')
                # np.string_ was removed in NumPy 2.0; np.bytes_ is the
                # same type under its surviving name
                f['openggcm'].attrs['time_str'] = np.bytes_(timestr)
                break
            except viscid.NoBasetimeError:
                pass

    # now write an xdmf file
    xdmf_fname = os.path.splitext(fname)[0] + ".xdmf"
    relh5fname = "./" + os.path.basename(fname)
    with open(xdmf_fname, 'w') as f:
        xloc = cls._CRDS_GROUP + '/' + crd_names[0]
        yloc = cls._CRDS_GROUP + '/' + crd_names[1]
        zloc = cls._CRDS_GROUP + '/' + crd_names[2]
        # dimensions are listed in reverse (z y x) order
        dim_str = " ".join([str(n) for n in crd_shape][::-1])
        f.write(cls._XDMF_TEMPLATE_BEGIN.format(time=fld_time))
        s = cls._XDMF_TEMPLATE_RECTILINEAR_GRID_BEGIN.format(
            grid_name="vgrid", crd_dims=dim_str, h5fname=relh5fname,
            xdim=crd_shape[0], ydim=crd_shape[1], zdim=crd_shape[2],
            xloc=xloc, yloc=yloc, zloc=zloc)
        f.write(s)

        # record the coordinate system of the first field that has one
        for fld in flds.values():
            _crd_system = viscid.as_crd_system(fld, None)
            if _crd_system:
                f.write(cls._XDMF_INFO_TEMPLATE.format(name="crd_system",
                                                       value=_crd_system))
                break

        for name, fld in flds.items():
            fld = fld.as_flat().T
            # strip the bit width, i.e. "float32" -> "Float"
            dt = fld.dtype.name.rstrip("0123456789").title()
            precision = fld.dtype.itemsize
            fld_dim_str = " ".join([str(n) for n in fld.shape])
            loc = cls._FLD_GROUPS[fld.center.lower()] + '/' + name
            s = cls._XDMF_TEMPLATE_ATTRIBUTE.format(
                fld_name=name, fld_type=fld.fldtype,
                center=fld.center.title(), dtype=dt, precision=precision,
                fld_dims=fld_dim_str, h5fname=relh5fname, fld_loc=loc)
            f.write(s)

        f.write(cls._XDMF_TEMPLATE_GRID_END)
        f.write(cls._XDMF_TEMPLATE_END)
def load_files(self, fnames, index_handle=True, file_type=None,
               prefer=None, force_reload=False, _add_ref=False, **kwargs):
    """Load files, and add them to the bucket

    File names are expanded with slice_globbed_filenames, sorted by
    file type, and grouped by the file type's group_fnames. A group
    whose collective name is already a handle in the bucket is reused
    instead of being loaded again.

    A file that fails to load with an IOError is logged and skipped:
    it is neither stored in the bucket nor included in the result.

    Parameters:
        fnames: a file name or a list of file names (can contain glob
            patterns)
        index_handle: passed on to set_item
        file_type: a class that is a subclass of VFile, or a string
            that VFile.resolve_type turns into one; if given,
            use this file type, don't use the autodetect mechanism
        prefer: passed on to VFile.detect_type when autodetecting
        force_reload: if true, call reload() on files that were
            already in the bucket
        _add_ref: passed on to set_item
        kwargs: passed to file constructor

    Returns:
        A list of VFile instances. The length may not be the same
        as the length of fnames, and the order may not be the same
        in order to accommodate globs and file grouping.

    Raises:
        RuntimeError: if a file's type can not be determined
        ValueError: re-raised if the file constructor raises it
    """
    orig_fnames = fnames

    if not isinstance(fnames, (list, tuple)):
        fnames = [fnames]

    # expand glob patterns
    globbed_fnames = []
    for fname in fnames:
        slglob = slice_globbed_filenames(fname)
        if isinstance(slglob, string_types):
            slglob = [slglob]
        globbed_fnames += slglob
    fnames = globbed_fnames

    # detect file types, keeping the order in which types first appear
    types_detected = OrderedDict()
    for fname in fnames:
        if file_type is None:
            _ftype = VFile.detect_type(fname, prefer=prefer)
        elif isinstance(file_type, string_types):
            _ftype = VFile.resolve_type(file_type)
        else:
            _ftype = file_type

        if not _ftype:
            raise RuntimeError("Can't determine type "
                               "for {0}".format(fname))
        types_detected.setdefault(_ftype, []).append(fname)

    # see if the file's already been loaded, or load it, and add it
    # to the bucket and all that good stuff
    file_lst = []
    for ftype, names in types_detected.items():
        # group all file names of a given type
        groups = ftype.group_fnames(names)

        # iterate all the groups and add them
        for group in groups:
            f = None
            handle_name = ftype.collective_name(group)

            try:
                f = self[handle_name]
                if force_reload:
                    f.reload()
            except KeyError:
                try:
                    f = ftype(group, parent_bucket=self, **kwargs)
                    f.handle_name = handle_name
                except IOError as e:
                    s = " IOError on file: {0}\n".format(handle_name)
                    s += " File Type: {0}\n".format(ftype)
                    s += " {0}".format(str(e))
                    logger.warning(s)
                except ValueError as e:
                    # ... why am i explicitly catching ValueErrors?
                    # i'm probably breaking something by re-raising
                    # this exception, but i didn't document what :(
                    s = " ValueError on file load: {0}\n".format(handle_name)
                    s += " File Type: {0}\n".format(ftype)
                    s += " {0}".format(str(e))
                    logger.warning(s)
                    # re-raise the last exception
                    raise

            if f is None:
                # the load failed; storing None under handle_name would
                # make later loads of this file return None too
                continue

            self.set_item([handle_name], f, index_handle=index_handle,
                          _add_ref=_add_ref)
            file_lst.append(f)

    if len(file_lst) == 0:
        logger.warning("No files loaded for '{0}', is the path "
                       "correct?".format(orig_fnames))
    return file_lst