class Group(Node):
    """A sub-group node: a directory under the root staging area whose
    attributes are persisted in a JSON ".attrs" sidecar file."""

    def __init__(self, node, path, nxclass="NXCollection", attrs=None, **kw):
        """Create (or reattach to) the group at *path* under *node*.

        *path* may be absolute ("/...") or relative to *node*; *nxclass*
        is recorded in the 'NX_class' attribute; extra *attrs* are merged
        into the group's attribute file.
        """
        attrs = {} if attrs is None else attrs
        Node.__init__(self, parent_node=node, path=path, **kw)
        # resolve the group path against the parent unless it is absolute
        self.path = path if path.startswith("/") else os.path.join(node.path, path)
        self.os_path = node.os_path
        group_dir = os.path.join(self.os_path, self.path.lstrip("/"))
        if not os.path.exists(group_dir):
            os.mkdir(group_dir)
        attrs['NX_class'] = nxclass.encode('UTF-8')
        self.attrs = JSONBackedDict(os.path.join(group_dir, self._attrs_filename),
                                    self.json_encoder)
        self.attrs.update(attrs)
        self.attrs._write()

    def __repr__(self):
        return '<HDZIP group "%s">' % (self.path,)
def __init__(self, parent_node, path="/", nxclass="NXCollection", attrs=None):
    """Create (or reattach to) the directory backing this node.

    :param parent_node: parent node, or None for the root (in which case
        the caller must have set ``self.os_path`` beforehand).
    :param path: absolute path ("/...") or path relative to the parent.
    :param nxclass: NeXus class name recorded in the 'NX_class' attribute.
    :param attrs: optional extra attributes merged into the node's attrs.
    """
    # NOTE(fix): 'attrs' used to default to a shared mutable dict that was
    # mutated below ('NX_class'), leaking state across instances and into
    # callers' dicts; copy defensively instead.
    attrs = {} if attrs is None else dict(attrs)
    self.root_node = self if parent_node is None else parent_node.root_node
    if path.startswith("/"):
        # absolute path
        self.path = path
    else:
        # relative: resolve against the parent node
        self.path = os.path.join(parent_node.path, path)
    if parent_node is not None:
        # root nodes (parent_node is None) provide os_path themselves
        self.os_path = parent_node.os_path
    node_dir = os.path.join(self.os_path, self.path.lstrip("/"))
    if not os.path.exists(node_dir):
        os.mkdir(node_dir)
    attrs['NX_class'] = nxclass.encode('UTF-8')
    self.attrs = JSONBackedDict(os.path.join(node_dir, self._attrs_filename))
    self.attrs.update(attrs)
    self.fields = JSONBackedDict(os.path.join(node_dir, self._fields_filename))
class File(Node):
    """Root node: the hierarchy is staged as a directory tree on disk and
    zipped into *filename* on flush/close."""

    def __init__(self, filename, mode="r", timestamp=None, creator=None,
                 compression=zipfile.ZIP_DEFLATED, attrs=None, os_path=None,
                 **kw):
        """Open or create a zip-backed file.

        :param filename: path of the zip archive.
        :param mode: "r" (read), "w" (write) or "a" (append).
        :param timestamp: file time; a string is validated via iso8601,
            None means "now".
        :param creator: optional creator name stored in the attributes.
        :param compression: zipfile compression constant.
        :param attrs: optional extra root attributes.
        :param os_path: staging directory; a temp dir is created if None.
        """
        # NOTE(fix): 'attrs' used to default to a shared mutable dict that
        # was mutated below (file_name/file_time/...), leaking state between
        # File instances; copy defensively instead.
        attrs = {} if attrs is None else dict(attrs)
        if os_path is None:
            # stage into a fresh temporary directory
            self.os_path = tempfile.mkdtemp()
        else:
            self.os_path = os_path
        Node.__init__(self, parent_node=None, path="/", **kw)
        self.attrs = JSONBackedDict(
            os.path.join(self.os_path, self._attrs_filename), self.json_encoder)
        self.filename = filename
        self.mode = mode
        self.compression = compression
        file_exists = os.path.exists(filename)
        if file_exists and mode in ("a", "r"):
            # unpack the existing archive into the staging directory
            zipfile.ZipFile(filename).extractall(self.os_path)
        if mode in ("a", "w"):
            if timestamp is None:
                timestr = iso8601.now()
            else:
                # If given a time string, check that it is valid
                try:
                    timestamp = iso8601.parse_date(timestamp)
                except TypeError:
                    pass
                timestr = iso8601.format_date(timestamp)
            attrs['NX_class'] = 'NXroot'
            attrs['file_name'] = filename
            attrs['file_time'] = timestr
            attrs['NeXus_version'] = __version__
            if creator is not None:
                attrs['creator'] = creator
            self.attrs.update(attrs)
            self.attrs._write()
        self.flush()

    def flush(self):
        """Write the staged tree out to the zip archive."""
        self.writezip()

    def __repr__(self):
        return "<HDZIP file \"%s\" (mode %s)>" % (self.filename, self.mode)

    def close(self):
        """Persist the archive (unless read-only) and delete the staging dir."""
        # there seems to be only one read-only mode
        if os.path.exists(self.os_path):
            if self.mode != "r":
                self.writezip()
            shutil.rmtree(self.os_path)

    def writezip(self):
        """Zip the staged directory tree into self.filename."""
        make_zipfile(self.filename,
                     os.path.join(self.os_path, self.path.lstrip("/")),
                     self.compression)
def __init__(self, parent_node, path="/", nxclass="NXCollection", attrs=None):
    """Create (or reattach to) the directory backing this node.

    :param parent_node: parent node, or None for the root (in which case
        the caller must have set ``self.os_path`` beforehand).
    :param path: absolute path ("/...") or path relative to the parent.
    :param nxclass: NeXus class name recorded in the 'NX_class' attribute.
    :param attrs: optional extra attributes merged into the node's attrs.
    """
    # NOTE(fix): 'attrs' used to default to a shared mutable dict that was
    # mutated below ('NX_class'), leaking state across instances and into
    # callers' dicts; copy defensively instead.
    attrs = {} if attrs is None else dict(attrs)
    self.root_node = self if parent_node is None else parent_node.root_node
    if path.startswith("/"):
        # absolute path
        self.path = path
    else:
        # relative: resolve against the parent node
        self.path = os.path.join(parent_node.path, path)
    if parent_node is not None:
        # root nodes (parent_node is None) provide os_path themselves
        self.os_path = parent_node.os_path
    node_dir = os.path.join(self.os_path, self.path.lstrip("/"))
    if not os.path.exists(node_dir):
        os.mkdir(node_dir)
    attrs['NX_class'] = nxclass.encode('UTF-8')
    self.attrs = JSONBackedDict(os.path.join(node_dir, self._attrs_filename))
    self.attrs.update(attrs)
    self.fields = JSONBackedDict(os.path.join(node_dir, self._fields_filename))
def makeAttrs(self):
    """Build the attribute mapping for this node.

    A read-only root yields a plain dict parsed from the stored JSON;
    a writable root yields a JSONBackedDict bound to the sidecar file.
    """
    attrs_path = self.path + self._attrs_suffix
    if not self.root.readonly:
        return JSONBackedDict(os.path.join(self.os_path, attrs_path.lstrip("/")))
    return json.loads(self.root.open(attrs_path, "r").read())
class Node(object):
    """A node in a directory-backed HDF-like hierarchy.

    Each node corresponds to a directory below ``os_path``; group
    attributes are persisted in a JSON ".attrs" sidecar and field
    metadata in a ".fields" sidecar.
    """

    _attrs_filename = ".attrs"
    _fields_filename = ".fields"

    def __init__(self, parent_node, path="/", nxclass="NXCollection", attrs=None):
        """Create (or reattach to) the directory backing this node.

        :param parent_node: parent Node, or None for the root node (the
            subclass must then set ``os_path`` before calling this).
        :param path: absolute ("/...") path, or path relative to the parent.
        :param nxclass: NeXus class name recorded in the 'NX_class' attribute.
        :param attrs: optional extra attributes merged into the node's attrs.
        """
        # NOTE(fix): 'attrs' used to default to a shared mutable dict that
        # was mutated below ('NX_class'), leaking state across instances and
        # into callers' dicts; copy defensively instead.
        attrs = {} if attrs is None else dict(attrs)
        self.root_node = self if parent_node is None else parent_node.root_node
        if path.startswith("/"):
            # absolute path
            self.path = path
        else:
            # relative: resolve against the parent node
            self.path = os.path.join(parent_node.path, path)
        if parent_node is not None:
            # root nodes (parent_node is None) provide os_path themselves
            self.os_path = parent_node.os_path
        node_dir = os.path.join(self.os_path, self.path.lstrip("/"))
        if not os.path.exists(node_dir):
            os.mkdir(node_dir)
        attrs['NX_class'] = nxclass.encode('UTF-8')
        self.attrs = JSONBackedDict(os.path.join(node_dir, self._attrs_filename))
        self.attrs.update(attrs)
        self.fields = JSONBackedDict(os.path.join(node_dir, self._fields_filename))

    @property
    def parent(self):
        """The containing group (looked up through the root node)."""
        return self.root_node[os.path.dirname(self.name)]

    @property
    def groups(self):
        """Mapping of child-group name -> Group, discovered from subdirectories."""
        node_dir = os.path.join(self.os_path, self.path.lstrip("/"))
        groupnames = [x for x in os.listdir(node_dir)
                      if os.path.isdir(os.path.join(node_dir, x))]
        return dict([(gn, Group(self, gn)) for gn in groupnames])

    @property
    def name(self):
        """Full path of this node within the hierarchy."""
        return self.path

    def keys(self):
        """Names of all children: subdirectories (groups) plus fields."""
        thisdir = os.path.join(self.os_path, self.path.lstrip("/"))
        subgroups = [x for x in os.listdir(thisdir)
                     if os.path.isdir(os.path.join(thisdir, x))]
        self.fields._read()  # need to get most up to date value from disk
        subgroups.extend(self.fields.keys())
        return subgroups

    def items(self):
        return [(k, self[k]) for k in self.keys()]

    def __contains__(self, key):
        return key in self.keys()

    def __delitem__(self, path):
        """Delete a child group (recursively) or a field plus its data file."""
        if not path.startswith("/"):
            path = os.path.join(self.path, path)
        os_path = os.path.join(self.os_path, path.lstrip("/"))
        parent_path = os.path.dirname(path)
        parent_os_path = os.path.join(self.os_path, parent_path.lstrip("/"))
        field_name = os.path.basename(path)
        # NOTE(fix): removed leftover Python-2 debug 'print' statements here;
        # they were a syntax error under Python 3.
        if os.path.exists(os_path) and os.path.isdir(os_path):
            # it's a group: remove the whole directory
            shutil.rmtree(os_path)
        elif os.path.exists(parent_os_path) and os.path.isdir(parent_os_path):
            parent_group = Group(self, parent_path)
            if field_name in parent_group.fields:
                del parent_group.fields[field_name]
                if os.path.exists(os.path.join(parent_os_path, field_name + ".dat")):
                    os.remove(os.path.join(parent_os_path, field_name + ".dat"))
            else:
                raise KeyError(field_name)
        else:
            raise KeyError(field_name)

    def __getitem__(self, path):
        """
        get an item based only on its path.
        Can assume that next-to-last segment is a group
        (dataset is lowest level)
        """
        if path.startswith("/"):
            # absolute path
            full_path = path
        else:
            # relative
            full_path = os.path.join(self.path, path)
        os_path = os.path.join(self.os_path, full_path.lstrip("/"))
        parent_path = os.path.dirname(full_path)
        parent_os_path = os.path.join(self.os_path, parent_path.lstrip("/"))
        field_name = os.path.basename(full_path)
        if os.path.exists(os_path) and os.path.isdir(os_path):
            return Group(self, full_path)
        elif os.path.exists(parent_os_path):
            parent_group = Group(self, parent_path)
            if field_name in parent_group.fields:
                # fields carrying a 'target' entry are links to other fields
                if 'target' in parent_group.fields[field_name]:
                    return FieldLink(self, path)
                else:
                    return Field(self, path)
            else:
                raise KeyError(path)
        else:
            # the item doesn't exist
            raise KeyError(path)

    def add_field(self, path, **kw):
        """Create a Field at 'path' (relative or absolute)."""
        Field(self, path, **kw)

    def add_group(self, path, nxclass, attrs=None):
        """Create a sub-Group at 'path' with the given NX_class."""
        # NOTE(fix): mutable default replaced; Group may mutate this dict.
        Group(self, path, nxclass, {} if attrs is None else attrs)
class Node(object):
    """A node in a directory-backed HDF-like hierarchy.

    Each node corresponds to a directory below ``os_path``; group
    attributes are persisted in a JSON ".attrs" sidecar and field
    metadata in a ".fields" sidecar.
    """

    _attrs_filename = ".attrs"
    _fields_filename = ".fields"

    def __init__(self, parent_node, path="/", nxclass="NXCollection", attrs=None):
        """Create (or reattach to) the directory backing this node.

        :param parent_node: parent Node, or None for the root node (the
            subclass must then set ``os_path`` before calling this).
        :param path: absolute ("/...") path, or path relative to the parent.
        :param nxclass: NeXus class name recorded in the 'NX_class' attribute.
        :param attrs: optional extra attributes merged into the node's attrs.
        """
        # NOTE(fix): 'attrs' used to default to a shared mutable dict that
        # was mutated below ('NX_class'), leaking state across instances and
        # into callers' dicts; copy defensively instead.
        attrs = {} if attrs is None else dict(attrs)
        self.root_node = self if parent_node is None else parent_node.root_node
        if path.startswith("/"):
            # absolute path
            self.path = path
        else:
            # relative: resolve against the parent node
            self.path = os.path.join(parent_node.path, path)
        if parent_node is not None:
            # root nodes (parent_node is None) provide os_path themselves
            self.os_path = parent_node.os_path
        node_dir = os.path.join(self.os_path, self.path.lstrip("/"))
        if not os.path.exists(node_dir):
            os.mkdir(node_dir)
        attrs['NX_class'] = nxclass.encode('UTF-8')
        self.attrs = JSONBackedDict(os.path.join(node_dir, self._attrs_filename))
        self.attrs.update(attrs)
        self.fields = JSONBackedDict(os.path.join(node_dir, self._fields_filename))

    @property
    def parent(self):
        """The containing group (looked up through the root node)."""
        return self.root_node[os.path.dirname(self.name)]

    @property
    def groups(self):
        """Mapping of child-group name -> Group, discovered from subdirectories."""
        node_dir = os.path.join(self.os_path, self.path.lstrip("/"))
        groupnames = [x for x in os.listdir(node_dir)
                      if os.path.isdir(os.path.join(node_dir, x))]
        return dict([(gn, Group(self, gn)) for gn in groupnames])

    @property
    def name(self):
        """Full path of this node within the hierarchy."""
        return self.path

    def keys(self):
        """Names of all children: subdirectories (groups) plus fields."""
        thisdir = os.path.join(self.os_path, self.path.lstrip("/"))
        subgroups = [x for x in os.listdir(thisdir)
                     if os.path.isdir(os.path.join(thisdir, x))]
        self.fields._read()  # need to get most up to date value from disk
        subgroups.extend(self.fields.keys())
        return subgroups

    def items(self):
        return [(k, self[k]) for k in self.keys()]

    def __contains__(self, key):
        return key in self.keys()

    def __delitem__(self, path):
        """Delete a child group (recursively) or a field plus its data file."""
        if not path.startswith("/"):
            path = os.path.join(self.path, path)
        os_path = os.path.join(self.os_path, path.lstrip("/"))
        parent_path = os.path.dirname(path)
        parent_os_path = os.path.join(self.os_path, parent_path.lstrip("/"))
        field_name = os.path.basename(path)
        # NOTE(fix): removed leftover Python-2 debug 'print' statements here;
        # they were a syntax error under Python 3.
        if os.path.exists(os_path) and os.path.isdir(os_path):
            # it's a group: remove the whole directory
            shutil.rmtree(os_path)
        elif os.path.exists(parent_os_path) and os.path.isdir(parent_os_path):
            parent_group = Group(self, parent_path)
            if field_name in parent_group.fields:
                del parent_group.fields[field_name]
                if os.path.exists(os.path.join(parent_os_path, field_name + ".dat")):
                    os.remove(os.path.join(parent_os_path, field_name + ".dat"))
            else:
                raise KeyError(field_name)
        else:
            raise KeyError(field_name)

    def __getitem__(self, path):
        """
        get an item based only on its path.
        Can assume that next-to-last segment is a group
        (dataset is lowest level)
        """
        if path.startswith("/"):
            # absolute path
            full_path = path
        else:
            # relative
            full_path = os.path.join(self.path, path)
        os_path = os.path.join(self.os_path, full_path.lstrip("/"))
        parent_path = os.path.dirname(full_path)
        parent_os_path = os.path.join(self.os_path, parent_path.lstrip("/"))
        field_name = os.path.basename(full_path)
        if os.path.exists(os_path) and os.path.isdir(os_path):
            return Group(self, full_path)
        elif os.path.exists(parent_os_path):
            parent_group = Group(self, parent_path)
            if field_name in parent_group.fields:
                # fields carrying a 'target' entry are links to other fields
                if 'target' in parent_group.fields[field_name]:
                    return FieldLink(self, path)
                else:
                    return Field(self, path)
            else:
                raise KeyError(path)
        else:
            # the item doesn't exist
            raise KeyError(path)

    def add_field(self, path, **kw):
        """Create a Field at 'path' (relative or absolute)."""
        Field(self, path, **kw)

    def add_group(self, path, nxclass, attrs=None):
        """Create a sub-Group at 'path' with the given NX_class."""
        # NOTE(fix): mutable default replaced; Group may mutate this dict.
        Group(self, path, nxclass, {} if attrs is None else attrs)
def __init__(self, node, path, **kw):
    """Attach to (or create) a file-backed dataset.

    *node* is the parent File/Group object and *path* may be absolute or
    relative to *node*. When the dataset does not yet exist, keyword
    arguments may supply *data*, *attrs*, *description*, *dtype*,
    *units*, *label*, *binary* and *encoder*; *dtype* is required and a
    TypeError is raised without it. A preexisting dataset is simply
    reattached, leaving its stored attributes untouched.
    """
    self.root_node = node.root_node
    self.os_path = node.os_path
    # normalize to an absolute path within the hierarchy
    if not path.startswith("/"):
        path = os.path.join(node.path, path)
    self.path = path
    preexisting = os.path.exists(os.path.join(self.os_path, self.path.lstrip("/")))
    self.attrs_path = self.path + self._attrs_suffix
    self.attrs = JSONBackedDict(os.path.join(self.os_path, self.attrs_path.lstrip("/")))
    if preexisting:
        return
    data = kw.pop('data', numpy.array([]))
    attrs = kw.pop('attrs', {})
    for key in ('description', 'dtype', 'units', 'label'):
        attrs.setdefault(key, kw.setdefault(key, None))
    attrs.setdefault('binary', kw.setdefault('binary', False))
    attrs['byteorder'] = sys.byteorder
    self.attrs.encoder = kw.setdefault('encoder', None)
    if attrs['dtype'] is None:
        raise TypeError("dtype missing when creating %s" % (path,))
    self.attrs.clear()
    self.attrs.update(attrs)
    self.attrs._write()
    if data is not None:
        if numpy.isscalar(data):
            data = [data]
        self.value = numpy.asarray(data, dtype=attrs['dtype'])
class FieldFile(object):
    """A dataset stored as a flat file (raw binary or tab-separated text)
    plus a JSON ".attrs" sidecar holding dtype/shape/format metadata."""

    # numpy dtype kind -> text format string used by numpy.savetxt
    _formats = {
        'S': '%s', 'f': '%.8g', 'i': '%d', 'u': '%d', 'b': '%d'}
    _attrs_suffix = ".attrs"

    def __init__(self, node, path, **kw):
        """
        Create a data object.

        Returns the data set created, or None if the data is empty.

        :Parameters:

        *node* : File object
            Handle to a File-like object. This could be a file or a group.

        *path* : string
            Path to the data. This could be a full path from the root
            of the file, or it can be relative to a group. Path components
            are separated by '/'.

        *data* : array or string
            If the data is known in advance, then the value can be given on
            creation.

        *attrs* : dict
            Additional attributes to be added to the dataset; the keys
            'description', 'dtype', 'units', 'label' and 'binary' may also
            be given directly as keyword arguments. *dtype* is required
            when creating a new dataset (TypeError otherwise).

        NOTE(fix): the previous docstring documented h5py storage options
        (shape/maxshape/chunks/compression/shuffle/fletcher32) that this
        file-backed implementation never reads; they have been removed.
        """
        self.root_node = node.root_node
        self.os_path = node.os_path
        if not path.startswith("/"):
            # relative path:
            path = os.path.join(node.path, path)
        self.path = path
        preexisting = os.path.exists(
            os.path.join(self.os_path, self.path.lstrip("/")))
        self.attrs_path = self.path + self._attrs_suffix
        self.attrs = JSONBackedDict(
            os.path.join(self.os_path, self.attrs_path.lstrip("/")))
        # NOTE(fix): replaced "if preexisting: pass else:" with a direct
        # guard; a preexisting dataset is reattached without modification.
        if not preexisting:
            data = kw.pop('data', numpy.array([]))
            attrs = kw.pop('attrs', {})
            attrs.setdefault('description', kw.setdefault('description', None))
            attrs.setdefault('dtype', kw.setdefault('dtype', None))
            attrs.setdefault('units', kw.setdefault('units', None))
            attrs.setdefault('label', kw.setdefault('label', None))
            attrs.setdefault('binary', kw.setdefault('binary', False))
            attrs['byteorder'] = sys.byteorder
            self.attrs.encoder = kw.setdefault('encoder', None)
            if attrs['dtype'] is None:
                raise TypeError("dtype missing when creating %s" % (path,))
            self.attrs.clear()
            self.attrs.update(attrs)
            self.attrs._write()
            if data is not None:
                if numpy.isscalar(data):
                    data = [data]
                data = numpy.asarray(data, dtype=attrs['dtype'])
                self.value = data

    def __repr__(self):
        return "<HDZIP field \"%s\" %s \"%s\">" % (
            self.name, str(self.attrs['shape']), self.attrs['dtype'])

    def __getitem__(self, slice_def):
        return self.value.__getitem__(slice_def)

    def __setitem__(self, slice_def, newvalue):
        # read-modify-write: the value lives on disk, not in memory
        intermediate = self.value
        intermediate[slice_def] = newvalue
        self.value = intermediate

    # promote a few attrs items to python object attributes:
    @property
    def shape(self):
        return self.attrs.get('shape', None)

    @property
    def dtype(self):
        return self.attrs.get('dtype', None)

    @property
    def name(self):
        return self.path

    @property
    def parent(self):
        return self.root_node[os.path.dirname(self.name)]

    @property
    def value(self):
        """Load the dataset from its backing file, reshaped per attrs."""
        attrs = self.attrs
        target = os.path.join(self.os_path, self.path.lstrip("/"))
        with builtin_open(target, 'rb') as infile:
            # NOTE(fix): idiomatic truthiness instead of "== True"
            if attrs.get('binary', False):
                d = numpy.fromfile(infile, dtype=attrs['format'])
            else:
                if os.path.getsize(target) == 1:
                    # empty entry: only contains \n
                    # this is only possible with empty string being written.
                    d = numpy.array([''], dtype=numpy.dtype(str(attrs['format'])))
                else:
                    d = numpy.loadtxt(infile, dtype=numpy.dtype(str(attrs['format'])))
        if 'shape' in attrs:
            d = d.reshape(attrs['shape'])
        return d

    @value.setter
    def value(self, data):
        """Record shape/format/dtype metadata and overwrite the data file."""
        attrs = self.attrs
        if hasattr(data, 'shape'):
            attrs['shape'] = data.shape
        elif hasattr(data, '__len__'):
            attrs['shape'] = [data.__len__()]
        if hasattr(data, 'dtype'):
            # e.g. "<f8" for a little-endian 8-byte float
            formatstr = '<' if attrs['byteorder'] == 'little' else '>'
            formatstr += data.dtype.char
            formatstr += "%d" % (data.dtype.itemsize,)
            attrs['format'] = formatstr
            attrs['dtype'] = data.dtype.name
        self._write_data(data, 'w')

    def _write_data(self, data, mode='w'):
        """Write (mode 'w') or append (mode 'a') raw data to the backing file."""
        target = os.path.join(self.os_path, self.path.lstrip("/"))
        # enforce binary if dims > 2: no way to write text file like this!
        if data.ndim > 2:
            self.attrs['binary'] = True
        if self.attrs.get('binary', False):
            with builtin_open(target, mode + "b") as outfile:
                data.tofile(outfile)
        else:
            with builtin_open(target, mode) as outfile:
                if data.dtype.kind == 'S':
                    kind = data.dtype.kind
                    # escape carriage returns and tabs
                    data = numpy.char.replace(data, '\t', r'\t').astype(kind)
                    data = numpy.char.replace(data, '\r', r'\r').astype(kind)
                    data = numpy.char.replace(data, '\n', r'\n').astype(kind)
                numpy.savetxt(outfile, data, delimiter='\t',
                              fmt=self._formats[data.dtype.kind])

    def append(self, data, coerce_dtype=True):
        """Append one frame along the first axis.

        e.g. if shape is (3,4) it becomes (4,4); (3,4,5) becomes (4,4,5).
        """
        attrs = self.attrs
        if list(data.shape) != list(attrs.get('shape', [])[1:]):
            raise Exception(
                "invalid shape to append: %r can't append to %r for %s (%r)"
                % (data.shape, attrs.get('shape', "No shape"), self.name, data))
        if data.dtype != attrs['dtype']:
            # NOTE(fix): "coerce_dtype == False" replaced with idiomatic "not"
            if not coerce_dtype:
                raise Exception("dtypes do not match, and coerce is set to False")
            data = data.astype(attrs['dtype'])
        new_shape = list(attrs['shape'])
        new_shape[0] += 1
        attrs['shape'] = new_shape
        self._write_data(data, mode='a')

    def extend(self, data, coerce_dtype=True):
        """Append multiple frames along the first axis.

        NOTE: unlike append(), dtype coercion is intentionally not applied
        here (the original coercion code was disabled).
        """
        attrs = self.attrs
        if list(data.shape[1:]) != list(attrs.get('shape', [])[1:]):
            raise Exception("invalid shape to append")
        new_shape = list(attrs['shape'])
        new_shape[0] += data.shape[0]
        attrs['shape'] = new_shape
        self._write_data(data, "a")