def add_alias_field(self, alias, field, units=None, force_add=True):
    r"""
    Add a field as an alias to another field.

    Parameters
    ----------
    alias : string
        Alias name.
    field : string
        The field to be aliased.
    units : optional, string
        Units in which the field will be returned.
    force_add : optional, bool
        If True, add the field even if it already exists (a warning
        is issued) and raise an exception if dependencies do not
        exist. If False, silently do nothing in either case.
        Default: True.

    Examples
    --------
    >>> import ytree
    >>> a = ytree.load("tree_0_0_0.dat")
    >>> # "Mvir" exists on disk
    >>> a.add_alias_field("mass", "Mvir", units="Msun")
    >>> print(a["mass"])

    """
    if alias in self.field_info:
        if force_add:
            ftype = self.field_info[alias].get("type", "on-disk")
            if ftype in ["alias", "derived"]:
                fl = self.derived_field_list
            else:
                fl = self.field_list
            mylog.warn(
                ("Overriding field \"%s\" that already "
                 "exists as %s field.") % (alias, ftype))
            fl.pop(fl.index(alias))
        else:
            return

    if field not in self.field_info:
        if force_add:
            raise ArborFieldDependencyNotFound(
                field, alias, arbor=self)
        else:
            return

    if units is None:
        units = self.field_info[field].get("units")
    self.derived_field_list.append(alias)
    self.field_info[alias] = \
        {"type": "alias", "units": units,
         "dependencies": [field]}
    if "aliases" not in self.field_info[field]:
        self.field_info[field]["aliases"] = []
    self.field_info[field]["aliases"].append(alias)
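# Usage sketch (not part of the method above; follows the docstring example):
# once the alias is added it is registered as a derived field and can be
# queried like any other field. The file name and the on-disk field "Mvir"
# are the illustrative values from the docstring.
import ytree

a = ytree.load("tree_0_0_0.dat")
a.add_alias_field("mass", "Mvir", units="Msun")
print("mass" in a.derived_field_list)  # True
print(a["mass"])                       # root masses, returned in Msun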
def __init__(self, filename):
    if not os.path.exists(filename):
        mylog.warn(("Cannot find data file: %s. "
                    "Will not be able to load field data.") % filename)

    self.filename = filename
    self.fh = None
def get_fields(self, data_object, fields=None):
    if fields is None or len(fields) == 0:
        return

    fi = self.arbor.field_info

    # Determine which requested fields are not already cached on the data object.
    fields_to_get = []
    for field in fields:
        if field not in data_object._root_field_data:
            if fi[field].get("type") == "analysis":
                mylog.warn(
                    ("Accessing analysis field \"%s\" as root field. "
                     "Any changes made will not be reflected here.") % field)
            fields_to_get.append(field)
    if not fields_to_get:
        return

    # Read only the requested fields from the root node of every tree.
    self.arbor._node_io_loop(
        self.arbor._node_io.get_fields,
        pbar="Getting root fields",
        fields=fields_to_get, root_only=True)

    # Assemble the per-tree root values into flat arrays,
    # unit-aware when units are known.
    field_data = {}
    for field in fields_to_get:
        units = fi[field].get("units", "")
        field_data[field] = np.empty(self.arbor.trees.size)
        if units:
            field_data[field] = \
                self.arbor.arr(field_data[field], units)
        for i in range(self.arbor.trees.size):
            if fi[field].get("type") == "analysis":
                field_data[field][i] = \
                    self.arbor.trees[i]._tree_field_data[field][0]
            else:
                field_data[field][i] = \
                    self.arbor.trees[i]._root_field_data[field]

    data_object._root_field_data.update(field_data)
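# A minimal sketch of how the root-field reader above is exercised (assumes an
# arbor loaded as in the docstrings; "Mvir" is an illustrative on-disk field):
# requesting a field from the arbor gathers the root value of every tree in a
# single pass and caches the result on the arbor.
import ytree

a = ytree.load("tree_0_0_0.dat")
masses = a["Mvir"]   # triggers get_fields for all tree roots
print(masses.size)   # one value per tree
print(masses.units)  # unit-aware array when units are known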
def add_derived_field(self, name, function,
                      units=None, description=None,
                      force_add=True):
    r"""
    Add a field that is a function of other fields.

    Parameters
    ----------
    name : string
        Field name.
    function : callable
        The function to be called to generate the field.
        This function should take a single argument, the data
        structure containing the dependent fields. See below
        for an example.
    units : optional, string
        The units in which the field will be returned.
    description : optional, string
        A short description of the field.
    force_add : optional, bool
        If True, add the field even if it already exists (a warning
        is issued) and raise an exception if dependencies do not
        exist. If False, silently do nothing in either case.
        Default: True.

    Examples
    --------
    >>> import ytree
    >>> a = ytree.load("tree_0_0_0.dat")
    >>> def _redshift(data):
    ...     return 1. / data["scale"] - 1
    ...
    >>> a.add_derived_field("redshift", _redshift)
    >>> print(a["redshift"])

    """
    if name in self.field_info:
        if force_add:
            ftype = self.field_info[name].get("type", "on-disk")
            if ftype in ["alias", "derived"]:
                fl = self.derived_field_list
            else:
                fl = self.field_list
            mylog.warn(("Overriding field \"%s\" that already "
                        "exists as %s field.") % (name, ftype))
            fl.pop(fl.index(name))
        else:
            return

    if units is None:
        units = ""
    fc = FakeFieldContainer(self, name=name)
    try:
        rv = function(fc)
    except ArborFieldDependencyNotFound as e:
        if force_add:
            raise e
        else:
            return
    rv.convert_to_units(units)
    self.derived_field_list.append(name)
    self.field_info[name] = \
        {"type": "derived", "function": function,
         "units": units, "description": description,
         "dependencies": list(fc.keys())}
def add_derived_field(self, name, function,
                      units=None, dtype=None, description=None,
                      vector_field=False, force_add=True):
    r"""
    Add a field that is a function of other fields.

    Parameters
    ----------
    name : string
        Field name.
    function : callable
        The function to be called to generate the field.
        This function should take two arguments, the field
        info dictionary and the data structure containing
        the dependent fields. See below for an example.
    units : optional, string
        The units in which the field will be returned.
    dtype : optional, type
        The data type of the field array. If None, use the
        default type set by Arbor._default_dtype.
    description : optional, string
        A short description of the field.
    vector_field : optional, bool
        If True, field is an xyz vector.
        Default: False.
    force_add : optional, bool
        If True, add the field even if it already exists (a warning
        is issued) and raise an exception if dependencies do not
        exist. If False, silently do nothing in either case.
        Default: True.

    Examples
    --------
    >>> import ytree
    >>> a = ytree.load("tree_0_0_0.dat")
    >>> def _redshift(field, data):
    ...     return 1. / data["scale"] - 1
    ...
    >>> a.add_derived_field("redshift", _redshift)
    >>> print(a["redshift"])

    """
    if name in self.field_info:
        if force_add:
            ftype = self.field_info[name].get("type", "on-disk")
            if ftype in ["alias", "derived"]:
                fl = self.derived_field_list
            else:
                fl = self.field_list
            mylog.warn(
                ("Overriding field \"%s\" that already "
                 "exists as %s field.") % (name, ftype))
            fl.pop(fl.index(name))
        else:
            return

    if units is None:
        units = ""
    if dtype is None:
        dtype = self._default_dtype
    info = {"name": name, "type": "derived",
            "function": function, "units": units,
            "dtype": dtype, "vector_field": vector_field,
            "description": description}

    fc = FakeFieldContainer(self, name=name)
    try:
        rv = function(info, fc)
    except TypeError as e:
        raise RuntimeError(
            """
Field function syntax in ytree has changed. Field functions must
now take two arguments, as in the following:

    def my_field(field, data):
        return data['mass']

Check the TypeError exception above for more details.
""") from e
    except ArborFieldDependencyNotFound as e:
        if force_add:
            raise e
        else:
            return
    rv.convert_to_units(units)

    info["dependencies"] = list(fc.keys())
    self.derived_field_list.append(name)
    self.field_info[name] = info
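# Usage sketch for the two-argument syntax above (illustrative only, not part
# of ytree itself): the field function receives the field info dict and the
# data container. "vx", "vy", and "vz" are assumed on-disk velocity fields
# with km/s units; substitute the names present in your dataset.
import numpy as np
import ytree

a = ytree.load("tree_0_0_0.dat")

def _speed(field, data):
    # "field" is the info dict built above; "data" supplies the dependencies.
    return np.sqrt(data["vx"]**2 + data["vy"]**2 + data["vz"]**2)

a.add_derived_field("speed", _speed, units="km/s",
                    description="magnitude of the halo velocity")
print(a["speed"])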
def _parse_parameter_file(self):
    f = h5py.File(self.parameter_filename, mode='r')

    # Is the file a collection of virtual data sets
    # pointing to multiple data files?
    virtual = self._virtual_dataset
    if virtual:
        fgroup = f.get('File0')
        if fgroup is None:
            raise ArborDataFileEmpty(self.filename)
    else:
        fgroup = f

    if 'halos' in fgroup['Forests']:
        # array of structs layout
        mylog.warn(
            "This dataset was written in array of structs format. "
            "Field access will be significantly slower than struct "
            "of arrays format.")
        self._aos = True
        ftypes = fgroup['Forests/halos'].dtype
        my_fi = dict((ftypes.names[i], {'dtype': ftypes[i]})
                     for i in range(len(ftypes)))
    else:
        # struct of arrays layout
        self._aos = False
        my_fi = dict((field, {'dtype': data.dtype})
                     for field, data in fgroup['Forests'].items())

    if virtual:
        aname = _access_names[self.access]['total']
        self._size = f.attrs[aname]

    header = fgroup.attrs['Consistent Trees_metadata'].astype(str)
    header = header.tolist()
    f.close()

    header_fi = parse_ctrees_header(
        self, header, ntrees_in_file=False)
    # Do some string manipulation to match the header with
    # altered names in the hdf5 file.
    new_fi = {}
    for field in header_fi:
        new_field = field
        # remove ?| characters
        new_field = re.sub(r'[?|]', '', new_field)
        # replace []/() characters with _
        new_field = re.sub(r'[\[\]\/\(\)]', '_', new_field)
        # remove leading/trailing underscores
        new_field = new_field.strip('_')
        # replace double underscore with single underscore
        new_field = new_field.replace('__', '_')
        new_fi[new_field] = header_fi[field].copy()
        if 'column' in new_fi[new_field]:
            del new_fi[new_field]['column']

    for field in my_fi:
        my_fi[field].update(new_fi.get(field, {}))

    self.field_list = list(my_fi.keys())
    self.field_info.update(my_fi)
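# A standalone illustration of the renaming applied above, so that
# Consistent-Trees header names match the altered names stored in the HDF5
# file. The sample names below are hypothetical inputs for demonstration.
import re

def _sanitize(field):
    field = re.sub(r'[?|]', '', field)           # drop ? and | characters
    field = re.sub(r'[\[\]\/\(\)]', '_', field)  # [ ] / ( ) become underscores
    field = field.strip('_')                     # trim leading/trailing underscores
    return field.replace('__', '_')              # collapse double underscores

for name in ["A[x](500c)", "Tidal_Force(Rvir)", "M200c?"]:
    print(name, "->", _sanitize(name))
# A[x](500c)        -> A_x_500c
# Tidal_Force(Rvir) -> Tidal_Force_Rvir
# M200c?            -> M200c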