def _plant_trees(self):
    """
    Work out the total number of trees, allocate the array that
    stores them, and build a root TreeNode for every tree.

    Each root remembers the file object it came from (``_lht``) and
    its position within that file (``_index_in_lht``).
    """
    # total number of trees over all files
    total = sum(lht.ntrees for lht in self._lhtfiles)

    self._trees = np.empty(total, dtype=object)
    pbar = get_pbar("Loading tree roots", total)

    planted = 0
    for lht in self._lhtfiles:
        tree_count = lht.ntrees
        # uids of the first halo of every tree in this file
        root_uids = lht.all_uids[lht.nhalos_before_tree]
        for index in range(tree_count):
            node = TreeNode(root_uids[index], arbor=self, root=True)
            # bookkeeping needed later to locate this tree's data
            node._lht = lht
            node._index_in_lht = index
            self._trees[planted] = node
            planted += 1
        pbar.update(planted)
    pbar.finish()
def _plant_trees(self):
    """
    Read the tree index from the arbor's HDF5 file and create one
    root TreeNode per tree.

    The "tree_start_index" and "tree_end_index" datasets are stored
    on ``self._si`` and ``self._ei``.  ``self._trees`` is filled with
    root nodes, each carrying its position in the array as ``_ai``.
    """
    # The context manager closes the file even if a read fails.
    with h5py.File(self.filename, "r") as fh:
        ntrees = fh.attrs["total_trees"]
        # dataset[()] reads the full dataset; the older Dataset.value
        # attribute was removed in h5py 3.
        uids = fh["data"]["uid"][()].astype(np.int64)
        self._si = fh["index"]["tree_start_index"][()]
        self._ei = fh["index"]["tree_end_index"][()]

    # The np.object alias was removed from NumPy; the builtin
    # ``object`` is the equivalent dtype.
    self._trees = np.empty(ntrees, dtype=object)
    for i in range(ntrees):
        my_node = TreeNode(uids[i], arbor=self, root=True)
        my_node._ai = i
        self._trees[i] = my_node
def _grow_tree(self, tree_node, **kwargs):
    """
    Create an array of TreeNodes hanging off the root node
    and assemble the tree structure.

    Does nothing if ``self.is_grown(tree_node)`` is true.  Otherwise
    ``self._setup_tree`` is called with ``kwargs``, after which
    ``tree_node.uids`` and ``tree_node.descids`` are used to build and
    link the nodes.  The resulting array is stored on
    ``tree_node._nodes`` with the root itself in slot 0.
    """
    # skip this if not a root or if already grown
    if self.is_grown(tree_node):
        return

    self._setup_tree(tree_node, **kwargs)

    nhalos = tree_node.uids.size
    # The np.object alias was removed from NumPy; the builtin
    # ``object`` is the equivalent dtype.
    nodes = np.empty(nhalos, dtype=object)
    nodes[0] = tree_node
    for i in range(1, nhalos):
        nodes[i] = TreeNode(tree_node.uids[i], arbor=self)
    tree_node._nodes = nodes

    # Add tree information to nodes
    uidmap = {}
    for i, node in enumerate(nodes):
        node.treeid = i
        node.root = tree_node
        uidmap[tree_node.uids[i]] = i

    # Link ancestor/descendents
    # Separate loop for trees like lhalotree where descendent
    # can follow in order
    for i, node in enumerate(nodes):
        descid = tree_node.descids[i]
        if descid != -1:
            desc = nodes[uidmap[descid]]
            desc.add_ancestor(node)
            node.descendent = desc
def _grow_tree(self, tree_node):
    """
    Create an array of TreeNodes hanging off the root node
    and assemble the tree structure.

    Does nothing if ``self.is_grown(tree_node)`` is true.  Otherwise
    ``self._setup_tree`` is called, after which ``tree_node.uids`` and
    ``tree_node.descids`` are used to build and link the nodes.  The
    resulting array is stored on ``tree_node._nodes`` with the root
    itself in slot 0.
    """
    # skip this if not a root or if already grown
    if self.is_grown(tree_node):
        return

    self._setup_tree(tree_node)

    nhalos = tree_node.uids.size
    # The np.object alias was removed from NumPy; the builtin
    # ``object`` is the equivalent dtype.
    nodes = np.empty(nhalos, dtype=object)
    nodes[0] = tree_node
    for i in range(1, nhalos):
        nodes[i] = TreeNode(tree_node.uids[i], arbor=self)
    tree_node._nodes = nodes

    # Linking happens in the same pass that fills uidmap, so a
    # descendent must appear earlier in the arrays than any of its
    # ancestors for the lookup below to succeed.
    uidmap = {}
    for i, node in enumerate(nodes):
        node.treeid = i
        node.root = tree_node
        descid = tree_node.descids[i]
        uidmap[tree_node.uids[i]] = i
        if descid != -1:
            desc = nodes[uidmap[descid]]
            desc.add_ancestor(node)
            node.descendent = desc
def _plant_trees(self):
    """
    Read the tree index from the arbor's HDF5 file, register the
    data files with the node io handler, and create one root
    TreeNode per tree.

    The "tree_start_index" and "tree_end_index" datasets are stored
    on ``self._node_io`` as ``_si`` and ``_ei``.  One YTreeDataFile is
    created per entry of the start-index array, named from
    ``self._prefix``, a zero-padded counter, and ``self._suffix``.
    ``self._trees`` is filled with root nodes, each carrying its
    position in the array as ``_ai``.
    """
    # The context manager closes the file even if a read fails.
    with h5py.File(self.filename, "r") as fh:
        ntrees = fh.attrs["total_trees"]
        # dataset[()] reads the full dataset; the older Dataset.value
        # attribute was removed in h5py 3.
        uids = fh["data"]["uid"][()].astype(np.int64)
        self._node_io._si = fh["index"]["tree_start_index"][()]
        self._node_io._ei = fh["index"]["tree_end_index"][()]

    self._node_io.data_files = [
        YTreeDataFile("%s_%04d%s" % (self._prefix, i, self._suffix))
        for i in range(self._node_io._si.size)
    ]

    # The np.object alias was removed from NumPy; the builtin
    # ``object`` is the equivalent dtype.
    self._trees = np.empty(ntrees, dtype=object)
    for i in range(ntrees):
        my_node = TreeNode(uids[i], arbor=self, root=True)
        my_node._ai = i
        self._trees[i] = my_node
def _plant_trees(self):
    """
    Load the id fields from the arbor's HDF5 file and create a root
    TreeNode for every halo whose descendent id is -1.

    Each root records the indices of all halos whose "tree_id" equals
    the root's uid (``_fi``) and how many there are (``_tree_size``).
    The "uid" and "desc_id" arrays are kept in ``self._field_cache``
    and the root uids in ``self._ri``.
    """
    # The context manager closes the file even if a read fails.
    with h5py.File(self.filename, "r") as fh:
        # dataset[()] reads the full dataset; the older Dataset.value
        # attribute was removed in h5py 3.
        uids = fh["data"]["uid"][()].astype(np.int64)
        descids = fh["data"]["desc_id"][()].astype(np.int64)
        treeids = fh["data"]["tree_id"][()].astype(np.int64)

    # a halo with no descendent is the root of its tree
    roots = uids[descids == -1]
    ntrees = roots.size

    # The np.object alias was removed from NumPy; the builtin
    # ``object`` is the equivalent dtype.
    self._trees = np.empty(ntrees, dtype=object)
    for i, root in enumerate(roots):
        my_node = TreeNode(root, arbor=self, root=True)
        my_node._fi = np.where(root == treeids)[0]
        my_node._tree_size = my_node._fi.size
        self._trees[i] = my_node

    self._field_cache = {}
    self._field_cache["uid"] = uids
    self._field_cache["desc_id"] = descids
    self._ri = roots
def _plant_trees(self):
    """
    Scan the data file for tree header lines and create a root
    TreeNode for each one found.

    The file is read in fixed-size blocks starting at
    ``self._hoffset``.  Every "#" found is taken as the start of a
    header whose integer uid begins ``lkey`` characters later and runs
    to the end of that line.  Each root stores the file offset just
    past its header line as ``_si``; ``_ei`` is set to the offset of
    the character preceding the next header, or to the final read
    position for the last tree.
    """
    lkey = len("tree ") + 1
    block_size = 32768

    data_file = self._node_io.data_file
    data_file.open()
    # Make sure the file is closed even if parsing fails part-way.
    try:
        data_file.fh.seek(0, 2)
        file_size = data_file.fh.tell()
        pbar = get_pbar("Loading tree roots", file_size)
        data_file.fh.seek(self._hoffset)
        # The np.object alias was removed from NumPy; the builtin
        # ``object`` is the equivalent dtype.
        self._trees = np.empty(self._ntrees, dtype=object)

        offset = self._hoffset
        itree = 0
        nblocks = np.ceil(float(file_size - self._hoffset) /
                          block_size).astype(np.int64)
        for _ in range(nblocks):
            my_block = min(block_size, file_size - offset)
            if my_block <= 0:
                break
            buff = data_file.fh.read(my_block)
            lihash = -1
            for _ in range(buff.count("#")):
                ihash = buff.find("#", lihash + 1)
                inl = buff.find("\n", ihash + 1)
                if inl < 0:
                    # The header line runs past the end of this block;
                    # pull in the remainder of the line.
                    buff += data_file.fh.readline()
                    # Point inl at the newline itself, as find() does
                    # in the normal case, so that _si below is the
                    # first byte after the header rather than the
                    # second.
                    if buff.endswith("\n"):
                        inl = len(buff) - 1
                    else:
                        inl = len(buff)
                uid = int(buff[ihash + lkey:inl])
                lihash = ihash
                my_node = TreeNode(uid, arbor=self, root=True)
                my_node._si = offset + inl + 1
                self._trees[itree] = my_node
                if itree > 0:
                    # the previous tree ends right before this header
                    self._trees[itree - 1]._ei = offset + ihash - 1
                itree += 1
            offset = data_file.fh.tell()
            pbar.update(offset)
        # An arbor with no trees has no last node to close off.
        if self._trees.size > 0:
            self._trees[-1]._ei = offset
    finally:
        data_file.close()
    pbar.finish()
def _plant_trees(self):
    """
    Create a TreeNode for every halo in every data file and link
    each halo to its descendent.

    ``self.data_files`` is walked in order; each entry is a data file
    or a list of data files.  A halo is a root if it belongs to the
    first entry, if its descendent id is -1, or if its descendent id
    does not match any halo id in the previous entry.  Every other
    halo is attached to the halo in the previous entry whose id
    equals its descendent id.  Nodes receive consecutive uids
    starting from 0, and the roots are collected in ``self._trees``.
    """
    # this can be called once with the list, but fields are
    # not guaranteed to be returned in order.
    fields = [
        self.field_info.resolve_field_dependencies([field])[0][0]
        for field in ["halo_id", "desc_id"]
    ]
    halo_id_f, desc_id_f = fields
    dtypes = dict((field, np.int64) for field in fields)

    uid = 0
    trees = []
    nfiles = len(self.data_files)
    # halo id -> node for the previous entry of self.data_files;
    # a dict gives constant-time descendent lookups.
    prev_nodes = {}

    pbar = get_pbar("Planting trees", nfiles)
    for i, dfl in enumerate(self.data_files):
        if not isinstance(dfl, list):
            dfl = [dfl]

        batches = []
        hids = []
        ancs = defaultdict(list)
        for data_file in dfl:
            data = data_file._read_fields(fields, dtypes=dtypes)
            nhalos = len(data[halo_id_f])
            batch = np.empty(nhalos, dtype=object)

            for it in range(nhalos):
                descid = data[desc_id_f][it]
                root = i == 0 or descid == -1
                # The data says a descendant exists, but it's not there.
                # This shouldn't happen, but it does sometimes.
                if not root and int(descid) not in prev_nodes:
                    root = True
                    descid = data[desc_id_f][it] = -1
                tree_node = TreeNode(uid, arbor=self, root=root)
                tree_node._fi = it
                tree_node.data_file = data_file
                batch[it] = tree_node
                if root:
                    trees.append(tree_node)
                    if self.field_info["uid"]["source"] == "arbor":
                        tree_node._root_field_data["uid"] = \
                            tree_node.uid
                        tree_node._root_field_data["desc_uid"] = -1
                else:
                    ancs[descid].append(tree_node)
                uid += 1
            data_file.trees = batch
            batches.append(batch)
            hids.append(data[halo_id_f])

        if i > 0:
            for descid, ancestors in ancs.items():
                descendent = prev_nodes[int(descid)]
                descendent._ancestors = ancestors
                for ancestor in ancestors:
                    ancestor.descendent = descendent

        if i < nfiles - 1:
            # Rebuild the lookup for the next entry.  setdefault keeps
            # the first node seen for an id if ids repeat.
            prev_nodes = {}
            for batch, hid in zip(batches, hids):
                for halo_id, node in zip(hid, batch):
                    prev_nodes.setdefault(int(halo_id), node)

        # report the number of entries completed so far
        pbar.update(i + 1)
    pbar.finish()

    self._trees = np.array(trees)
def _plant_trees(self):
    """
    Create a TreeNode for every halo in every data file and link
    each halo to its descendent.

    ``self.data_files`` is walked in order; each entry is a data file
    or a list of data files.  A halo is a root if it belongs to the
    first entry, if its descendent id is -1, or if its descendent id
    does not match any halo id in the previous entry.  Every other
    halo is attached to the halo in the previous entry whose id
    equals its descendent id.  Nodes receive consecutive uids
    starting from 0, and the roots are collected in ``self._trees``.
    """
    # Resolve each field on its own so that halo_id_f and desc_id_f
    # cannot be swapped if a combined call returns the fields in a
    # different order than requested.
    fields = [
        self.field_info.resolve_field_dependencies([field])[0][0]
        for field in ["halo_id", "desc_id"]
    ]
    halo_id_f, desc_id_f = fields
    dtypes = dict((field, np.int64) for field in fields)

    uid = 0
    trees = []
    nfiles = len(self.data_files)
    # halo id -> node for the previous entry of self.data_files;
    # a dict gives constant-time descendent lookups.
    prev_nodes = {}

    for i, dfl in enumerate(self.data_files):
        if not isinstance(dfl, list):
            dfl = [dfl]

        batches = []
        hids = []
        ancs = defaultdict(list)
        for data_file in dfl:
            data = data_file._read_fields(fields, dtypes=dtypes)
            nhalos = len(data[halo_id_f])
            batch = np.empty(nhalos, dtype=object)

            for it in range(nhalos):
                descid = data[desc_id_f][it]
                root = i == 0 or descid == -1
                # A descendent id that matches nothing in the previous
                # entry would make the link step fail, so such a halo
                # is treated as a root instead.
                if not root and int(descid) not in prev_nodes:
                    root = True
                    descid = data[desc_id_f][it] = -1
                tree_node = TreeNode(uid, arbor=self, root=root)
                tree_node._fi = it
                tree_node.data_file = data_file
                batch[it] = tree_node
                if root:
                    trees.append(tree_node)
                    if self.field_info["uid"]["source"] == "arbor":
                        tree_node._root_field_data["uid"] = \
                            tree_node.uid
                        tree_node._root_field_data["desc_uid"] = -1
                else:
                    ancs[descid].append(tree_node)
                uid += 1
            data_file.trees = batch
            batches.append(batch)
            hids.append(data[halo_id_f])

        if i > 0:
            for descid, ancestors in ancs.items():
                descendent = prev_nodes[int(descid)]
                descendent._ancestors = ancestors
                for ancestor in ancestors:
                    ancestor.descendent = descendent

        if i < nfiles - 1:
            # Rebuild the lookup for the next entry.  setdefault keeps
            # the first node seen for an id if ids repeat.
            prev_nodes = {}
            for batch, hid in zip(batches, hids):
                for halo_id, node in zip(hid, batch):
                    prev_nodes.setdefault(int(halo_id), node)

    self._trees = np.array(trees)