def test_modify_node_identifier_root(self):
    """Renaming the root via ``update_node`` must update root, tag and child links."""
    tree = Tree()
    tree.create_node("Harry", "harry")
    tree.create_node("Jane", "jane", parent="harry")
    tree.update_node(tree['harry'].identifier, identifier='xyz', tag='XYZ')
    # assertEqual (rather than assertTrue(a == b)) reports both operands when
    # the check fails.
    self.assertEqual(tree.root, 'xyz')
    self.assertEqual(tree['xyz'].tag, 'XYZ')
    # The child must now point at the renamed root.
    self.assertEqual(tree.parent('jane').identifier, 'xyz')
def reset_ids(t: tl.Tree) -> tl.Tree:
    """Re-key every node of ``t`` in place and return ``t``.

    Leaves are re-identified by their own tag (the tag itself is rewritten
    unchanged); every other node receives a fresh identifier from ``uid()``.
    """
    for current in t.all_nodes():
        if not current.is_leaf():
            # Inner node: only the identifier changes.
            t.update_node(current.identifier, identifier=uid())
            continue
        leaf_tag = current.tag
        t.update_node(current.identifier, tag=leaf_tag, identifier=leaf_tag)
    return t
def trim_excess_root(tree: Tree) -> Tree:
    """Strip single-child roots from the top of ``tree``.

    While the root has exactly one child, that child is detached and becomes
    the root of a subtree, i.e. A → B → (C, D) is replaced with B → (C, D).
    The first tree whose root does not have exactly one child is returned.
    """
    current = tree
    while True:
        top_level = current.children(current.root)
        if len(top_level) != 1:
            return current
        # Clear the parent links of the only child before promoting it.
        current.update_node(top_level[0].identifier, parent=None, bpointer=None)
        current = current.subtree(top_level[0].identifier)
def test_modify_node_identifier_recursively(self):
    """Renaming a non-root node must drop the old id and register the new one."""
    tree = Tree()
    tree.create_node("Harry", "harry")
    tree.create_node("Jane", "jane", parent="harry")
    n = tree.get_node("jane")
    self.assertEqual(n.identifier, 'jane')

    # Success to modify
    tree.update_node(n.identifier, identifier='xyz')
    # Dedicated assertions give a useful message on failure, unlike
    # assertTrue(<comparison>).
    self.assertIsNone(tree.get_node("jane"))
    self.assertEqual(tree.get_node("xyz").identifier, 'xyz')
def permute_leaves(T):
    """Return a deep copy of ``T`` whose leaf identifiers are randomly permuted.

    The renaming is done in two passes so that a leaf never takes an
    identifier that another, not yet renamed, leaf still holds.
    """
    tree = Tree(tree=T, deep=True)
    original_ids = get_leaf_node_ids(tree)
    shuffled_ids = original_ids[:]
    random.shuffle(shuffled_ids)

    # Pass 1: give each leaf its new tag and park it under a temporary id.
    for position, (old_id, target_id) in enumerate(zip(original_ids, shuffled_ids)):
        tree.update_node(old_id, tag=target_id, identifier=f'L{position}')

    # Pass 2: move each leaf from its temporary id to its final one.
    for position, target_id in enumerate(shuffled_ids):
        tree.update_node(f'L{position}', identifier=target_id)

    return tree
class PathList:
    """Cache of a remote disk's directory layout, stored in a ``Tree``.

    Nodes are keyed by remote file id, tagged with the entry name and carry a
    ``FileInfo`` as data.  The node ``'root'`` stands for the top of the disk
    and is created without data.
    """

    # Layout of the ``created_at`` / ``updated_at`` timestamps of a listing entry.
    _TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

    def __init__(self, disk):
        """Create a cache that only contains the ``'root'`` node.

        :param disk: object whose ``get_file_list(file_id)`` returns the
            listing of one remote directory.
        """
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root')
        # Default number of directory levels below the refreshed one that are
        # fetched as well.
        self.depth = 3

    @classmethod
    def _make_file_info(cls, item):
        """Convert one listing entry (a mapping) into a ``FileInfo``."""
        is_file = item['type'] == 'file'
        common = dict(
            name=item['name'],
            id=item['file_id'],
            pid=item['parent_file_id'],
            type=is_file,
            ctime=time.strptime(item['created_at'], cls._TIME_FORMAT),
            update_time=time.strptime(item['updated_at'], cls._TIME_FORMAT),
            hidden=item['hidden'],
        )
        if not is_file:
            return FileInfo(**common)
        # Only files carry content metadata.
        return FileInfo(
            category=item['category'],
            size=item['size'],
            content_hash_name=item['content_hash_name'],
            content_hash=item['content_hash'],
            download_url=item['download_url'],
            **common,
        )

    def update_path_list(self, file_id='root', depth=None, is_fid=True):
        """Refresh the cached children of a directory from the disk.

        :param file_id: id of the directory, or a path when ``is_fid`` is false.
        :param depth: how many further directory levels to descend into;
            defaults to ``self.depth``.
        :param is_fid: whether ``file_id`` is already an id.
        :return: ``False`` when the listing has no ``'items'`` key, else ``True``.
        """
        if depth is None:
            depth = self.depth
        if not is_fid:
            file_id = self.get_path_fid(file_id, auto_update=False)
        file_list = self._disk.get_file_list(file_id)
        if 'items' not in file_list:
            return False
        for item in file_list['items']:
            file_info = self._make_file_info(item)
            if self._tree.get_node(file_info.id):
                # Refresh the entry itself.  Using the directory id here would
                # overwrite the parent's data with the child's.
                self._tree.update_node(file_info.id, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name, identifier=file_info.id,
                                       data=file_info, parent=file_id)
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1)
        return True

    def tree(self, path='root', auto_update=True):
        """Print the cached tree below ``path``.

        :raises Exception: when ``path`` cannot be resolved.
        """
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if not file_id:
            raise Exception('No such file or directory')
        self._tree.show(file_id)

    def get_path_list(self, path, auto_update=True):
        """Resolve ``path`` and return what ``get_fid_list`` returns for it."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        return self.get_fid_list(file_id, auto_update=auto_update)

    def get_fid_list(self, file_id, auto_update=True):
        """Return the ``FileInfo`` of a file, or of every child of a directory.

        :raises Exception: when ``file_id`` is falsy.
        """
        self.auto_update_path_list(auto_update)
        if not file_id:
            raise Exception('No such file or directory')
        # The root node has no data, so it is never inspected as a file.
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [child.data for child in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', auto_update=True):
        """Translate a path into a file id by walking the cached tree.

        :param path: path relative to the disk root; a leading ``root``
            component is accepted.
        :param file_id: id of the node the walk starts from.
        :return: the id of the final component, ``'root'`` for the root
            itself, or ``False`` as soon as a component cannot be found.
        """
        self.auto_update_path_list(auto_update)
        path = Path(path)
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        parts = list(filter(None, path.as_posix().split('/')))
        if parts and parts[0] == 'root':
            # 'root/a/b' addresses the same entry as 'a/b'.
            parts = parts[1:]
        found = False
        for part in parts:
            match = next((node for node in self._tree.children(file_id)
                          if part == node.tag), None)
            if match is None:
                # Stop at the first missing component; continuing would match
                # later components against the wrong directory.
                return False
            file_id = match.identifier
            found = True
        return file_id if found else False

    def get_path_node(self, path, auto_update=True):
        """Return the tree node for ``path``, or ``False`` when unresolved."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, auto_update=True):
        """Return the parent node of ``path``, or ``False`` when there is none."""
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, auto_update=True):
        """Fill the cache from the disk when it only holds the root node."""
        if auto_update and len(self._tree) == 1:
            return self.update_path_list()
class PathList:
    """Cache of a remote disk's directory layout, stored in a ``Tree``.

    Nodes are keyed by remote file id, tagged with the entry name and carry a
    ``FileInfo`` as data.  The node ``'root'`` stands for the top of the disk
    and is created without data.
    """

    # Layout of the ``created_at`` / ``updated_at`` timestamps of a listing entry.
    _TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

    def __init__(self, disk):
        """Create a cache that only contains the ``'root'`` node.

        :param disk: object whose ``get_file_list(file_id)`` returns the
            entries of one remote directory.
        """
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root')
        # Default number of directory levels below the refreshed one that are
        # fetched as well.
        self.depth = 3

    @classmethod
    def _make_file_info(cls, item):
        """Convert one listing entry (a mapping) into a ``FileInfo``."""
        is_file = item['type'] == 'file'
        common = dict(
            name=item['name'],
            id=item['file_id'],
            pid=item['parent_file_id'],
            type=is_file,
            ctime=time.strptime(item['created_at'], cls._TIME_FORMAT),
            update_time=time.strptime(item['updated_at'], cls._TIME_FORMAT),
            hidden=item['hidden'],
        )
        if not is_file:
            return FileInfo(**common)
        # Only files carry content metadata; the download URL may be absent.
        return FileInfo(
            category=item['category'],
            content_type=item['content_type'],
            size=item['size'],
            content_hash_name=item['content_hash_name'],
            content_hash=item['content_hash'],
            download_url=item.get('download_url', ''),
            **common,
        )

    def update_path_list(self, file_id='root', depth=None, is_fid=True):
        """Refresh the cached children of a directory from the disk.

        :param file_id: id of the directory, or a path when ``is_fid`` is false.
        :param depth: how many further directory levels to descend into;
            defaults to ``self.depth``.
        :param is_fid: whether ``file_id`` is already an id.
        :return: ``False`` when the disk returns a falsy listing, else ``True``.
        """
        if depth is None:
            depth = self.depth
        if not is_fid:
            file_id = self.get_path_fid(file_id, update=False)
        file_list = self._disk.get_file_list(file_id)
        if not file_list:
            return False
        for item in file_list:
            file_info = self._make_file_info(item)
            if self._tree.get_node(file_info.id):
                # Refresh the entry itself.  Using the directory id here would
                # overwrite the parent's data with the child's.
                self._tree.update_node(file_info.id, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name, identifier=file_info.id,
                                       data=file_info, parent=file_id)
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1)
        return True

    def tree(self, path='root'):
        """Refresh and print the cached tree below ``path``.

        :raises FileNotFoundError: when ``path`` cannot be resolved.
        """
        file_id = self.get_path_fid(path, update=False)
        # Validate before refreshing so an unresolved path never reaches the disk.
        if not file_id:
            raise FileNotFoundError(path)
        self.update_path_list(file_id)
        self._tree.show(file_id)

    def get_path_list(self, path, update=True):
        """Resolve ``path`` and return what ``get_fid_list`` returns for it."""
        file_id = self.get_path_fid(path, update=update)
        return self.get_fid_list(file_id, update=update)

    def get_fid_list(self, file_id, update=True):
        """Return the ``FileInfo`` of a file, or of every child of a directory.

        :raises FileNotFoundError: when ``file_id`` is falsy.
        """
        if not file_id:
            raise FileNotFoundError(file_id)
        self.auto_update_path_list(update, file_id)
        # The root node has no data, so it is never inspected as a file.
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [child.data for child in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', update=True):
        """Translate a path into a file id by walking the cached tree.

        A directory without cached children is refreshed through
        ``auto_update_path_list`` before it is searched.

        :param path: path relative to the disk root; a leading ``root``
            component is accepted.
        :param file_id: id of the node the walk starts from.
        :return: the id of the final component, ``'root'`` for the root
            itself, or ``False`` when a component cannot be found.
        """
        path = PurePosixPath(Path(path).as_posix())
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        parts = list(filter(None, str(path).split('/')))
        # ``parts`` is empty for separator-only paths such as '//'.
        if parts and parts[0] == 'root':
            parts = parts[1:]
        found = False
        for part in parts:
            children = self._tree.children(file_id)
            if not children:
                self.auto_update_path_list(update, file_id)
                children = self._tree.children(file_id)
            match = next((node for node in children if part == node.tag), None)
            if match is None:
                return False
            file_id = match.identifier
            found = True
        return file_id if found else False

    def get_path_node(self, path, update=True):
        """Return the tree node for ``path``, or ``False`` when unresolved."""
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, update=True):
        """Return the parent node of ``path``, or ``False`` when there is none."""
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, update=True, file_id=None):
        """Refresh the cache when needed.

        With ``update`` false and a ``file_id`` given, only that directory is
        listed (``depth=0``).  With ``update`` true, the whole cache is filled
        when it holds nothing but the root node.
        """
        if not update and file_id:
            return self.update_path_list(file_id, depth=0)
        elif update and len(self._tree) == 1:
            return self.update_path_list()
def TreeSequenceToTreeClass(simulation, tree_event_sequence, is_AA_mutation_in_root_node=False):
    """Build a ``Tree`` of ``TreeEvent`` payloads from a simulated genealogy.

    ``simulation.GetTree()`` supplies, per node index, the index of its
    parent (``-1`` marks the root); ``tree_event_sequence.tree_sequence``
    supplies the event stored on each node.  When
    ``is_AA_mutation_in_root_node`` equals ``True`` the root receives a
    synthetic zeroed 'coalescence' event flagged as a mutation.

    Raises ``ValueError`` when no node is marked as root.  The elapsed time
    is printed before the tree is returned.
    """
    started = time.time()
    node_total = len(tree_event_sequence.tree_sequence)
    result = Tree()
    parent_of = simulation.GetTree()

    # there can be only one root: take the first index flagged with -1
    root_id = next((idx for idx in range(node_total) if parent_of[idx] == -1), 'Unknown')
    if root_id == 'Unknown':
        raise ValueError("There is no root in this tree")
    result.create_node(root_id, root_id, data=None)  # placeholder on root

    others = [idx for idx in range(node_total) if idx != root_id]
    # Every node is first hung under the root so that all of them exist
    # before any of them is moved below its real parent.
    for idx in others:
        result.create_node(idx, idx, parent=root_id, data=None)
    for idx in others:
        result.move_node(idx, parent_of[idx])

    for idx in range(node_total):
        child_total = len(result.get_node(idx).fpointer)  # number of children
        event = tree_event_sequence.tree_sequence[idx]
        fields = dict(
            is_a_mutation=event.is_a_mutation,
            number_of_children=child_total,
            old_nucleotyde=event.old_nucleotyde,
            new_nucleotyde=event.new_nucleotyde,
            mutation_cite=event.mutation_cite,
            tree_time=event.tree_time,
            tree_type=event.tree_type,
            node_id=event.node_id,
        )
        if idx == root_id and is_AA_mutation_in_root_node == True:  # noqa: E712
            fields.update(is_a_mutation=True, old_nucleotyde=0, new_nucleotyde=0,
                          mutation_cite=0, tree_time=0, tree_type='coalescence')
        result.update_node(idx, data=TreeEvent(**fields))

    finished = time.time()
    print('Time spent on conversion to tree class = ', finished - started)
    return result
def construct_celltree(nucleus_file, config):
    '''
    Construct cell tree structure with cell names

    :param nucleus_file: CSV file with ``cell`` and ``time`` columns used to initialise the tree
    :param config: mapping; ``config.get('max_time', 100)`` is the maximum time point considered
    :return cell_tree: cell tree structure where each node corresponds to one cell (with specific name)
    :return max_time: the maximum time point that was applied
    '''
    ## Construct cell
    # Add the founder cells, whose names do not follow the "parent name + one letter" rule
    cell_tree = Tree()
    cell_tree.create_node('P0', 'P0')
    founders = (
        ('AB', 'P0'), ('P1', 'P0'),
        ('EMS', 'P1'), ('P2', 'P1'),
        ('P3', 'P2'), ('C', 'P2'),
        ('P4', 'P3'), ('D', 'P3'),
        ('Z2', 'P4'), ('Z3', 'P4'),
        # EMS
        ('E', 'EMS'), ('MS', 'EMS'),
    )
    for name, parent in founders:
        cell_tree.create_node(name, name, parent=parent)

    # Read the name table used to complete the tree
    df_time = pd.read_csv(nucleus_file)

    ## Get cell number
    try:
        # NOTE(review): pickle.load executes arbitrary code from the file; only
        # acceptable while this cache file is trusted.
        with open('./ShapeUtil/number_dictionary.txt', 'rb') as f:
            number_dictionary = pickle.load(f)
    except Exception:
        # Cache missing or unreadable: rebuild the numbering from the founder
        # cells plus every name found in the AceTree label files.  A bare
        # ``except`` would also swallow KeyboardInterrupt / SystemExit.
        names = set(cell_tree.expand_tree())
        for ace_file in glob.glob('./ShapeUtil/AceForLabel/*.csv'):
            names.update(pd.read_csv(ace_file).cell.unique())
        cell_list = sorted(names)
        number_dictionary = {name: number for number, name in enumerate(cell_list, start=1)}
        with open('./ShapeUtil/number_dictionary.txt', 'wb') as f:
            pickle.dump(number_dictionary, f)
        with open('./ShapeUtil/name_dictionary.txt', 'wb') as f:
            pickle.dump(dict(enumerate(cell_list, start=1)), f)

    max_time = config.get('max_time', 100)
    df_time = df_time[df_time.time <= max_time]
    for cell_name in df_time.cell.unique():
        if cell_name not in number_dictionary:
            continue
        cell_info = cell_node()
        cell_info.set_number(number_dictionary[cell_name])
        cell_info.set_time(list(df_time.time[df_time.cell == cell_name]))
        if cell_tree.contains(cell_name):
            cell_tree.update_node(cell_name, data=cell_info)
        elif "Nuc" not in cell_name:
            # The parent's name is the cell name without its last character.
            cell_tree.create_node(cell_name, cell_name, parent=cell_name[:-1], data=cell_info)

    return cell_tree, max_time
class PathList:
    """Cache of a remote disk's directory layout, stored in a ``Tree``.

    Nodes are keyed by remote file id, tagged with the entry name and carry a
    ``FileInfo`` as data.  The node ``'root'`` stands for the top of the disk.
    """

    def __init__(self, disk):
        """Create a cache that only contains the ``'root'`` node.

        :param disk: object whose ``get_file_list(file_id)`` returns the
            entries of one remote directory.
        """
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root', data=FileInfo(type=False))
        # Default number of directory levels below the refreshed one that are
        # fetched as well.
        self.depth = 3

    def update_path_list(self, file_id='root', depth=None, is_fid=True, **kwargs):
        """Synchronise the cached children of a directory with the disk.

        Cached children that are no longer listed are removed; listed entries
        are created or refreshed; sub-directories are processed recursively
        while ``depth`` is non-zero.

        :param file_id: id of the directory, or a path when ``is_fid`` is false.
        :param depth: remaining levels to descend; defaults to ``self.depth``.
        :param is_fid: whether ``file_id`` is already an id.
        :param kwargs: ``max_depth``, ``get_file_list_bar`` and ``ratio``;
            these carry the progress state through the recursive calls.
        :return: ``False`` when the disk returns a falsy listing, else ``True``.
        """
        if depth is None:
            depth = self.depth
        kwargs.setdefault('max_depth', depth)
        max_depth = kwargs['max_depth']
        kwargs.setdefault('get_file_list_bar', GetFileListBar(max_depth))
        kwargs.setdefault('ratio', 0)
        get_file_list_bar = kwargs['get_file_list_bar']
        ratio = kwargs['ratio']
        get_file_list_bar.update(refresh_line=False)
        if not is_fid:
            file_id = self.get_path_fid(file_id, update=False)
        file_list = self._disk.get_file_list(file_id)
        if not file_list:
            if depth == max_depth:
                get_file_list_bar.refresh_line()
            return False
        # Build the set of listed ids once instead of once per cached child.
        listed_ids = {entry['file_id'] for entry in file_list}
        for cached in self._tree.children(file_id):
            if cached.identifier not in listed_ids:
                self._tree.remove_node(cached.identifier)
        for i, info in enumerate(file_list):
            if depth == max_depth:
                # Only the outermost call advances the ratio; nested calls
                # report the ratio they were handed.
                ratio = (i + 1) / len(file_list) if file_list else None
            get_file_list_bar.update(depth=max_depth - depth, ratio=ratio, refresh_line=True)
            file_info = self.get_file_info(info)[0]
            if self._tree.get_node(file_info.id):
                self._tree.update_node(file_info.id, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name, identifier=file_info.id,
                                       data=file_info, parent=file_id)
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1, max_depth=max_depth,
                                      get_file_list_bar=get_file_list_bar, ratio=ratio)
        if depth == max_depth:
            get_file_list_bar.refresh_line()
        return True

    def check_path_diff(self, local_path, disk_path_list):
        """Collect paths under ``local_path`` that differ from a remote listing.

        :param local_path: local directory to compare.
        :param disk_path_list: sequence of single-item mappings
            ``{name: file_info}``; ``file_info`` is indexed with ``'data'``
            and, optionally, ``'children'``.
        :return: list of de-duplicated ``Path`` objects.
        """
        p = Path(local_path)
        change_file_list = []
        for path in p.iterdir():
            flag = False  # set once ``path`` has an up-to-date remote counterpart
            for i, path_ in enumerate(disk_path_list, 1):
                name, file_info = list(path_.items())[0]
                # Remote entry with no local counterpart.
                if p / name not in p.iterdir():
                    change_file_list.append(p / name)
                if Path(path) == p / name:
                    if Path(path).is_dir() and file_info['data'] and path.is_dir() != file_info['data'].type:
                        if 'children' in file_info:
                            children = file_info['children']
                            change_file_list.extend(self.check_path_diff(p / name, children))
                        elif list(path.iterdir()):
                            change_file_list.extend(list(path.iterdir()))
                    if file_info and file_info['data'] and path.is_file() == file_info['data'].type:
                        if path.is_file() and get_sha1(path).lower() != file_info['data'].content_hash.lower():
                            # Same name, different content.
                            if i == len(disk_path_list):
                                change_file_list.append(path)
                            continue
                        else:
                            flag = True
                # Reached the last remote entry without a match.
                if not flag and i == len(disk_path_list):
                    change_file_list.append(path)
        if not len(list(p.iterdir())):
            # Empty local directory: every remote entry counts as a difference.
            for path_ in disk_path_list:
                name, file_info = list(path_.items())[0]
                change_file_list.append(p / name)
        if not len(disk_path_list):
            # Empty remote listing: every local entry counts as a difference.
            for path_ in p.iterdir():
                change_file_list.append(path_)
        return list(set(change_file_list))

    @staticmethod
    def get_file_info(info):
        """Convert one listing entry, or a list of them, into ``FileInfo`` objects.

        :param info: a mapping describing one entry, or a list of such mappings.
        :return: list of ``FileInfo``, one per entry.
        """
        time_format = '%Y-%m-%dT%H:%M:%S.%fZ'
        info_list = info if isinstance(info, list) else [info]
        file_info_list = []
        for entry in info_list:
            # Fall back to the current local time, as a struct_time in both
            # branches so ``ctime`` always has the same type.
            if 'created_at' in entry:
                ctime = time.strptime(entry['created_at'], time_format)
            else:
                ctime = time.localtime()
            update_time = time.strptime(entry['updated_at'], time_format)
            if entry['type'] == 'file':
                file_info = FileInfo(
                    name=entry['name'], id=entry['file_id'], pid=entry['parent_file_id'], type=True,
                    ctime=ctime, update_time=update_time,
                    hidden=entry.get('hidden'), category=entry['category'],
                    content_type=entry.get('content_type'), size=entry['size'],
                    content_hash_name=entry.get('content_hash_name'),
                    content_hash=entry.get('content_hash'),
                    download_url=entry.get('download_url', ''),
                    video_media_metadata=entry.get('video_media_metadata'),
                    video_preview_metadata=entry.get('video_preview_metadata'))
            else:
                file_info = FileInfo(
                    name=entry['name'], id=entry['file_id'], pid=entry['parent_file_id'], type=False,
                    ctime=ctime, update_time=update_time, hidden=entry.get('hidden'))
            file_info_list.append(file_info)
        return file_info_list

    def tree(self, path='root', stdout=sys.stdout):
        """Refresh and show the cached tree below ``path``.

        :raises FileNotFoundError: when ``path`` cannot be resolved.
        """
        file_id = self.get_path_fid(path, update=False)
        # Validate before refreshing so an unresolved path never reaches the disk.
        if not file_id:
            raise FileNotFoundError(path)
        self.update_path_list(file_id)
        return self._tree.show(file_id, stdout=stdout)

    def get_path_list(self, path, update=True):
        """Resolve ``path`` and return what ``get_fid_list`` returns for it.

        :raises FileNotFoundError: carrying ``path`` when it cannot be resolved.
        """
        file_id = self.get_path_fid(path, update=update)
        try:
            return self.get_fid_list(file_id, update=update)
        except FileNotFoundError:
            raise FileNotFoundError(path)

    def get_fid_list(self, file_id, update=True):
        """Return the ``FileInfo`` of a file, or of every child of a directory.

        :raises FileNotFoundError: when ``file_id`` is falsy.
        """
        if not file_id:
            raise FileNotFoundError
        try:
            self.auto_update_path_list(update, file_id)
        except NodeIDAbsentError:
            # The id is not cached: list it straight from the disk.
            # ``get_file_info`` returns a list per entry, so flatten to keep
            # the same flat shape as the other return paths.
            return [file_info
                    for entry in self._disk.get_file_list(file_id)
                    for file_info in self.get_file_info(entry)]
        if not self._tree.get_node(file_id):
            return []
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [child.data for child in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', update=True):
        """Translate a path into a file id by walking the cached tree.

        A directory without cached children is refreshed through
        ``auto_update_path_list`` before it is searched.

        :param path: path relative to the disk root; a leading ``root``
            component is accepted.
        :param file_id: id of the node the walk starts from.
        :return: the id of the final component, ``'root'`` for the root
            itself, or ``False`` when a component cannot be found.
        """
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        path = AliyunpanPath(path)
        parts = list(filter(None, path.split()))
        # Guard the index: ``parts`` may be empty.
        if parts and parts[0] == 'root':
            parts = parts[1:]
        found = False
        for part in parts:
            children = self._tree.children(file_id)
            if not children:
                self.auto_update_path_list(update, file_id)
                children = self._tree.children(file_id)
            match = next((node for node in children if part == node.tag), None)
            if match is None:
                return False
            file_id = match.identifier
            found = True
        return file_id if found else False

    def get_path_node(self, path, update=True):
        """Return the tree node for ``path``, or ``False`` when unresolved."""
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, update=True):
        """Return the parent node of ``path``, or ``False`` when there is none."""
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, update=True, file_id=None):
        """Refresh the cache when needed.

        With ``update`` false and a ``file_id`` given, only that directory is
        listed (``depth=0``).  With ``update`` true, the whole cache is filled
        when it holds nothing but the root node.
        """
        if not update and file_id:
            return self.update_path_list(file_id, depth=0)
        elif update and len(self._tree) == 1:
            return self.update_path_list()