def index(self, path, name, parentpath=None, parent=None, isdir=False):
    '''
    index a folder and store in tree
    @path: filesystem path to index
    @name: name for the created directory node
    @parentpath: storage-relative path passed to child nodes
    @parent: parent node (created through the noder when None)
    @isdir: unused, kept for backward compatibility
    @return: tuple (parent node, number of files indexed)
    '''
    if not parent:
        # bug fix: original called module-level "noder"; every other
        # call in this method goes through self.noder
        parent = self.noder.dir_node(name, path, parent)
    cnt = 0
    for (root, dirs, files) in os.walk(path):
        for f in files:
            sub = os.path.join(root, f)
            # truncate long names for the progress line
            n = f
            if len(n) > self.MAXLINE:
                n = f[:self.MAXLINE] + '...'
            Logger.progr('indexing: {:80}'.format(n))
            self.noder.file_node(os.path.basename(f), sub,
                                 parent, parentpath)
            cnt += 1
        for d in dirs:
            base = os.path.basename(d)
            sub = os.path.join(root, d)
            dummy = self.noder.dir_node(base, sub, parent, parentpath)
            _, cnt2 = self.index(sub, base, parent=dummy,
                                 parentpath=parentpath)
            cnt += cnt2
        # only the top level is walked here; sub-dirs recurse above
        break
    # clean line
    Logger.progr('{:80}'.format(' '))
    return parent, cnt
def _rec_size(self, node, store=True):
    '''
    recursively traverse tree and return size
    @node: node to start from
    @store: store the size in the node
    @return: cumulated size in bytes of the subtree
    '''
    if self.verbose:
        Logger.info('getting node size recursively')
    if node.type == self.TYPE_FILE:
        # leaf: size is already known
        return node.size
    size = 0
    # original code duplicated identical DIR and STORAGE branches and
    # re-tested the (loop-invariant) parent type per child; other node
    # types still aggregate to 0, as before
    if node.type in (self.TYPE_DIR, self.TYPE_STORAGE):
        for child in node.children:
            sz = self._rec_size(child, store=store)
            if store:
                child.size = sz
            size += sz
    if store:
        node.size = size
    return size
def _restore_pickle(self):
    '''
    restore the pickled tree from self.path
    @return: the unpickled root node
    '''
    # bug fix: use a context manager so the file handle is closed
    # NOTE(review): pickle.load must only be fed trusted catalog files
    with open(self.path, 'rb') as f:
        root = pickle.load(f)
    if self.verbose:
        m = 'Catalog imported from pickle \"{}\"'.format(self.path)
        Logger.info(m)
    return root
def _restore_json(self, string):
    '''rebuild the tree from a json string and return its root node'''
    importer = JsonImporter()
    tree_root = importer.import_(string)
    if self.verbose:
        Logger.info('Catalog imported from json \"{}\"'.format(self.path))
    return tree_root
def file_node(self, name, path, parent, storagepath):
    '''create a new node representing a file'''
    if not os.path.exists(path):
        Logger.err('File \"{}\" does not exist'.format(path))
        return None
    path = os.path.abspath(path)
    try:
        stat_res = os.lstat(path)
    except OSError as exc:
        Logger.err('OSError: {}'.format(exc))
        return None
    # hash the content only when hashing is enabled
    md5 = utils.md5sum(path) if self.hash else None
    # storage-relative path, prefixed with the storage basename
    relpath = os.path.join(os.path.basename(storagepath),
                           os.path.relpath(path, start=storagepath))
    maccess = os.path.getmtime(path)
    new_node = self._node(name, self.TYPE_FILE, relpath, parent,
                          size=stat_res.st_size, md5=md5,
                          maccess=maccess)
    if self.arc:
        # index archive members as sub-nodes
        ext = os.path.splitext(path)[1][1:]
        if ext in self.decomp.get_format():
            names = self.decomp.get_names(path)
            self.list_to_tree(new_node, names)
    return new_node
def _to_csv(self, node, with_header=False):
    '''
    print the tree to csv
    @node: root node to render
    @with_header: print the csv header line first
    '''
    rend = anytree.RenderTree(node, childiter=self._sort_tree)
    if with_header:
        Logger.out(self.CSV_HEADER)
    # fix: original loop variable shadowed the "node" parameter
    for _, _, item in rend:
        self._node_to_csv(item)
def file_node(self, name, path, parent, storagepath):
    '''create a new node representing a file'''
    if not os.path.exists(path):
        Logger.err('File \"{}\" does not exist'.format(path))
        return None
    path = os.path.abspath(path)
    try:
        stat_res = os.lstat(path)
    except OSError as exc:
        Logger.err('OSError: {}'.format(exc))
        return None
    # hash the content only when hashing is enabled
    md5 = self._get_hash(path) if self.hash else None
    relpath = os.sep.join([storagepath, name])
    maccess = os.path.getmtime(path)
    new_node = self._node(name, self.TYPE_FILE, relpath, parent,
                          size=stat_res.st_size, md5=md5,
                          maccess=maccess)
    if self.arc:
        # index archive members as sub-nodes
        ext = os.path.splitext(path)[1][1:]
        if ext.lower() not in self.decomp.get_formats():
            self._debug('{} is NOT an archive'.format(path))
        else:
            self._debug('{} is an archive'.format(path))
            names = self.decomp.get_names(path)
            self.list_to_tree(new_node, names)
    return new_node
def _save_json(self, node):
    '''export the catalog in json'''
    exporter = JsonExporter(indent=2, sort_keys=True)
    with open(self.path, 'w') as fd:
        exporter.write(node, fd)
    if self.verbose:
        Logger.info('Catalog saved to json \"{}\"'.format(self.path))
    return True
def get_node(self, top, path):
    '''
    get the node at path
    @top: tree root to resolve against
    @path: internal tree path
    @return: the node or None when not found
    '''
    resolver = anytree.resolver.Resolver('name')
    try:
        return resolver.get(top, path)
    except anytree.resolver.ChildResolverError:
        Logger.err('No node at path \"{}\"'.format(path))
        return None
def get_node(self, top, path, quiet=False):
    '''
    get the node by internal tree path
    @quiet: do not log when the path resolves to nothing
    @return: the node or None when not found
    '''
    resolver = anytree.resolver.Resolver('name')
    try:
        return resolver.get(top, path)
    except anytree.resolver.ChildResolverError:
        if not quiet:
            Logger.err('No node at path \"{}\"'.format(path))
        return None
def _log(self, string):
    '''display an indexing progress line'''
    if self.debug:
        # no progress output in debug mode
        return
    if not string:
        # empty input wipes the progress line
        Logger.progr('{:80}'.format(' '))
        return
    # truncate overly long entries
    line = string
    if len(line) > self.MAXLINE:
        line = string[:self.MAXLINE] + '...'
    Logger.progr('indexing: {:80}'.format(line))
def find_name(self, root, key, script=False, directory=False,
              startpath=None, parentfromtree=False, fmt='native'):
    '''
    find files based on their names
    @script: output script
    @directory: only search for directories
    @startpath: node to start with
    @parentfromtree: get path from parent instead of stored relpath
    @fmt: output format
    '''
    self._debug('searching for \"{}\"'.format(key))
    start = self.get_node(root, startpath) if startpath else root
    self.term = key
    found = anytree.findall(start, filter_=self._find_name)
    paths = []
    for match in found:
        # storage nodes are never reported
        if match.type == self.TYPE_STORAGE:
            continue
        # optionally restrict to directories only
        if directory and match.type != self.TYPE_DIR:
            continue
        # print the node
        if fmt == 'native':
            self._print_node(match, withpath=True, withdepth=True,
                             withstorage=True,
                             recalcparent=parentfromtree)
        elif fmt == 'csv':
            self._node_to_csv(match)
        if parentfromtree:
            paths.append(self._get_parents(match))
        else:
            paths.append(match.relpath)
    if script:
        args = ['${source}/' + x for x in paths]
        cmd = 'op=file; source=/media/mnt; $op {}'.format(' '.join(args))
        Logger.info(cmd)
    return found
def rec_size(self, node):
    '''
    recursively traverse tree and store dir size
    @node: node to start from
    @return: cumulated size in bytes of the subtree
    '''
    if self.verbose:
        Logger.info('getting directory size recursively')
    if node.type == self.TYPE_FILE:
        # leaf: size is already known
        return node.size
    size = 0
    for child in node.children:
        # bug fix: the STORAGE branch discarded the recursive result,
        # leaving storage nodes with size 0; accumulate it as the
        # sibling _rec_size implementation does
        if node.type in (self.TYPE_DIR, self.TYPE_STORAGE):
            size += self.rec_size(child)
    node.size = size
    return size
def _node_to_csv(self, node, sep=','):
    '''
    print a node to csv
    @node: the node to consider
    @sep: csv field separator
    '''
    if not node:
        return ''
    if node.type == self.TYPE_TOP:
        # the top node is never printed
        return ''
    fields = []
    if node.type == self.TYPE_STORAGE:
        # handle storage
        fields.append(node.name)
        fields.append(node.type)
        fields.append('')  # full path
        # size is recomputed, not stored
        total = self._rec_size(node, store=False)
        fields.append(utils.human(total))
        fields.append(utils.epoch_to_str(node.ts))
        fields.append('')  # maccess
        fields.append('')  # md5
    else:
        fields.append(node.name)
        fields.append(node.type)
        # node full path
        parents = self._get_parents(node)
        storage = self._get_storage(node)
        fields.append(os.path.join(storage.name, parents))
        fields.append(utils.human(node.size))
        fields.append(utils.epoch_to_str(storage.ts))
        fields.append(utils.epoch_to_str(node.maccess))
        # md5 if any
        fields.append(node.md5 if node.md5 else '')
    line = sep.join('"' + field + '"' for field in fields)
    if len(line) > 0:
        Logger.out(line)
def md5sum(path):
    '''
    calculate md5 sum of a file
    @path: file to hash
    @return: hex digest string, or None when unreadable
    '''
    real = os.path.realpath(path)
    if not os.path.exists(real):
        Logger.err('\nunable to get md5sum on {}'.format(path))
        return None
    digest = hashlib.md5()
    try:
        with open(real, mode='rb') as fd:
            # hash in fixed-size chunks to bound memory usage
            while True:
                block = fd.read(4096)
                if not block:
                    break
                digest.update(block)
        return digest.hexdigest()
    except PermissionError:
        # unreadable file: silently report failure below
        pass
    return None
def find_name(self, root, key, script=False):
    '''find files based on their names'''
    if self.verbose:
        Logger.info('searching for \"{}\"'.format(key))
    self.term = key
    found = anytree.findall(root, filter_=self._find_name)
    paths = []
    for match in found:
        # storage nodes are never reported
        if match.type == self.TYPE_STORAGE:
            continue
        self._print_node(match, withpath=True, withdepth=True,
                         withstorage=True)
        paths.append(match.relpath)
    if script:
        args = ['${source}/' + p for p in paths]
        cmd = 'op=file; source=/media/mnt; $op {}'.format(' '.join(args))
        Logger.info(cmd)
    return found
def walk(self, root, path, rec=False):
    '''walk the tree for ls based on names'''
    if self.verbose:
        Logger.info('walking path: \"{}\"'.format(path))
    resolver = anytree.resolver.Resolver('name')
    found = []
    try:
        found = resolver.glob(root, path)
        if len(found) < 1:
            # nothing matched
            return []
        if rec:
            # print the whole subtree and stop
            self.print_tree(found[0].parent)
            return found
        found = sorted(found, key=self._sort, reverse=self.sortsize)
        # print the parent first, then each match
        self._print_node(found[0].parent, withpath=False, withdepth=True)
        for entry in found:
            self._print_node(entry, withpath=False, pre='- ',
                             withdepth=True)
    except anytree.resolver.ChildResolverError:
        # unresolved path: return whatever was found (empty)
        pass
    return found
def find_name(self, root, key, script=False, directory=False,
              startpath=None, parentfromtree=False):
    '''find files based on their names'''
    if self.verbose:
        Logger.info('searching for \"{}\"'.format(key))
    start = self.get_node(root, startpath) if startpath else root
    self.term = key
    found = anytree.findall(start, filter_=self._find_name)
    paths = []
    for match in found:
        # storage nodes are never reported
        if match.type == self.TYPE_STORAGE:
            continue
        # optionally restrict to directories only
        if directory and match.type != self.TYPE_DIR:
            continue
        self._print_node(match, withpath=True, withdepth=True,
                         withstorage=True,
                         recalcparent=parentfromtree)
        if parentfromtree:
            paths.append(self._get_parents(match))
        else:
            paths.append(match.relpath)
    if script:
        args = ['${source}/' + x for x in paths]
        cmd = 'op=file; source=/media/mnt; $op {}'.format(' '.join(args))
        Logger.info(cmd)
    return found
def save(self, node):
    '''save the catalog to self.path; return True on success'''
    if not self.path:
        Logger.err('Path not defined')
        return False
    dirpath = os.path.dirname(self.path)
    if dirpath and not os.path.exists(dirpath):
        # destination directory is created on demand
        os.makedirs(dirpath)
    elif os.path.exists(self.path) and not self.force:
        # an existing catalog is only overwritten after confirmation
        if not utils.ask('Update catalog \"{}\"'.format(self.path)):
            Logger.info('Catalog not saved')
            return False
    if dirpath and not os.path.exists(dirpath):
        Logger.err('Cannot write to \"{}\"'.format(dirpath))
        return False
    if self.metanode:
        # attach the meta node before dumping
        self.metanode.parent = node
    # serialize with the configured backend
    if self.pickle:
        return self._save_pickle(node)
    return self._save_json(node)
def _save_pickle(self, node):
    '''
    pickle the catalog to self.path
    @node: root node to serialize
    @return: True
    '''
    # bug fix: use a context manager so the file handle is always
    # closed (the original open() was never closed)
    with open(self.path, 'wb') as f:
        pickle.dump(node, f)
    if self.verbose:
        Logger.info('Catalog saved to pickle \"{}\"'.format(self.path))
    return True
def to_dot(self, node, path='tree.dot'):
    '''export the tree to a dot file for graphing'''
    exporter = anytree.exporter.DotExporter(node)
    exporter.to_dotfile(path)
    Logger.info('dot file created under \"{}\"'.format(path))
    # hint for converting the dot file to an image
    return 'dot {} -T png -o /tmp/tree.png'.format(path)
def _print_node(self, node, pre='', withpath=False,
                withdepth=False, withstorage=False):
    '''print a node'''
    # dispatch on the node type; each branch formats its own output
    if node.type == self.TYPE_TOP:
        # top node: name only
        Logger.out('{}{}'.format(pre, node.name))
    elif node.type == self.TYPE_FILE:
        # file node: name (or relpath), size, md5 and storage info
        name = node.name
        if withpath:
            name = node.relpath
        if withstorage:
            storage = self._get_storage(node)
        attr = ''
        if node.md5:
            attr = ', md5:{}'.format(node.md5)
        compl = 'size:{}{}'.format(utils.human(node.size), attr)
        if withstorage:
            compl += ', storage:{}'.format(Logger.bold(storage.name))
        Logger.file(pre, name, compl)
    elif node.type == self.TYPE_DIR:
        # directory node: name, child count, cumulated size, storage
        name = node.name
        if withpath:
            name = node.relpath
        depth = ''
        if withdepth:
            depth = len(node.children)
        if withstorage:
            storage = self._get_storage(node)
        attr = []
        if node.size:
            attr.append(['totsize', utils.human(node.size)])
        if withstorage:
            attr.append(['storage', Logger.bold(storage.name)])
        Logger.dir(pre, name, depth=depth, attr=attr)
    elif node.type == self.TYPE_STORAGE:
        # storage node: free/total space and optional timestamp
        hf = utils.human(node.free)
        ht = utils.human(node.total)
        dt = ''
        if self._has_attr(node, 'ts'):
            dt = ', date:'
            dt += utils.epoch_to_str(node.ts)
        name = '{} (free:{}, total:{}{})'.format(node.name, hf, ht, dt)
        Logger.storage(pre, name, node.attr)
    elif node.type == self.TYPE_ARC:
        # archive member node: only shown when archive mode is on
        if self.arc:
            Logger.arc(pre, node.name, node.archive)
    else:
        Logger.err('Weird node encountered: {}'.format(node))
def _debug(self, string):
    '''log a message when debugging is enabled'''
    if self.debug:
        Logger.log(string)
def _debug(self, string):
    '''print to debug'''
    if self.debug:
        Logger.debug(string)
def _print_node(self, node, pre='', withpath=False,
                withdepth=False, withstorage=False,
                recalcparent=False):
    '''
    print a node
    @node: the node to print
    @pre: string to print before node
    @withpath: print the node path
    @withdepth: print the node depth info
    @withstorage: print the node storage it belongs to
    @recalcparent: get relpath from tree instead of relpath field
    '''
    # dispatch on the node type; each branch formats its own output
    if node.type == self.TYPE_TOP:
        # top node
        Logger.out('{}{}'.format(pre, node.name))
    elif node.type == self.TYPE_FILE:
        # node of type file
        name = node.name
        if withpath:
            if recalcparent:
                # rebuild the path from the tree structure
                name = os.sep.join([self._get_parents(node.parent), name])
            else:
                name = node.relpath
        # NOTE(review): lstrip placement reconstructed from mangled
        # source — assumed unconditional; confirm against history
        name = name.lstrip(os.sep)
        if withstorage:
            storage = self._get_storage(node)
        attr = ''
        if node.md5:
            attr = ', md5:{}'.format(node.md5)
        compl = 'size:{}{}'.format(utils.human(node.size), attr)
        if withstorage:
            compl += ', storage:{}'.format(Logger.bold(storage.name))
        Logger.file(pre, name, compl)
    elif node.type == self.TYPE_DIR:
        # node of type directory
        name = node.name
        if withpath:
            if recalcparent:
                # rebuild the path from the tree structure
                name = os.sep.join([self._get_parents(node.parent), name])
            else:
                name = node.relpath
        name = name.lstrip(os.sep)
        depth = ''
        if withdepth:
            depth = len(node.children)
        if withstorage:
            storage = self._get_storage(node)
        attr = []
        if node.size:
            attr.append(['totsize', utils.human(node.size)])
        if withstorage:
            attr.append(['storage', Logger.bold(storage.name)])
        Logger.dir(pre, name, depth=depth, attr=attr)
    elif node.type == self.TYPE_STORAGE:
        # node of type storage
        hf = utils.human(node.free)
        ht = utils.human(node.total)
        nbchildren = len(node.children)
        # get the date
        dt = ''
        if self._has_attr(node, 'ts'):
            dt = 'date:{}'.format(utils.epoch_to_str(node.ts))
        ds = ''
        # the children size
        sz = self._rec_size(node, store=False)
        sz = utils.human(sz)
        ds = 'totsize:{}'.format(sz)
        # format the output
        name = '{}'.format(node.name)
        args = [
            'nbfiles:{}'.format(nbchildren),
            'free:{}/{}'.format(hf, ht),
            dt,
            ds]
        Logger.storage(pre, name,
                       '({})'.format(','.join(args)), node.attr)
    elif node.type == self.TYPE_ARC:
        # archive node
        if self.arc:
            Logger.arc(pre, node.name, node.archive)
    else:
        Logger.err('bad node encountered: {}'.format(node))
def _log2file(self, string):
    '''append a line to the log file, if one is configured'''
    if not self.lpath:
        # no log file configured
        return
    Logger.flog(self.lpath, '{}\n'.format(string), append=True)
def _debug(self, text):
    '''emit text through the debug logger when enabled'''
    if self.debug:
        Logger.debug(text)