def isToBeIgnored(self, entry: os.DirEntry):
    """Tell whether a scanned entry is a directory that must be skipped.

    Returns True only when ``entry`` is a directory whose name is listed in
    ``self.ignoreDirList`` (a message is logged in that case). Every other
    entry, including every non-directory, yields False.
    """
    if entry.is_dir() and entry.name in self.ignoreDirList:
        lgg.info(f" Ignoring {entry.name}",lgg.cR)
        return True
    return False
def buildExtDicts(self, sdir):
    """Rebuild ``self.extDicts`` from the entries found under ``sdir``.

    ``self.extDicts`` is reset to an empty dict, then every entry produced by
    ``self.getFiles(sdir)`` is inspected: directories are only logged, files
    are handed to ``self.digestEntry``. Anything else is silently skipped.
    """
    lgg.info(f" buildExtDicts dir:{sdir}",lgg.cP)
    self.extDicts = {}
    for entry in self.getFiles(sdir):
        if entry.is_dir():
            lgg.info(f" dir:{entry.path}",lgg.cB)
            continue
        if entry.is_file():
            self.digestEntry(entry)
def getFiles(self, base_dir):
    """Recursively yield the directory entry of every file under ``base_dir``.

    Sub-directories are logged and then descended into, unless
    ``self.isToBeIgnored`` rejects them. Entries that are neither a file nor
    a directory are reported on stdout and skipped.

    Args:
        base_dir: Path of the directory to scan.

    Yields:
        os.DirEntry: One entry per file, in the order ``os.scandir`` reports
        them, with each sub-directory's files emitted where that
        sub-directory is encountered.
    """
    # Using scandir as a context manager guarantees the directory handle is
    # released even if the caller stops consuming the generator early or an
    # exception interrupts the walk.
    with os.scandir(base_dir) as entries:
        for entry in entries:
            if entry.is_file():
                yield entry
            elif entry.is_dir():
                lgg.info(f" Directory {entry.name}",lgg.cC)
                if not self.isToBeIgnored(entry):
                    yield from self.getFiles(entry.path)
            else:
                print(f"Neither a file, nor a dir: {entry.path}")
def buildClsDicts(self):
    """Aggregate the per-extension counters into per-class counters.

    ``self.clsDicts`` is rebuilt from scratch. Its keys are whatever
    ``self.getClass`` returns for each extension key of ``self.extDicts``;
    each value accumulates the "num" and "bytes" counters of all extensions
    that map to that key.
    """
    lgg.info(f" buildClsDicts",lgg.cP)
    self.clsDicts = {}
    for extkey, exd in self.extDicts.items():
        cld = self.clsDicts.setdefault(self.getClass(extkey), {"num": 0, "bytes": 0})
        cld["num"] += exd["num"]
        cld["bytes"] += exd["bytes"]
def digestEntry(self, entry: os.DirEntry):
    """Fold one file entry into the per-extension statistics.

    The entry is bucketed by the extension of ``entry.name`` (as split by
    ``os.path.splitext``; may be the empty string). The bucket in
    ``self.extDicts`` tracks the file count, the byte total, and the size and
    path of the largest file seen so far. Files larger than 10e6 bytes are
    also appended to ``self.bigFileList`` and logged.
    """
    ext = os.path.splitext(entry.name)[1]
    exd = self.extDicts.setdefault(
        ext, {"num": 0, "bytes": 0, "maxbytes": 0, "maxname": ""}
    )
    exd["num"] += 1
    esize = entry.stat().st_size
    exd["bytes"] += esize
    if esize > exd["maxbytes"]:
        exd["maxbytes"] = esize
        exd["maxname"] = entry.path
    if esize <= 10e6:
        return
    # Only oversized files reach this point.
    self.bigFileList.append(entry)
    emb = f"{round(esize / 1e6, 3):.3f}"
    lgg.info(f" big file: {emb} mb - {entry.path}")
def main(self):
    """Run the full classification pass over ``self.args.sdir`` and log its duration.

    The steps run in order: build the per-extension statistics, dump them,
    build the per-class statistics, dump them. The elapsed time in seconds
    (rounded to three decimals) is logged at the end.
    """
    sdir = self.args.sdir
    lgg.info(f"FileClassing {sdir}",lgg.cY)
    # timeit.default_timer is the module's documented clock. The previous
    # timeit.time.time() reached through an undocumented attribute of timeit
    # and read the adjustable wall clock, which is unsuitable for intervals.
    stime = timeit.default_timer()
    self.buildExtDicts(sdir)
    self.dumpExtDicts()
    self.buildClsDicts()
    self.dumpClsDicts()
    elap = timeit.default_timer() - stime
    lgg.info(f"file class done - secs:{round(elap,3)} ",lgg.cY)
def copyFromTo(sdir, ddir, execute):
    """Copy every file named in the module-level ``listfiles`` from ``sdir`` to ``ddir``.

    Each item of ``listfiles`` is a ``(fdname, fname)`` pair; the source and
    destination paths are built as ``{dir}/{fdname}{fname}``. Missing
    destination directories are created first. When ``execute`` is false
    nothing is created or copied: the function only logs what it would do.

    Returns:
        tuple: ``(overwrittenfiles, overwrittenbytes)``, the number of
        destination files that already existed and their combined size.
    """
    overwrittenfiles = 0
    overwrittenbytes = 0
    # The mode label never changes inside the loop, so compute it once.
    execword = "execute" if execute else "fake"
    for i, (fdname, fname) in enumerate(listfiles):
        sfname = f"{sdir}/{fdname}{fname}"
        dfname = f"{ddir}/{fdname}{fname}"
        dddir = f"{ddir}/{fdname}"
        if not os.path.exists(dddir):
            if execute:
                lgg.info(f"Creating directory {dddir}", lgg.cR)
                os.makedirs(dddir)
            else:
                lgg.info(f"Would have created directory {dddir}", lgg.cR)
        if os.path.exists(dfname):
            # An existing destination is about to be overwritten: count it.
            fclr = lgg.cG
            ovbytes = os.stat(dfname).st_size
            overwrittenbytes += ovbytes
            overwrittenfiles += 1
        else:
            fclr = lgg.cC
            ovbytes = 0
        lgg.info(
            f"{i}: {execword} copy from {sfname} to {dfname} overwrittenbytes:{ovbytes}",
            fclr)
        if execute:
            shutil.copyfile(sfname, dfname)
    return (overwrittenfiles, overwrittenbytes)
def dumpClsDicts(self):
    """Log one line per class in ``self.clsDicts``, then a grand-total line.

    Each class line shows its file count and its size in megabytes
    (bytes / 1e6, three decimals). The totals line sums the "num" and
    "bytes" counters over all classes.
    """
    lgg.info(f" dumpClsDicts",lgg.cP)
    for clskey, cld in self.clsDicts.items():
        nfilescls = cld["num"]
        mbytes = f"{round(cld['bytes'] / 1e6, 3):.3f}"
        lgg.info(f" {clskey:>6} - num:{nfilescls:>4} tot-mb:{mbytes:>9}",lgg.cG)
    nfiles = sum(cld["num"] for cld in self.clsDicts.values())
    nbytes = sum(cld["bytes"] for cld in self.clsDicts.values())
    mbytes = f"{round(nbytes / 1e6, 3):.3f}"
    lgg.info(f"totals - files{nfiles} bytes:{nbytes} mb:{mbytes}",lgg.cG)
def dumpExtDicts(self):
    """Log one line per extension in ``self.extDicts``, then a grand-total line.

    Extensions are visited in the order given by
    ``self.getSortedExtDict("bytes")`` (presumably sorted by total bytes;
    confirm against that helper). Each line shows the extension, right-
    justified to the width from ``self.getLongestExtKeyLength()``, its class
    from ``self.getClass``, the file count, the largest file size, the
    average file size and the total size in megabytes (bytes / 1e6).
    """
    lgg.info(f" dumpExtDicts",lgg.cP)
    # A stray bare ``cd`` statement used to sit here and raised NameError.
    nfiles = 0
    nbytes = 0
    mxkeylen = self.getLongestExtKeyLength()
    for extkey in self.getSortedExtDict("bytes"):
        exd = self.extDicts[extkey]
        nfilesext = exd["num"]
        nbytesext = exd["bytes"]
        maxbytesext = exd["maxbytes"]
        # Average is total bytes over file count; it was previously derived
        # from the maximum file size, which under-reported it. The zero guard
        # protects against an entry that somehow has no files.
        avgbytesext = nbytesext // nfilesext if nfilesext else 0
        cls = self.getClass(extkey)
        nfiles += nfilesext
        nbytes += nbytesext
        mbytes = "%.3f" % round(nbytesext/1e6,3)
        extkeypad = extkey.rjust(mxkeylen)
        lgg.info(f" {extkeypad} {cls:>6} - num:{nfilesext:>4} size-max:{maxbytesext:>10} avg:{avgbytesext:>10} tot-mb:{mbytes:>9}",lgg.cB)
    mbytes = "%.3f" % round(nbytes / 1e6,3)
    lgg.info(f"totals - files{nfiles} bytes:{nbytes} mb:{mbytes}",lgg.cB)
ovbytes = ftats.st_size overwrittenbytes += ovbytes overwrittenfiles += 1 execword = "execute" if execute else "fake" lgg.info( f"{i}: {execword} copy from {sfname} to {dfname} overwrittenbytes:{ovbytes}", fclr) if execute: shutil.copyfile(sfname, dfname) i += 1 return (overwrittenfiles, overwrittenbytes) sdir = args.sdir ddir = args.ddir execute = args.exec lgg.info( f"Copying {len(listfiles)} files from {sdir} to {ddir} execute:{execute}", lgg.cY) start = timeit.timeit() (ovfiles, ovbytes) = copyFromTo(sdir, ddir, execute) elap = timeit.timeit() - start tfiles = len(listfiles) exword = "" if execute else "Would have " lgg.info( f"{exword} Overwritten files:{ovfiles}/{tfiles} overwritenbytes:{ovbytes} secs:{round(elap,3)} ", lgg.cY)