示例#1
0
 def isToBeIgnored(self, entry: os.DirEntry):
     """Tell whether ``entry`` is a directory that must be skipped.

     Returns True, after logging the decision, only when ``entry`` is a
     directory whose name is listed in ``self.ignoreDirList``. Anything
     that is not a directory is never ignored.
     """
     skip = entry.is_dir() and entry.name in self.ignoreDirList
     if skip:
         lgg.info(f"    Ignoring {entry.name}", lgg.cR)
     return skip
示例#2
0
 def buildExtDicts(self, sdir):
     """Rebuild ``self.extDicts`` from everything found under ``sdir``.

     ``self.extDicts`` is reset to an empty dict, then each entry yielded
     by ``self.getFiles(sdir)`` is handled: directory entries are only
     logged, file entries are handed to ``self.digestEntry``.
     """
     lgg.info(f"  buildExtDicts dir:{sdir}", lgg.cP)
     self.extDicts = {}
     for found in self.getFiles(sdir):
         if found.is_dir():
             lgg.info(f"    dir:{found.path}", lgg.cB)
             continue
         if found.is_file():
             self.digestEntry(found)
示例#3
0
 def getFiles(self, base_dir):
     """Recursively yield an ``os.DirEntry`` for every file under ``base_dir``.

     Sub-directories are logged and then descended into unless
     ``self.isToBeIgnored`` rejects them. Entries that are neither a file
     nor a directory are reported on stdout and skipped.

     Args:
         base_dir: Path of the directory to scan.

     Yields:
         os.DirEntry: One entry per file, in ``os.scandir`` order.
     """
     # The scandir iterator owns an open directory handle. The ``with``
     # block releases it deterministically, including when the caller
     # stops consuming this generator before it is exhausted.
     with os.scandir(base_dir) as entries:
         for entry in entries:
             if entry.is_file():
                 yield entry
             elif entry.is_dir():
                 lgg.info(f"    Directory {entry.name}", lgg.cC)
                 if not self.isToBeIgnored(entry):
                     yield from self.getFiles(entry.path)
             else:
                 print(f"Neither a file, nor a dir: {entry.path}")
示例#4
0
 def buildClsDicts(self):
     """Roll the per-extension stats up into per-class totals.

     ``self.clsDicts`` is reset, then for every extension in
     ``self.extDicts`` its ``"num"`` and ``"bytes"`` counters are added to
     the bucket keyed by ``self.getClass(extension)``.
     """
     lgg.info("  buildClsDicts", lgg.cP)
     self.clsDicts = {}
     for ext, ext_stats in self.extDicts.items():
         bucket = self.clsDicts.setdefault(
             self.getClass(ext), {"num": 0, "bytes": 0}
         )
         bucket["num"] += ext_stats["num"]
         bucket["bytes"] += ext_stats["bytes"]
示例#5
0
 def digestEntry(self, entry: os.DirEntry):
     """Fold one file entry into the per-extension statistics.

     Updates the ``self.extDicts`` record for the entry's extension
     (file count, byte total, largest size seen and the path of that
     largest file). Files larger than 10e6 bytes are additionally
     appended to ``self.bigFileList`` and logged.
     """
     suffix = os.path.splitext(entry.name)[1]
     stats = self.extDicts.setdefault(
         suffix, {"num": 0, "bytes": 0, "maxbytes": 0, "maxname": ""}
     )
     # Count the file before stat() so a failing stat leaves the same
     # partial state as it always has.
     stats["num"] += 1
     size = entry.stat().st_size
     stats["bytes"] += size
     if size > stats["maxbytes"]:
         stats["maxbytes"] = size
         stats["maxname"] = entry.path
     if not size > 10e6:
         return
     self.bigFileList.append(entry)
     size_mb = "%.3f" % round(size / 1e6, 3)
     lgg.info(f"    big file: {size_mb} mb - {entry.path}")
示例#6
0
    def main(self):
        """Run the whole classification pass and log how long it took.

        Reads the source directory from ``self.args.sdir``, builds and
        dumps the per-extension statistics, then builds and dumps the
        per-class statistics, and finally logs the elapsed seconds.
        """
        sdir = self.args.sdir

        lgg.info(f"FileClassing {sdir}", lgg.cY)

        # timeit.default_timer() is the documented timer of the timeit
        # module; the former timeit.time.time() relied on an undocumented
        # attribute and on a wall clock that may be adjusted mid-run.
        stime = timeit.default_timer()
        self.buildExtDicts(sdir)
        self.dumpExtDicts()
        self.buildClsDicts()
        self.dumpClsDicts()
        elap = timeit.default_timer() - stime

        lgg.info(f"file class done - secs:{round(elap,3)} ", lgg.cY)
示例#7
0
def copyFromTo(sdir, ddir, execute):
    """Copy each file named in ``listfiles`` from ``sdir`` to ``ddir``.

    ``listfiles`` is defined outside this function and is iterated as
    pairs; both parts of a pair are concatenated after the base
    directory to form the source and destination paths. When ``execute``
    is false nothing is created or copied: the function only logs what
    it would have done.

    Returns:
        tuple: ``(overwrittenfiles, overwrittenbytes)``, the number of
        destination files that already existed and their combined size.
    """
    overwrittenfiles = 0
    overwrittenbytes = 0
    mode_word = "execute" if execute else "fake"
    for idx, (fdname, fname) in enumerate(listfiles):
        src_path = f"{sdir}/{fdname}{fname}"
        dst_path = f"{ddir}/{fdname}{fname}"
        dst_dir = f"{ddir}/{fdname}"

        if not os.path.exists(dst_dir):
            if execute:
                lgg.info(f"Creating directory {dst_dir}", lgg.cR)
                os.makedirs(dst_dir)
            else:
                lgg.info(f"Would have created directory {dst_dir}", lgg.cR)

        if os.path.exists(dst_path):
            # An existing destination counts as an overwrite.
            colour = lgg.cG
            replaced = os.stat(dst_path).st_size
            overwrittenbytes += replaced
            overwrittenfiles += 1
        else:
            colour = lgg.cC
            replaced = 0

        lgg.info(
            f"{idx}: {mode_word} copy from {src_path} to {dst_path} overwrittenbytes:{replaced}",
            colour)
        if execute:
            shutil.copyfile(src_path, dst_path)
    return (overwrittenfiles, overwrittenbytes)
示例#8
0
 def dumpClsDicts(self):
     """Log one line per class in ``self.clsDicts`` and a totals line.

     Each class line shows its file count and its size in units of 1e6
     bytes; the final line sums files and bytes over all classes.
     """
     lgg.info("  dumpClsDicts", lgg.cP)
     total_files = 0
     total_bytes = 0
     for cls_name, stats in self.clsDicts.items():
         count = stats["num"]
         size = stats["bytes"]
         total_files += count
         total_bytes += size
         size_mb = "%.3f" % round(size / 1e6, 3)
         lgg.info(f"  {cls_name:>6} - num:{count:>4}    tot-mb:{size_mb:>9}", lgg.cG)
     total_mb = "%.3f" % round(total_bytes / 1e6, 3)
     lgg.info(f"totals - files{total_files} bytes:{total_bytes} mb:{total_mb}", lgg.cG)
示例#9
0
 def dumpExtDicts(self):
     """Log one line per extension in ``self.extDicts`` and a totals line.

     Extensions are listed in the order returned by
     ``self.getSortedExtDict("bytes")`` (presumably sorted by total
     bytes; confirm against that helper). Each line shows the
     extension, its class from ``self.getClass``, the file count, the
     largest file size, the average file size and the total size in
     units of 1e6 bytes.
     """
     lgg.info("  dumpExtDicts", lgg.cP)
     nfiles = 0
     nbytes = 0
     mxkeylen = self.getLongestExtKeyLength()
     sortedExtDict = self.getSortedExtDict("bytes")
     for extkey in sortedExtDict:
         exd = self.extDicts[extkey]
         nfilesext = exd["num"]
         nbytesext = exd["bytes"]
         maxbytesext = exd["maxbytes"]
         # Average is total bytes over file count (it used to divide the
         # largest file size instead); guard against an empty record.
         avgbytesext = nbytesext // nfilesext if nfilesext else 0
         cls = self.getClass(extkey)
         nfiles += nfilesext
         nbytes += nbytesext
         mbytes = "%.3f" % round(nbytesext / 1e6, 3)
         # Right-align the extension so the columns line up.
         extkeypad = extkey.rjust(mxkeylen)
         lgg.info(f"  {extkeypad} {cls:>6} - num:{nfilesext:>4}  size-max:{maxbytesext:>10} avg:{avgbytesext:>10}  tot-mb:{mbytes:>9}", lgg.cB)
     mbytes = "%.3f" % round(nbytes / 1e6, 3)
     lgg.info(f"totals - files{nfiles} bytes:{nbytes} mb:{mbytes}", lgg.cB)
示例#10
0
            ovbytes = ftats.st_size
            overwrittenbytes += ovbytes
            overwrittenfiles += 1
        execword = "execute" if execute else "fake"
        lgg.info(
            f"{i}: {execword} copy from {sfname} to {dfname} overwrittenbytes:{ovbytes}",
            fclr)
        if execute:
            shutil.copyfile(sfname, dfname)
        i += 1
    return (overwrittenfiles, overwrittenbytes)


# Script driver: read the options, run the copy, and report the outcome.
sdir = args.sdir
ddir = args.ddir
execute = args.exec

lgg.info(
    f"Copying {len(listfiles)} files from {sdir} to {ddir} execute:{execute}",
    lgg.cY)

# timeit.timeit() with no arguments benchmarks a `pass` statement and
# returns that benchmark's duration, so it cannot serve as a clock.
# timeit.default_timer() returns a timestamp suitable for measuring elapsed time.
start = timeit.default_timer()
(ovfiles, ovbytes) = copyFromTo(sdir, ddir, execute)
elap = timeit.default_timer() - start

tfiles = len(listfiles)
# In dry-run mode the summary is phrased as what would have happened.
exword = "" if execute else "Would have "
lgg.info(
    f"{exword} Overwritten files:{ovfiles}/{tfiles}  overwritenbytes:{ovbytes} secs:{round(elap,3)} ",
    lgg.cY)