Пример #1
0
def removeallemptydir(path, verbose=False, removetrash=False):
    trashes=('^desktop\.ini$','^thumbs\.db$','^\.picasa\.ini$','^.*?\.thm$')
    result=True
    for f in os.listdir(path):
        fullname = os.path.join(path,f)
        if os.path.isfile(fullname):
            if not removetrash:
                result=False
            elif any(re.match(t,f,re.I|re.U) for t in trashes):
                try:
                    os.chmod(fullname, stat.S_IRWXU| stat.S_IRWXG| stat.S_IRWXO)
                    os.remove(fullname)
                    if verbose:
                        print GREEN+"del", fullname
                except Exception as e:
                    gprint("Cannot remove trash file [<>]: <>", fullname, RED+str(e))
                    result = False
        else:
            result = removeallemptydir(fullname, verbose, removetrash) and result

    if result:
        os.chmod(path, stat.S_IRWXU| stat.S_IRWXG| stat.S_IRWXO)
        try:
            os.rmdir(path)
            if verbose:
                print GREEN+"rmdir", path
        except Exception as e:
            gprint("Cannot rmdir [<>]: <>\n", path, str(e))
    
    return result
Пример #2
0
 def printme(self):
     lastp=u''
     for t,fns in self.result.iteritems():
         print "\n\n", RED+t, len(fns)
         for fullpath in fns:
             p,fn= os.path.split(fullpath)
             if p!=lastp:
                 print p
                 lastp=p
             print "\t", fn
         
     for t,fns in self.result.iteritems():
         gprint("\n\n<> can be guessed as:",t)
         for p in sorted(set(os.path.split(fn)[0] for fn in fns)):
             dt0,dt1,guess=self.pathdts[p][0],self.pathdts[p][1],self.guessdts[p]
             c0,c1,color =  (guess is None) and ("","",RED) or\
                            (dt0 is None) and ("","",YELLOW) or\
                            {(True,True):("","",GREEN), (False,False):("","",RED), 
                               (True,False):("",RED,""),  (False,True):(RED,"",""),
                            }[(abs( (dt0-guess).days )<90, abs( (dt1-guess).days )<90)]
                         
             gprint('\n\t<>\n\t\t[<>, <>] <>\n', p,
                    (c0 + dt0.strftime("%Y-%m%d")) if dt0 else '',
                    (c1 + dt1.strftime("%Y-%m%d")) if dt1 else '',
                    color + (guess.strftime("%Y-%m%d") if guess else "GUESSFAILED") )
     
     c=sum([len(fns) for (k,fns) in self.result.iteritems() if k !='dtmayerror'])
     print RED+"{} files cannot be archived.".format(c) if c else GREEN+"All files can be archived."
     if self.datetimegener is not None:
         print GREEN + "After gen, You can check again."
Пример #3
0
def walkfiles(path, condition=lambda x: True, debug=False,pre=""):
    """Yield the full path of every file below ``path`` accepted by ``condition``.

    ``condition`` receives the bare file name. When ``debug`` is true, each
    directory that contributes at least one file is reported through gprint,
    prefixed with ``pre``.
    """
    for folder, _subdirs, names in os.walk(path):
        accepted = filter(condition, names)
        if debug and len(accepted):
            gprint("<><> [<>]\n", pre, folder, BLUE+str(len(accepted)))

        for name in accepted:
            yield os.path.join(folder, name)
Пример #4
0
 def adddir(self, path, rootdir):
     gprint("\nAdding [<>] to archive\n", BLUE+BWHITE+path) 
     fns= [f for f in gpylib.misc.walkfiles(path, _WALK_CONDITION, True,"\n\n")]
     gprint(RED+"Randoming the files\n")
     random.shuffle(fns)
     for fn in fns:
     #for fn in gpylib.misc.walkfiles(path, _WALK_CONDITION, True,"\n\n"):
         try:
             self.add(fn, rootdir)
         except PicException as e:
             self.failed.append(fn )
     print "\n"
Пример #5
0
 def __call__(self, fn):
     """Return the hex MD5 digest of the file ``fn``, memoised in ``self.cache``.

     A cache hit only bumps the hit counter; a miss reads the file, stores
     the digest under ``fn`` and returns it.
     """
     md5 = self.cache.get(fn, None)
     if md5 is not None:
         # The original did `reachcount += 1` on a name that is local to this
         # method and never assigned, so every cache hit raised
         # UnboundLocalError. Keep the counter on the instance instead.
         # NOTE(review): assumes the `status` reporting reads self.reachcount -- confirm.
         self.reachcount = getattr(self, 'reachcount', 0) + 1
         return md5

     gprint(YELLOW+"5\b")
     h = hashlib.new('md5')
     # Binary mode is required; per the original note, without 'rb'
     # duplicate digests can show up.
     with open(fn, 'rb') as f:
         # Hash in chunks so large files are never held in memory at once;
         # the digest is identical to hashing the whole content in one call.
         for chunk in iter(lambda: f.read(1 << 20), b''):
             h.update(chunk)
     md5 = h.hexdigest()
     self.cache[fn] = md5
     return md5
Пример #6
0
def grouppicdir(dir, dst, best=True, dstw=2970, dsth=2100, margin=20):
    """拼接某个目录下的jpg
    
    """

    gprint(u"组合照片 [<>] [<>]\n\n", dir, GREEN+"Best" if best else "") 
    (dd, df) = os.path.split(dst)
    if not df.startswith("_"):
        print "[%s] must start with _"%df
        return

    files = [os.path.join(dir, f) for f in os.listdir(dir) if re.match(r"[^_].+\.jpg",f,re.I) != None]
    grouppic(files, dst, dstw, dsth, margin, best)
Пример #7
0
 def selftest(self, verbose=False):
     lastpg=None
     errorcount=0
     
     md5 = hashlib.new('md5')
         
     print BLUE+BWHITE+"\n\nSelf Testing"
     for pg in self.groups:
         if verbose:
             gprint("[<>] - [<>] : <> Files.\n", pg.pics[0].datetime, pg.pics[-1].datetime, BLUE+"{:03d}".format(len(pg.pics)) )
             
         if lastpg is not None and lastpg.pics[-1].cmptime(pg.pics[0])>=0:
             errorcount+=1
             gprint("<>\n\t<>\n\t<>\n", RED+"Error Found: Group split", lastpg.path, pg.path)
         lastf=None
         for f in pg.pics:
             md5.update(f.easyhash.encode("gb2312"))
             if lastf is not None and lastf.cmptime(f)>0:
                 errorcount+=1
                 gprint("<>\n\t<>\n\t<>\n", RED+"Error Found: File Order", lastf.fullname, f.fullname)
             lastf=f
         lastpg=pg
         
     gprint("\nCheck end, [<>] errors found.\nDigest[<>]\n", 
                         (RED if errorcount else GREEN) + str(errorcount),
                         BLUE+md5.hexdigest() )
Пример #8
0
 def dump(self,newdir,fake,movenewfile):
     """Move or copy this file into ``newdir`` and return the name of the action taken.

     Returns "File No Action" when the file is already at the target
     (case-insensitive path comparison). Otherwise the file is moved when it
     has no ``rootdir`` ("Move Archive") or when ``movenewfile`` is true
     ("Move New"), and copied in all other cases ("Copy New"). With ``fake``
     set, nothing touches the disk but the bookkeeping is still updated.
     """
     target = os.path.join(newdir, self.name)
     if target.lower() == self.fullname.lower():
         return "File No Action"

     if self.rootdir is None:
         marker, transfer, outcome = GREEN+"M", shutil.move, "Move Archive"
     elif movenewfile:
         marker, transfer, outcome = RED+"M", shutil.move, "Move New"
     else:
         marker, transfer, outcome = "C", shutil.copy2, "Copy New"

     gprint(marker)
     if not fake:
         transfer(self.fullname, target)
     self.fullname = target
     self.rootdir = None
     return outcome
Пример #9
0
    def printdumpinfo(self):
        print BLUE+BWHITE+"Dump information\n"
        print "Old", BLUE+str(len(self.old))
        print u"".join([u"{}\n\t{}\n\n".format(new.fullname, old.fullname) for new,old in self.old])
        
        print "\n", YELLOW+"Overwrite", BLUE+str(len(self.overwrite))
        print u"".join([u"{}\n\t{}\n\n".format(new.fullname, old.fullname) for new,old in self.overwrite])

        print "\n", RED+"Failed", len(self.failed)
        print u"\n".join(self.failed)
    
        print "Actions:{"
        gpylib.misc.ppdict(self._dumpactions,prekey=u" "*4, prevalue=u" "*8)
        print "}"
        
        gprint("\nTotal file [<>], readsize [<>], readmd5 [<>]\n", _PicFile.totalfile,
                _PicFile.totalsize, _PicFile.totalmd5)
        gprint("MD5 Cache Size [<>], reachcount [<>]\n", *gpylib.misc.getmd5.status )        
Пример #10
0
 def dumpstep1(self, fake, movenewfile, actions):
     if self.clean or self.count()==0:
         actions['Dir No Action'] +=1
         return
     
     print "\n\n",
     if self.path is None: #还没有创建过
         self.path=os.path.join(self.basepath, self.pics[0].datetime.replace(':',"-"))
         
         actions['MkDir'] +=1
         print "mkdir",self.path
         if not fake:
             os.mkdir(self.path)
     else:
         print self.path
         
     gprint("[<>] Files\n", len(self.pics))
     for p in self.pics:
         actions[p.dump(self.path,fake,movenewfile)] +=1
Пример #11
0
    def add(self, fn, rootdir):
        """Wrap ``fn`` in a _PicFile and insert it into the matching group.

        The group's own ``add`` decides whether the file is "Old",
        "Overwrite" or new; a new file is also indexed in ``self.md5dict`` by
        size, and a group that reaches ``self.max`` entries is split.
        """
        pf = _PicFile(fn, rootdir)
        # Look for an already-known identical file (by MD5, per the original note).
        twin = self.findsamefile(pf)
        if twin:
            # Reuse its exif so that this file can be located later on.
            pf.exif = twin.exif

        if not self.groups:
            self.groups.append(_PicGroup.newone(self.path, pf))
            return

        # Choose the target group:
        #  - pf sorts before the first group   -> first group
        #  - pf falls between two groups       -> the later one
        #  - pf sorts after every group        -> last group
        target = self.groups[-1]
        for candidate in self.groups:
            if pf <= candidate:
                target = candidate
                break

        verdict, refpf = target.add(pf)
        if verdict == "Old":
            gprint(DIM+"O")
            self.old.append((pf, refpf))
        elif verdict == "Overwrite":
            gprint(YELLOW+"W")
            self.overwrite.append((pf, refpf))
        else:  # 'New'
            self.md5dict[pf.size].append(pf)
            gprint(GREEN+"N")
            if target.count() >= self.max:
                ng = target.split()
                if ng is not None:
                    bisect.insort_left(self.groups, ng)
Пример #12
0
def ppdict(d,prekey=u"",postkey=u"", prevalue=u"\t",postvalue=u""):
    """Pretty-print ``d``: one line per key, then one line per value.

    A list value is printed one element per line; any other value is printed
    on a single line. The pre*/post* strings wrap each key / value.
    """
    for key, value in d.iteritems():
        gprint(u"<><><>\n", prekey, key, postkey)

        # Treat a scalar as a one-element list so both cases share the loop.
        entries = value if isinstance(value, list) else [value]
        for entry in entries:
            gprint(u"<><><>\n", prevalue, entry, postvalue)
Пример #13
0
 def selftest(self, verbose=False):
     lastpg=None
     errorcount=0
     
     print BLUE+BWHITE+"\n\nSelf Testing"
     for pg in self.groups:
         if verbose:
             gprint("[<>] - [<>] : <> Pics\n", pg.pics[0].datetime, pg.pics[-1].datetime, BLUE+"{:03d}".format(len(pg.pics)) )
             
         if lastpg is not None and lastpg.pics[-1].cmptime(pg.pics[0])>=0:
             errorcount+=1
             gprint("<>\n\t<>\n\t<>\n", RED+"Error Found: Group split", lastpg.path, pg.path)
         lastf=None
         for f in pg.pics:
             if lastf is not None and lastf.cmptime(f)>0:
                 errorcount+=1
                 gprint("<>\n\t<>\n\t<>\n", RED+"Error Found: File Order", lastf.fullname, f.fullname)
             lastf=f
         lastpg=pg
         
     gprint("\nCheck end, [<>] errors found.\n", (RED if errorcount else GREEN) + str(errorcount) )
Пример #14
0
def findsamefile(*dirlist):
    gprint(u"Checking the following directories:\n<>\n", u"".join(u"\t[{}]\n".format(d) for d in dirlist))
    
    sizedict, fc = defaultdict(list), 0
    for fn in itertools.chain( *(gpylib.misc.walkfiles(d, lambda x: True, True) for d in dirlist) ):
        sizedict[os.stat(fn).st_size].append( fn )
        fc += 1
    
    md5dict, mc = defaultdict(list), 0
    for f in itertools.chain( *(filter(lambda x: len(x)>1, sizedict.itervalues())) ):
        gpylib.misc.printworking()
        md5dict[gpylib.misc.getmd5(f)].append(f)
        mc+=1
    
    samefilelist = filter(lambda x: len(x)>1, md5dict.itervalues())
    gprint("\nTotal [<>] duplicate files.\n", BLUE + str(len(samefilelist)) )
    print u"\n\n\n".join("\n".join(fns) for fns in sorted(samefilelist) )
    gprint("Total [<>] files, calc md5 [<>].",BLUE+str(fc), RED+str(mc) )
Пример #15
0
 def printse(self):
     gprint("\nGroup [<>]\n\t", BLUE+str(len(self.pics)))
     print "\n\t".join(pf.name for pf in [self.pics[0],self.pics[-1]])
Пример #16
0
 def printdebug(self, step=50):
     gprint("\nGroup [<>]\n\t", BLUE+str(len(self.pics)))
     print "\n\t".join(pf.name for pf in self.pics[::step])
Пример #17
0
 def printonlysi(self):
     gprint("\nGroup [<>]\n\t", BLUE+str(len(self.pics)))
     print "\n\t".join([u"{}\n\t\t{}\n\t\t{}".format(pf.name,pf.fullname,pf._pre.fullname) for pf in self.pics \
                 if re.match(ur"^N{0,1}[SI]+$",pf.nameflag,re.U)!=None])
Пример #18
0
 def printme(self):
     print "Total ",self.count
     print "\n", RED+"No Exif", len(self.noexifs)
     print u"\n".join(self.noexifs)
     
     print "\n", RED+"No MMDT", len(self.nommdts)
     print u"\n".join(self.nommdts)
     
     print "\n", RED+"No MM", len(self.nomms)
     print u"\n".join([u"{}\n\t{}".format(*item) for item in self.nomms])
     
     print "\n", RED+"Zero DT", len(self.zeordts)
     print u"\n".join(self.zeordts)
     
     print "\n", RED+"MM No DateTime", len(self.mmnodts)
     print "\n".join([u"{}\n\t[{}]".format(f,d) for f,d in self.mmnodts])
     
     print "\n", BLUE+"Date Time", len(self.ymds)
     print "\n".join(["{}\t{}".format(d,c) for (d,c) in sorted(self.ymds.items())])
     
     print "\n", BLUE+"Make Models", len(self.models)
     print "\n".join(["{}\t\t{}".format(m,c) for (m,c) in self.models.iteritems()])
     
     print "\n", RED+"No MM Paths", len(self.nomms)
     print u"\n".join(sorted(set(os.path.split(fn)[0] for (fn,dt) in self.nomms)))
     
     print "\n", RED+"Zero DT Paths", len(self.zeordts)
     print u"\n".join(sorted(set(os.path.split(fn)[0] for fn in self.zeordts)))
     
     #相同size、相同MD5 但是Datatime不同的,这样备份后会重复。
     #相同size、相同Datetime,不同MD5的,这样比较Funny
     #三个都相同的,会在备份的时候自然只会选择一个
     
     def THOSE_VALUE_MORE_THAN_ONE(d):
         return filter(lambda x: len(x[1])>1, d.iteritems())
     
     for (size,fndts) in THOSE_VALUE_MORE_THAN_ONE(self.sizes):
         dt_md5s = defaultdict(lambda: defaultdict(list))
         md5_dts = defaultdict(lambda: defaultdict(list))
         dtmd5_fns = defaultdict(list)
         for (fn,dt) in fndts:
             md5=gpylib.misc.getmd5(fn)
             dt_md5s[dt][md5].append(fn)
             md5_dts[md5][dt].append(fn)
             dtmd5_fns[(dt,md5)].append(fn)
         
         for (dt,md5s) in THOSE_VALUE_MORE_THAN_ONE(dt_md5s): 
             #Funny: size,datetime相同,但是MD5不同
             gprint("\n<> [<>] [<>]\n",BLUE+"Funny",size, dt)
             gpylib.misc.ppdict(md5s,u"    [",u"]", u"        ",u"")
             #这个item是 key/list
             
         for (md5, dts) in THOSE_VALUE_MORE_THAN_ONE( md5_dts): 
             #重复: size,MD5相同,但是DateTime不同
             gprint("\n<> [<>] [<>]\n",YELLOW+"Duplicate",size, md5)
             gpylib.misc.ppdict(dts,u"    [",u"]", u"        ",u"")
         
         for ((dt,md5),fns) in THOSE_VALUE_MORE_THAN_ONE( dtmd5_fns):
             #有size,md5,DateTime都相同的情况
             gprint("\n<> [<>] [<>] [<>]\n    ", GREEN+"Same File", size, dt, md5)
             print u"\n    ".join(fns)
Пример #19
0
def testarchiveiflost(newdir,archivedir, verbose, del_ifok=False):
    gprint("Check [<>] in [<>], verbose[<>], del_ifok[<>]\n\n", newdir, archivedir, verbose, del_ifok)
    
    print BLUE+BWHITE+"Indexing archive dir.."
    verboselist=[]
    
    archivedict=defaultdict(list)
    for fn in gpylib.misc.walkfiles(archivedir, _WALK_CONDITION, True):
        archivedict[os.stat(fn).st_size].append( [fn,None] )
    
    print BLUE+"\nChecking..."
    lostcount, removecount=0,0
    
    for fn in gpylib.misc.walkfiles(newdir, _WALK_CONDITION, True,pre="\n"):
        size=os.stat(fn).st_size
        fnmd5s = archivedict.get(size,None)
        if fnmd5s is None:
            gprint("<>\n\t<>\n",RED+"Lost:",fn)
            lostcount +=1
            continue
        
        md5=gpylib.misc.getmd5(fn)
        for fnmd5 in fnmd5s:
            if fnmd5[1] is None:
                fnmd5[1]=gpylib.misc.getmd5(fnmd5[0])
            
            if fnmd5[1] == md5:
                if verbose == '1':
                    gprint("\n<>\n\t<>\n", fn, fnmd5[0])
                elif verbose != "0" and verbose is not False:
                    verboselist.append( (fn,fnmd5[0]) )
                
                if del_ifok == "del_ifok":
                    os.chmod(fn, stat.S_IRWXU| stat.S_IRWXG| stat.S_IRWXO)
                    os.remove( fn )
                    gprint(RED+"D")
                    removecount +=1
                break
        else:
            gprint("<>\n\t<>\n",RED+"Lost:",fn)
            lostcount +=1
    
    gprint("\nCheck ended.\n\t[<>] files lost.\n\t[<>]files deleted.", 
            YELLOW+str(lostcount), RED+str(removecount))
            
    if len(verboselist)!=0:
        with open(verbose,"w") as f:
            f.writelines("\n{}\n\t{}\n".format(*a) for a in verboselist)
Пример #20
0
def printworking(current=[0]):
    """Print the next frame of a four-step coloured spinner, followed by a backspace.

    The mutable default ``current`` is deliberate: it keeps the frame index
    between calls.
    """
    frames = [RED+'-', GREEN+'\\', BLUE+"|", YELLOW+'/']
    position = current[0]
    gprint("<><>", frames[position], "\b")
    current[0] = (position + 1) % 4