def addFile(self, rfile, statinfo):
    """Append a file to the current batch; when the batch reaches
    self.numfiles entries or self.archivemb megabytes, flush it to a
    worker and start a fresh batch with this file."""
    # Imported locally to avoid a circular import with archiver.archiveFiles.
    from archiver.archiveFiles import id_generator
    perms = []
    gc.collect()
    kilo_byte_size = self.arraysize / 1024
    mega_byte_size = kilo_byte_size / 1024
    if self.extendedcifs:
        perms = self.buildPerms(perms, rfile)
    if len(self.jobarray) < self.numfiles and mega_byte_size < self.archivemb:
        # Batch still has room: record the file and its size.
        self.jobarray.append({"rfile": rfile, "perms": perms})
        self.arraysize = self.arraysize + statinfo.st_size
    else:
        # Batch is full: hand a real copy (not an alias) to the worker so
        # resetting self.jobarray below cannot touch the task's arguments.
        jobcopy = list(self.jobarray)
        if self.usecelery:
            id_gen = self.temp_dir + "/" + id_generator(size=16)
            af.apply_async(args=[id_gen, jobcopy, self.debug, self.description,
                                 self.tags, self.dry, self.extendedcifs,
                                 self.crawlid])
        #else:
        #    self.queue.put(jobcopy)
        self.totaljobsize = self.arraysize + self.totaljobsize
        self.jobarray = []
        self.arraysize = 0
        gc.collect()
        self.jobarray.append({"rfile": rfile, "perms": perms})
        self.arraysize = self.arraysize + statinfo.st_size
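
# id_generator() is imported from archiver.archiveFiles above and names the
# unique per-batch staging path under self.temp_dir. Below is a minimal sketch
# of such a helper, assuming the common random-string recipe; it is
# hypothetical and the real implementation in archiver.archiveFiles may differ.
import random
import string

def id_generator(size=16, chars=string.ascii_uppercase + string.digits):
    # Hypothetical sketch: return `size` random characters drawn from `chars`,
    # e.g. id_generator() -> "7QX2K9RM4TBWZC1J".
    return "".join(random.choice(chars) for _ in range(size))
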
def recurseCrawl(self, filepath):
    """Walk filepath, batching every regular file that passes the
    older/newer age filters, then flush the final partial batch."""
    global logger
    # Imported locally to avoid a circular import with archiver.archiveFiles.
    from archiver.archiveFiles import id_generator
    for (path, dirs, files) in os.walk(filepath):
        for fi in files:
            rfile = os.path.join(path, fi)
            if os.path.islink(rfile):
                continue
            try:
                statinfo = os.stat(rfile)
            except OSError:
                # File vanished or is unreadable mid-walk; skip it.
                continue
            if self.oldertime > 0 or self.newertime > 0:
                datemtime = datetime.fromtimestamp(statinfo.st_mtime)
                if self.oldertime > 0 and self.newertime > 0:
                    # Between: modified more than oldertime days ago but
                    # less than newertime days ago.
                    if (datemtime < datetime.now() - timedelta(days=self.oldertime)
                            and datemtime > datetime.now() - timedelta(days=self.newertime)):
                        self.addFile(rfile, statinfo)
                elif self.oldertime > 0 and self.newertime == 0:
                    if datemtime < datetime.now() - timedelta(days=self.oldertime):
                        self.addFile(rfile, statinfo)
                elif self.oldertime == 0 and self.newertime > 0:
                    if datemtime > datetime.now() - timedelta(days=self.newertime):
                        self.addFile(rfile, statinfo)
            else:
                # No age filter configured: take every file.
                self.addFile(rfile, statinfo)
    gc.collect()
    # Flush whatever remains in the final, partially filled batch.
    jobcopy = list(self.jobarray)
    if self.usecelery:
        if len(jobcopy) > 0:
            id_gen = self.temp_dir + "/" + id_generator(size=16)
            af.apply_async(args=[id_gen, jobcopy, self.debug, self.description,
                                 self.tags, self.dry, self.extendedcifs,
                                 self.crawlid])
    #else:
    #    if len(jobcopy) > 0:
    #        self.queue.put(jobcopy)
    self.totaljobsize = self.totaljobsize + self.arraysize
    logger.info("Done crawl %s %s %s bytes"
                % (filepath, self.crawlid, self.totaljobsize))
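
# The age filter in recurseCrawl() has three modes (between, older-than,
# newer-than). A standalone restatement of that predicate can be handy for
# testing the cutoff logic in isolation. This helper is hypothetical (not part
# of the archiver); it only mirrors the comparisons used above.
from datetime import datetime, timedelta

def passes_age_filter(mtime, oldertime, newertime, now=None):
    # mtime: file mtime as a datetime; oldertime/newertime: cutoffs in days.
    now = now or datetime.now()
    if oldertime > 0 and newertime > 0:
        # Window: older than `oldertime` days but newer than `newertime` days.
        return (mtime < now - timedelta(days=oldertime)
                and mtime > now - timedelta(days=newertime))
    if oldertime > 0:
        return mtime < now - timedelta(days=oldertime)
    if newertime > 0:
        return mtime > now - timedelta(days=newertime)
    return True  # no filter configured: accept everything

# For example, a file modified 120 days ago passes an oldertime=90 filter:
#     passes_age_filter(datetime.now() - timedelta(days=120), 90, 0)  # True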