def isSameItems(item1, item2):
    """Report whether two items appear to refer to the same file content.

    The files at the items' full paths are compared by size first and, only
    when the sizes match, by the value ``infoCollector.getHeadContentMd5``
    returns for each path.

    Args:
        item1: Object exposing ``getFullPath()``.
        item2: Object exposing ``getFullPath()``.

    Returns:
        ``False`` if the sizes or the head-content MD5 values differ,
        ``True`` otherwise.
    """
    firstPath = item1.getFullPath()
    secondPath = item2.getFullPath()

    # Cheap test first: a size mismatch settles the answer without hashing.
    firstSize = os.stat(firstPath).st_size
    secondSize = os.stat(secondPath).st_size
    if firstSize != secondSize:
        return False

    firstDigest = infoCollector.getHeadContentMd5(firstPath)
    secondDigest = infoCollector.getHeadContentMd5(secondPath)
    if firstDigest != secondDigest:
        return False
    return True
def headMd5(self):
    """Return the head-content MD5 for ``self.fullPath``, or ``None`` on IOError.

    Emits the "Generating Md5" message through ``ncl`` before hashing.

    Returns:
        Whatever ``infoCollector.getHeadContentMd5`` returns for
        ``self.fullPath``, or ``None`` when that call raises ``IOError``.
    """
    ncl("Generating Md5")
    try:
        digest = infoCollector.getHeadContentMd5(self.fullPath)
    except IOError:
        # An unreadable file is reported as "no digest" instead of failing.
        digest = None
    return digest
def getNextUpdatedItem(self):
    """Yield every file contained in each new or changed encrypted zip.

    Walks ``self.zipStorageDir``. Every file that ``self.lastState.updated``
    reports as changed and whose path contains ``'.enc'`` is converted to a
    plain zip via ``self.getZipFile``; each member of that zip is extracted
    into ``self.workingDir`` and yielded as a ``zipStorageItem``.

    When the companion ``.enclog`` file does not exist, per-member info is
    collected into ``self.zippedFileInfoRegenerating`` and passed on through
    ``self.saveRegeneratedState`` after the last member of the archive has
    been yielded. ``self.regenerateNeeded`` is overwritten for every archive.

    Yields:
        zipStorageItem: one item per member of each processed archive.
    """
    for dirPath, _dirNames, fileNames in os.walk(self.zipStorageDir):
        for fileName in fileNames:
            encZipFileFullPath = transform.transformDirToInternal(
                os.path.join(dirPath, fileName))
            print(encZipFileFullPath)
            zipFileFolderStorageItem = folderStorage.storageItem(
                self.zipStorageDir, encZipFileFullPath)
            if not self.lastState.updated(zipFileFolderStorageItem):
                continue

            # For zip storage, an updated (or newly created) zip file means
            # every element inside it has to be enumerated.
            # NOTE(review): a '.enclog' path also contains '.enc', so log
            # files are not filtered out by this test - confirm they are
            # never reported as updated, or tighten the check.
            if '.enc' not in encZipFileFullPath:
                # Two spaces after the colon reproduce the output of the
                # original two-argument Python 2 print statement.
                print('not a encrypted zip file:  %s' % (encZipFileFullPath,))
                continue

            self.regenerateNeeded = False
            # First decrypt the zip file, then open the plain zip.
            zipFileFullPath = self.getZipFile(encZipFileFullPath)
            zf = zipClass.ZFile(zipFileFullPath, 'r')

            # A missing log file means its content has to be regenerated.
            if not os.path.exists(encZipFileFullPath.replace('.enc', '.enclog')):
                self.regenerateNeeded = True

            for memberName in zf.list():
                zf.extract(memberName, self.workingDir)
                extractedItemFullPath = os.path.join(self.workingDir, memberName)

                if self.regenerateNeeded:
                    # Hashing and stat-ing are only needed for the log, so
                    # they are skipped when the log already exists.
                    relaPath = transform.formatRelativePath(memberName)
                    self.zippedFileInfoRegenerating[relaPath] = {
                        "timestamp": getTimeInSeconds(
                            zf.zfile.getinfo(memberName).date_time),
                        "headMd5": infoCollector.getHeadContentMd5(
                            extractedItemFullPath),
                        "parentEncryptedZip": encZipFileFullPath,
                        "size": os.stat(extractedItemFullPath).st_size,
                    }

                yield zipStorageItem(self.workingDir, extractedItemFullPath)

            if self.regenerateNeeded:
                self.saveRegeneratedState(encZipFileFullPath, zipFileFullPath)
def store(self, item):
    """Add ``item`` to the current zip archive and record its file info.

    Steps, in order:
      1. Reload the storage state with ``self.readZipStorageState``.
      2. Return early (after printing a message) when the recorded
         ``"zippedTimeStemp"`` for the item compares lower than the item's
         timestamp.
      3. Make sure an archive is open, rolling over to a fresh one when
         ``self.curArchivedSize`` exceeds ``MAX_SINGLE_ARCHIVE_SIZE``.
      4. Return early when the item's relative path is already present in
         ``self.zipContentState``.
      5. Add the file to ``self.curArchive`` and record its info in
         ``self.zippedFileInfo``.

    Args:
        item: Object exposing ``getRelaPath()``, ``getTimestamp()`` and
            ``getFullPath()``.

    Returns:
        None.
    """
    ########################################
    # Check if the target item is already updated
    ########################################
    self.readZipStorageState()
    relaPath = transform.formatRelativePath(item.getRelaPath())

    # An item that was never archived has no recorded state; that must not
    # abort the store with a KeyError.
    try:
        zippedTime = self.zipStorageState[relaPath]["zippedTimeStemp"]
    except KeyError:
        zippedTime = None

    # NOTE(review): this skips items whose timestamp is NEWER than the
    # archived one, while the message reads as if the opposite was meant -
    # confirm the intended direction of the comparison.
    if (zippedTime is not None
            and getTimeInSeconds(zippedTime) < int(item.getTimestamp())):
        print('Want to update an older file to a newer file. Ignore it.')
        return

    if self.curArchive is None:
        self.createNewZip()
    if self.curArchivedSize > MAX_SINGLE_ARCHIVE_SIZE:
        # Current archive is full: encrypt it and start a new one.
        self.encZip()
        self.createNewZip()

    fullPath = item.getFullPath()

    # Skip items already present in the zip content state. The lookup used
    # to reference an undefined name and hide the resulting NameError behind
    # a bare except, so this check could never succeed.
    try:
        self.zipContentState[relaPath]
    except KeyError:
        pass
    else:
        return

    # NOTE(review): the created item is never used; the call is kept in case
    # the constructor has side effects - confirm and remove if it has none.
    localItem = encZipStorageItem(self.zipStorageDir, self.curArchiveName,
                                  relaPath, fullPath)

    ##############################
    # Add the file to zip file
    ##############################
    self.curArchive.addfile(unicode(fullPath).encode('gbk'),
                            unicode(relaPath).encode('gbk'))

    # One stat call serves both the size accounting and the info record.
    fileStat = os.stat(fullPath)
    self.curArchivedSize += fileStat.st_size
    self.zippedFileInfo[relaPath] = {
        "timestamp": fileStat.st_mtime,
        "fullPath": fullPath,
        "headMd5": infoCollector.getHeadContentMd5(fullPath),
        "parentZip": self.curArchiveName,
        "size": fileStat.st_size,
    }
def updated(self, item):
    """Tell whether ``item`` differs from what ``self.stateDict`` recorded.

    Args:
        item: Object exposing ``getItemId()``, ``getTimestamp()`` and
            ``getFullPath()``.

    Returns:
        ``True`` when a ``KeyError`` is raised while consulting the recorded
        state (e.g. the item was never recorded), or when both the timestamp
        and the head-content MD5 differ from the recorded values.
        ``False`` otherwise. If only the timestamp differs, the recorded
        timestamp is overwritten with the item's and a message is printed.
    """
    try:
        timestampChanged = (
            self.stateDict[item.getItemId()]["timestamp"] != item.getTimestamp())
        if not timestampChanged:
            return False

        # Timestamp moved: fall back to comparing the content hash.
        recordedMd5 = self.stateDict[item.getItemId()]["headMd5"]
        currentMd5 = infoCollector.getHeadContentMd5(item.getFullPath())
        if recordedMd5 != currentMd5:
            return True

        # Same content under a new timestamp: refresh the local record.
        self.stateDict[item.getItemId()]["timestamp"] = item.getTimestamp()
        print('local file timestamp updated')
    except KeyError:
        return True
    return False
def updateContentStateForFile(self, encZipFileFullPath):
    """Load the log of an encrypted zip, regenerating it first if missing.

    The log path comes from ``getEncZipLogFilenameFromEncrypted``. When no
    file exists at that path, the zip obtained from ``self.getZipFile`` is
    opened, every member is extracted into ``self.workingDir``, its info is
    stored in ``self.zippedFileInfoRegenerating`` and the result is handed to
    ``self.saveRegeneratedState``. In both cases the log is then read with
    ``readEncryptedZipLog`` and passed to ``updateZipLog``.

    Args:
        encZipFileFullPath: Full path of the encrypted zip file.

    Returns:
        None.
    """
    encZipLogFilePath = getEncZipLogFilenameFromEncrypted(encZipFileFullPath)

    # The existence test must name the log file; calling os.path.exists()
    # without an argument raises TypeError.
    if not os.path.exists(encZipLogFilePath):
        # Regenerate the state info file from the decrypted zip.
        zipFileFullPath = self.getZipFile(encZipFileFullPath)
        zf = zipClass.ZFile(zipFileFullPath, 'r')
        for memberName in zf.list():
            zf.extract(memberName, self.workingDir)
            extractedItem = folderStorage.folderStorageItem(
                self.workingDir, os.path.join(self.workingDir, memberName))
            extractedPath = extractedItem.getFullPath()
            relaPath = transform.formatRelativePath(memberName)
            self.zippedFileInfoRegenerating[relaPath] = {
                "zippedTimeStemp": zf.zfile.getinfo(memberName).date_time,
                "headMd5": infoCollector.getHeadContentMd5(extractedPath),
                "parentEncryptedZip": encZipFileFullPath,
                "size": os.stat(extractedPath).st_size,
            }
        self.saveRegeneratedState(encZipFileFullPath, zipFileFullPath)

    newLog = readEncryptedZipLog(encZipLogFilePath)
    updateZipLog(newLog)
def getItemInfo(self):
    """Build the info record describing this item.

    Returns:
        dict with the keys ``"timestamp"`` (from ``self.getTimestamp()``),
        ``"fullPath"`` (from ``self.getFullPath()``), ``"headMd5"`` (from
        ``infoCollector.getHeadContentMd5`` on the full path) and ``"size"``
        (``st_size`` of the full path).
    """
    info = {}
    info["timestamp"] = self.getTimestamp()
    info["fullPath"] = self.getFullPath()
    info["headMd5"] = infoCollector.getHeadContentMd5(self.getFullPath())
    info["size"] = os.stat(self.getFullPath()).st_size
    return info
def headMd5(self):
    """Return the head-content MD5 for ``self.fullPath``.

    Returns:
        Whatever ``infoCollector.getHeadContentMd5`` returns for
        ``self.fullPath``; exceptions from that call propagate.
    """
    digest = infoCollector.getHeadContentMd5(self.fullPath)
    return digest
def headMd5(self):
    """Log that hashing starts, then return the MD5 for ``self.fullPath``.

    Emits the "Generating Md5" message through ``ncl`` before hashing.

    Returns:
        Whatever ``infoCollector.getHeadContentMd5`` returns for
        ``self.fullPath``; exceptions from that call propagate.
    """
    ncl("Generating Md5")
    digest = infoCollector.getHeadContentMd5(self.fullPath)
    return digest