def processItem(self, job, item): monitoringFullPath = transform.transformDirToInternal(item['monitoringPath']) archiveId = gZipFolderCollectionPrefix + monitoringFullPath if not self.collectionInDbForMonitoringPath.has_key(monitoringFullPath): self.collectionInDbForMonitoringPath[monitoringFullPath] = collectionDatabase.collectionOnMongoDbBase(archiveId, self.dbInst.getCollectionDb()) objUuid = self.dbInst.addVirtualObj({"monitoringPath": monitoringFullPath, "zippedInfoCollectionId": archiveId}); idInCol = objUuid self.zippedInfoCollectionList.addObj(idInCol, objUuid) #Save the item in the archive collection: zippedInfoColllection://D:/tmp/ fullPath = transform.transformDirToInternal(item["fullPath"]) relativePath = transform.getRelativePathFromFull(fullPath, monitoringFullPath) if not os.path.exists(fullPath): job.delete() return False#No job release, job was deleted. ################################################################# # Start process the ################################################################# if not self.collectionInDbForMonitoringPath[monitoringFullPath].exists(relativePath): #This item is not in the collection, so we need to extract info from this item newObj = self.dbInst.getFsObjFromFullPath(fullPath) self.collectionInDbForMonitoringPath[monitoringFullPath].addObj(relativePath, newObj["uuid"]) zipFilePath = transform.transformDirToInternal( fileTools.getTimestampWithFreeName(self.workingDir, gInfoFileDecryptedExt, gInfoFilePrefix)) self.decCopier.copy(fullPath, zipFilePath) for i in zippedInfo(self.workingDir).enumItems(zipFilePath): print '--------------------------------------------------' print i fp = open(i, 'r') loadedFileInfo = json.load(fp) print loadedFileInfo for i in zippedInfo(self.workingDir).enumZippedFiles(zipFilePath): fp = open(i, 'r') print 'data file extracted:', i ''' else: #This item is not in the collection, so we need to extract info from this item newObj = self.dbInst.getFsObjFromFullPath(fullPath) 
self.collectionInDbForMonitoringPath[monitoringFullPath].addObj(relativePath, newObj["uuid"]) zipFilePath = transform.transformDirToInternal( fileTools.getTimestampWithFreeName(self.workingDir, gInfoFileDecryptedExt, gInfoFilePrefix)) self.decCopier.copy(fullPath, zipFilePath) for i in zippedInfo(self.workingDir).enumItems(zipFilePath): print '--------------------------------------------------' print i fp = open(i, 'r') loadedFileInfo = json.load(fp) print loadedFileInfo for i in zippedInfo(self.workingDir).enumZippedFiles(zipFilePath): fp = open(i, 'r') print 'data file extracted:', i ''' return True#Release job
def add_to_collection(self, full_path):
    '''
    Return true if the item is already in collection (its stored uuid is
    unchanged); otherwise record the fresh uuid and return false.
    '''
    internal_path = transform.transformDirToInternal(full_path)
    item_id = transform.getRelativePathFromFull(internal_path, self.rootFolder)
    fs_obj = self.obj_db.getFsObjFromFullPath(internal_path)
    unchanged = self.collection.isSame(item_id, fs_obj.get_uuid())
    if not unchanged:
        cl("updated:", fs_obj.get_uuid(), item_id)
        self.collection.addObj(item_id, fs_obj.get_uuid())
    return unchanged
def processJob(self, job, item): monitoringFullPath = transform.transformDirToInternal(item['monitoringPath']) archiveId = "zippedInfoColllection://" + monitoringFullPath if not self.collectionInDbForMonitoringPath.has_key(monitoringFullPath): self.collectionInDbForMonitoringPath[monitoringFullPath] = collectionDatabase.collectionOnMongoDbBase(archiveId, self.dbInst.getCollectionDb()) #Save the item in the archive collection: zippedInfoColllection://D:/tmp/ fullPath = transform.transformDirToInternal(item["fullPath"]) relativePath = transform.getRelativePathFromFull(fullPath, monitoringFullPath) if not self.collectionInDbForMonitoringPath[monitoringFullPath].exists(relativePath): #This item is not in the collection, so we need to extract info from this item newObj = self.dbInst.getFsObjFromFullPath(fullPath) self.collectionInDbForMonitoringPath[monitoringFullPath].addObj(relativePath, newObj["uuid"]) for i in zippedInfo(self.workingDir).enumItems(fullPath): fp = open(i, 'r') loadedFileInfo = json.load(fp) print loadedFileInfo return True
def generator(self): ############################################### #Scan for existing files ############################################### collection = self.objDb.getCollection(self.targetCollectionId) cl('start scanning') #for i in os.walk(self.rootFolder): for i in os.listdir(self.rootFolder+ "/"): if (self.addedItemCnt % 1000) == 0: cl("processing item cnt:", self.addedItemCnt) self.addedItemCnt += 1 fullPath = transform.transformDirToInternal(os.path.join(self.rootFolder, i)) #print '---------------------real adding item' #Update the item info for the item ncl('before fs obj base') #itemUrl = ufsObj.fsObjBase(fullPath).getObjUrl() objInCol = transform.getRelativePathFromFull(fullPath, self.rootFolder) #print fullPath, self.rootFolder if objInCol.find("/") != -1: print objInCol, self.rootFolder raise "no recursive scanning support" ncl('before get fs obj') newObjUuid = self.objDb.getFsObjFromFullPath(fullPath)["uuid"] #print fullPath if newObjUuid is None: cl("item deleted, do not add it") continue ncl('before update obj uuid') ''' collection.updateObjUuidIfNeeded(itemUrl, newObjUuid) ''' if collection.isSame(objInCol, newObjUuid): ncl("no updates needed", objInCol, newObjUuid) yield objInCol continue collection.updateObjUuidRaw(objInCol, newObjUuid) ncl('new item added', objInCol) yield objInCol folderObj = self.objDb.getFsObjFromFullPath(self.rootFolder) self.objDb.updateObjByUuid(folderObj["uuid"], {"folderCollectionId": self.targetCollectionId}) cl(folderObj, {"folderCollectionId": self.targetCollectionId})