def analyze(islandId,timeBoundary,commit,type): mc = ManagementContainer.getInstance() im = mc.getIslandManager() isle = im.getIsland(islandId) fIS = IndexSearchConstraint(None,None) fIS.constrainByIsland(isle) fIS.constrainByType(type) ism = mc.getIndexSearchManager() qb = MyCriteria("isattachment:0 AND processingtime:[%s TO *]" % timeBoundary) qr = ism.scaledSearch(qb,fIS,CallerApp.INTERNAL) for doc in qr: pass dupsById = qr.getDuplicates() print time.asctime(),'Found',dupsById.size(),type,'duplicates in island',islandId,'total docs',qr.getDocCount() for me in dupsById.entrySet(): storageId = me.getKey() locIds = me.getValue() data = {} for locId in locIds: docs = getDocuments(ism,isle,storageId,locId) byId = {} for doc in docs: byId[doc.getString(IIndexSearchSchema.FIELD_CONTENT_ID)] = doc data[locId] = byId try: dupData = [] if commit is True: (found,dupData) = simpleValidateData(data) if not found: print time.asctime(),'The DB has no matching record for any of the documents found in the archive. Skipping',storageId continue else: dupData = validateData(data) for storageId,locId in dupData: print time.asctime(),'Deleting documents for',storageId,'on location',locId deleteDuplicate(locId,storageId,commit,type) except Exception,e: print time.asctime(),'Not recommending any change for storage ID',storageId,'due to validation value',e