示例#1
0
def analyze(islandId,timeBoundary,commit,type):

    mc = ManagementContainer.getInstance()
    im = mc.getIslandManager()
    isle = im.getIsland(islandId)
    fIS = IndexSearchConstraint(None,None)
    fIS.constrainByIsland(isle)
    fIS.constrainByType(type)
    ism = mc.getIndexSearchManager()
    qb = MyCriteria("isattachment:0 AND processingtime:[%s TO *]" % timeBoundary)
    qr = ism.scaledSearch(qb,fIS,CallerApp.INTERNAL)
    for doc in qr:
       pass
    dupsById = qr.getDuplicates()   

    print time.asctime(),'Found',dupsById.size(),type,'duplicates in island',islandId,'total docs',qr.getDocCount()

    for me in dupsById.entrySet():
       storageId = me.getKey()
       locIds = me.getValue()
       data = {}
       for locId in locIds:
           docs = getDocuments(ism,isle,storageId,locId)
           byId = {}
           for doc in docs:
               byId[doc.getString(IIndexSearchSchema.FIELD_CONTENT_ID)] = doc
           data[locId] = byId
       try:
           dupData = []
           if commit is True:
               (found,dupData) = simpleValidateData(data)
               if not found:
                   print time.asctime(),'The DB has no matching record for any of the documents found in the archive. Skipping',storageId
                   continue
           else:
               dupData = validateData(data)

           for storageId,locId in dupData:
               print time.asctime(),'Deleting documents for',storageId,'on location',locId
               deleteDuplicate(locId,storageId,commit,type)
          
       except Exception,e:
           print time.asctime(),'Not recommending any change for storage ID',storageId,'due to validation value',e