def performSampleAudit(loc,samplePercent,seed): global BATCH_SAMPLE_PCT mc = ManagementContainer.getInstance() cm = mc.getClusterManager() im = mc.getIslandManager() cluster = cm.getCluster(loc.getClusterId()) island = im.getIsland(cluster.getIslandId()) masterURL = URL(loc.getClusterLocationProperty(SolrClusterAdapter.SOLR_MASTER_HOST_URL_PROP)) slaveURL = URL(loc.getClusterLocationProperty(SolrClusterAdapter.SOLR_SLAVE_HOST_URL_PROP)) ssmMaster = MySolrSearchManager(mc.getConfiguration(),'solrconfig.xml') ssmMaster.setURL(masterURL) ssmSlave = MySolrSearchManager(mc.getConfiguration(),'solrconfig.xml') ssmSlave.setURL(slaveURL) sm = mc.getIndexSearchManager() # note that this will be slow -- especially if samplePercent is large and data set size is large srcIS = IndexSearchConstraint(None,None) srcIS.constrainByNumberOfHitsToReturn(1) srcIS.constrainByIsland(island) destIS = IndexSearchConstraint(None,None) destIS.constrainByNumberOfHitsToReturn(1) destIS.constrainByIsland(island) # get a message count for the customer srcQR = search(MainMessageQuery(),srcIS,ssmSlave) srcDocCount = srcQR.getDocCount() print 'total messages',srcDocCount,'msgs' msgsToSample = int(srcDocCount * samplePercent) if msgsToSample < 100: chunkSize = 10 elif msgsToSample < 5000: chunkSize = 100 else: chunkSize = 1000 print 'Source sample size',msgsToSample,'msgs' # ensure 10% on selections per sample if samplePercent < 0.10: samplePercent = 0.10 # if corpus is large, sample more per chunk if chunkSize >= 1000 and msgsToSample > 50000: samplePercent = 0.5 # perform sample audit sampleCountsFromSource(ssmSlave,ssmMaster,srcIS,destIS,samplePercent,msgsToSample,seed,chunkSize)
def performQuickAudit(location): mc = ManagementContainer.getInstance() clus = mc.getClusterManager().getCluster(location.getClusterId()) isle = mc.getIslandManager().getIsland(clus.getIslandId()) masterURL = URL(location.getClusterLocationProperty(SolrClusterAdapter.SOLR_MASTER_HOST_URL_PROP)) slaveURL = URL(location.getClusterLocationProperty(SolrClusterAdapter.SOLR_SLAVE_HOST_URL_PROP)) ssmMaster = MySolrSearchManager(mc.getConfiguration(),'solrconfig.xml') ssmMaster.setURL(masterURL) ssmSlave = MySolrSearchManager(mc.getConfiguration(),'solrconfig.xml') ssmSlave.setURL(slaveURL) srcIS = IndexSearchConstraint(None,None) srcIS.constrainByIsland(isle) srcIS.constrainByNumberOfHitsToReturn(1) destIS = IndexSearchConstraint(None,None) destIS.constrainByIsland(isle) destIS.constrainByNumberOfHitsToReturn(1) srcQR = search('',srcIS,ssmSlave) destQR = search('',destIS,ssmMaster) srcDocCount = srcQR.getDocCount() destDocCount = destQR.getDocCount() srcQR = search(MainMessageQuery(),srcIS,ssmSlave) destQR = search(MainMessageQuery(),destIS,ssmMaster) srcMessages = srcQR.getDocCount() destMessages = destQR.getDocCount() srcQR = search(AttachmentQuery(),srcIS,ssmSlave) destQR = search(AttachmentQuery(),destIS,ssmMaster) srcAttachments = srcQR.getDocCount() destAttachments = destQR.getDocCount() srcQR = search('-isattachment:*',srcIS,ssmSlave) destQR = search('-isattachment:*',destIS,ssmMaster) srcNoAttachments = srcQR.getDocCount() destNoAttachments = destQR.getDocCount() print 'SOURCE',slaveURL,'DEST',masterURL print 'SOURCE Documents:',srcDocCount,'DEST Documents:',destDocCount print 'SOURCE Messages:',srcMessages,'DEST Messages:',destMessages print 'SOURCE Attachments:',srcAttachments,'DEST Attachments:',destAttachments print 'SOURCE (test data):',srcNoAttachments,'DEST (test data):',destNoAttachments
def performSampleAudit(custIds,samplePercent,seed): global BATCH_SAMPLE_PCT mc = ManagementContainer.getInstance() cm = mc.getCustomerManager() sm = mc.getIndexSearchManager() # note that this will be slow -- especially if samplePercent is large and data set size is large for custId in custIds: cust = cm.getCustomer(custId) srcIS = IndexSearchConstraint(None,None) srcIS.constrainByIsland(cust.getOldFeedIsland()) srcIS.constrainByNumberOfHitsToReturn(1) srcIS.constrainByCustomerId(custId) destIS = IndexSearchConstraint(None,None) destIS.constrainByIsland(cust.getFeedIsland()) destIS.constrainByNumberOfHitsToReturn(1) destIS.constrainByCustomerId(custId) # get a message count for the customer srcQR = searchAndWrap(MainMessageQuery(),srcIS,sm) srcDocCount = srcQR.getDocCount() print 'Customer',custId,'total messages',srcDocCount,'msgs' msgsToSample = int(srcDocCount * samplePercent) if msgsToSample < 100: chunkSize = 10 elif msgsToSample < 5000: chunkSize = 100 else: chunkSize = 1000 print 'Customer',custId,'Source sample size',msgsToSample,'msgs' # ensure 10% on selections per sample if samplePercent < 0.10: samplePercent = 0.10 # if corpus is large, sample more per chunk if chunkSize >= 1000 and msgsToSample > 50000: samplePercent = 0.5 # perform sample audit sampleCountsFromSource(sm,srcIS,destIS,samplePercent,msgsToSample,seed,chunkSize)
def search(isle, term): mc = ManagementContainer.getInstance() sm = mc.getIndexSearchManager() im = mc.getIslandManager() pm = mc.getPartitionManager() fIS = IndexSearchConstraint(None, None) fIS.constrainByIsland(isle) fIS.constrainByNumberOfHitsToReturn(10) fIS.constrainByMinimumStorageId(0) fIS.setOutputFields(["storageid", "partitionid"]) fIS.sortBy("storageid", True) done = False cnt = 0 lastID = 0 while not done: ok = False while not ok: try: fQR = sm.search(term, fIS, None, CallerApp.INTERNAL) ok = True except Throwable, t: print "Exception caught during search, retry = true", t t.printStackTrace() numDocs = fQR.getDocCount() print "Found numDocs", numDocs done = fQR.getDocCount() == 0 print "Preview some data" for doc in fQR.documents(): # print doc.getPartitionID(),pm.getPartition(doc.getPartitionID()).isReadOnly(),doc.getStorageID(),doc.getReceivedDate() print doc.getPartitionID(), doc.getStorageID() lastID = doc.getStorageID() cnt += 1 fIS.constrainByMinimumStorageId(lastID) # just loop once done = True
def performQuickAudit(custIds): mc = ManagementContainer.getInstance() cm = mc.getCustomerManager() sm = mc.getIndexSearchManager() for custId in custIds: cust = cm.getCustomer(custId) srcIS = IndexSearchConstraint(None,None) srcIS.constrainByIsland(cust.getOldFeedIsland()) srcIS.constrainByNumberOfHitsToReturn(1) srcIS.constrainByCustomerId(custId) destIS = IndexSearchConstraint(None,None) destIS.constrainByIsland(cust.getFeedIsland()) destIS.constrainByNumberOfHitsToReturn(1) destIS.constrainByCustomerId(custId) srcQR = searchAndWrap('',srcIS,sm) destQR = search('',destIS,sm) srcDocCount = srcQR.getDocCount() destDocCount = destQR.getDocCount() srcQR = searchAndWrap(MainMessageQuery(),srcIS,sm) destQR = search(MainMessageQuery(),destIS,sm) srcMessages = srcQR.getDocCount() destMessages = destQR.getDocCount() srcQR = searchAndWrap(AttachmentQuery(),srcIS,sm) destQR = search(AttachmentQuery(),destIS,sm) srcAttachments = srcQR.getDocCount() destAttachments = destQR.getDocCount() print 'Customer',custId,'SOURCE Documents:',srcDocCount,'DEST Documents:',destDocCount print 'Customer',custId,'SOURCE Messages:',srcMessages,'DEST Messages:',destMessages print 'Customer',custId,'SOURCE Attachments:',srcAttachments,'DEST Attachments:',destAttachments
def test(islandId,custName,numMessages):
    """Exercise IndexSearchManager.resolveLocations() with different hit limits.

    After confirming that ``numMessages`` messages for customer ``custName``
    are stored and searchable, resolveLocations() is called three times on
    the same constraint (customer + island ``islandId``, offset 0):

      1. hits-to-return = 1            -> an IndexSearchException is expected
      2. hits-to-return = 2            -> must not raise
      3. hits-to-return = numMessages  -> must not raise

    islandId    -- id of the island the constraint is restricted to
    custName    -- customer name resolved through findCustomer()
    numMessages -- number of messages expected to be stored and searchable

    Returns 1 on every failure path visible here.

    NOTE(review): the outer ``try:`` below has no ``except``/``finally``
    clause in the visible source, so this definition appears to be cut off;
    the success return value and any cleanup are not visible.
    """
    mc = ManagementContainer.getInstance()
    custid = findCustomer(custName)
    # a negative id is treated as "customer not found"
    if custid < 0:
        print 'test failed because customer',custName,'was not found'
        return 1
    island = mc.getIslandManager().getIsland(islandId)
    try:
        # preconditions: messages are stored and present in the search index
        msgs = findMessages(mc,custid,numMessages)
        if msgs.size() < numMessages:
            print 'Fail, Did not find all messages stored, only found', msgs.size()
            return 1
        if not checkSearchStatus(mc,msgs,custid):
            print 'Fail, could not find all messages in search index'
            return 1
        ism = mc.getIndexSearchManager()
        isc = IndexSearchConstraint(custid,None)
        # case 1: a hit limit of 1 is expected to be rejected
        isc.constrainByNumberOfHitsToReturn(1)
        isc.constrainByIsland(island)
        isc.constrainByOffset(0)
        try:
            results = ism.resolveLocations(isc)
            print 'Fail, expected IndexSearchException'
            return 1
        except IndexSearchException, e:
            print 'Pass, IndexSearchException',e
        except:
            # traceback.print_exc() writes to stderr and returns None, so the
            # printed line ends with "None"
            print 'Expected IndexSearchException, but got',sys.exc_info(),traceback.print_exc(file=sys.stderr)
            return 1
        # case 2: a hit limit of 2 must succeed
        isc.constrainByNumberOfHitsToReturn(2)
        isc.constrainByOffset(0)
        try:
            results = ism.resolveLocations(isc)
            sz = getResultSize(results)
            print 'Pass, hits=2',sz
            # a short result is only reported; it does not return a failure code
            if sz < numMessages:
                print 'Wrong number of results. Expected >=',numMessages,'Got',sz
        except:
            print 'Unexpected exception caught when hits was set to 2',sys.exc_info(),traceback.print_exc(file=sys.stderr)
            return 1
        # case 3: a hit limit equal to the message count must succeed
        isc.constrainByNumberOfHitsToReturn(numMessages)
        isc.constrainByOffset(0)
        try:
            results = ism.resolveLocations(isc)
            sz = getResultSize(results)
            print 'Pass, hits=',numMessages,sz
            # a short result is only reported; it does not return a failure code
            if sz < numMessages:
                print 'Wrong number of results. Expected >=',numMessages,'Got',sz,results.getResults()
        except:
            print 'Unexpected exception caught when hits was set to',numMessages,sys.exc_info(),traceback.print_exc(file=sys.stderr)
            return 1
def testArchive(numMessages, numFound, query = ''): basePath = "" mc = ManagementContainer.getInstance() custList = mc.getCustomerManager().findCustomers([SearchConstraint(ICustomerManager.PROP_NAME, SearchConstraintOperator.CONSTRAINT_EQUALS, custname)]) customerId = custList[0].getCustID() print time.asctime(), "Customer Id:", customerId reviewer = mc.getUserManager().findUserForEmail(users[0] + '@' + domainName) reviewerId = reviewer.getUserID() reviewerGroup = mc.getReviewerGroupManager().getReviewerGroup(customerId, REVIEWER_GROUP_NAME) if reviewerGroup is None: print time.asctime(), 'reviewer group not found' sys.exit(1) # wait for all msgs to be stored msgs = findMessages(mc, customerId, numMessages, True) # wait for all msgs to be indexed and searchable print time.asctime(), 'waiting for all messages to be searchable' if not checkSearchStatus(mc,msgs,customerId): print time.asctime(), 'messages were not searchable in the alotted time' sys.exit(1) print time.asctime(), 'all messages searchable:', [m.getMessageId() for m in msgs] # get list of msg IDs that satisfy query foundMsgIDs = [] ism = mc.getIndexSearchManager() isc = IndexSearchConstraint(customerId, None) isc.constrainByNumberOfHitsToReturn(2*numMessages) qs = UQLSearchCriteria(query, False) rs = ism.search(qs, isc, None, CallerApp.RECOVERY_ARCHIVE) for m in rs: foundMsgIDs.append(m.getStorageID()) print time.asctime(),'found messages:', foundMsgIDs # Creating e-discovery archive archive, result, rm = buildArchive(customerId, mc, numFound, reviewerId, reviewerGroup, UQLQuery=query)# download export chunks if result is True: print time.asctime(), 'exporting archive...' 
basePath = "/tmp/" + str(customerId) if os.path.exists(basePath) : shutil.rmtree(basePath) os.makedirs(basePath) out = FileOutputStream(basePath + "/archive.zip") rm.createPerUserActiveRecoveryArchiveFile(customerId, archive.getFile().getName(), reviewerId, SimpleOutputStreamWrapper(out), None) out.close() if os.system("cd " + basePath + "; unzip archive.zip") != 0: print time.asctime(), 'failed to unzip ' + basePath + '/archive.zip' result = False # verify exported messages if result is True: print time.asctime(), 'verifying exported messages...' if os.path.exists(os.path.join(basePath, 'nomessagefound.txt')): print time.asctime(), 'Archive was empty. No messages found by RecoveryManager' result = False else: archiveMsgs = len(os.listdir(basePath + '/' + str(reviewerId) + '/inbox')) / 2 if archiveMsgs != numFound: print time.asctime(), 'Archive message count incorrect:', numFound, '!=', archiveMsgs result = False for msgId in foundMsgIDs: archiveFile = basePath + '/' + str(reviewerId) + '/inbox/' + str(msgId) if not os.path.isfile(archiveFile + '.gz'): print time.asctime(), 'Archive message not present: ' + archiveFile + '.gz' result = False if not os.path.isfile(archiveFile + '.key'): print time.asctime(), 'Archive key not present: ' + archiveFile + '.key' result = False # verify EDRM if result is True: print time.asctime(), 'verifying EDRM XML...' minStorageId = min(foundMsgIDs) maxStorageId = max(foundMsgIDs) edrmFile = basePath + '/edrm_v1-2_' + str(minStorageId) + '-' + str(maxStorageId) + '.xml' if not os.path.isfile(edrmFile): print time.asctime(), 'EDRM XML not present: ' + edrmFile result = False # TODO - Add XML schema validation if result is True: print time.asctime(), 'on-premises/cloud message export successful' else: print time.asctime(), 'on-premises/cloud message export failed' if result is True: os.system("rm -rf " + basePath ) sys.exit(0) else: sys.exit(1)