示例#1
0
def performSampleAudit(loc,samplePercent,seed):
    global BATCH_SAMPLE_PCT
    mc = ManagementContainer.getInstance()
    cm = mc.getClusterManager()
    im = mc.getIslandManager()
    cluster = cm.getCluster(loc.getClusterId())
    island = im.getIsland(cluster.getIslandId())

    masterURL = URL(loc.getClusterLocationProperty(SolrClusterAdapter.SOLR_MASTER_HOST_URL_PROP))
    slaveURL = URL(loc.getClusterLocationProperty(SolrClusterAdapter.SOLR_SLAVE_HOST_URL_PROP))

    ssmMaster = MySolrSearchManager(mc.getConfiguration(),'solrconfig.xml')
    ssmMaster.setURL(masterURL)
    ssmSlave = MySolrSearchManager(mc.getConfiguration(),'solrconfig.xml')
    ssmSlave.setURL(slaveURL)

    sm = mc.getIndexSearchManager()
    # note that this will be slow -- especially if samplePercent is large and data set size is large

    srcIS = IndexSearchConstraint(None,None)
    srcIS.constrainByNumberOfHitsToReturn(1)
    srcIS.constrainByIsland(island)

    destIS = IndexSearchConstraint(None,None)
    destIS.constrainByNumberOfHitsToReturn(1)
    destIS.constrainByIsland(island)

    # get a message count for the customer
    srcQR = search(MainMessageQuery(),srcIS,ssmSlave)
    srcDocCount = srcQR.getDocCount()
    print 'total messages',srcDocCount,'msgs'
    msgsToSample = int(srcDocCount * samplePercent)
   
    if msgsToSample < 100:
        chunkSize = 10
    elif msgsToSample < 5000:
        chunkSize = 100
    else:
        chunkSize = 1000
    print 'Source sample size',msgsToSample,'msgs'

    # ensure 10% on selections per sample
    if samplePercent < 0.10:
        samplePercent = 0.10
    # if corpus is large, sample more per chunk
    if chunkSize >= 1000 and msgsToSample > 50000:
        samplePercent = 0.5

    # perform sample audit
    sampleCountsFromSource(ssmSlave,ssmMaster,srcIS,destIS,samplePercent,msgsToSample,seed,chunkSize)
示例#2
0
def performQuickAudit(location):
    mc = ManagementContainer.getInstance()
    clus = mc.getClusterManager().getCluster(location.getClusterId())
    isle = mc.getIslandManager().getIsland(clus.getIslandId())

    masterURL = URL(location.getClusterLocationProperty(SolrClusterAdapter.SOLR_MASTER_HOST_URL_PROP))
    slaveURL = URL(location.getClusterLocationProperty(SolrClusterAdapter.SOLR_SLAVE_HOST_URL_PROP))

    ssmMaster = MySolrSearchManager(mc.getConfiguration(),'solrconfig.xml')
    ssmMaster.setURL(masterURL)
    ssmSlave = MySolrSearchManager(mc.getConfiguration(),'solrconfig.xml')
    ssmSlave.setURL(slaveURL)

    srcIS = IndexSearchConstraint(None,None)
    srcIS.constrainByIsland(isle)
    srcIS.constrainByNumberOfHitsToReturn(1)

    destIS = IndexSearchConstraint(None,None)
    destIS.constrainByIsland(isle)
    destIS.constrainByNumberOfHitsToReturn(1)

    srcQR = search('',srcIS,ssmSlave)
    destQR = search('',destIS,ssmMaster)

    srcDocCount = srcQR.getDocCount()
    destDocCount = destQR.getDocCount()

    srcQR = search(MainMessageQuery(),srcIS,ssmSlave)
    destQR = search(MainMessageQuery(),destIS,ssmMaster)

    srcMessages = srcQR.getDocCount()
    destMessages = destQR.getDocCount()

    srcQR = search(AttachmentQuery(),srcIS,ssmSlave)
    destQR = search(AttachmentQuery(),destIS,ssmMaster)

    srcAttachments = srcQR.getDocCount()
    destAttachments = destQR.getDocCount()

    srcQR = search('-isattachment:*',srcIS,ssmSlave)
    destQR = search('-isattachment:*',destIS,ssmMaster)

    srcNoAttachments = srcQR.getDocCount()
    destNoAttachments = destQR.getDocCount()

    print 'SOURCE',slaveURL,'DEST',masterURL
    print 'SOURCE Documents:',srcDocCount,'DEST Documents:',destDocCount
    print 'SOURCE Messages:',srcMessages,'DEST Messages:',destMessages
    print 'SOURCE Attachments:',srcAttachments,'DEST Attachments:',destAttachments
    print 'SOURCE (test data):',srcNoAttachments,'DEST (test data):',destNoAttachments
示例#3
0
def performSampleAudit(custIds,samplePercent,seed):
    global BATCH_SAMPLE_PCT
    mc = ManagementContainer.getInstance()
    cm = mc.getCustomerManager()
    sm = mc.getIndexSearchManager()
    # note that this will be slow -- especially if samplePercent is large and data set size is large
    for custId in custIds:
        cust = cm.getCustomer(custId)

        srcIS = IndexSearchConstraint(None,None)
        srcIS.constrainByIsland(cust.getOldFeedIsland())
        srcIS.constrainByNumberOfHitsToReturn(1)
        srcIS.constrainByCustomerId(custId)

        destIS = IndexSearchConstraint(None,None)
        destIS.constrainByIsland(cust.getFeedIsland())
        destIS.constrainByNumberOfHitsToReturn(1)
        destIS.constrainByCustomerId(custId)

        # get a message count for the customer
        srcQR = searchAndWrap(MainMessageQuery(),srcIS,sm)
        srcDocCount = srcQR.getDocCount()
        print 'Customer',custId,'total messages',srcDocCount,'msgs'
        msgsToSample = int(srcDocCount * samplePercent)
   
        if msgsToSample < 100:
            chunkSize = 10
        elif msgsToSample < 5000:
            chunkSize = 100
        else:
            chunkSize = 1000
        print 'Customer',custId,'Source sample size',msgsToSample,'msgs'

        # ensure 10% on selections per sample
        if samplePercent < 0.10:
            samplePercent = 0.10
        # if corpus is large, sample more per chunk
        if chunkSize >= 1000 and msgsToSample > 50000:
            samplePercent = 0.5

        # perform sample audit
        sampleCountsFromSource(sm,srcIS,destIS,samplePercent,msgsToSample,seed,chunkSize)
示例#4
0
def search(isle, term):
    mc = ManagementContainer.getInstance()
    sm = mc.getIndexSearchManager()
    im = mc.getIslandManager()
    pm = mc.getPartitionManager()

    fIS = IndexSearchConstraint(None, None)
    fIS.constrainByIsland(isle)
    fIS.constrainByNumberOfHitsToReturn(10)
    fIS.constrainByMinimumStorageId(0)
    fIS.setOutputFields(["storageid", "partitionid"])
    fIS.sortBy("storageid", True)

    done = False
    cnt = 0
    lastID = 0
    while not done:
        ok = False
        while not ok:
            try:
                fQR = sm.search(term, fIS, None, CallerApp.INTERNAL)
                ok = True
            except Throwable, t:
                print "Exception caught during search, retry = true", t
                t.printStackTrace()

        numDocs = fQR.getDocCount()
        print "Found numDocs", numDocs

        done = fQR.getDocCount() == 0

        print "Preview some data"
        for doc in fQR.documents():
            # print doc.getPartitionID(),pm.getPartition(doc.getPartitionID()).isReadOnly(),doc.getStorageID(),doc.getReceivedDate()
            print doc.getPartitionID(), doc.getStorageID()
            lastID = doc.getStorageID()
            cnt += 1
        fIS.constrainByMinimumStorageId(lastID)

        # just loop once
        done = True
示例#5
0
def performQuickAudit(custIds):
    mc = ManagementContainer.getInstance()
    cm = mc.getCustomerManager()
    sm = mc.getIndexSearchManager()
    for custId in custIds:
        cust = cm.getCustomer(custId)

        srcIS = IndexSearchConstraint(None,None)
        srcIS.constrainByIsland(cust.getOldFeedIsland())
        srcIS.constrainByNumberOfHitsToReturn(1)
        srcIS.constrainByCustomerId(custId)

        destIS = IndexSearchConstraint(None,None)
        destIS.constrainByIsland(cust.getFeedIsland())
        destIS.constrainByNumberOfHitsToReturn(1)
        destIS.constrainByCustomerId(custId)

        srcQR = searchAndWrap('',srcIS,sm)
        destQR = search('',destIS,sm)

        srcDocCount = srcQR.getDocCount()
        destDocCount = destQR.getDocCount()

        srcQR = searchAndWrap(MainMessageQuery(),srcIS,sm)
        destQR = search(MainMessageQuery(),destIS,sm)

        srcMessages = srcQR.getDocCount()
        destMessages = destQR.getDocCount()

        srcQR = searchAndWrap(AttachmentQuery(),srcIS,sm)
        destQR = search(AttachmentQuery(),destIS,sm)

        srcAttachments = srcQR.getDocCount()
        destAttachments = destQR.getDocCount()

        print 'Customer',custId,'SOURCE Documents:',srcDocCount,'DEST Documents:',destDocCount
        print 'Customer',custId,'SOURCE Messages:',srcMessages,'DEST Messages:',destMessages
        print 'Customer',custId,'SOURCE Attachments:',srcAttachments,'DEST Attachments:',destAttachments
示例#6
0
def test(islandId,custName,numMessages):
    # Exercises IndexSearchManager.resolveLocations for one customer on one
    # island with three different hit limits (1, 2 and numMessages).
    # Every failure path visible here returns 1.
    # NOTE(review): the outer try below has no except/finally clause in this
    # excerpt and no success return is visible -- the function appears to be
    # truncated; confirm against the full file.
    mc = ManagementContainer.getInstance()
    
    # findCustomer signals "not found" with a negative id
    custid = findCustomer(custName)    
    if custid < 0:
        print 'test failed because customer',custName,'was not found'
        return 1
        
    island = mc.getIslandManager().getIsland(islandId)
    
    try:
        # precondition 1: the expected number of messages has been stored
        msgs = findMessages(mc,custid,numMessages)
    
        if msgs.size() < numMessages:
            print 'Fail, Did not find all messages stored, only found', msgs.size()
            return 1
    
        # precondition 2: those messages are present in the search index
        if not checkSearchStatus(mc,msgs,custid):
            print 'Fail, could not find all messages in search index'
            return 1

        ism = mc.getIndexSearchManager()
        isc = IndexSearchConstraint(custid,None)

        # case 1: hit limit 1 -- the call is expected to raise
        # IndexSearchException; returning normally is a failure
        isc.constrainByNumberOfHitsToReturn(1)
        isc.constrainByIsland(island)
        isc.constrainByOffset(0)
        try:
            results = ism.resolveLocations(isc)
            print 'Fail, expected IndexSearchException'
            return 1
        except IndexSearchException, e:
            print 'Pass, IndexSearchException',e
        except:
            # traceback.print_exc writes to stderr and returns None, so the
            # printed line ends with a literal "None"
            print 'Expected IndexSearchException, but got',sys.exc_info(),traceback.print_exc(file=sys.stderr)
            return 1

        # case 2: hit limit 2 -- must not raise; a short result count is
        # only reported, it does not fail the test
        isc.constrainByNumberOfHitsToReturn(2)
        isc.constrainByOffset(0)
        try:
            results = ism.resolveLocations(isc)
            sz = getResultSize(results)
            print 'Pass, hits=2',sz
          
            if sz < numMessages:
                print 'Wrong number of results. Expected >=',numMessages,'Got',sz
        except:
            print 'Unexpected exception caught when hits was set to 2',sys.exc_info(),traceback.print_exc(file=sys.stderr)
            return 1

        # case 3: hit limit numMessages -- must not raise; a short result
        # count is again only reported, together with the raw results
        isc.constrainByNumberOfHitsToReturn(numMessages)
        isc.constrainByOffset(0)
        try:
            results = ism.resolveLocations(isc)
            sz = getResultSize(results)
            print 'Pass, hits=',numMessages,sz
            if sz < numMessages:
                print 'Wrong number of results. Expected >=',numMessages,'Got',sz,results.getResults()
        except:
            print 'Unexpected exception caught when hits was set to',numMessages,sys.exc_info(),traceback.print_exc(file=sys.stderr)
            return 1
示例#7
0
def testArchive(numMessages, numFound, query = ''):
    basePath = ""
    mc = ManagementContainer.getInstance()
    custList = mc.getCustomerManager().findCustomers([SearchConstraint(ICustomerManager.PROP_NAME, SearchConstraintOperator.CONSTRAINT_EQUALS, custname)])
    customerId = custList[0].getCustID()
    print time.asctime(), "Customer Id:", customerId
    reviewer = mc.getUserManager().findUserForEmail(users[0] + '@' + domainName)
    reviewerId = reviewer.getUserID()
    reviewerGroup = mc.getReviewerGroupManager().getReviewerGroup(customerId, REVIEWER_GROUP_NAME)
    if reviewerGroup is None:
        print time.asctime(), 'reviewer group not found'
        sys.exit(1)

    # wait for all msgs to be stored
    msgs = findMessages(mc, customerId, numMessages, True)
    # wait for all msgs to be indexed and searchable
    print time.asctime(), 'waiting for all messages to be searchable'
    if not checkSearchStatus(mc,msgs,customerId):
        print time.asctime(), 'messages were not searchable in the alotted time'
        sys.exit(1)
    print time.asctime(), 'all messages searchable:', [m.getMessageId() for m in msgs]

    # get list of msg IDs that satisfy query
    foundMsgIDs = []
    ism = mc.getIndexSearchManager()
    isc = IndexSearchConstraint(customerId, None)
    isc.constrainByNumberOfHitsToReturn(2*numMessages)
    qs = UQLSearchCriteria(query, False)
    rs = ism.search(qs, isc, None, CallerApp.RECOVERY_ARCHIVE)
    for m in rs:
        foundMsgIDs.append(m.getStorageID())

    print time.asctime(),'found messages:', foundMsgIDs

    # Creating e-discovery archive
    archive, result, rm = buildArchive(customerId, mc, numFound, reviewerId, reviewerGroup, UQLQuery=query)# download export chunks
    if result is True:
        print time.asctime(), 'exporting archive...'
        basePath = "/tmp/" + str(customerId)
        if os.path.exists(basePath) :
            shutil.rmtree(basePath)
        os.makedirs(basePath)

        out = FileOutputStream(basePath + "/archive.zip")
        rm.createPerUserActiveRecoveryArchiveFile(customerId, archive.getFile().getName(), reviewerId,
                                                  SimpleOutputStreamWrapper(out), None)
        out.close()
        if os.system("cd " + basePath + "; unzip archive.zip") != 0:
            print time.asctime(), 'failed to unzip ' + basePath + '/archive.zip'
            result = False

    # verify exported messages
    if result is True:
        print time.asctime(), 'verifying exported messages...'
        if os.path.exists(os.path.join(basePath, 'nomessagefound.txt')):
            print time.asctime(), 'Archive was empty. No messages found by RecoveryManager'
            result = False
        else:
            archiveMsgs = len(os.listdir(basePath + '/' + str(reviewerId) + '/inbox')) / 2
            if archiveMsgs != numFound:
                print time.asctime(), 'Archive message count incorrect:', numFound, '!=', archiveMsgs
                result = False

            for msgId in foundMsgIDs:
                archiveFile = basePath + '/' + str(reviewerId) + '/inbox/' + str(msgId)
                if not os.path.isfile(archiveFile + '.gz'):
                    print time.asctime(), 'Archive message not present: ' + archiveFile + '.gz'
                    result = False
                if not os.path.isfile(archiveFile + '.key'):
                    print time.asctime(), 'Archive key not present: ' + archiveFile + '.key'
                    result = False

    # verify EDRM
    if result is True:
        print time.asctime(), 'verifying EDRM XML...'
        minStorageId = min(foundMsgIDs)
        maxStorageId = max(foundMsgIDs)

        edrmFile = basePath + '/edrm_v1-2_' + str(minStorageId) + '-' + str(maxStorageId) + '.xml'
        if not os.path.isfile(edrmFile):
            print time.asctime(), 'EDRM XML not present: ' + edrmFile
            result = False

            # TODO - Add XML schema validation
    if result is True:
        print time.asctime(), 'on-premises/cloud message export successful'
    else:
        print time.asctime(), 'on-premises/cloud message export failed'


    if result is True:
        os.system("rm -rf " + basePath )
        sys.exit(0)
    else:
        sys.exit(1)