def testMongoLookup(self):
    ''' Checks whether a timeseries document already exists for this period. '''

    # Get connection to mongo
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)    # Set up collections

    # New timeseries object with data
    ts = timeSeries()
    ts.importData(self.kw, blockPrecision=1)

    # Check the count - should be 0 before the doc gets inserted
    count = ts.mongoLookup()
    self.assertEquals(count, 0)

    # Build and insert a new mongo formatted document
    success = ts.insertBlankDoc()

    # Count should be 1 now that the document has been inserted
    count = ts.mongoLookup()
    self.assertEquals(count, 1)

    # Clean up, remove the content and close the connection
    #dbh.timeseries.remove()
    mdb.close(c, dbh)
def testInsertBaselineDoc(self):
    ''' Inserts a completed baseline document into the baseline collection.'''

    # Connect and get handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # Build a keyword object
    testKywd = kw(keyword='keyword1',
                  timeStamp=datetime.datetime(2011, 6, 22, 12, 10, 45),
                  lat=34.4, lon=45.5,
                  text='this text contained the hashtag #keyword1',
                  tweetID=346664,
                  userID=4444,
                  source='twitter')

    # Instantiate the baseline object/class
    baseLine = bl.baseline(kywd=testKywd, cellBuildPeriod=600)

    # Build the document and insert it
    doc = baseLine.buildDoc()
    bl.insertBaselineDoc(dbh, doc)

    res = dbh.baseline.find()[0]
    print res

    self.assertEquals(res['keyword'], 'keyword1')
    self.assertEquals(res['mgrs'], '38SND4595706622')
    self.assertEquals(res['mgrsPrecision'], 10)

    # Close the connection
    mdb.close(c, dbh)
def testUpdateDocument(self):
    ''' Function updates/increments a specific hour.minute in a document. '''

    # Get connection to mongo
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)    # Set up collections

    # New timeseries object with data
    ts = timeSeries()
    ts.importData(self.kw, blockPrecision=24)
    success = ts.insertBlankDoc()
    self.assertEquals(success, 1)

    # Update/increment a specific hour.minute
    ts.updateCount()

    # Run a query for this item
    outDocs = dbh.timeseries.find({'data.12.1': 1})
    for doc in outDocs:
        print doc
        self.assertEquals(doc['mgrs'], '38SND4595706622')

    # Close the connection
    mdb.close(c, dbh)
def testlastBaselined(self):
    ''' Builds a baseline document for inserting.'''

    # Connect and get handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # Build a keyword object
    testKywd = kw(keyword='keyword1',
                  timeStamp=datetime.datetime(2011, 6, 22, 12, 10, 45),
                  lat=34.4, lon=45.5,
                  text='this text contained the hashtag #keyword1',
                  tweetID=346664,
                  userID=4444,
                  source='twitter')

    # Create a new baseline object
    baseLine = bl.baseline(kywd=testKywd, cellBuildPeriod=600)

    baseLine.outputs['days30_all'] = 0.5
    baseLine.outputs['days7_all'] = 0.4
    baseLine.outputs['hrs30_all'] = 0.3
    baseLine.outputs['days30_weekly'] = 0.2
    baseLine.outputs['days7_daily'] = 0.1

    doc = baseLine.buildDoc()
    bl.insertBaselineDoc(dbh, doc)

    # Method returns the date of last baseline calculation
    lastBaseline = baseLine.lastBaselined()
    self.assertEquals(lastBaseline, datetime.datetime(2011, 6, 22, 12, 10))

    # Close the connection
    mdb.close(c, dbh)
def main():
    ''' Builds the collections and indexes needed for the bam mongo work.
        See also /src/tests/testMdb for full tests of the base functions. '''

    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config"
    #path = 'home/dotcloud/code/config/'
    file = "mongoSetup.cfg"
    params = getConfig(path, file)

    # Get a db handle
    if params.verbose == True:
        print "Get Mongo Handle."
    c, dbh = mdb.getHandle(host=params.host, port=params.port, db=params.db)

    # Set up collections
    if params.verbose == True:
        print "Setup the mongo collections."
    mdb.setupCollections(c, dbh, params.db, params.collections, params.dropDb)

    # Get the collection handles
    timeSeriesHandle = dbh[params.timeseries]
    baselineHandle = dbh[params.baseline]
    alertsHandle = dbh[params.alerts]
    mappingHandle = dbh[params.mapping]

    # Set up the indexes on the collections
    if params.verbose == True:
        print "Setup the mongo indexes."
    setupTimeseriesIndexes(timeSeriesHandle, dropIndexes=params.dropIdx)
    setupAlertsIndexes(alertsHandle, dropIndexes=params.dropIdx)
    setupBaselineIndexes(baselineHandle, dropIndexes=params.dropIdx)

    # Close the connection
    if params.verbose == True:
        print "Closing the connection."
    mdb.close(c, dbh)
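# --- Illustrative sketch (not part of the original source) ---
# main() above calls setupTimeseriesIndexes/setupAlertsIndexes/setupBaselineIndexes,
# which are not shown in this excerpt. A minimal version of one of them might look
# like the following, assuming standard pymongo and that the timeseries collection is
# queried on 'mgrs' and 'keyword' as in the tests above; the real field list may differ.
import pymongo

def setupTimeseriesIndexes(collectionHandle, dropIndexes=False):
    ''' Hypothetical index setup for the timeseries collection. '''
    if dropIndexes:
        # Remove any existing indexes first
        collectionHandle.drop_indexes()
    # Compound index supporting the mgrs + keyword lookups used by the tests
    collectionHandle.create_index([('mgrs', pymongo.ASCENDING),
                                   ('keyword', pymongo.ASCENDING)])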
def testBuildFullArrayFlat(self):
    '''Build a full FLATTENED array from a cursor result'''

    st = datetime.datetime.utcnow()

    # A keyword that went in yesterday creates a timeseries yesterday
    nowDt = datetime.datetime(year=2011, month=1, day=12, hour=11, minute=1, second=1)
    oneDay = datetime.timedelta(days=1)

    # Get a db handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)    # Set up collections

    # Build a keyword
    kword = kw(keyword='keyword1',
               timeStamp=nowDt - oneDay,
               lat=34.4, lon=45.5,
               text='this text contained the hashtag #keyword1',
               tweetID=346664,
               userID=4444,
               source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Insert 2ND DOC IN THE COLLECTION
    kword.timeStamp = nowDt
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    nowDate = nowDt.replace(hour=0, minute=0, second=0, microsecond=0)

    # Last 1 weeks worth of documents
    resultSet = bl.getResultsPerCell(dbh, '38SND4595706622', 'keyword1', nowDate, 168)

    # Close the connection
    mdb.close(c, dbh)

    # Inputs
    period = datetime.timedelta(days=7)

    dates, data = bl.buildFullArray(resultSet, nowDate, period, 1)

    firstDay = dates[0]
    lastDay = dates[-1]

    self.assertEquals(data.shape[0], 11520)
    self.assertEquals(firstDay, nowDate - period)
    self.assertEquals(lastDay, nowDate)
def testGetAllCountForOneCellLookback(self):
    ''' Gets a count for a single cell'''

    tweetTime = datetime.datetime(2011, 1, 2, 12, 5, 15)
    oldTweetTime = tweetTime - datetime.timedelta(seconds=15*60)
    baselineTime = datetime.datetime(2011, 1, 2, 12, 0, 0)

    # Get a db handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)    # Set up collections

    # Build a keyword
    kword = kw(keyword='keyword1',
               timeStamp=tweetTime,
               lat=34.4, lon=45.5,
               text='this text contained the hashtag #keyword1',
               tweetID=346664,
               userID=4444,
               source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Last 2 documents
    lookback = 24
    mgrs = '38SND4595706622'
    qKeyword = 'keyword1'

    res = bl.getResultsPerCell(dbh, collection='timeseries', mgrs=mgrs, keyword=qKeyword,
                               inDate=baselineTime, lookback=lookback)
    print res

    results = []
    for doc in res:
        print doc
        results.append(doc)

    self.assertEqual(len(results), 1)

    # Close the connection
    mdb.close(c, dbh)
def testInsertBlankDoc(self):
    ''' Checks the successful inserting of a mongo document '''

    # Get connection to mongo
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)    # Set up collections

    # New timeseries object with data
    ts = timeSeries()
    ts.importData(self.kw, blockPrecision=1)

    # Build and insert a new mongo formatted document
    success = ts.insertBlankDoc()
    self.assertEquals(success, 1)

    # Clean up and drop it
    #dbh.timeseries.remove()

    # Close the connection
    mdb.close(c, dbh)
def testGetAllCountForOneCell(self):
    ''' Gets a count for a single cell'''

    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)    # Set up collections

    tweetTime = datetime.datetime(2011, 1, 2, 12, 5, 15)
    oldTweetTime = tweetTime - datetime.timedelta(seconds=11*60)

    # Build a keyword to represent the baseline
    kword = kw(keyword='keyword1',
               timeStamp=oldTweetTime,
               lat=34.4, lon=45.5,
               text='this text contained the hashtag #keyword1',
               tweetID=346664,
               userID=4444,
               source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Build a keyword
    kword = kw(keyword='keyword1',
               timeStamp=tweetTime,
               lat=34.4, lon=45.5,
               text='this text contained the hashtag #keyword1',
               tweetID=346664,
               userID=4444,
               source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # ALL DOCUMENTS
    mgrs = '38SND4595706622'
    keyword = 'keyword1'

    # This indate represents when the baseline was run (12:10) minus the interest period (10 minutes)
    inDate = datetime.datetime(2011, 1, 2, 12, 0, 0)

    results = bl.getResultsPerCell(dbh, collection='timeseries', mgrs=mgrs, keyword=keyword, inDate=inDate)
    self.assertEqual(len(results), 1)

    # Close the connection
    mdb.close(c, dbh)
def testBuildFullArray(self):
    '''Build a full array from a cursor result'''

    # Get a db handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)    # Set up collections

    # Build a keyword
    kword = kw(keyword='keyword1',
               timeStamp=datetime.datetime(2011, 1, 2, 12, 1, 1),
               lat=34.4, lon=45.5,
               text='this text contained the hashtag #keyword1',
               tweetID=346664,
               userID=4444,
               source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Modify the timestamp and insert a second document
    kword.timeStamp = datetime.datetime(2011, 1, 1, 12, 1, 1)
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Last 1 weeks worth of documents
    resultSet = bl.getResultsPerCell(dbh, '38SND4595706622', 'keyword1', datetime.datetime(2011, 1, 2), 168)

    # Inputs
    inDate = datetime.datetime(2011, 1, 2, 0, 0)
    period = datetime.timedelta(days=7)
    flat = None

    dates, data = bl.buildFullArray(resultSet, inDate, period, flat)

    self.assertEquals(len(dates), 8)
    self.assertEquals(len(data), 8)

    # Close the connection
    mdb.close(c, dbh)
def main():
    ''' Script to build tweet objects from the VAST dataset and place them on a Queue
        and/or JMS for testing purposes.

        LIKELY SPEED IMPROVEMENTS:
        - BUILDING BLANK ARRAYS IN THE TIME SERIES TAKES A WHILE
        - PUTTING THE KEYWORDS IN A QUEUE, HAVING SET UP THE THREADS TO PROCESS EACH ONE.
        - ANY DUPLICATION CHECKS? '''

    start = datetime.datetime.utcnow()
    tweetProcessTimes = datetime.timedelta(seconds=0)
    #dripRate = 1.5

    # JMS destination
    destination = '/topic/test.vasttweets'
    hostIn = 'localhost'
    portIn = 61613

    # Reset the collections
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)    # Set up collections

    #jms = jmsCode.jmsHandler(hostIn, portIn, verbose=True)
    # Make the JMS connection via STOMP and the jmsCode class
    #jms.connect()

    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/data/"
    #fName= "MicroblogsSample.csv"
    fName = "Microblogs.csv"
    outFName = "MicroblogsOrdered.csv"

    f = retrieveFile(path, fName)
    fo = open(os.path.join(path, outFName), 'w')

    x = 0

    # Start time
    earliestTweet = datetime.datetime(2011, 5, 18, 13, 25)
    earliestTweet = time.mktime(time.struct_time(earliestTweet.timetuple()))
    lastTweetTime = earliestTweet
    print "First Tweet Time: ", lastTweetTime

    # This speeds things up from seconds to minutes
    speedUpRate = 60.0

    records = []

    # Loop the lines and build tweet objects
    for line in f.readlines():
        #print line

        # Extract content from each line
        line = line.rstrip('\r').rstrip('\n').rstrip('\r')

        # Skip the header line
        if x == 0:
            x += 1
            continue

        if x % 1000 == 0:
            print "processed: ", x

        #if x > 1000:
        #    break
        #    sys.exit(0)

        line = line.split(',')
        tweetId, dt, latLon, text = line

        # Get the datetime group into seconds since UNIX time
        dtg = getTime(tweetId, dt)
        if not dtg:
            continue

        record = [tweetId, dtg, latLon, text]
        records.append(record)

        x += 1

    f.close()

    # Sort the records by the datetime column
    sortedTable = sortTable(records, col=1)

    # Now loop the sorted list and write out to a new file
    for record in sortedTable:
        lineOut = "%s,%s,%s,%s\n" % (record[0], record[1], record[2], record[3])
        fo.write(lineOut)

    fo.close()
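# --- Illustrative sketch (not part of the original source) ---
# The ordering script above depends on retrieveFile() and sortTable(), which are not
# shown in this excerpt. Minimal, hypothetical versions consistent with how they are
# called might look like this; the real implementations may differ.
import os
from operator import itemgetter

def retrieveFile(path, fName):
    ''' Open a data file for reading and return its handle (assumed behaviour). '''
    return open(os.path.join(path, fName), 'r')

def sortTable(records, col=1):
    ''' Return the records sorted by the given column index (assumed behaviour). '''
    return sorted(records, key=itemgetter(col))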
def testProcessBaselineLast30Days(self):
    ''' Checks accurate population of an array for 30 day all '''

    # Connect and get handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # Set up some times to work with
    tweetTime = datetime.datetime.utcnow()
    thisMinute = tweetTime.replace(second=0, microsecond=0)
    today = tweetTime.replace(hour=0, minute=0, second=0, microsecond=0)

    # Thirty days ago
    lastMonthTweet = tweetTime - datetime.timedelta(days=30)

    # Build a keyword object
    testKywd = kw(keyword='keyword1',
                  timeStamp=lastMonthTweet,
                  lat=34.4, lon=45.5,
                  text='this text contained the hashtag #keyword1',
                  tweetID=346664,
                  userID=4444,
                  source='twitter')

    # Insert a new timeseries object for the tweet 30 days ago
    ts = timeSeries()
    ts.importData(testKywd)
    success = ts.insertBlankDoc()
    ts.updateCount()

    # Create a keyword object for a second tweet an hour later
    testKywd2 = kw(keyword='keyword1',
                   timeStamp=lastMonthTweet + datetime.timedelta(hours=1),
                   lat=34.4, lon=45.5,
                   text='this text contained the hashtag #keyword1',
                   tweetID=346664,
                   userID=4444,
                   source='twitter')

    # Insert the second keyword - NOTE HOW THIS IS AFTER THE BASELINE BUILD
    ts2 = timeSeries()
    ts2.importData(testKywd2)
    success = ts2.insertBlankDoc()
    ts2.updateCount()

    # Create a keyword object for the current tweet
    testKywd3 = testKywd
    testKywd3.timeStamp = tweetTime

    # Instantiate the baseline object/class
    base = bl.baseline(kywd=testKywd3, cellBuildPeriod=600)

    if base.needUpdate == True:
        if not base.lastBaselined():
            doc = base.buildDoc()
            bl.insertBaselineDoc(dbh, doc)

    # Insert the current keyword - NOTE HOW THIS IS AFTER THE BASELINE BUILD
    ts3 = timeSeries()
    ts3.importData(testKywd3)
    success = ts3.insertBlankDoc()
    ts3.updateCount()

    tweetTimeMinus2Days = tweetTime - datetime.timedelta(days=2)

    # Create a new keyword object to test the daily slicing
    testKywd5 = kw(keyword='keyword1',
                   timeStamp=tweetTimeMinus2Days,
                   lat=34.4, lon=45.5,
                   text='this text contained the hashtag #keyword1',
                   tweetID=346664,
                   userID=4444,
                   source='twitter')

    # Insert this keyword - NOTE HOW THIS IS AFTER THE BASELINE BUILD
    ts5 = timeSeries()
    ts5.importData(testKywd5)
    success = ts5.insertBlankDoc()
    ts5.updateCount()

    # Process Baseline
    base.processBaseline()

    # Get back the 30 day array
    arr = base.test30DayArray

    # Calculate what the array length should be
    soFarToday = (thisMinute - today).seconds / 60.0

    # The start of the array datetime
    lastMonthDay = lastMonthTweet.replace(hour=0, minute=0, second=0, microsecond=0)

    # The number of days between today and the start of the array (then in minutes)
    dateDiff = (today - lastMonthDay)
    minsDiff = dateDiff.days * 1440 + dateDiff.seconds / 60.0
    total = minsDiff + soFarToday

    # Confirm it's the right length
    self.assertEqual(total, len(arr))

    # Get the minutes for the first 2 keywords (the third shouldn't be there)
    kwd1Min = int((testKywd.timeStamp - lastMonthDay).seconds / 60)
    kwd2Min = int((testKywd2.timeStamp - lastMonthDay).seconds / 60)

    kwd1Test = [arr[kwd1Min-1], arr[kwd1Min], arr[kwd1Min+1]]
    kwd2Test = [arr[kwd2Min-1], arr[kwd2Min], arr[kwd2Min+1]]

    # Print any non-zero minutes for debugging
    for j, val in enumerate(arr):
        if val > 0:
            print j, val

    self.assertEquals(kwd1Test, [0, 1, 0])
    self.assertEquals(kwd2Test, [0, 1, 0])

    # 30 DAY TIME SLICE CHECK
    arr = base.test30DaySliced

    # weekly
    testSliced = int(30/7) * 6 * 60
    self.assertEquals(testSliced, len(arr))

    arr7Day = base.test7DayArray
    test7DayAll = (thisMinute - today).seconds / 60.0 + 1440 * 7
    self.assertEquals(len(arr7Day), int(test7DayAll))

    arr30Hrs = base.test30hrArray
    test30Hours = 30 * 60
    self.assertEquals(len(arr30Hrs), int(test30Hours))

    # Close the connection
    mdb.close(c, dbh)
def main():
    ''' Script to build tweet objects from the VAST dataset and place them on a Queue
        and/or JMS for testing purposes.

        LIKELY SPEED IMPROVEMENTS:
        - BUILDING BLANK ARRAYS IN THE TIME SERIES TAKES A WHILE
        - PUTTING THE KEYWORDS IN A QUEUE, HAVING SET UP THE THREADS TO PROCESS EACH ONE.
        - ANY DUPLICATION CHECKS? '''

    db = 'bam'
    host = 'localhost'
    port = 27017

    start = datetime.datetime.utcnow()
    tweetProcessTimes = datetime.timedelta(seconds=0)

    blUnits = 'minute'
    blPrecision = 10
    baselineParameters = [blUnits, blPrecision]
    mgrsPrecision = 2
    #dripRate = 1.5

    # JMS destination
    #destination = '/topic/test.vasttweets'
    #hostIn = 'localhost'
    #portIn = 61613

    # Reset the collections
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)    # Set up collections
    dbh = mdb.setupIndexes(dbh)

    #jms = jmsCode.jmsHandler(hostIn, portIn, verbose=True)
    # Make the JMS connection via STOMP and the jmsCode class
    #jms.connect()

    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/data/"
    #fName= "MicroblogsSample.csv"
    fName = "MicroblogsOrdered.csv"

    tweetStats = 'tweetStatsFile_50000.csv'
    tptFile = open(path + tweetStats, 'w')

    # The script used to generate the baseline
    baselinePath = '/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/src/scripts/'
    baselineScript = 'subprocessBaseline.py'
    scriptFile = os.path.join(baselinePath, baselineScript)

    f = retrieveFile(path, fName)

    x = 0

    # Start time
    earliestTweet = datetime.datetime(2011, 4, 30, 0, 0)
    earliestTweet = time.mktime(time.struct_time(earliestTweet.timetuple()))
    lastTweetTime = earliestTweet
    print "First Tweet Time: ", lastTweetTime

    # This speeds things up from seconds to minutes
    speedUpRate = 1000

    # Build a blank timeseries array to save it being built everytime
    blankData = buildBlankData(hours=24)

    # Loop the lines build tweet objects
    for line in f.readlines():
        #print line

        # Extract content from each line
        line = line.rstrip('\r').rstrip('\n').rstrip('\r')

        if x == 0:
            x += 1
            continue

        if x % 100 == 0:
            print "processed: ", x

        if x > 100000:
            print line
            break
            sys.exit(0)

        line = line.split(',')

        tweetProcessStart = datetime.datetime.utcnow()

        tweetId, dt, latLon, text = line

        # Get the geos
        geos = getGeos(tweetId, latLon)
        if not geos:
            print "skipping this record - bad or no geos"
            continue

        # Get the datetime group into seconds since UNIX time
        dtg = getTime(tweetId, dt)
        if not dtg:
            print "skipping this record - bad or no time"
            continue

        # Get the tweettime into seconds from UNIX
        tweetTime = time.mktime(time.struct_time(dtg.timetuple()))
        #print "The time of this tweet", tweetTime

        # Get the tweet time in seconds since the last tweet
        sinceLastTweet = tweetTime - lastTweetTime
        #print "Time since last tweet", sinceLastTweet

        #delay = sinceLastTweet / speedUpRate
        #print "Delay: ", delay

        # Apply a scaling to it
        #time.sleep(delay)

        # Assign this tweet time to be the last tweet time
        lastTweetTime = tweetTime

        # Build a tweet object
        twt = vastTweet()
        twt.importData(timeStamp=dtg, lat=geos[0], lon=geos[1], text=text, tweetId=tweetId)

        #----------------------------------------------------------------------------------
        # PROCESS INTO KEYWORDS

        # Build into keywords - skipping a step for development
        kywd = processTweet(twt, mgrsPrecision)

        # Add keywords to the list based on hashtags
        kywd.fromHashTag()
        # Add keywords to the list based on name lookup
        kywd.fromLookup()

        if len(kywd.keywords) == 0:
            pass
            #print "No matches: ", twt.text

        xx = 0
        # Now loop the resultant keywords
        for kwObj in kywd.keywords:
            xx += 1
            #print "------------------"
            #print kwObj.keyword
            #print kwObj.text

            #-------------------------------------------------------
            # Pass keyword object into a class
            #ts = timeSeries(host='localhost', port=27017, db='bam')
            ts = timeSeries(c=c, dbh=dbh)
            ts.importData(kwObj, blockPrecision=24)
            success = ts.insertDoc(blankData=blankData, incrementBy=100)

            callBaseliner(scriptFile, host, port, db, kwObj, baselineParameters, mac=1)

        # METRICS - currently about 0.05 seconds per tweet
        tweetProcessStop = datetime.datetime.utcnow()
        tweetProcessTimes += (tweetProcessStop - tweetProcessStart)
        processDif = (tweetProcessStop - tweetProcessStart)
        tptFile.write(str(x) + "," + str(xx) + "," + str(processDif.seconds + processDif.microseconds / 1000000.) + "\n")

        #----------------------------------------------------------------------------------
        # SEND TO JMS WITH THIS CODE

        # Convert it into a JSON object
        #jTwt = twt.vastTweet2Json()
        #print jTwt

        # Push the JSON version of the tweet to the JMS
        #jms.sendData(destination, jTwt, x)
        #----------------------------------------------------------------------------------

        x += 1
        #time.sleep(dripRate)

    # Disconnect from the JMS
    #jms.disConnect()

    end = datetime.datetime.utcnow()
    dif = end - start

    print "Total Tweet Process Time: %s" % tweetProcessTimes.seconds
    print "Average Tweet process time: %s" % (float(tweetProcessTimes.seconds) / float(x))
    print "Tweet Processed: %s" % x
    print "Total Process Time: %s" % (dif)

    # Close the mongo connection
    mdb.close(c, dbh)

    f.close()
    tptFile.close()
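# --- Illustrative sketch (not part of the original source) ---
# buildBlankData() is used above to avoid rebuilding an empty timeseries structure for
# every tweet. A hypothetical version, assuming the 'data.<hour>.<minute>' layout implied
# by the query in testUpdateDocument; the real structure may differ.
def buildBlankData(hours=24):
    ''' Build a blank {hour: {minute: 0}} count structure (assumed layout). '''
    return dict((str(h), dict((str(m), 0) for m in range(60))) for h in range(hours))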