Exemplo n.º 1
0
def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db, 
        creates the user I've specified and returns. '''
    
    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()
    print "got DC environment settings."
    reParams = getRedisEnvironment()
    print "got redis environment settings."
    
    # Authenticate on the admin db
    try:
        c, adminDbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin', user=dcParams.adminUser, password=dcParams.adminPass)
        print 'got handle'
    except:
        print "Failed to get handle under admin."
    # Authentication of the administrator
    #try:
    #    auth = adminDbh.authenticate(dcParams.adminUser, dcParams.adminPass)
    #except Exception, e:
    #    print "Failed to authenticate with mongo db."
    #    print e
    
    # Create a new user
    p = getConfigParameters(configFile)
    # Switch the database handle to that being used from the admin one
    dbh = c[p.db]
    success = dbh.add_user(p.dbUser, p.dbPassword)
    c.disconnect()
    
    try:
        # Authenticate on the admin db
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db=p.db, user=p.dbUser, password=p.dbPassword)
        print 'Connected to the normal db: %s' %(p.db)
    except:
        logging.critical("Failed to connect to db and get handle as user.", exc_info=True)
        sys.exit()
    
    # Write out the new information to the regular config file
    try:
        writeConfigFile(configFile, dcParams)
        print 'Writing out mongo config info.'
        writeConfigFileRedis(configFile, reParams)
        print 'Writing out redis config'
    except:
        logging.critical("Failed in writing params back to config file.", exc_info=True)
    
    mdb.close(c, dbh)
Exemplo n.º 2
0
    def testInsertBaselineDoc(self):
        ''' Inserts a completed baseline document into the baseline collection.'''
        
        # Connect and get handle
        c, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)

        # Build a keyword object
        testKywd = kw(keyword='keyword1',
                   timeStamp=datetime.datetime(2011,6,22,12,10,45),
                   lat=34.4, lon=45.5,
                   text='this text contained the hashtag #keyword1',
                   tweetID=346664, userID=4444, source='twitter')
        
        # Instantiate the baseline object/class
        baseLine = bl.baseline(kywd=testKywd,cellBuildPeriod=600)

        # Build the document and insert it
        doc = baseLine.buildDoc()
        bl.insertBaselineDoc(dbh, doc)
        
        res = dbh.baseline.find()[0]
        print res
        
        self.assertEquals(res['keyword'], 'keyword1')
        self.assertEquals(res['mgrs'], '38SND4595706622')
        self.assertEquals(res['mgrsPrecision'], 10)

        # Close the connection
        mdb.close(c, dbh)
Exemplo n.º 3
0
 def __init__(self, kywd, c=None, dbh=None, host=None, port=None, db=None, baselinePrecision=()):
     ''' Constructor.

         kywd              -- keyword object; its keyword, timeStamp, mgrs and
                              mgrsPrecision attributes are copied onto this object.
         c, dbh            -- existing connection and database handle. When neither
                              is given a new pair is obtained from mdb.getHandle().
         host, port, db    -- passed to mdb.getHandle() when a connection is opened.
         baselinePrecision -- (unit, precision) pair handed to truncateTimeStamp().

         Raises ValueError when baselinePrecision is not a two-item sequence. '''

     # Validate before touching the database so that a bad argument does not
     # leave a freshly opened connection behind.
     try:
         unit, precision = baselinePrecision
     except ValueError:
         raise ValueError("baselinePrecision must be a (unit, precision) pair, got %r" % (baselinePrecision,))

     # Connection for this baseline class
     if not c and not dbh:
         # Get a connection to the db
         self.c, self.dbh = mdb.getHandle(host=host, port=port, db=db)
     else:
         self.c, self.dbh = c, dbh

     self.collection = 'timeseries'

     # Bad, bad version of inheriting keyword - NEED TO FIX THIS
     self.keyword        = kywd.keyword
     self.timeStamp      = kywd.timeStamp
     self.mgrs           = kywd.mgrs
     self.mgrsPrecision  = kywd.mgrsPrecision

     # Has this keyword ever been seen before at this location?
     self.outputs     = {}

     # When was the last baseline processed (units = hour or minute or second; precision = int val)
     # Or... what was the most recent baseline run - parameters come from top level
     self.baselineTimeStamp = truncateTimeStamp(self.timeStamp, unit, precision)

     # Checks to see whether it exists at all or if it needs updating.
     # Outside the object, this is used to decide whether to continue with the processing
     self.needUpdate = self.needUpdating()
Exemplo n.º 4
0
def main(cleanup, ageOff, protectedSubs=None):
    ''' Coordinates which element to cleanup - subscriptions including remote delete of subs from instagram
        or event data - getting rid of the documents that contain the media and metadata'''
    
    # If there is a command to remove or keep protected subs, use it
    if protectedSubs:
        protectedSubs = bool(protectedSubs)
    
    # Get the config information
    os.chdir('/home/dotcloud/code/')
    cwd  = os.getcwd()
    cfgs = os.path.join(cwd, 'config/crowded.cfg')
    p = getConfigParameters(cfgs)
    
    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)

    # Whether to cleanup the subscriptions and subscription collection
    if cleanup == 'subs':
        subsCollHandle=dbh[p.subsCollection]
        res = ageOffSubscriptions(p, subsCollHandle, ageOff=ageOff, protectedSubs=protectedSubs)
        print datetime.datetime.utcnow(), res
    
    # Or the events metadata collection
    elif cleanup == 'events':
        evCollHandle=dbh[p.eventsCollection]
        res = ageOffMetadata(evCollHandle, ageOff=ageOff, protectMedia=protectedSubs)
        print datetime.datetime.utcnow(), res
        
    else:
        print 'Provide either subs or events as the first argument depending on what you want to clean up.'
Exemplo n.º 5
0
def main():
    """ Fetches the camera feed and inserts one document per camera into mongo.

    Connection and feed settings are read from the module-level 'p' object.
    The process exits when the database connection fails, when the feed
    returns no content, or when the content cannot be split into header,
    root URL and cameras.
    """

    # Connect and get db and collection handle
    try:
        c, dbh = mdb.getHandle(p.dbHost, p.dbPort, p.db, p.dbUser, p.dbPassword)
        collectionHandle = dbh[p.camsCollection]
    except Exception:
        logging.critical("DB connection Failed", exc_info=True)
        # collectionHandle is needed for every insert below; carrying on would
        # only end in a NameError after the feed had already been fetched.
        logging.critical("** SCRIPT EXIT **\n%s\n\n" % ("=" * 52))
        sys.exit(1)

    # Get the feed content
    feedContent = hitFeed(p.feedUrl)
    if not feedContent:
        logging.critical("** SCRIPT EXIT **\n%s\n\n" % ("=" * 52))
        sys.exit()

    # Break out the content into head and items
    header, rootUrl, cameras = extractContent(feedContent)
    if not header or not rootUrl or not cameras:
        logging.critical("** SCRIPT EXIT **\n%s\n\n" % ("=" * 52))
        sys.exit()

    # Build the camera root URL
    rootUrl = p.tflDomain + rootUrl
    fc = feedChannel(header)

    # Deal with each of the items
    for camera in cameras:

        # Build an 'item' object based on the RSS item
        item = feedItem(fc, camera, rootUrl)
        item.buildGeoJson()

        # Insert the document into mongo
        mongoInsert(collectionHandle, item)
Exemplo n.º 6
0
    def testUpdateDocument(self):
        ''' Function updates/increments a specific hour.minute in a document.   '''

        # Get connection to mongo
        c, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)         # Set up collections        
       
        # New timeseries object with data
        ts = timeSeries()
        ts.importData(self.kw, blockPrecision=24)

        success = ts.insertBlankDoc()
        self.assertEquals(success, 1)

        # Update/increment a specific hour.minute
        ts.updateCount()

        # Run a query for this item
        outDocs = dbh.timeseries.find({'data.12.1':1})

        for doc in outDocs:
            print doc
            self.assertEquals(doc['mgrs'], '38SND4595706622')

        # Close the connection
        mdb.close(c, dbh)
Exemplo n.º 7
0
    def MongoLookup(self):
        ''' Fn checks whether a timeseries document already exists for this period.   '''

        # Get connection to mongo
        c, dbh = mdb.getHandle()
        try:
            # Set up collections
            dbh = mdb.setupCollections(dbh, dropCollections=True)

            # New timeseries object with data
            ts = timeSeries()
            ts.importData(self.kw, blockPrecision=1)

            # Check the count - should be 0 before the doc gets inserted
            count = ts.mongoLookup()
            self.assertEqual(count, 0)

            # Build and insert a new mongo formatted document
            ts.insertBlankDoc()

            # Count should be 1 now that the document has been inserted
            count = ts.mongoLookup()
            self.assertEqual(count, 1)
        finally:
            # Close the connection even when an assertion above fails
            mdb.close(c, dbh)
Exemplo n.º 8
0
def main():
    ''' Pre-allocates blank timeseries documents for the mgrs/keyword pairs
        returned by getCommonMgrsAndKeyword().

        Settings come from preAllocateTimeSeries.cfg. Pairs are only looked up
        when 'yesterday' is listed in p.baselineTypes; otherwise nothing is
        inserted. '''

    # Config file parameters
    pathIn = '/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config/'
    fileIn = 'preAllocateTimeSeries.cfg'

    # Get parameters from config
    p = params(pathIn, fileIn)

    # Connect and get db and collection handle
    c, dbh = mdb.getHandle(p.host, p.port, p.db)
    try:
        collectionHandle = dbh[p.coll]

        # Current datetime
        # NOTE(review): the date is hard-coded; the utcnow() variant is kept for reference.
        #today = datetime.datetime.utcnow().replace(hour=0, minute=0,second=0,microsecond=0)
        today = datetime.datetime(2011,5,1)

        # A list to hold the timeseries we need to pre-allocate for.
        # It must exist even when 'yesterday' is not a configured baseline
        # type, otherwise the loop below raises a NameError.
        preAllocate = []

        # Get pairs to be pre-allocated from yesterday - lookback is in days
        if 'yesterday' in p.baselineTypes:
            preAllocate, minCount, maxCount = getCommonMgrsAndKeyword(collectionHandle, p.mgrsPrecision, p.nothingFoundKeyword, today, lookback=1)

        # Now loop the keyword/mgrs pairs and build new timeseries documents for today.
        # Each document gets its own blank data from buildBlankData().
        for item in preAllocate:
            insertDoc(collectionHandle, item['mgrs'], p.mgrsPrecision, item['keyword'], 'twitter', today, buildBlankData())
    finally:
        # Close the connection even when a lookup or insert fails
        mdb.close(c, dbh)
Exemplo n.º 9
0
    def testGetCurrentTags(self):

        import mdb
        cwd = os.getcwd()
        parent = os.path.dirname(cwd)
        cfgs = os.path.join(parent, 'config/twitterCrowded.cfg')
        p = getConfigParameters(cfgs)

        # The mongo bits
        try:
            c, dbh = mdb.getHandle(host=p.dbHost,
                                   port=p.dbPort,
                                   db=p.db,
                                   user=p.dbUser,
                                   password=p.dbPassword)
            evCollHandle = dbh[p.eventsCollection]
        except:
            print "Failed to connect to mongo."
            sys.exit(1)

        res = cf.getCurrentTags(evCollHandle, 'shitbrick')
        self.assertEquals(res, ['hellworld', 'fuckhole', 'shitbrick'])

        # Quick test chucked in
        results = cf.getQueryBBox(evCollHandle)
        print results
Exemplo n.º 10
0
    def testlastBaselined(self):
        ''' Inserts a baseline document and checks the timestamp returned by
            lastBaselined(). '''

        # Connect and get handle
        c, dbh = mdb.getHandle()
        try:
            dbh = mdb.setupCollections(dbh, dropCollections=True)

            # Build a keyword object
            testKywd = kw(keyword='keyword1',
                          timeStamp=datetime.datetime(2011,6,22,12,10,45),
                          lat=34.4, lon=45.5,
                          text='this text contained the hashtag #keyword1',
                          tweetID=346664, userID=4444, source='twitter')

            # Create a new baseline object
            baseLine = bl.baseline(kywd=testKywd, cellBuildPeriod=600)

            baseLine.outputs['days30_all']      = 0.5
            baseLine.outputs['days7_all']       = 0.4
            baseLine.outputs['hrs30_all']       = 0.3
            baseLine.outputs['days30_weekly']   = 0.2
            baseLine.outputs['days7_daily']     = 0.1

            doc = baseLine.buildDoc()
            bl.insertBaselineDoc(dbh, doc)

            # Method returns the date of last baseline calculation
            lastBaseline = baseLine.lastBaselined()
            self.assertEqual(lastBaseline, datetime.datetime(2011,6,22,12,10))
        finally:
            # Close the connection even when the assertion above fails
            mdb.close(c, dbh)
Exemplo n.º 11
0
    def __init__(self, c=None, dbh=None, host=None, port=None, db=None):
        ''' Stores a mongo connection and database handle on the instance,
            opening a new pair via mdb.getHandle() when neither is supplied.
            ***NOT TESTED*** '''

        if c or dbh:
            # Re-use whatever the caller handed in
            self.c = c
            self.dbh = dbh
        else:
            # Get a connection to the db
            handles = mdb.getHandle(host=host, port=port, db=db)
            self.c, self.dbh = handles
Exemplo n.º 12
0
def main(p, mediaOnly=None):
    """ Coordinates a new twitter stream connection"""

    # Logging config
    logFile = os.path.join(p.errorPath, p.connErrorFile)
    logging.basicConfig(filename=logFile, format="%(levelname)s:: %(asctime)s %(message)s", level=p.logLevel)

    # The mongo bits
    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
        evCollHandle = dbh[p.eventsCollection]
    except:
        logging.critical("Failed to connect to db and authenticate.", exc_info=True)
        sys.exit()

    # Here's the redis queue for managing the tweets as they come in
    try:
        q = RedisQueue(p.redisName, host=p.redisHost, password=p.redisPassword, port=p.redisPort, db=0)
    except:
        logging.critical("REDIS: Failed to connect in connectionClient.py. ", exc_info=True)
        sys.exit()

    # Connection placeholder in case the exception catches the drop out
    connection = True

    while connection == True:

        # Get the existing tags and add the current
        try:
            tags = cf.getCurrentTags(evCollHandle)
        except:
            tags = None
            logging.error("Failed to get current tags from db.", exc_info=True)

        # Build the building boxes
        try:
            bboxes = cf.getCurrentBBoxes(evCollHandle)
        except:
            bboxes = None
            logging.error("Failed to get current BBOXes from db.", exc_info=True)

        if not tags and not bboxes:
            logging.warning("Currently no tags or bboxes in the db.")
            sys.exit()

        try:
            print tags, bboxes
            with tweetstream.FilterStream(p.sourceUser, p.sourcePassword, track=tags, locations=bboxes) as stream:
                for tweet in stream:
                    if mediaOnly:
                        try:
                            q.put(json.dumps(tweet))
                        except:
                            logging.critical("Failed to put tweet on redis. This tweet: \n%s" % (tweet), exc_info=True)

        except tweetstream.ConnectionError:
            logging.critical("Disconnected from twitter", exc_info=True)
Exemplo n.º 13
0
def main(configFile, subscriptionType, source):
    ''' Coordinates the retrieval of public CCTV camera URLs to crowded. '''
    
    # Get the config information into a single object
    p = getConfigParameters(configFile)
    
    #////////////////////////////////////////////////////////
    if source == 'cctv':
        url = p.tflUrl
    elif source == 'youtube':
        url = p.socialiseUrl
    elif source == 'flickr':
        url = p.viewFinderUrl
    # More sources here and adds to the config file
    #////////////////////////////////////////////////////////
        
    # Mongo connection parameters
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    collHandle = dbh['subs']
    evCollHandle = dbh['events']
    
    # Get the active subs
    activeSubs = getActiveSubs(collHandle, type=subscriptionType)

    # Barf at this point if there's nothing in subs
    if not activeSubs or len(activeSubs) < 1:
        mdb.close(c, dbh)
        return None

    # For each active active subscription, query by geography
    for aSub in activeSubs:
        
        print 'ASUB:', aSub
        if subscriptionType == 'geography':
            lon, lat = aSub['loc']
            radius = float(aSub['radius'])
            media = queryByGeo(url, lat, lon, radius)
        
        elif subscriptionType == 'tag':
            tag = aSub['objectId']
            media = queryByTag(url, tag)
        
        # For each of the images, update the correct event url list
        for image in media:
            # Mod the datetime into a python dt
            try:
                img = datetime.datetime.strptime(image['captured'], "%Y-%m-%dT%H:%M:%S")
            except Exception, e:
                img = datetime.datetime.strptime(image['published'], "%Y-%m-%dT%H:%M:%S")
            image['dt'] = img    
            
            success = updateEvents(evCollHandle, aSub['objectId'], image)
            if success == None:
                print "Failed to update event ID '%s' with media: \n %s" %(aSub['objectId'], image)
Exemplo n.º 14
0
def getMongoHandles(p):
    ''' Gets the mongo connection handle, authentication and the collection handle.  '''

    # Handles the mongo connections
    c, dbh = mdb.getHandle(db=p.db, host=p.dbHost, port=p.dbPort)

    # Authentication
    try:
        auth = dbh.authenticate(p.dbUser, p.dbPassword)
    except Exception, e:
        print "Failed to authenticate with mongo db."
        print e
Exemplo n.º 15
0
def getMongoHandles(p):
    ''' Gets the mongo connection handle, authentication and the collection handle.  '''

    # Handles the mongo connections
    c, dbh = mdb.getHandle(db=p.db, host=p.dbHost, port=p.dbPort)

    # Authentication
    try:
        auth = dbh.authenticate(p.dbUser, p.dbPassword)
    except Exception, e:
        print "Failed to authenticate with mongo db."
        print e
Exemplo n.º 16
0
    def __init__(self):
        ''' Loads the listener configuration, opens the database connection
            and builds a blank data array for easy inserting. '''

        # Reads in a load of config information
        configDir = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config/"
        configName = "keywordListenerConfig.cfg"
        self.getConfig(configDir, configName)

        # Handles the mongo connections
        handles = mdb.getHandle(db=self.db, host=self.host, port=self.port)
        self.c, self.dbh = handles

        # Build a blank timeseries array to save it being built everytime
        self.blankData = buildBlankData(hours=24)
Exemplo n.º 17
0
def main(p):
    ''' Reads tweets from the redis queue and dispatches each one to the
        events it matches by tag or by bounding box.

        p -- parameter object holding the mongo and redis connection settings.

        Tags and bounding boxes are read from the events collection once, at
        start-up. The loop then runs until the process is stopped. Exits the
        process when mongo or redis cannot be set up. '''

    # The mongo bits
    try:
        c, dbh = mdb.getHandle(host=p.dbHost,
                               port=p.dbPort,
                               db=p.db,
                               user=p.dbUser,
                               password=p.dbPassword)
        evCollHandle = dbh[p.eventsCollection]
    except Exception:
        logging.critical('Failed to connect and authenticate', exc_info=True)
        sys.exit()

    # Get the current tags
    tags = cf.getCurrentTags(evCollHandle)
    # Get the current bounding boxes
    queryBBoxes = cf.getQueryBBox(evCollHandle)

    # Here's the redis queue for managing the tweets as they come in.
    # Built once: its arguments never change, so rebuilding it for every
    # tweet only created a new queue object per message.
    try:
        q = RedisQueue(p.redisName,
                       host=p.redisHost,
                       password=p.redisPassword,
                       port=p.redisPort,
                       db=0)
    except Exception:
        logging.error('Failed to connect to REDIS db.', exc_info=True)
        sys.exit()

    while True:

        # This call is blocking, so expect it to hang on this point
        tweetStr = q.get()
        tweet = json.loads(tweetStr)

        # Work out which object/event this tweet is associated with
        if tags:
            tweetTags = cf.whichTags(tags, tweet)
            for tweetTag in tweetTags:
                success = dispatchTweet(p, tweet, tweetTag)
                logging.debug("Tag-based message dispatched: %s" % (success))

        if queryBBoxes:
            tweetGeos = cf.matchesCurrentGeos(queryBBoxes, tweet)
            for tweetGeo in tweetGeos:
                success = dispatchTweet(p, tweet, tweetGeo)
                logging.debug("Geo-based message dispatched: %s" % (success))
Exemplo n.º 18
0
def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db,
        creates the user I've specified and returns.

        configFile -- path handed to getConfigParameters() and writeConfigFile().

        Exits the process when either database connection cannot be made. '''

    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()
    # The admin password is deliberately left out: credentials must not end up in log files.
    logging.info("Mongo Params:\n%s\n%s\n%s" %(dcParams.mongoHost, dcParams.mongoPort, dcParams.adminUser))

    # Authenticate on the admin db
    try:
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin', user=dcParams.adminUser, password=dcParams.adminPass)
    except Exception:
        logging.critical('Failed to connect to database as admin.', exc_info=True)
        sys.exit()

    # Create a new user
    p = getConfigParameters(configFile)
    # Switch the database handle to that being used from the admin one
    dbh = c[p.db]
    dbh.add_user(p.dbUser, p.dbPassword)
    c.disconnect()

    try:
        # Reconnect as the newly created user
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    except Exception:
        logging.critical("Failed to connect to db and get handle as user.", exc_info=True)
        sys.exit()

    # Write out the new information to the regular config file.
    # A failure is logged but does not stop the connection being closed.
    try:
        writeConfigFile(configFile, dcParams)
    except Exception:
        logging.critical("Failed in writing params back to config file.", exc_info=True)

    mdb.close(c, dbh)
Exemplo n.º 19
0
def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db, 
        creates the user I've specified and returns. '''
    
    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()
    
    # Authenticate on the admin db
    try:
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin', user=dcParams.adminUser, password=dcParams.adminPass)
    except:
        logging.critical('Failed to connect to database as admin.')
        sys.exit()

    # Create a new user
    p = getConfigParameters(configFile)
    # Switch the database handle to that being used from the admin one
    dbh = c[p.db]
    success = dbh.add_user(p.dbUser, p.dbPassword)
    c.disconnect()
    
    try:
        # Authenticate on the admin db
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    except:
        logging.critical("Failed to connect to db and get handle as user.", exc_info=True)
        sys.exit()
    
    # Write out the new information to the regular config file
    try:
        writeConfigFile(configFile, dcParams)
        print "----- writing out new config parameters."
    except:
        logging.critical("Failed in writing params back to config file.", exc_info=True)
    
    mdb.close(c, dbh)
Exemplo n.º 20
0
def main(configFile=None):
    ''' Builds the collections and indexes needed. '''

    # Get the config information into a single object
    p = getConfigParameters(configFile)

    # Get a db handle
    if p.verbose==True:
        print "---- Geting Mongo Handle."
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db)
    
    try:
        auth = dbh.authenticate(p.dbUser, p.dbPassword)
    except Exception, e:
        print "Failed to authenticate with mongo db."
        print e
Exemplo n.º 21
0
def getMongoHandles(p):
    ''' Connects to mongo, authenticates and looks up the slang and emo collections.

        Returns a tuple (connection, database handle, slang collection handle,
        emo collection handle). An authentication failure is logged as a
        warning and the handles are still returned. '''

    # Handles the mongo connections
    connection, database = mdb.getHandle(db=p.db, host=p.dbHost, port=p.dbPort)

    # Authentication
    try:
        database.authenticate(p.dbUser, p.dbPassword)
    except:
        logging.warning("Failed to authenticate with mongo db.")

    slangHandle = database[p.slangCollection]
    emoHandle = database[p.emoCollection]

    return connection, database, slangHandle, emoHandle
Exemplo n.º 22
0
    def testBuildFullArrayFlat(self):
        '''Build a full FLATTENED array from a cursor result'''

        # A keyword that went in yesterday creates a timeseries yesterday
        currentStamp = datetime.datetime(year=2011,month=1,day=12,hour=11,minute=1,second=1)
        previousStamp = currentStamp - datetime.timedelta(days=1)

        # Get a db handle
        c, dbh = mdb.getHandle()
        # Set up collections
        dbh = mdb.setupCollections(dbh, dropCollections=True)

        # Build a keyword
        kword = kw(keyword='keyword1', timeStamp=previousStamp, lat=34.4, lon=45.5,
                   text='this text contained the hashtag #keyword1',
                   tweetID=346664, userID=4444, source='twitter')

        # First document: timeseries for the earlier timestamp
        firstSeries = timeSeries()
        firstSeries.importData(kword)
        firstSeries.insertBlankDoc()

        # Second document in the collection: same keyword, current timestamp
        kword.timeStamp = currentStamp
        secondSeries = timeSeries()
        secondSeries.importData(kword)
        secondSeries.insertBlankDoc()

        midnight = currentStamp.replace(hour=0,minute=0,second=0,microsecond=0)

        # Last 1 weeks worth of documents
        resultSet = bl.getResultsPerCell(dbh, '38SND4595706622', 'keyword1', midnight, 168)
        # Close the connection
        # NOTE(review): resultSet is consumed below, after the close - confirm it does not need the connection.
        mdb.close(c, dbh)

        # Inputs
        week = datetime.timedelta(days=7)
        dates, data = bl.buildFullArray(resultSet, midnight, week, 1)

        self.assertEquals(data.shape[0], 11520)
        self.assertEquals(dates[0], midnight - week)
        self.assertEquals(dates[-1], midnight)
Exemplo n.º 23
0
def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db, 
        creates the user I've specified and returns. '''
    
    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()
    print dcParams.adminUser, dcParams.adminPass
    
    # Authenticate on the admin db
    c, adminDbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin')
    print 'got handle'
    # Authentication of the administrator
    try:
        auth = adminDbh.authenticate(dcParams.adminUser, dcParams.adminPass)
    except Exception, e:
        print "Failed to authenticate with mongo db."
        print e
Exemplo n.º 24
0
def main(configFile=None):
    ''' Builds the collections and indexes needed.

        configFile -- path handed to getConfigParameters().

        A connection failure is logged and re-raised. '''

    # Get the config information into a single object
    p = getConfigParameters(configFile)

    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    except Exception:
        logging.critical("Failed to connect to db and get handle.", exc_info=True)
        # Nothing below can run without a handle; re-raise the real error
        # instead of continuing into a NameError on 'dbh'.
        raise

    try:
        # The collections provided and create them and their indexes
        for coll in p.collections:
            collHandle = buildCollection(dbh, p, coll['collection'])
            buildIndexes(p, coll, collHandle)
    finally:
        # Close the connection even when building a collection or index fails
        mdb.close(c, dbh)
Exemplo n.º 25
0
def main(configFile=None):
    ''' Builds the collections and indexes needed. '''

    # Get the config information into a single object
    p = getConfigParameters(configFile)

    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    except:
        logging.warning("Failed to connect to db and get handle.", exc_info=True)

    # The collections provided and create them and their indexes
    for coll in p.collections:
        print "Building Collections and indexes: %s" %coll
        collHandle = buildCollection(dbh, p, coll['collection'])
        indexes = buildIndexes(p, coll, collHandle)
    
    mdb.close(c, dbh)
Exemplo n.º 26
0
    def testGetAllCountForOneCellLookback(self):
        ''' Gets a count for a single cell'''

       
        tweetTime = datetime.datetime(2011,1,2,12,5,15)
        oldTweetTime = tweetTime - datetime.timedelta(seconds=15*60)
        baselineTime = datetime.datetime(2011,1,2,12,0,0)
       
        # Get a db handle
        c, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)         # Set up collections
 
        # Build a keyword
        kword = kw(keyword='keyword1', timeStamp=tweetTime, lat=34.4, lon=45.5,
                           text='this text contained the hashtag #keyword1',
                           tweetID=346664, userID=4444, source='twitter')

        # New timeseries object
        ts = timeSeries()
        ts.importData(kword)
        success = ts.insertBlankDoc()
        
        # Last 2  documents
        lookback = 24
        mgrs    = '38SND4595706622'
        qKeyword = 'keyword1'
        res = bl.getResultsPerCell(dbh,
                                   collection='timeseries', 
                                   mgrs=mgrs, 
                                   keyword=qKeyword, 
                                   inDate=baselineTime,
                                   lookback=lookback)
        print res
        
        results = []
        for doc in res:
            print doc
            results.append(doc)
            
        self.assertEqual(len(results), 1)
        
        # Close the connection
        mdb.close(c, dbh)
Exemplo n.º 27
0
    def InsertBlankDoc(self):
        ''' Checks the successful inserting of a mongo document '''

        # Get connection to mongo
        c, dbh = mdb.getHandle()
        try:
            # Set up collections
            dbh = mdb.setupCollections(dbh, dropCollections=True)

            # New timeseries object with data
            ts = timeSeries()
            ts.importData(self.kw, blockPrecision=1)

            # Build and insert a new mongo formatted document
            success = ts.insertBlankDoc()
            self.assertEqual(success, 1)
        finally:
            # Close the connection even when the assertion above fails
            mdb.close(c, dbh)
Exemplo n.º 28
0
def getEvents(p):
    ''' Returns all currently active events in mongo '''

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost,
                           port=p.dbPort,
                           db=p.db,
                           user=p.dbUser,
                           password=p.dbPassword)
    evCollHandle = dbh[p.eventsCollection]

    try:
        docs = evCollHandle.find(
            fields=['objectId', 'subType', 'start', 'loc', 'radius'])
        docsOut = [d for d in docs]

    except:
        print "No documents matched your query. Object ID: %s." % objectId
        docsOut = []
    mdb.close(c, dbh)

    # Additional fields that might be useful
    for doc in docsOut:
        # Get rid of the mongo ID
        _id = doc.pop('_id')

        if doc.has_key('loc'):

            # calculate the radius in metres
            latScale, lonScale = radialToLinearUnits(float(doc['loc'][1]))
            scale = (latScale + lonScale) / 2.0
            doc['radius_m'] = int(doc['radius'] * scale)

            # Calculate the top left, bottom right
            s = doc['loc'][1] - doc['radius']
            w = doc['loc'][0] - doc['radius']
            n = doc['loc'][1] + doc['radius']
            e = doc['loc'][0] + doc['radius']
            doc['bbox'] = [[w, s], [e, n]]

    return docsOut
Exemplo n.º 29
0
def main(p):
    ''' Reads tweets from the redis queue and dispatches each one to the
        events it matches by tag or by bounding box.

        p -- parameter object holding the mongo and redis connection settings.

        Tags and bounding boxes are read from the events collection once, at
        start-up. The loop then runs until the process is stopped. Exits the
        process when mongo or redis cannot be set up. '''

    # The mongo bits
    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
        evCollHandle = dbh[p.eventsCollection]
    except Exception:
        logging.critical('Failed to connect and authenticate', exc_info=True)
        sys.exit()

    # Get the current tags
    tags = cf.getCurrentTags(evCollHandle)
    # Get the current bounding boxes
    queryBBoxes = cf.getQueryBBox(evCollHandle)

    # Here's the redis queue for managing the tweets as they come in.
    # Built once: its arguments never change, so rebuilding it for every
    # tweet only created a new queue object per message.
    try:
        q = RedisQueue(p.redisName, host=p.redisHost, password=p.redisPassword, port=p.redisPort, db=0)
    except Exception:
        logging.error('Failed to connect to REDIS db.', exc_info=True)
        sys.exit()

    while True:

        # This call is blocking, so expect it to hang on this point
        tweetStr = q.get()
        tweet = json.loads(tweetStr)

        # Work out which object/event this tweet is associated with
        if tags:
            tweetTags = cf.whichTags(tags, tweet)
            for tweetTag in tweetTags:
                success = dispatchTweet(p, tweet, tweetTag)
                logging.debug("Tag-based message dispatched: %s" %(success))

        if queryBBoxes:
            tweetGeos = cf.matchesCurrentGeos(queryBBoxes, tweet)
            for tweetGeo in tweetGeos:
                success = dispatchTweet(p, tweet, tweetGeo)
                logging.debug("Geo-based message dispatched: %s" %(success))
Exemplo n.º 30
0
def main(cleanup, ageOff, protectedSubs=None):
    ''' Coordinates which element to cleanup - subscriptions including remote delete of subs from instagram
        or event data - getting rid of the documents that contain the media and metadata'''

    # If there is a command to remove or keep protected subs, use it
    if protectedSubs:
        protectedSubs = bool(protectedSubs)

    # Get the config information
    os.chdir('/home/dotcloud/code/')
    cwd = os.getcwd()
    cfgs = os.path.join(cwd, 'config/crowded.cfg')
    p = getConfigParameters(cfgs)

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost,
                           port=p.dbPort,
                           db=p.db,
                           user=p.dbUser,
                           password=p.dbPassword)

    # Whether to cleanup the subscriptions and subscription collection
    if cleanup == 'subs':
        subsCollHandle = dbh[p.subsCollection]
        res = ageOffSubscriptions(p,
                                  subsCollHandle,
                                  ageOff=ageOff,
                                  protectedSubs=protectedSubs)
        print datetime.datetime.utcnow(), res

    # Or the events metadata collection
    elif cleanup == 'events':
        evCollHandle = dbh[p.eventsCollection]
        res = ageOffMetadata(evCollHandle,
                             ageOff=ageOff,
                             protectMedia=protectedSubs)
        print datetime.datetime.utcnow(), res

    else:
        print 'Provide either subs or events as the first argument depending on what you want to clean up.'
Exemplo n.º 31
0
def main():
    ''' Builds the collections and indexes needed for the bam mongo work.
        # See also /src/tests/testMdb for full tests of the base functions. '''
    
    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config"
    #path = 'home/dotcloud/code/config/'
    file = "mongoSetup.cfg"
    params = getConfig(path,file)
    
    # Get a db handle
    if params.verbose==True:
        print "Get Mongo Handle."
    c, dbh = mdb.getHandle(host=params.host, port=params.port, db=params.db)

    # Set up collections
    if params.verbose==True:
        print "Setup the mongo collections."
    
    mdb.setupCollections(c, dbh, params.db, params.collections, params.dropDb)

    # Get the collection handles
    timeSeriesHandle = dbh[params.timeseries]
    baselineHandle   = dbh[params.baseline]
    alertsHandle     = dbh[params.alerts]
    mappingHandle    = dbh[params.mapping]
    
    # Set up the indexes on the collections
    if params.verbose==True:
        print "Setup the mongo indexes."
    
    setupTimeseriesIndexes(timeSeriesHandle, dropIndexes=params.dropIdx)
    setupAlertsIndexes(alertsHandle, dropIndexes=params.dropIdx)
    setupBaselineIndexes(baselineHandle, dropIndexes=params.dropIdx)
        
    # Close the connection
    if params.verbose==True:
        print "Closing the connection."
    
    mdb.close(c, dbh)
Exemplo n.º 32
0
def main():
    ''' Command-line entry point: builds the baseline object for one
        mgrs cell / keyword / timestamp and processes it when it reports
        that it needs updating. '''

    # (short flag, long flag, destination), registered in this order.
    # These would be better as positional arguments.
    optionSpecs = (
        ("-H", "--host", "host"),
        ("-p", "--port", "port"),
        ("-d", "--db", "db"),
        ("-m", "--mgrs", "mgrs"),
        ("-M", "--mgrsprecision", "mgrsPrecision"),
        ("-t", "--timestamp", "timeStamp"),
        ("-k", "--keyword", "keyword"),
        ("-u", "--baselineUnit", "baselineUnit"),
        ("-v", "--baselineValue", "baselineValue"),
    )

    parser = OptionParser()
    for shortFlag, longFlag, destName in optionSpecs:
        parser.add_option(shortFlag, longFlag, dest=destName)

    options, _ = parser.parse_args()

    # Option values arrive as strings, so convert the typed ones
    port = int(options.port)
    timeStamp = datetime.datetime.strptime(options.timeStamp, "%Y-%m-%dT%H:%M:%S")
    mgrsPrecision = int(options.mgrsPrecision)
    baselinePrecision = [options.baselineUnit, int(options.baselineValue)]

    c, dbh = mdb.getHandle(host=options.host, port=port, db=options.db)

    # The baseline object is built up front so it can be updated at the end of the period
    base = baseline(options.mgrs,
                    mgrsPrecision,
                    options.keyword,
                    timeStamp,
                    c=c,
                    dbh=dbh,
                    baselinePrecision=baselinePrecision)

    # processBaseline takes care of both update and insert
    if base.needUpdate == True:
        base.processBaseline(tsd.buildBlankData())

    # Closing is best effort
    try:
        mdb.close(c, dbh)
    except:
        pass
Exemplo n.º 33
0
    def testGetCurrentTags(self):
        
        import mdb
        cwd = os.getcwd()
        parent = os.path.dirname(cwd)
        cfgs = os.path.join(parent, 'config/twitterCrowded.cfg')
        p = getConfigParameters(cfgs)

        # The mongo bits
        try:
            c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
            evCollHandle = dbh[p.eventsCollection]    
        except:
            print "Failed to connect to mongo."
            sys.exit(1)

        res = cf.getCurrentTags(evCollHandle, 'shitbrick')
        self.assertEquals(res, ['hellworld','fuckhole', 'shitbrick'])

        # Quick test chucked in
        results = cf.getQueryBBox(evCollHandle)
        print results
Exemplo n.º 34
0
    def testGetAllCountForOneCell(self):
        ''' Gets a count for a single cell.

            Inserts two timeseries documents for the same keyword and position,
            11 minutes apart, and expects bl.getResultsPerCell to return exactly
            one result for the chosen inDate. '''

        c, dbh = mdb.getHandle()
        try:
            dbh = mdb.setupCollections(dbh, dropCollections=True)         # Set up collections

            tweetTime = datetime.datetime(2011,1,2,12,5,15)
            oldTweetTime = tweetTime - datetime.timedelta(seconds=11*60)

            # One document for the older (baseline) tweet, then one for the newer tweet
            for timeStamp in (oldTweetTime, tweetTime):
                kword = kw(keyword='keyword1', timeStamp=timeStamp, lat=34.4, lon=45.5,
                           text='this text contained the hashtag #keyword1',
                           tweetID=346664, userID=4444, source='twitter')

                # New timeseries object
                ts = timeSeries()
                ts.importData(kword)
                ts.insertBlankDoc()

            # ALL DOCUMENTS
            mgrs    = '38SND4595706622'
            keyword = 'keyword1'

            # This indate represents when the baseline was run (12:10) minus the interest period (10 minutes)
            inDate = datetime.datetime(2011,1,2,12,0,0)
            results = bl.getResultsPerCell(dbh, collection='timeseries', mgrs=mgrs, keyword=keyword, inDate=inDate)

            self.assertEqual(len(results), 1)
        finally:
            # Close the connection even when the assertion fails
            mdb.close(c, dbh)
Exemplo n.º 35
0
    def testBuildFullArray(self):
        ''' Builds the full dates/data arrays from a getResultsPerCell result
            and checks that both have 8 entries. '''

        # Fresh collections on a new handle
        conn, dbh = mdb.getHandle()
        dbh = mdb.setupCollections(dbh, dropCollections=True)

        # First document: keyword stamped 2 Jan 2011
        kword = kw(keyword='keyword1',
                   timeStamp=datetime.datetime(2011,1,2,12,1,1),
                   lat=34.4,
                   lon=45.5,
                   text='this text contained the hashtag #keyword1',
                   tweetID=346664,
                   userID=4444,
                   source='twitter')
        series = timeSeries()
        series.importData(kword)
        success = series.insertBlankDoc()

        # Second document: the same keyword object moved back to 1 Jan 2011
        kword.timeStamp = datetime.datetime(2011,1,1,12,1,1)
        series = timeSeries()
        series.importData(kword)
        success = series.insertBlankDoc()

        # Last 1 weeks worth of documents
        cursor = bl.getResultsPerCell(dbh, '38SND4595706622', 'keyword1', datetime.datetime(2011,1,2), 168)

        # Expand the result over the 7 day period ending at inDate
        dates, data = bl.buildFullArray(cursor,
                                        datetime.datetime(2011, 1, 2, 0, 0),
                                        datetime.timedelta(days=7),
                                        None)

        for built in (dates, data):
            self.assertEquals(len(built), 8)

        # Close the connection
        mdb.close(conn, dbh)
Exemplo n.º 36
0
def getMediaByObjectId(p, objectId):
    ''' Gets a mongo doc back based on the object ID. Called by the display page. '''

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost,
                           port=p.dbPort,
                           db=p.db,
                           user=p.dbUser,
                           password=p.dbPassword)
    evCollHandle = dbh[p.eventsCollection]

    # The query into mongo that should only return 1 doc
    query = {'objectId': objectId}
    doc = evCollHandle.find(query)

    try:
        doc = [d for d in doc][0]
    except:
        print "No document matched your query. Object ID: %s." % objectId
        doc = None
    mdb.close(c, dbh)

    return doc
Exemplo n.º 37
0
def getCamsByGeo(p, lat, lon, radius, postcode=None):
    ''' Get the cameras that fall within radius of lat/lon.

        p        -- parameter object; dbHost, dbPort, db, dbUser, dbPassword
                    and camsCollection are read.
        lat, lon -- centre of the search; anything float() accepts.
        radius   -- search radius, divided by the mean of the two scale
                    factors from radialToLinearUnits before it is used as
                    $maxDistance.
        postcode -- accepted but not used by this function.

        Returns the list of matching documents, or None when nothing matched. '''

    # Convert once so the unit conversion and the query use the same numbers
    lat = float(lat)
    lon = float(lon)

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword)
    try:
        camsCollHandle = dbh[p.camsCollection]

        # Convert the incoming metres radius to degrees
        latRad, lonRad = radialToLinearUnits(lat)
        scale = (latRad + lonRad) / 2.0
        radius = float(radius) / scale

        # Query mongo - $near goes into the SON first, then $maxDistance
        query = SON({'$near': [lon, lat]})
        query['$maxDistance'] = radius

        # Always bound, even for an empty result
        results = list(camsCollHandle.find({'loc': query}))
    finally:
        # The connection is released on every path
        mdb.close(c, dbh)

    return results or None
Exemplo n.º 38
0
# - If so, break out the admin functions.
# - Look into using twisted as the framework or web server so that it is FAST to receive the POSTs
#   from external sources
#   Extract the instagram bits from this code.

#///////////////////////////////////////////////////////////////////////////////////////////////

# Module-level setup: the statements below run once, when this module is loaded.

# Work from the deployed application directory so the config path resolves
os.chdir('/home/dotcloud/current/')
cwd = os.getcwd()
cfgs = os.path.join(cwd, 'config/crowded.cfg')
p = getConfigParameters(cfgs)

# The mongo bits - connection, db handle and events collection are kept as module globals
c, dbh = mdb.getHandle(host=p.dbHost,
                       port=p.dbPort,
                       db=p.db,
                       user=p.dbUser,
                       password=p.dbPassword)
evCollHandle = dbh[p.eventsCollection]

# Log at DEBUG level to the error file named in the config
logFile = os.path.join(p.errorPath, p.errorFile)
logging.basicConfig(filename=logFile,
                    format='%(levelname)s:: \t%(asctime)s %(message)s',
                    level='DEBUG')

#------------------------------------------------------------------------------------------------


def on_error(errFile='errors.txt', message=None):
    ''' Handles an error message.

        errFile -- defaults to 'errors.txt'; presumably the file the error is
                   recorded in (TODO confirm - the body is not shown here).
        message -- the error message; defaults to None. '''
Exemplo n.º 39
0
port = 27017
db = 'bam'
collection = 'timeseries'
lookback = datetime.timedelta(days=5)
queryEnd = datetime.datetime(2011,05,04)
queryStart = queryEnd - lookback
mgrs = None
mgrsPrecision = None
blankDay = buildBlankData()

#========================

print queryStart, queryEnd

# Get a mongo db handle
c, dbh = mdb.getHandle(host=host, port=port, db=db)

# Get a collection handle
collHandle = dbh[collection]

# Query based on a keyword only
keyword = 'sick'

query = {'keyword':keyword,
         'start':{'$gte' : queryStart},
         'start':{'$lte'  : queryEnd}}

if mgrs:
    query['mgrs'] = mgrs
if mgrsPrecision:
    query['mgrsPrecision'] = mgrsPrecision
Exemplo n.º 40
0
def main(configFile=None):
    ''' Coordinates the management functions
        Command line called, typically from a CRON.'''

    # Get the config file
    p = getConfigParameters(configFile)

    # Logging config
    logFile = os.path.join(p.errorPath, p.errorFile)
    logging.basicConfig(filename=logFile,
                        format='%(levelname)s:: \t%(asctime)s %(message)s',
                        level=p.logLevel)

    # Streaming client
    connClientPath = os.path.dirname(p.errorPath)
    p.streamClient = os.path.join(connClientPath, 'src/connectionClient.py')

    # The mongo bits
    try:
        c, dbh = mdb.getHandle(host=p.dbHost,
                               port=p.dbPort,
                               db=p.db,
                               user=p.dbUser,
                               password=p.dbPassword)
        evCollHandle = dbh[p.eventsCollection]
        mgmtCollHandle = dbh[p.mgmtCollection]
        logging.debug("Connected and authenticated on the db.")
    except:
        logging.critical('Failed to connect to db and authenticate.',
                         exc_info=True)
        sys.exit()

    # Create a new management document if needed
    initialOID = setInitialPid(mgmtCollHandle)

    # Get the current events from crowded
    crowdedEvents = getCrowdedEvents(p)

    # Get the events currently stored by this app
    myEvents = getLocalEvents(p, evCollHandle)

    # Compare the 2 sets of events: what's old and new?
    oldEvents, newEvents = checkEvents(crowdedEvents, myEvents)

    # Expire old events from db, so that the new stream reflects the correct interest
    for oldEvent in oldEvents:
        print oldEvent
        logging.debug('Expiring Old Event in DB: %s' % (oldEvent))
        res = expireOldEvent(evCollHandle, oldEvent)

    # Create new item in the db
    for newEvent in newEvents:
        logging.debug('Creating New Event in DB: %s' % (newEvent))
        res = createLocalEvent(evCollHandle, newEvent)

    # Get the old process ID and kill it off
    pid = getPid(mgmtCollHandle)
    logging.debug('Current PID: %s' % (pid))

    # Only continue if there is a change in the events
    if len(oldEvents) > 0 or len(newEvents) > 0:

        if pid:
            logging.debug('Killing old process with ID: %s' % (pid))
            res = killOldProcess(pid)

        # Now create the new one
        newPid = processNewEvent(p)
        logging.debug('Creating a new process with PID: %s' % (newPid))

        # Update the current process id in mongo
        res = storePid(mgmtCollHandle, newPid)
        logging.debug('Stored the new PID: %s' % (res))

    mdb.close(c, dbh)
    logging.shutdown()
Exemplo n.º 41
0
def main(p, response):
    '''Handles the subscription updates, including making the call to the endpoint and dumping to jms/text.'''

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost,
                           port=p.dbPort,
                           db=p.db,
                           user=p.dbUser,
                           password=p.dbPassword)
    evCollHandle = dbh[p.eventsCollection]

    # Accepts a list of dictionaries - the update message
    updates = json.loads(response)

    # Format the url and get the media metadata
    for upd in updates:

        objectId = upd['object_id']
        objectType = upd['object']
        # Does the next URL already exist for this object?
        #url = getNextUrl(p, objectId)

        # If the next (ie this) url hasn't been written to a file, build it from the config file
        #if url == None or len(url) < 1:
        url = buildUrl(p, objectType, objectId)

        # Get the media that has changed since the last time
        mediaMeta = getMediaUpdates(url)

        # Find the pagination info and save out info that concerning next url for this subscription
        #handleMediaPagination(p, url, objectId, mediaMeta)

        # Get the last insert time
        lastUpdated = retrieveLatestImage(evCollHandle, objectId)
        latest = time.mktime(lastUpdated.timetuple())
        newLatest = time.mktime(lastUpdated.timetuple())

        # Update the tags and urls arrays
        if mediaMeta and mediaMeta.has_key('data'):
            #print "Number of Images:", len(mediaMeta['data'])
            for photo in mediaMeta['data']:

                # Append the datetime information
                try:
                    epochTime = float(photo['created_time'])
                    dt = datetime.datetime.fromtimestamp(epochTime)
                except Exception, e:
                    print e

                # For recent images
                if epochTime > latest:
                    # Update the list of images stored
                    res = updateImageUrls(evCollHandle, objectId, photo, dt)
                    # Update the tag information
                    res = updateTags(evCollHandle, objectId, photo)

                # Get the latest image datetime
                if epochTime > newLatest:
                    #print "improving newLatest", epochTime, newLatest
                    newLatest = epochTime

            # Update the latest datetime on record
            updateTimeStamp = datetime.datetime.fromtimestamp(newLatest)
            updateLatestInfo(evCollHandle, objectId, updateTimeStamp)
Exemplo n.º 42
0
def main(): 
    '''
    Script to build tweet objects from the VAST dataset and place them on a Queue and/or JMS
    for testing purposes.
    
    LIKELY SPEED IMPROVEMENTS:
    - BUILDING BLANK ARRAYS IN THE TIME SERIES TAKES A WHILE
    - PUTTING THE KEYWORDS IN A QUEUE, HAVING SET UP THE THREADS TO PROCESS EACH ONE.
    - ANY DUPLICATION CHECKS?
    
    
    
    '''
    
    start = datetime.datetime.utcnow()
    tweetProcessTimes = datetime.timedelta(seconds=0)
    
    #dripRate = 1.5
    
    # JMS destination
    destination = '/topic/test.vasttweets'
    hostIn      = 'localhost'
    portIn      = 61613

    # Reset the collections
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)         # Set up collections

    #jms = jmsCode.jmsHandler(hostIn, portIn, verbose=True)
    # Make the JMS connection via STOMP and the jmsCode class
    #jms.connect()
     
    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/data/"
    #fName= "MicroblogsSample.csv"
    fName= "Microblogs.csv"
    outFName = "MicroblogsOrdered.csv"
    
    f = retrieveFile(path, fName)
    fo = open(os.path.join(path, outFName), 'w')
    
    x = 0
    
    # Start time
    earliestTweet = datetime.datetime(2011, 5, 18, 13, 25)
    earliestTweet = time.mktime(time.struct_time(earliestTweet.timetuple()))
    lastTweetTime = earliestTweet
    print "First Tweet Time: ", lastTweetTime
    
    # This speeds things up from seconds to minutes
    speedUpRate = 60.0
    records = []
    
    # Loop the lines build tweet objects
    for line in f.readlines():
        
        #print line
        # Extract content from each line
        line = line.rstrip('\r').rstrip('\n').rstrip('\r')
        
        if x == 0:
            x+=1
            continue
        
        if x % 1000 == 0:
            print "processed: ", x
        #if x > 1000:
        #    break
        #    sys.exit(0)
            
        line = line.split(',')
        
        tweetId, dt, latLon, text = line
        
        # Get the datetime group into seconds since UNIX time
        dtg = getTime(tweetId, dt)

        if not dtg:
            continue

        record = [tweetId, dtg, latLon, text]
        records.append(record)
        
        x += 1
    
    f.close()
    
    sortedTable = sortTable(records, col=1)

    
    # Now loop the sorted list and write out to a new file
    for record in sortedTable:
        

        lineOut = "%s,%s,%s,%s\n" %(record[0], record[1], record[2], record[3])
        
        fo.write(lineOut)
    
    f.close()
Exemplo n.º 43
0
def main(p, mediaOnly=None):
    ''' Coordinates a new twitter stream connection'''

    # Logging config
    logFile = os.path.join(p.errorPath, p.connErrorFile)
    logging.basicConfig(filename=logFile,
                        format='%(levelname)s:: %(asctime)s %(message)s',
                        level=p.logLevel)

    # The mongo bits
    try:
        c, dbh = mdb.getHandle(host=p.dbHost,
                               port=p.dbPort,
                               db=p.db,
                               user=p.dbUser,
                               password=p.dbPassword)
        evCollHandle = dbh[p.eventsCollection]
    except:
        logging.critical('Failed to connect to db and authenticate.',
                         exc_info=True)
        sys.exit()

    # Here's the redis queue for managing the tweets as they come in
    try:
        q = RedisQueue(p.redisName,
                       host=p.redisHost,
                       password=p.redisPassword,
                       port=p.redisPort,
                       db=0)
    except:
        logging.critical("REDIS: Failed to connect in connectionClient.py. ",
                         exc_info=True)
        sys.exit()

    # Connection placeholder in case the exception catches the drop out
    connection = True

    while connection == True:

        # Get the existing tags and add the current
        try:
            tags = cf.getCurrentTags(evCollHandle)
        except:
            tags = None
            logging.error('Failed to get current tags from db.', exc_info=True)

        # Build the building boxes
        try:
            bboxes = cf.getCurrentBBoxes(evCollHandle)
        except:
            bboxes = None
            logging.error('Failed to get current BBOXes from db.',
                          exc_info=True)

        if not tags and not bboxes:
            logging.warning('Currently no tags or bboxes in the db.')
            sys.exit()

        try:
            print tags, bboxes
            with tweetstream.FilterStream(p.sourceUser,
                                          p.sourcePassword,
                                          track=tags,
                                          locations=bboxes) as stream:
                for tweet in stream:
                    if mediaOnly:
                        try:
                            q.put(json.dumps(tweet))
                        except:
                            logging.critical(
                                "Failed to put tweet on redis. This tweet: \n%s"
                                % (tweet),
                                exc_info=True)

        except tweetstream.ConnectionError:
            logging.critical("Disconnected from twitter", exc_info=True)