def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db,
        creates the user I've specified and returns. '''

    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()
    print "got DC environment settings."
    reParams = getRedisEnvironment()
    print "got redis environment settings."

    # Authenticate on the admin db
    try:
        c, adminDbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin',
                                    user=dcParams.adminUser, password=dcParams.adminPass)
        print 'got handle'
    except:
        print "Failed to get handle under admin."

    # Authentication of the administrator
    #try:
    #    auth = adminDbh.authenticate(dcParams.adminUser, dcParams.adminPass)
    #except Exception, e:
    #    print "Failed to authenticate with mongo db."
    #    print e

    # Create a new user
    p = getConfigParameters(configFile)

    # Switch the database handle from the admin one to the one being used
    dbh = c[p.db]
    success = dbh.add_user(p.dbUser, p.dbPassword)
    c.disconnect()

    try:
        # Reconnect as the new user on the application db
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db=p.db,
                               user=p.dbUser, password=p.dbPassword)
        print 'Connected to the normal db: %s' % (p.db)
    except:
        logging.critical("Failed to connect to db and get handle as user.", exc_info=True)
        sys.exit()

    # Write out the new information to the regular config file
    try:
        writeConfigFile(configFile, dcParams)
        print 'Writing out mongo config info.'
        writeConfigFileRedis(configFile, reParams)
        print 'Writing out redis config.'
    except:
        logging.critical("Failed in writing params back to config file.", exc_info=True)

    mdb.close(c, dbh)
def testInsertBaselineDoc(self):
    ''' Inserts a completed baseline document into the baseline collection. '''

    # Connect and get handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # Build a keyword object
    testKywd = kw(keyword='keyword1',
                  timeStamp=datetime.datetime(2011, 6, 22, 12, 10, 45),
                  lat=34.4, lon=45.5,
                  text='this text contained the hashtag #keyword1',
                  tweetID=346664, userID=4444, source='twitter')

    # Instantiate the baseline object/class
    baseLine = bl.baseline(kywd=testKywd, cellBuildPeriod=600)

    # Build the document and insert it
    doc = baseLine.buildDoc()
    bl.insertBaselineDoc(dbh, doc)

    res = dbh.baseline.find()[0]
    print res

    self.assertEquals(res['keyword'], 'keyword1')
    self.assertEquals(res['mgrs'], '38SND4595706622')
    self.assertEquals(res['mgrsPrecision'], 10)

    # Close the connection
    mdb.close(c, dbh)
def __init__(self, kywd, c=None, dbh=None, host=None, port=None, db=None, baselinePrecision=[]):
    ''' Constructor. '''

    # Connection for this baseline class
    if not c and not dbh:
        # Get a connection to the db
        self.c, self.dbh = mdb.getHandle(host=host, port=port, db=db)
    else:
        self.c, self.dbh = c, dbh

    self.collection = 'timeseries'

    # Bad, bad version of inheriting keyword - NEED TO FIX THIS
    self.keyword = kywd.keyword
    self.timeStamp = kywd.timeStamp
    self.mgrs = kywd.mgrs
    self.mgrsPrecision = kywd.mgrsPrecision

    # Has this keyword ever been seen before at this location?
    self.outputs = {}

    # When was the most recent baseline run? The unit (hour/minute/second) and the
    # integer precision come from the top-level baselinePrecision parameter.
    unit, precision = baselinePrecision
    self.baselineTimeStamp = truncateTimeStamp(self.timeStamp, unit, precision)

    # Checks whether the baseline exists at all or whether it needs updating.
    # Outside the object, this is used to decide whether to continue with the processing.
    self.needUpdate = self.needUpdating()
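# `truncateTimeStamp` is called above but not defined in these snippets. A minimal
# sketch of what it is assumed to do - rounding a datetime down to the supplied unit
# at the supplied integer precision (e.g. unit='minute', precision=10 gives the last
# 10-minute boundary). Name and signature come from the call site; the body is an
# illustrative assumption, not the project's actual implementation.
def truncateTimeStamp(timeStamp, unit, precision):
    ''' Round timeStamp down to the nearest <precision> of <unit>. '''
    if unit == 'hour':
        return timeStamp.replace(hour=(timeStamp.hour // precision) * precision,
                                 minute=0, second=0, microsecond=0)
    if unit == 'minute':
        return timeStamp.replace(minute=(timeStamp.minute // precision) * precision,
                                 second=0, microsecond=0)
    if unit == 'second':
        return timeStamp.replace(second=(timeStamp.second // precision) * precision,
                                 microsecond=0)
    raise ValueError("Unsupported unit: %s" % unit)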
def main(cleanup, ageOff, protectedSubs=None):
    ''' Coordinates which element to clean up: subscriptions - including remote
        deletion of subs from instagram - or event data, getting rid of the
        documents that contain the media and metadata. '''

    # If there is a command to remove or keep protected subs, use it
    if protectedSubs:
        protectedSubs = bool(protectedSubs)

    # Get the config information
    os.chdir('/home/dotcloud/code/')
    cwd = os.getcwd()
    cfgs = os.path.join(cwd, 'config/crowded.cfg')
    p = getConfigParameters(cfgs)

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db,
                           user=p.dbUser, password=p.dbPassword)

    # Whether to clean up the subscriptions and subscription collection
    if cleanup == 'subs':
        subsCollHandle = dbh[p.subsCollection]
        res = ageOffSubscriptions(p, subsCollHandle, ageOff=ageOff, protectedSubs=protectedSubs)
        print datetime.datetime.utcnow(), res

    # Or the events metadata collection
    elif cleanup == 'events':
        evCollHandle = dbh[p.eventsCollection]
        res = ageOffMetadata(evCollHandle, ageOff=ageOff, protectMedia=protectedSubs)
        print datetime.datetime.utcnow(), res

    else:
        print 'Provide either subs or events as the first argument depending on what you want to clean up.'
def main(): """ """ # Connect and get db and collection handle try: c, dbh = mdb.getHandle(p.dbHost, p.dbPort, p.db, p.dbUser, p.dbPassword) collectionHandle = dbh[p.camsCollection] except: logging.critical("DB connection Failed", exc_info=True) # Get the feed content feedContent = hitFeed(p.feedUrl) if not feedContent: logging.critical("** SCRIPT EXIT **\n%s\n\n" % ("=" * 52)) sys.exit() # Break out the content into head and items header, rootUrl, cameras = extractContent(feedContent) if not header or not rootUrl or not cameras: logging.critical("** SCRIPT EXIT **\n%s\n\n" % ("=" * 52)) sys.exit() # Build the camera root URL rootUrl = p.tflDomain + rootUrl fc = feedChannel(header) # Deal with each of the items for camera in cameras: # Build an 'item' object based on the RSS item item = feedItem(fc, camera, rootUrl) item.buildGeoJson() # Insert the document into mongo response = mongoInsert(collectionHandle, item)
def testUpdateDocument(self):
    ''' Function updates/increments a specific hour.minute in a document. '''

    # Get connection to mongo and set up collections
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # New timeseries object with data
    ts = timeSeries()
    ts.importData(self.kw, blockPrecision=24)
    success = ts.insertBlankDoc()
    self.assertEquals(success, 1)

    # Update/increment a specific hour.minute
    ts.updateCount()

    # Run a query for this item
    outDocs = dbh.timeseries.find({'data.12.1': 1})
    for doc in outDocs:
        print doc
        self.assertEquals(doc['mgrs'], '38SND4595706622')

    # Close the connection
    mdb.close(c, dbh)
def MongoLookup(self):
    ''' Fn checks whether a timeseries document already exists for this period. '''

    # Get connection to mongo and set up collections
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # New timeseries object with data
    ts = timeSeries()
    ts.importData(self.kw, blockPrecision=1)

    # Check the count - should be 0 before the doc gets inserted
    count = ts.mongoLookup()
    self.assertEquals(count, 0)

    # Build and insert a new mongo formatted document
    success = ts.insertBlankDoc()

    # Count should be 1 now that the document has been inserted
    count = ts.mongoLookup()
    self.assertEquals(count, 1)

    # Clean up, remove the content and close the connection
    #dbh.timeseries.remove()
    mdb.close(c, dbh)
def main():
    # Config file parameters
    pathIn = '/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config/'
    fileIn = 'preAllocateTimeSeries.cfg'

    # Get parameters from config
    p = params(pathIn, fileIn)

    # Connect and get db and collection handle
    c, dbh = mdb.getHandle(p.host, p.port, p.db)
    collectionHandle = dbh[p.coll]

    # Current datetime
    #today = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
    today = datetime.datetime(2011, 5, 1)

    # Build some blank data
    blankDataArr = buildBlankData()

    # A list to hold the timeseries we need to pre-allocate for.
    # Get pairs to be pre-allocated from yesterday - lookback is in days
    if 'yesterday' in p.baselineTypes:
        preAllocate, minCount, maxCount = getCommonMgrsAndKeyword(collectionHandle, p.mgrsPrecision,
                                                                  p.nothingFoundKeyword, today, lookback=1)

    # Now loop the keyword/mgrs pairs and build new timeseries documents for today
    for item in preAllocate:
        response = insertDoc(collectionHandle, item['mgrs'], p.mgrsPrecision, item['keyword'],
                             'twitter', today, buildBlankData())

    mdb.close(c, dbh)
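# `buildBlankData` is used above (and called with hours=24 elsewhere) but is not shown
# in these snippets. A minimal sketch under the assumption that it pre-allocates the
# nested hour -> minute -> count structure implied by the 'data.12.1' dot-notation
# queries in the tests; the real helper may differ.
def buildBlankData(hours=24):
    ''' Build a blank {hour: {minute: 0}} block for pre-allocated timeseries docs. '''
    return dict((str(h), dict((str(m), 0) for m in range(60))) for h in range(hours))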
def testGetCurrentTags(self):
    import mdb

    cwd = os.getcwd()
    parent = os.path.dirname(cwd)
    cfgs = os.path.join(parent, 'config/twitterCrowded.cfg')
    p = getConfigParameters(cfgs)

    # The mongo bits
    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db,
                               user=p.dbUser, password=p.dbPassword)
        evCollHandle = dbh[p.eventsCollection]
    except:
        print "Failed to connect to mongo."
        sys.exit(1)

    res = cf.getCurrentTags(evCollHandle, 'shitbrick')
    self.assertEquals(res, ['hellworld', 'fuckhole', 'shitbrick'])

    # Quick test chucked in
    results = cf.getQueryBBox(evCollHandle)
    print results
def testlastBaselined(self):
    ''' Builds a baseline document for inserting. '''

    # Connect and get handle
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # Build a keyword object
    testKywd = kw(keyword='keyword1',
                  timeStamp=datetime.datetime(2011, 6, 22, 12, 10, 45),
                  lat=34.4, lon=45.5,
                  text='this text contained the hashtag #keyword1',
                  tweetID=346664, userID=4444, source='twitter')

    # Create a new baseline object
    baseLine = bl.baseline(kywd=testKywd, cellBuildPeriod=600)
    baseLine.outputs['days30_all'] = 0.5
    baseLine.outputs['days7_all'] = 0.4
    baseLine.outputs['hrs30_all'] = 0.3
    baseLine.outputs['days30_weekly'] = 0.2
    baseLine.outputs['days7_daily'] = 0.1

    doc = baseLine.buildDoc()
    bl.insertBaselineDoc(dbh, doc)

    # Method returns the date of last baseline calculation
    lastBaseline = baseLine.lastBaselined()
    self.assertEquals(lastBaseline, datetime.datetime(2011, 6, 22, 12, 10))

    # Close the connection
    mdb.close(c, dbh)
def __init__(self, c=None, dbh=None, host=None, port=None, db=None):
    ''' Instantiate the object and attributes. ***NOT TESTED*** '''

    if not c and not dbh:
        # Get a connection to the db
        self.c, self.dbh = mdb.getHandle(host=host, port=port, db=db)
    else:
        self.c, self.dbh = c, dbh
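# Example usage of the constructor above - reuse an open connection where one exists,
# otherwise let the object open its own. The class name `timeSeries` is an assumption;
# the snippet does not show which class this __init__ belongs to.
#
#   ts = timeSeries(c=c, dbh=dbh)                            # reuse an existing connection
#   ts = timeSeries(host='localhost', port=27017, db='bam')  # or open a new one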
def main(p, mediaOnly=None): """ Coordinates a new twitter stream connection""" # Logging config logFile = os.path.join(p.errorPath, p.connErrorFile) logging.basicConfig(filename=logFile, format="%(levelname)s:: %(asctime)s %(message)s", level=p.logLevel) # The mongo bits try: c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db, user=p.dbUser, password=p.dbPassword) evCollHandle = dbh[p.eventsCollection] except: logging.critical("Failed to connect to db and authenticate.", exc_info=True) sys.exit() # Here's the redis queue for managing the tweets as they come in try: q = RedisQueue(p.redisName, host=p.redisHost, password=p.redisPassword, port=p.redisPort, db=0) except: logging.critical("REDIS: Failed to connect in connectionClient.py. ", exc_info=True) sys.exit() # Connection placeholder in case the exception catches the drop out connection = True while connection == True: # Get the existing tags and add the current try: tags = cf.getCurrentTags(evCollHandle) except: tags = None logging.error("Failed to get current tags from db.", exc_info=True) # Build the building boxes try: bboxes = cf.getCurrentBBoxes(evCollHandle) except: bboxes = None logging.error("Failed to get current BBOXes from db.", exc_info=True) if not tags and not bboxes: logging.warning("Currently no tags or bboxes in the db.") sys.exit() try: print tags, bboxes with tweetstream.FilterStream(p.sourceUser, p.sourcePassword, track=tags, locations=bboxes) as stream: for tweet in stream: if mediaOnly: try: q.put(json.dumps(tweet)) except: logging.critical("Failed to put tweet on redis. This tweet: \n%s" % (tweet), exc_info=True) except tweetstream.ConnectionError: logging.critical("Disconnected from twitter", exc_info=True)
def main(configFile, subscriptionType, source):
    ''' Coordinates the retrieval of public CCTV camera URLs to crowded. '''

    # Get the config information into a single object
    p = getConfigParameters(configFile)

    #////////////////////////////////////////////////////////
    if source == 'cctv':
        url = p.tflUrl
    elif source == 'youtube':
        url = p.socialiseUrl
    elif source == 'flickr':
        url = p.viewFinderUrl
    # More sources go here, with additions to the config file
    #////////////////////////////////////////////////////////

    # Mongo connection parameters
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db,
                           user=p.dbUser, password=p.dbPassword)
    collHandle = dbh['subs']
    evCollHandle = dbh['events']

    # Get the active subs
    activeSubs = getActiveSubs(collHandle, type=subscriptionType)

    # Barf at this point if there's nothing in subs
    if not activeSubs or len(activeSubs) < 1:
        mdb.close(c, dbh)
        return None

    # For each active subscription, query by geography or tag
    for aSub in activeSubs:
        print 'ASUB:', aSub
        if subscriptionType == 'geography':
            lon, lat = aSub['loc']
            radius = float(aSub['radius'])
            media = queryByGeo(url, lat, lon, radius)
        elif subscriptionType == 'tag':
            tag = aSub['objectId']
            media = queryByTag(url, tag)

        # For each of the images, update the correct event url list
        for image in media:
            # Mod the datetime into a python dt
            try:
                img = datetime.datetime.strptime(image['captured'], "%Y-%m-%dT%H:%M:%S")
            except Exception, e:
                img = datetime.datetime.strptime(image['published'], "%Y-%m-%dT%H:%M:%S")
            image['dt'] = img

            success = updateEvents(evCollHandle, aSub['objectId'], image)
            if success == None:
                print "Failed to update event ID '%s' with media: \n %s" % (aSub['objectId'], image)
def getMongoHandles(p):
    ''' Gets the mongo connection handle and authenticates. '''

    # Handles the mongo connections
    c, dbh = mdb.getHandle(db=p.db, host=p.dbHost, port=p.dbPort)

    # Authentication
    try:
        auth = dbh.authenticate(p.dbUser, p.dbPassword)
    except Exception, e:
        print "Failed to authenticate with mongo db."
        print e

    return c, dbh
def __init__(self):
    ''' Sets up the config information, database connection and builds
        a blank data array for easy inserting. '''

    # Reads in a load of config information
    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config/"
    file = "keywordListenerConfig.cfg"
    self.getConfig(path, file)

    # Handles the mongo connections
    self.c, self.dbh = mdb.getHandle(db=self.db, host=self.host, port=self.port)

    # Build a blank timeseries array to save it being built every time
    self.blankData = buildBlankData(hours=24)
def main(p):

    # The mongo bits
    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db,
                               user=p.dbUser, password=p.dbPassword)
        evCollHandle = dbh[p.eventsCollection]
    except:
        logging.critical('Failed to connect and authenticate', exc_info=True)
        sys.exit()

    # Get the current tags
    tags = cf.getCurrentTags(evCollHandle)

    # Get the current bounding boxes
    queryBBoxes = cf.getQueryBBox(evCollHandle)

    x = 1
    while x == 1:

        # Here's the redis queue for managing the tweets as they come in
        try:
            q = RedisQueue(p.redisName, host=p.redisHost, password=p.redisPassword,
                           port=p.redisPort, db=0)
        except:
            logging.error('Failed to connect to REDIS db.', exc_info=True)
            sys.exit()

        # This call is blocking, so expect it to hang on this point
        tweetStr = q.get()
        tweet = json.loads(tweetStr)

        # Work out which object/event this tweet is associated with
        if tags:
            tweetTags = cf.whichTags(tags, tweet)
            for tweetTag in tweetTags:
                success = dispatchTweet(p, tweet, tweetTag)
                logging.debug("Tag-based message dispatched: %s" % (success))

        if queryBBoxes:
            tweetGeos = cf.matchesCurrentGeos(queryBBoxes, tweet)
            for tweetGeo in tweetGeos:
                success = dispatchTweet(p, tweet, tweetGeo)
                logging.debug("Geo-based message dispatched: %s" % (success))
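# `cf.whichTags` is not shown in these snippets. From the call site above it takes the
# currently-tracked tags and a decoded tweet and returns the tags that tweet mentions.
# A minimal sketch under that assumption, matching on the tweet's hashtag entities:
def whichTags(tags, tweet):
    ''' Return the subset of tracked tags that appear in the tweet's hashtags. '''
    hashtags = [ht['text'].lower() for ht in tweet.get('entities', {}).get('hashtags', [])]
    return [tag for tag in tags if tag.lower() in hashtags]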
def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db,
        creates the user I've specified and returns. '''

    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()
    logging.info("Mongo Params:\n%s\n%s\n%s\n%s" % (dcParams.mongoHost, dcParams.mongoPort,
                                                    dcParams.adminUser, dcParams.adminPass))

    # Authenticate on the admin db
    try:
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin',
                               user=dcParams.adminUser, password=dcParams.adminPass)
    except:
        logging.critical('Failed to connect to database as admin.')
        sys.exit()

    # Create a new user
    p = getConfigParameters(configFile)

    # Switch the database handle from the admin one to the one being used
    dbh = c[p.db]
    success = dbh.add_user(p.dbUser, p.dbPassword)
    c.disconnect()

    try:
        # Reconnect as the new user on the application db
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db=p.db,
                               user=p.dbUser, password=p.dbPassword)
    except:
        logging.critical("Failed to connect to db and get handle as user.", exc_info=True)
        sys.exit()

    # Write out the new information to the regular config file
    try:
        writeConfigFile(configFile, dcParams)
    except:
        logging.critical("Failed in writing params back to config file.", exc_info=True)

    mdb.close(c, dbh)
def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db,
        creates the user I've specified and returns. '''

    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()

    # Authenticate on the admin db
    try:
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin',
                               user=dcParams.adminUser, password=dcParams.adminPass)
    except:
        logging.critical('Failed to connect to database as admin.')
        sys.exit()

    # Create a new user
    p = getConfigParameters(configFile)

    # Switch the database handle from the admin one to the one being used
    dbh = c[p.db]
    success = dbh.add_user(p.dbUser, p.dbPassword)
    c.disconnect()

    try:
        # Reconnect as the new user on the application db
        c, dbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db=p.db,
                               user=p.dbUser, password=p.dbPassword)
    except:
        logging.critical("Failed to connect to db and get handle as user.", exc_info=True)
        sys.exit()

    # Write out the new information to the regular config file
    try:
        writeConfigFile(configFile, dcParams)
        print "----- writing out new config parameters."
    except:
        logging.critical("Failed in writing params back to config file.", exc_info=True)

    mdb.close(c, dbh)
def main(configFile=None):
    ''' Builds the collections and indexes needed. '''

    # Get the config information into a single object
    p = getConfigParameters(configFile)

    # Get a db handle
    if p.verbose == True:
        print "---- Getting Mongo Handle."
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db)

    try:
        auth = dbh.authenticate(p.dbUser, p.dbPassword)
    except Exception, e:
        print "Failed to authenticate with mongo db."
        print e
def getMongoHandles(p):
    ''' Gets the mongo connection handle, authentication and the collection handles. '''

    # Handles the mongo connections
    c, dbh = mdb.getHandle(db=p.db, host=p.dbHost, port=p.dbPort)

    # Authentication
    try:
        auth = dbh.authenticate(p.dbUser, p.dbPassword)
    except:
        logging.warning("Failed to authenticate with mongo db.")

    collHandle = dbh[p.slangCollection]
    emoCollHandle = dbh[p.emoCollection]

    return c, dbh, collHandle, emoCollHandle
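# Usage of getMongoHandles above - unpack the connection, db handle and the two
# collection handles, then close the connection when done (p is the usual
# config-parameters object):
#
#   c, dbh, collHandle, emoCollHandle = getMongoHandles(p)
#   ...
#   mdb.close(c, dbh)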
def testBuildFullArrayFlat(self):
    ''' Build a full FLATTENED array from a cursor result. '''

    st = datetime.datetime.utcnow()

    # A keyword that went in yesterday creates a timeseries yesterday
    nowDt = datetime.datetime(year=2011, month=1, day=12, hour=11, minute=1, second=1)
    oneDay = datetime.timedelta(days=1)

    # Get a db handle and set up collections
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # Build a keyword
    kword = kw(keyword='keyword1', timeStamp=nowDt - oneDay, lat=34.4, lon=45.5,
               text='this text contained the hashtag #keyword1',
               tweetID=346664, userID=4444, source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Insert 2ND DOC IN THE COLLECTION
    kword.timeStamp = nowDt
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    nowDate = nowDt.replace(hour=0, minute=0, second=0, microsecond=0)

    # Last week's worth of documents
    resultSet = bl.getResultsPerCell(dbh, '38SND4595706622', 'keyword1', nowDate, 168)

    # Close the connection
    mdb.close(c, dbh)

    # Inputs
    period = datetime.timedelta(days=7)
    dates, data = bl.buildFullArray(resultSet, nowDate, period, 1)

    firstDay = dates[0]
    lastDay = dates[-1]

    self.assertEquals(data.shape[0], 11520)
    self.assertEquals(firstDay, nowDate - period)
    self.assertEquals(lastDay, nowDate)
def main(configFile=None):
    ''' Takes the dotcloud default admin privs, authorises on the db,
        creates the user I've specified and returns. '''

    # Get the parameters that were set up by dotcloud
    dcParams = getEnvironment()
    print dcParams.adminUser, dcParams.adminPass

    # Get a handle on the admin db
    c, adminDbh = mdb.getHandle(host=dcParams.mongoHost, port=dcParams.mongoPort, db='admin')
    print 'got handle'

    # Authentication of the administrator
    try:
        auth = adminDbh.authenticate(dcParams.adminUser, dcParams.adminPass)
    except Exception, e:
        print "Failed to authenticate with mongo db."
        print e
def main(configFile=None):
    ''' Builds the collections and indexes needed. '''

    # Get the config information into a single object
    p = getConfigParameters(configFile)

    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db,
                               user=p.dbUser, password=p.dbPassword)
    except:
        logging.warning("Failed to connect to db and get handle.", exc_info=True)

    # Take the collections provided and create them and their indexes
    for coll in p.collections:
        collHandle = buildCollection(dbh, p, coll['collection'])
        indexes = buildIndexes(p, coll, collHandle)

    mdb.close(c, dbh)
def main(configFile=None):
    ''' Builds the collections and indexes needed. '''

    # Get the config information into a single object
    p = getConfigParameters(configFile)

    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db,
                               user=p.dbUser, password=p.dbPassword)
    except:
        logging.warning("Failed to connect to db and get handle.", exc_info=True)

    # Take the collections provided and create them and their indexes
    for coll in p.collections:
        print "Building Collections and indexes: %s" % coll
        collHandle = buildCollection(dbh, p, coll['collection'])
        indexes = buildIndexes(p, coll, collHandle)

    mdb.close(c, dbh)
def testGetAllCountForOneCellLookback(self):
    ''' Gets a count for a single cell. '''

    tweetTime = datetime.datetime(2011, 1, 2, 12, 5, 15)
    oldTweetTime = tweetTime - datetime.timedelta(seconds=15 * 60)
    baselineTime = datetime.datetime(2011, 1, 2, 12, 0, 0)

    # Get a db handle and set up collections
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # Build a keyword
    kword = kw(keyword='keyword1', timeStamp=tweetTime, lat=34.4, lon=45.5,
               text='this text contained the hashtag #keyword1',
               tweetID=346664, userID=4444, source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Last 2 documents
    lookback = 24
    mgrs = '38SND4595706622'
    qKeyword = 'keyword1'
    res = bl.getResultsPerCell(dbh, collection='timeseries', mgrs=mgrs, keyword=qKeyword,
                               inDate=baselineTime, lookback=lookback)
    print res

    results = []
    for doc in res:
        print doc
        results.append(doc)

    self.assertEqual(len(results), 1)

    # Close the connection
    mdb.close(c, dbh)
def InsertBlankDoc(self):
    ''' Checks the successful inserting of a mongo document. '''

    # Get connection to mongo and set up collections
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # New timeseries object with data
    ts = timeSeries()
    ts.importData(self.kw, blockPrecision=1)

    # Build and insert a new mongo formatted document
    success = ts.insertBlankDoc()
    self.assertEquals(success, 1)

    # Clean up and drop it
    #dbh.timeseries.remove()

    # Close the connection
    mdb.close(c, dbh)
def getEvents(p):
    ''' Returns all currently active events in mongo. '''

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db,
                           user=p.dbUser, password=p.dbPassword)
    evCollHandle = dbh[p.eventsCollection]

    try:
        docs = evCollHandle.find(fields=['objectId', 'subType', 'start', 'loc', 'radius'])
        docsOut = [d for d in docs]
    except:
        print "No documents matched your query."
        docsOut = []

    mdb.close(c, dbh)

    # Additional fields that might be useful
    for doc in docsOut:
        # Get rid of the mongo ID
        _id = doc.pop('_id')

        if doc.has_key('loc'):
            # Calculate the radius in metres
            latScale, lonScale = radialToLinearUnits(float(doc['loc'][1]))
            scale = (latScale + lonScale) / 2.0
            doc['radius_m'] = int(doc['radius'] * scale)

            # Calculate the bottom-left (SW) and top-right (NE) corners
            s = doc['loc'][1] - doc['radius']
            w = doc['loc'][0] - doc['radius']
            n = doc['loc'][1] + doc['radius']
            e = doc['loc'][0] + doc['radius']
            doc['bbox'] = [[w, s], [e, n]]

    return docsOut
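# `radialToLinearUnits` is used above and in the geo query code to convert between
# degrees and metres at a given latitude. A minimal sketch, assuming it returns
# (metres per degree of latitude, metres per degree of longitude); the constants are
# the standard spheroid approximation and the project's implementation may differ.
import math

def radialToLinearUnits(lat):
    ''' Approximate metres per degree of latitude and longitude at latitude `lat`. '''
    latScale = 111132.92 - 559.82 * math.cos(2 * math.radians(lat))  # metres per degree latitude
    lonScale = 111412.84 * math.cos(math.radians(lat))               # shrinks towards the poles
    return latScale, lonScale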
def main():
    ''' Builds the collections and indexes needed for the bam mongo work.
        See also /src/tests/testMdb for full tests of the base functions. '''

    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/config"
    #path = 'home/dotcloud/code/config/'
    file = "mongoSetup.cfg"
    params = getConfig(path, file)

    # Get a db handle
    if params.verbose == True:
        print "Get Mongo Handle."
    c, dbh = mdb.getHandle(host=params.host, port=params.port, db=params.db)

    # Set up collections
    if params.verbose == True:
        print "Setup the mongo collections."
    mdb.setupCollections(c, dbh, params.db, params.collections, params.dropDb)

    # Get the collection handles
    timeSeriesHandle = dbh[params.timeseries]
    baselineHandle = dbh[params.baseline]
    alertsHandle = dbh[params.alerts]
    mappingHandle = dbh[params.mapping]

    # Set up the indexes on the collections
    if params.verbose == True:
        print "Setup the mongo indexes."
    setupTimeseriesIndexes(timeSeriesHandle, dropIndexes=params.dropIdx)
    setupAlertsIndexes(alertsHandle, dropIndexes=params.dropIdx)
    setupBaselineIndexes(baselineHandle, dropIndexes=params.dropIdx)

    # Close the connection
    if params.verbose == True:
        print "Closing the connection."
    mdb.close(c, dbh)
def main():

    # Should really move these to being arguments
    parser = OptionParser()
    parser.add_option("-H", "--host", dest="host")
    parser.add_option("-p", "--port", dest="port")
    parser.add_option("-d", "--db", dest="db")
    parser.add_option("-m", "--mgrs", dest="mgrs")
    parser.add_option("-M", "--mgrsprecision", dest="mgrsPrecision")
    parser.add_option("-t", "--timestamp", dest="timeStamp")
    parser.add_option("-k", "--keyword", dest="keyword")
    parser.add_option("-u", "--baselineUnit", dest="baselineUnit")
    parser.add_option("-v", "--baselineValue", dest="baselineValue")
    (options, args) = parser.parse_args()

    # Format the option inputs - these really should be arguments
    port = int(options.port)
    timeStamp = datetime.datetime.strptime(options.timeStamp, "%Y-%m-%dT%H:%M:%S")
    mgrsPrecision = int(options.mgrsPrecision)
    baselinePrecision = [options.baselineUnit, int(options.baselineValue)]

    c, dbh = mdb.getHandle(host=options.host, port=port, db=options.db)

    # Build the baseline objects as we go so that they can be updated at the end of the period.
    base = baseline(options.mgrs, mgrsPrecision, options.keyword, timeStamp,
                    c=c, dbh=dbh, baselinePrecision=baselinePrecision)

    # Does the baseline document need updating?
    if base.needUpdate == True:
        # This method takes care of update and insert
        base.processBaseline(tsd.buildBlankData())

    try:
        mdb.close(c, dbh)
    except:
        pass
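# Example invocation of the option-parsed script above (the script name and the
# values are illustrative only):
#
#   python baselineRun.py -H localhost -p 27017 -d bam -m 38SND4595706622 -M 10 \
#          -t 2011-05-18T13:25:00 -k keyword1 -u minute -v 10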
def testGetAllCountForOneCell(self):
    ''' Gets a count for a single cell. '''

    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    tweetTime = datetime.datetime(2011, 1, 2, 12, 5, 15)
    oldTweetTime = tweetTime - datetime.timedelta(seconds=11 * 60)

    # Build a keyword to represent the baseline
    kword = kw(keyword='keyword1', timeStamp=oldTweetTime, lat=34.4, lon=45.5,
               text='this text contained the hashtag #keyword1',
               tweetID=346664, userID=4444, source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Build a keyword
    kword = kw(keyword='keyword1', timeStamp=tweetTime, lat=34.4, lon=45.5,
               text='this text contained the hashtag #keyword1',
               tweetID=346664, userID=4444, source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # ALL DOCUMENTS
    mgrs = '38SND4595706622'
    keyword = 'keyword1'

    # This indate represents when the baseline was run (12:10) minus the interest period (10 minutes)
    inDate = datetime.datetime(2011, 1, 2, 12, 0, 0)
    results = bl.getResultsPerCell(dbh, collection='timeseries', mgrs=mgrs,
                                   keyword=keyword, inDate=inDate)

    self.assertEqual(len(results), 1)
def testBuildFullArray(self):
    ''' Build a full array from a cursor result. '''

    # Get a db handle and set up collections
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # Build a keyword
    kword = kw(keyword='keyword1', timeStamp=datetime.datetime(2011, 1, 2, 12, 1, 1),
               lat=34.4, lon=45.5, text='this text contained the hashtag #keyword1',
               tweetID=346664, userID=4444, source='twitter')

    # New timeseries object
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Insert a second doc now that the keyword's timestamp has been modified
    kword.timeStamp = datetime.datetime(2011, 1, 1, 12, 1, 1)
    ts = timeSeries()
    ts.importData(kword)
    success = ts.insertBlankDoc()

    # Last week's worth of documents
    resultSet = bl.getResultsPerCell(dbh, '38SND4595706622', 'keyword1',
                                     datetime.datetime(2011, 1, 2), 168)

    # Inputs
    inDate = datetime.datetime(2011, 1, 2, 0, 0)
    period = datetime.timedelta(days=7)
    flat = None
    dates, data = bl.buildFullArray(resultSet, inDate, period, flat)

    self.assertEquals(len(dates), 8)
    self.assertEquals(len(data), 8)

    # Close the connection
    mdb.close(c, dbh)
def getMediaByObjectId(p, objectId):
    ''' Gets a mongo doc back based on the object ID.
        Called by the display page. '''

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db,
                           user=p.dbUser, password=p.dbPassword)
    evCollHandle = dbh[p.eventsCollection]

    # The query into mongo that should only return 1 doc
    query = {'objectId': objectId}
    doc = evCollHandle.find(query)

    try:
        doc = [d for d in doc][0]
    except:
        print "No document matched your query. Object ID: %s." % objectId
        doc = None

    mdb.close(c, dbh)
    return doc
def getCamsByGeo(p, lat, lon, radius, postcode=None):
    ''' Get the cameras that fall within radius of lat/lon or postcode. '''

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db,
                           user=p.dbUser, password=p.dbPassword)
    camsCollHandle = dbh[p.camsCollection]

    # Convert the incoming metres radius to degrees
    latRad, lonRad = radialToLinearUnits(lat)
    scale = (latRad + lonRad) / 2.0
    radius = float(radius) / scale

    # Query mongo
    query = SON({'$near': [float(lon), float(lat)]})
    query['$maxDistance'] = radius
    res = camsCollHandle.find({'loc': query})

    # Get results - return None rather than an empty list when nothing matched
    results = [r for r in res]
    if len(results) == 0:
        results = None
    return results
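# Example call of getCamsByGeo above - cameras within 500 m of a point (coordinates
# illustrative). The 2d index on 'loc' that $near requires is assumed to be created
# by the collection setup scripts.
#
#   cams = getCamsByGeo(p, lat=51.508, lon=-0.128, radius=500)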
# - If so, break out the admin functions.
# - Look into using twisted as the framework or web server so that it is FAST to receive
#   the POSTs from external sources.
# - Extract the instagram bits from this code.
#///////////////////////////////////////////////////////////////////////////////////////////////

os.chdir('/home/dotcloud/current/')
cwd = os.getcwd()
cfgs = os.path.join(cwd, 'config/crowded.cfg')
p = getConfigParameters(cfgs)

# The mongo bits
c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db,
                       user=p.dbUser, password=p.dbPassword)
evCollHandle = dbh[p.eventsCollection]

logFile = os.path.join(p.errorPath, p.errorFile)
logging.basicConfig(filename=logFile, format='%(levelname)s:: \t%(asctime)s %(message)s', level='DEBUG')

#------------------------------------------------------------------------------------------------
def on_error(errFile='errors.txt', message=None):
    ''' Handles an error message '''
port = 27017
db = 'bam'
collection = 'timeseries'

lookback = datetime.timedelta(days=5)
queryEnd = datetime.datetime(2011, 5, 4)
queryStart = queryEnd - lookback

mgrs = None
mgrsPrecision = None
blankDay = buildBlankData()
#========================

print queryStart, queryEnd

# Get a mongo db handle
c, dbh = mdb.getHandle(host=host, port=port, db=db)

# Get a collection handle
collHandle = dbh[collection]

# Query based on a keyword only - note that both ends of the date range must live
# in a single 'start' clause, otherwise the second key silently overwrites the first
keyword = 'sick'
query = {'keyword': keyword,
         'start': {'$gte': queryStart, '$lte': queryEnd}}
if mgrs:
    query['mgrs'] = mgrs
if mgrsPrecision:
    query['mgrsPrecision'] = mgrsPrecision
def main(configFile=None):
    ''' Coordinates the management functions.
        Command line called, typically from a CRON. '''

    # Get the config file
    p = getConfigParameters(configFile)

    # Logging config
    logFile = os.path.join(p.errorPath, p.errorFile)
    logging.basicConfig(filename=logFile, format='%(levelname)s:: \t%(asctime)s %(message)s',
                        level=p.logLevel)

    # Streaming client
    connClientPath = os.path.dirname(p.errorPath)
    p.streamClient = os.path.join(connClientPath, 'src/connectionClient.py')

    # The mongo bits
    try:
        c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db,
                               user=p.dbUser, password=p.dbPassword)
        evCollHandle = dbh[p.eventsCollection]
        mgmtCollHandle = dbh[p.mgmtCollection]
        logging.debug("Connected and authenticated on the db.")
    except:
        logging.critical('Failed to connect to db and authenticate.', exc_info=True)
        sys.exit()

    # Create a new management document if needed
    initialOID = setInitialPid(mgmtCollHandle)

    # Get the current events from crowded
    crowdedEvents = getCrowdedEvents(p)

    # Get the events currently stored by this app
    myEvents = getLocalEvents(p, evCollHandle)

    # Compare the 2 sets of events: what's old and new?
    oldEvents, newEvents = checkEvents(crowdedEvents, myEvents)

    # Expire old events from db, so that the new stream reflects the correct interest
    for oldEvent in oldEvents:
        print oldEvent
        logging.debug('Expiring Old Event in DB: %s' % (oldEvent))
        res = expireOldEvent(evCollHandle, oldEvent)

    # Create new items in the db
    for newEvent in newEvents:
        logging.debug('Creating New Event in DB: %s' % (newEvent))
        res = createLocalEvent(evCollHandle, newEvent)

    # Get the old process ID and kill it off
    pid = getPid(mgmtCollHandle)
    logging.debug('Current PID: %s' % (pid))

    # Only continue if there is a change in the events
    if len(oldEvents) > 0 or len(newEvents) > 0:
        if pid:
            logging.debug('Killing old process with ID: %s' % (pid))
            res = killOldProcess(pid)

        # Now create the new one
        newPid = processNewEvent(p)
        logging.debug('Creating a new process with PID: %s' % (newPid))

        # Update the current process id in mongo
        res = storePid(mgmtCollHandle, newPid)
        logging.debug('Stored the new PID: %s' % (res))

    mdb.close(c, dbh)
    logging.shutdown()
def main(p, response):
    ''' Handles the subscription updates, including making the call
        to the endpoint and dumping to jms/text. '''

    # The mongo bits
    c, dbh = mdb.getHandle(host=p.dbHost, port=p.dbPort, db=p.db,
                           user=p.dbUser, password=p.dbPassword)
    evCollHandle = dbh[p.eventsCollection]

    # Accepts a list of dictionaries - the update message
    updates = json.loads(response)

    # Format the url and get the media metadata
    for upd in updates:
        objectId = upd['object_id']
        objectType = upd['object']

        # Does the next URL already exist for this object?
        #url = getNextUrl(p, objectId)

        # If the next (ie this) url hasn't been written to a file, build it from the config file
        #if url == None or len(url) < 1:
        url = buildUrl(p, objectType, objectId)

        # Get the media that has changed since the last time
        mediaMeta = getMediaUpdates(url)

        # Find the pagination info and save out the info concerning the next url for this subscription
        #handleMediaPagination(p, url, objectId, mediaMeta)

        # Get the last insert time
        lastUpdated = retrieveLatestImage(evCollHandle, objectId)
        latest = time.mktime(lastUpdated.timetuple())
        newLatest = time.mktime(lastUpdated.timetuple())

        # Update the tags and urls arrays
        if mediaMeta and mediaMeta.has_key('data'):
            #print "Number of Images:", len(mediaMeta['data'])
            for photo in mediaMeta['data']:

                # Append the datetime information
                try:
                    epochTime = float(photo['created_time'])
                    dt = datetime.datetime.fromtimestamp(epochTime)
                except Exception, e:
                    print e
                    # Without a valid timestamp there is nothing to compare - skip this photo
                    continue

                # For recent images
                if epochTime > latest:
                    # Update the list of images stored
                    res = updateImageUrls(evCollHandle, objectId, photo, dt)

                    # Update the tag information
                    res = updateTags(evCollHandle, objectId, photo)

                    # Get the latest image datetime
                    if epochTime > newLatest:
                        #print "improving newLatest", epochTime, newLatest
                        newLatest = epochTime

        # Update the latest datetime on record
        updateTimeStamp = datetime.datetime.fromtimestamp(newLatest)
        updateLatestInfo(evCollHandle, objectId, updateTimeStamp)
def main():
    ''' Script to build tweet objects from the VAST dataset and place them
        on a Queue and/or JMS for testing purposes.

        LIKELY SPEED IMPROVEMENTS:
        - BUILDING BLANK ARRAYS IN THE TIME SERIES TAKES A WHILE
        - PUTTING THE KEYWORDS IN A QUEUE, HAVING SET UP THE THREADS TO PROCESS EACH ONE.
        - ANY DUPLICATION CHECKS? '''

    start = datetime.datetime.utcnow()
    tweetProcessTimes = datetime.timedelta(seconds=0)
    #dripRate = 1.5

    # JMS destination
    destination = '/topic/test.vasttweets'
    hostIn = 'localhost'
    portIn = 61613

    # Reset the collections
    c, dbh = mdb.getHandle()
    dbh = mdb.setupCollections(dbh, dropCollections=True)

    # Make the JMS connection via STOMP and the jmsCode class
    #jms = jmsCode.jmsHandler(hostIn, portIn, verbose=True)
    #jms.connect()

    path = "/Users/brantinghamr/Documents/Code/eclipseWorkspace/bam/data/"
    #fName = "MicroblogsSample.csv"
    fName = "Microblogs.csv"
    outFName = "MicroblogsOrdered.csv"

    f = retrieveFile(path, fName)
    fo = open(os.path.join(path, outFName), 'w')

    x = 0

    # Start time
    earliestTweet = datetime.datetime(2011, 5, 18, 13, 25)
    earliestTweet = time.mktime(time.struct_time(earliestTweet.timetuple()))
    lastTweetTime = earliestTweet
    print "First Tweet Time: ", lastTweetTime

    # This speeds things up from seconds to minutes
    speedUpRate = 60.0

    records = []

    # Loop the lines and build tweet objects
    for line in f.readlines():
        #print line

        # Extract content from each line
        line = line.rstrip('\r').rstrip('\n').rstrip('\r')

        # Skip the header row
        if x == 0:
            x += 1
            continue

        if x % 1000 == 0:
            print "processed: ", x
        #if x > 1000:
        #    break

        line = line.split(',')
        tweetId, dt, latLon, text = line

        # Get the datetime group into seconds since UNIX time
        dtg = getTime(tweetId, dt)
        if not dtg:
            continue

        record = [tweetId, dtg, latLon, text]
        records.append(record)
        x += 1

    f.close()

    sortedTable = sortTable(records, col=1)

    # Now loop the sorted list and write out to a new file
    for record in sortedTable:
        lineOut = "%s,%s,%s,%s\n" % (record[0], record[1], record[2], record[3])
        fo.write(lineOut)

    fo.close()