from datetime import datetime

# MongoDB, GridDB, Grid, STL, and load_tweets are project-local helpers
# defined elsewhere in this repo.


def load_tweets_to_grids():
    """Read all tweets back from MongoDB and index them into a GridDB."""
    # mongodb
    mg = MongoDB()
    mg.connect()
    tweets = mg.find()
    grid_db = GridDB()
    grid_db.add(tweets)
    return grid_db
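
# The MongoDB wrapper above is project-specific. As a rough illustration only,
# here is a minimal sketch of what connect()/find() might wrap, assuming
# pymongo; the database and collection names ('sandy', 'tweets') are guesses,
# not taken from this repo.
def _example_find_tweets_with_pymongo():
    from pymongo import MongoClient  # assumes pymongo is installed

    client = MongoClient('localhost', 27017)  # roughly what connect() does
    coll = client['sandy']['tweets']          # hypothetical db/collection names
    return list(coll.find({}))                # what find() might return
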
def all_grids():
    mg = MongoDB()
    mg.connect()
    griddb = GridDB()
    print('querying grid volumes...')
    # aggregate tweet volumes inside a one-month window of the Sandy dataset
    results = mg.group_by([{'$match': {'created_at': {
        '$gt': datetime.strptime('2012-10-15T20:00:02Z', '%Y-%m-%dT%H:%M:%SZ'),
        '$lt': datetime.strptime('2012-11-15T20:00:02Z', '%Y-%m-%dT%H:%M:%SZ')}}}])
    # print(results)
    griddb.add(results)
    # build a daily ('D') time series per grid and run seasonal decomposition
    ret = Grid.get_raw_pandas_ts(results, 'D')
    STL.seasonal_decomposition(ret)
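
# The STL wrapper above is project-specific. As one way such a decomposition
# can be done, here is a minimal sketch using statsmodels (an assumption; this
# repo's STL.seasonal_decomposition may be implemented differently).
def _example_stl_decomposition(ts):
    """Decompose a daily pandas Series into trend/seasonal/residual parts."""
    from statsmodels.tsa.seasonal import STL as StatsmodelsSTL

    # period=7 assumes a weekly cycle in daily tweet counts
    result = StatsmodelsSTL(ts, period=7).fit()
    return result.trend, result.seasonal, result.resid
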
def write_tweets_to_mongo():
    """
    Load tweets, decide which grid each tweet falls in, and insert the tweets
    into MongoDB.

    This function is the initial step of the pipeline: all following
    db-specific operations are based on the dataset inserted here. It is
    called only once for the entire pipeline.
    """
    data_file = "data/sandy_all.txt"
    kml_file = ["data/nj_ct.kml", "data/nyc_ct_sea.kml"]
    # kml_file = ['data/nyc_cb_sea.kml']
    tweets = load_tweets(data_file)
    # trajectories = tweets_to_trajectories(tweets)

    # build grid polygons from the KML files and bin the tweets into them
    grid_db = GridDB()
    grid_db.load_grid_from_file(kml_file[0])
    grid_db.load_grid_from_file(kml_file[1])
    # grid_db.write_grids_to_json('shapefile.json')
    grid_db.check_and_add(tweets)

    #####################################################
    ############## index tweets into MongoDB ############
    #####################################################
    mg = MongoDB()
    mg.connect()
    print("connected...")
    mg.drop()  # start from a clean collection

    # serialize tweets to dicts for insertion
    rst = [t.to_dict() for t in grid_db.get_tweets()]
    print("inserting...")
    mg.insert_tweets(rst)
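
# A hedged sketch of how these steps might be chained: ingest once, then
# query, following the docstring above. The real pipeline may be driven by a
# different entry point.
if __name__ == '__main__':
    write_tweets_to_mongo()  # one-time ingest: tweets -> grids -> MongoDB
    all_grids()              # aggregate grid volumes and decompose the series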