from datetime import datetime

from bottle import Bottle, request, response, json_dumps

# Project-local modules (import path is an assumption inferred from usage):
from db import MongoDB, GridDB, Grid, STL, load_tweets

app = Bottle()  # assumed app object; wrapper() at the bottom reads app.config


def query_grid_volumes():
    """
    given {start_time, end_time, grid_id [optional]}
    return -> [{grid_id, volume}, ...]
    """
    start_time = datetime.strptime(request.query.get('start_time'), '%Y-%m-%dT%H:%M:%SZ')
    end_time = datetime.strptime(request.query.get('end_time'), '%Y-%m-%dT%H:%M:%SZ')
    grid_id = request.query.get('grid_id')

    mg = MongoDB()
    mg.connect()

    print('querying grid volumes...')

    # Build the match stage; grid_id is an optional filter (see docstring).
    match = {'created_at': {'$gt': start_time, '$lt': end_time}}
    if grid_id:
        match['grid_id'] = grid_id

    # Two-stage group: first collapse to distinct (grid_id, user_id) pairs,
    # then count the distinct users per grid.
    results = mg.group_by([{'$match': match},
                           {'$group': {'_id': {'grid_id': '$grid_id', 'user_id': '$user_id'},
                                       'count': {'$sum': 1}}},
                           {'$group': {'_id': '$_id.grid_id', 'count': {'$sum': 1}}}])

    ret = []
    for result in results:
        ret.append({'grid_id': result['_id'], 'volume': result['count']})

    response.content_type = 'application/json'
    return json_dumps(ret, indent=2)
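
# Hedged usage sketch (the route path is an assumption; the handler above does
# not register one itself): with query_grid_volumes() mounted on the Bottle
# app, a request such as
#   GET /grid_volumes?start_time=2012-10-15T00:00:00Z&end_time=2012-11-15T00:00:00Z
# would return JSON like [{"grid_id": "<grid id>", "volume": 42}, ...].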


def load_tweets_to_grids():
    """Read all tweets from MongoDB and index them into an in-memory GridDB."""
    mg = MongoDB()
    mg.connect()

    tweets = mg.find()  # no filter: load the entire tweet collection

    grid_db = GridDB()
    grid_db.add(tweets)

    return grid_db


def all_grids():
    """Build a daily volume series over all grids and run STL decomposition."""
    mg = MongoDB()
    mg.connect()
    griddb = GridDB()
    print('querying grid volumes...')
    results = mg.group_by([{'$match': {'created_at': {'$gt': datetime.strptime('2012-10-15T20:00:02Z', '%Y-%m-%dT%H:%M:%SZ'),
                                                      '$lt': datetime.strptime('2012-11-15T20:00:02Z', '%Y-%m-%dT%H:%M:%SZ')}}}])
    griddb.add(results)

    ret = Grid.get_raw_pandas_ts(results, 'D')

    STL.seasonal_decomposition(ret)
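    # Note (interpretation, not from the source): STL splits the daily series
    # into trend, seasonal, and residual components; the residual is the
    # usual place to look for volume anomalies.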


def single_grid(grid_id):
    """Build an hourly volume series for one grid and run STL decomposition."""
    mg = MongoDB()
    mg.connect()
    start_time = datetime.strptime('2012-10-15T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ')
    end_time = datetime.strptime('2012-11-15T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ')

    print('querying grid volumes...')
    results = mg.group_by([{'$match': {'created_at': {'$gt': start_time, '$lt': end_time},
                                       'grid_id': grid_id}}])
    print(results)
    ret = Grid.get_raw_pandas_ts(results, 'H')
    print(STL.seasonal_decomposition(ret))


def write_tweets_to_mongo():
    """Load raw tweets, assign each tweet to its grid cell, and insert the
    tweets into MongoDB.

    This is the initial step of the pipeline: all subsequent db-specific
    operations read from the dataset inserted here, so the function only
    needs to be called once.
    """
    data_file = "data/sandy_all.txt"
    kml_file = ["data/nj_ct.kml", "data/nyc_ct_sea.kml"]
    # kml_file = ['data/nyc_cb_sea.kml']

    tweets = load_tweets(data_file)

    grid_db = GridDB()
    for kml in kml_file:
        grid_db.load_grid_from_file(kml)
    # grid_db.write_grids_to_json('shapefile.json')  # optional grid export

    grid_db.check_and_add(tweets)

    # index tweets into MongoDB
    mg = MongoDB()
    mg.connect()
    print("connected...")
    mg.drop()

    # serialize tweets to plain dicts for insertion
    rst = [t.to_dict() for t in grid_db.get_tweets()]

    print("inserting...")
    mg.insert_tweets(rst)
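
# Hedged usage note (inferred from the docstring above, not shown in the
# source): run write_tweets_to_mongo() once to seed MongoDB; the query
# handlers and analysis helpers then operate on the inserted collection, e.g.
#
#     write_tweets_to_mongo()   # one-time seeding
#     single_grid('nyc_001')    # 'nyc_001' is a hypothetical grid id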


def query_time_series():
    """
    given {start_time, end_time, aggregation}
    return -> [{time, freq}, ...]
    """
    start_time = datetime.strptime(request.query.get("start_time"), '%Y-%m-%dT%H:%M:%SZ')
    end_time = datetime.strptime(request.query.get("end_time"), '%Y-%m-%dT%H:%M:%SZ')
    aggregation = request.query.get("aggregation")

    mg = MongoDB()
    mg.connect()

    print("querying time series...")
    results = mg.find( {'created_at': {'$gt': start_time, '$lt': end_time}} )

    results = Grid.get_ts(results, aggregation)

    ret = []
    for result in results:
        ret.append({'start_time': result[0].strftime('%Y-%m-%dT%H:%M:%SZ'),
                    'frequency': result[1].item()})

    response.content_type = 'application/json'
    return json_dumps(ret, indent=2)
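
# Hedged usage sketch (path and values are assumptions): the handler above
# would typically answer
#   GET /time_series?start_time=2012-10-15T00:00:00Z&end_time=2012-11-15T00:00:00Z&aggregation=H
# with JSON like [{"start_time": "2012-10-15T00:00:00Z", "frequency": 12}, ...].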


if __name__ == "__main__":

    #     write_tweets_to_mongo()
    grid_db = load_tweets_to_grids()
    print(grid_db.get_outlier_grid_ids("H"))
    print("tweets loaded")

    # for grid_id in grid_db.grid_cache:
    #     print(grid_id, grid_db.grid_cache[grid_id].get_ts(grid_db.get_tweets(), 'H'))
    exit()
    mg = MongoDB()
    mg.connect()

    print("querying grid volumes...")

    results = mg.group_by([{"$match": {"created_at": {"$gt": datetime.strptime("2012-10-15T20:00:02Z", "%Y-%m-%dT%H:%M:%SZ"),
                                                      "$lt": datetime.strptime("2012-10-22T20:00:02Z", "%Y-%m-%dT%H:%M:%SZ")}}}])
    print(results)


def wrapper(func, *args, **kwargs):
    # Minimal completion (assumption; the original fragment ends right after
    # connect): open the configured MongoDB connection, then delegate.
    mongo = MongoDB()
    mongo.connect(app.config['app.mongohost'])
    return func(*args, **kwargs)
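
# Hedged usage sketch: wrapper() could front any handler above, e.g.
#   wrapper(query_time_series)
# which connects to MongoDB before delegating; the pairing is illustrative,
# not taken from the original source.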