def push_incremental_data_to_es():
    """Run one incremental pass over the raw-data collection.

    Steps, as visible in this function:

    1. Obtain a time range from the params collection via
       ``obtain_time_ranges``.
    2. Convert that range to a pair of ``_id`` bounds via
       ``mongo_helpers.get_server_object_ids``.
    3. Build a fetch generator over the raw-data documents whose ``_id``
       lies within those bounds (inclusive on both ends) and whose
       ``category`` is one of ``_categories``, and hand it to
       ``process_pipeline``.
    4. Store ``str(timestamp_range[1])`` as ``elasticsearch.lastUpdated``
       in the params collection.

    The checkpoint in step 4 is written only after ``process_pipeline``
    returns, so an exception raised there leaves the previous value in place.
    """
    params_collection = mongo_helpers.get_mongo_db_con(
        database=_MONGO_PARAMS_DB)[mongo_conf[_MONGO_PARAMS_COLLECTION]]
    timestamp_range = obtain_time_ranges(params_collection)
    LOGGER.debug(
        "Started river to push data to ES for {0}".format(timestamp_range))

    object_id_bounds = mongo_helpers.get_server_object_ids(
        timestamp_range=timestamp_range)
    raw_data_collection = mongo_helpers.get_mongo_db_con(
        database=_MONGO_RAW_DATA_DB)[mongo_conf[_MONGO_RAW_DATA_COLLECTION]]
    query_filters = {
        '_id': {'$gte': object_id_bounds[0], '$lte': object_id_bounds[1]},
        'category': {'$in': _categories},
    }
    mongo_fetch_generator = mongo_helpers.create_mongo_fetch_generator(
        raw_data_collection, query_filters)
    process_pipeline(raw_data_collection, mongo_fetch_generator, query_filters)

    # `$exists` takes a boolean. The previous string 'true' only worked
    # because any non-empty string is truthy ('false' would have matched the
    # same documents), so use a real boolean to state the intent.
    # NOTE(review): Collection.update() is deprecated in PyMongo 3 and removed
    # in PyMongo 4; switch to update_one() once the driver version in use is
    # confirmed.
    params_collection.update(
        {'elasticsearch.lastUpdated': {'$exists': True}},
        {'$set': {'elasticsearch.lastUpdated': str(timestamp_range[1])}})
Пример #2
0
def push_data_to_elasticsearch():
    """Run the processing pipeline for every collection and date range.

    For each name in ``_collections`` and each date range returned by
    ``mongo_helpers.get_date_range_list(_NUMBER_OF_DAYS)``, this prints a
    progress line, converts the date range to a pair of ``_id`` bounds via
    ``mongo_helpers.get_server_object_ids``, builds a fetch generator over
    the documents whose ``_id`` lies within those bounds (inclusive on both
    ends), and hands it to ``process_pipeline``.
    """
    for collection in _collections:
        for date_range in mongo_helpers.get_date_range_list(_NUMBER_OF_DAYS):
            # Single-argument print call: emits the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("Started pushing events for {collection} of {date_range}".format(
                collection=collection, date_range=date_range))
            raw_data_collection = mongo_helpers.get_mongo_db_con(
                database=_MONGO_RAW_DATA_DB)[collection]
            object_id_bounds = mongo_helpers.get_server_object_ids(date_range)
            query_filters = {
                '_id': {
                    '$gte': object_id_bounds[0],
                    '$lte': object_id_bounds[1],
                },
            }
            mongo_fetch_generator = mongo_helpers.create_mongo_fetch_generator(
                raw_data_collection, query_filters)
            process_pipeline(
                raw_data_collection, mongo_fetch_generator, query_filters)