def push_incremental_data_to_es():
    # Incrementally push raw data to Elasticsearch, starting from the
    # 'elasticsearch.lastUpdated' timestamp stored in the params collection.
    params_collection = mongo_helpers.get_mongo_db_con(
        database=_MONGO_PARAMS_DB)[mongo_conf[_MONGO_PARAMS_COLLECTION]]
    timestamp_range = obtain_time_ranges(params_collection)
    LOGGER.debug(
        "Started river to push data to ES for {0}".format(timestamp_range))

    # Translate the timestamp range into an ObjectId range so the raw data
    # collection can be filtered on '_id'.
    mongo_object_ids_range = mongo_helpers.get_server_object_ids(
        timestamp_range=timestamp_range)
    raw_data_collection = mongo_helpers.get_mongo_db_con(
        database=_MONGO_RAW_DATA_DB)[mongo_conf[_MONGO_RAW_DATA_COLLECTION]]
    query_filters = {
        '_id': {'$gte': mongo_object_ids_range[0],
                '$lte': mongo_object_ids_range[1]},
        'category': {'$in': _categories},
    }

    # Stream the matching documents through the processing pipeline.
    mongo_fetch_generator = mongo_helpers.create_mongo_fetch_generator(
        raw_data_collection, query_filters)
    process_pipeline(raw_data_collection, mongo_fetch_generator, query_filters)

    # Record the upper bound of the processed range so the next run resumes
    # where this one stopped.
    params_collection.update(
        {'elasticsearch.lastUpdated': {'$exists': True}},
        {'$set': {'elasticsearch.lastUpdated': str(timestamp_range[1])}})
def push_data_to_elasticsearch():
    # Backfill: push events for every configured collection over the last
    # _NUMBER_OF_DAYS, one date range at a time.
    for collection in _collections:
        for date_range in mongo_helpers.get_date_range_list(_NUMBER_OF_DAYS):
            print("Started pushing events for {collection} of {date_range}".format(
                collection=collection, date_range=date_range))
            raw_data_collection = mongo_helpers.get_mongo_db_con(
                database=_MONGO_RAW_DATA_DB)[collection]

            # Restrict the query to the ObjectId range covering this date range.
            mongo_object_ids_range = mongo_helpers.get_server_object_ids(
                date_range)
            query_filters = {
                '_id': {'$gte': mongo_object_ids_range[0],
                        '$lte': mongo_object_ids_range[1]},
            }

            # Stream the matching documents through the processing pipeline.
            mongo_fetch_generator = mongo_helpers.create_mongo_fetch_generator(
                raw_data_collection, query_filters)
            process_pipeline(
                raw_data_collection, mongo_fetch_generator, query_filters)
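
# A minimal usage sketch (an assumption, not part of the original module): how
# the two jobs above might be wired to a command-line entry point. The
# 'backfill' argument name is hypothetical.
if __name__ == '__main__':
    import sys

    if len(sys.argv) > 1 and sys.argv[1] == 'backfill':
        # One-off backfill across all configured collections and date ranges.
        push_data_to_elasticsearch()
    else:
        # Default: incremental push since the last recorded Elasticsearch update.
        push_incremental_data_to_es()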