def execute(): print 'Started Entity Aggregation... ', start_timing() map_function = Code( open(join(JAVASCRIPT_PATH, MAP_FUNCTION_FILENAME), 'r').read()) reduce_function = Code( open(join(JAVASCRIPT_PATH, REDUCE_FUNCTION_FILENAME), 'r').read()) aggregate_map_function = Code( open(join(JAVASCRIPT_PATH, AGGREGATION_MAP_ADD_FUNCTION_FILENAME), 'r').read()) aggregate_reduce_function = Code( open(join(JAVASCRIPT_PATH, AGGREGATION_REDUCE_FUNCTION_FILENAME), 'r').read()) temp_raw.map_reduce(map_function, reduce_function, TEMP_RESULTS_COLLECTION_NAME) temp_results.map_reduce(aggregate_map_function, aggregate_reduce_function, {'reduce': RESULTS_COLLECTION_NAME}) if temp_raw.count() > 0: copy_into_collection(temp_raw.find(no_cursor_timeout=True), coll) temp_results.drop() temp_raw.drop() client.close() print 'Finished' stop_timing()
def save_to_collection():
    """Copy matching raw documents into the topic collection of each entity.

    For every key in ``entity_pseudos`` and every value listed under that key,
    the topic id is looked up in ``entity_topic``, the topic collection is
    checked/created via check_or_create_collection(), and all documents in
    ``raw_collection`` whose ENTITIES field matches the value are copied into
    that topic collection.

    NOTE(review): presumably ``entity_pseudos`` maps a lower-cased entity name
    to its spelling variants -- confirm against where it is built.
    NOTE(review): the topic lookup and check_or_create_collection() call do
    not depend on the inner loop variable, yet run once per variant; they are
    left in place to keep the call sequence unchanged.
    """
    for lowered, variants in entity_pseudos.items():
        for variant in variants:
            topic = entity_topic[lowered]
            target_name = TOPIC_COLLECTION_NAME(topic)
            check_or_create_collection(TOPIC_TWEETS_DB_NAME, target_name,
                                       Collection.TOPIC)
            target = topic_db[target_name]
            matching_docs = raw_collection.find({ENTITIES: variant})
            copy_into_collection(matching_docs, target)
def execute(): print 'Started Entity Aggregation... ', start_timing() map_function = Code(open(join(JAVASCRIPT_PATH, MAP_FUNCTION_FILENAME), 'r').read()) reduce_function = Code(open(join(JAVASCRIPT_PATH, REDUCE_FUNCTION_FILENAME), 'r').read()) aggregate_map_function = Code(open(join(JAVASCRIPT_PATH, AGGREGATION_MAP_ADD_FUNCTION_FILENAME), 'r').read()) aggregate_reduce_function = Code(open(join(JAVASCRIPT_PATH, AGGREGATION_REDUCE_FUNCTION_FILENAME), 'r').read()) temp_raw.map_reduce(map_function, reduce_function, TEMP_RESULTS_COLLECTION_NAME) temp_results.map_reduce(aggregate_map_function, aggregate_reduce_function, {'reduce': RESULTS_COLLECTION_NAME}) if temp_raw.count() > 0: copy_into_collection(temp_raw.find(no_cursor_timeout=True), coll) temp_results.drop() temp_raw.drop() client.close() print 'Finished' stop_timing()