parser.add_argument('--es_dest_host', help='Store result data into elasticsearch.', default='chara.cs.illinois.edu:9200') parser.add_argument('--es_dest_index', help='Write result data into this index.', default='cs225') parser.add_argument('--es_dest_type', help='Write result data into this type.', default='test_answers') parser.add_argument('--query_function', help='The query function to search for matches with.', required=True) args = parser.parse_args() test_iterator = ElasticsearchIterator( [args.es_test_question_host], args.es_test_question_index, args.es_test_question_type) dest_es = Elasticsearch([args.es_dest_host]) current_doc = test_iterator.next() while current_doc is not None: answer_pair = get_answers([args.es_question_host], args.es_question_index, args.es_question_type, current_doc, 1, query_functions[args.query_function]) answer_doc = { 'answer_id': answer_pair[0][0], 'answer': answer_pair[0][1], 'c_id': current_doc['c_id'] }
if __name__ == '__main__':
    # Script entry point. Polls ElasticsearchIterator for documents one at a
    # time, passes each to get_structured_docs, and indexes every resulting
    # document whose 'answer' value is truthy into the destination cluster.
    parser = argparse.ArgumentParser(description='Structure raw json data.')

    # (flag, help text, extra add_argument keywords). Every option is
    # required; the order here is the order shown in --help.
    option_specs = (
        ('--es_source_host', 'Read raw data from elasticsearch.', {}),
        ('--es_source_index', 'Read raw data from this index.', {}),
        ('--es_source_type', 'Read raw data from this type.', {}),
        # One or more destination hosts may be supplied.
        ('--es_dest_hosts', 'Store structured data into elasticsearch.',
         {'nargs': '+'}),
        ('--es_dest_index', 'Write structured data into this index.', {}),
        ('--es_dest_type', 'Write structured data into this type.', {}),
    )
    for flag, help_text, extra in option_specs:
        parser.add_argument(flag, help=help_text, required=True, **extra)
    args = parser.parse_args()

    # The source takes a single host (wrapped in a list); the destination
    # option is already a list because of nargs='+'.
    source_iterator = ElasticsearchIterator(
        [args.es_source_host],
        args.es_source_index,
        args.es_source_type,
    )
    dest_es = Elasticsearch(args.es_dest_hosts)

    while True:
        current_doc = source_iterator.next()
        # The iterator's next() returning None is treated as end-of-data.
        if current_doc is None:
            break
        for structured_doc in get_structured_docs(current_doc):
            # Skip structured documents that carry no answer.
            if not structured_doc['answer']:
                continue
            dest_es.index(
                args.es_dest_index,
                args.es_dest_type,
                body=structured_doc,
            )