def __init__(self, config_option="prod", dryrun=True):
    """Initialise instance state and, unless in dry-run mode, call batch_init.

    Args:
        config_option: forwarded to ``es_api.get_esconfig`` to obtain
            ``self.esconfig``; defaults to ``"prod"``.
        dryrun: stored on the instance.  When true (the default),
            ``es_api.batch_init`` is not called.
    """
    self.dryrun = dryrun
    self.datasets = ES_DATASET_CONFIG
    # Starts empty; missing keys are created as empty lists on first access.
    self.esdata = collections.defaultdict(list)

    self.esconfig = es_api.get_esconfig(config_option)
    self.api_nlp = ZhidaoNlp()

    if self.dryrun:
        return

    # Only reached outside dry-run mode: initialise every configured dataset.
    es_api.batch_init(self.esconfig, self.datasets.values())
def sendto_es(eavps):
    """Hand ``eavps`` to ``run_esbulk_rows`` using the "index" action.

    The connection settings come from ``get_esconfig(ENV)`` and the dataset
    descriptor is the ``ES_DATASET_CONFIG`` global.  Nothing is returned; the
    result of ``run_esbulk_rows`` is discarded.
    """
    es_settings = get_esconfig(ENV)
    # Original author's note: post 'http://localhost:9200/_bulk'
    run_esbulk_rows(eavps, "index", es_settings, ES_DATASET_CONFIG)
# Dataset descriptor passed to batch_init() below.
ES_DATASET_CONFIG = {
    "description": "复旦百科实体属性值0711",
    "es_index": "fudankg0711",
    "es_type": "fudankg_faq",
    # The schema file is resolved next to this module; os.path.join is used
    # instead of concatenating with a hard-coded "/" separator.
    "filepath_mapping": os.path.join(
        os.path.abspath(os.path.dirname(__file__)),
        "fudankg_es_schema.json",
    ),
}

# Original author's notes on the requests observed during initialisation.
# NOTE(review): the URLs mention index 'fudankg0623' while "es_index" above is
# 'fudankg0711' -- presumably stale; TODO confirm.
#   1. search es_index 'http://localhost:9200/fudankg0623/_search?', no hits
#   2. then post 'http://localhost:9200/fudankg0623'
#   3. get 'http://localhost:9200/fudankg0623/fudankg_faq/_mapping', empty return
#   4. then put 'http://localhost:9200/fudankg0623/fudankg_faq/_mapping?pretty' with json
batch_init(get_esconfig(ENV), [ES_DATASET_CONFIG])


def insert():
    """Build one flat list from every result of get_entity_avps_results().

    Each result is unpacked as ``(word, entity, avps)``; ``entity`` and
    ``avps`` are passed to ``parse_fudan_entity`` and whatever it returns is
    appended, item by item, to the output list.  Despite the name, this
    function itself only builds and returns the list.

    Returns:
        list: the concatenated ``parse_fudan_entity`` outputs, in result order.
    """
    eavps = []
    # The first field of each result (the word) is not needed here.
    for _word, entity, avps in get_entity_avps_results():
        eavps.extend(parse_fudan_entity(entity, avps))
    return eavps


def sendto_es(eavps):
    esconfig = get_esconfig(ENV)
    # post 'http://localhost:9200/_bulk'
def sendto_es(jsons):
    """Forward ``jsons`` to ``run_esbulk_rows`` with the "index" action.

    Uses the configuration returned by ``get_esconfig(ENV)`` together with
    the ``ES_DATASET_CONFIG`` global.  Returns nothing; the result of
    ``run_esbulk_rows`` is discarded.
    """
    bulk_action = "index"
    connection_cfg = get_esconfig(ENV)
    run_esbulk_rows(jsons, bulk_action, connection_cfg, ES_DATASET_CONFIG)