def connect_index(reset=False):
    """Generator used as a context: prepare the term index and yield indexing tools.

    Yields a ``(index_term, pending_terms)`` pair, where ``index_term`` queues a
    term for bulk indexing (returning its tokens for further crawling) and
    ``pending_terms`` is a live view of the terms queued so far.  On generator
    close, any pending actions are committed (with retries) and the index is
    refreshed.

    :param reset: when True, delete the existing index before recreating it.
    """
    import hashlib  # local import: needed for stable document ids (see below)

    if reset:
        print('deleting existing dict')
        elastic.delete_index()
    # NOTE(review): assumed create_index() is safe to call when the index
    # already exists — confirm against the elastic helper.
    elastic.create_index()

    actions = {}  # term -> bulk action dict, or None once committed

    def index_term(term):
        """Look up definitions and synonyms of *term*, queue an index action,
        and return the tokens of its definitions/synonyms for further indexing.

        Returns an empty list when the term has no definitions.
        """
        definitions, synonyms = nlp.get_definitions_synonyms(term)
        if not definitions:
            return []
        doc = {'term': term, 'definitions': definitions, 'synonyms': synonyms}
        actions[term] = {
            '_op_type': 'index',
            # md5 keeps the id stable across runs; the builtin hash() is
            # salted per interpreter process, which would re-index every
            # term under a fresh id on each run.
            '_id': hashlib.md5(term.encode('utf-8')).hexdigest(),
            '_index': elastic.SEARCH_INDEX,
            '_type': 'term',
            'doc': doc,
        }
        # Flush in batches of 1000 queued terms to bound memory use.
        actions_count = len(actions)
        if actions_count > 1000 and actions_count % 1000 == 0:
            retry_commit()
        return nlp.tokenize(nlp.join(*definitions + synonyms))

    def commit_index_actions():
        """Bulk-send every still-pending action; return True if any were sent."""
        # Committed entries are set to None below, so skip them here.
        actionables = [action for action in actions.values() if action]
        if not actionables:
            return False
        successes, errors = elastic.helpers.bulk(elastic.client, actionables)
        if errors:
            print(errors)
        print('committed %s terms\n' % successes)
        # Keep the keys (the yielded view must stay complete) but mark
        # each action as already sent.
        for term in actions:
            actions[term] = None
        return True

    def retry_commit():
        """Commit with growing back-off on connection timeouts.

        Best-effort by design: after the final timeout the failure is
        swallowed and the pending actions stay queued for the next flush.
        """
        for timeout in (0, 10, 20, 30):
            try:
                return commit_index_actions()
            except elastic.exceptions.ConnectionTimeout as error:
                print('errored %s, sleeping for %s' % (error, timeout))
                time.sleep(timeout)

    try:
        # .keys() is a live view: the caller sees terms accumulate as
        # index_term() is invoked.
        yield index_term, actions.keys()
    finally:
        if actions:
            retry_commit()
        elastic.refresh_index()
def upsert_cities(cities, reset=False):
    """Bulk-upsert *cities* into the search index and refresh it.

    :param cities: iterable of city records, converted to bulk actions by
        ``create_city_upsert``.
    :param reset: when True, delete the existing index before recreating it.
    :returns: True on completion.
    """
    if reset:
        elastic.delete_index()
    # NOTE(review): assumed create_index() tolerates an existing index,
    # matching connect_index() — confirm against the elastic helper.
    elastic.create_index()
    # map() is lazy on py3; helpers.bulk consumes any iterable of actions.
    actions = map(create_city_upsert, cities)
    # Generous timeout: city upserts can be a large single batch.
    elastic.helpers.bulk(elastic.client, actions, request_timeout=60)
    elastic.refresh_index()
    return True
# NOTE(review): this is a near-verbatim duplicate of the connect_index defined
# earlier in this file; at import time this second definition shadows the
# first.  Keep them in sync (or better, delete one) — confirm which one the
# project intends to keep.
def connect_index(reset=False):
    """Generator used as a context: prepare the term index and yield indexing tools.

    Yields a ``(index_term, pending_terms)`` pair, where ``index_term`` queues a
    term for bulk indexing (returning its tokens for further crawling) and
    ``pending_terms`` is a live view of the terms queued so far.  On generator
    close, any pending actions are committed (with retries) and the index is
    refreshed.

    :param reset: when True, delete the existing index before recreating it.
    """
    import hashlib  # local import: needed for stable document ids (see below)

    if reset:
        print('deleting existing dict')
        elastic.delete_index()
    elastic.create_index()

    actions = {}  # term -> bulk action dict, or None once committed

    def index_term(term):
        """Look up definitions and synonyms of *term*, queue an index action,
        and return the tokens of its definitions/synonyms for further indexing.
        """
        definitions, synonyms = nlp.get_definitions_synonyms(term)
        if not definitions:
            return []
        doc = {'term': term, 'definitions': definitions, 'synonyms': synonyms}
        actions[term] = {
            '_op_type': 'index',
            # md5 keeps the id stable across runs; the builtin hash() is
            # salted per interpreter process.
            '_id': hashlib.md5(term.encode('utf-8')).hexdigest(),
            '_index': elastic.SEARCH_INDEX,
            '_type': 'term',
            'doc': doc,
        }
        # Flush in batches of 1000 queued terms to bound memory use.
        actions_count = len(actions)
        if actions_count > 1000 and actions_count % 1000 == 0:
            retry_commit()
        return nlp.tokenize(nlp.join(*definitions + synonyms))

    def commit_index_actions():
        """Bulk-send every still-pending action; return True if any were sent."""
        actionables = [action for action in actions.values() if action]
        if not actionables:
            return False
        successes, errors = elastic.helpers.bulk(elastic.client, actionables)
        if errors:
            print(errors)
        print('committed %s terms\n' % successes)
        # Keep the keys (the yielded view must stay complete) but mark
        # each action as already sent.
        for term in actions:
            actions[term] = None
        return True

    def retry_commit():
        """Commit with growing back-off on connection timeouts (best-effort)."""
        for timeout in (0, 10, 20, 30):
            try:
                return commit_index_actions()
            except elastic.exceptions.ConnectionTimeout as error:
                print('errored %s, sleeping for %s' % (error, timeout))
                time.sleep(timeout)

    try:
        # .keys() is a live view: the caller sees terms accumulate.
        yield index_term, actions.keys()
    finally:
        if actions:
            retry_commit()
        elastic.refresh_index()