def load(tweets):
    """Bulk-index tweet documents into Elasticsearch.

    Connects via ``config.es_host``/``config.es_port``, ensures the index
    exists with the expected mapping (dropping and recreating it when the
    mapping cannot be applied), then indexes ``tweets`` in chunks of
    ``bulk_chunk_size``.

    Relies on module-level names: ``idx_name``, ``doc_type``,
    ``tweet_mapping``, ``mapping``, ``id_field``, ``bulk_chunk_size``,
    ``get_tweet``, ``create_index`` — TODO confirm they are defined in
    this module.
    """
    es = Elasticsearch(host=config.es_host, port=config.es_port)
    if es.indices.exists(idx_name):
        print('index {} already exists'.format(idx_name))
        try:
            es.indices.put_mapping(doc_type, tweet_mapping, idx_name)
        except ElasticsearchException as e:
            print('error adding mapping:\n' + str(e))
            # Mapping conflict: drop the index and recreate it from scratch.
            es.indices.delete(idx_name)
            create_index(es, idx_name, mapping)
    else:
        print('index {} does not exist'.format(idx_name))
        create_index(es, idx_name, mapping)

    k = 0
    data = []
    tweets_len = len(tweets)
    for doc in tweets:
        tweet = get_tweet(doc)
        data.append({
            "_index": idx_name,
            "_type": doc_type,
            "_id": tweet[id_field],
            "_source": tweet,
        })
        k += 1
        # Flush on every full chunk, and on the final (possibly partial) one.
        # BUG FIX: the original referenced undefined `index_name` here and
        # used Python-2 print statements.
        if k % bulk_chunk_size == 0 or k == tweets_len:
            print('ElasticSearch bulk index (index: {INDEX}, type: {TYPE})...'.format(
                INDEX=idx_name, TYPE=doc_type))
            success, _ = bulk(es, data)
            print('ElasticSearch indexed %d documents' % success)
            data = []
def load(tweets):
    """Bulk-index tweet documents into Elasticsearch.

    Ensures the index exists with the expected mapping (recreating it on a
    mapping error), then indexes ``tweets`` in chunks of ``bulk_chunk_size``.

    BUG FIX: the original mixed Python-2 ``print "..."`` statements (a
    SyntaxError under Python 3) with ``print(...)`` calls; all prints are
    now function calls.

    Relies on module-level names: ``index_name``, ``doc_type``,
    ``tweet_mapping``, ``mapping``, ``id_field``, ``bulk_chunk_size``,
    ``get_tweet``, ``create_index``.
    """
    es = Elasticsearch(host=config.es_host, port=config.es_port)
    if es.indices.exists(index_name):
        print('index {} already exists'.format(index_name))
        try:
            es.indices.put_mapping(doc_type, tweet_mapping, index_name)
        except ElasticsearchException as e:
            print('error putting mapping:\n' + str(e))
            print('deleting index {}...'.format(index_name))
            # Mapping conflict: drop the index and recreate it from scratch.
            es.indices.delete(index_name)
            create_index(es, index_name, mapping)
    else:
        print('index {} does not exist'.format(index_name))
        create_index(es, index_name, mapping)

    counter = 0
    bulk_data = []
    list_size = len(tweets)
    for doc in tweets:
        tweet = get_tweet(doc)
        bulk_data.append({
            "_index": index_name,
            "_type": doc_type,
            "_id": tweet[id_field],
            "_source": tweet,
        })
        counter += 1
        # Flush on every full chunk, and on the final (possibly partial) one.
        if counter % bulk_chunk_size == 0 or counter == list_size:
            print('ElasticSearch bulk index (index: {INDEX}, type: {TYPE})...'.format(
                INDEX=index_name, TYPE=doc_type))
            success, _ = bulk(es, bulk_data)
            print('ElasticSearch indexed %d documents' % success)
            bulk_data = []
def load(doc):
    """Index one tweet document and return the Elasticsearch response."""
    client = Elasticsearch(host=config.es_host, port=config.es_port)
    source = get_tweet(doc)
    response = client.index(index=index_name,
                            doc_type=doc_type,
                            id=source[id_field],
                            body=source)
    return response
def load(tweets):
    """Convert raw tweet docs into indexable tweet dicts.

    Returns ``get_tweet(doc)`` for each element of ``tweets``, in order.

    BUG FIX / idiom: the original kept dead ``counter`` and ``list_size``
    variables and used a manual append loop; both are replaced by a single
    comprehension with identical output.
    """
    return [get_tweet(doc) for doc in tweets]
def load(tweets):
    """Bulk-index tweets into an SSL/AWS-authenticated Elasticsearch cluster.

    Builds a version-appropriate tweet mapping, (re)creates the index when
    needed, and sends documents in chunks of ``bulk_chunk_size``.

    NOTE(review): relies on module-level names ``awsauth``, ``index_name``,
    ``id_field``, ``bulk_chunk_size``, ``get_tweet``, ``get_tweet_mapping``,
    ``create_index`` — confirm they are defined in this module.
    """
    es = Elasticsearch(
        hosts=[{'host': config.es_host, 'port': config.es_port}],
        http_auth=awsauth,
        use_ssl=True,
        verify_certs=True,
        connection_class=RequestsHttpConnection,
    )

    # Pick the mapping that matches the server's version.
    server_version = es.info()['version']['number']
    mapping_to_put = get_tweet_mapping(server_version)
    print(mapping_to_put)
    mapping = {'mappings': mapping_to_put}

    if not es.indices.exists(index_name):
        print('index {} does not exist'.format(index_name))
        create_index(es, index_name, mapping)
    else:
        print('index {} already exists'.format(index_name))
        try:
            es.indices.put_mapping(body=mapping_to_put, index=index_name)
        except ElasticsearchException as e:
            print('error putting mapping:\n' + str(e))
            print('deleting index {}...'.format(index_name))
            es.indices.delete(index_name)
            create_index(es, index_name, mapping)

    pending = []
    total = len(tweets)
    for position, doc in enumerate(tweets, start=1):
        source = get_tweet(doc)
        pending.append({
            "_index": index_name,
            "_id": source[id_field],
            "_source": source,
        })
        # Flush on every full chunk, and on the final (possibly partial) one.
        if position % bulk_chunk_size == 0 or position == total:
            print("ElasticSearch bulk index (index: {INDEX})...".format(
                INDEX=index_name))
            success, _ = bulk(es, pending)
            print('ElasticSearch indexed %d documents' % success)
            pending = []
def load(tweets):
    """Bulk-index tweets into an SSL/AWS-authenticated Elasticsearch cluster.

    Creates AWS request signing via ``create_awsauth()``, ensures the index
    and mapping exist (recreating the index on a mapping error), then indexes
    ``tweets`` in chunks of ``bulk_chunk_size``. Documents that fail to
    transform are skipped with a diagnostic message.

    BUG FIX: the original mixed Python-2 ``print "..."`` statements (a
    SyntaxError under Python 3) with ``print(...)`` calls; all prints are
    now function calls.

    Relies on module-level names: ``index_name``, ``doc_type``,
    ``tweet_mapping``, ``mapping``, ``id_field``, ``bulk_chunk_size``,
    ``get_tweet``, ``create_index``, ``create_awsauth``.
    """
    awsauth = create_awsauth()
    es = Elasticsearch(
        hosts=[{'host': config.es_host, 'port': config.es_port}],
        http_auth=awsauth,
        use_ssl=True,
        verify_certs=True,
        connection_class=RequestsHttpConnection,
    )

    if es.indices.exists(index_name):
        print('index {} already exists'.format(index_name))
        try:
            es.indices.put_mapping(doc_type, tweet_mapping, index_name)
        except ElasticsearchException as e:
            print('error putting mapping:\n' + str(e))
            print('deleting index {}...'.format(index_name))
            # Mapping conflict: drop the index and recreate it from scratch.
            es.indices.delete(index_name)
            create_index(es, index_name, mapping)
    else:
        print('index {} does not exist'.format(index_name))
        create_index(es, index_name, mapping)

    counter = 0
    bulk_data = []
    list_size = len(tweets)
    for doc in tweets:
        try:
            tweet = get_tweet(doc)
            bulk_data.append({
                "_index": index_name,
                "_type": doc_type,
                "_id": tweet[id_field],
                "_source": tweet,
            })
        except Exception as e:
            # Best-effort: skip the bad document but keep the batch going.
            print("A single Tweet Doc failed to be loaded to Elasticsearch, "
                  "tweet id is: " + doc['id_str'] + " Exception is: " + str(e))
        counter += 1
        # Flush on every full chunk, and on the final (possibly partial) one.
        if counter % bulk_chunk_size == 0 or counter == list_size:
            print('ElasticSearch bulk index (index: {INDEX}, type: {TYPE})...'.format(
                INDEX=index_name, TYPE=doc_type))
            success, _ = bulk(es, bulk_data)
            print('ElasticSearch indexed %d documents' % success)
            bulk_data = []
def load(tweets):
    """Index each tweet individually via the Elasticsearch REST API.

    Issues one HTTP PUT per document against the module-level ``url``
    using basic auth.

    Fixes over the original: dead code removed (``bulk_data``/``counter``
    were accumulated but never sent; a large commented-out bulk path is
    deleted), and the local ``id`` no longer shadows the builtin.

    SECURITY: credentials are hard-coded below (and were also present in
    the original's commented-out code). Move them to config/environment
    variables and rotate the exposed password.

    Relies on module-level names: ``url``, ``headers``, ``id_field``,
    ``get_tweet``, ``requests``, ``HTTPBasicAuth``.
    """
    for doc in tweets:
        tweet = get_tweet(doc)
        doc_id = tweet[id_field]
        requests.put(url + doc_id,
                     json=tweet,
                     headers=headers,
                     auth=HTTPBasicAuth('superturbo', 'M1nuteMa1d'))
def load(tweets):
    """Bulk-index tweets, selecting a mapping that matches the ES version.

    Queries the cluster for its version, builds the tweet mapping with
    ``get_tweet_mapping``, ensures the index exists (recreating it on a
    mapping error), then indexes ``tweets`` in chunks of ``bulk_chunk_size``.

    BUG FIX: the original mixed Python-2 ``print "..."`` statements (a
    SyntaxError under Python 3) with ``print(...)`` calls; all prints are
    now function calls.

    Relies on module-level names: ``index_name``, ``doc_type``, ``id_field``,
    ``bulk_chunk_size``, ``get_tweet``, ``get_tweet_mapping``,
    ``create_index``.
    """
    es = Elasticsearch(host=config.es_host, port=config.es_port)

    # Pick the mapping appropriate for the server's version.
    es_version_number = es.info()['version']['number']
    tweet_mapping = get_tweet_mapping(es_version_number)
    mapping = {doc_type: tweet_mapping}

    if es.indices.exists(index_name):
        print('index {} already exists'.format(index_name))
        try:
            es.indices.put_mapping(doc_type, tweet_mapping, index_name)
        except ElasticsearchException as e:
            print('error putting mapping:\n' + str(e))
            print('deleting index {}...'.format(index_name))
            # Mapping conflict: drop the index and recreate it from scratch.
            es.indices.delete(index_name)
            create_index(es, index_name, mapping)
    else:
        print('index {} does not exist'.format(index_name))
        create_index(es, index_name, mapping)

    counter = 0
    bulk_data = []
    list_size = len(tweets)
    for doc in tweets:
        tweet = get_tweet(doc)
        bulk_data.append({
            "_index": index_name,
            "_type": doc_type,
            "_id": tweet[id_field],
            "_source": tweet,
        })
        counter += 1
        # Flush on every full chunk, and on the final (possibly partial) one.
        if counter % bulk_chunk_size == 0 or counter == list_size:
            print('ElasticSearch bulk index (index: {INDEX}, type: {TYPE})...'.format(
                INDEX=index_name, TYPE=doc_type))
            success, _ = bulk(es, bulk_data)
            print('ElasticSearch indexed %d documents' % success)
            bulk_data = []
def load(tweets):
    """Bulk-index tweets over SSL using credentials from the environment.

    Creates an Elasticsearch connection using the environment variables
    ``ES_USER`` and ``ES_PASS``, ensures the index and mapping exist
    (recreating the index on a mapping error), then indexes ``tweets`` in
    chunks of ``bulk_chunk_size``.

    BUG FIX: the original mixed Python-2 ``print "..."`` statements (a
    SyntaxError under Python 3) with ``print(...)`` calls; all prints are
    now function calls.

    Relies on module-level names: ``index_name``, ``doc_type``,
    ``tweet_mapping``, ``mapping``, ``id_field``, ``bulk_chunk_size``,
    ``get_tweet``, ``create_index``, ``Urllib3HttpConnection``, ``os``.
    """
    es = Elasticsearch(connection_class=Urllib3HttpConnection,
                       host=config.es_host,
                       port=config.es_port,
                       http_auth=(os.getenv('ES_USER', 'user'),
                                  os.getenv('ES_PASS')),
                       use_ssl=True)

    if es.indices.exists(index_name):
        print('index {} already exists'.format(index_name))
        try:
            es.indices.put_mapping(doc_type, tweet_mapping, index_name)
        except ElasticsearchException as e:
            print('error putting mapping:\n' + str(e))
            print('deleting index {}...'.format(index_name))
            # Mapping conflict: drop the index and recreate it from scratch.
            es.indices.delete(index_name)
            create_index(es, index_name, mapping)
    else:
        print('index {} does not exist'.format(index_name))
        create_index(es, index_name, mapping)

    counter = 0
    bulk_data = []
    list_size = len(tweets)
    for doc in tweets:
        # NOTE(review): prints every raw document — noisy for large batches;
        # kept to preserve the original behavior.
        print(doc)
        tweet = get_tweet(doc)
        bulk_data.append({
            "_index": index_name,
            "_type": doc_type,
            "_id": tweet[id_field],
            "_source": tweet,
        })
        counter += 1
        # Flush on every full chunk, and on the final (possibly partial) one.
        if counter % bulk_chunk_size == 0 or counter == list_size:
            print('ElasticSearch bulk index (index: {INDEX}, type: {TYPE})...'.format(
                INDEX=index_name, TYPE=doc_type))
            success, _ = bulk(es, bulk_data)
            print('ElasticSearch indexed %d documents' % success)
            bulk_data = []