class StdOutListener(StreamListener):
    """Stream listener that stores incoming tweets in Elasticsearch.

    Every raw message is echoed to stdout, decoded from JSON and written to
    the 'twitter' index until ``total_docs_to_be_indexed`` documents have
    been indexed; from then on ``on_data`` returns False.
    """

    # Number of tweets indexed so far. Starts as a class attribute and
    # becomes an instance attribute on the first ``+=``.
    counter = 0
    # Maximum number of tweets this listener will index.
    total_docs_to_be_indexed = 1000

    def __init__(self, *args, **kwargs):
        """Initialise the base listener and obtain the client from ``ES().getES()``."""
        super(StdOutListener, self).__init__(*args, **kwargs)
        self.es = ES().getES()

    def on_data(self, data):
        """Handle one raw stream message.

        Args:
            data: JSON-encoded message as delivered by the stream.

        Returns:
            True to keep receiving messages, False once the indexing quota
            has been reached (presumably a tweepy listener, where False
            asks the stream to disconnect -- confirm against the base class).
        """
        print(data)

        # Quota reached: stop without decoding or indexing anything else.
        if self.counter >= self.total_docs_to_be_indexed:
            return False

        tweet = json.loads(data)

        # Messages without 'id_str' (e.g. stream limit or delete notices)
        # cannot be indexed by tweet id; skip them without consuming quota
        # instead of raising KeyError.
        if 'id_str' not in tweet:
            return True

        self.index_tweet(tweet)
        self.counter += 1
        return True

    def index_tweet(self, tweet):
        """Index a decoded tweet into 'twitter'/'tweets', keyed by its 'id_str'."""
        self.es.index(index='twitter',
                      doc_type='tweets',
                      id=tweet['id_str'],
                      body=tweet)

    def on_error(self, status):
        """Report a stream error status.

        Returns:
            False for status 420 (rate limited) so the stream is not
            retried; None for every other status, as before.
        """
        print("the status is: " + str(status))
        if status == 420:
            # Retrying while rate limited only prolongs the penalty window.
            return False
        return None
from es import ES
from config import *
from time import sleep, time

# Smoke test for the Elasticsearch connection: make sure the index exists,
# store one sample document and read it back.
# index_name, body and doc_type are not defined in this script; they are
# expected to be supplied by the star import from config.
es = ES().getES()

if not es.indices.exists(index=index_name):
    print(' index does not exists, creat new index')
    es.indices.create(index=index_name, body=body)
    # `time` is bound to the time() function by the import above, so the
    # pause has to call sleep() directly; `time.sleep` raised AttributeError.
    # Presumably the pause lets the new index settle -- TODO confirm.
    sleep(2)
    print('index created successfully')
else:
    print('An index with this name already exists')

# Sample document used for the round trip.
doc1 = {
    'name': 'Erlang',
    'category': ['Distribute', 'OTP', 'Erlang', 'Elixir', 'Elm', 'Actor'],
    'Publication': 'Ericsson',
    'Publishing Date': '1970-01-01',
}

es.index(index=index_name, doc_type=doc_type, body=doc1, id='123')

# ignore=404 is passed so a missing document is returned rather than raised.
response = es.get(index=index_name, doc_type=doc_type, id='123', ignore=404)
print(response)