def download(): """ :return: """ years = range(1997, datetime.date.today().year + 1) print datetime.datetime.today(), "- downloading data for years:", years for year in years: paginate = True page = 1 while paginate: params = {"year": year, "page": page} print "downloading", year, "page", page res = requests.get(ACLED_URL, params=params) data = res.json() events = data['data'] print "indexing on", es.ES_ACLED_INDEX, "..." for event in events: event = hydrate_event(event) es.index(event) page += 1 if data['count'] < 500: paginate = False
def upsert(self, session=None):
    """Insert or update this document in Elasticsearch.

    Refreshes ``last_updated``, indexes the serialized document, and
    stores the returned ``_id`` on the instance when one is present.

    :param session: unused; kept for interface compatibility with callers.
    :return: the raw Elasticsearch index response dict.
    """
    self.last_updated = datetime.datetime.now()
    res = es.index(index=dbname, doc_type=self.doc_type, body=self.to_json())
    # Single lookup instead of the original double res.get("_id") call.
    doc_id = res.get("_id")
    if doc_id:
        self._id = doc_id
    return res
def create(self):
    """Save an instance to the DB for the first time.

    Sets ``created`` and ``last_updated`` to the same timestamp before
    indexing the serialized document.

    :return: the raw Elasticsearch index response dict (consistent with
        ``upsert``, which also returns the response).
    """
    self.last_updated = datetime.datetime.now()
    self.created = self.last_updated
    res = es.index(index=dbname, doc_type=self.doc_type, body=self.to_json())
    # Return the response like upsert() does so callers can inspect it.
    return res
def index_one(id):
    """Fetch one Document by primary key and index it in Elasticsearch.

    :param id: primary key of the Document (name shadows the builtin
        ``id`` but is kept so keyword callers keep working).
    """
    es = Elasticsearch(host)
    # BUG FIX: the default Django model manager is ``objects`` — the
    # original ``Document.object.get`` would raise AttributeError.
    d = Document.objects.get(pk=id)
    doc = get_doc(d)
    es.index(index=_index, doc_type=_doc_type, body=doc, id=id)
import es
import json
import glob
import time

# Rebuild the "messages" index from scratch before bulk loading.
es.delete_index("messages")
es.create_index("messages")

for count, path in enumerate(glob.iglob('data/**/*.json', recursive=True), 1):
    # Throttle: pause briefly every 500 files so ES can keep up.
    if count % 500 == 0:
        time.sleep(1)
    with open(path, encoding="utf8") as fh:
        record = json.load(fh)
    es.index("messages", "message", record)