def document_exist(path):
    """Report whether any indexed document matches ``path`` and flag the hits.

    Runs a ``match`` query on the ``path`` field of ``DOCUMENTS_INDEX``.
    Every hit included in the search response is updated so that its
    ``exists`` field is True.

    Args:
        path: Value to match against the ``path`` field.

    Returns:
        True if the total hit count reported by the search is greater
        than zero, otherwise False.
    """
    query = {"query": {"match": {"path": path}}}
    response = ES.search(body=query, index=DOCUMENTS_INDEX)
    hits = response['hits']
    found = hits['total']['value'] > 0

    # Only the hits returned in this response are flagged.
    for hit in hits['hits']:
        flag_update = {"doc": {"exists": True}}
        ES.update(index=DOCUMENTS_INDEX, id=hit['_id'], body=flag_update)

    return found
def index_document_metadata(id, path):
    """Store extracted metadata for ``path`` on the document ``id``.

    Calls ``get_tika_meta(path)`` and writes the result to the ``meta``
    field of the document in ``DOCUMENTS_INDEX``. Extraction is
    best-effort: if it raises, an empty dict is stored instead.

    Args:
        id: Identifier of the document to update in ``DOCUMENTS_INDEX``.
        path: Path passed to ``get_tika_meta``.
    """
    try:
        file_meta = get_tika_meta(path)
    except Exception:
        # Best-effort extraction: fall back to empty metadata on failure.
        # Catching Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate so the process can still be stopped.
        file_meta = {}
    ES.update(index=DOCUMENTS_INDEX, id=id, body={"doc": {"meta": file_meta}})
def reset_exists():
    """Set ``exists`` to False on every document from ``get_all_documents()``.

    Issues one update against ``DOCUMENTS_INDEX`` per document, keyed by
    the document's ``_id``.
    """
    for entry in get_all_documents():
        cleared = {"doc": {"exists": False}}
        ES.update(index=DOCUMENTS_INDEX, id=entry['_id'], body=cleared)
def index_document_content(id, path):
    """Store extracted content for ``path`` on the document ``id``.

    Calls ``get_tika_content(path)`` and writes the result to the
    ``content`` field of the document in ``DOCUMENTS_INDEX``. Extraction
    is best-effort: if it raises, an empty string is stored instead.

    Args:
        id: Identifier of the document to update in ``DOCUMENTS_INDEX``.
        path: Path passed to ``get_tika_content``.
    """
    try:
        file_content = get_tika_content(path)
    except Exception:
        # Best-effort extraction: fall back to empty content on failure.
        # Catching Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate so the process can still be stopped.
        file_content = ""
    ES.update(index=DOCUMENTS_INDEX, id=id, body={"doc": {"content": file_content}})