def init_initial_project():
    """Seed the ``projects-index`` index with the initial demo projects.

    Indexes one document per project (Hillary Clinton Emails, Reuters
    Mexico Sentiment, SEBA Master Lecture, Student Similarity), each
    stamped with the current creation time.
    """
    print("[INITIAL] Create initial project")
    # (document id, display name) pairs for the seed projects; a single
    # loop replaces four copy-pasted index calls.
    seed_projects = [
        ("hillary", "Hillary Clinton Emails"),
        ("reuters-mexico", "Reuters Mexico Sentiment"),
        ("seba-master", "SEBA Master Lecture"),
        ("student-similarity", "Student Similarity"),
    ]
    for project_id, project_name in seed_projects:
        body = {
            "name": project_name,
            "date": datetime.datetime.now(),
        }
        es.index(index="projects-index", doc_type='project',
                 id=project_id, body=body)
def create_default_doctor(data):
    """Create the default doctor user unless it already exists.

    :param data: user dict with at least ``user_name`` and ``password``;
                 mutated in place (doctor role added, password hashed).
    :return: the Elasticsearch index response, or ``None`` when the
             user already exists.
    """
    data["roles"] = {"doctor": True}
    data["password"] = generate_password_hash(data["password"].encode())
    doc = {'size': 10000, 'query': {'match_all': {}}}
    scroll = "1m"
    try:
        # NOTE(review): passing ``id`` to a search call is unusual --
        # confirm the client actually filters by it.
        response = es.search(index="users-index", doc_type="user", body=doc,
                             scroll=scroll, id=data["user_name"])
        if response["hits"]["total"] > 0:
            # Default doctor already exists -- nothing to create.
            print("Default doctor already exists")
            return
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # are not swallowed; a failed lookup means the user must be created.
        print("Create new default doctor user")
    # create the new user
    resp = es.index(index="users-index", doc_type="user", body=data,
                    id=data["user_name"])
    return resp
def handle_crawler_file(project_uuid, file_path):
    """Run the NLP pipeline on a crawled JSON file and index the result.

    :param project_uuid: project the document belongs to.
    :param file_path: path to a JSON file with a ``content`` field.
    :return: the Elasticsearch index response.
    """
    # Stable document id derived from the path; renamed from ``id`` to
    # avoid shadowing the builtin.
    doc_id = hashlib.md5(str(file_path).encode("utf8")).hexdigest()
    # Explicit encoding so the platform default cannot break JSON parsing.
    with open(file_path, 'r', encoding='utf8') as f:
        loaded_json = json.load(f)
    # run the nlp pipeline on text
    result = handle_document(project_uuid, doc_id, loaded_json['content'],
                             origin="crawler", similarity=True)
    result["file_path"] = file_path
    result["project_uuid"] = project_uuid
    response = es.index(index="document-index", doc_type="document",
                        id=doc_id, body=result)
    return response
def create_with_guid(self, generic_index, generic_type, data):
    """Index *data* under a freshly generated GUID.

    :return: the Elasticsearch index response.
    """
    new_id = str(uuid.uuid4())
    return es.index(index=generic_index, doc_type=generic_type,
                    id=new_id, body=data)
def post(self):
    """Handle a POST request: run the NLP pipeline on the posted document.

    Saves the result to the ``test-index`` index unless the ``debug``
    query argument is supplied; returns the pipeline result as JSON.
    """
    req = request.get_json(silent=True)
    # Hash of a random 32-char string as the document id; renamed from
    # ``id`` to avoid shadowing the builtin.
    # NOTE(review): the id is random, so re-posting the same document
    # creates a new entry each time -- confirm this is intended.
    doc_id = hashlib.md5(
        str(''.join(
            random.choices(string.ascii_uppercase + string.digits,
                           k=32))).encode("utf8")).hexdigest()
    # NOTE(review): any non-empty value (even "false") is truthy here;
    # only the absence of the arg yields False -- verify against callers.
    debug = request.args.get('debug', default=False)
    document = req['document']  # request.args.get('sentence')
    # handle sentence
    result = handle_document(doc_id, document)
    # Save in database if its not debugged
    if not debug:
        es.index(index="test-index", doc_type='sentence',
                 id=doc_id, body=result)
    # return json result
    return jsonify(result)
def handle_notebook_document(project_uuid, file_name, parsed_doc,
                             save=True, ts=None):
    """Run the NLP pipeline on a notebook document and optionally index it.

    :param project_uuid: project the document belongs to.
    :param file_name: source file name; hashed to form the document id.
    :param parsed_doc: document text fed to the pipeline.
    :param save: when True, index the result; otherwise return it.
    :param ts: timestamp for the document; defaults to the current time
               *per call*. (The original default ``ts=time.time()`` was
               evaluated once at definition time, so every call shared
               the module-import timestamp -- a classic Python pitfall.)
    :return: the pipeline result when ``save`` is False, else ``None``.
    """
    if ts is None:
        ts = time.time()
    # create a hash name from the filepath; renamed from ``id`` to avoid
    # shadowing the builtin.
    doc_id = hashlib.md5(str(file_name).encode("utf8")).hexdigest()
    # run the nlp pipeline on text
    result = handle_document(project_uuid, doc_id, parsed_doc,
                             origin="crawler", similarity=True, ts=ts)
    result["file_path"] = file_name
    result["project_uuid"] = project_uuid
    if save:
        es.index(index="document-index", id=doc_id, body=result)
        return None
    return result
def handle_file(project_uuid, file_path):
    """Parse a file with Tika, run the NLP pipeline, and index the result.

    :param project_uuid: project the document belongs to.
    :param file_path: path to the file to parse.
    :return: the Elasticsearch index response.
    """
    # Stable document id derived from the path; renamed from ``id`` to
    # avoid shadowing the builtin.
    doc_id = hashlib.md5(str(file_path).encode("utf8")).hexdigest()
    # open file with tika
    print("FILE PATH", file_path)
    parsed_doc = parse_file(file_path)
    # run the nlp pipeline on text
    result = handle_document(project_uuid, doc_id, parsed_doc["content"])
    result["file_path"] = file_path
    # get the filename
    result["file_name"] = os.path.basename(file_path)
    result["project_uuid"] = project_uuid
    response = es.index(index="document-index", doc_type="document",
                        id=doc_id, body=result)
    return response
def create_admin(self, data):
    """Index an admin user document (Elasticsearch assigns the id).

    :return: the Elasticsearch index response.
    """
    return es.index(index="users-index", doc_type="user", body=data)
def create(self, generic_index, generic_type, guid, data):
    """Store *data* in *generic_index* under the caller-supplied *guid*.

    :return: the Elasticsearch index response.
    """
    return es.index(index=generic_index, doc_type=generic_type,
                    id=guid, body=data)
def create_user(self, data):
    """Index a user document keyed by the user's email address.

    :return: the Elasticsearch index response.
    """
    return es.index(index="users-index", doc_type="user",
                    id=data["email"], body=data)