Exemplo n.º 1
0
def init_initial_project():
    """
    Write the four predefined projects to the projects index.

    Each project is indexed into ``projects-index`` under a fixed id, with
    its name and the time at which it was indexed.
    """
    print("[INITIAL] Create initial project")
    seed_projects = (
        ("hillary", "Hillary Clinton Emails"),
        ("reuters-mexico", "Reuters Mexico Sentiment"),
        ("seba-master", "SEBA Master Lecture"),
        ("student-similarity", "Student Similarity"),
    )
    # Index in a fixed order; the timestamp is taken per project, right
    # before its document is written.
    for project_id, project_name in seed_projects:
        project = {
            "name": project_name,
            "date": datetime.datetime.now(),
        }
        es.index(index="projects-index", doc_type='project', id=project_id, body=project)
    def create_default_doctor(data):
        """
        Create the default doctor user unless a user with that id exists.

        ``data`` is modified in place: the ``doctor`` role is set and the
        plain-text ``password`` is replaced by its hash.

        :param data: user document; must contain ``user_name`` and ``password``
        :return: response of the ``es.index`` call, or ``None`` when the
                 user already exists
        """
        data["roles"] = {"doctor": True}

        data["password"] = generate_password_hash(data["password"].encode())

        # Look the user up directly by document id. A search call does not
        # take an ``id`` argument, so a search-based check would always fail
        # and the "already exists" branch would never be reached.
        try:
            already_exists = es.exists(index="users-index",
                                       doc_type="user",
                                       id=data["user_name"])
        except Exception:
            # Best effort: if the lookup itself fails, fall through and
            # attempt to create the user anyway.
            already_exists = False

        if already_exists:
            print("Default doctor already exists")
            return

        print("Create new default doctor user")
        # create the new user
        resp = es.index(index="users-index",
                        doc_type="user",
                        body=data,
                        id=data["user_name"])
        return resp
Exemplo n.º 3
0
def handle_crawler_file(project_uuid, file_path):
    """
    Run the NLP pipeline on a crawled JSON file and index the result.

    :param project_uuid: stored on the result as ``project_uuid`` and passed
                         to ``handle_document``
    :param file_path: path of a JSON file that has a ``content`` entry
    :return: response of the ``es.index`` call
    """
    # derive the document id from the file path, so the same path always
    # maps to the same id
    doc_id = hashlib.md5(str(file_path).encode("utf8")).hexdigest()

    # read the JSON as UTF-8 explicitly rather than relying on the
    # platform's default text encoding
    with open(file_path, 'r', encoding="utf-8") as f:
        loaded_json = json.load(f)

    # run the nlp pipeline on text; the file is already closed at this point
    result = handle_document(project_uuid,
                             doc_id,
                             loaded_json['content'],
                             origin="crawler",
                             similarity=True)

    result["file_path"] = file_path
    result["project_uuid"] = project_uuid

    response = es.index(index="document-index",
                        doc_type="document",
                        id=doc_id,
                        body=result)
    return response
Exemplo n.º 4
0
 def create_with_guid(self, generic_index, generic_type, data):
     """
     Index ``data`` under a newly generated random UUID.

     :param generic_index: name of the index to write to
     :param generic_type: document type to write
     :param data: document body
     :return: response of the ``es.index`` call
     """
     new_id = str(uuid.uuid4())
     return es.index(index=generic_index, doc_type=generic_type, id=new_id, body=data)
Exemplo n.º 5
0
    def post(self):
        """
        Process the posted ``document`` and return the result as JSON.

        The result is also indexed into ``test-index`` unless the ``debug``
        query argument is set.
        """
        payload = request.get_json(silent=True)

        # the document id is the md5 of a random 32-character token
        alphabet = string.ascii_uppercase + string.digits
        token = ''.join(random.choices(alphabet, k=32))
        doc_id = hashlib.md5(token.encode("utf8")).hexdigest()

        # NOTE(review): presumably this is the raw query-string value, in
        # which case any non-empty value (even "false") counts as debug --
        # confirm this is intended.
        debug = request.args.get('debug', default=False)
        document = payload['document']

        result = handle_document(doc_id, document)

        # debug requests are answered without being stored
        if debug:
            return jsonify(result)

        es.index(index="test-index", doc_type='sentence', id=doc_id, body=result)
        return jsonify(result)
Exemplo n.º 6
0
def handle_notebook_document(project_uuid,
                             file_name,
                             parsed_doc,
                             save=True,
                             ts=None):
    """
    Run the NLP pipeline on a notebook document and optionally index it.

    :param project_uuid: stored on the result as ``project_uuid`` and passed
                         to ``handle_document``
    :param file_name: used to derive the document id; stored as ``file_path``
    :param parsed_doc: passed to ``handle_document`` as the document input
    :param save: when true, index the result into ``document-index``
    :param ts: timestamp forwarded to ``handle_document``; defaults to the
               time of the call
    :return: ``None`` when ``save`` is true, otherwise the result
    """
    # Resolve the default per call. A ``time.time()`` default in the
    # signature is evaluated only once, when the function is defined.
    if ts is None:
        ts = time.time()

    # derive the document id from the file name
    doc_id = hashlib.md5(str(file_name).encode("utf8")).hexdigest()
    # run the nlp pipeline on text
    result = handle_document(project_uuid,
                             doc_id,
                             parsed_doc,
                             origin="crawler",
                             similarity=True,
                             ts=ts)
    result["file_path"] = file_name
    result["project_uuid"] = project_uuid

    if save:
        # NOTE(review): no doc_type is passed here, unlike the other
        # "document-index" writes in this file -- confirm this is intended.
        es.index(index="document-index", id=doc_id, body=result)
        return None
    return result
Exemplo n.º 7
0
def handle_file(project_uuid, file_path):
    """
    Parse a file, run the NLP pipeline on its content and index the result.

    :param project_uuid: stored on the result as ``project_uuid`` and passed
                         to ``handle_document``
    :param file_path: path of the file handed to ``parse_file``
    :return: response of the ``es.index`` call
    """
    # the document id is the md5 of the file path
    path_digest = hashlib.md5(str(file_path).encode("utf8"))
    doc_id = path_digest.hexdigest()

    print("FILE PATH", file_path)
    parsed = parse_file(file_path)

    # only the parsed content goes through the pipeline
    result = handle_document(project_uuid, doc_id, parsed["content"])

    # attach where the document came from
    result["file_path"] = file_path
    result["file_name"] = os.path.basename(file_path)
    result["project_uuid"] = project_uuid

    return es.index(index="document-index", doc_type="document", id=doc_id, body=result)
 def create_admin(self, data):
     """
     Index ``data`` as a user document without an explicit document id.

     :param data: user document body
     :return: response of the ``es.index`` call
     """
     return es.index(index="users-index", doc_type="user", body=data)
 def create(self, generic_index, generic_type, guid, data):
     """
     Index ``data`` under the caller-supplied id ``guid``.

     :param generic_index: name of the index to write to
     :param generic_type: document type to write
     :param guid: document id
     :param data: document body
     :return: response of the ``es.index`` call
     """
     return es.index(
         index=generic_index,
         doc_type=generic_type,
         id=guid,
         body=data,
     )
Exemplo n.º 10
0
 def create_user(self, data):
     """
     Index ``data`` as a user document, using its ``email`` as document id.

     :param data: user document body; must contain ``email``
     :return: response of the ``es.index`` call
     """
     user_id = data["email"]
     return es.index(index="users-index", doc_type="user", body=data, id=user_id)