예제 #1
0
def mark_prior_author(uid):
    """Flag every author that takes part in a ``prior_<uid>`` relationship.

    The statement sets ``a.prior_<uid> = TRUE`` on each distinct ``Author``
    found at either end of a ``prior_<uid>`` relationship to another author.

    Args:
        uid: Identifier spliced into the relationship type and property name.
    """
    # Relationship types and property keys cannot be supplied as Cypher
    # parameters, so the identifier is formatted into the statement text.
    template = """
        MATCH (a:Author)-[:prior_{uid}]-(:Author)
        WITH DISTINCT a
        SET a.prior_{uid}=TRUE
    """
    statement = template.format(uid=uid)
    db.run(statement)
예제 #2
0
def mark_test_author(uid, num_author=100):
    """Flag prior authors that also take part in a ``test_<uid>`` relationship.

    Sets ``a.test_<uid> = TRUE`` on each distinct ``Author`` that touches a
    ``test_<uid>`` relationship and already has ``prior_<uid> = TRUE``.

    Args:
        uid: Identifier spliced into the relationship type and property names.
        num_author: Forwarded to the driver as the ``num_author`` query
            parameter; the statement text itself never references it.
    """
    template = """
        MATCH (a:Author)-[:test_{uid}]-(:Author)
        WHERE a.prior_{uid}=TRUE
        WITH DISTINCT a
        SET a.test_{uid}=TRUE
    """
    statement = template.format(uid=uid)
    # Both values are sent along although the statement contains neither a
    # ``$num_author`` nor a ``$TopK`` placeholder.
    bound_values = {"num_author": num_author, "TopK": 10}
    db.run(statement, parameters=bound_values)
예제 #3
0
def deleteAll():
    """Run a batched ``DETACH DELETE`` over graph nodes through APOC.

    Raises:
        ValueError: If the statement cannot be executed. The underlying
            error is attached as ``__cause__``.
    """
    # NOTE(review): the inner ``MATCH (n) RETURN n LIMIT 1000`` caps the node
    # stream handed to ``DETACH DELETE``, so a single call looks like it
    # removes at most 1000 nodes -- confirm whether callers loop or whether
    # the LIMIT should be dropped.
    query = """
            call apoc.periodic.iterate("MATCH (n) RETURN n LIMIT 1000;", "DETACH DELETE n", {batchSize:1000})
            yield batches, total return batches, total
        """
    try:
        db.run(query)
    except Exception as err:
        # A bare ``except:`` would also convert KeyboardInterrupt/SystemExit
        # into ValueError; only genuine errors are translated, and the cause
        # is kept for debugging.
        raise ValueError("Error: Not able to delete all.") from err
예제 #4
0
def update_work_prior(uid):
    """Store on each prior author how many prior works they uploaded.

    For every ``Author`` with ``prior_<uid> = TRUE`` that uploaded at least
    one ``Work`` with ``prior_<uid> = TRUE``, the number of such distinct
    works is written to ``a.work_prior_<uid>``.

    Args:
        uid: Identifier spliced into the property names.
    """
    template = """
        MATCH (a:Author)-[:UPLOAD]->(w:Work)
        WHERE a.prior_{uid} = TRUE AND
            w.prior_{uid}=TRUE 
        WITH a, COUNT(DISTINCT w) AS num_work
        SET a.work_prior_{uid}=num_work
    """
    db.run(template.format(uid=uid))
예제 #5
0
def init_papers():
    """Copy ``year`` and ``title`` from the paper CSV onto existing works.

    Reads the ``|``-separated CSV at ``PAPER_DATABASE`` and, for each row,
    matches the ``Work`` whose ``work_id`` equals the row's ``article:ID``
    (as an integer) and sets its ``year`` and ``title`` properties.
    """
    statement = """
        USING PERIODIC COMMIT 1000
        LOAD CSV WITH HEADERS FROM $path AS line FIELDTERMINATOR '|'
        WITH line.`article:ID` AS work_id,
        line.`year:int` AS year,
        line.`title:string` AS title

        MATCH (w:Work { work_id: toInteger(work_id) })
        SET w.year = toInteger(year), w.title = title """

    # The CSV location is the only value bound as a query parameter.
    bound_values = {"path": PAPER_DATABASE}
    db.run(statement, parameters=bound_values)
 def delete_all_authors(self):
     """Detach-delete every ``Author`` node.

     Raises:
         ValueError: If running the statement fails. The underlying error
             is attached as ``__cause__``.
     """
     query = """
             MATCH (a:Author)
             DETACH DELETE a
             """
     print("query", query)
     try:
         db.run(query, )
     except Exception as err:
         # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
         # propagate untouched, and chained so the real failure stays visible.
         raise ValueError(
             "There is no connection found. Check connection and try again !"
         ) from err
예제 #7
0
def init_authors():
    """Set the ``name`` of existing authors from the author CSV.

    Reads the ``|``-separated CSV at ``AUTHOR_DATABASE`` and, for each row,
    matches the ``Author`` whose ``author_id`` equals the row's ``ID`` (as an
    integer) and sets its ``name``. The statement is printed before it runs.
    """
    statement = """
        // Load and commit every 1000 records
        USING PERIODIC COMMIT 1000
        LOAD CSV WITH HEADERS FROM $path AS line FIELDTERMINATOR '|'
        WITH line.`ID` AS author_id,
            line.`author` AS name

        MATCH (a:Author)
        WHERE a.author_id = toInteger(author_id)
        SET a.name= name
        """
    banner = "create set name author:{0}".format(statement)
    print(banner)

    bound_values = {"path": AUTHOR_DATABASE}
    db.run(statement, parameters=bound_values)
예제 #8
0
def init_author_paper_upload():
    """Load author-to-work ``UPLOAD`` relationships from a CSV.

    Reads the ``|``-separated CSV at ``AUTHOR_PAPER_DATABASE``. For each row
    the ``Work`` (``START_ID``) and ``Author`` (``END_ID``) nodes are merged
    by their integer ids, then an ``UPLOAD`` relationship from the author to
    the work is created. The statement is printed before it runs.
    """
    statement = """
            // Load and commit every 1000 records
            USING PERIODIC COMMIT 1000
            LOAD CSV WITH HEADERS FROM $path AS line FIELDTERMINATOR '|'
            WITH line.`START_ID` AS work_id,
            line.`END_ID` AS author_id

            MERGE (w:Work { work_id: toInteger(work_id)})
            MERGE  (a:Author { author_id: toInteger(author_id)})

            // Create relationships between Author and Paper
            CREATE (a)-[:UPLOAD]->(w) """
    banner = "create new author->paper:{0}".format(statement)
    print(banner)

    bound_values = {"path": AUTHOR_PAPER_DATABASE}
    db.run(statement, parameters=bound_values)
예제 #9
0
def create_test_connection(uid, start, end):
    """Create ``test_<uid>`` links between prior co-authors in a year range.

    Two distinct authors, both with ``prior_<uid> = TRUE``, are linked when
    they uploaded the same titled ``Work`` whose ``year`` lies in
    ``[start, end]`` and no ``test_<uid>`` relationship exists between them
    in either direction yet.

    Args:
        uid: Identifier spliced into the relationship type and property names.
        start: Lower bound of ``w.year`` (inclusive), formatted into the text.
        end: Upper bound of ``w.year`` (inclusive), formatted into the text.
    """
    template = """
        MATCH (a:Author)-[:UPLOAD]->(w:Work)<-[:UPLOAD]-(b:Author)
        WHERE a<>b AND
            a.prior_{uid}=TRUE AND
            b.prior_{uid}=TRUE AND
            EXISTS(w.title) AND
            w.year>={start} AND
            w.year<={end} AND
            NOT EXISTS((a)-[:test_{uid}]-(b))
        CREATE (a)-[:test_{uid}]->(b)
    """
    fields = {"uid": uid, "start": start, "end": end}
    db.run(template.format(**fields))
예제 #10
0
def mark_test_paper(uid, start, end):
    """Flag works in a year range that were co-uploaded by prior authors.

    Sets ``w.test_<uid> = TRUE`` on each distinct titled ``Work`` whose
    ``year`` lies in ``[start, end]`` and that was uploaded by two distinct
    authors who both have ``prior_<uid> = TRUE``.

    Args:
        uid: Identifier spliced into the property names.
        start: Lower bound of ``w.year`` (inclusive), formatted into the text.
        end: Upper bound of ``w.year`` (inclusive), formatted into the text.
    """
    template = """
        MATCH (a:Author)-[:UPLOAD]->(w:Work)<-[:UPLOAD]-(b:Author)
        WHERE a<>b AND
            a.prior_{uid} = TRUE AND
            b.prior_{uid} = TRUE AND
            EXISTS(w.title) AND
            w.year>={start} AND
            w.year<={end}
        WITH DISTINCT w
        SET w.test_{uid}=TRUE
    """
    fields = {"uid": uid, "start": start, "end": end}
    db.run(template.format(**fields))
예제 #11
0
    def _predict(self, project_uid: int):
        """Load scored author pairs from a CSV into prediction relationships.

        Each ``|``-separated row is read as ``start id | end id | score``.
        For every pair of matching authors that is not yet linked by a
        ``_<project_uid>_<self._id>`` relationship in that direction, one is
        created carrying the row's ``score``.

        Args:
            project_uid: Identifier spliced into the relationship type.
        """
        from utils.file import get_current_directory, connect_path_file

        base_dir = get_current_directory(neo4j=True)
        csv_path = connect_path_file(base_dir, _temp_file)

        head = """
            USING PERIODIC COMMIT 1000
            LOAD CSV FROM $path AS line FIELDTERMINATOR '|'
            WITH line
            
            WITH toInteger(line[0]) AS start_nodeId, 
                 toInteger(line[1]) AS end_nodeId, 
                 toFloat(line[2]) AS score

            MATCH (a:Author), (b:Author)
            WHERE a.author_id = start_nodeId AND
                b.author_id = end_nodeId AND 
                NOT EXISTS((a)-[:_{uid}_{_id}]->(b))
            CREATE (a)-[:_{uid}_{_id} """.format(uid=project_uid, _id=self._id)
        # The property map holds literal Cypher braces, so it is appended
        # after formatting instead of being escaped inside the template.
        tail = """ {score: score}]->(b)"""
        db.run(head + tail, parameters={"path": csv_path})
예제 #12
0
def get_num_author(uid: int, type_db: str) -> int:
    """Count the authors whose ``<type_db>_<uid>`` property is TRUE.

    Args:
        uid: Identifier spliced into the property name.
        type_db: Prefix of the property name.

    Returns:
        The count reported by the query, or ``0`` if it yields no row.
    """
    template = """
        MATCH (a:Author)
        WHERE a.{type_db}_{uid}=TRUE
        RETURN COUNT(DISTINCT a) AS num_au
    """
    statement = template.format(uid=uid, type_db=type_db)

    total = 0
    for record in db.run(statement):
        # The value of the last row read is the one returned.
        total = int(record[0])
    return total
    def get_author(self,
                   author_id: int,
                   project_uid: int,
                   model_id: int,
                   similarity=None,
                   source_id=None) -> Author:
        """Fetch one prior author, optionally with its links to a source author.

        Args:
            author_id: Id of the author to look up (cast with ``int``).
            project_uid: Identifier spliced into property/relationship names.
            model_id: Not referenced by the statement text.
            similarity: Value passed through unchanged to the ``Author``.
            source_id: When given, the query also reports whether the author
                shares a ``prior_<uid>`` relationship (``acquaintance``) and a
                ``test_<uid>`` relationship (``real_connected``) with the
                author that has this id.

        Returns:
            An ``Author`` built from the last row read, or ``None`` when no
            row was found or the stored name is ``None``.
        """
        with_source = source_id is not None

        cypher = """
            MATCH (a:Author)
            WHERE a.prior_{uid}=TRUE AND
                a.author_id= $aId
            RETURN a.author_id,
                a.name""".format(uid=project_uid)
        bound_values = {"aId": int(author_id)}

        if with_source:
            # The ``{ author_id: ... }`` maps contain literal braces, so they
            # are concatenated after the formatted fragments.
            prior_check = ", EXISTS((a)-[:prior_{uid}]-(:Author ".format(
                uid=project_uid)
            test_check = ", EXISTS((a)-[:test_{uid}]-(:Author ".format(
                uid=project_uid)
            cypher += (prior_check
                       + "{ author_id: $source_id })) AS acquaintance"
                       + test_check
                       + "{ author_id: $source_id })) AS real_connected")
            bound_values["source_id"] = int(source_id)

        found_id = None
        found_name = None
        found_similarity = None
        acquaintance = None
        real_connected = None

        for record in db.run(cypher, parameters=bound_values):
            found_id = int(record[0])
            found_name = record[1]
            found_similarity = similarity
            if with_source:
                acquaintance = record[2]
                real_connected = record[3]

        if found_name is None:
            return None

        return Author(
            found_id,
            found_name,
            found_similarity,
            acquaintance,
            real_connected,
        )
    def save(self, recommendation_list: list, project_uid: int,):
        """Replace the stored predictions with the given recommendations.

        Args:
            recommendation_list: Items indexed by ``"author_id"`` (source
                author) and ``"topK"`` (iterable of recommended author ids).
            project_uid: Identifier spliced into the relationship type.

        Raises:
            ValueError: If ``recommendation_list`` is empty.
        """
        # Existing predictions are removed first, even when the new list
        # turns out to be empty.
        self.delete_predictions(project_uid)

        if len(recommendation_list) == 0:
            raise ValueError(
                "Error: No authors were found to make any prediction!!")

        statement = """
                MATCH (a:Author),(b:Author)
                WHERE a.author_id=$a_id AND
                    b.author_id=$b_id
                CREATE (a)-[:_{0}_{1} 
            """.format(project_uid, self._id)
        statement += "{ top:$top }]->(b)"

        for recommendation in recommendation_list:
            source_author = recommendation["author_id"]
            for rank, target_author in enumerate(recommendation["topK"]):
                # Only strictly positive ids are stored; ``rank`` is the
                # position inside ``topK``, skipped entries included.
                if not target_author > 0:
                    continue
                bound_values = {
                    "a_id": source_author,
                    "b_id": target_author,
                    "top": rank,
                }
                db.run(statement, parameters=bound_values)
예제 #15
0
def get_all_author_id(uid: int, type_db: str) -> list:
    """Return the distinct ids of authors whose ``<type_db>_<uid>`` is TRUE.

    Args:
        uid: Identifier spliced into the property name.
        type_db: Prefix of the property name.

    Returns:
        A list with one ``int`` per row of the query, in result order.
    """
    query = """
        MATCH (a:Author)
        WHERE a.{type_db}_{uid}=TRUE
        RETURN DISTINCT a.author_id
    """.format(uid=uid, type_db=type_db)
    # Build the list straight from the result stream. Pre-sizing it from a
    # separate COUNT(DISTINCT a) query costs an extra round trip, and the two
    # results can disagree (the count is per node, the rows per distinct id),
    # which ends in an IndexError or in stale ``0`` padding.
    return [int(row[0]) for row in db.run(query)]
    def _init_new_test_collaborations(self, uid):
        """Create ``test_<uid>`` relationships from a CSV of author pairs.

        Reads the ``,``-separated CSV at ``PATH_TEST_GRAPH_DATABASE`` and, for
        each row, links the authors whose ``author_id`` equals columns ``a``
        and ``b`` (as integers) with a ``test_<uid>`` relationship from ``a``
        to ``b``. The statement is printed before it runs.

        Args:
            uid: Identifier spliced into the relationship type.

        Raises:
            ValueError: If running the statement fails. The underlying error
                is attached as ``__cause__``.
        """
        query = """
                    USING PERIODIC COMMIT 1000
                    LOAD CSV WITH HEADERS FROM $path AS line FIELDTERMINATOR ','
                    WITH line.`a` AS a_author_id,
                        line.`b` AS b_author_id 

                    MATCH (a:Author)
                    MATCH (b:Author)
                    WHERE a.author_id=toInteger(a_author_id) AND
                        b.author_id=toInteger(b_author_id)
                    """
        query += """
                CREATE (a)-[:test_{uid}]->(b)""".format(uid=uid)
        print("query", query)
        try:
            db.run(query, parameters={"path": PATH_TEST_GRAPH_DATABASE})
        except Exception as err:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # propagate, and chained so the real failure is not hidden.
            raise ValueError(
                "There is no connection found. Check connection and try again !"
            ) from err
    def _init_new_prior_authors(self, uid):
        """Create prior ``Author`` nodes from a CSV.

        Reads the ``,``-separated CSV at ``PATH_AUTHORS_DATABASE`` and creates
        one ``Author`` per row with ``author_id``, ``name``, ``neigh_<uid>``
        (column ``num_col``), ``work_prior_<uid>`` (column ``num_work``) and
        ``prior_<uid> = TRUE``. The statement is printed before it runs.

        Args:
            uid: Identifier spliced into the property names.

        Raises:
            ValueError: If running the statement fails. The underlying error
                is attached as ``__cause__``.
        """
        # Cypher property map of the created node. This is statement text,
        # not the driver's parameter dict.
        # NOTE(review): ``num_col`` and ``num_work`` are stored as read from
        # the CSV, unlike ``author_id`` which goes through toInteger() --
        # confirm the intended property types.
        parameters = "{ "
        parameters += " author_id:toInteger(author_id), name:name, neigh_{uid}:num_col, work_prior_{uid}:num_work, prior_{uid}:TRUE".format(
            uid=uid, )
        parameters += " })"
        query = """
                    USING PERIODIC COMMIT 1000
                    LOAD CSV WITH HEADERS FROM $path AS line FIELDTERMINATOR ','
                    WITH line.`author_id` AS author_id,
                        line.`name` AS name,
                        line.`num_work` AS num_work,
                        line.`num_col` AS num_col

                    CREATE (:Author {parameters} """.format(
            parameters=parameters)
        print("query", query)
        try:
            db.run(query, parameters={"path": PATH_AUTHORS_DATABASE})
        except Exception as err:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # propagate, and chained so the real failure is not hidden.
            raise ValueError(
                "There is no connection found. Check connection and try again !"
            ) from err
예제 #18
0
def create_prior_connection(uid,
                            start,
                            end,
                            is_directed=False,
                            is_weighted=False):
    """Create the prior co-authorship relationships for one graph flavour.

    * undirected, unweighted: link distinct authors who uploaded the same
      titled work with ``year`` in ``[start, end]`` by ``prior_<uid>``,
      unless such a link already exists in either direction.
    * undirected, weighted: for each ``prior_<uid>`` link between prior
      authors, create ``prior_weight_<uid>`` with
      ``score = shared / (works_a + works_b - shared)``.
    * directed, weighted: for each ``prior_<uid>`` link (matched in both
      directions), create ``prior_dir_weight_<uid>`` with
      ``score = shared / works_a``.

    The directed, unweighted combination runs no statement.

    Args:
        uid: Identifier spliced into relationship types and property names.
        start: Lower year bound (inclusive); only used by the first flavour.
        end: Upper year bound (inclusive); only used by the first flavour.
        is_directed: Selects the directed flavour.
        is_weighted: Selects the weighted flavour.
    """
    # Explicit comparisons against True/False are kept on purpose: a value
    # that equals neither must select no branch, exactly as before.
    undirected = is_directed == False
    directed = is_directed == True
    unweighted = is_weighted == False
    weighted = is_weighted == True

    if undirected and unweighted:
        cypher = """
            MATCH (a:Author)-[:UPLOAD]->(w:Work)<-[:UPLOAD]-(b:Author)
            WHERE a<>b AND
                EXISTS(w.title) AND
                w.year>={start} AND
                w.year<={end} AND
                NOT EXISTS((a)-[:prior_{uid}]-(b))
            CREATE (a)-[:prior_{uid}]->(b)
        """.format(uid=uid, start=start, end=end)
        db.run(cypher)
        return

    # The ``{ score:score }`` map holds literal braces and is therefore
    # appended after formatting.
    score_map = """{ score:score } ]->(b)"""

    if undirected and weighted:
        cypher = """
            MATCH (a:Author)-[:prior_{uid}]->(b:Author)
            WHERE a.prior_{uid}=TRUE AND
                b.prior_{uid}=TRUE
            WITH a, b

            MATCH (a)-[:UPLOAD]->(w:Work)<-[:UPLOAD]-(b) 
            WHERE w.prior_{uid}=TRUE
            WITH DISTINCT a,b,
                COUNT(w) AS same_work
            WITH  a, 
                 b,
                same_work* 1.0/(a.work_prior_{uid} + b.work_prior_{uid} - same_work) AS score
            WHERE NOT EXISTS((a)-[:prior_weight_{uid}]->(b))
            CREATE (a)-[:prior_weight_{uid}""".format(uid=uid)
        db.run(cypher + score_map)
        return

    if directed and weighted:
        cypher = """
            MATCH (a:Author)-[:prior_{uid}]-(b:Author)
            WHERE a.prior_{uid}=TRUE AND
                b.prior_{uid}=TRUE
            WITH a,b

            MATCH (a)-[:UPLOAD]->(w:Work)<-[:UPLOAD]-(b) 
            WHERE w.prior_{uid}=TRUE
            WITH DISTINCT  a, 
                b,
                COUNT(w) AS same_work
            WITH DISTINCT  a, 
                b,
                same_work* 1.0/(a.work_prior_{uid}) AS score
            CREATE (a)-[:prior_dir_weight_{uid}""".format(uid=uid)
        db.run(cypher + score_map)
예제 #19
0
def isExistPaper():
    """Report whether the database holds at least one ``Work`` node.

    Returns:
        ``True`` when the probe query yields the value ``1``, else ``False``.

    Raises:
        ValueError: If running or reading the query fails. The underlying
            error is attached as ``__cause__``.
    """
    query = """
        MATCH (w:Work)
        RETURN 1 AS existed LIMIT 1
    """
    rs = None
    try:
        for row in db.run(query, ):
            rs = int(row[0])
    except Exception as err:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # propagate, and chained so the real failure is not hidden.
        raise ValueError(
            "Error: There is an error when connecting to database.") from err
    # No row at all leaves ``rs`` as None, which reports "no paper".
    return rs == 1
    def isConnect(self):
        """Report whether the database holds at least one ``Author`` node.

        Returns:
            ``True`` when the probe query yields the value ``1``, else
            ``False``.

        Raises:
            ValueError: If running or reading the query fails. The underlying
                error is attached as ``__cause__``.
        """
        query = """
            MATCH (a:Author)
            RETURN 1 AS existed LIMIT 1
        """
        rs = None
        try:
            for row in db.run(query, ):
                rs = int(row[0])
        except Exception as err:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # propagate, and chained so the real failure is not hidden.
            raise ValueError(
                "There is no connection. Check connection and try again !"
            ) from err
        # No row at all leaves ``rs`` as None, which reports False.
        return rs == 1
    def get_authors(self, project_uid: int, model_id: int,
                    num_authors: int) -> list:
        """Fetch up to ``num_authors`` prior authors.

        Args:
            project_uid: Identifier spliced into the property name.
            model_id: Not referenced by the statement text.
            num_authors: Maximum number of rows, bound as ``$num_authors``
                after an ``int`` cast.

        Returns:
            A list of ``Author(author_id, name)`` objects in result order, or
            ``None`` when the query yields no row.
        """
        query = """
            MATCH (a:Author)
            WHERE a.prior_{uid}=TRUE
            RETURN DISTINCT a.author_id,
                a.name LIMIT $num_authors
            """.format(uid=project_uid)
        rows = db.run(query, parameters={"num_authors": int(num_authors)})
        # Collect straight from the result stream; no placeholder list to
        # pre-size, fill by index and filter afterwards.
        authors = [Author(int(row[0]), row[1]) for row in rows]
        if not authors:
            return None
        return authors
예제 #22
0
    def _create_csv_file(self, project_uid: int, file_path=PATH_GRAPH_NODE2VEC_NEO4J, ):
        """
            Lấy file csv từ Neo4j
            Get CSV file from Neo4j
        """
        query = """
			WITH "MATCH (a:Author)-[r:prior_{0}]->(b:Author)
				WHERE a.prior_{0}=TRUE AND b.prior_{0}=TRUE
				RETURN DISTINCT a.author_id AS au1, b.author_id AS au2" AS query
			CALL apoc.export.csv.query(query, $file_path, """.format(project_uid)

        if (self.is_weighted == True and self.is_directed == False):
            query = """
			WITH "MATCH (a:Author)-[r:prior_weight_{0}]->(b:Author)
				WHERE a.prior_{0}=TRUE AND b.prior_{0}=TRUE
				RETURN DISTINCT a.author_id AS au1, b.author_id AS au2, r.score AS weight" AS query
			CALL apoc.export.csv.query(query, $file_path, """.format(project_uid)
        elif (self.is_weighted == True and self.is_directed == True):
            query = """
			WITH "MATCH (a:Author)-[r:prior_dir_weight_{0}]->(b:Author)
				WHERE a.prior_{0}=TRUE AND b.prior_{0}=TRUE
				RETURN DISTINCT a.author_id AS au1, b.author_id AS au2, r.score AS weight" AS query
			CALL apoc.export.csv.query(query, $file_path, """.format(project_uid)

        query += """ { })
			YIELD file, rows
			RETURN file, rows
            """
        # print("query", query)
        num_rows_effective = 0

        for row in db.run(query, parameters={"file_path": file_path}):
            file_name = row[0]
            num_rows_effective = row[1]

        return (file_name, num_rows_effective)
    def valuate(self, project_uid: int) -> dict:
        """Compute precision, recall and F-measure of the stored predictions.

        Predictions are the ``_<project_uid>_<self._id>`` relationships that
        start at authors with ``test_<project_uid> = TRUE``; ground truth is
        the ``test_<project_uid>`` relationships. The rounded metrics are
        stored in ``self.valuations``.

        Args:
            project_uid: Identifier spliced into relationship/property names.

        Returns:
            A deep copy of ``self.valuations`` with the keys ``"precision"``,
            ``"recall"`` and ``"fmeasure"``, each rounded to two decimals. A
            metric whose denominator is zero is reported as ``0.0``.
        """
        # Fallback values, only used if a count query yields no row.
        true_positive = 0
        false_positive = 10
        false_negative = 10

        # True positives: predicted relationships whose two authors are also
        # joined by a test relationship (in either direction).
        query = """
                MATCH (a:Author)-[r:_{0}_{1}]->(lily:Author)
                WHERE a.test_{0} = TRUE
                    AND EXISTS((a)-[:test_{0}]-(lily))
                RETURN COUNT(r) AS tp
                """.format(project_uid, self._id)
        for row in db.run(query,):
            true_positive = row[0]

        # False positives: links that are not correct but were predicted
        # anyway, i.e. predicted relationships without a test relationship
        # between the same two authors.
        #
        # Original design note (not what runs): take the number of links to
        # predict at each node, then look only at the top-k' predictions with
        # k' < 5 and k' = that number, and count how many are wrong:
        #
        #     MATCH (a:Author)-[r:test_{0}]-(b:Author)
        #     WITH DISTINCT a, COUNT(DISTINCT b.author_id) AS num_col
        #
        #     MATCH (a)-[r:_{0}_{1}]->(lily:Author)
        #     WHERE a.test_{0} = TRUE
        #         AND r.top < num_col
        #         AND NOT EXISTS((a)-[:test_{0}]-(lily))
        #     RETURN COUNT(r) AS fp
        query = """
                    MATCH (a:Author)-[r:_{0}_{1}]->(lily:Author)
                    WHERE a.test_{0} = TRUE
                        AND NOT EXISTS((a)-[:test_{0}]-(lily))
                    RETURN COUNT(r) AS fp
                    """.format(project_uid, self._id)
        for row in db.run(query,):
            false_positive = row[0]

        # False negatives: links that are really correct but were not
        # predicted, i.e. test relationships (matched in either direction)
        # with no prediction from ``a`` to the other author.
        #
        # Original design note (not what runs): cap the number of links to
        # predict per node at TopK and use
        # min(num_col, TopK) - correctly predicted links within that cap.
        query = """
                MATCH (a:Author)-[r:test_{0}]-(lily:Author)
                WHERE a.test_{0} = TRUE
                    AND NOT EXISTS((a)-[:_{0}_{1}]->(lily))
                RETURN COUNT(r) AS fn
                """.format(project_uid, self._id)
        for row in db.run(query,):
            false_negative = row[0]

        print("true_positive", true_positive, "false_positive",
              false_positive, "false_negative", false_negative)

        # Guard every ratio: with no predictions, no test links or no true
        # positives the denominators are zero and plain division would raise
        # ZeroDivisionError.
        predicted_total = true_positive + false_positive
        actual_total = true_positive + false_negative
        precision = (true_positive * 1.0 / predicted_total
                     if predicted_total else 0.0)
        recall = (true_positive * 1.0 / actual_total
                  if actual_total else 0.0)
        if precision + recall:
            f_measure = 2 * precision * recall * 1.0 / (precision + recall)
        else:
            f_measure = 0.0
        print("precision", precision, "recall", recall, "f_measure", f_measure)

        self.valuations = {
            "precision": round(precision, 2),
            "recall": round(recall, 2),
            "fmeasure": round(f_measure, 2),
        }

        return copy.deepcopy(self.valuations)
예제 #24
0
def delete_connection(type_db):
    """Delete every ``<type_db>_<uid>`` relationship between two authors.

    Args:
        type_db: Prefix of the relationship type to delete.
    """
    # NOTE(review): ``uid`` is not a parameter of this function; it has to
    # resolve to a name from an enclosing/module scope at call time,
    # otherwise this raises NameError -- confirm against the full module.
    template = """
        MATCH (:Author)-[r:{type_db}_{uid}]-(:Author)
        DELETE r
    """
    statement = template.format(uid=uid, type_db=type_db)
    db.run(statement)
 def delete_predictions(self, project_uid):
     """Delete every ``_<project_uid>_<self._id>`` relationship between authors.

     Args:
         project_uid: Identifier spliced into the relationship type.
     """
     template = """
             MATCH (:Author)-[r:_{project_uid}_{algorithm_id}]-(:Author)
             DELETE r
         """
     fields = {"project_uid": project_uid, "algorithm_id": self._id}
     db.run(template.format(**fields))