Exemplo n.º 1
0
def dblp_simple_test():
    """Parse the small DBLP fixture and check the rows written to the database.

    The fixture ``test_files/dblp1_test.xml`` is parsed into the ``aip_test``
    database. Only the first two records are expected to be stored, with
    eight authors in total, eight author/paper pairs, and author positions
    running 1-4 for each paper.

    ``db_cleanup()`` runs in a ``finally`` clause so the test database is
    removed even when parsing or one of the assertions fails; otherwise a
    failed run would leave state behind for later runs.
    """
    path = "test_files/dblp1_test.xml"
    try:
        parse_dblp.parse(path, database_path="aip_test")
        with database.db:
            with database.db.cursor() as cursor:
                # Only the first two records should be added, with a total
                # of 8 authors
                cursor.execute('''SELECT COUNT(*) FROM publications''')
                res = cursor.fetchone()[0]
                assert res == 2

                cursor.execute('''SELECT COUNT(*) FROM authors''')
                res = cursor.fetchone()[0]
                assert res == 8

                cursor.execute('''SELECT COUNT(*) FROM author_paper_pairs''')
                res = cursor.fetchone()[0]
                assert res == 8

                # NOTE(review): the query has no ORDER BY, so the comparison
                # below relies on the rows coming back in insertion order.
                cursor.execute('''
            SELECT author_position FROM author_paper_pairs''')

                # flatmap from:
                # https://stackoverflow.com/questions/1077015/python-list-comprehensions-compressing-a-list-of-lists
                res = [pos for app in cursor.fetchall() for pos in app]
                # check if the author positions have been correctly added
                assert res == [1, 2, 3, 4, 1, 2, 3, 4]
    finally:
        # Always drop the test database, including on failure.
        db_cleanup()
Exemplo n.º 2
0
def process_file(path):
    """Parse one corpus file into its own temporary database file.

    The parser is chosen by a substring of ``path``: ``"dblp.xml"``,
    ``"s2-corpus"`` or ``"aminer_papers"``. The work is serialised through a
    module-global lock that is created lazily on first use.

    Args:
        path: Path of the corpus file to process.

    Returns:
        A one-element list holding the parser's return value, or ``[False]``
        when ``path`` matches none of the known corpus names.
    """
    global local_lock

    print(path)

    # Create the lock on first use (or when the global exists but is None).
    if globals().get('local_lock') is None:
        local_lock = threading.Lock()

    ret_value = [False]

    # ``with`` guarantees the lock is released even when a parser raises;
    # a bare acquire()/release() pair would leave it held forever and block
    # every later call in this process.
    with local_lock:
        # Filename can't start with a number
        # Use the hash of the node name as database file.
        # NOTE(review): str hashes are randomised per interpreter process
        # unless PYTHONHASHSEED is fixed, so this name is not stable
        # across runs.
        h = 'aipdb' + str(abs(hash(path)))
        print(h)
        tmp_path = os.path.join(tmp_folder, "{}.db".format(h))
        os.makedirs(tmp_folder, exist_ok=True)
        if "dblp.xml" in path:
            ret_value = [parse_dblp.parse(path, tmp_path)]
        elif "s2-corpus" in path:
            ret_value = [
                parse_semantic_scholar.parse_semantic_scholar_corpus_file(
                    path, tmp_path)
            ]
        elif "aminer_papers" in path:
            ret_value = [parse_aminer.parse_aminer_corpus_file(path, tmp_path)]

    return ret_value
Exemplo n.º 3
0
def process_file(path, db_file="aip"):
    """Dispatch a corpus file to the parser that matches its path.

    Args:
        path: Path of the corpus file. A path matching the pattern
            ``.*dblp[\\w-]+\\.xml`` goes to the DBLP parser; otherwise the
            substrings ``"s2-corpus"`` and ``"aminer_papers"`` select the
            Semantic Scholar and AMiner parsers respectively.
        db_file: Database name/path forwarded to the selected parser.

    Returns:
        The selected parser's return value, or ``True`` when the path
        matches no known corpus (nothing to do).
    """
    # Raw string: "\w" and "\." are invalid escapes in a normal literal and
    # trigger a warning on current Python versions.
    if re.match(r".*dblp[\w-]+\.xml", path):
        return parse_dblp.parse(path, db_file)
    elif "s2-corpus" in path:
        return parse_semantic_scholar.parse_semantic_scholar_corpus_file(
            path, db_file)
    elif "aminer_papers" in path:
        return parse_aminer.parse_aminer_corpus_file(path,
                                                     db_file,
                                                     logger_disabled=True)

    return True  # Nothing that should be done.
Exemplo n.º 4
0
def process_file(path, db_file=aip_name):
    """Dispatch a corpus file to the parser that matches its path.

    Args:
        path: Path of the corpus file. A path matching the pattern
            ``.*dblp[\\w-]+\\.xml`` goes to the DBLP parser; otherwise the
            substrings ``"aminer_papers"``, ``"mag_papers"`` and
            ``"s2-corpus"`` select the AMiner, MAG and Semantic Scholar
            parsers respectively.
        db_file: Database name/path forwarded to the selected parser.

    Returns:
        The selected parser's return value, or ``True`` when the path
        matches no known corpus (nothing to do). The AMiner and MAG
        branches also print the elapsed parse time in seconds.
    """
    # Raw string: "\w" and "\." are invalid escapes in a normal literal and
    # trigger a warning on current Python versions.
    if re.match(r".*dblp[\w-]+\.xml", path):
        return parse_dblp.parse(path, db_file)
    elif "aminer_papers" in path:
        # perf_counter() is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start = time.perf_counter()
        ret = parse_aminer.parse_aminer_corpus_file(path,
                                                    db_file,
                                                    logger_disabled=True)
        print("Aminer parse time:", time.perf_counter() - start)
        return ret
    elif "mag_papers" in path:
        start = time.perf_counter()
        ret = parse_mag.parse_mag_corpus_file(path,
                                              db_file,
                                              logger_disabled=True)
        print("MAG parse time:", time.perf_counter() - start)
        return ret
    elif "s2-corpus" in path:
        return parse_semantic_scholar.parse_semantic_scholar_corpus_file(
            path, db_file)

    return True  # Nothing that should be done.