Exemplo n.º 1
0
def meta_node_df(meta_node):
    """Run the meta-node query for ``meta_node`` and return the rows as a DataFrame.

    Args:
        meta_node: Value substituted into the ``{meta_node}`` placeholder of
            ``META_NODE_QUERY_TEMPLATE`` (presumably a node label -- confirm
            against the template).

    Returns:
        A ``pandas.DataFrame`` produced by flattening the result records with
        ``pd.json_normalize``.
    """
    query = META_NODE_QUERY_TEMPLATE.format(meta_node=meta_node)
    driver = neo4j_connect()
    # Context manager guarantees the session is closed even if the query
    # raises; previously the session was never closed.
    with driver.session() as session:
        data = session.run(query).data()
    return pd.json_normalize(data)
Exemplo n.º 2
0
def query_graph(query):
    """Execute ``query`` against the graph database and return the rows as a DataFrame.

    Args:
        query: Query text passed unchanged to ``session.run``.

    Returns:
        A ``pandas.DataFrame`` produced by flattening the result records with
        ``pd.json_normalize``.
    """
    # Connect to the graph database.
    driver = neo4j_connect()
    # Context manager guarantees the session is closed even if the query
    # raises; previously the session was never closed.
    with driver.session() as session:
        query_data = session.run(query).data()
    return pd.json_normalize(query_data)
Exemplo n.º 3
0
def test_connect():
    """Smoke-test the connection by fetching two nodes.

    Runs ``MATCH (n) RETURN n LIMIT 2``, logs the rows, and asserts that
    exactly two rows came back.
    """
    query = """
        MATCH (n)
        RETURN n LIMIT 2;
    """
    driver = neo4j_connect()
    # Close the session deterministically instead of leaving it open after
    # the test finishes (or fails).
    with driver.session() as session:
        data = session.run(query).data()
    logger.info(data)
    assert len(data) == 2
def get_disease_data():
    """Fetch disease ids paired with each of their ``efo`` entries.

    The query unwinds ``d.efo`` for every ``Disease`` node, so the result has
    one row per (disease, efo entry) pair with columns ``disease_id`` and
    ``mondo_efo_id``.

    Returns:
        A ``pandas.DataFrame`` of the query rows; it is also logged at INFO
        level.
    """
    query = """
        match (d:Disease) unwind(d.efo) as mondo_efo_id return d.id as disease_id, mondo_efo_id;
    """
    driver = neo4j_connect()
    # Context manager guarantees the session is closed even if the query
    # raises; previously the session was never closed.
    with driver.session() as session:
        query_data = session.run(query).data()
    df = pd.json_normalize(query_data)
    logger.info(df)
    return df
Exemplo n.º 5
0
def test_meta_node_index():
    """Check that every index declared in ``meta_node_dict`` exists in the database.

    Each index row from ``CALL db.indexes()`` is reduced to a
    ``(first label/type, first property)`` pair and compared against the
    ``(key, value["index"])`` pairs taken from ``meta_node_dict``.
    """
    with neo4j_connect().session() as session:
        index_rows = session.run("CALL db.indexes()").data()

    actual_node_indexes = set()
    for row in index_rows:
        actual_node_indexes.add((row["labelsOrTypes"][0], row["properties"][0]))

    expected_node_indexes = set()
    for label, spec in meta_node_dict.items():
        expected_node_indexes.add((label, spec["index"]))

    # The database may carry extra indexes; only the expected ones must be present.
    assert expected_node_indexes.issubset(actual_node_indexes)
def source_target_dict(meta_rel):
    """Collect the flattened ``source`` and ``target`` values for ``meta_rel``.

    Args:
        meta_rel: Value substituted into the ``{meta_rel}`` placeholder of
            ``SOURCE_TARGET_QUERY_TEMPLATE`` (presumably a relationship type
            -- confirm against the template).

    Returns:
        A dict with keys ``"source"`` and ``"target"``. Each value is a flat
        list made by concatenating, in row order, the iterable found under
        that key in every result row.
    """
    query = SOURCE_TARGET_QUERY_TEMPLATE.format(meta_rel=meta_rel)
    driver = neo4j_connect()
    # Context manager guarantees the session is closed even if the query
    # raises; previously the session was never closed.
    with driver.session() as session:
        data = session.run(query).data()
    # Each row holds a list per key; flatten them directly rather than
    # building intermediate lists of lists first.
    return {
        "source": [item for row in data for item in row["source"]],
        "target": [item for row in data for item in row["target"]],
    }
Exemplo n.º 7
0
def test_meta_rel_exist():
    """Check that the database's relationship types match ``meta_rel_names`` exactly."""
    query = """
        CALL db.relationshipTypes()
        YIELD relationshipType
        RETURN relationshipType AS label
    """
    with neo4j_connect().session() as session:
        rows = session.run(query).data()

    db_meta_rel_names = {row["label"] for row in rows}

    # Log both sides so a mismatch can be diagnosed from the test output.
    logger.info(f"meta_rel_names: {meta_rel_names}")
    logger.info(f"db_meta_rel_names: {db_meta_rel_names}")
    assert set(meta_rel_names) == db_meta_rel_names
Exemplo n.º 8
0
def test_meta_node_exist():
    """Check that the database's node labels match ``meta_node_names`` exactly.

    The ``Meta`` label is excluded from the comparison when present.
    """
    query = """
        CALL db.labels() YIELD label RETURN label
    """
    driver = neo4j_connect()
    with driver.session() as session:
        data = session.run(query).data()
    db_meta_node_names = {row["label"] for row in data}
    # Drop the "Meta" label if it exists. The previous code called
    # remove({"Meta"}), which looks up frozenset({"Meta"}) as an element and
    # raises KeyError whenever "Meta" is actually present.
    db_meta_node_names.discard("Meta")
    logger.info(f"meta_node_names: {meta_node_names}")
    logger.info(f"db_meta_node_names: {db_meta_node_names}")
    assert set(meta_node_names) == db_meta_node_names
Exemplo n.º 9
0
def get_variants_from_graph():
    """Fetch up to 100 distinct ``Variant`` ids and save them as CSV.

    The result is written to the path in ``variant_data`` (without the index
    column) and then handed to ``copy_source_data`` together with
    ``data_name``.

    Returns:
        A ``pandas.DataFrame`` of the query rows (column ``id``).
    """
    query = """
            match (v:Variant)
            return distinct(v._id) as id limit 100
            """
    logger.info(query)
    # Connect to the graph database.
    driver = neo4j_connect()
    # Context manager guarantees the session is closed even if the query
    # raises; previously the session was never closed.
    with driver.session() as session:
        query_data = session.run(query).data()
    df = pd.json_normalize(query_data)
    df.to_csv(variant_data, index=False)
    copy_source_data(data_name=data_name, filename=variant_data)
    return df
Exemplo n.º 10
0
def check():
    """Report which ``ensembl_gene_id`` values from the input file are missing in the graph.

    Reads the tab-separated file ``FILE`` under ``dataDir``, looks up every
    distinct ``ensembl_gene_id`` among ``Gene`` nodes (matching on
    ``g.ensembl_id``), and prints the ids that were not found. Intermediate
    results (file head, id count, matched rows) are printed as well.
    """
    # read data
    data = os.path.join(dataDir, FILE)
    df = pd.read_csv(data, sep="\t")
    print(df.head())

    ens_ids = set(df["ensembl_gene_id"])
    ens_list = list(ens_ids)
    print(len(ens_list))

    # Pass the id list as a query parameter instead of formatting the Python
    # list into the Cypher text: this avoids quoting/escaping problems and
    # keeps the query text constant.
    com = """
    match (g:Gene) where g.ensembl_id in $ens_list return g.ensembl_id;
    """

    driver = neo4j_connect()
    # Consume the result inside the session scope; the context manager then
    # closes the session (previously it was never closed).
    with driver.session() as session:
        result = session.run(com, ens_list=ens_list)
        records = [dict(record) for record in result]
    res_df = pd.DataFrame(records)
    print(res_df)

    # find missing
    # Build the found-set from the records rather than from a DataFrame
    # column, which does not exist (KeyError) when no gene matched.
    found = {record["g.ensembl_id"] for record in records}
    print(ens_ids - found)