示例#1
0
def test_path_traversal(metapath, hetmat, tmpdir):
    """
    Check matrix-based PC (path count) and DWPC (degree-weighted path count)
    values against hetnetpy.pathtools on the random subgraph of
    Hetionet v1.0.

    Only the compound-disease pair with the largest path count is compared,
    since that is where errors are most likely to appear. When `hetmat` is
    truthy, the matrix computations run on a hetmat written to `tmpdir`,
    while the hetnetpy.pathtools reference always uses the in-memory graph.
    """
    damping = 0.4

    # The in-memory graph is always required for the pathtools reference
    reference_graph = get_graph('random-subgraph')
    if hetmat:
        traversal_source = get_graph(
            'random-subgraph', hetmat=hetmat, directory=tmpdir)
    else:
        traversal_source = reference_graph
    metapath = reference_graph.metagraph.metapath_from_abbrev(metapath)

    # Matrix computations (damping=0 yields plain path counts)
    _, _, pc_matrix = dwpc(traversal_source, metapath, damping=0)
    row_ids, col_ids, dwpc_matrix = dwpc(
        traversal_source, metapath, damping=damping)

    # Locate the compound-disease pair with the maximum path count
    flat_position = pc_matrix.argmax()
    i, j = numpy.unravel_index(flat_position, pc_matrix.shape)
    source_id = ('Compound', row_ids[i])
    target_id = ('Disease', col_ids[j])

    # Reference values from hetnetpy.pathtools
    reference_paths = hetnetpy.pathtools.paths_between(
        reference_graph,
        source=source_id,
        target=target_id,
        metapath=metapath,
        duplicates=False,
    )
    reference_dwpc = hetnetpy.pathtools.DWPC(
        reference_paths, damping_exponent=damping)

    # Matrix values must agree with hetnetpy.pathtools
    assert pc_matrix[i, j] == len(reference_paths)
    assert dwpc_matrix[i, j] == pytest.approx(reference_dwpc)
示例#2
0
def test_disease_gene_example_dwwc(dwwc_method):
    """
    Verify the PC & DWWC values shown in Figure 2D of Himmelstein & Baranzini
    (2015) PLOS Comp Bio. https://doi.org/10.1371/journal.pcbi.1004259.g002

    The GiGaD metapath is evaluated on the 'disease-gene-example' graph with
    the supplied `dwwc_method`, once undamped (walk counts) and once with a
    damping exponent of 0.5.
    """
    graph = get_graph('disease-gene-example')
    metapath = graph.metagraph.metapath_from_abbrev('GiGaD')

    # damping=0 gives walk counts; damping=0.5 gives the DWWC
    _, _, wc_matrix = dwwc(
        graph, metapath, damping=0, dwwc_method=dwwc_method)
    gene_names, disease_names, dwwc_matrix = dwwc(
        graph, metapath, damping=0.5, dwwc_method=dwwc_method)

    # Row and column labels must come back in this exact order
    expected_genes = [
        'CXCR4', 'IL2RA', 'IRF1', 'IRF8', 'ITCH', 'STAT3', 'SUMO1',
    ]
    expected_diseases = ["Crohn's Disease", 'Multiple Sclerosis']
    assert gene_names == expected_genes
    assert disease_names == expected_diseases

    # Cell examined in https://doi.org/10.1371/journal.pcbi.1004259.g002
    i = gene_names.index('IRF1')
    j = disease_names.index('Multiple Sclerosis')

    # Warning: WC (walk count) equals PC (path count) here only because no
    # GiGaD path in this example revisits a node. GiGaD repeats a metanode,
    # so WC and PC are not guaranteed to be equal in general.
    expected_walk_count = 3
    expected_dwwc = 0.25 + 0.25 + 32**-0.5
    assert wc_matrix[i, j] == expected_walk_count
    assert dwwc_matrix[i, j] == pytest.approx(expected_dwwc)
示例#3
0
def test_CbGiGiGaD_traversal():
    """
    Check path counts and degree-weighted path counts along the CbGiGiGaD
    metapath between bupropion and nicotine dependence.

    The subgraph values are not meant to match those of the complete
    Hetionet v1.0, so the expected values are produced with hetio.pathtools
    on the same 'bupropion-subgraph' graph.
    """
    damping = 0.4
    bupropion = 'DB01156'  # Compound identifier
    nicotine_dependence = 'DOID:0050742'  # Disease identifier

    graph = get_graph('bupropion-subgraph')
    metapath = graph.metagraph.metapath_from_abbrev('CbGiGiGaD')

    # Reference values from explicit path enumeration
    reference_paths = hetio.pathtools.paths_between(
        graph,
        source=('Compound', bupropion),
        target=('Disease', nicotine_dependence),
        metapath=metapath,
        duplicates=False,
    )
    reference_dwpc = hetio.pathtools.DWPC(
        reference_paths, damping_exponent=damping)

    # Matrix-based values (damping=0 yields plain path counts)
    _, _, pc_matrix = dwpc(graph, metapath, damping=0)
    row_ids, col_ids, dwpc_matrix = dwpc(graph, metapath, damping=damping)
    i = row_ids.index(bupropion)
    j = col_ids.index(nicotine_dependence)

    assert pc_matrix[i, j] == len(reference_paths)
    assert dwpc_matrix[i, j] == pytest.approx(reference_dwpc)
示例#4
0
def test_disease_gene_example_conversion_to_hetmat(tmpdir):
    """
    Test converting the hetnet from Figure 2C of https://doi.org/crz8 into a
    hetmat.

    The metanodes of both representations must agree, and the adjacency
    matrices read from the graph and from the hetmat must be identical for
    the GaD metaedge and for its inverse, DaG.
    """
    graph = get_graph('disease-gene-example')
    hetmat = hetmatpy.hetmat.hetmat_from_graph(graph, tmpdir)

    graph_metanodes = list(graph.metagraph.get_nodes())
    hetmat_metanodes = list(hetmat.metagraph.get_nodes())
    assert graph_metanodes == hetmat_metanodes

    # GaD is checked first, then DaG (hetmat only stores GaD and must
    # transpose to serve DaG)
    for metaedge in ('GaD', 'DaG'):
        from_graph = hetmatpy.matrix.metaedge_to_adjacency_matrix(
            graph, metaedge, dense_threshold=0)
        from_hetmat = hetmatpy.matrix.metaedge_to_adjacency_matrix(
            hetmat, metaedge, dense_threshold=0)
        assert from_graph[0] == from_hetmat[0]  # row identifiers
        assert from_graph[1] == from_hetmat[1]  # column identifiers
        # adjacency matrices
        assert numpy.array_equal(from_graph[2], from_hetmat[2])
示例#5
0
def test_disease_gene_example_hetmat_archiving(tmpdir):
    """
    Test archiving the hetmat corresponding to the hetnet in Figure 2C at
    https://doi.org/crz8.

    A hetmat is written beneath `tmpdir`, archived, and the archive member
    names are checked. The archive is then loaded into a second directory
    and its files are compared by content with the originals.
    """
    base_dir = pathlib.Path(tmpdir)
    original_dir = base_dir / 'disease-gene-example-0.hetmat'
    restored_dir = base_dir / 'disease-gene-example-1.hetmat'

    graph = get_graph('disease-gene-example')
    hetmat = hetmatpy.hetmat.hetmat_from_graph(graph, original_dir)

    # The archive must list exactly these members, in this order
    expected = [
        'edges/DlT.sparse.npz',
        'edges/GaD.sparse.npz',
        'edges/GeT.sparse.npz',
        'edges/GiG.sparse.npz',
        'metagraph.json',
        'nodes/Disease.tsv',
        'nodes/Gene.tsv',
        'nodes/Tissue.tsv',
    ]
    archive_path = hetmatpy.hetmat.archive.create_hetmat_archive(hetmat)
    with zipfile.ZipFile(archive_path) as archive:
        member_names = archive.namelist()
    assert member_names == expected

    # Round trip: the restored hetmat must contain identical files.
    # shallow=False forces a comparison of file contents.
    hetmatpy.hetmat.archive.load_archive(archive_path, restored_dir)
    same, different, failed = filecmp.cmpfiles(
        original_dir, restored_dir, common=expected, shallow=False)
    assert same == expected
    assert not different
    assert not failed
示例#6
0
def test_dtype(metapath, dtype, dwwc_method):
    """
    Check that the matrix returned by dwpc for `metapath` on the
    'disease-gene-example' graph has the requested `dtype`.
    """
    graph = get_graph('disease-gene-example')
    _, _, result_matrix = dwpc(
        graph,
        graph.metagraph.metapath_from_abbrev(metapath),
        dtype=dtype,
        dwwc_method=dwwc_method,
    )
    assert result_matrix.dtype == dtype
示例#7
0
def test_disjoint_dwpc(metapath, exp_row, exp_col, exp_data, shape):
    """
    Compare the dwpc matrix for `metapath` on the 'random-subgraph' graph
    with an expected matrix given in coordinate form.

    `exp_data`, `exp_row` and `exp_col` hold the expected values and their
    row/column indices; `shape` is the shape of the full expected matrix.
    """
    graph = get_graph('random-subgraph')
    _, _, dwpc_matrix = dwpc(
        graph, graph.metagraph.metapath_from_abbrev(metapath))

    expected = sparse.coo_matrix((exp_data, (exp_row, exp_col)), shape=shape)
    largest_deviation = abs(dwpc_matrix - expected).max()
    assert largest_deviation == pytest.approx(0, abs=1e-7)
示例#8
0
def test_dwpc_approx(metapath, relative):
    """
    Compare _dwpc_approx with dwpc and dwwc for `metapath` on the
    'random-subgraph' graph.

    When `relative` is 'equal', the approximation must match dwpc to within
    1e-7. Otherwise the summed difference (approximation minus dwpc) must be
    non-negative.
    """
    graph = get_graph('random-subgraph')
    metapath = graph.metagraph.metapath_from_abbrev(metapath)

    _, _, exact_matrix = dwpc(graph, metapath)
    _, _, approx_matrix = _dwpc_approx(graph, metapath)
    _, _, walk_matrix = dwwc(graph, metapath)

    approx_minus_exact = approx_matrix - exact_matrix
    if relative != 'equal':
        assert numpy.sum(approx_minus_exact) >= 0
    else:
        assert abs(approx_minus_exact).max() == pytest.approx(0, abs=1e-7)

    # NOTE(review): a maximum of absolute values is never negative, so this
    # assertion passes for any finite result. It may have been meant as a
    # stricter DWWC-vs-approximation comparison -- confirm the intent.
    assert abs(walk_matrix - approx_matrix).max() >= 0
示例#9
0
def test_no_and_short_repeat(metapath, expected, path_type):
    """
    Check dwwc (`path_type` 0) or _dwpc_short_repeat (`path_type` 1) on the
    'disease-gene-example' graph with damping 0.5.

    The returned matrix must match `expected` to within 1e-7 and the row and
    column labels must equal those given by get_nodes for the metapath
    abbreviation.
    """
    # get_nodes takes the abbreviation, so call it before `metapath` is
    # rebound to the metapath object
    expected_rows, expected_cols = get_nodes(metapath)

    graph = get_graph('disease-gene-example')
    metapath = graph.metagraph.metapath_from_abbrev(metapath)

    implementations = {0: dwwc, 1: _dwpc_short_repeat}
    compute = implementations[path_type]
    actual_rows, actual_cols, actual_matrix = compute(
        graph, metapath, damping=0.5)

    expected_matrix = numpy.array(expected, dtype=numpy.float64)
    max_abs_error = abs(actual_matrix - expected_matrix).max()
    assert max_abs_error == pytest.approx(0, abs=1e-7)
    assert actual_rows == expected_rows
    assert actual_cols == expected_cols
示例#10
0
def test__dwpc_baba(m_path):
    """
    Check _dwpc_baba on the 'disease-gene-example' graph against the
    expected labels and matrix supplied by get_baba_matrices for `m_path`.

    The computation uses damping 0.5 and dense_threshold 0; the matrix must
    match to within 1e-8.
    """
    graph = get_graph('disease-gene-example')
    metapath = graph.metagraph.metapath_from_abbrev(m_path)

    expected_rows, expected_cols, expected_matrix = get_baba_matrices(m_path)
    actual_rows, actual_cols, actual_matrix = _dwpc_baba(
        graph, metapath, damping=0.5, dense_threshold=0)

    assert expected_rows == actual_rows
    assert expected_cols == actual_cols
    max_abs_error = abs(expected_matrix - actual_matrix).max()
    assert max_abs_error == pytest.approx(0, abs=1e-8)
示例#11
0
def test__dwpc_baab(metapath, expected):
    """
    Check _dwpc_baab on the 'disease-gene-example' graph with damping 0.5
    and dense_threshold 1.

    The matrix must match `expected` to within 1e-7, and the row and column
    labels must equal those given by get_nodes for the metapath
    abbreviation.
    """
    # get_nodes takes the abbreviation, so call it before `metapath` is
    # rebound to the metapath object
    expected_rows, expected_cols = get_nodes(metapath)

    graph = get_graph('disease-gene-example')
    metapath = graph.metagraph.metapath_from_abbrev(metapath)
    actual_rows, actual_cols, actual_matrix = _dwpc_baab(
        graph, metapath, damping=0.5, dense_threshold=1)

    expected_matrix = numpy.array(expected, dtype=numpy.float64)
    max_abs_error = abs(actual_matrix - expected_matrix).max()

    assert max_abs_error == pytest.approx(0, abs=1e-7)
    assert expected_rows == actual_rows
    assert expected_cols == actual_cols
示例#12
0
def test__dwpc_general_case(length):
    """
    Test _dwpc_general_case on a metapath (segment) of metanode and metaedge
    repeats.

    The metapath is 'GiG' followed by `length` further 'iG' steps, evaluated
    on the 'disease-gene-example' graph with damping 0.5. Expected labels
    and values come from get_general_solutions.
    """
    graph = get_graph('disease-gene-example')
    abbreviation = 'GiG' + 'iG' * length
    metapath = graph.metagraph.metapath_from_abbrev(abbreviation)

    actual_rows, actual_cols, actual_matrix = _dwpc_general_case(
        graph, metapath, damping=0.5)
    expected_rows, expected_cols, expected_matrix = get_general_solutions(
        length)

    # Matrix values first, then the row and column labels
    max_abs_error = abs(actual_matrix - expected_matrix).max()
    assert max_abs_error == pytest.approx(0, abs=1e-7)
    assert actual_rows == expected_rows
    assert actual_cols == expected_cols
示例#13
0
def test_CbGpPWpGaD_traversal():
    """
    Check path counts and degree-weighted path counts along the CbGpPWpGaD
    metapath between bupropion and nicotine dependence. Expected values come
    from the network traversal methods at https://git.io/vHBh2.
    """
    bupropion = 'DB01156'  # Compound identifier
    nicotine_dependence = 'DOID:0050742'  # Disease identifier
    expected_path_count = 142
    expected_dwpc = 0.03287590886921623

    graph = get_graph('bupropion-subgraph')
    metapath = graph.metagraph.metapath_from_abbrev('CbGpPWpGaD')

    # damping=0 yields plain path counts; damping=0.4 yields the DWPC
    _, _, pc_matrix = dwpc(graph, metapath, damping=0)
    row_ids, col_ids, dwpc_matrix = dwpc(graph, metapath, damping=0.4)

    i = row_ids.index(bupropion)
    j = col_ids.index(nicotine_dependence)
    assert pc_matrix[i, j] == expected_path_count
    assert dwpc_matrix[i, j] == pytest.approx(expected_dwpc)
示例#14
0
def test_dwpc(metapath, expected, dense_threshold):
    """
    Check dwpc on the 'disease-gene-example' graph with damping 0.5.

    When `expected` is None the call must raise. Otherwise the result must
    match `expected` to within 1e-7, and it must be a sparse matrix exactly
    when `dense_threshold` equals 1.
    """
    graph = get_graph('disease-gene-example')
    metapath = graph.metagraph.metapath_from_abbrev(metapath)

    # No expected matrix means the computation is expected to fail
    if expected is None:
        with pytest.raises(Exception):
            dwpc(graph, metapath, damping=0.5, dense_threshold=dense_threshold)
        return

    expected_matrix = numpy.array(expected, dtype=numpy.float64)
    _, _, actual_matrix = dwpc(
        graph, metapath, damping=0.5, dense_threshold=dense_threshold)
    max_abs_error = abs(expected_matrix - actual_matrix).max()
    assert max_abs_error == pytest.approx(0, abs=1e-7)

    # Sparse output is expected only for dense_threshold == 1
    expect_sparse = dense_threshold == 1
    assert bool(sparse.issparse(actual_matrix)) == expect_sparse
示例#15
0
def test_path_count_priority_cache(tmpdir, allocate_GB):
    """
    Test PathCountPriorityCache by running the same DWWC computation three
    times and checking the cache hit counters after each run.

    The DWWC matrix is saved to the hetmat's path-counts location between
    the second and third runs. Expected counter values depend on whether
    `allocate_GB` is zero or positive.
    """
    metapath_abbrev = 'CbGpPWpGaD'
    damping = 0.5

    hetmat = get_graph('bupropion-subgraph', hetmat=True, directory=tmpdir)
    cache = hetmatpy.hetmat.caching.PathCountPriorityCache(hetmat, allocate_GB)
    hetmat.path_counts_cache = cache
    # NOTE(review): get_stats is printed without being called; if it is a
    # method rather than a property this prints the bound method -- confirm.
    print(cache.get_stats)

    def compute_dwwc():
        # Identical recursive DWWC computation used for every run
        return hetmatpy.degree_weight.dwwc(
            graph=hetmat,
            metapath=metapath_abbrev,
            damping=damping,
            dwwc_method=hetmatpy.degree_weight.dwwc_recursive,
        )

    def check_hits(memory, disk, absent):
        # Compare the cumulative hit counters with the expected values
        assert cache.hits['memory'] == memory
        assert cache.hits['disk'] == disk
        assert cache.hits['absent'] == absent

    # First run: nothing has been requested from the cache yet
    assert sum(cache.hits.values()) == 0
    _, _, matrix = compute_dwwc()
    assert sum(cache.hits.values()) > 0
    if allocate_GB == 0:
        check_hits(memory=0, disk=0, absent=4)
    elif allocate_GB > 0:
        check_hits(memory=0, disk=0, absent=4)

    # Second run
    _, _, matrix = compute_dwwc()
    if allocate_GB == 0:
        check_hits(memory=0, disk=0, absent=8)
    elif allocate_GB > 0:
        check_hits(memory=1, disk=0, absent=4)

    # Save DWWC matrix
    matrix_path = hetmat.get_path_counts_path(
        metapath_abbrev, 'dwwc', damping, 'npy')
    matrix_path.parent.mkdir(parents=True)
    hetmatpy.hetmat.save_matrix(matrix, matrix_path)

    # Third run
    _, _, matrix = compute_dwwc()
    if allocate_GB == 0:
        check_hits(memory=0, disk=1, absent=8)
    elif allocate_GB > 0:
        check_hits(memory=2, disk=0, absent=4)
    print(cache.get_stats)