Пример #1
0
def metapath_to_degree_dicts(graph, metapath):
    """
    Build degree lookups for the two ends of a metapath.

    The metapath is resolved through ``graph.metagraph.get_metapath``. The
    adjacency matrix of its first metaedge is summed along axis 1 to obtain
    one degree per row (source side), and the adjacency matrix of its last
    metaedge is summed along axis 0 to obtain one degree per column (target
    side). Each degree sequence is then passed to
    ``degrees_to_degree_to_ind``.

    Returns
    -------
    tuple
        ``(source_degree_to_ind, target_degree_to_ind)``, i.e. the results of
        ``degrees_to_degree_to_ind`` for the source and target degrees.
    """
    metapath = graph.metagraph.get_metapath(metapath)

    def _adjacency(metaedge):
        # Only the matrix is needed here; the returned row and column
        # identifiers are discarded.
        _row_ids, _col_ids, matrix = metaedge_to_adjacency_matrix(
            graph, metaedge, dense_threshold=0.7)
        return matrix

    first_edge_matrix = _adjacency(metapath[0])
    last_edge_matrix = _adjacency(metapath[-1])

    # ``.flat`` gives a 1-D view regardless of whether ``sum`` returned a
    # 1-D array or a 2-D matrix.
    degrees_by_source = first_edge_matrix.sum(axis=1).flat
    degrees_by_target = last_edge_matrix.sum(axis=0).flat

    return (
        degrees_to_degree_to_ind(degrees_by_source),
        degrees_to_degree_to_ind(degrees_by_target),
    )
Пример #2
0
def dwpc_to_degrees(graph, metapath, damping=0.5, ignore_zeros=False):
    """
    Yield a description of each cell in a DWPC matrix adding source and target
    node degree info as well as the corresponding path count.

    Parameters
    ----------
    graph
        Object providing ``metagraph.get_metapath``, ``get_nodes_path`` and
        ``read_path_counts``.
    metapath
        Metapath, or anything ``graph.metagraph.get_metapath`` accepts.
    damping : float
        Damping value passed to ``graph.read_path_counts`` for the DWPC matrix.
    ignore_zeros : bool
        When true, cells whose transformed DWPC equals zero are skipped.

    Yields
    ------
    collections.OrderedDict
        One mapping per (row, column) cell with the keys ``source_id``,
        ``target_id``, ``source_name``, ``target_name``, ``source_degree``,
        ``target_degree``, ``path_count`` and ``dwpc``, in that order.
        ``dwpc`` is ``arcsinh(value / mean)`` of the DWPC matrix and
        ``path_count`` is the cell read with damping ``0.0``.
    """
    metapath = graph.metagraph.get_metapath(metapath)
    _, _, source_adj_mat = metaedge_to_adjacency_matrix(graph,
                                                        metapath[0],
                                                        dense_threshold=0.7)
    _, _, target_adj_mat = metaedge_to_adjacency_matrix(graph,
                                                        metapath[-1],
                                                        dense_threshold=0.7)
    # Row sums of the first metaedge / column sums of the last metaedge.
    source_degrees = source_adj_mat.sum(axis=1).flat
    target_degrees = target_adj_mat.sum(axis=0).flat
    # The adjacency matrices are no longer needed; release them before the
    # path-count matrices are loaded.
    del source_adj_mat, target_adj_mat

    # NOTE(review): names are looked up by position, which assumes the node
    # TSV rows are in the same order as the matrix rows/columns - confirm.
    source_path = graph.get_nodes_path(metapath.source(), file_format='tsv')
    source_node_df = pandas.read_csv(source_path, sep='\t')
    source_node_names = list(source_node_df['name'])

    target_path = graph.get_nodes_path(metapath.target(), file_format='tsv')
    target_node_df = pandas.read_csv(target_path, sep='\t')
    target_node_names = list(target_node_df['name'])

    row_names, col_names, dwpc_matrix = graph.read_path_counts(
        metapath, 'dwpc', damping)
    # Scale by the mean before the arcsinh transform. A mean of exactly zero
    # would turn the division into 0/0 (NaN cells that also defeat the
    # ``ignore_zeros`` filter), so the scaling is skipped in that case.
    dwpc_mean = dwpc_matrix.mean()
    if dwpc_mean != 0:
        dwpc_matrix = dwpc_matrix / dwpc_mean
    dwpc_matrix = numpy.arcsinh(dwpc_matrix)
    if scipy.sparse.issparse(dwpc_matrix):
        dwpc_matrix = dwpc_matrix.toarray()

    # Damping 0.0 is read separately and reported as the path count.
    _, _, path_count = graph.read_path_counts(metapath, 'dwpc', 0.0)
    if scipy.sparse.issparse(path_count):
        path_count = path_count.toarray()

    row_inds, col_inds = range(len(row_names)), range(len(col_names))
    for row_ind, col_ind in itertools.product(row_inds, col_inds):
        dwpc_value = dwpc_matrix[row_ind, col_ind]
        if ignore_zeros and dwpc_value == 0:
            continue
        # Built from pairs so the key order is guaranteed by OrderedDict
        # itself rather than by the ordering of a plain dict literal.
        yield collections.OrderedDict([
            ('source_id', row_names[row_ind]),
            ('target_id', col_names[col_ind]),
            ('source_name', source_node_names[row_ind]),
            ('target_name', target_node_names[col_ind]),
            ('source_degree', source_degrees[row_ind]),
            ('target_degree', target_degrees[col_ind]),
            ('path_count', path_count[row_ind, col_ind]),
            ('dwpc', dwpc_value),
        ])
Пример #3
0
def metaedge_to_data_array(graph, metaedge, dtype=numpy.bool_):
    """
    Return an xarray.DataArray holding the adjacency matrix for a metaedge.

    The first dimension (rows) is named after ``metaedge.source.identifier``
    and labelled with the source node ids; the second dimension (columns) is
    named after ``metaedge.target.identifier`` and labelled with the target
    node ids. The array itself is named ``metaedge.get_unicode_str()``.

    Parameters
    ----------
    graph
        Passed through to ``metaedge_to_adjacency_matrix``.
    metaedge
        Metaedge whose adjacency matrix is wrapped.
    dtype
        Passed through to ``metaedge_to_adjacency_matrix``.
    """
    source_node_ids, target_node_ids, adjacency_matrix = (
        metaedge_to_adjacency_matrix(graph, metaedge, dtype=dtype))

    # Dimension names and coordinate labels are given in the same
    # (source, target) order so they line up axis by axis.
    dims = metaedge.source.identifier, metaedge.target.identifier
    coords = source_node_ids, target_node_ids

    return xarray.DataArray(
        adjacency_matrix,
        coords=coords,
        dims=dims,
        name=metaedge.get_unicode_str(),
    )
Пример #4
0
def dwpc_to_degrees(graph,
                    metapath,
                    damping=0.5,
                    ignore_zeros=False,
                    ignore_redundant=True):
    """
    Yield a description of each cell in a DWPC matrix adding source and target
    node degree info as well as the corresponding path count.

    Parameters
    ----------
    graph
        Object providing ``metagraph.get_metapath``, ``get_nodes_path`` and
        ``read_path_counts``.
    metapath
        Metapath, or anything ``graph.metagraph.get_metapath`` accepts.
    damping: float
        Damping value passed to ``graph.read_path_counts`` for the DWPC matrix.
    ignore_zeros: bool
        When true, cells whose transformed DWPC equals zero are skipped.
    ignore_redundant: bool
        When metapath is symmetric, only return a single orientation of a node pair.
        For example, yield source-target but not also target-source, which should have
        the same DWPC.

    Yields
    ------
    collections.OrderedDict
        One mapping per visited cell with the keys ``source_id``,
        ``target_id``, ``source_name``, ``target_name``, ``source_degree``,
        ``target_degree``, ``path_count`` and ``dwpc``, in that order.
        ``dwpc`` is ``arcsinh(value / mean)`` of the DWPC matrix and
        ``path_count`` is the cell read with damping ``0.0``.
    """
    metapath = graph.metagraph.get_metapath(metapath)
    _, _, source_adj_mat = metaedge_to_adjacency_matrix(graph,
                                                        metapath[0],
                                                        dense_threshold=0.7)
    _, _, target_adj_mat = metaedge_to_adjacency_matrix(graph,
                                                        metapath[-1],
                                                        dense_threshold=0.7)
    # Row sums of the first metaedge / column sums of the last metaedge.
    source_degrees = source_adj_mat.sum(axis=1).flat
    target_degrees = target_adj_mat.sum(axis=0).flat
    # The adjacency matrices are no longer needed; release them before the
    # path-count matrices are loaded.
    del source_adj_mat, target_adj_mat

    # NOTE(review): names are looked up by position, which assumes the node
    # TSV rows are in the same order as the matrix rows/columns - confirm.
    source_path = graph.get_nodes_path(metapath.source(), file_format='tsv')
    source_node_df = pandas.read_csv(source_path, sep='\t')
    source_node_names = list(source_node_df['name'])

    target_path = graph.get_nodes_path(metapath.target(), file_format='tsv')
    target_node_df = pandas.read_csv(target_path, sep='\t')
    target_node_names = list(target_node_df['name'])

    row_names, col_names, dwpc_matrix = graph.read_path_counts(
        metapath, 'dwpc', damping)
    # Scale by the mean before the arcsinh transform. A mean of exactly zero
    # would turn the division into 0/0 (NaN cells that also defeat the
    # ``ignore_zeros`` filter), so the scaling is skipped in that case.
    dwpc_mean = dwpc_matrix.mean()
    if dwpc_mean != 0:
        dwpc_matrix = dwpc_matrix / dwpc_mean
    dwpc_matrix = numpy.arcsinh(dwpc_matrix)
    if scipy.sparse.issparse(dwpc_matrix):
        dwpc_matrix = dwpc_matrix.toarray()

    # Damping 0.0 is read separately and reported as the path count.
    _, _, path_count = graph.read_path_counts(metapath, 'dwpc', 0.0)
    if scipy.sparse.issparse(path_count):
        path_count = path_count.toarray()

    if ignore_redundant and metapath.is_symmetric():
        # Visit only index pairs with row_ind <= col_ind, so each unordered
        # node pair (including self-pairs) is reported once.
        pairs = itertools.combinations_with_replacement(
            range(len(row_names)), 2)
    else:
        pairs = itertools.product(range(len(row_names)), range(len(col_names)))
    for row_ind, col_ind in pairs:
        dwpc_value = dwpc_matrix[row_ind, col_ind]
        if ignore_zeros and dwpc_value == 0:
            continue
        # Built from pairs so the key order is guaranteed by OrderedDict
        # itself rather than by the ordering of a plain dict literal.
        yield collections.OrderedDict([
            ('source_id', row_names[row_ind]),
            ('target_id', col_names[col_ind]),
            ('source_name', source_node_names[row_ind]),
            ('target_name', target_node_names[col_ind]),
            ('source_degree', source_degrees[row_ind]),
            ('target_degree', target_degrees[col_ind]),
            ('path_count', path_count[row_ind, col_ind]),
            ('dwpc', dwpc_value),
        ])