def run(args):
    """Permute a network by repeated double edge swaps.

    Loads an unweighted edge list, performs at least ceil(Q * |E|)
    double edge swaps — optionally restricted to swaps that preserve
    connectivity — and saves the permuted edge list.
    """
    # Load unpermuted network.
    edge_list = load_edge_list(args.edge_list_file, unweighted=True)

    # Permute network.
    G = nx.Graph()
    G.add_edges_from(edge_list)

    if args.seed is not None:
        random.seed(args.seed)
    minimum_swaps = int(math.ceil(args.Q*G.number_of_edges()))

    if not args.connected:
        G = nx.double_edge_swap(G, minimum_swaps, 2**30)
    else:
        # If G is not connected, then we perform the connected double edge
        # swap algorithm on a largest connected component of G.
        if not nx.is_connected(G):
            # nx.connected_component_subgraphs was removed in NetworkX 2.4;
            # build the largest-component subgraph explicitly.  Copy the
            # subgraph view so it is mutable for the in-place swaps below.
            largest_cc = max(nx.connected_components(G), key=len)
            G = G.subgraph(largest_cc).copy()

        # The current connected double edge swap algorithm does not guarantee
        # a minimum number of successful edge swaps, so we enforce it.
        current_swaps = 0
        while current_swaps < minimum_swaps:
            remaining_swaps = max(minimum_swaps-current_swaps, 100)
            additional_swaps = nx.connected_double_edge_swap(G, remaining_swaps)
            current_swaps += additional_swaps

    permuted_edge_list = G.edges()

    # Save permuted_network.
    save_edge_list(args.permuted_edge_list_file, permuted_edge_list)
def run(args):
    """Bin network genes by their degree in the scored-gene subnetwork."""
    # Load data.
    index_to_gene, gene_to_index = load_index_gene(args.index_gene_file)
    edge_list = load_edge_list(args.edge_list_file,
                               index_to_gene,
                               unweighted=True)
    gene_to_score = load_gene_score(args.gene_score_file)

    # Restrict the network to scored genes and keep a largest connected
    # component of the induced subgraph.
    G = nx.Graph()
    G.add_edges_from(edge_list)
    G = G.subgraph(gene_to_score)
    G = G.subgraph(max(nx.connected_components(G), key=len))

    # Group the remaining genes by their degree within the subgraph.
    degree_to_nodes = defaultdict(set)
    for gene in set(G.nodes):
        degree_to_nodes[G.degree(gene)].add(gene)

    # Walk the degrees from largest to smallest, closing a bin whenever it
    # has accumulated at least min_size genes.
    bins = []
    pending = []
    for degree in sorted(degree_to_nodes, reverse=True):
        pending.extend(sorted(degree_to_nodes[degree]))
        if len(pending) >= args.min_size:
            bins.append(pending)
            pending = []
    # Fold any remainder into the last bin; if no bin was closed, the
    # remainder (when nonempty) becomes the only bin.
    if bins:
        bins[-1] += pending
    elif pending:
        bins.append(pending)

    # Save degree bins.
    with open(args.output_file, 'w') as f:
        f.write('\n'.join('\t'.join(genes) for genes in bins))
# Example 3
def run(args):
    """Build and save a Hierarchical HotNet similarity matrix."""
    # Load edge list.
    edge_list = load_edge_list(args.edge_list_file)

    # Construct the dense adjacency matrix.  Node indices are shifted so
    # the smallest observed index maps to row/column 0.
    lo = min(min(edge[:2]) for edge in edge_list)
    hi = max(max(edge[:2]) for edge in edge_list)
    size = hi - lo + 1

    A = np.zeros((size, size), dtype=np.float64)
    for u, v, weight in edge_list:
        if args.directed:
            A[v - lo, u - lo] = weight
        else:
            A[u - lo, v - lo] = weight
            A[v - lo, u - lo] = weight

    # Choose beta: either compute a balanced value or validate the
    # user-supplied one.
    if args.beta is None:
        beta = balanced_beta(A, args.threshold, args.num_digits)
    elif 0 < args.beta < 1:
        beta = args.beta
    else:
        raise ValueError('{} invalid; beta must satisfy 0 < beta < 1.'.format(
            args.beta))

    # Construct Hierarchical HotNet similarity matrix.
    P = hh_similarity_matrix(A, beta)

    # Save results.
    if args.output_file is not None:
        save_matrix(args.output_file, P, args.name)

    if args.beta_output_file is not None:
        fmt = '{:.' + str(args.num_digits) + 'f}'
        with open(args.beta_output_file, 'w') as f:
            f.write(fmt.format(beta))
def cut_hierarchy(edge_list_file, index_gene_file, cut_height):
    """Cut the hierarchy loaded from file at cut_height and return the clusters."""
    hierarchy = load_edge_list(edge_list_file)
    index_to_gene, _gene_to_index = load_index_gene(index_gene_file)
    return find_clusters(hierarchy, index_to_gene, cut_height)
def find_statistics(edge_list_file, index_gene_file, reverse=True):
    """Load a hierarchy from file and return its computed statistics."""
    hierarchy = load_edge_list(edge_list_file)
    index_to_gene, _gene_to_index = load_index_gene(index_gene_file)
    return compute_statistics(hierarchy, index_to_gene, reverse)
# Example 6
def run(args):
    """Compute consensus subnetworks across multiple (network, score) runs.

    For every edge, tallies how many (network, score) result sets place
    both endpoints in a common component while the edge exists in that
    network.  Edges whose tally meets args.threshold are kept, and the
    connected components of the kept edges are reported as consensus
    subnetworks.
    """
    # Load data.
    if args.verbose:
        progress('Loading data...')

    assert len(args.component_files) == len(args.index_gene_files) == len(
        args.edge_list_files) == len(args.networks) == len(args.scores)

    edge_list_collection = dict()
    components_collection = dict()

    for network_label, score_label, index_gene_file, edge_list_file, component_file in zip(
            args.networks, args.scores, args.index_gene_files,
            args.edge_list_files, args.component_files):
        index_to_gene, gene_to_index = load_index_gene(index_gene_file)
        # Store edges as frozensets so (u, v) and (v, u) compare equal.
        edge_list = set(
            frozenset(edge) for edge in load_edge_list(
                edge_list_file, index_to_gene, unweighted=True))
        components = load_components(component_file)

        edge_list_collection[(network_label, score_label)] = edge_list
        components_collection[(network_label, score_label)] = components

    # Process data.
    if args.verbose:
        progress('Processing data...')

    # Tally, per edge, the number of (network, score) pairs supporting it.
    # (Unused edge_to_networks/edge_to_scores/edge_to_pairs accumulators
    # and the index/gene collections were removed as dead code.)
    edge_to_tally = defaultdict(int)

    for network_label, score_label in zip(args.networks, args.scores):
        edge_list = edge_list_collection[(network_label, score_label)]
        components = components_collection[(network_label, score_label)]
        for component in components:
            for u, v in combinations(component, 2):
                edge = frozenset((u, v))
                if edge in edge_list:
                    edge_to_tally[edge] += 1

    thresholded_edges = set(edge for edge, tally in edge_to_tally.items()
                            if tally >= args.threshold)

    # Consensus results are the connected components of the thresholded
    # edges, largest component first, deterministically ordered.
    G = nx.Graph()
    G.add_edges_from(thresholded_edges)
    consensus_results = sorted(sorted(
        [sorted(x) for x in nx.connected_components(G)]),
                               key=len,
                               reverse=True)

    # Save data.
    if args.verbose:
        progress('Saving data...')

    if args.consensus_node_file is not None:
        output_string = '\n'.join('\t'.join(x) for x in consensus_results)
        with open(args.consensus_node_file, 'w') as f:
            f.write(output_string)

    if args.consensus_edge_file is not None:
        output_string = '\n'.join(
            '\t'.join(x) for x in sorted(map(sorted, thresholded_edges)))
        with open(args.consensus_edge_file, 'w') as f:
            f.write(output_string)

    if args.verbose:
        progress()