def get_greedy_communities():
    """Return the greedy-modularity communities of the graph as nested lists.

    If ``CachedFiles/greedy_communities.pickle`` exists, its unpickled content
    is returned unchanged. Otherwise the communities are computed with
    networkx's ``greedy_modularity_communities`` on the first graph returned
    by ``load_graphs()``, and each community is converted to a plain list.

    NOTE: this function only reads the cache file; it never writes it.

    Returns:
        The unpickled cache content, or a list with one list of nodes per
        community.
    """
    greedy_pickle_path = 'CachedFiles/greedy_communities.pickle'
    if path.exists(greedy_pickle_path):
        print('Loading Greedy Communities')
        # Use a context manager so the handle is closed deterministically.
        # pickle executes arbitrary code on load: only use trusted cache files.
        with open(greedy_pickle_path, 'rb') as cache_file:
            return pickle.load(cache_file)

    print('Generating Greedy Communities')
    # load_graphs() returns a pair; only the first graph is needed here.
    G, _ = load_graphs()
    communities = nx.algorithms.community.modularity_max.greedy_modularity_communities(G)
    # convert each community (a frozenset) to a plain list
    return [list(community) for community in communities]
# Example #2
# 0
def get_reduced_gtex(reactome):
    """Build the expression table restricted to the non-HPO graph nodes.

    The table returned by ``get_expression_for_all_genes(reactome)`` is
    re-indexed by its ``'Description'`` column, reduced to the nodes of the
    first graph from ``load_graphs()`` whose first three characters are not
    ``'HP:'``, passed through ``remove_filler_rows``, pickled to
    ``CachedFiles/gtex-pandas-mygene2-genes-only.pickle`` and returned.

    Side effects: sets the module-level ``GTEX_GLOBAL`` to ``None`` and
    overwrites the pickle file on every call.

    Args:
        reactome: forwarded unchanged to ``get_expression_for_all_genes``.

    Returns:
        The reduced expression table.
    """
    filepath = 'CachedFiles/gtex-pandas-mygene2-genes-only.pickle'
    global GTEX_GLOBAL
    # NOTE(review): `and False` makes this branch unreachable, so the cached
    # pickle is never loaded and the table is regenerated on every call.
    # This looks like a deliberate cache bypass -- confirm before removing it.
    if path.exists(filepath) and False:
        print('Loading Reduced GTEX')
        GTEX_GLOBAL = None
        with open(filepath, 'rb') as cache_file:
            return pickle.load(cache_file)

    print('Generating Reduced GTEX')
    exp = get_expression_for_all_genes(reactome)
    # load mygene2
    G, _ = load_graphs()
    # keep every node whose identifier does not start with the 'HP:' prefix
    genes = [x for x in G.nodes if x[0:3] != 'HP:']
    exp.index = exp['Description']
    # presumably releases a large cached table held in GTEX_GLOBAL -- TODO confirm
    GTEX_GLOBAL = None
    exp = exp.loc[genes]
    exp = remove_filler_rows(exp)
    # Close the handle explicitly so the pickle is fully flushed to disk.
    with open(filepath, 'wb') as cache_file:
        pickle.dump(exp, cache_file, protocol=4)
    return exp
# Example #3
# 0
            hpos_per_gene.append(len(set(hpos)))
    print('\n')
    print(name + ' Found in StringDB: ' + str(given_graph_found_count))
    print(name + ' Not Found in StringDB: ' + str(given_graph_not_found_count))
    # the vast majority of StringDB neighbors are not found in MyGene2
    print('Neighbors Found: ' + str(found_neighbor_count))
    print('Neighbors Not Found: ' + str(not_found_neighbor_count))
    return hpos_per_gene, [given_graph_found_count,
                           given_graph_not_found_count,
                           found_neighbor_count,
                           not_found_neighbor_count]


# Script entry point: load the graphs and print the mean of the per-gene HPO
# counts for the MyGene2 and Jenkins graphs, before and after expansion.
if __name__ == "__main__":
    print('Node Expansion Analysis')
    G, Gn = load_graphs()
    Gj = load_jenkins_gene_to_pheno()
    # get the number of HPOs associated with each MyGene2 gene
    mg2_hpos_counts = get_hpos_per_gene(G)
    print('MyGene2 average number of HPOs per gene: ' + str(np.mean(np.array(mg2_hpos_counts))))
    # get the number of HPOs associated with each Jenkins gene
    jenkins_hpos_counts = get_hpos_per_gene(Gj)
    print('Jenkins average number of HPOs per gene: ' + str(np.mean(np.array(jenkins_hpos_counts))))
    # get the number of HPOs associated with each MyGene2 gene after expansion with StringDB data
    # the call returns a pair: the per-gene counts and a second value kept as mg2_stats
    # NOTE(review): the meaning of the third argument (False) is not visible here -- confirm
    mg2_expanded_hpo_counts, mg2_stats = get_hpos_per_expanded_gene(G, 'MyGene2', False)
    print('MyGene2 Expanded average number of HPOs per gene: ' + str(np.mean(np.array(mg2_expanded_hpo_counts))))
    # get the number of HPOs associated with each expanded Jenkins gene
    jenkins_expanded_hpos_counts, jenkins_stats = get_hpos_per_expanded_gene(Gj, 'Jenkins', False)
    print('Jenkins Expanded average number of HPOs per gene: ' + str(np.mean(np.array(jenkins_expanded_hpos_counts))))

    # produce data