def group_novel_isoforms(new_isoforms, all_filter_passing_query_isoforms,
                         pred_samfile_path):
    """Group novel query isoforms that are mutually the same isoform.

    Builds an undirected graph with one node per entry of ``new_isoforms``
    and an edge between two query names whenever ``is_same_isoform_cigar``
    holds in both directions, then labels every maximal clique of that graph.

    Args:
        new_isoforms: Re-iterable collection of query names treated as novel.
            Entries must be hashable (they are used as graph nodes).
        all_filter_passing_query_isoforms: Iterable of records exposing a
            ``query_name`` attribute; only records whose name is in
            ``new_isoforms`` take part in the comparison.
        pred_samfile_path: Path handed to ``pysam.AlignmentFile``. The file
            is opened and closed again, but nothing is read from it here.

    Returns:
        dict mapping query name -> ``"new_isoform_<i>"``, where ``i`` is the
        position of the clique when cliques are ordered by ascending size.
        A name that belongs to several maximal cliques keeps the label of
        the last such clique in that order.
    """
    from itertools import combinations

    # The handle is never read from. It is still opened so that an invalid
    # path keeps failing exactly as before, but inside a ``with`` block so
    # the file descriptor is released instead of leaking.
    with pysam.AlignmentFile(pred_samfile_path, "r", check_sq=False):
        pass

    # Build the lookup set once; membership tests against a list would be
    # linear for every record.
    novel_names = set(new_isoforms)
    query_new_isoforms = [
        q_isoform for q_isoform in all_filter_passing_query_isoforms
        if q_isoform.query_name in novel_names
    ]

    G = nx.Graph()
    G.add_nodes_from(new_isoforms)
    print("nr new:", len(query_new_isoforms))

    # The relation tested below is symmetric by construction (both
    # directions are required), so each unordered pair is checked once.
    for i1, i2 in combinations(query_new_isoforms, 2):
        if i1.query_name == i2.query_name:
            # Records sharing a name are never linked (this would be a
            # self-loop on the graph).
            continue
        if is_same_isoform_cigar(i1, i2) and is_same_isoform_cigar(i2, i1):
            G.add_edge(i1.query_name, i2.query_name)

    print(G.number_of_nodes())
    print(G.number_of_edges())
    maximal_cliques = list(nx.find_cliques(G))
    clique_sizes = [len(cl) for cl in maximal_cliques]

    print(clique_sizes)
    print(sum(clique_sizes))
    print(len(maximal_cliques), "unique splice sites isoforms")

    # Smallest cliques are numbered first; later (larger) cliques overwrite
    # the label of any query name they share with an earlier clique.
    queries_to_new = {
        q_acc: "new_isoform_" + str(i)
        for i, cl in enumerate(sorted(maximal_cliques, key=len))
        for q_acc in cl
    }
    return queries_to_new
# Example #2
# 0
def get_cliques(netw, node):
    """Return the largest maximal clique of ``netw`` that contains ``node``.

    The search is restricted to the subgraph induced by ``node`` and its
    neighbours, since any clique containing ``node`` lies inside it.

    Args:
        netw: A networkx graph containing ``node``.
        node: The node whose largest clique is wanted.

    Returns:
        list of nodes forming the first clique of maximum size found that
        contains ``node``; an empty list if no clique contains it.
    """
    # Build the node list explicitly: ``list.append`` returns None and
    # ``neighbors()`` may return an iterator, so chaining ``.append(node)``
    # onto the call never produced the intended node set.
    closed_neighbourhood = list(netw.neighbors(node))
    closed_neighbourhood.append(node)

    g = nx.Graph(netw.subgraph(closed_neighbourhood))

    largest = []
    for clique in nx.find_cliques(g):
        if node in clique and len(clique) > len(largest):
            largest = clique
    return largest
def all_large_cliques_upto(G,limit=1000):
    """Lazily yield at most ``limit`` maximal cliques with three or more nodes.

    Cliques are enumerated on a copy of ``G`` taken when the generator first
    runs. Iteration ends early once the clique enumeration is exhausted.

    Args:
        G: Graph object providing ``copy()`` and accepted by
            ``nx.find_cliques``.
        limit: Maximum number of cliques to yield.

    Yields:
        Each qualifying clique as returned by ``nx.find_cliques``.
    """
    snapshot = G.copy()
    clique_source = nx.find_cliques(snapshot)
    exhausted = object()

    for _ in range(limit):
        # Advance the shared iterator to the next clique of size >= 3.
        candidate = next(
            (members for members in clique_source if not len(members) < 3),
            exhausted,
        )
        if candidate is exhausted:
            return
        yield candidate