Exemplo n.º 1
0
def _write_consensus_fasta(fasta_fn, seq_name, seq):
    """Overwrite *fasta_fn* with a single FASTA record (header line + sequence line)."""
    # f.write is used instead of the Python-2-only ``print >>f`` statement so
    # that the same bytes are produced on both Python 2 and Python 3.
    with open(fasta_fn, "w") as f:
        f.write(">" + seq_name + "\n")
        f.write(seq + "\n")


def get_consensus(read_fn, init_ref, consensus_fn, consens_seq_name, 
                  hp_correction = True,
                  min_iteration = 4, 
                  max_num_reads = 150,
                  entropy_th = 0.65):
    """Iteratively build a consensus sequence and write it to *consensus_fn*.

    The reads in *read_fn* are first aligned against *init_ref*; every later
    round re-aligns them against the consensus written by the previous round,
    so *consensus_fn* is overwritten after each round.

    Rounds performed:

    1. One initial round against *init_ref* (always).
    2. If ``min_iteration > 1``: ``min_iteration - 2`` further rounds against
       the current consensus file.
    3. If ``min_iteration > 1`` and *hp_correction* is true: one round whose
       sequence comes from ``detect_missing`` instead of
       ``generate_consensus``.
    4. If ``min_iteration > 1``: a final round whose sequence comes from
       ``mark_lower_case_base`` and is written as returned (not upper-cased).

    When ``min_iteration <= 1`` only step 1 runs, so neither the
    *hp_correction* pass nor the lower-case marking is applied.

    Args:
        read_fn: FASTA file with the reads, passed to
            ``construct_aln_graph_from_fasta``.
        init_ref: reference FASTA used for the first round only.
        consensus_fn: output path; also used as the reference for later rounds.
        consens_seq_name: text placed after ``>`` on the FASTA header line.
        hp_correction: enable the ``detect_missing`` pass
            (presumably homopolymer correction -- TODO confirm).
        min_iteration: controls the number of rounds as described above.
        max_num_reads: forwarded to ``construct_aln_graph_from_fasta``.
        entropy_th: forwarded to ``detect_missing`` and
            ``mark_lower_case_base``.

    Returns:
        None. The result is the content of *consensus_fn*.
    """
    def build_graph(ref_fn):
        # Every round uses the same reads and the same graph options; only
        # the reference file changes.
        return construct_aln_graph_from_fasta(read_fn, ref_fn,
                                              max_num_reads = max_num_reads,
                                              remove_in_del = False)

    # Round 1: align against the caller-supplied reference.
    seq, _ = build_graph(init_ref).generate_consensus()
    _write_consensus_fasta(consensus_fn, consens_seq_name, seq.upper())

    if min_iteration <= 1:
        return

    # Refinement rounds: the previous consensus becomes the new reference.
    for _ in range(min_iteration - 2):
        seq, _ = build_graph(consensus_fn).generate_consensus()
        _write_consensus_fasta(consensus_fn, consens_seq_name, seq.upper())

    if hp_correction:
        g = build_graph(consensus_fn)
        seq = detect_missing(g, entropy_th = entropy_th)
        _write_consensus_fasta(consensus_fn, consens_seq_name, seq.upper())

    # Final round: the sequence is written exactly as mark_lower_case_base
    # returns it, so any lower-case marking it applies is preserved.
    g = build_graph(consensus_fn)
    # NOTE(review): the return value is discarded; the call is kept because
    # mark_lower_case_base may rely on state generate_consensus sets on the
    # graph -- confirm before removing.
    g.generate_consensus()
    seq = mark_lower_case_base(g, entropy_th = entropy_th)
    _write_consensus_fasta(consensus_fn, consens_seq_name, seq)
Exemplo n.º 2
0
def generate_haplotype_consensus(inpute_fasta_name, ref_fasta_name, prefix, consensus_name, 
                                 hpFix = True,
                                 min_iteration = 4, 
                                 max_num_reads = 150,
                                 entropy_th = 0.65):
    """Split reads into two clusters and build a consensus for each one.

    Steps, in order:

    1. ``normalize_fasta`` is called with the input FASTA, the reference and
       ``<prefix>_input.fa`` (presumably the output path -- TODO confirm).
    2. ``get_consensus`` builds an overall consensus from
       ``<prefix>_input.fa`` into ``<prefix>.fa`` with ``hp_correction``
       disabled.
    3. The reads are aligned against that consensus, ``read_node_vector``
       and ``clustering_read`` (``k_cluster = 2``, ``random_seed = 42``)
       assign them to clusters, and a summary is written to
       ``<prefix>.log``.
    4. For cluster 0 ("h1") and cluster 1 ("h2"), if the cluster is not
       empty, its reads are extracted with ``get_subset_reads`` into
       ``<prefix>_<tag>_input.fa`` and ``get_consensus`` writes
       ``<prefix>_<tag>.fa`` with sequence name ``<consensus_name>_<tag>``.

    Args:
        inpute_fasta_name: input reads FASTA (name kept as-is for callers).
        ref_fasta_name: reference FASTA for the initial consensus.
        prefix: prefix for every file this function creates.
        consensus_name: sequence name of the overall consensus; also the
            ``ref_group`` passed when re-aligning for clustering.
        hpFix: forwarded as ``hp_correction`` to the per-cluster
            ``get_consensus`` calls.
        min_iteration: forwarded to every ``get_consensus`` call.
        max_num_reads: forwarded to every alignment-graph construction.
        entropy_th: forwarded to every ``get_consensus`` call and to
            ``read_node_vector``. Earlier revisions hard-coded ``0.65`` for
            ``read_node_vector`` and omitted it for the per-cluster calls,
            so a non-default value was silently ignored there.

    Returns:
        None. Results are the files listed above.
    """
    input_fn = "%s_input.fa" % prefix
    consensus_fn = "%s.fa" % prefix

    normalize_fasta(inpute_fasta_name, ref_fasta_name, input_fn)

    get_consensus(input_fn,
                  ref_fasta_name,
                  consensus_fn,
                  consensus_name,
                  hp_correction = False,
                  min_iteration = min_iteration,
                  max_num_reads = max_num_reads,
                  entropy_th = entropy_th)

    g = construct_aln_graph_from_fasta(input_fn,
                                       consensus_fn,
                                       ref_group = consensus_name,
                                       max_num_reads = max_num_reads,
                                       remove_in_del = False)

    rv, hen = read_node_vector(g, entropy_th = entropy_th)
    cluster, cluster_vec = clustering_read(rv, hen, k_cluster = 2, random_seed = 42)

    # Log layout: number of read vectors, then one line per cluster
    # (vector, cluster id, size), then one line per read
    # (joined read vector, cluster id, read key).
    # logf.write replaces the Python-2-only ``print >>logf`` statement; the
    # space-separated, newline-terminated output is the same.
    with open("%s.log" % prefix, "w") as logf:
        logf.write("%s\n" % len(rv))

        for k in cluster:
            logf.write("%s %s %s\n" % (cluster_vec[k], k, len(cluster[k])))

        for k in cluster:
            for r in cluster[k]:
                logf.write("%s %s %s\n" % ("".join(rv[r]), k, r))

    # Both haplotypes are processed identically; only the cluster id and the
    # file/sequence-name tag differ.
    for cluster_id, tag in ((0, "h1"), (1, "h2")):
        if len(cluster[cluster_id]) > 0:
            subset_fn = "%s_%s_input.fa" % (prefix, tag)
            get_subset_reads(input_fn, cluster, cluster_id, subset_fn)

            get_consensus(subset_fn,
                          consensus_fn,
                          "%s_%s.fa" % (prefix, tag),
                          "%s_%s" % (consensus_name, tag),
                          hp_correction = hpFix,
                          min_iteration = min_iteration,
                          max_num_reads = max_num_reads,
                          entropy_th = entropy_th)