from coverage_consensus_diversity import coverage, consensus, get_fragment_boundaries
from minor_variant import trim_ac
from helpers import name_translations


if __name__ == '__main__':
    # Plot the coverage of a single sample and draw the fragments returned
    # by get_fragment_boundaries for the reference as grey horizontal bars.
    #
    # The sample name is hard-coded below; the commented-out lines show the
    # alternative of deriving it from a lab id via the translation table.
    #ntt = name_translations('name_translation_table.tsv')
    #labid = '16CA403716'
    #name = ntt[labid]
    sample_location = 'samples_by_strain/'
    name = "EVD68_SWE_029_160904_NFLG"
    fs = 16
    ref = 'KX675261.1'

    sample_dir = sample_location + name + "/"
    ac, ins = load_allele_counts(sample_dir)
    primer_boundaries = get_fragment_boundaries('primers.csv', ac)
    # Coverage is computed from the counts of the first entry of ac only.
    cov = coverage(ac[0][1])

    plt.figure(figsize=(8, 4))
    plt.plot(cov, lw=3)

    bar_color = (0.7, 0.7, 0.7)
    fragments = primer_boundaries[ref]
    for p in fragments:
        # The second character of the fragment key decides the bar height,
        # so bars whose digit differs in parity sit on different levels.
        if int(p[1]) % 2:
            y = 50
        else:
            y = 70
        span = [fragments[p]['start'], fragments[p]['end']]
        plt.plot(span, [y, y], lw=7, c=bar_color)

    plt.xlabel('position in genome', fontsize=fs)
    plt.ylabel('coverage', fontsize=fs)
    plt.ylim(30, 10000)
    plt.yscale('log')
    plt.tick_params(labelsize=0.8 * fs)
    plt.title(name, fontsize=fs)
    plt.tight_layout()
Пример #2
0
        primer_masks = get_primer_mask(args.primers, ac)

    freqs = trim_ac(ac)

    sample = args.sample.split('/')[-1]

    major_freq = {ref:np.max(x, axis=0) for ref, x in freqs.items()}

    minor_seqs = {}
    any_minors = False
    seqs = []
    from Bio import SeqIO, SeqRecord, Seq
    for ref, counts in ac:
        print("ref", ref)
        consensus_seq = consensus(counts, min_cov=args.min_cov)
        cov = coverage(counts)
        div_pos = np.where((major_freq[ref]<1.0-args.min_freq)&(cov>args.min_cov))[0]
        alterations = []
        insertions_to_include = []
        for pos in div_pos:
            tmp_freqs = freqs[ref][:, pos]
            if sorted(tmp_freqs)[-2]>args.min_freq:
                ii = np.argsort(tmp_freqs)[-2]
                alterations.append([pos, nuc_alpha[ii], tmp_freqs[ii]])

        if alterations:
            print(sample, ref, 'minor variants', alterations)
            consensus_seq[[p for p,n,f in alterations]] = [n for p,n,f in alterations]
            any_minors = True

        for pos in ins[ref]:
        }
    }

    # Text label per 'SWE_0xx' id. What the day count measures is not
    # visible in this fragment -- TODO confirm against the plotting code.
    days = {
        'SWE_012': '2 days',
        'SWE_021': '7 days',
        'SWE_024': '1 day',
        'SWE_037': '1 day',
        'SWE_039': '0 days'
    }

    # Walk the two-level samps mapping (presumably patient -> {key: sample
    # directory}; verify against its definition) and store the per-sample
    # results under the label '<pt>-<key>'.
    for pt in samps:
        for key in samps[pt]:
            ac, ins = load_allele_counts(sample_location + samps[pt][key])
            sample = pt + '-' + key
            # Coverage is computed from the first entry of ac only.
            cov[sample] = coverage(ac[0][1])
            freqs[sample] = trim_ac(ac, n_states=5)
            # Per ref: maximum over axis 0 of the array returned by trim_ac.
            major_freqs[sample] = {
                ref: np.max(x, axis=0)
                for ref, x in freqs[sample].items()
            }

##################################
##################################

#Positions not to plot.
#These positions are just before the CDS and show up in
#4/5 of these samples.
    exclude = [690, 694]

    fs = 12
from minor_variant import trim_ac
from helpers import add_panel_label, name_translations

if __name__ == '__main__':

    freqs = {}
    major_freqs = {}
    major_seqs = {}
    cov = {}
    snames = ['16CA514285', '16CA403717', '14CA515617']
    ntt = name_translations('name_translation_table.tsv')

    for sname in snames:
        ac, ins = load_allele_counts('mapped_data/' + sname)
        primer_boundaries = get_fragment_boundaries('primers.csv', ac)
        cov[sname] = coverage(ac[0][1])
        freqs[sname] = trim_ac(ac, n_states=5)
        major_freqs[sname] = {
            ref: np.max(x, axis=0)
            for ref, x in freqs[sname].items()
        }
        major_seqs[sname] = {
            ref: nuc_alpha[np.argmax(x, axis=0)]
            for ref, x in freqs[sname].items()
        }

    min_cov = 1000
    ref = 'KX675261.1'
    cp_labels = {0: 'non coding', 1: '1st', 2: '2nd', 3: '3rd'}
    variable_sites = {}
    fig, axs = plt.subplots(len(snames),