def plot_cysteine_positions(infile, outfile, regex = None, frequency = False):
    if frequency:
        outfile = outfile + '_freq'
    folders = helper_functions.create_list(infile)
    data = find_cysteine_positioning(folders, outfile, regex, frequency)
    arr = np.array(data)
    n = len(data)
    ind = np.arange(n)
    font = fm.FontProperties(family = 'Sans-Serif', size = 20)
    font2 = fm.FontProperties(family = 'Sans-Serif', size = 25)
    font1 = fm.FontProperties(family = 'Sans-Serif', size = 15)
    values = range(9)
    c = cm = plt.get_cmap('CMRmap') 
    cNorm  = colors.Normalize(vmin=0, vmax=values[-1])
    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=c)
    width = 0.35
    fig, ax = plt.subplots(figsize = (20,10))
    start_bars = ax.bar(ind + 0.15, arr[:,1], width, color =  scalarMap.to_rgba(values[2]))
    stop_bars = ax.bar(ind + 0.5, arr[:,3], width, color= scalarMap.to_rgba(values[4]))
    ax.set_ylabel('mean distance of cysteine from start/end of CDR3 as a fraction of CDR3 length', fontproperties = font1)
    ax.set_xlabel('CDR3 length (amino acids)', fontproperties = font2)
    ax.set_xticks(ind + 0.5)
    ax.set_xticklabels([int(i) for i in arr[:,0]])
    for label in (ax.get_xticklabels() + ax.get_yticklabels()):
        label.set_fontproperties(font)
    ax.legend((start_bars[0], stop_bars[0]),('5\' cysteine', '3\' cysteine'), prop = font2)
    plt.savefig(outfile + '.png')
def plot_space(infile, outfile, regex = None, frequency = False):
    if frequency:
        outfile = outfile + '_freq'
    folders = helper_functions.create_list(infile)
    data = space(folders, outfile, regex, frequency)
    arr = np.array(data)
    fig = plt.figure(facecolor = 'white')
    values = range(6)
    c = cm = plt.get_cmap('CMRmap') 
    cNorm  = colors.Normalize(vmin=0, vmax=values[-1])
    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=c)
    plt.ylabel('relative frequency of cysteine spacing')
    plt.xlabel('CDR3 length (AA)')
    plt.plot(arr[:,0], arr[:,1], color =  scalarMap.to_rgba(values[0]), label = '0-2',linewidth=1.5)
    plt.scatter(arr[:,0], arr[:,1], color =  scalarMap.to_rgba(values[0]))
    plt.plot(arr[:,0], arr[:,2], color =  scalarMap.to_rgba(values[1]), label = '3', linewidth=1.5)
    plt.scatter(arr[:,0], arr[:,2], color =  scalarMap.to_rgba(values[1]))
    plt.plot(arr[:,0], arr[:,3], color =  scalarMap.to_rgba(values[2]), label = '4', linewidth=1.5)
    plt.scatter(arr[:,0], arr[:,3], color =  scalarMap.to_rgba(values[2]))
    plt.plot(arr[:,0], arr[:,4], color =  scalarMap.to_rgba(values[3]), label = '5-6', linewidth=1.5)
    plt.scatter(arr[:,0], arr[:,4], color =  scalarMap.to_rgba(values[3]))
    plt.plot(arr[:,0], arr[:,5], color =  scalarMap.to_rgba(values[4]), label = '>7', linewidth=1.5)
    plt.scatter(arr[:,0], arr[:,5], color =  scalarMap.to_rgba(values[4]))
    lgd = plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.xlim(10,31)
    plt.ylim([0,1])
    plt.savefig(outfile + '.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
示例#3
0
def run_shared_motifs(infile, outfile, germline, threshold, regex = None, frequency = False):
        if frequency:
            outfile = outfile + '_freq'
        folders = helper_functions.create_list(infile)
        font = fm.FontProperties(family = 'Sans-Serif', size = 20)
        font2 = fm.FontProperties(family = 'Sans-Serif', size = 25)
        data = shared_motifs(folders, outfile, germline, threshold, regex, frequency)
        arr = np.array(data)
        values = range(9)

        c = cm = plt.get_cmap('CMRmap') 
        cNorm  = colors.Normalize(vmin=0, vmax=values[-1])
        scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=c)
        fig = plt.figure(facecolor = 'white')
        fig.set_size_inches(10, 8)
        colorVal = scalarMap.to_rgba(values[0]) 
        plt.plot(arr[:,0],arr[:,1], color = colorVal, linewidth = 2.0)
        plt.scatter(arr[:,0],arr[:,1], color = colorVal)
        ax = plt.gca()
        ax.set_xlim(0,29)
        ax.set_ylim(0,2600)
        ax.set_xlabel('number of individuals', fontproperties = font2)
        ax.set_ylabel('number of common motifs',fontproperties = font2)
        for label in (ax.get_xticklabels() + ax.get_yticklabels()):
            label.set_fontproperties(font)
        yticks = ax.yaxis.get_major_ticks()
        yticks[0].label1.set_visible(False)
        
        plt.savefig(outfile + '.png')
def plot_conserved_cysteines(infile, outfile, regex = None, frequency = False):
    if frequency:
        outfile = outfile + '_freq'
    folders = helper_functions.create_list(infile)
    data = conserved_cysteines(folders, outfile, regex, frequency)
    arr = np.array(data)
    fig = plt.figure(facecolor = 'white')
    plt.plot(arr[:,0], arr[:,1])
    plt.ylabel('relative frequency of cysteine conservation')
    plt.xlabel('CDR3 length')
    plt.ylim([0,1.2])
    plt.savefig(outfile + '.png')
示例#5
0
def plot_vsegs(infile, outfile, condition, regex = None, frequency = False):
    if frequency:
        outfile = outfile + '_freq'
    folders = helper_functions.create_list(infile)
    data = analyse_vsegs(folders, outfile, condition, regex, frequency)
    ind = np.arange(len(data[0]))
    labels = ['IGHV1','IGHV2','IGHV3','IGHV4','IGHV5','IGHV6','IGHV7']
    fig = plt.figure(facecolor = 'white')
    plt.bar(ind, data[0], yerr = data[1], align = 'center')
    plt.ylim([0,0.8])
    plt.ylabel('relative frequency of V-gene')
    plt.xticks(ind, labels, rotation=90, rotation_mode="anchor")
    plt.tick_params(axis = 'x', pad = 30, which = 'both', bottom = 'off', top = 'off')
    plt.tight_layout()
    plt.savefig(outfile + '.png')                  
def plot_cysteines_v_length(infile, outfile, regex=None, frequency=False):
    if frequency:
        outfile = outfile + "_freq"
    font = fm.FontProperties(family="Sans-Serif", size=20)
    font2 = fm.FontProperties(family="Sans-Serif", size=25)
    folders = helper_functions.create_list(infile)
    data = cysteines_v_length(folders, outfile, regex, frequency)
    values = range(9)
    data1 = gen_rand_seqs.generate_random_seqs_2(range(1, 40), 1000000)
    data2 = gen_rand_seqs.generate_random_seqs_3(range(1, 40), 1000000)
    jet = cm = plt.get_cmap("CMRmap")
    cNorm = colors.Normalize(vmin=0, vmax=values[-1])
    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
    colorVal = scalarMap.to_rgba(values[4])
    colorVal1 = scalarMap.to_rgba(values[0])
    colorVal2 = scalarMap.to_rgba(values[2])
    arr = np.array(data)
    arr1 = np.array(data1)
    arr2 = np.array(data2)
    fig, ax1 = plt.subplots()
    fig.set_size_inches(10, 8)
    ax1.plot(arr[:, 0], arr[:, 1], ls="--", color=colorVal2)
    ax1.scatter(arr[:, 0], arr[:, 1], color=colorVal2)
    plt.ylim(0, 0.14)
    ax1.set_xlabel("CDR3 length (amino acids)")
    ax1.set_ylabel("relative frequency (n > 100)", color=colorVal2)
    for t1 in ax1.get_yticklabels():
        t1.set_color(colorVal2)
        t1.set_fontproperties(font)
    ax2 = ax1.twinx()
    ax2.plot(arr[:, 0], arr[:, 2], color=colorVal)
    ax2.scatter(arr[:, 0], arr[:, 2], color=colorVal)
    ax2.plot(arr1[:, 0], arr1[:, 1], color=colorVal, linestyle=":", label="simulation 1")
    ax2.plot(arr2[:, 0], arr2[:, 1], color=colorVal, linestyle="-.", label="simulation 2")
    plt.ylim(0, 0.5)
    plt.xlim(0, 40)
    plt.legend(prop=font2)
    ax2.set_ylabel("relative frequency of CDR3s containing two or more cysteines", color=colorVal)
    for t2 in ax2.get_yticklabels():
        t2.set_color(colorVal)
        t2.set_fontproperties(font)
    for t in ax1.get_xticklabels():
        t.set_fontproperties(font)
    for t in ax2.get_xticklabels():
        t.set_fontproperties(font)
    plt.savefig(outfile + ".png")
def plot_n_nucleotides(infile, outfile, condition, regex = None, frequency = False):
    if frequency:
        outfile = outfile + '_freq'
    folders = helper_functions.create_list(infile)
    data = n_nucleotides(folders, outfile, condition, regex, frequency)
    arr = np.array(data)
    ind = np.arange(len(data))
    width = 0.35
    fig, ax = plt.subplots()
    n1_bars = ax.bar(ind, arr[:,1], width, color = 'b')
    n2_bars = ax.bar(ind + width, arr[:,2], width, color = 'r')
    ax.set_ylabel('number of n nucleotides')
    ax.set_ylim([0,60])
    ax.set_xlabel('CDR3 length')
    ax.set_xticks(ind + width)
    ax.set_xticklabels([int(i) for i in arr[:,0]])
    plt.setp( ax.xaxis.get_majorticklabels(), rotation=90 )
    ax.legend((n1_bars[0], n2_bars[0]),('n1', 'n2'))
    plt.savefig(outfile + '.png')                                                                                 
示例#8
0
def plot_dsegs(infile, outfile, condition, regex = None, frequency = False):
    if frequency:
        outfile = outfile + '_freq'
    folders = helper_functions.create_list(infile)
    data = analyse_dsegs(folders, outfile, condition, regex, frequency)
    print len(data)
    ind = np.arange(len(data))
    labels = ['IGHD1-1','IGHD1-7','IGHD1-14','IGHD1-20','IGHD1-26',\
            'IGHD2-2','IGHD2-8','IGHD2-15','IGHD2-21',\
            'IGHD3-3','IGHD3-9','IGHD3-10','IGHD3-16','IGHD3-22',\
            'IGHD4-4/4-11','IGHD4-17','IGHD4-23',\
            'IGHD5-5/5-18','IGHD5-12','IGHD5-24',\
            'IGHD6-6','IGHD6-13','IGHD6-19','IGHD6-25','IGHD7-27','none']
    print len(ind)
    fig = plt.figure(facecolor = 'white')
    plt.bar(ind, data)
    plt.ylim([0,0.5])
    plt.ylabel('relative frequency of D-gene')
    plt.xticks(ind, labels, rotation=90, rotation_mode="anchor")
    plt.tick_params(axis = 'x', pad = 40, which = 'both', bottom = 'off', top = 'off')
    plt.tight_layout()
    plt.savefig(outfile + '.png')
def run_count(infile, outfile, condition, regex = None, frequency = False):
    if frequency:
        outfile = outfile + '_freq'
    folders = helper_functions.create_list(infile)
    count(folders, outfile, condition, regex, frequency)
def plot_substitutions(infile, outfile, germline, regex = None, frequency = False):
    if frequency:
        outfile = outfile + '_freq'
    amino_acids = ['P','G','A','V', 'L', 'I', 'F', 'Y', 'W',\
        'M', 'C', 'S', 'T', 'N', 'Q', 'H', 'K', 'R', 'D', 'E']
    seq = seq_dict[germline]
    folders = helper_functions.create_list(infile)
    data = substitutions(folders, outfile, germline, regex, frequency)
    if not data:
        return
    arr = np.array(data[0])
    ind = np.arange(4)
    values = range(4)
    c = cm = plt.get_cmap('CMRmap') 
    cNorm  = colors.Normalize(vmin=0, vmax=values[-1])
    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=c)
    font = fm.FontProperties(family = 'Sans-Serif', size = 25)
    font1 = fm.FontProperties(family = 'Sans-Serif', size = 15)
    font2 = fm.FontProperties(family = 'Sans-Serif', size = 50)

    prob_list_d = [1.509482E-01, 2.112400E+00, 1.241095E+00, 4.076074E-01, 3.020502E-02, 3.916999E-02, 1.031734E-01,\
                   9.078338E-01, 1.423897E-02, 3.357577E-02, 2.340019E-02, 5.822988E-01, 1.516881E-01, 7.185182E+00,\
                   1.843856E-01, 1.126995E+01, 6.622772E-02, 5.795374E-02, 0, 7.426619E+00]
    total_d = sum(prob_list_d)
    d_ab = [prob/float(total_d) for prob in prob_list_d]
    prob_list_f = [3.556198E-01, 1.984772E-01, 1.130146E-01, 1.643946E+00, 3.390618E+00, 2.714705E+00, 0, 7.074464E+00,\
                   2.199856E-01, 3.947940E+00, 1.993818E-01, 9.813064E-01, 3.674486E-02, 1.955446E-01, 1.496610E-03,\
                   5.642309E+00, 4.649035E-03, 8.208677E-02, 1.031734E-01, 5.288625E-02]
    total_f = sum(prob_list_f)
    f_ab = [prob/float(total_f) for prob in prob_list_f]
    prob_list_g = [7.915056E-02, 0, 1.851951E+00, 4.228200E-01, 2.157936E-02, 3.560482E-02, 1.984772E-01,\
                   2.295842E-01, 1.386853E-01, 2.187264E-02, 5.891123E-02, 9.984215E-01, 7.862767E-02,\
                   4.868901E-01, 5.516340E-02, 9.139518E+00, 5.504400E-02, 1.089400E+00, 2.112400E+00, 1.389370E+00]
    total_g = sum(prob_list_g)
    g_ab = [prob/float(total_g) for prob in prob_list_g]
    prob_list_i = [5.016568E-02, 3.560482E-02, 1.140412E-01, 7.926675E+00, 1.601123E+00, 0, 2.714705E+00, 2.903165E-01, 6.318748E-02,\
                   1.096748E+01, 6.594967E-04, 6.007607E-01, 3.087152E+00, 1.762721E+00, 6.712736E-06, 4.706586E+00, 5.059793E-01,\
                   3.245175E-01, 3.916999E-02, 1.029959E-04]
    total_i = sum(prob_list_i)
    i_ab = [prob/float(total_i) for prob in prob_list_i]
    prob_list_l = [1.149931E+00, 2.157936E-02, 6.969101E-02, 3.595310E+00, 0, 1.601123E+00, 3.390618E+00, 1.521320E-01, 3.378544E-01,\
                   5.647985E+00, 6.079219E-03, 1.580539E-01, 5.702792E-01, 2.769442E-02, 6.802781E-01, 5.879103E+00, 2.158451E-02, \
                   3.932002E-01, 3.020502E-02, 1.283121E-03]
    total_l = sum(prob_list_l)
    l_ab = [prob/float(total_l) for prob in prob_list_l]
    prob_list_n = [7.342554E-03, 4.868901E-01, 9.291290E-02, 4.931206E-02, 2.769442E-02, 1.762721E+00, 1.955446E-01,\
                   2.622763E+00, 7.310937E-04, 3.972173E-02, 1.374268E-06, 7.348346E+00, 2.147175E+00, 0, 8.705989E-02,\
                   1.405444E+01, 6.190318E+00, 7.829130E-01, 7.185182E+00, 5.038373E-01]
    total_n = sum(prob_list_n)
    n_ab = [prob/float(total_n) for prob in prob_list_n]
    prob_list_s = [1.284651E+00, 9.984215E-01, 9.988358E-01, 7.477041E-02, 1.580539E-01, 6.007607E-01, 9.813064E-01,\
                8.104751E-01, 1.385142E-01, 1.861354E-02, 2.639482E-01, 0, 3.058575E+00, 7.348346E+00, 5.906405E-04,\
                5.439116E+00, 8.688405E-02, 1.926435E+00, 5.822988E-01, 6.776709E-02]
    total_s = sum(prob_list_s)
    s_ab = [prob/float(total_s) for prob in prob_list_s]
    prob_list_t = [9.057112E-01, 7.862767E-02, 2.912317E+00, 2.166054E-01, 5.702792E-01, 3.087152E+00, 3.674486E-02,\
                   9.984255E-02, 1.412361E-02, 1.415612E+00, 3.225214E-06, 3.058575E+00, 0, 2.147175E+00, 1.202094E-01,\
                   3.443285E+00, 1.039298E+00, 1.135258E+00, 1.516881E-01, 6.016624E-02]
    total_t = sum(prob_list_t)
    t_ab = [prob/float(total_t) for prob in prob_list_t]
    prob_list_v = [2.217442E-01, 4.228200E-01, 3.774477E+00, 0, 3.595310E+00, 7.926675E+00, 1.643946E+00, 5.010635E-01,\
                   9.663569E-02, 4.396720E+00, 2.243512E-02, 7.477041E-02, 2.166054E-01, 4.931206E-02, 9.047737E-03,\
                   6.890244E+00, 3.493440E-02, 1.366145E-01, 4.076074E-01, 5.795409E-01]
    total_v = sum(prob_list_v)
    v_ab = [prob/float(total_v) for prob in prob_list_v]
    prob_list_w = [5.516074E-03, 1.386853E-01, 7.939549E-02, 9.663569E-02, 3.378544E-01, 6.318748E-02, 2.199856E-01,\
                   6.121284E-01, 0, 1.011149E-01, 4.440833E-01, 1.385142E-01, 1.412361E-02, 7.310937E-04, 4.332983E-05,\
                   7.013890E+00, 8.024263E-03, 5.724286E-01, 1.423897E-02, 2.252612E-02]
    total_w = sum(prob_list_w)
    w_ab = [prob/float(total_w) for prob in prob_list_w]

    prob_list_d_lg = [0.394456, 0.844926, 0.395144, 0.037967, 0.015076, 0.010690, 0.017416, 0.135107, 0.029890,\
                      0.025548, 0.062556, 1.240275, 0.425860, 5.076149, 0.523386, 0.927114, 0.282959, 0.123954, 0, 5.243870]    
    total_d_lg = sum(prob_list_d_lg)
    d_lg = [prob/float(total_d_lg) for prob in prob_list_d_lg]
    prob_list_f_lg = [0.094464, 0.089586, 0.253701, 0.654683, 2.592692, 1.112727, 0, 7.803902, 2.457121, 1.798853,\
                      1.105251, 0.361819, 0.165001, 0.089525, 0.035855, 0.682139, 0.023918, 0.052722, 0.017416, 0.018811]
    total_f_lg = sum(prob_list_f_lg)
    f_lg = [prob/float(total_f_lg) for prob in prob_list_f_lg]
    prob_list_g_lg = [0.196961, 0, 2.066040, 0.076701, 0.044261, 0.008705, 0.089586, 0.054679, 0.268491, 0.139538,\
                      0.569265, 1.739990, 0.129836, 1.437645, 0.267959, 0.311484, 0.296636, 0.390192, 0.844926, 0.348847]
    total_g_lg = sum(prob_list_g)
    g_lg = [prob/float(total_g_lg) for prob in prob_list_g_lg]
    prob_list_i_lg = [0.078281, 0.008705, 0.149830, 10.649107, 4.145067, 0, 1.112727, 0.232523, 0.111660, 4.273607, 0.320627,\
                      0.064105, 1.033739, 0.191503, 0.072854, 0.108882, 0.159069, 0.126991, 0.010690, 0.044265]
    total_i_lg = sum(prob_list_i_lg)
    i_lg = [prob/float(total_i_lg) for prob in prob_list_i_lg]
    prob_list_l_lg = [0.249060, 0.044261, 0.395337, 1.702745, 0, 4.145067, 2.592692, 0.299648, 0.619632, 6.312358, 0.594007,\
                      0.182287, 0.302936, 0.068427, 0.582457, 0.366317, 0.137500, 0.301848, 0.015076, 0.069673]
    total_l_lg = sum(prob_list_l_lg)
    l_lg = [prob/float(total_l_lg) for prob in prob_list_l_lg]
    prob_list_n_lg = [0.161787, 1.437645, 0.276818, 0.083688, 0.068427, 0.191503, 0.089525, 0.612025, 0.045376,\
                      0.371004, 0.528768, 4.008358, 2.000679, 0, 1.695752, 4.509238, 2.145078, 0.751878, 5.076149, 0.541712]
    total_n_lg = sum(prob_list_n_lg)
    n_lg = [prob/float(total_n_lg) for prob in prob_list_n_lg]
    prob_list_s_lg = [1.338132, 1.739990, 4.727182, 0.098369, 0.182287, 0.064105, 0.361819, 0.400547, 0.248862,\
                      0.346960, 2.784478, 0, 6.472279, 4.008358, 1.223828, 0.990012, 0.748683, 0.858151, 1.240275, 0.611973]
    total_s_lg = sum(prob_list_s_lg)
    s_lg = [prob/float(total_s_lg) for prob in prob_list_s_lg]
    prob_list_t_lg = [0.571468, 0.129836, 2.139501, 2.188158, 0.302936, 1.033739, 0.165001, 0.245841, 0.140825,\
                      2.020366, 1.143480, 6.472279, 0, 2.000679, 1.080136, 0.584262, 1.136863, 0.578987, 0.425860, 0.604545]
    total_t_lg = sum(prob_list_t_lg)
    t_lg = [prob/float(total_t_lg) for prob in prob_list_t_lg]
    prob_list_v_lg = [0.296501, 0.076701, 2.547870, 0, 1.702745, 10.649107, 0.654683, 0.249313, 0.189510,\
                      1.898718, 1.959291, 0.098369, 2.188158, 0.083688, 0.210332, 0.119013, 0.185202, 0.170887,\
                      0.037967, 0.245034]
    total_v_lg = sum(prob_list_v_lg)
    v_lg = [prob/float(total_v_lg) for prob in prob_list_v_lg]
    prob_list_w_lg = [0.095131, 0.268491, 0.180717, 0.189510, 0.619632, 0.111660, 2.457121, 3.151815, 0, 0.696175,\
                      0.670128, 0.248862, 0.140825, 0.045376, 0.236199, 0.597054, 0.049906, 0.593607, 0.029890, 0.077852]
    total_w_lg = sum(prob_list_w_lg)
    w_lg = [prob/float(total_w_lg) for prob in prob_list_w_lg]

    
    ab = {'D': d_ab, 'F' : f_ab, 'G' : g_ab, 'I': i_ab, 'L': l_ab, 'N': n_ab, 'S': s_ab, 'T': t_ab, 'V': v_ab, 'W': w_ab}
    lg = {'D': d_lg, 'F' : f_lg, 'G' : g_lg, 'I': i_lg, 'L': l_lg, 'N': n_lg, 'S': s_lg, 'T': t_lg, 'V': v_lg, 'W': w_lg}
    fig = plt.figure(facecolor = 'white')
    plt.bar(ind, arr[:,0], yerr = arr[:,1], align = 'center')
    labels = [1,2,3,4]
    width = 0.3
    plt.xlabel('position')
    plt.ylabel('relative frequency of mutations')
    plt.xticks(ind, labels)
    plt.ylim([0,0.6])
    plt.savefig(outfile + '_' + str(data[1]) + '_mutations.png')
    fig, (ax0, ax1, ax2, ax3) = plt.subplots(4, sharex=True, sharey=False, facecolor = 'white', figsize = (10,10))
    ind2 = np.arange(20)
    plt.setp((ax0, ax1, ax2, ax3), xticks = ind2, xticklabels = amino_acids) 
    c1 = color = scalarMap.to_rgba(values[0])
    c2 = color = scalarMap.to_rgba(values[1])
    c3 = color = scalarMap.to_rgba(values[2])
    ax0.bar(ind2 + 0.05, arr[0,2:22], width, color = c3, label = 'observed substitutions')
    ax0.bar(ind2 + 0.35, ab[seq[0]], width, color = c2, label = 'AB model')
    ax0.bar(ind2 + 0.65, lg[seq[0]], width,color = c1, label = 'LG model')
    ax0.set_ylim([0,0.6])
    ax1.bar(ind2 + 0.05, arr[1,2:22], width, color = c3)
    ax1.bar(ind2 + 0.35, ab[seq[1]], width, color = c2)
    ax1.bar(ind2 + 0.65, lg[seq[1]], width, color = c1)
    ax1.set_ylim([0,0.6])
    ax2.bar(ind2 + 0.05, arr[2,2:22], width, color = c3)
    ax2.bar(ind2 + 0.35, ab[seq[2]], width, color = c2)
    ax2.bar(ind2 + 0.65, lg[seq[2]], width, color = c1)
    ax2.set_ylim([0,0.6])
    ax3.bar(ind2 +0.05, arr[3,2:22], width, color = c3)
    ax3.bar(ind2 + 0.35, ab[seq[3]], width, color = c2)
    ax3.bar(ind2 + 0.65, lg[seq[3]], width, color = c1)
    ax3.set_ylim([0,0.6])
    for label in (ax3.get_xticklabels()):
        label.set_fontproperties(font)
    for label in (ax0.get_yticklabels()):
        label.set_fontproperties(font1)
    for label in (ax1.get_yticklabels()):
        label.set_fontproperties(font1)
    for label in (ax2.get_yticklabels()):
        label.set_fontproperties(font1)
    for label in (ax3.get_yticklabels()):
        label.set_fontproperties(font1)
    ax0.set_ylabel('1', rotation = 0, fontproperties = font2)
    ax1.set_ylabel('2', rotation = 0, fontproperties = font2)
    ax2.set_ylabel('3', rotation = 0, fontproperties = font2)
    ax3.set_ylabel('4', rotation = 0, fontproperties = font2)
    ax0.yaxis.labelpad = 30
    ax1.yaxis.labelpad = 30
    ax2.yaxis.labelpad = 30
    ax3.yaxis.labelpad = 30
    ax0.set_xticks(ind2, minor=True)
    ax0.grid(True, which = 'minor')
    ax1.set_xticks(ind2, minor=True)
    ax1.grid(True, which = 'minor') 
    ax2.set_xticks(ind2, minor=True)
    ax2.grid(True, which = 'minor')
    ax3.set_xticks(ind2, minor=True)
    ax3.grid(True, which = 'minor')
    lgd = ax0.legend(loc='center left', bbox_to_anchor=(1, 0.5), prop = font)
    plt.xticks(ind2 + 0.5, amino_acids)
    plt.tight_layout()
    plt.savefig(outfile + '_' + str(data[1]) + '_amino_acids.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
示例#11
0
def run_lengths(infile, outfile, regex = None, frequency = False):
    if frequency:
        outfile = outfile + '_freq'
    folders = helper_functions.create_list(infile)
    find_length_stats(folders, outfile, regex, frequency)
示例#12
0
def plot_dsegs(infile, outfile, regex=None, frequency=False):
    if frequency:
        outfile = outfile + "_freq"
    folders = helper_functions.create_list(infile)
    data = analyse_dsegs(folders, outfile, regex, frequency)
    ind = np.arange(len(data[0]))
    labels = [
        "D1-1",
        "D1-2",
        "D1-3",
        "D1-4",
        "D1-5",
        "D1-6",
        "D1-7",
        "D1-8",
        "D2-1",
        "D2-2",
        "D2-3",
        "D2-4",
        "D2-5",
        "D2-6",
        "D3-1",
        "D3-2",
        "D3-3",
        "D3-4",
        "D4-1",
        "D4-2",
        "D4-3",
        "D4-4",
        "D5-1",
        "D5-2",
        "D5-3",
        "D6-1",
        "D6-2",
        "D6-3",
        "D6-4",
        "D6-5",
        "D6-6",
        "D7-1",
        "none",
    ]
    font = fm.FontProperties(family="Sans-Serif", size=20)
    font2 = fm.FontProperties(family="Sans-Serif", size=25)
    font1 = fm.FontProperties(family="Sans-Serif", size=20, weight="bold")
    values = range(9)
    jet = cm = plt.get_cmap("CMRmap")
    cNorm = colors.Normalize(vmin=0, vmax=values[-1])
    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
    fig = plt.figure(facecolor="white")
    fig.set_size_inches(10, 10)
    colorVal = scalarMap.to_rgba(values[4])
    colorVal2 = scalarMap.to_rgba(values[2])
    plt.bar(ind, data[0], color=colorVal, label="non-canonical")
    plt.bar(ind, data[1], color=colorVal2, bottom=data[0], label="canonincal")
    plt.ylim([0, 0.8])
    plt.xlim([0, 33])
    plt.xticks(ind, labels, rotation=90, rotation_mode="anchor")
    plt.tick_params(axis="x", pad=40, which="both", bottom="off", top="off")
    ax = plt.gca()
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_fontproperties(font)
    xticks = ax.xaxis.get_major_ticks()
    for t in xticks[8:14]:
        t.label1.set_fontproperties(font1)
    plt.tight_layout()
    plt.savefig(outfile + ".png")
示例#13
0
def run_gravy(infile, outfile, condition, regex = None, frequency = False):
    if frequency:
        outfile = outfile + '_freq'
    folders = helper_functions.create_list(infile)
    find_gravy_stats(folders, outfile, condition, regex, frequency)