Exemplo n.º 1
0
Plot a supplementary figure showing Hah et al. error rates
against MAX_EDGE values when Vespucci is built without knowledge
of RefSeq boundaries.
 
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    # Script entry point: plot hard-coded error rates (as percentages)
    # against the MAX_EDGE value each was measured at, then save and show
    # the figure.
    grapher = SeqGrapher()
    data_dir = grapher.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/no_refseq'
    )
    plot_dir = grapher.get_and_create_path(data_dir, 'plots')

    # (MAX_EDGE value, error rate as a fraction) pairs, kept side by side so
    # each measurement stays attached to the setting it belongs to.
    benchmarks = [
        (100, 0.388551822833),
        (500, 0.372390444765),
        (1000, 0.263807982126),
        (4000, 0.124663089396),
        (5000, 0.121784970634),
        (5500, 0.121807917409),
        (6000, 0.123263849815),
        (10000, 0.142530838464),
    ]
    max_edge_values = [edge for edge, _ in benchmarks]
    # The y axis is labelled in percent, so scale the fractions by 100.
    error_percentages = [rate * 100 for _, rate in benchmarks]

    plot_title = 'Benchmarking without Foreknowledge of RefSeq'
    axes = grapher.set_up_plot()
    grapher.add_title(plot_title, axes)
    grapher.add_axis_labels('MAX_EDGE value',
                            'Error rate defined by Hah et al. (%)')

    grapher.plot(max_edge_values, error_percentages, '-o')
    grapher.save_plot_with_dir(save_dir=plot_dir, title=plot_title)
    grapher.show_plot()
Exemplo n.º 2
0
    # Plot the initiation recapture rate obtained for each tested
    # DENSITY_MULTIPLIER value, reading one tab-separated
    # 'gro5_count_dm_<value>.txt' file per value.
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/parameters'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'success_rates')

    multipliers = [1000, 10000, 20000, 50000, 100000]
    # Load every file before computing anything; the OrderedDict keeps the
    # datasets in the same order as `multipliers`, which the plot relies on.
    datasets = OrderedDict()
    for m in multipliers:
        filename = yzer.get_filename(dirpath, 'vs_gro5',
                                     'gro5_count_dm_{}.txt'.format(m))
        datasets[m] = pandas.read_csv(filename, header=0, sep='\t')

    success_rates = []
    # items() and single-argument print(...) behave the same on Python 2 and
    # also run on Python 3, unlike iteritems() and the print statement.
    for m, data in datasets.items():
        print('Stats for DENSITY_MULTIPLIER = {}'.format(m))
        exactly_1 = sum(data['count'] == 1)
        greater_than_1 = sum(data['count'] > 1)
        # NOTE(review): rows with count > 1 are subtracted from the numerator
        # rather than merely excluded -- confirm this penalty is the intended
        # definition of the recapture rate.
        # The float denominator forces true division regardless of whether
        # the module enables `from __future__ import division`.
        success_rate = (exactly_1 - greater_than_1) / float(len(data))
        success_rates.append(success_rate)
        print(success_rate)

    ax = yzer.set_up_plot()
    title = 'Transcript initiation recapture versus DENSITY_MULTIPLIER'
    yzer.add_title(title, ax)
    yzer.add_axis_labels('DENSITY_MULTIPLIER value',
                         'Initiation recapture rate')

    yzer.plot(multipliers, success_rates, '-o')
    yzer.save_plot_with_dir(save_dir=img_dirpath, title=title)
    yzer.show_plot()
Exemplo n.º 3
0
if __name__ == '__main__':
    # Script entry point: plot the initiation recapture rate obtained for
    # each tested MAX_EDGE value, reading one tab-separated
    # 'gro5_count_me_<value>.txt' file per value.
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/parameters'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'success_rates')

    edges = [100, 200, 500, 800, 1000, 1500, 2000, 5000]
    # Load every file before computing anything; the OrderedDict keeps the
    # datasets in the same order as `edges`, which the plot relies on.
    datasets = OrderedDict()
    for e in edges:
        filename = yzer.get_filename(dirpath, 'vs_gro5',
                                     'gro5_count_me_{}.txt'.format(e))
        datasets[e] = pandas.read_csv(filename, header=0, sep='\t')

    success_rates = []
    # items() and single-argument print(...) behave the same on Python 2 and
    # also run on Python 3, unlike iteritems() and the print statement.
    for e, data in datasets.items():
        print('Stats for MAX_EDGE = {}'.format(e))
        exactly_1 = sum(data['count'] == 1)
        greater_than_1 = sum(data['count'] > 1)
        # NOTE(review): rows with count > 1 are subtracted from the numerator
        # rather than merely excluded -- confirm this penalty is the intended
        # definition of the recapture rate.
        # The float denominator forces true division regardless of whether
        # the module enables `from __future__ import division`.
        success_rate = (exactly_1 - greater_than_1) / float(len(data))
        success_rates.append(success_rate)
        print(success_rate)

    ax = yzer.set_up_plot()
    title = 'Transcript initiation recapture versus MAX_EDGE'
    yzer.add_title(title, ax)
    yzer.add_axis_labels('MAX_EDGE value', 'Initiation recapture rate')

    yzer.plot(edges, success_rates, '-o')
    yzer.save_plot_with_dir(save_dir=img_dirpath, title=title)
    yzer.show_plot()
Exemplo n.º 4
0
    # Tick labels: 5, then 11 through 21 (presumably week numbers, given the
    # 'Week' axis label -- confirm). list(...) keeps the concatenation valid
    # on Python 3, where range() is no longer a list; on Python 2 the
    # resulting lists are unchanged.
    x_labels = [5] + list(range(11, 22))
    # One x position (0..11) per label.
    x_vals = list(range(len(x_labels)))

    # Each <group>_x / <group>_y pair holds the points of one curve. A
    # repeated x value with a lower y draws a vertical drop, giving the
    # curves a step-like shape.
    ctl_y = [
        100, 100, 100, 100, 87.5, 87.5, 87.5, 87.5, 75, 75, 75, 75, 62.5, 62.5,
        37.5, 37.5
    ]
    ctl_x = [0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11]
    low_y = [100, 100, 75, 75, 75, 75, 75, 50, 50, 50, 50, 50, 50, 50]
    low_x = [0, 1, 1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10, 11]
    med_y = [100, 100, 100, 75, 75, 50, 50, 50, 50, 25, 25, 25, 25, 25, 25]
    med_x = [0, 1, 2, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 10, 11]
    high_y = [100, 100, 100, 100, 75, 75, 50, 50, 50, 50, 50, 50, 25, 25, 25]
    high_x = [0, 1, 2, 3, 3, 4, 4, 5, 6, 7, 8, 9, 9, 10, 11]

    ax = yzer.set_up_plot()
    yzer.add_axis_labels('Week', 'Percent Normoglycemic')
    title = 'Diabetes Induction with In Vivo TDB Treatment'
    yzer.add_title(title, ax)

    # The control curve is drawn first with the thickest line; the low-dose
    # curve plotted after it uses a thinner line.
    pyplot.plot(ctl_x,
                ctl_y,
                'o-',
                color='black',
                label='Control (n=8)',
                linewidth=8)
    pyplot.plot(low_x,
                low_y,
                'o-',
                color='blue',
                label='Low Dose TDB (107 ug, n=4)',
                linewidth=6)