Exemplo n.º 1
0
def initiation_recapture_rate(data):
    """Return the initiation recapture rate for one benchmark table.

    The rate is the number of rows whose 'count' equals 1, minus the
    number of rows whose 'count' is greater than 1, divided by the total
    number of rows.

    :param data: pandas DataFrame with a numeric 'count' column.
    :return: the rate as a float, or NaN when the table has no rows.
    """
    n_rows = len(data)
    if n_rows == 0:
        # Report NaN explicitly rather than dividing by zero.
        return float('nan')

    counts = data['count']
    exactly_1 = (counts == 1).sum()
    greater_than_1 = (counts > 1).sum()
    # float() keeps this a true division under Python 2 even when
    # `from __future__ import division` is not in effect.
    return (exactly_1 - greater_than_1) / float(n_rows)


if __name__ == '__main__':
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/parameters'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'success_rates')

    # Load one tab-separated table (with a header row) per MAX_EDGE value.
    edges = [100, 200, 500, 800, 1000, 1500, 2000, 5000]
    datasets = OrderedDict()
    for e in edges:
        filename = yzer.get_filename(
            dirpath, 'vs_gro5', 'gro5_count_me_{}.txt'.format(e))
        datasets[e] = pandas.read_csv(filename, header=0, sep='\t')

    # The OrderedDict preserves insertion order, so success_rates lines up
    # index-for-index with edges when plotted below.
    success_rates = []
    for e, data in datasets.items():
        print('Stats for MAX_EDGE = {}'.format(e))
        success_rate = initiation_recapture_rate(data)
        success_rates.append(success_rate)
        print(success_rate)

    ax = yzer.set_up_plot()
    title = 'Transcript initiation recapture versus MAX_EDGE'
    yzer.add_title(title, ax)
    yzer.add_axis_labels('MAX_EDGE value', 'Initiation recapture rate')

    yzer.plot(edges, success_rates, '-o')
    yzer.save_plot_with_dir(save_dir=img_dirpath, title=title)
    yzer.show_plot()
Exemplo n.º 2
0
Plot the supplementary figure showing Hah et al. error rates
against MAX_EDGE values when Vespucci is built without knowledge
of RefSeq boundaries.
 
'''
from __future__ import division
from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher

if __name__ == '__main__':
    # (MAX_EDGE value, error rate as a fraction) pairs to be plotted.
    benchmarks = [
        (100, 0.388551822833),
        (500, 0.372390444765),
        (1000, 0.263807982126),
        (4000, 0.124663089396),
        (5000, 0.121784970634),
        (5500, 0.121807917409),
        (6000, 0.123263849815),
        (10000, 0.142530838464),
    ]
    x_values = [edge for edge, _ in benchmarks]
    # The y axis is labelled in percent, so scale the fractions by 100.
    y_values = [rate * 100 for _, rate in benchmarks]

    plot_title = 'Benchmarking without Foreknowledge of RefSeq'

    grapher = SeqGrapher()
    base_dir = grapher.get_path(
        'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/no_refseq')
    output_dir = grapher.get_and_create_path(base_dir, 'plots')

    axis = grapher.set_up_plot()
    grapher.add_title(plot_title, axis)
    grapher.add_axis_labels(
        'MAX_EDGE value',
        'Error rate defined by Hah et al. (%)')

    # Draw the series, write it to the plots directory, then display it.
    grapher.plot(x_values, y_values, '-o')
    grapher.save_plot_with_dir(save_dir=output_dir, title=plot_title)
    grapher.show_plot()
Exemplo n.º 3
0
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/parameters'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'success_rates')

    # Load one tab-separated table (with a header row) per
    # DENSITY_MULTIPLIER value.
    multipliers = [1000, 10000, 20000, 50000, 100000]
    datasets = OrderedDict()
    for m in multipliers:
        filename = yzer.get_filename(
            dirpath, 'vs_gro5', 'gro5_count_dm_{}.txt'.format(m))
        datasets[m] = pandas.read_csv(filename, header=0, sep='\t')

    # The OrderedDict preserves insertion order, so success_rates lines up
    # index-for-index with multipliers when plotted below.
    success_rates = []
    for m, data in datasets.items():
        print('Stats for DENSITY_MULTIPLIER = {}'.format(m))
        n_rows = len(data)
        if n_rows == 0:
            # Report NaN explicitly rather than dividing by zero.
            success_rate = float('nan')
        else:
            counts = data['count']
            exactly_1 = (counts == 1).sum()
            greater_than_1 = (counts > 1).sum()
            # float() keeps this a true division under Python 2 even when
            # `from __future__ import division` is not in effect.
            success_rate = (exactly_1 - greater_than_1) / float(n_rows)
        success_rates.append(success_rate)
        print(success_rate)

    ax = yzer.set_up_plot()
    title = 'Transcript initiation recapture versus DENSITY_MULTIPLIER'
    yzer.add_title(title, ax)
    yzer.add_axis_labels('DENSITY_MULTIPLIER value',
                         'Initiation recapture rate')

    yzer.plot(multipliers, success_rates, '-o')
    yzer.save_plot_with_dir(save_dir=img_dirpath, title=title)
    yzer.show_plot()