Plot supplementary figure showing Hah et al error rates against MAX EDGE values when Vespucci is built without knowledge of RefSeq boundaries. ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/no_refseq' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'plots') ax = yzer.set_up_plot() title = 'Benchmarking without Foreknowledge of RefSeq' yzer.add_title(title, ax) yzer.add_axis_labels('MAX_EDGE value', 'Error rate defined by Hah et al. (%)') max_edges = [100, 500, 1000, 4000, 5000, 5500, 6000, 10000] error_rates = [ 0.388551822833, 0.372390444765, 0.263807982126, 0.124663089396, 0.121784970634, 0.121807917409, 0.123263849815, 0.142530838464 ] error_pcts = [e * 100 for e in error_rates] yzer.plot(max_edges, error_pcts, '-o') yzer.save_plot_with_dir(save_dir=img_dirpath, title=title) yzer.show_plot()
# Plot the transcript initiation recapture rate against DENSITY_MULTIPLIER,
# reading one tab-separated count table (with a header row) per setting.
yzer = SeqGrapher()
dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/parameters'
dirpath = yzer.get_path(dirpath)
img_dirpath = yzer.get_and_create_path(dirpath, 'success_rates')

multipliers = [1000, 10000, 20000, 50000, 100000]

# Load the tables in the order of `multipliers`; the OrderedDict preserves
# that order so the computed rates line up with the x values plotted below.
datasets = OrderedDict()
for m in multipliers:
    datasets[m] = pandas.read_csv(
        yzer.get_filename(dirpath, 'vs_gro5',
                          'gro5_count_dm_{}.txt'.format(m)),
        header=0, sep='\t')

success_rates = []
# .items() and print(...) with a single argument behave the same on
# Python 2 and Python 3, unlike .iteritems() and the print statement.
for m, data in datasets.items():
    print('Stats for DENSITY_MULTIPLIER = {}'.format(m))
    counts = data['count']
    exactly_1 = (counts == 1).sum()
    greater_than_1 = (counts > 1).sum()
    # Rate = (rows with count == 1 minus rows with count > 1) / all rows.
    # float() keeps this a true division even where
    # `from __future__ import division` is not in effect.
    success_rate = (exactly_1 - greater_than_1) / float(len(data))
    success_rates.append(success_rate)
    print(success_rate)

ax = yzer.set_up_plot()
title = 'Transcript initiation recapture versus DENSITY_MULTIPLIER'
yzer.add_title(title, ax)
yzer.add_axis_labels('DENSITY_MULTIPLIER value', 'Initiation recapture rate')
yzer.plot(multipliers, success_rates, '-o')
yzer.save_plot_with_dir(save_dir=img_dirpath, title=title)
yzer.show_plot()
def initiation_recapture_rate(data):
    '''
    Return the recapture rate for one table of counts.

    The rate is the number of rows whose ``count`` equals 1, minus the
    number of rows whose ``count`` exceeds 1, divided by the total number
    of rows.

    ``data`` is a table with a ``count`` column, such as the frames loaded
    with ``pandas.read_csv`` in the script below.
    '''
    counts = data['count']
    exactly_1 = (counts == 1).sum()
    greater_than_1 = (counts > 1).sum()
    # float() keeps this a true division even where
    # `from __future__ import division` is not in effect.
    return (exactly_1 - greater_than_1) / float(len(data))


if __name__ == '__main__':
    # Plot the transcript initiation recapture rate against MAX_EDGE,
    # reading one tab-separated count table (with a header row) per setting.
    yzer = SeqGrapher()
    dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/parameters'
    dirpath = yzer.get_path(dirpath)
    img_dirpath = yzer.get_and_create_path(dirpath, 'success_rates')

    edges = [100, 200, 500, 800, 1000, 1500, 2000, 5000]

    # Load the tables in the order of `edges`; the OrderedDict preserves
    # that order so the computed rates line up with the x values plotted.
    datasets = OrderedDict()
    for e in edges:
        datasets[e] = pandas.read_csv(
            yzer.get_filename(dirpath, 'vs_gro5',
                              'gro5_count_me_{}.txt'.format(e)),
            header=0, sep='\t')

    success_rates = []
    # .items() and print(...) with a single argument behave the same on
    # Python 2 and Python 3, unlike .iteritems() and the print statement.
    for e, data in datasets.items():
        print('Stats for MAX_EDGE = {}'.format(e))
        success_rate = initiation_recapture_rate(data)
        success_rates.append(success_rate)
        print(success_rate)

    ax = yzer.set_up_plot()
    title = 'Transcript initiation recapture versus MAX_EDGE'
    yzer.add_title(title, ax)
    yzer.add_axis_labels('MAX_EDGE value', 'Initiation recapture rate')
    yzer.plot(edges, success_rates, '-o')
    yzer.save_plot_with_dir(save_dir=img_dirpath, title=title)
    yzer.show_plot()
# NOTE(review): presumably week numbers used as x tick labels, with x_vals
# as the matching tick positions; the tick setup is outside this chunk.
# list(range(...)) is required on Python 3, where a range object cannot be
# concatenated to a list; on Python 2 the result is unchanged.
x_labels = [5] + list(range(11, 22))
x_vals = list(range(len(x_labels)))

# Each series is a pair of parallel x/y lists. An x value that appears
# twice in a row with two different y values draws a vertical step there.
ctl_y = [
    100, 100, 100, 100, 87.5, 87.5, 87.5, 87.5, 75, 75, 75, 75, 62.5, 62.5,
    37.5, 37.5
]
ctl_x = [0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11]

low_y = [100, 100, 75, 75, 75, 75, 75, 50, 50, 50, 50, 50, 50, 50]
low_x = [0, 1, 1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10, 11]

med_y = [100, 100, 100, 75, 75, 50, 50, 50, 50, 25, 25, 25, 25, 25, 25]
med_x = [0, 1, 2, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 10, 11]

high_y = [100, 100, 100, 100, 75, 75, 50, 50, 50, 50, 50, 50, 25, 25, 25]
high_x = [0, 1, 2, 3, 3, 4, 4, 5, 6, 7, 8, 9, 9, 10, 11]

ax = yzer.set_up_plot()
yzer.add_axis_labels('Week', 'Percent Normoglycemic')
title = 'Diabetes Induction with In Vivo TDB Treatment'
yzer.add_title(title, ax)

# The control line is drawn first and thicker than the low-dose line.
pyplot.plot(ctl_x, ctl_y, 'o-', color='black',
            label='Control (n=8)', linewidth=8)
pyplot.plot(low_x, low_y, 'o-', color='blue',
            label='Low Dose TDB (107 ug, n=4)', linewidth=6)