if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/parameters' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'success_rates') edges = [100, 200, 500, 800, 1000, 1500, 2000, 5000] datasets = OrderedDict() for e in edges: datasets[e] = pandas.read_csv(yzer.get_filename( dirpath, 'vs_gro5', 'gro5_count_me_{}.txt'.format(e)), header=0, sep='\t') success_rates = [] for e, data in datasets.iteritems(): print 'Stats for MAX_EDGE = {}'.format(e) exactly_1 = sum(data['count'] == 1) greater_than_1 = sum(data['count'] > 1) success_rate = (exactly_1 - greater_than_1) / len(data) success_rates.append(success_rate) print success_rate ax = yzer.set_up_plot() title = 'Transcript initiation recapture versus MAX_EDGE' yzer.add_title(title, ax) yzer.add_axis_labels('MAX_EDGE value', 'Initiation recapture rate') yzer.plot(edges, success_rates, '-o') yzer.save_plot_with_dir(save_dir=img_dirpath, title=title) yzer.show_plot()
Plot supplementary figure showing Hah et al error rates against MAX EDGE values when Vespucci is built without knowledge of RefSeq boundaries. ''' from __future__ import division from glasslab.dataanalysis.graphing.seq_grapher import SeqGrapher if __name__ == '__main__': yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/no_refseq' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'plots') ax = yzer.set_up_plot() title = 'Benchmarking without Foreknowledge of RefSeq' yzer.add_title(title, ax) yzer.add_axis_labels('MAX_EDGE value', 'Error rate defined by Hah et al. (%)') max_edges = [100, 500, 1000, 4000, 5000, 5500, 6000, 10000] error_rates = [ 0.388551822833, 0.372390444765, 0.263807982126, 0.124663089396, 0.121784970634, 0.121807917409, 0.123263849815, 0.142530838464 ] error_pcts = [e * 100 for e in error_rates] yzer.plot(max_edges, error_pcts, '-o') yzer.save_plot_with_dir(save_dir=img_dirpath, title=title) yzer.show_plot()
yzer = SeqGrapher() dirpath = 'karmel/Desktop/Projects/GlassLab/Notes_and_Reports/Glass Atlas/NAR_review_data/parameters' dirpath = yzer.get_path(dirpath) img_dirpath = yzer.get_and_create_path(dirpath, 'success_rates') multipliers = [1000, 10000, 20000, 50000, 100000] datasets = OrderedDict() for m in multipliers: datasets[m] = pandas.read_csv(yzer.get_filename( dirpath, 'vs_gro5', 'gro5_count_dm_{}.txt'.format(m)), header=0, sep='\t') success_rates = [] for m, data in datasets.iteritems(): print 'Stats for DENSITY_MULTIPLIER = {}'.format(m) exactly_1 = sum(data['count'] == 1) greater_than_1 = sum(data['count'] > 1) success_rate = (exactly_1 - greater_than_1) / len(data) success_rates.append(success_rate) print success_rate ax = yzer.set_up_plot() title = 'Transcript initiation recapture versus DENSITY_MULTIPLIER' yzer.add_title(title, ax) yzer.add_axis_labels('DENSITY_MULTIPLIER value', 'Initiation recapture rate') yzer.plot(multipliers, success_rates, '-o') yzer.save_plot_with_dir(save_dir=img_dirpath, title=title) yzer.show_plot()