def make_fraction_plot(hright, hwrong, plotdir, plotname, xlabel, ylabel, xbounds, only_csv=False, write_csv=False):
    """Plot and/or write to csv the per-bin fraction right / (right + wrong).

    Args:
        hright: histogram of "right" counts. Read via <bin_contents> and get_bin_centers(), plus
            <n_bins>, <xmin> and <xmax> when writing the csv.
        hwrong: histogram of "wrong" counts; only <bin_contents> is read. Presumably binned the
            same as <hright> (not checked here).
        plotdir: directory that the csv and the plot are written to.
        plotname: base name (no extension) for the output; also the key looked up in
            plotconfig.plot_titles for the plot title (falling back to <plotname> itself).
        xlabel: x axis label. If it is 'support', a y = x reference line is also drawn.
        ylabel: y axis label.
        xbounds: passed through to mpl_finish().
        only_csv: if True, skip the plot.
        write_csv: if True, write fractions and uncertainties to <plotdir>/<plotname>.csv.

    The bin contents of <hright> and <hwrong> are only read here, never modified in place.
    """
    if 'fraction_uncertainty' not in sys.modules:
        import fraction_uncertainty  # imported on first use rather than at module level
    uncertainty_module = sys.modules['fraction_uncertainty']

    # NOTE should really merge this with draw_no_root()
    bin_centers = hright.get_bin_centers()  #ignore_overflows=True)
    right_counts = hright.bin_contents
    wrong_counts = hwrong.bin_contents
    fractions = [float(r) / (r + w) if r + w > 0. else 0. for r, w in zip(right_counts, wrong_counts)]

    # Drop every bin whose fraction is exactly zero: that is bins with no entries, and also bins
    # with entries in <hwrong> but none in <hright>. New lists are built (rather than pop()ing from
    # <bin_contents>) so that the caller's histograms are left untouched.
    kept = [ibin for ibin, frac in enumerate(fractions) if frac != 0.]
    xvals = [bin_centers[ibin] for ibin in kept]
    right = [right_counts[ibin] for ibin in kept]
    wrong = [wrong_counts[ibin] for ibin in kept]
    yvals = [fractions[ibin] for ibin in kept]

    tmphilos = [uncertainty_module.err(r, r + w) for r, w in zip(right, wrong)]
    yerrs = [err[1] - err[0] for err in tmphilos]  # second minus first element of each err() result

    # print '%s' % region
    # for iv in range(len(xvals)):
    #     print '   %5.2f     %5.0f / %5.0f  =  %5.2f   +/-  %.3f' % (xvals[iv], right[iv], right[iv] + wrong[iv], yvals[iv], yerrs[iv])

    if write_csv:
        hist_for_csv = Hist(hright.n_bins, hright.xmin, hright.xmax)
        # bin center --> index of its first occurrence in the filtered lists
        kept_index = {}
        for iy, xval in enumerate(xvals):
            kept_index.setdefault(xval, iy)
        for ibin in range(hright.n_bins):
            iy = kept_index.get(bin_centers[ibin])
            if iy is not None:  # if we didn't remove it
                hist_for_csv.set_ibin(ibin, yvals[iy], error=yerrs[iy])
        hist_for_csv.write(plotdir + '/' + plotname + '.csv')

    if not only_csv:
        fig, ax = mpl_init()
        ax.errorbar(xvals, yvals, yerr=yerrs, markersize=10, linewidth=1, marker='.')
        if xlabel == 'support':
            ax.plot((0, 1), (0, 1), color='black', linestyle='--', linewidth=3)  # line with slope 1 and intercept 0
        mpl_finish(ax, plotdir, plotname, xlabel=xlabel, ylabel=ylabel, title=plotconfig.plot_titles.get(plotname, plotname), xbounds=xbounds, ybounds=(-0.1, 1.1))
        plt.close()
import matplotlib
from matplotlib import pyplot as plt

# Scatter plot with one point per entry in <chfo>: x is the entry's 'imax' (labeled 'break point'),
# y is its 'max_abs_diff' (labeled 'abs mfreq diff').
fig, ax = plotting.mpl_init()
xvals, yvals = zip(*((info['imax'], info['max_abs_diff']) for info in chfo.values()))
plt.scatter(xvals, yvals, alpha=0.4)
print('writing to %s' % args.plotdir)
plotting.mpl_finish(ax, args.plotdir, 'hexbin', title=args.title, xlabel='break point', ylabel='abs mfreq diff')

# Draw each 1d histogram, then write its contents to a csv with the same base name.
for hist_to_draw, hist_name, hist_xtitle in ((hmaxval, 'mfreq-diff', 'abs mfreq diff'), (himax, 'imax', 'break point')):
    plotting.draw_no_root(hist_to_draw, plotdir=args.plotdir, plotname=hist_name, shift_overflows=True, xtitle=hist_xtitle, ytitle='seqs')
    hist_to_draw.write('%s/%s.csv' % (args.plotdir, hist_name))
def gk(uids):
    """Return the key used for <annotations>: the uids joined with ':'."""
    return ':'.join(uids)


# The germline info path is the input path with '.csv' replaced by '-glfo'.
glfo = glutils.read_glfo(args.infile.replace('.csv', '-glfo'), locus='igh')

# Read the annotations, keyed by their joined unique ids.
annotations = {}
with open(args.infile) as infile:
    for line in csv.DictReader(infile):
        if line['v_gene'] == '':  # failed (i.e. couldn't find an annotation)
            continue
        utils.process_input_line(line)  # converts strings in the csv file to floats/ints/dicts/etc.
        utils.add_implicit_info(glfo, line)  # add stuff to <line> that's useful, isn't written to the csv since it's redundant
        annotations[gk(line['unique_ids'])] = line

# Chimera info for the first sequence (iseq=0) of each annotation.
chfo = {}
for uid in annotations:
    chfo[uid] = utils.get_chimera_max_abs_diff(annotations[uid], iseq=0)

# Print the ten entries with the largest second element (presumably the max abs mfreq diff, since
# that is the label on the histogram filled from it below).
biggest_adiffs = sorted(chfo, key=lambda key: chfo[key][1], reverse=True)
for uid in biggest_adiffs[:10]:
    print(chfo[uid])
    utils.print_reco_event(annotations[uid])

htmp = Hist(45, 0., 0.65)
for uid in annotations:
    htmp.fill(chfo[uid][1])

utils.prep_dir(args.plotdir, wildlings=['*.svg', '*.csv'])
plotname = 'mfreq-diff'
# Same histogram twice: once with a linear y axis, and once with a log y axis.
for name_suffix, extra_kwargs in (('', {}), ('-log', {'log': 'y'})):
    plotting.draw_no_root(htmp, plotdir=args.plotdir, plotname=plotname + name_suffix, shift_overflows=True, xtitle='abs mfreq diff', ytitle='seqs', **extra_kwargs)
print('writing to %s' % args.plotdir)
htmp.write('%s/%s.csv' % (args.plotdir, plotname))