def plot_allele_obs_distrib_2D(variations, data_dir, max_allele_counts,
                               chunk_size=SNPS_PER_CHUNK):
    """Plot 2D allele-observation histograms per genotype class to a PNG.

    For each genotype mask (heterozygous, alt homozygous, ref homozygous)
    one row with two panels is drawn: the log10 of the 2D allele-count
    histogram and the matching 2D genotype-quality histogram.  The figure
    is written to ``allele_obs_distrib_per_gt.png`` inside ``data_dir``.

    Parameters:
        variations: object forwarded unchanged to
            ``hist2d_allele_observations`` and
            ``hist2d_gq_allele_observations``.
        data_dir: directory in which the PNG file is created.
        max_allele_counts: used both as the number of bins and as the
            upper limit of the histogram range on both axes.
        chunk_size: forwarded to both histogram functions.
    """
    masks = [call_is_het, call_is_hom_alt, call_is_hom_ref]
    names = ['Heterozygous', 'Alt Homozygous', 'Ref Homozygous']

    fig = Figure(figsize=(22, 25))
    canvas = FigureCanvas(fig)
    gs = gridspec.GridSpec(3, 2)
    fpath = join(data_dir, 'allele_obs_distrib_per_gt.png')

    counts_range = [[0, max_allele_counts], [0, max_allele_counts]]

    def _axes_params(title):
        # Both panels of a row share the same axis labels; only the title
        # differs.
        return {'set_xlabel': {'args': ['Alt allele counts'], 'kwargs': {}},
                'set_ylabel': {'args': ['Ref allele counts'], 'kwargs': {}},
                'set_title': {'args': [title], 'kwargs': {}}}

    for i, (mask_func, name) in enumerate(zip(masks, names)):
        # Left panel: allele counts distribution.
        hist2d = hist2d_allele_observations(variations,
                                            n_bins=max_allele_counts,
                                            range_=counts_range,
                                            mask_func=mask_func,
                                            chunk_size=chunk_size)
        counts_distrib2d, xbins, ybins = hist2d
        axes = fig.add_subplot(gs[i, 0])
        title = 'Allele counts distribution 2D {}'.format(name)
        # NOTE: numpy.log10 yields -inf for zero-valued bins.
        plot_hist2d(numpy.log10(counts_distrib2d), xbins, ybins, axes=axes,
                    mpl_params=_axes_params(title),
                    colorbar_label='log10(counts)', fig=fig)

        # Right panel: genotype quality distribution; the counts histogram
        # computed above is handed over through hist_counts.
        hist2d = hist2d_gq_allele_observations(variations,
                                               n_bins=max_allele_counts,
                                               range_=counts_range,
                                               mask_func=mask_func,
                                               chunk_size=chunk_size,
                                               hist_counts=counts_distrib2d)
        gq_distrib2d, xbins, ybins = hist2d
        axes = fig.add_subplot(gs[i, 1])
        title = 'Allele counts GQ distribution 2D {}'.format(name)
        plot_hist2d(gq_distrib2d, xbins, ybins, axes=axes, fig=fig,
                    mpl_params=_axes_params(title),
                    colorbar_label='Genotype Quality (GQ)')

    # PNG is binary data: open in 'wb' (text mode breaks on Python 3) and
    # let the context manager close the handle even if rendering fails.
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
def test_calc_allele_obs_gq_distrib_2D(self):
    """Check hist2d_gq_allele_observations on in-memory and HDF5 input.

    First a small dict of arrays is processed with ``chunk_size=None`` and
    the two corner bins of the resulting histogram are checked.  Then the
    'ril.hdf5' test file is processed with the default chunk size and with
    ``chunk_size=10``; bin edges and histograms must agree.
    """
    alt_obs = numpy.array([[[0, 0], [5, 0], [-1, -1], [0, -1], [0, 0],
                            [0, 10], [20, 0], [25, 0], [20, 20], [0, 0]]])
    ref_obs = numpy.array([[0, 5, 15, 7, 10, 0, 0, 25, 20, 10]])
    qualities = numpy.array([[40, 30, 35, 30, 0, 40, 30, 35, 30, 0]])
    genotypes = numpy.array([[[0, 0], [1, 0], [-1, -1], [0, -1], [0, 0],
                              [0, 10], [1, 0], [0, 0], [0, 0], [1, 0]]])
    in_memory_vars = {'/calls/AO': alt_obs,
                      '/calls/RO': ref_obs,
                      '/calls/GQ': qualities,
                      '/calls/GT': genotypes}

    gq_hist, _, _ = hist2d_gq_allele_observations(in_memory_vars,
                                                  chunk_size=None)
    # Corner bins of the histogram.
    assert gq_hist[0, 0] == 40
    assert gq_hist[-1, -1] == 35

    # The result must not depend on the chunk size used to walk the file.
    h5_path = join(TEST_DATA_DIR, 'ril.hdf5')
    h5_vars = VariationsH5(h5_path, mode='r')
    default_result = hist2d_gq_allele_observations(h5_vars)
    chunked_result = hist2d_gq_allele_observations(h5_vars, chunk_size=10)
    default_hist, default_xbins, default_ybins = default_result
    chunked_hist, chunked_xbins, chunked_ybins = chunked_result

    assert numpy.allclose(default_xbins, chunked_xbins)
    assert numpy.allclose(default_ybins, chunked_ybins)
    assert numpy.all(default_hist == chunked_hist)
def plot_allele_obs_distrib_2D(variations, data_dir, max_allele_counts,
                               chunk_size=SNPS_PER_CHUNK):
    """Plot 2D allele-observation histograms per genotype class to a PNG.

    For each genotype mask (heterozygous, alt homozygous, ref homozygous)
    one row with two panels is drawn: the log10 of the 2D allele-count
    histogram and the matching 2D genotype-quality histogram.  The figure
    is written to ``allele_obs_distrib_per_gt.png`` inside ``data_dir``.

    Parameters:
        variations: object forwarded unchanged to
            ``hist2d_allele_observations`` and
            ``hist2d_gq_allele_observations``.
        data_dir: directory in which the PNG file is created.
        max_allele_counts: used both as the number of bins and as the
            upper limit of the histogram range on both axes.
        chunk_size: forwarded to both histogram functions.
    """
    masks = [call_is_het, call_is_hom_alt, call_is_hom_ref]
    names = ['Heterozygous', 'Alt Homozygous', 'Ref Homozygous']

    fig = Figure(figsize=(22, 25))
    canvas = FigureCanvas(fig)
    gs = gridspec.GridSpec(3, 2)
    fpath = join(data_dir, 'allele_obs_distrib_per_gt.png')

    counts_range = [[0, max_allele_counts], [0, max_allele_counts]]

    def _axes_params(title):
        # Both panels of a row share the same axis labels; only the title
        # differs.
        return {
            'set_xlabel': {
                'args': ['Alt allele counts'],
                'kwargs': {}
            },
            'set_ylabel': {
                'args': ['Ref allele counts'],
                'kwargs': {}
            },
            'set_title': {
                'args': [title],
                'kwargs': {}
            }
        }

    for i, (mask_func, name) in enumerate(zip(masks, names)):
        # Left panel: allele counts distribution.
        hist2d = hist2d_allele_observations(variations,
                                            n_bins=max_allele_counts,
                                            range_=counts_range,
                                            mask_func=mask_func,
                                            chunk_size=chunk_size)
        counts_distrib2d, xbins, ybins = hist2d
        axes = fig.add_subplot(gs[i, 0])
        title = 'Allele counts distribution 2D {}'.format(name)
        # NOTE: numpy.log10 yields -inf for zero-valued bins.
        plot_hist2d(numpy.log10(counts_distrib2d), xbins, ybins, axes=axes,
                    mpl_params=_axes_params(title),
                    colorbar_label='log10(counts)', fig=fig)

        # Right panel: genotype quality distribution; the counts histogram
        # computed above is handed over through hist_counts.
        hist2d = hist2d_gq_allele_observations(variations,
                                               n_bins=max_allele_counts,
                                               range_=counts_range,
                                               mask_func=mask_func,
                                               chunk_size=chunk_size,
                                               hist_counts=counts_distrib2d)
        gq_distrib2d, xbins, ybins = hist2d
        axes = fig.add_subplot(gs[i, 1])
        title = 'Allele counts GQ distribution 2D {}'.format(name)
        plot_hist2d(gq_distrib2d, xbins, ybins, axes=axes, fig=fig,
                    mpl_params=_axes_params(title),
                    colorbar_label='Genotype Quality (GQ)')

    # PNG is binary data: open in 'wb' (text mode breaks on Python 3) and
    # let the context manager close the handle even if rendering fails.
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)