def plot_call_field_distribs_per_gt_type(variations, field, max_value,
                                         data_dir, chunk_size=SNPS_PER_CHUNK):
    """Plot the distributions of a call field split by genotype type.

    Two PNG files are written into ``data_dir``:

    * ``<field_name>_distribution_per_sample.png``: boxplots per sample,
      one series for heterozygous and one for homozygous calls.
    * ``<field_name>_distribution.png``: for each genotype type, the
      distribution summed over all samples and its cumulative version
      expressed as a percentage.

    ``field_name`` is the last ``/``-separated component of ``field``.

    Args:
        variations: object indexable by ``GT_FIELD`` that exposes a
            ``samples`` attribute; it is forwarded to
            ``calc_field_distribs_per_sample``.
        field: path of the call field to plot.
        max_value: upper limit of the histogram range; also used as the
            number of bins.
        data_dir: directory where the PNG files are created.
        chunk_size: forwarded to ``calc_field_distribs_per_sample``.
    """
    field_name = field.split('/')[-1]
    mask_funcs = [call_is_het, call_is_hom]
    names = ['Heterozygous', 'Homozygous']

    def _axes_params(xlabel, ylabel, title):
        # Build the mpl_params mapping expected by the plotting helpers.
        return {'set_xlabel': {'args': [xlabel], 'kwargs': {}},
                'set_ylabel': {'args': [ylabel], 'kwargs': {}},
                'set_title': {'args': [title], 'kwargs': {}}}

    # Field distribution per sample
    distribs = []
    for mask_func in mask_funcs:
        dp_distribs, bins = calc_field_distribs_per_sample(
            variations, field=field, range_=(0, max_value),
            n_bins=max_value, chunk_size=chunk_size, mask_func=mask_func,
            mask_field=GT_FIELD)
        distribs.append(dp_distribs)

    title = '{} distribution per sample'.format(field_name)
    mpl_params = _axes_params('Samples', field_name, title)
    # The figure gets one width unit per sample.
    figsize = (variations[GT_FIELD].shape[1], 7)
    fpath = join(data_dir,
                 '{}_distribution_per_sample.png'.format(field_name))
    # PNG is binary data: open in 'wb' and make sure the handle is closed.
    with open(fpath, 'wb') as fhand:
        plot_boxplot_from_distribs_series(distribs, fhand=fhand,
                                          mpl_params=mpl_params,
                                          figsize=figsize,
                                          colors=['pink', 'tan'],
                                          labels=names,
                                          xticklabels=variations.samples)

    # Overall field distributions
    fig = Figure(figsize=(20, 15))
    canvas = FigureCanvas(fig)
    n_rows = len(names)
    for row, (sample_distribs, name) in enumerate(zip(distribs, names)):
        # Sum the per-sample distributions of *this* genotype type.  The
        # previous code summed the last computed array for every row, so
        # both rows displayed the same data.
        distrib = numpy.sum(sample_distribs, axis=0)
        distrib_cum = calc_cum_distrib(distrib)

        # Left column: raw distribution.
        axes = fig.add_subplot(n_rows, 2, 2 * row + 1)
        title = '{} distribution all samples {}'.format(field_name, name)
        plot_distrib(distrib, bins, axes=axes,
                     mpl_params=_axes_params(field_name, 'Number of GTs',
                                             title))

        # Right column: cumulative distribution rescaled so that its first
        # value is 100.
        # NOTE(review): yields NaN/inf when distrib_cum[0] is 0 (no calls
        # of this genotype type); confirm whether that can happen.
        distrib_cum = distrib_cum / distrib_cum[0] * 100
        axes = fig.add_subplot(n_rows, 2, 2 * row + 2)
        title = '{} cumulative distribution all samples {}'.format(
            field_name, name)
        plot_distrib(distrib_cum, bins, axes=axes,
                     mpl_params=_axes_params(field_name, '% calls > Depth ',
                                             title))

    fpath = join(data_dir, '{}_distribution.png'.format(field_name))
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
def test_plot_boxplot_series(self):
    """Smoke test: draw two boxplot series into a temporary PNG file."""
    histograms = numpy.array(
        [
            [0, 0, 0, 0, 0, 1, 3, 5, 3, 1],
            [0, 0, 0, 0, 1, 3, 5, 3, 1, 0],
            [0, 1, 3, 5, 3, 1, 0, 0, 0, 0],
            [1, 3, 5, 3, 1, 0, 0, 0, 0, 0],
        ]
    )
    # Each series takes two of the four rows: (0, 2) and (1, 3).
    series = [histograms[rows, :] for rows in ([0, 2], [1, 3])]
    axis_labels = (("set_xlabel", "Samples"), ("set_ylabel", "Depth"))
    plot_params = {
        method: {"args": [label], "kwargs": {}}
        for method, label in axis_labels
    }
    with NamedTemporaryFile(suffix=".png") as png_fhand:
        plot_boxplot_from_distribs_series(
            series, fhand=png_fhand, mpl_params=plot_params
        )
def test_plot_boxplot_series(self):
    """Check that the boxplot series plotter runs on a small data set."""
    rows = [[0, 0, 0, 0, 0, 1, 3, 5, 3, 1],
            [0, 0, 0, 0, 1, 3, 5, 3, 1, 0],
            [0, 1, 3, 5, 3, 1, 0, 0, 0, 0],
            [1, 3, 5, 3, 1, 0, 0, 0, 0, 0]]
    all_distribs = numpy.array(rows)

    # First series: rows 0 and 2; second series: rows 1 and 3.
    first_series = all_distribs[[0, 2], :]
    second_series = all_distribs[[1, 3], :]

    xlabel_call = {'args': ['Samples'], 'kwargs': {}}
    ylabel_call = {'args': ['Depth'], 'kwargs': {}}
    params = {'set_xlabel': xlabel_call, 'set_ylabel': ylabel_call}

    with NamedTemporaryFile(suffix='.png') as out_fhand:
        plot_boxplot_from_distribs_series([first_series, second_series],
                                          fhand=out_fhand,
                                          mpl_params=params)
def plot_call_field_distribs_per_gt_type(variations, field, max_value,
                                         data_dir, chunk_size=SNPS_PER_CHUNK):
    """Plot a call field's distributions for het and hom genotype calls.

    The function writes two PNG images in ``data_dir``, both named after
    the last ``/``-separated component of ``field``:

    * ``<field_name>_distribution_per_sample.png``: per-sample boxplots
      with one series per genotype type.
    * ``<field_name>_distribution.png``: one row per genotype type with
      the distribution summed over samples (left) and the cumulative
      distribution as a percentage (right).

    Args:
        variations: object indexable by ``GT_FIELD`` and exposing
            ``samples``; passed on to ``calc_field_distribs_per_sample``.
        field: path of the call field whose values are plotted.
        max_value: upper bound of the histogram range and number of bins.
        data_dir: output directory for the PNG files.
        chunk_size: passed on to ``calc_field_distribs_per_sample``.
    """
    field_name = field.split('/')[-1]
    mask_funcs = [call_is_het, call_is_hom]
    names = ['Heterozygous', 'Homozygous']

    def _mpl_params(xlabel, ylabel, title):
        # Axes setter calls in the format the plotting helpers expect.
        return {
            'set_xlabel': {'args': [xlabel], 'kwargs': {}},
            'set_ylabel': {'args': [ylabel], 'kwargs': {}},
            'set_title': {'args': [title], 'kwargs': {}},
        }

    # Field distribution per sample
    distribs = []
    for mask_func in mask_funcs:
        dp_distribs, bins = calc_field_distribs_per_sample(
            variations,
            field=field,
            range_=(0, max_value),
            n_bins=max_value,
            chunk_size=chunk_size,
            mask_func=mask_func,
            mask_field=GT_FIELD)
        distribs.append(dp_distribs)

    per_sample_path = join(
        data_dir, '{}_distribution_per_sample.png'.format(field_name))
    title = '{} distribution per sample'.format(field_name)
    # Figure width scales with the number of samples.
    figsize = (variations[GT_FIELD].shape[1], 7)
    # Binary mode because PNG data is bytes; the context manager closes
    # the handle that used to be leaked.
    with open(per_sample_path, 'wb') as fhand:
        plot_boxplot_from_distribs_series(
            distribs,
            fhand=fhand,
            mpl_params=_mpl_params('Samples', field_name, title),
            figsize=figsize,
            colors=['pink', 'tan'],
            labels=names,
            xticklabels=variations.samples)

    # Overall field distributions
    fig = Figure(figsize=(20, 15))
    canvas = FigureCanvas(fig)
    n_rows = len(names)
    subplot_idx = 1
    for gt_distribs, name in zip(distribs, names):
        # Use the distributions that belong to this genotype type; summing
        # dp_distribs here would repeat the last mask's data in every row.
        distrib = numpy.sum(gt_distribs, axis=0)
        distrib_cum = calc_cum_distrib(distrib)

        axes = fig.add_subplot(n_rows, 2, subplot_idx)
        subplot_idx += 1
        title = '{} distribution all samples {}'.format(field_name, name)
        plot_distrib(distrib, bins, axes=axes,
                     mpl_params=_mpl_params(field_name, 'Number of GTs',
                                            title))

        # Rescale so that the first cumulative value equals 100 (%).
        # NOTE(review): divides by zero if distrib_cum[0] == 0; confirm
        # whether an empty genotype class is possible.
        distrib_cum = distrib_cum / distrib_cum[0] * 100
        axes = fig.add_subplot(n_rows, 2, subplot_idx)
        subplot_idx += 1
        title = '{} cumulative distribution all samples {}'.format(
            field_name, name)
        plot_distrib(distrib_cum, bins, axes=axes,
                     mpl_params=_mpl_params(field_name, '% calls > Depth ',
                                            title))

    overall_path = join(data_dir, '{}_distribution.png'.format(field_name))
    with open(overall_path, 'wb') as fhand:
        canvas.print_figure(fhand)