def test_plot_barplot(self):
    """Check that plot_barplot writes a stacked bar plot to a given path.

    A 3x4 integer matrix is plotted with four series labels into a
    temporary ``.png`` path; the path is always cleaned up afterwards.
    """
    matrix = numpy.array([[1, 2, 3, 4], [2, 1, 4, 3], [4, 3, 2, 1]])
    # Only the unique name is needed: leaving the ``with`` block closes and
    # deletes the placeholder file, so plot_barplot creates the path itself.
    with NamedTemporaryFile(suffix=".png") as fhand:
        fpath = fhand.name
    try:
        plot_barplot(matrix, ["ref_hom", "het", "alt_hom", "missing"], fpath,
                     stacked=True, figsize=(10, 10))
        # os.remove raises if nothing was written, so it doubles as the
        # check that the plot file exists.
        os.remove(fpath)
    finally:
        # Do not leave a stray file behind when plot_barplot itself fails
        # after having created (part of) the output.
        if os.path.exists(fpath):
            os.remove(fpath)
def test_plot_barplot(self):
    """Check that plot_barplot writes a stacked bar plot to a given path.

    A 3x4 integer matrix is plotted with four series labels into a
    temporary ``.png`` path; the path is always cleaned up afterwards.
    """
    matrix = numpy.array([[1, 2, 3, 4], [2, 1, 4, 3], [4, 3, 2, 1]])
    # Only the unique name is needed: leaving the ``with`` block closes and
    # deletes the placeholder file, so plot_barplot creates the path itself.
    with NamedTemporaryFile(suffix='.png') as fhand:
        fpath = fhand.name
    try:
        plot_barplot(matrix, ['ref_hom', 'het', 'alt_hom', 'missing'], fpath,
                     stacked=True, figsize=(10, 10))
        # os.remove raises if nothing was written, so it doubles as the
        # check that the plot file exists.
        os.remove(fpath)
    finally:
        # Do not leave a stray file behind when plot_barplot itself fails
        # after having created (part of) the output.
        if os.path.exists(fpath):
            os.remove(fpath)
def plot_gt_stats_per_sample(variations, data_dir, chunk_size=SNPS_PER_CHUNK):
    """Draw the per-sample genotype bar plots into ``data_dir``.

    Four figures are requested from ``plot_barplot``:

    * ``genotype_counts_per_sample.png`` -- all four genotype classes, stacked
    * ``missing_per_sample.png`` -- last column of the stats only
    * ``het_per_sample.png`` -- column 1 of the stats only
    * ``gt_perc_per_sample.png`` -- every class but the last one, expressed
      as a percentage of their per-row sum

    variations -- object indexable by GT_FIELD and exposing ``samples``
    data_dir -- directory the output file names are joined onto
    chunk_size -- forwarded to calc_gt_type_stats
    """

    def single_arg_call(value):
        # One mpl_params entry: a single positional argument, no keywords.
        return {'args': [value], 'kwargs': {}}

    gt_classes = ('Ref Homozygous', 'Heterozygous', 'Alt Homozygous',
                  'Missing GT')

    # NOTE(review): presumably one row per sample after the transpose --
    # confirm against calc_gt_type_stats.
    stats = calc_gt_type_stats(variations, chunk_size=chunk_size).transpose()
    # Figure width follows the second dimension of the genotype array.
    plot_size = (variations[GT_FIELD].shape[1], 7)

    # All genotypes classes per sample
    axes_setup = {
        'set_xlabel': single_arg_call('Samples'),
        'set_ylabel': single_arg_call('Number of GTs'),
        'set_title': single_arg_call('Genotypes counts per sample'),
    }
    sample_names = variations.samples
    if sample_names is not None:
        axes_setup['set_xticklabels'] = single_arg_call(sample_names)
    plot_barplot(stats, list(gt_classes), mpl_params=axes_setup,
                 color=['darkslategrey', 'c', 'paleturquoise', 'cadetblue'],
                 fpath=join(data_dir, 'genotype_counts_per_sample.png'),
                 stacked=True, figsize=plot_size)

    # Missing per sample (the same axes_setup dict is updated and reused)
    axes_setup['set_ylabel'] = single_arg_call('Missing Genotypes Number')
    axes_setup['set_title'] = single_arg_call(
        'Missing genotypes counts per sample')
    plot_barplot(stats[:, -1], ['Missing GT'], mpl_params=axes_setup,
                 fpath=join(data_dir, 'missing_per_sample.png'),
                 stacked=True, figsize=plot_size)

    # Heterozygous per sample
    axes_setup['set_ylabel'] = single_arg_call('Heterozygous Number')
    axes_setup['set_title'] = single_arg_call('Heterozygous counts per sample')
    plot_barplot(stats[:, 1], ['Heterozygous'], mpl_params=axes_setup,
                 fpath=join(data_dir, 'het_per_sample.png'),
                 stacked=True, figsize=plot_size)

    # GT percentage without missing values
    axes_setup['set_ylabel'] = single_arg_call('% Genotypes')
    axes_setup['set_title'] = single_arg_call('Genotypes percentage per sample')
    gt_perc = stats[:, :-1] / stats[:, :-1].sum(axis=1, keepdims=True)
    gt_perc *= 100
    plot_barplot(gt_perc, list(gt_classes[:-1]), mpl_params=axes_setup,
                 fpath=join(data_dir, 'gt_perc_per_sample.png'),
                 figsize=plot_size)
def plot_gt_stats_per_sample(variations, data_dir, chunk_size=SNPS_PER_CHUNK):
    """Produce four genotype-statistics bar plots, one bar group per row.

    The stats returned by ``calc_gt_type_stats`` are transposed and handed
    to ``plot_barplot`` four times, targeting these files in ``data_dir``:
    ``genotype_counts_per_sample.png``, ``missing_per_sample.png``,
    ``het_per_sample.png`` and ``gt_perc_per_sample.png``.

    variations -- object indexable by GT_FIELD and exposing ``samples``
    data_dir -- directory the output file names are joined onto
    chunk_size -- forwarded to calc_gt_type_stats
    """
    gt_stats = calc_gt_type_stats(variations, chunk_size=chunk_size)
    gt_stats = gt_stats.transpose()
    n_columns = variations[GT_FIELD].shape[1]
    figsize = (n_columns, 7)

    # Axis configuration shared (and progressively updated) by all plots.
    mpl_params = {}
    mpl_params['set_xlabel'] = {'args': ['Samples'], 'kwargs': {}}
    mpl_params['set_ylabel'] = {'args': ['Number of GTs'], 'kwargs': {}}
    mpl_params['set_title'] = {
        'args': ['Genotypes counts per sample'],
        'kwargs': {},
    }
    if variations.samples is not None:
        mpl_params['set_xticklabels'] = {
            'args': [variations.samples],
            'kwargs': {},
        }

    # All genotypes classes per sample
    plot_barplot(
        gt_stats,
        ['Ref Homozygous', 'Heterozygous', 'Alt Homozygous', 'Missing GT'],
        mpl_params=mpl_params,
        color=['darkslategrey', 'c', 'paleturquoise', 'cadetblue'],
        fpath=join(data_dir, 'genotype_counts_per_sample.png'),
        stacked=True,
        figsize=figsize,
    )

    # Plots of a single stats column each:
    # (file name, title, y label, column index, series label)
    single_column_plots = (
        # Missing per sample
        ('missing_per_sample.png', 'Missing genotypes counts per sample',
         'Missing Genotypes Number', -1, 'Missing GT'),
        # Heterozygous per sample
        ('het_per_sample.png', 'Heterozygous counts per sample',
         'Heterozygous Number', 1, 'Heterozygous'),
    )
    for fname, title, ylabel, column, series in single_column_plots:
        mpl_params['set_ylabel'] = {'args': [ylabel], 'kwargs': {}}
        mpl_params['set_title'] = {'args': [title], 'kwargs': {}}
        plot_barplot(
            gt_stats[:, column],
            [series],
            mpl_params=mpl_params,
            fpath=join(data_dir, fname),
            stacked=True,
            figsize=figsize,
        )

    # GT percentage without missing values
    mpl_params['set_ylabel'] = {'args': ['% Genotypes'], 'kwargs': {}}
    mpl_params['set_title'] = {
        'args': ['Genotypes percentage per sample'],
        'kwargs': {},
    }
    called = gt_stats[:, :-1]
    gt_perc = called / called.sum(axis=1, keepdims=True)
    gt_perc *= 100
    plot_barplot(
        gt_perc,
        ['Ref Homozygous', 'Heterozygous', 'Alt Homozygous'],
        mpl_params=mpl_params,
        fpath=join(data_dir, 'gt_perc_per_sample.png'),
        figsize=figsize,
    )