def plot_maf_depth(variations, data_dir, min_depth=DEF_MIN_DEPTH,
                   chunk_size=SNPS_PER_CHUNK):
    """Write one depth-based MAF histogram PNG per sample.

    Each plot is saved as ``<data_dir>/maf_depth/<sample>.png``.

    Args:
        variations: Object passed to ``calc_maf_depth_distribs_per_sample``;
            its ``samples`` attribute names the output files. When
            ``samples`` is None the row index of each distribution is used.
        data_dir: Directory under which the ``maf_depth`` folder is created.
        min_depth: Forwarded to ``calc_maf_depth_distribs_per_sample``.
        chunk_size: Forwarded to ``calc_maf_depth_distribs_per_sample``.

    Returns:
        None. Returns without writing anything when no distributions were
        computed.
    """
    # Honour the caller's chunk_size instead of always using the default.
    maf_dp_distribs, bins = calc_maf_depth_distribs_per_sample(
        variations, min_depth=min_depth, n_bins=100, chunk_size=chunk_size)

    if maf_dp_distribs is None:
        # The calc function can return (None, None), e.g. for empty call
        # arrays; there is nothing to plot in that case.
        return

    maf_dp_dir = os.path.join(data_dir, 'maf_depth')
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(maf_dp_dir, exist_ok=True)

    samples = variations.samples
    if samples is None:
        samples = range(maf_dp_distribs.shape[0])

    for sample, distrib in zip(samples, maf_dp_distribs):
        fpath = os.path.join(maf_dp_dir, '{}.png'.format(sample))
        title = 'Depth based Maximum allele frequency (MAF) distribution {}'
        title = title.format(sample)
        # Keys look like matplotlib Axes method names with their call
        # arguments -- presumably applied by plot_distrib; confirm there.
        mpl_params = {
            'set_xlabel': {'args': ['MAF (depth)'], 'kwargs': {}},
            'set_ylabel': {'args': ['SNPs number'], 'kwargs': {}},
            'set_title': {'args': [title], 'kwargs': {}},
            'set_yscale': {'args': ['log'], 'kwargs': {}},
        }
        # PNG is binary data, so open in binary mode; the context manager
        # closes the handle even if plotting raises.
        with open(fpath, 'wb') as fhand:
            plot_distrib(distrib, bins, fhand=fhand, figsize=(10, 10),
                         mpl_params=mpl_params, n_ticks=10)
def test_calc_maf_depth_distribs_per_sample(self):
    """Check calc_maf_depth_distribs_per_sample on empty, tiny and HDF5 data.

    Covers three cases: empty call arrays give ``(None, None)``; a
    one-SNP, three-sample input gives the expected 4-bin counts; and the
    result for ``ril.hdf5`` is the same with ``chunk_size=None`` as with
    the default chunk size.
    """
    # Case 1: empty AO/RO arrays produce neither distributions nor bins.
    empty_snps = VariationsArrays()
    empty_snps['/calls/AO'] = numpy.array([])
    empty_snps['/calls/RO'] = numpy.array([])
    result = calc_maf_depth_distribs_per_sample(empty_snps, chunk_size=None)
    empty_distribs, empty_bins = result
    assert empty_distribs is None
    assert empty_bins is None

    # Case 2: one SNP and three samples; one expected row per sample.
    alt_depths = numpy.array([[[0, 0], [0, 0], [15, -1]]])
    ref_depths = numpy.array([[10, 5, 15]])
    small_snps = VariationsArrays()
    small_snps['/calls/AO'] = alt_depths
    small_snps['/calls/RO'] = ref_depths
    small_snps.samples = list(range(3))
    small_distribs, _ = calc_maf_depth_distribs_per_sample(
        small_snps, n_bins=4, min_depth=6, chunk_size=None)
    # The second sample's row is all zeros -- presumably because its
    # depth (5) is below min_depth=6; confirm against the implementation.
    expected_counts = [
        [0, 0, 0, 1],
        [0, 0, 0, 0],
        [0, 0, 1, 0],
    ]
    assert numpy.all(small_distribs == expected_counts)

    # Case 3: unchunked and default-chunked runs must agree.
    ril_path = join(TEST_DATA_DIR, 'ril.hdf5')
    ril_snps = VariationsH5(ril_path, mode='r')
    unchunked, _ = calc_maf_depth_distribs_per_sample(
        ril_snps, min_depth=6, chunk_size=None)
    chunked, _ = calc_maf_depth_distribs_per_sample(ril_snps, min_depth=6)
    assert numpy.all(unchunked == chunked)
def plot_maf_depth(variations, data_dir, min_depth=DEF_MIN_DEPTH,
                   chunk_size=SNPS_PER_CHUNK):
    """Write one depth-based MAF histogram PNG per sample.

    Each plot is saved as ``<data_dir>/maf_depth/<sample>.png``.

    Args:
        variations: Object passed to ``calc_maf_depth_distribs_per_sample``;
            its ``samples`` attribute names the output files. When
            ``samples`` is None the row index of each distribution is used.
        data_dir: Directory under which the ``maf_depth`` folder is created.
        min_depth: Forwarded to ``calc_maf_depth_distribs_per_sample``.
        chunk_size: Forwarded to ``calc_maf_depth_distribs_per_sample``.

    Returns:
        None. Returns without writing anything when no distributions were
        computed.
    """
    # Honour the caller's chunk_size instead of always using the default.
    maf_dp_distribs, bins = calc_maf_depth_distribs_per_sample(
        variations, min_depth=min_depth, n_bins=100, chunk_size=chunk_size)

    if maf_dp_distribs is None:
        # The calc function can return (None, None), e.g. for empty call
        # arrays; there is nothing to plot in that case.
        return

    maf_dp_dir = os.path.join(data_dir, 'maf_depth')
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(maf_dp_dir, exist_ok=True)

    samples = variations.samples
    if samples is None:
        samples = range(maf_dp_distribs.shape[0])

    for sample, distrib in zip(samples, maf_dp_distribs):
        fpath = os.path.join(maf_dp_dir, '{}.png'.format(sample))
        title = 'Depth based Maximum allele frequency (MAF) distribution {}'
        title = title.format(sample)
        # Keys look like matplotlib Axes method names with their call
        # arguments -- presumably applied by plot_distrib; confirm there.
        mpl_params = {
            'set_xlabel': {'args': ['MAF (depth)'], 'kwargs': {}},
            'set_ylabel': {'args': ['SNPs number'], 'kwargs': {}},
            'set_title': {'args': [title], 'kwargs': {}},
            'set_yscale': {'args': ['log'], 'kwargs': {}},
        }
        # PNG is binary data, so open in binary mode; the context manager
        # closes the handle even if plotting raises.
        with open(fpath, 'wb') as fhand:
            plot_distrib(distrib, bins, fhand=fhand, figsize=(10, 10),
                         mpl_params=mpl_params, n_ticks=10)