def test_calc_obs_het_sample(self):
    """Check calc_obs_het_by_sample on HDF5, in-memory and dict inputs."""
    # The same genotypes read straight from the HDF5 file and loaded into
    # in-memory arrays must give identical per-sample results.
    hdf5 = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
    snps = VariationsArrays()
    snps.put_chunks(hdf5.iterate_chunks(kept_fields=['/calls/GT']))
    het_h5 = calc_obs_het_by_sample(hdf5)
    het_array = calc_obs_het_by_sample(snps)
    assert numpy.all(het_array == het_h5)

    # Hand-made genotypes: 3 variants x 4 samples x 2 alleles.
    # -1 presumably encodes a missing allele; the last sample has no
    # complete call at all, so its expected heterozygosity is NaN.
    gts = numpy.array([[[0, 0], [0, 1], [0, -1], [-1, -1]],
                       [[0, 0], [0, 0], [0, -1], [-1, -1]],
                       [[0, 0], [0, 0], [0, 0], [-1, -1]]])
    varis = {'/calls/GT': gts}
    het = calc_obs_het_by_sample(varis, chunk_size=None)
    # numpy.nan instead of numpy.NaN: the capitalised alias was removed in
    # NumPy 2.0, while the lowercase name exists in every version.
    assert numpy.allclose(het, [0, 1 / 3, 0, numpy.nan], equal_nan=True)

    # An empty genotype array must produce an empty result.
    gts = numpy.array([])
    varis = {'/calls/GT': gts}
    het = calc_obs_het_by_sample(varis, chunk_size=None)
    assert het.shape[0] == 0

    # The depth filters are only exercised here; no result is asserted.
    snps = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
    calc_obs_het_by_sample(snps, min_call_dp=3)
    calc_obs_het_by_sample(snps, min_call_dp=3, max_call_dp=20)

    # The default chunk size and chunk_size=None must agree.
    het_0 = calc_obs_het_by_sample(snps)
    het = calc_obs_het_by_sample(snps, chunk_size=None)
    assert numpy.allclose(het_0, het)
def plot_obs_het(variations, data_dir, chunk_size=SNPS_PER_CHUNK,
                 min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Plot the observed heterozygosity distributions into a PNG file.

    Two histograms (25 bins over the range 0-1) are drawn on one figure:
    the per-SNP distribution on the top panel and the per-sample
    distribution on the bottom panel. The figure is written to
    ``obs_het.png`` inside ``data_dir``.

    Args:
        variations: Variation data handed to ``histogram_for_chunks`` and
            ``calc_obs_het_by_sample``.
        data_dir: Directory where ``obs_het.png`` is written.
        chunk_size: Chunk size forwarded to both calculations.
        min_num_genotypes: Forwarded to ``calc_obs_het`` for the per-SNP
            calculation.

    Returns:
        None.
    """
    # Calculate observed heterozygosity distribution by snp
    _calc_obs_het_by_var = partial(calc_obs_het,
                                   min_num_genotypes=min_num_genotypes)
    distrib = histogram_for_chunks(variations,
                                   calc_funct=_calc_obs_het_by_var,
                                   n_bins=25, range_=(0, 1),
                                   chunk_size=chunk_size)
    obs_het_var_distrib, bins1 = distrib

    # Calculate observed heterozygosity distribution by sample
    obs_het_by_sample = calc_obs_het_by_sample(variations,
                                               chunk_size=chunk_size)
    obs_het_sample_distrib, bins2 = histogram(obs_het_by_sample, n_bins=25,
                                              range_=(0, 1))

    # Plot distributions
    fpath = join(data_dir, 'obs_het.png')
    fig = Figure(figsize=(10, 10))
    canvas = FigureCanvas(fig)

    # PNG output is bytes, so the file must be opened in binary mode. One
    # handle, closed by the with block, replaces the three separate
    # never-closed open() calls on the same path.
    # NOTE(review): assumes plot_distrib only draws on the supplied axes and
    # does not write to fhand when axes is given - confirm.
    with open(fpath, 'wb') as fhand:
        axes = fig.add_subplot(211)
        title = 'SNP observed Heterozygosity distribution'
        mpl_params = {'set_xlabel': {'args': ['Heterozygosity'],
                                     'kwargs': {}},
                      'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
                      'set_title': {'args': [title], 'kwargs': {}},
                      'set_yscale': {'args': ['log'], 'kwargs': {}}}
        plot_distrib(obs_het_var_distrib, bins=bins1, fhand=fhand,
                     mpl_params=mpl_params, axes=axes, color='c')

        axes = fig.add_subplot(212)
        title = 'Sample observed Heterozygosity distribution'
        mpl_params = {'set_xlabel': {'args': ['Heterozygosity'],
                                     'kwargs': {}},
                      'set_ylabel': {'args': ['Sample number'],
                                     'kwargs': {}},
                      'set_title': {'args': [title], 'kwargs': {}}}
        plot_distrib(obs_het_sample_distrib, bins=bins2, fhand=fhand,
                     mpl_params=mpl_params, axes=axes, color='c')

        canvas.print_figure(fhand)
def plot_obs_het(variations, data_dir, chunk_size=SNPS_PER_CHUNK,
                 min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Write the observed heterozygosity histograms to ``obs_het.png``.

    The figure has two stacked panels, each a 25-bin histogram over the
    range 0-1: observed heterozygosity per SNP (top) and per sample
    (bottom). The file is created inside ``data_dir``.

    Args:
        variations: Variation data handed to ``histogram_for_chunks`` and
            ``calc_obs_het_by_sample``.
        data_dir: Directory where ``obs_het.png`` is written.
        chunk_size: Chunk size forwarded to both calculations.
        min_num_genotypes: Forwarded to ``calc_obs_het`` for the per-SNP
            calculation.

    Returns:
        None.
    """
    # Calculate observed heterozygosity distribution by snp
    _calc_obs_het_by_var = partial(calc_obs_het,
                                   min_num_genotypes=min_num_genotypes)
    distrib = histogram_for_chunks(variations,
                                   calc_funct=_calc_obs_het_by_var,
                                   n_bins=25, range_=(0, 1),
                                   chunk_size=chunk_size)
    obs_het_var_distrib, bins1 = distrib

    # Calculate observed heterozygosity distribution by sample
    obs_het_by_sample = calc_obs_het_by_sample(variations,
                                               chunk_size=chunk_size)
    obs_het_sample_distrib, bins2 = histogram(obs_het_by_sample, n_bins=25,
                                              range_=(0, 1))

    # Plot distributions
    fpath = join(data_dir, 'obs_het.png')
    fig = Figure(figsize=(10, 10))
    canvas = FigureCanvas(fig)

    # The PNG is binary data, so open the target with 'wb' rather than 'w'.
    # A single handle managed by the with block is shared by every call,
    # instead of re-opening the same path three times and never closing it.
    # NOTE(review): assumes plot_distrib only draws on the supplied axes and
    # does not write to fhand when axes is given - confirm.
    with open(fpath, 'wb') as fhand:
        axes = fig.add_subplot(211)
        title = 'SNP observed Heterozygosity distribution'
        plot_distrib(obs_het_var_distrib, bins=bins1, fhand=fhand,
                     mpl_params={
                         'set_xlabel': {
                             'args': ['Heterozygosity'],
                             'kwargs': {}
                         },
                         'set_ylabel': {
                             'args': ['SNP number'],
                             'kwargs': {}
                         },
                         'set_title': {
                             'args': [title],
                             'kwargs': {}
                         },
                         'set_yscale': {
                             'args': ['log'],
                             'kwargs': {}
                         }
                     },
                     axes=axes, color='c')

        axes = fig.add_subplot(212)
        title = 'Sample observed Heterozygosity distribution'
        plot_distrib(obs_het_sample_distrib, bins=bins2, fhand=fhand,
                     mpl_params={
                         'set_xlabel': {
                             'args': ['Heterozygosity'],
                             'kwargs': {}
                         },
                         'set_ylabel': {
                             'args': ['Sample number'],
                             'kwargs': {}
                         },
                         'set_title': {
                             'args': [title],
                             'kwargs': {}
                         }
                     },
                     axes=axes, color='c')

        canvas.print_figure(fhand)