Exemplo n.º 1
0
    def test_calc_obs_het_sample(self):
        """Check calc_obs_het_by_sample across storage backends and options.

        Covers: HDF5 vs. in-memory agreement, a small hand-built genotype
        array with known expected values, an empty genotype array, the
        call-depth filters and chunked vs. un-chunked evaluation.
        """
        # The same data read from the HDF5 file and copied into in-memory
        # arrays must produce identical results.
        hdf5 = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
        snps = VariationsArrays()
        snps.put_chunks(hdf5.iterate_chunks(kept_fields=['/calls/GT']))
        het_h5 = calc_obs_het_by_sample(hdf5)
        het_array = calc_obs_het_by_sample(snps)
        assert numpy.all(het_array == het_h5)

        # Hand-built genotypes; -1 is presumably the missing-allele marker
        # (TODO confirm against the library). One value is expected per
        # entry of the second axis, and the all -1 column must yield NaN.
        gts = numpy.array([[[0, 0], [0, 1], [0, -1], [-1, -1]],
                           [[0, 0], [0, 0], [0, -1], [-1, -1]],
                           [[0, 0], [0, 0], [0, 0], [-1, -1]]])

        varis = {'/calls/GT': gts}
        het = calc_obs_het_by_sample(varis, chunk_size=None)
        # numpy.nan instead of numpy.NaN: the capitalised alias was removed
        # in NumPy 2.0, the lowercase name exists in every release.
        assert numpy.allclose(het, [0, 1 / 3, 0, numpy.nan], equal_nan=True)

        # An empty genotype array must produce an empty result.
        gts = numpy.array([])
        varis = {'/calls/GT': gts}
        het = calc_obs_het_by_sample(varis, chunk_size=None)
        assert het.shape[0] == 0

        # The depth filters only have to run without raising here; their
        # results are not inspected.
        snps = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
        calc_obs_het_by_sample(snps, min_call_dp=3)
        calc_obs_het_by_sample(snps, min_call_dp=3, max_call_dp=20)

        # Default chunking and chunk_size=None must agree.
        het_0 = calc_obs_het_by_sample(snps)
        het = calc_obs_het_by_sample(snps, chunk_size=None)
        assert numpy.allclose(het_0, het)
    def test_calc_obs_het_sample(self):
        """Exercise calc_obs_het_by_sample on file, array and dict inputs.

        The test compares the HDF5-backed and in-memory results, checks a
        small literal genotype array against known values, checks the
        empty-input case, runs the call-depth filters, and compares the
        default chunked run with the chunk_size=None run.
        """
        # HDF5-backed and array-backed variations hold the same genotypes,
        # so the two results have to match exactly.
        hdf5 = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
        snps = VariationsArrays()
        snps.put_chunks(hdf5.iterate_chunks(kept_fields=['/calls/GT']))
        het_h5 = calc_obs_het_by_sample(hdf5)
        het_array = calc_obs_het_by_sample(snps)
        assert numpy.all(het_array == het_h5)

        # Literal genotypes with a known answer. NOTE(review): -1 looks
        # like the missing-allele code -- confirm against the library.
        gts = numpy.array([[[0, 0], [0, 1], [0, -1], [-1, -1]],
                           [[0, 0], [0, 0], [0, -1], [-1, -1]],
                           [[0, 0], [0, 0], [0, 0], [-1, -1]]])

        varis = {'/calls/GT': gts}
        het = calc_obs_het_by_sample(varis, chunk_size=None)
        # Use numpy.nan: the numpy.NaN alias no longer exists in NumPy 2.0
        # and would raise AttributeError before the comparison is made.
        assert numpy.allclose(het, [0, 1 / 3, 0, numpy.nan], equal_nan=True)

        # Empty genotypes give a zero-length result.
        gts = numpy.array([])
        varis = {'/calls/GT': gts}
        het = calc_obs_het_by_sample(varis, chunk_size=None)
        assert het.shape[0] == 0

        # Smoke-run the min/max call depth options (results not checked).
        snps = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
        calc_obs_het_by_sample(snps, min_call_dp=3)
        calc_obs_het_by_sample(snps, min_call_dp=3, max_call_dp=20)

        # Chunked and un-chunked evaluation must give the same values.
        het_0 = calc_obs_het_by_sample(snps)
        het = calc_obs_het_by_sample(snps, chunk_size=None)
        assert numpy.allclose(het_0, het)
Exemplo n.º 3
0
def plot_obs_het(variations, data_dir, chunk_size=SNPS_PER_CHUNK,
                 min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Plot the observed heterozygosity distributions into one PNG file.

    Two histograms (25 bins over the range 0-1) are drawn on a single
    figure: the per-SNP distribution on the upper subplot, with a log
    y-scale, and the per-sample distribution on the lower subplot. The
    figure is written to ``obs_het.png`` inside ``data_dir``.

    Args:
        variations: Variations object handed to ``histogram_for_chunks``
            and ``calc_obs_het_by_sample``.
        data_dir: Directory in which ``obs_het.png`` is created.
        chunk_size: Chunk size forwarded to both calculations.
        min_num_genotypes: Forwarded to ``calc_obs_het`` for the per-SNP
            calculation.

    Raises:
        OSError: If the output file cannot be opened for writing.
    """
    # Calculate observed heterozygosity distribution by snp
    _calc_obs_het_by_var = partial(calc_obs_het,
                                   min_num_genotypes=min_num_genotypes)
    distrib = histogram_for_chunks(variations, calc_funct=_calc_obs_het_by_var,
                                   n_bins=25, range_=(0, 1),
                                   chunk_size=chunk_size)
    obs_het_var_distrib, bins1 = distrib

    # Calculate observed heterozygosity distribution by sample
    obs_het_by_sample = calc_obs_het_by_sample(variations,
                                               chunk_size=chunk_size)
    obs_het_sample_distrib, bins2 = histogram(obs_het_by_sample, n_bins=25,
                                              range_=(0, 1))

    # Plot distributions
    fpath = join(data_dir, 'obs_het.png')
    # The file is opened once, in binary mode (PNG data is bytes), and is
    # closed by the with block even if plotting fails. Previously the same
    # path was opened three times in text mode and never closed.
    # NOTE(review): plot_distrib is assumed not to write to fhand when an
    # axes object is supplied -- confirm against its implementation.
    with open(fpath, 'wb') as fhand:
        fig = Figure(figsize=(10, 10))
        canvas = FigureCanvas(fig)

        axes = fig.add_subplot(211)
        title = 'SNP observed Heterozygosity distribution'
        snp_params = {'set_xlabel': {'args': ['Heterozygosity'],
                                     'kwargs': {}},
                      'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
                      'set_title': {'args': [title], 'kwargs': {}},
                      'set_yscale': {'args': ['log'], 'kwargs': {}}}
        plot_distrib(obs_het_var_distrib, bins=bins1, fhand=fhand,
                     mpl_params=snp_params, axes=axes, color='c')

        axes = fig.add_subplot(212)
        title = 'Sample observed Heterozygosity distribution'
        sample_params = {'set_xlabel': {'args': ['Heterozygosity'],
                                        'kwargs': {}},
                         'set_ylabel': {'args': ['Sample number'],
                                        'kwargs': {}},
                         'set_title': {'args': [title], 'kwargs': {}}}
        plot_distrib(obs_het_sample_distrib, bins=bins2, fhand=fhand,
                     mpl_params=sample_params, axes=axes, color='c')

        canvas.print_figure(fhand)
Exemplo n.º 4
0
def plot_obs_het(variations,
                 data_dir,
                 chunk_size=SNPS_PER_CHUNK,
                 min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT):
    """Write the observed heterozygosity histograms to ``obs_het.png``.

    The per-SNP distribution goes on the upper subplot (log y-scale) and
    the per-sample distribution on the lower one. Both histograms use 25
    bins over the range 0-1. The PNG is created inside ``data_dir``.

    Args:
        variations: Variations object passed to ``histogram_for_chunks``
            and ``calc_obs_het_by_sample``.
        data_dir: Directory that receives ``obs_het.png``.
        chunk_size: Chunk size forwarded to both calculations.
        min_num_genotypes: Forwarded to ``calc_obs_het`` when computing
            the per-SNP values.

    Raises:
        OSError: If the output file cannot be opened for writing.
    """
    # Calculate observed heterozygosity distribution by snp
    _calc_obs_het_by_var = partial(calc_obs_het,
                                   min_num_genotypes=min_num_genotypes)
    distrib = histogram_for_chunks(variations,
                                   calc_funct=_calc_obs_het_by_var,
                                   n_bins=25,
                                   range_=(0, 1),
                                   chunk_size=chunk_size)
    obs_het_var_distrib, bins1 = distrib

    # Calculate observed heterozygosity distribution by sample
    obs_het_by_sample = calc_obs_het_by_sample(variations,
                                               chunk_size=chunk_size)
    obs_het_sample_distrib, bins2 = histogram(obs_het_by_sample,
                                              n_bins=25,
                                              range_=(0, 1))

    # Axis decorations for each subplot, built up front so the plotting
    # calls below stay short.
    snp_title = 'SNP observed Heterozygosity distribution'
    snp_mpl_params = {
        'set_xlabel': {
            'args': ['Heterozygosity'],
            'kwargs': {}
        },
        'set_ylabel': {
            'args': ['SNP number'],
            'kwargs': {}
        },
        'set_title': {
            'args': [snp_title],
            'kwargs': {}
        },
        'set_yscale': {
            'args': ['log'],
            'kwargs': {}
        }
    }
    sample_title = 'Sample observed Heterozygosity distribution'
    sample_mpl_params = {
        'set_xlabel': {
            'args': ['Heterozygosity'],
            'kwargs': {}
        },
        'set_ylabel': {
            'args': ['Sample number'],
            'kwargs': {}
        },
        'set_title': {
            'args': [sample_title],
            'kwargs': {}
        }
    }

    # Plot distributions
    fpath = join(data_dir, 'obs_het.png')
    # A single binary-mode handle replaces the three text-mode handles the
    # function used to open on this path and never close; PNG output is
    # bytes, and the with block guarantees the file is closed.
    # NOTE(review): assumes plot_distrib only draws on the given axes and
    # does not write to fhand itself -- confirm against its implementation.
    with open(fpath, 'wb') as fhand:
        fig = Figure(figsize=(10, 10))
        canvas = FigureCanvas(fig)
        axes = fig.add_subplot(211)
        plot_distrib(obs_het_var_distrib,
                     bins=bins1,
                     fhand=fhand,
                     mpl_params=snp_mpl_params,
                     axes=axes,
                     color='c')
        axes = fig.add_subplot(212)
        plot_distrib(obs_het_sample_distrib,
                     bins=bins2,
                     fhand=fhand,
                     mpl_params=sample_mpl_params,
                     axes=axes,
                     color='c')
        canvas.print_figure(fhand)