예제 #1
0
    def test_calc_called_gts_distribution_per_depth(self):
        """Exercise calc_called_gts_distrib_per_depth on file and array data.

        First, the 'ril.hdf5' test file is processed with chunk_size=10 and
        with chunk_size=None, and both results must be equal. Then a small
        in-memory VariationsArrays with one variation and ten samples is
        checked against a hand-written expected matrix.
        """
        # HDF5-backed data: the result must not depend on the chunk size.
        depth_range = range(30)
        h5_vars = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
        chunked, _ = calc_called_gts_distrib_per_depth(
            h5_vars, depths=depth_range, chunk_size=10)
        assert chunked[1, 1] == 1
        unchunked, _ = calc_called_gts_distrib_per_depth(
            h5_vars, depths=depth_range, chunk_size=None)
        assert numpy.all(chunked == unchunked)

        # In-memory data: a single variation with ten samples.
        sample_gts = [[0, 0], [0, 1], [0, 1], [0, 0], [0, 1],
                      [0, 0], [0, 0], [0, 1], [1, 1], [0, 0]]
        sample_dps = [10, 5, 15, 7, 10, 0, 0, 25, 20, 10]
        array_vars = VariationsArrays()
        array_vars['/calls/GT'] = numpy.array([sample_gts])
        array_vars['/calls/DP'] = numpy.array([sample_dps])
        array_vars.samples = list(range(10))

        result, _ = calc_called_gts_distrib_per_depth(
            array_vars, depths=[0, 5, 10, 30])
        expected_rows = [
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ]
        assert numpy.all(result == expected_rows)
    def test_calc_called_gts_distribution_per_depth(self):
        """Test calc_called_gts_distrib_per_depth with two kinds of input.

        The 'ril.hdf5' test file must yield the same distribution whether it
        is processed with chunk_size=10 or chunk_size=None, and a
        hand-built VariationsArrays must yield a known expected matrix.
        """
        ril_path = join(TEST_DATA_DIR, 'ril.hdf5')
        ril = VariationsH5(ril_path, mode='r')

        # Same depths, two chunking strategies: both outputs must match.
        by_chunks, _ = calc_called_gts_distrib_per_depth(ril,
                                                         depths=range(30),
                                                         chunk_size=10)
        assert by_chunks[1, 1] == 1
        whole, _ = calc_called_gts_distrib_per_depth(ril,
                                                     depths=range(30),
                                                     chunk_size=None)
        assert numpy.all(by_chunks == whole)

        # Hand-built genotypes and depths for ten samples of one variation.
        gts = numpy.array([[
            [0, 0], [0, 1], [0, 1], [0, 0], [0, 1],
            [0, 0], [0, 0], [0, 1], [1, 1], [0, 0],
        ]])
        dps = numpy.array([[10, 5, 15, 7, 10, 0, 0, 25, 20, 10]])
        in_memory = VariationsArrays()
        in_memory['/calls/GT'] = gts
        in_memory['/calls/DP'] = dps
        in_memory.samples = list(range(10))

        thresholds = [0, 5, 10, 30]
        observed, _ = calc_called_gts_distrib_per_depth(in_memory,
                                                        depths=thresholds)
        wanted = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                  [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
                  [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
        assert numpy.all(observed == wanted)
예제 #3
0
def plot_called_gts_distrib_per_depth(h5, depths, data_dir,
                                      chunk_size=SNPS_PER_CHUNK):
    """Box-plot the called-genotype distributions for several depths.

    The distributions are obtained from calc_called_gts_distrib_per_depth
    and drawn with plot_boxplot_from_distribs into the file
    'gts_distribution_per_depth.png' inside data_dir.

    Args:
        h5: Variations object passed on to
            calc_called_gts_distrib_per_depth.
        depths: Depth values to evaluate; they are also used as the x tick
            labels of the plot.
        data_dir: Directory in which the PNG file is created.
        chunk_size: Chunk size forwarded to
            calc_called_gts_distrib_per_depth.
    """
    # Distribution of the number of samples with a depth higher than
    # given values
    distribs, _ = calc_called_gts_distrib_per_depth(h5, depths=depths,
                                                    chunk_size=chunk_size)

    fpath = join(data_dir, 'gts_distribution_per_depth.png')
    title = ('Distribution of the number of samples with a depth higher than'
             ' given values')
    mpl_params = {'set_xlabel': {'args': ['Depth'], 'kwargs': {}},
                  'set_ylabel': {'args': ['Number of samples'], 'kwargs': {}},
                  'set_title': {'args': [title], 'kwargs': {}},
                  'set_xticklabels': {'args': [depths],
                                      'kwargs': {'rotation': 90}}}
    # A PNG is binary data, so the handle is opened in binary mode, and the
    # context manager closes it even if plotting raises.
    with open(fpath, 'wb') as fhand:
        plot_boxplot_from_distribs(distribs, fhand=fhand,
                                   figsize=(15, 10), mpl_params=mpl_params,
                                   color='tan')
예제 #4
0
def plot_called_gts_distrib_per_depth(h5,
                                      depths,
                                      data_dir,
                                      chunk_size=SNPS_PER_CHUNK):
    """Write a box plot of the called-genotype distributions per depth.

    calc_called_gts_distrib_per_depth provides the distributions and
    plot_boxplot_from_distribs draws them into
    'gts_distribution_per_depth.png' under data_dir.

    Args:
        h5: Variations object handed to calc_called_gts_distrib_per_depth.
        depths: Depth values to evaluate; reused as x tick labels.
        data_dir: Directory that receives the PNG file.
        chunk_size: Chunk size forwarded to
            calc_called_gts_distrib_per_depth.
    """
    # Distribution of the number of samples with a depth higher than
    # given values
    distribs, _ = calc_called_gts_distrib_per_depth(h5,
                                                    depths=depths,
                                                    chunk_size=chunk_size)

    fpath = join(data_dir, 'gts_distribution_per_depth.png')
    title = ('Distribution of the number of samples with a depth higher than'
             ' given values')
    mpl_params = {
        'set_xlabel': {
            'args': ['Depth'],
            'kwargs': {}
        },
        'set_ylabel': {
            'args': ['Number of samples'],
            'kwargs': {}
        },
        'set_title': {
            'args': [title],
            'kwargs': {}
        },
        'set_xticklabels': {
            'args': [depths],
            'kwargs': {
                'rotation': 90
            }
        }
    }
    # The image is binary, hence mode 'wb'; the with-block ensures the
    # handle is closed whether or not the plotting call succeeds.
    with open(fpath, 'wb') as fhand:
        plot_boxplot_from_distribs(distribs,
                                   fhand=fhand,
                                   figsize=(15, 10),
                                   mpl_params=mpl_params,
                                   color='tan')