예제 #1
0
    def test_calc_dp_means(self):
        """Per-sample depth means must not depend on the chunk_size argument.

        The means are computed once with the default arguments and once with
        ``chunk_size=None``; both results have to be numerically close.
        """
        h5_path = join(TEST_DATA_DIR, 'ril.hdf5')
        variations = VariationsH5(h5_path, mode='r')

        default_means = calc_depth_mean_by_sample(variations)
        no_chunk_means = calc_depth_mean_by_sample(variations,
                                                   chunk_size=None)

        # 153 entries expected -- presumably one per sample in ril.hdf5
        # (TODO confirm against the fixture).
        assert default_means.shape[0] == 153
        assert numpy.allclose(default_means, no_chunk_means)
    def test_calc_dp_means(self):
        """Compare depth means from a default call and a chunk_size=None call."""
        variations = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')

        # Same input, two ways of calling the function under test.
        means_default = calc_depth_mean_by_sample(variations)
        means_unchunked = calc_depth_mean_by_sample(variations,
                                                    chunk_size=None)

        # Length check first (153 is the value the fixture is expected to
        # yield), then agreement between the two results.
        n_means = means_default.shape[0]
        assert n_means == 153
        assert numpy.allclose(means_default, means_unchunked)
예제 #3
0
    def test_filter_samples_by_pseudohet(self):
        """Exercise PseudoHetDuplicationFilter2 on every sample and on a subset.

        Both runs must report a total equal to the number of input
        variations, histogram edges that reach below 1 and above 0, and a
        filtered result whose size equals the 'n_kept' statistic.
        """
        variations = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
        all_dp_means = calc_depth_mean_by_sample(variations)

        def check_common(result):
            # Assertions shared by the all-samples and the subset runs.
            assert result[FLT_STATS]['tot'] == variations.num_variations
            assert min(result[EDGES]) < 1
            assert max(result[EDGES]) > 0
            kept = result[FLT_VARS].num_variations
            assert kept == result[FLT_STATS]['n_kept']

        # Run 1: all samples, additionally asking for the selection report.
        filter_all = PseudoHetDuplicationFilter2(
            sample_dp_means=all_dp_means,
            max_high_dp_freq=0.01,
            do_histogram=True,
            report_selection=True)
        result_all = filter_all(variations)
        check_common(result_all)
        # The selection report must expose a non-empty (truthy) shape.
        assert result_all[SELECTED_VARS].shape

        # Run 2: only the first 50 samples, with the matching depth means.
        first_samples = variations.samples[:50]
        first_dp_means = all_dp_means[:50]
        filter_subset = PseudoHetDuplicationFilter2(
            sample_dp_means=first_dp_means,
            max_high_dp_freq=0.01,
            do_histogram=True,
            samples=first_samples)
        check_common(filter_subset(variations))
예제 #4
0
    def test_filter_samples_by_pseudohet(self):
        """Check the pseudo-het duplication filter output for two sample sets.

        First the filter is built for all samples with the selection report
        enabled; then it is rebuilt for the first 50 samples only.
        """
        h5_path = join(TEST_DATA_DIR, 'ril.hdf5')
        variations = VariationsH5(h5_path, mode='r')

        dp_means = calc_depth_mean_by_sample(variations)
        pseudohet_filter = PseudoHetDuplicationFilter2(
            sample_dp_means=dp_means, max_high_dp_freq=0.01,
            do_histogram=True, report_selection=True)
        outcome = pseudohet_filter(variations)

        # The stats must account for every input variation.
        assert outcome[FLT_STATS]['tot'] == variations.num_variations
        # Histogram edges have to reach below 1 and above 0.
        assert min(outcome[EDGES]) < 1
        assert max(outcome[EDGES]) > 0
        # The filtered variations must match the reported kept count.
        assert outcome[FLT_VARS].num_variations == outcome[FLT_STATS]['n_kept']
        # report_selection=True was requested, so the selection is checked.
        assert outcome[SELECTED_VARS].shape

        # some samples
        subset_samples = variations.samples[:50]
        subset_dp_means = dp_means[:50]
        pseudohet_filter = PseudoHetDuplicationFilter2(
            sample_dp_means=subset_dp_means, max_high_dp_freq=0.01,
            do_histogram=True, samples=subset_samples)
        outcome = pseudohet_filter(variations)

        # Same invariants as above, minus the selection report.
        assert outcome[FLT_STATS]['tot'] == variations.num_variations
        assert min(outcome[EDGES]) < 1
        assert max(outcome[EDGES]) > 0
        assert outcome[FLT_VARS].num_variations == outcome[FLT_STATS]['n_kept']