def test_get_chromosome_1_loop_through_size_20_lower_pval(self):
        """Page through chromosome 1, 20 rows at a time, with a p-value filter.

        The search is repeated from ``start + index_marker`` until an empty
        page comes back. Every non-empty page must hold exactly 20 rows and
        is checked against studies 's1' and 's3'. Five searches in total
        (four full pages plus the final empty one) should yield 80 distinct
        variants.
        """
        page_size = 20
        offset = 0
        pval_interval = FloatInterval().set_tuple(0.00001, 0.00001)

        looped_through = 1
        collected = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)

        while True:
            datasets, index_marker = self.searcher.search_chromosome(
                chromosome=1,
                start=offset,
                size=page_size,
                pval_interval=pval_interval)
            collected = utils.extend_dsets_with_subset(collected, datasets)
            if len(datasets[REFERENCE_DSET]) == 0:
                # an empty page means the search is exhausted
                break

            assert_studies_in_list(datasets, ['s1', 's3'])
            assert_datasets_have_size(datasets, TO_QUERY_DSETS, 20)

            # the marker is relative to the offset the query started from
            offset += index_marker
            looped_through += 1

        assert looped_through == 5
        # the last (empty) search did not advance
        assert index_marker == 0
        # 80 unique variants
        snps = collected[SNP_DSET]
        assert len(set(snps)) == len(snps)
        assert len(snps) == 80
Пример #2
0
    def test_loop_through_t2_size_42(self):
        """Page through trait 't2' in chunks of 42 and check the study mix.

        Expected per non-empty page: 42 rows of 's3' on the first, 8 of 's3'
        plus 34 of 's4' on the second, and 16 of 's4' afterwards. Three
        non-empty pages are expected, and no variant may appear twice in the
        accumulated result.
        """
        offset, page_size = 0, 42
        looped_through = 1
        collected = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)

        while True:
            datasets, index_marker = self.searcher.search_trait(
                trait='t2', start=offset, size=page_size)
            collected = utils.extend_dsets_with_subset(collected, datasets)
            if len(datasets[REFERENCE_DSET]) == 0:
                break

            if looped_through == 2:
                # page straddling the two studies
                assert_number_of_times_study_is_in_datasets(datasets, 's3', 8)
                assert_number_of_times_study_is_in_datasets(datasets, 's4', 34)
                assert_studies_from_list(datasets, ['s3', 's4'])
            elif looped_through < 2:
                assert_number_of_times_study_is_in_datasets(datasets, 's3', 42)
                assert_studies_from_list(datasets, ['s3'])
            else:
                assert_number_of_times_study_is_in_datasets(datasets, 's4', 16)
                assert_studies_from_list(datasets, ['s4'])

            looped_through += 1
            offset += index_marker

        assert looped_through == 4
        snps = collected[SNP_DSET]
        assert len(set(snps)) == len(snps)
    def test_get_chromosome_1_loop_through_size_20_s3(self):
        """Page through chromosome 1 restricted to study 's3', 20 rows a time.

        The first two pages are expected to be full (20 rows) and the third
        to hold 10, for 50 distinct variants overall. The final, empty
        search must report an index marker of 0.
        """
        page_size = 20
        offset = 0
        looped_through = 1
        collected = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)

        while True:
            datasets, index_marker = self.searcher.search_chromosome(
                chromosome=1, start=offset, size=page_size, study='s3')
            collected = utils.extend_dsets_with_subset(collected, datasets)
            if len(datasets[REFERENCE_DSET]) == 0:
                break

            assert_studies_from_list(datasets, ['s3'])
            expected_rows = 20 if looped_through <= 2 else 10
            assert_datasets_have_size(datasets, TO_QUERY_DSETS, expected_rows)

            # continue right after the rows consumed by this page
            offset += index_marker
            looped_through += 1

        assert looped_through == 4
        assert index_marker == 0
        # 50 unique variants
        snps = collected[SNP_DSET]
        assert len(set(snps)) == len(snps)
        assert len(snps) == 50
Пример #4
0
    def __init__(self, snp, start, size,
                 config_properties=None, chromosome=None):
        """Store the query parameters and pick the SNP service.

        :param snp: variant identifier, kept as ``self.snp``
        :param start: kept as ``self.start``
        :param size: kept as ``self.size``
        :param config_properties: passed to
            ``properties_handler.get_properties``
        :param chromosome: optional; when ``None`` the service is obtained
            from ``_calculate_snp_service()``, otherwise from
            ``_get_snp_service()``
        """
        self.snp, self.chromosome = snp, chromosome
        self.start, self.size = start, size

        props = properties_handler.get_properties(config_properties)
        self.properties = props
        self.search_path = properties_handler.get_search_path(props)

        # settings copied from the loaded properties
        self.chr_dir = props.chr_dir
        self.snp_dir = props.snp_dir
        self.bp_step = props.bp_step

        # result accumulators start out empty
        self.datasets = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)
        self.index_marker = 0

        self.service = (self._calculate_snp_service()
                        if chromosome is None
                        else self._get_snp_service())
    def test_loop_through_w_restrinction_and_always_get_size_20_results(self):
        """Page through all associations under a p-value restriction.

        With the interval (0.0002, 0.06) the expected sequence is: two pages
        of 20 rows from 's2' only, a third page with 10 rows of 's2' and 10
        of 's3', then pages of 20 rows from 's3' only. In total 100 distinct
        variants are expected.
        """
        offset, page_size = 0, 20
        looped_through = 1

        # s2 and s3 p-value limits
        pval_interval = FloatInterval().set_tuple(0.0002, 0.06)
        collected = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)

        while True:
            print("start", offset)
            datasets, next_index = self.searcher.search_all_assocs(
                start=offset, size=page_size, pval_interval=pval_interval)
            print("next index", next_index)
            print(datasets[STUDY_DSET])
            collected = utils.extend_dsets_with_subset(collected, datasets)
            if len(datasets[REFERENCE_DSET]) == 0:
                break

            if looped_through == 3:
                # page straddling the two studies
                assert_studies_from_list(datasets, ['s2', 's3'])
                assert_number_of_times_study_is_in_datasets(datasets, 's2', 10)
                assert_number_of_times_study_is_in_datasets(datasets, 's3', 10)
            else:
                only_study = 's2' if looped_through <= 2 else 's3'
                assert_studies_from_list(datasets, [only_study])
                assert_datasets_have_size(datasets, TO_QUERY_DSETS, 20)

            looped_through += 1
            offset += next_index

        snps = collected[SNP_DSET]
        assert len(set(snps)) == len(snps)
        assert len(snps) == 100
Пример #6
0
    def test_loop_through_t2_size_5_w_restriction_to_s4(self):
        """Page through trait 't2', 5 rows a time, with p-values in (0.06, 0.3).

        Every non-empty page must consist of 5 rows of study 's4'. The very
        first page is expected to report an index marker of 55. Ten
        non-empty pages are expected, with no duplicated variants.
        """
        offset, page_size = 0, 5
        looped_through = 1
        pval_interval = FloatInterval().set_tuple(0.06, 0.3)
        collected = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)

        while True:
            datasets, index_marker = self.searcher.search_trait(
                trait='t2',
                start=offset,
                size=page_size,
                pval_interval=pval_interval)
            collected = utils.extend_dsets_with_subset(collected, datasets)
            if len(datasets[REFERENCE_DSET]) == 0:
                break

            # s4 should already be reached by the first page, so its marker
            # covers all of s3 + the first 5 elements of s4
            if looped_through == 1:
                assert index_marker == 55
            assert_number_of_times_study_is_in_datasets(datasets, 's4', 5)
            assert_studies_from_list(datasets, ['s4'])

            looped_through += 1
            offset += index_marker

        assert looped_through == 11
        snps = collected[SNP_DSET]
        assert len(set(snps)) == len(snps)
    def test_get_chr_1_second_range_loop_20_upper_pval(self):
        """Query chromosome 1 with both a base-pair and a p-value interval.

        The first page must hold 20 rows and is checked against studies 's1'
        and 's3'; the follow-up page must be empty, so exactly 20 distinct
        variants are collected.
        """
        # index 25-40 for first non-empty block: 48500000
        # index 40-50 for second non-empty block: 49200000
        bp_interval = IntInterval().set_string_tuple("1200001:49200000")
        # index 20-35
        pval_interval = FloatInterval().set_tuple(0.1, 0.1)

        # arguments shared by every search call of this test
        query = dict(chromosome=1,
                     size=20,
                     bp_interval=bp_interval,
                     pval_interval=pval_interval)
        offset = 0
        looped_through = 1
        collected = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)

        datasets, index_marker = self.searcher.search_chromosome(
            start=offset, **query)
        collected = utils.extend_dsets_with_subset(collected, datasets)
        while len(datasets[REFERENCE_DSET]) > 0:
            assert_studies_in_list(datasets, ['s1', 's3'])
            assert_datasets_have_size(datasets, TO_QUERY_DSETS, 20)

            offset += index_marker
            datasets, index_marker = self.searcher.search_chromosome(
                start=offset, **query)
            # nothing is expected beyond the first page
            assert_datasets_have_size(datasets, TO_QUERY_DSETS, 0)
            collected = utils.extend_dsets_with_subset(collected, datasets)
            looped_through += 1

        assert looped_through == 2
        snps = collected[SNP_DSET]
        assert len(set(snps)) == len(snps)
        assert len(snps) == 20
    def test_get_snp_loop_through_filter_lower_pval(self):
        """Fetch SNP 'rs138808727' in two pages of size 2 with a p-value filter.

        The first page must hold 2 rows (checked against studies 's1' and
        's3') and report an index marker of 6. The second page, started from
        that marker, must hold 1 row (checked against study 's5'). Three rows
        are accumulated overall.
        """
        pval_interval = FloatInterval().set_tuple(0.01, 0.01)
        # arguments shared by both search calls
        query = dict(snp='rs138808727', size=2, pval_interval=pval_interval)
        offset = 0

        collected = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)

        # first page
        datasets, index_marker = self.searcher.search_snp(
            start=offset, **query)
        collected = utils.extend_dsets_with_subset(collected, datasets)

        assert_datasets_have_size(datasets, TO_QUERY_DSETS, 2)
        assert_studies_from_list(datasets, ['s1', 's3'])
        assert index_marker == 6

        # second page continues where the first one stopped
        offset += index_marker
        datasets, index_marker = self.searcher.search_snp(
            start=offset, **query)
        assert_datasets_have_size(datasets, TO_QUERY_DSETS, 1)

        assert_studies_from_list(datasets, ['s5'])
        collected = utils.extend_dsets_with_subset(collected, datasets)

        assert len(collected[REFERENCE_DSET]) == 3
    def test_create_dictionary_of_empty_dsets(self):
        """Check that one empty ``Dataset`` is created per requested name."""
        datasets = utils.create_dictionary_of_empty_dsets(['dset1', 'dset2'])

        assert len(datasets) == 2

        for name in ('dset1', 'dset2'):
            dset = datasets[name]
            assert isinstance(dset, Dataset)
            assert len(dset) == 0
Пример #10
0
    def __init__(self, start, size, config_properties=None):
        """Store the paging parameters and load the search configuration.

        :param start: kept both as ``self.starting_point`` and ``self.start``
        :param size: kept as ``self.size``
        :param config_properties: passed to
            ``properties_handler.get_properties``
        """
        self.starting_point = self.start = start
        self.size = size

        props = properties_handler.get_properties(config_properties)
        self.properties = props
        self.search_path = properties_handler.get_search_path(props)
        self.trait_dir = props.trait_dir

        self.datasets = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)
        # The index marker is handed back together with the datasets: added
        # to the 'start' value the query began with, it tells where the next
        # search has to continue from.
        self.index_marker = 0
        self.search_traversed = 0
Пример #11
0
    def test_get_all_loop_through_size_20(self):
        """Page through all associations, 20 rows at a time.

        A non-empty page must hold 20 rows, except when the position after
        it (``start + index_marker``) reaches 240 or more, in which case 10
        rows are expected. No variant may appear twice in the accumulated
        result.
        """
        offset, page_size = 0, 20
        collected = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)

        while True:
            datasets, index_marker = self.searcher.search_all_assocs(
                start=offset, size=page_size)
            collected = utils.extend_dsets_with_subset(collected, datasets)
            if len(datasets[REFERENCE_DSET]) == 0:
                break

            next_offset = offset + index_marker
            expected_rows = 10 if next_offset >= 240 else 20
            assert_datasets_have_size(datasets, TO_QUERY_DSETS, expected_rows)
            offset = next_offset

        snps = collected[SNP_DSET]
        assert len(set(snps)) == len(snps)
Пример #12
0
    def __init__(self, chromosome, start, size, config_properties=None):
        """Prepare a search over the h5 file of a single chromosome.

        :param chromosome: kept as ``self.chromosome`` and used as the file
            name when building the h5 file path
        :param start: kept as ``self.start``
        :param size: kept as ``self.size``
        :param config_properties: passed to
            ``properties_handler.get_properties``
        :raises NotFoundError: if the computed h5 path is not an existing file
        """
        self.chromosome, self.start, self.size = chromosome, start, size

        props = properties_handler.get_properties(config_properties)
        self.properties = props
        self.search_path = properties_handler.get_search_path(props)
        self.chr_dir = props.chr_dir

        # result accumulators start out empty
        self.datasets = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)
        self.index_marker = 0

        self.h5file = fsutils.create_h5file_path(
            path=self.search_path,
            dir_name=self.chr_dir,
            file_name=chromosome)

        # fail early, before a service is created on a missing file
        file_exists = os.path.isfile(self.h5file)
        if not file_exists:
            raise NotFoundError("Chromosome " + str(chromosome))
        self.service = chromosome_service.ChromosomeService(self.h5file)
    def test_get_snp_loop_through_size_1(self):
        """Fetch SNP 'rs138808727' one row per page until nothing is left.

        Every non-empty page must hold exactly one row; five rows are
        expected in the accumulated result.
        """
        offset, page_size = 0, 1
        collected = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)

        while True:
            datasets, index_marker = self.searcher.search_snp(
                snp='rs138808727', start=offset, size=page_size)
            collected = utils.extend_dsets_with_subset(collected, datasets)
            if len(datasets[REFERENCE_DSET]) == 0:
                break

            assert_datasets_have_size(datasets, TO_QUERY_DSETS, 1)
            offset += index_marker

        assert len(collected[REFERENCE_DSET]) == 5
    def test_get_chr_1_second_range_loop_5(self):
        """Page through a base-pair range of chromosome 1, 5 rows at a time.

        Every non-empty page must hold 5 rows and is checked against studies
        's1' and 's3'. Ten non-empty pages are expected, with no duplicated
        variants.
        """
        offset, page_size = 0, 5
        bp_interval = IntInterval().set_string_tuple("1200001:49200000")

        looped_through = 1
        collected = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)

        while True:
            datasets, index_marker = self.searcher.search_chromosome(
                chromosome=1,
                start=offset,
                size=page_size,
                bp_interval=bp_interval)
            collected = utils.extend_dsets_with_subset(collected, datasets)
            if len(datasets[REFERENCE_DSET]) == 0:
                break

            assert_studies_in_list(datasets, ['s1', 's3'])
            assert_datasets_have_size(datasets, TO_QUERY_DSETS, 5)

            offset += index_marker
            looped_through += 1

        assert looped_through == 11
        snps = collected[SNP_DSET]
        assert len(set(snps)) == len(snps)
Пример #15
0
    def __init__(self, trait, start, size, config_properties=None):
        """Prepare a search over the h5 file of a single trait.

        :param trait: kept as ``self.trait`` and used as the file name when
            building the h5 file path
        :param start: kept as ``self.start``
        :param size: kept as ``self.size``
        :param config_properties: passed to
            ``properties_handler.get_properties``
        :raises NotFoundError: if the computed h5 path is not an existing file
        """
        self.trait, self.start, self.size = trait, start, size

        props = properties_handler.get_properties(config_properties)
        self.properties = props
        self.search_path = properties_handler.get_search_path(props)
        self.trait_dir = props.trait_dir

        self.datasets = utils.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)
        # The index marker is handed back together with the datasets: added
        # to the 'start' value the query began with, it tells where the next
        # search has to continue from.
        self.index_marker = 0

        self.h5file = fsutils.create_h5file_path(
            self.search_path, dir_name=self.trait_dir, file_name=trait)

        # fail early, before a service is created on a missing file
        file_exists = os.path.isfile(self.h5file)
        if not file_exists:
            raise NotFoundError("Trait " + trait)
        self.service = study_service.StudyService(self.h5file)
    def test_get_dsets_group(self):
        """Collect the datasets of every sub-group of one block of group "/2".

        The block is selected with the base-pair interval
        (48500000, 48500000), and only the first block group returned is
        inspected. Each sub-group must yield ``len(TO_STORE_DSETS)``
        datasets. After merging, the study dataset is expected to hold three
        distinct values and every other dataset exactly one.
        """
        chr_group_2 = gu.Group(self.f.get("/2"))

        bp_interval = IntInterval().set_tuple(48500000, 48500000)
        block = bk.Block(bp_interval)
        block_groups = block.get_block_groups_from_parent(chr_group_2)

        # only the first block group is examined
        block_group = next(block_groups)

        block_sub_groups = block_group.get_all_subgroups()
        d = du.create_dictionary_of_empty_dsets(TO_QUERY_DSETS)

        for block_sub_group in block_sub_groups:
            datasets = query.get_dsets_from_group(block_sub_group, self.start,
                                                  self.size)
            assert len(datasets) == len(TO_STORE_DSETS)
            d = du.extend_dsets_with_subset(d, datasets)

        for dset_name, dset in d.items():
            # Compare names by value: `is` would only match when both strings
            # happen to be the same interned object.
            if dset_name == STUDY_DSET:
                assert len(set(dset)) == 3
            else:
                assert len(set(dset)) == 1