def test_interval_restriction_pval_with_lower_bigger_than_upper_limit_raises_error(
        self):
    """Expect ValueError when the lower limit (1.2e-2) exceeds the upper limit (1.3e-10)."""
    mantissas, exponents = get_mantissa_and_exp_arrays_from_pvals(
        pvals_same_exp_diff_mantissa)
    with pytest.raises(ValueError):
        # Datasets are built inside the context so that anything raised while
        # constructing the restriction's arguments is covered as well.
        mantissa_dset = Dataset(mantissas)
        exp_dset = Dataset(exponents)
        IntervalRestrictionPval(1.2e-2, 1.3e-10, mantissa_dset, exp_dset)
def test_get_interval_pval_restriction(self):
    """A float interval applied to a [mantissa, exponent] pair gives an IntervalRestrictionPval."""
    mantissa_dset = Dataset([1, 12, 13])
    exp_dset = Dataset([-1, -2, -3])
    pval_interval = FloatInterval().set_string_tuple("1e-3:1e-2")

    result = get_restriction(pval_interval, [mantissa_dset, exp_dset])

    assert isinstance(result, IntervalRestrictionPval)
def get_dset(self, dset_name, start, size):
    """
    Fetch a window of a named dataset from this group.

    :param dset_name: the name of the dataset to look up in ``self.group``
    :param start: the offset at which the returned window begins
    :param size: the maximum number of data points in the returned window
    :return: Dataset wrapping the slice ``[start:start + size]`` (clipped to the
        dataset's first dimension), or an empty Dataset when the name is not in
        the group or ``start`` lies beyond the end of the dataset
    """
    stored = self.group.get(dset_name)
    if stored is None:
        return Dataset([])

    total = stored.shape[0]
    if start > total:
        return Dataset([])

    # Clip the window so it never runs past the end of the stored data.
    stop = min(total, (start + size))
    return Dataset(stored[start:stop])
def setup_method(self, method):
    """Populate ``self.loader_dictionary`` with one Dataset per dataset name."""
    mantissas, exponents = get_mantissa_and_exp_arrays_from_pvals(pvalsarray)

    # (dataset name, raw values) pairs, kept in the order the dictionary is built.
    raw_arrays = (
        (SNP_DSET, snpsarray),
        (MANTISSA_DSET, mantissas),
        (EXP_DSET, exponents),
        (CHR_DSET, chrarray),
        (STUDY_DSET, studyarray),
        (OR_DSET, orarray),
        (BP_DSET, bparray),
        (EFFECT_DSET, effectarray),
        (OTHER_DSET, otherarray),
        (FREQ_DSET, frequencyarray),
    )
    self.loader_dictionary = {
        name: Dataset(values) for name, values in raw_arrays
    }
def test_various_pvals_2(self):
    """Filtering ``pvals`` to the interval [4e-9, 5e-3] is expected to keep 3 entries."""
    mantissas, exponents = get_mantissa_and_exp_arrays_from_pvals(pvals)
    pval_restriction = IntervalRestrictionPval(
        4e-9, 5e-3, Dataset(mantissas), Dataset(exponents))
    dsets = {
        MANTISSA_DSET: Dataset(mantissas),
        EXP_DSET: Dataset(exponents),
    }

    result = filter_dsets_with_restrictions(dsets, [pval_restriction])

    # Mantissa and exponent datasets must be filtered in lockstep.
    for dset_name in (MANTISSA_DSET, EXP_DSET):
        assert len(result[dset_name]) == 3
def test_pvals_with_same_mantissa_diff_exp_cross_on_limits(self):
    """Filtering to the interval [1.2e-9, 1.2e-2] is expected to keep 5 entries."""
    mantissas, exponents = get_mantissa_and_exp_arrays_from_pvals(
        pvals_same_mantissa_diff_exp)
    pval_restriction = IntervalRestrictionPval(
        1.2e-9, 1.2e-2, Dataset(mantissas), Dataset(exponents))
    dsets = {
        MANTISSA_DSET: Dataset(mantissas),
        EXP_DSET: Dataset(exponents),
    }

    result = filter_dsets_with_restrictions(dsets, [pval_restriction])

    # Mantissa and exponent datasets must be filtered in lockstep.
    for dset_name in (MANTISSA_DSET, EXP_DSET):
        assert len(result[dset_name]) == 5
def test_pvals_with_same_exp_diff_mantissa_right_on_limits(self):
    """Filtering to the interval [1.2e-15, 9e-15] is expected to keep 4 entries."""
    mantissas, exponents = get_mantissa_and_exp_arrays_from_pvals(
        pvals_same_exp_diff_mantissa)
    pval_restriction = IntervalRestrictionPval(
        1.2e-15, 9e-15, Dataset(mantissas), Dataset(exponents))
    dsets = {
        MANTISSA_DSET: Dataset(mantissas),
        EXP_DSET: Dataset(exponents),
    }

    # Diagnostic output of the decomposed p-values.
    print(mantissas)
    print(exponents)

    result = filter_dsets_with_restrictions(dsets, [pval_restriction])

    # Mantissa and exponent datasets must be filtered in lockstep.
    for dset_name in (MANTISSA_DSET, EXP_DSET):
        assert len(result[dset_name]) == 4
def test_slice_datasets_where_chromosome(self):
    """Slicing on chromosome 1 keeps only the two entries stored under chromosome 1."""
    self.loader_dictionary[CHR_DSET] = Dataset([1, 1, 2, 2])
    self.loader_dictionary[SNP_DSET] = Dataset(
        ['snp1', 'snp2', 'snp3', 'snp4'])
    load = loader.Loader(
        None, self.h5file, self.study, self.loader_dictionary)

    sliced = load._slice_datasets_where_chromosome(1)

    chromosomes = sliced[CHR_DSET]
    assert len(chromosomes) == 2
    assert set(chromosomes).pop() == 1

    snps = sliced[SNP_DSET]
    assert len(snps) == 2
    for kept in ("snp1", "snp2"):
        assert kept in snps
    for dropped in ("snp3", "snp4"):
        assert dropped not in snps
def test_create_dataset(self):
    """Check dataset creation in a study group plus the two expected failure modes."""
    load = prepare_load_object_with_study_and_trait(
        h5file=self.h5file, study="Study1", trait="Trait1", loader=loader)
    study_group = load._create_study_group(load._create_trait_group())

    first_values = Dataset([1, 2, 3])
    study_group.generate_dataset(CHR_DSET, first_values)

    stored = self.f.get("/Trait1/Study1/" + CHR_DSET)
    assert stored is not None
    assert stored.name == "/Trait1/Study1/" + CHR_DSET
    assert len(stored[:]) == len(first_values)

    second_values = Dataset([2, 3, 4])
    # Generating under a name that already exists is expected to fail.
    with pytest.raises(RuntimeError):
        study_group.generate_dataset(CHR_DSET, second_values)

    # A name other than the known dataset constants is expected to be rejected.
    with pytest.raises(KeyError):
        study_group.generate_dataset("random", second_values)
def test_get_equality_str_restriction(self):
    """A plain string value on a string dataset gives an EqualityRestriction."""
    snp_names = Dataset(['rs1', 'rs2', 'rs3'])

    result = get_restriction('rs1', snp_names)

    assert isinstance(result, EqualityRestriction)
def test_get_float_restriction_ceiling_none(self):
    """A float interval with no upper limit still gives an IntervalRestriction."""
    float_values = Dataset([1., 2., 3.])
    open_ended_interval = FloatInterval().set_tuple(1., None)

    result = get_restriction(open_ended_interval, float_values)

    assert isinstance(result, IntervalRestriction)
def test_empty_array(self):
    """``utils.empty_array`` is truthy for None and [] and falsy for the other inputs."""
    for empty_value in (None, []):
        assert utils.empty_array(empty_value)

    for plain_value in ("not an array", [1, 2, 3]):
        assert not utils.empty_array(plain_value)

    wrapped = Dataset([1, 2, 3])
    assert not utils.empty_array(wrapped)
def test_get_equality_int_restriction(self):
    """A single numeric value gives an EqualityRestriction.

    Despite the test name, both the dataset and the value used here are floats.
    """
    float_values = Dataset([1., 2., 3.])

    result = get_restriction(1., float_values)

    assert isinstance(result, EqualityRestriction)
def test_filter_dsets_with_restrictions(self):
    """Exercise filter_dsets_with_restrictions with several restriction sets.

    The same three five-element datasets are filtered four times: by three
    combined restrictions, by two combined restrictions, by a single
    zero-width interval, and finally with no restrictions at all.
    """
    datasets = {
        SNP_DSET: Dataset(["rs1", "rs1", "rs1", "rs2", "rs3"]),
        PVAL_DSET: Dataset([1., 2.1, 3, 3.1, 4]),
        CHR_DSET: Dataset([1, 1, 1, 1, 2])
    }

    # Case 1: snp == "rs1", pval within [1., 2.1] and chromosome == 1.
    # Only the first two positions satisfy all three restrictions.
    restrictions = [
        EqualityRestriction("rs1", datasets[SNP_DSET]),
        IntervalRestriction(1., 2.1, datasets[PVAL_DSET]),
        EqualityRestriction(1, datasets[CHR_DSET])
    ]
    filtered_dsets = filter_dsets_with_restrictions(datasets, restrictions)
    # Every dataset name must survive filtering, even ones whose values shrink.
    assert len(list(filtered_dsets.keys())) == 3
    assert len(filtered_dsets[SNP_DSET]) == 2
    assert len(set(filtered_dsets[SNP_DSET])) == 1
    assert filtered_dsets[SNP_DSET][0] == "rs1"
    assert len(filtered_dsets[PVAL_DSET]) == 2
    for pval in filtered_dsets[PVAL_DSET]:
        assert pval >= 1.
        assert pval <= 2.1
    assert len(filtered_dsets[CHR_DSET]) == 2
    for chromosome in filtered_dsets[CHR_DSET]:
        assert chromosome == 1

    # Case 2: pval within [3., 3.1] and chromosome == 1.
    # Positions three and four match ("rs1" and "rs2").
    restrictions = [
        IntervalRestriction(3., 3.1, datasets[PVAL_DSET]),
        EqualityRestriction(1, datasets[CHR_DSET])
    ]
    filtered_dsets = filter_dsets_with_restrictions(datasets, restrictions)
    assert len(list(filtered_dsets.keys())) == 3
    assert len(filtered_dsets[SNP_DSET]) == 2
    assert filtered_dsets[SNP_DSET][0] == "rs1"
    assert filtered_dsets[SNP_DSET][1] == "rs2"
    assert len(filtered_dsets[PVAL_DSET]) == 2
    for pval in filtered_dsets[PVAL_DSET]:
        assert pval >= 3.
        assert pval <= 3.1
    assert len(filtered_dsets[CHR_DSET]) == 2
    for chromosome in filtered_dsets[CHR_DSET]:
        assert chromosome == 1

    # Case 3: zero-width interval [4., 4.] matches only the last position.
    restrictions = [
        IntervalRestriction(4., 4., datasets[PVAL_DSET]),
    ]
    filtered_dsets = filter_dsets_with_restrictions(datasets, restrictions)
    assert len(list(filtered_dsets.keys())) == 3
    assert len(filtered_dsets[SNP_DSET]) == 1
    assert filtered_dsets[SNP_DSET][0] == "rs3"
    assert len(filtered_dsets[PVAL_DSET]) == 1
    assert filtered_dsets[PVAL_DSET][0] == 4.
    assert len(filtered_dsets[CHR_DSET]) == 1
    assert filtered_dsets[CHR_DSET][0] == 2

    # Case 4: an empty restriction list.
    restrictions = []
    filtered_dsets = filter_dsets_with_restrictions(datasets, restrictions)
    assert len(list(filtered_dsets.keys())) == 3
    # NOTE(review): this loop checks the lengths of the input ``datasets``, not
    # of ``filtered_dsets`` -- confirm whether the filtered result was meant.
    for dset_name in datasets:
        assert len(datasets[dset_name]) == 5
def _create_dset_placeholder(value, size):
    """
    Build a Dataset filled with a single repeated value.

    :param value: the filler value; must not be None
    :param size: the number of times ``value`` is repeated
    :return: Dataset built from a list holding ``value`` ``size`` times
    :raises AssertionError: if ``value`` is None
    """
    # Raised explicitly instead of through an ``assert`` statement so that the
    # check is not stripped when Python runs with ``-O``. The exception type
    # and message are unchanged for existing callers.
    if value is None:
        raise AssertionError("Can't create dataset with empty content!")
    return Dataset([value for _ in range(size)])
def test_get_float_restriction(self):
    """A float interval parsed from "1.:2." gives an IntervalRestriction."""
    float_values = Dataset([1., 2., 3.])
    parsed_interval = FloatInterval().set_string_tuple("1.:2.")

    result = get_restriction(parsed_interval, float_values)

    assert isinstance(result, IntervalRestriction)
def test_filter_dictionary_by_mask(self):
    """Check utils.filter_dictionary_by_mask on numeric and string datasets."""
    # Numeric datasets: the mask for value 1 selects only the first position.
    numeric_dsets = {
        'dset1': Dataset([1, 2, 3]),
        'dset2': Dataset([1, 3, 3])
    }
    selector = Dataset([1, 2, 2])
    keep = selector.equality_mask(1)
    print(keep)
    numeric_dsets = utils.filter_dictionary_by_mask(numeric_dsets, keep)
    for name in numeric_dsets:
        assert np.array_equal(numeric_dsets[name], [1])

    # String datasets with the same mask: again only the first position is kept.
    string_dsets = {
        'dset1': Dataset(["a", "b", "c"]),
        'dset2': Dataset(["c", "d", "e"])
    }
    selector = Dataset([1, 2, 2])
    keep = selector.equality_mask(1)
    print(keep)
    string_dsets = utils.filter_dictionary_by_mask(string_dsets, keep)
    assert np.array_equal(string_dsets["dset1"], ["a"])
    assert np.array_equal(string_dsets["dset2"], ["c"])

    # String datasets, mask for value 2: the last two positions are kept.
    string_dsets = {
        'dset1': Dataset(["a", "b", "c"]),
        'dset2': Dataset(["c", "d", "e"])
    }
    selector = Dataset([1, 2, 2])
    keep = selector.equality_mask(2, )
    print(keep)
    string_dsets = utils.filter_dictionary_by_mask(string_dsets, keep)
    assert np.array_equal(string_dsets["dset1"], ["b", "c"])
    assert np.array_equal(string_dsets["dset2"], ["d", "e"])