class TestIntervalMetadata(unittest.TestCase, ReallyEqualMixin):
    """Tests for ``IntervalMetadata``.

    ``setUp`` builds three fixtures that share one upper bound: an empty
    object (``im_empty``), one holding a single interval (``im_1``) and
    one holding two intervals (``im_2``).
    """

    def setUp(self):
        self.upper_bound = 10
        self.im_empty = IntervalMetadata(self.upper_bound)
        self.im_1 = IntervalMetadata(self.upper_bound)
        self.im_1_1 = Interval(
            interval_metadata=self.im_1,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2 = IntervalMetadata(self.upper_bound)
        self.im_2_1 = Interval(
            interval_metadata=self.im_2,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2_2 = Interval(
            interval_metadata=self.im_2,
            bounds=[(3, 5)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_copy_empty(self):
        obs = copy(self.im_empty)
        self.assertEqual(obs, self.im_empty)
        self.assertIsNot(obs._intervals, self.im_empty._intervals)
        self.assertIsNot(obs._interval_tree, self.im_empty._interval_tree)

    def test_copy(self):
        obs = copy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for copied, original in zip(obs._intervals, self.im_2._intervals):
            self.assertIsNot(copied, original)
            self.assertIsNot(copied.bounds, original.bounds)
            self.assertIsNot(copied.fuzzy, original.fuzzy)
            self.assertIsNot(copied._interval_metadata,
                             original._interval_metadata)
            self.assertIsNot(copied.metadata, original.metadata)
            # shallow copy: the metadata values themselves are shared
            for key in copied.metadata:
                self.assertIs(copied.metadata[key], original.metadata[key])

    def test_deepcopy(self):
        obs = deepcopy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for copied, original in zip(obs._intervals, self.im_2._intervals):
            self.assertIsNot(copied, original)
            self.assertIsNot(copied.bounds, original.bounds)
            self.assertIsNot(copied.fuzzy, original.fuzzy)
            self.assertIsNot(copied.metadata, original.metadata)

        # Only the last interval carries the mutable 'spam' list; mutating
        # it on the original must not show up in the deep copy.
        copied, original = obs._intervals[-1], self.im_2._intervals[-1]
        original.metadata['spam'].append(1)
        self.assertEqual(original.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0, 1]})
        self.assertEqual(copied.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_deepcopy_memo_is_respected(self):
        # the caller-supplied memo dict must be filled in by deepcopy
        memo = {}
        deepcopy(self.im_1, memo)
        self.assertGreater(len(memo), 2)

    def test_init(self):
        self.assertFalse(self.im_empty._is_stale_tree)
        self.assertEqual(self.im_empty._intervals, [])

    def test_init_upper_bound_lt_lower_bound(self):
        # test that no exception is raised
        IntervalMetadata(0)

        with self.assertRaises(ValueError):
            IntervalMetadata(-1)

    def test_upper_bound_is_none(self):
        im = IntervalMetadata(None)
        # should not raise error
        im.add([(0, 1000000000)])
        self.assertIsNone(im.upper_bound)
        with self.assertRaisesRegex(TypeError, 'upper bound is `None`'):
            im._reverse()
        with self.assertRaisesRegex(TypeError, 'upper bound is `None`'):
            IntervalMetadata.concat([self.im_1, im])

    def test_init_copy_from(self):
        for i in [None, 99, 999]:
            obs = IntervalMetadata(i, self.im_1)
            exp = IntervalMetadata(i)
            exp.add(bounds=[(1, 2), (4, self.upper_bound)],
                    metadata={'gene': 'sagA', 'bound': 0})
            self.assertEqual(obs, exp)

    def test_init_copy_from_empty(self):
        for i in [None, 0, 9, 99, 999]:
            obs = IntervalMetadata(i, self.im_empty)
            exp = IntervalMetadata(i)
            self.assertEqual(obs, exp)
            # test it is shallow copy
            self.assertIsNot(obs._intervals, self.im_empty._intervals)
            self.assertIsNot(obs._interval_tree,
                             self.im_empty._interval_tree)

    def test_init_copy_from_shallow_copy(self):
        obs = IntervalMetadata(self.upper_bound, self.im_2)
        self.assertEqual(self.im_2, obs)
        # test it is shallow copy
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for copied, original in zip(obs._intervals, self.im_2._intervals):
            self.assertIsNot(copied, original)
            self.assertIsNot(copied.bounds, original.bounds)
            self.assertIsNot(copied.fuzzy, original.fuzzy)
            self.assertIsNot(copied._interval_metadata,
                             original._interval_metadata)
            self.assertIsNot(copied.metadata, original.metadata)
            for key in copied.metadata:
                self.assertIs(copied.metadata[key], original.metadata[key])

    def test_init_copy_from_error(self):
        # im_2 has an interval ending at upper_bound, so a smaller bound
        # cannot hold the copied intervals
        i = self.upper_bound - 1
        with self.assertRaisesRegex(
                ValueError, r'larger than upper bound \(%r\)' % i):
            IntervalMetadata(i, self.im_2)

    def test_num_interval_features(self):
        self.assertEqual(self.im_empty.num_interval_features, 0)
        self.assertEqual(self.im_1.num_interval_features, 1)
        self.assertEqual(self.im_2.num_interval_features, 2)

    def test_duplicate(self):
        '''Test query and drop methods on duplicate Intervals.'''
        intvl_1 = self.im_empty.add([(1, 2)])
        intvl_2 = self.im_empty.add([(1, 2)])
        self.assertEqual(len(list(self.im_empty.query([(1, 2)]))), 2)
        self.im_empty.drop([intvl_1])
        self.assertEqual(len(self.im_empty._intervals), 1)
        self.assertIs(self.im_empty._intervals[0], intvl_2)

    def test_duplicate_bounds(self):
        # repeated bounds within one interval must yield a single hit
        intvl = self.im_empty.add([(1, 2), (1, 2)])
        intvls = list(self.im_empty.query([(1, 2)]))
        self.assertEqual(len(intvls), 1)
        self.assertIs(intvl, intvls[0])

    def test_concat_empty(self):
        for i in 0, 1, 2:
            obs = IntervalMetadata.concat([self.im_empty] * i)
            exp = IntervalMetadata(self.upper_bound * i)
            self.assertEqual(obs, exp)

        obs = IntervalMetadata.concat([])
        self.assertEqual(obs, IntervalMetadata(0))

    def test_concat(self):
        im1 = IntervalMetadata(3)
        im2 = IntervalMetadata(4)
        im3 = IntervalMetadata(5)
        im1.add([(0, 2)], [(True, True)])
        im2.add([(0, 3)], [(True, False)], {'gene': 'sagA'})
        im2.add([(2, 4)], metadata={'gene': 'sagB'})
        im3.add([(1, 5)], [(False, True)], {'gene': 'sagC'})
        obs = IntervalMetadata.concat([im1, im2, im3])

        # expected bounds are shifted by the upper bounds of the
        # preceding objects (3, then 3 + 4)
        exp = IntervalMetadata(12)
        exp.add(bounds=[(0, 2)], fuzzy=[(True, True)])
        exp.add(bounds=[(3, 6)], fuzzy=[(True, False)],
                metadata={'gene': 'sagA'})
        exp.add(bounds=[(5, 7)], metadata={'gene': 'sagB'})
        exp.add(bounds=[(8, 12)], fuzzy=[(False, True)],
                metadata={'gene': 'sagC'})
        self.assertEqual(obs, exp)

    def test_merge(self):
        # empty + empty
        im = IntervalMetadata(self.upper_bound)
        self.im_empty.merge(im)
        self.assertEqual(self.im_empty, im)
        # empty + non-empty
        self.im_empty.merge(self.im_1)
        self.assertEqual(self.im_empty, self.im_1)
        # non-empty + non-empty
        self.im_empty.merge(self.im_2)
        self.im_2.merge(self.im_1)
        self.assertEqual(self.im_empty, self.im_2)

    def test_merge_unequal_upper_bounds(self):
        n = 3
        im1 = IntervalMetadata(n)
        for im in [self.im_empty, self.im_1]:
            with self.assertRaisesRegex(
                    ValueError,
                    r'not equal \(%d != %d\)' % (self.upper_bound, n)):
                im.merge(im1)

    def test_merge_to_unbounded(self):
        for im in [self.im_empty, self.im_1, IntervalMetadata(None)]:
            obs = IntervalMetadata(None)
            obs.merge(im)
            self.assertIsNone(obs.upper_bound)
            self.assertEqual(obs._intervals, im._intervals)

    def test_merge_unbounded_to_bounded(self):
        im = IntervalMetadata(None)
        with self.assertRaisesRegex(
                ValueError,
                'Cannot merge an unbound IntervalMetadata object '
                'to a bounded one'):
            self.im_1.merge(im)
        # original im is not changed
        self.assertIsNone(im.upper_bound)
        self.assertEqual(im._intervals, [])

    def test_sort(self):
        interval = Interval(
            self.im_2,
            [(1, 2), (3, 8)],
            metadata={'gene': 'sagA', 'bound': 0})
        im = deepcopy(self.im_2)
        self.im_2.sort(False)
        # check sorting does not have other side effects
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [self.im_2_2, self.im_2_1, interval])

        self.im_2.sort()
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [interval, self.im_2_1, self.im_2_2])

        self.im_empty.sort()
        self.assertEqual(self.im_empty, IntervalMetadata(self.upper_bound))

    def test_add_eq_upper_bound(self):
        self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound)],
                          metadata={'gene': 'sagA', 'bound': 0})
        self.assertTrue(self.im_empty._is_stale_tree)
        interval = self.im_empty._intervals[0]
        self.assertEqual(interval.bounds, [(1, 2), (4, self.upper_bound)])
        self.assertEqual(interval.metadata, {'gene': 'sagA', 'bound': 0})
        self.assertIsInstance(self.im_empty._interval_tree, IntervalTree)

    def test_add_gt_upper_bound(self):
        with self.assertRaises(ValueError):
            self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound + 1)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_add_eq_start_end_bound(self):
        for i in 0, 1, self.upper_bound:
            # test that no exception is raised
            self.im_empty.add(bounds=[(i, i)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_query_attribute(self):
        # Compare whole lists: a zip-based comparison would pass
        # vacuously if the query returned too few intervals.
        intervals = list(self.im_2._query_attribute({}))
        self.assertEqual(intervals, self.im_2._intervals)

        intervals = list(self.im_2._query_attribute(None))
        self.assertEqual(len(intervals), 0)

        for i in self.im_2._intervals:
            intervals = list(self.im_2._query_attribute(i.metadata))
            self.assertEqual(len(intervals), 1)
            self.assertEqual(intervals[0], i)

    def test_query_interval(self):
        intervals = list(self.im_2._query_interval((1, 2)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

        intervals = list(self.im_2._query_interval((3, 4)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        # compare by repr so the result order does not matter
        intervals = {repr(i) for i in self.im_2._query_interval((1, 7))}
        self.assertEqual(len(intervals), 2)
        self.assertSetEqual(intervals,
                            {repr(i) for i in self.im_2._intervals})

    def test_query_interval_upper_bound(self):
        intervals = list(
            self.im_2._query_interval(
                (self.upper_bound - 1, self.upper_bound)))
        self.assertEqual(intervals, [self.im_2_1])

    def test_query(self):
        intervals = list(self.im_2.query(bounds=[(1, 5)],
                                         metadata={'gene': 'sagA'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

    def test_query_empty(self):
        intervals = list(self.im_1.query())
        self.assertEqual(len(intervals), 0)

    def test_query_no_hits(self):
        intervals = list(self.im_2.query(bounds=[(self.upper_bound, 200)]))
        self.assertEqual(len(intervals), 0)

        intervals = list(self.im_2.query(metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

        intervals = list(self.im_2.query(bounds=[(1, 2)],
                                         metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

    def test_query_interval_only(self):
        for loc in [[(1, 7)], [(1, 2), (3, 4)]]:
            intervals = list(self.im_2.query(bounds=loc))
            self.assertEqual(len(intervals), 2)
            self.assertEqual(intervals[0], self.im_2_1)
            self.assertEqual(intervals[1], self.im_2_2)

    def test_query_metadata_only(self):
        intervals = list(self.im_2.query(metadata={'gene': 'sagB'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        intervals = list(self.im_2.query(metadata={'bound': 0}))
        self.assertEqual(len(intervals), 2)
        self.assertEqual(intervals[0], self.im_2_1)
        self.assertEqual(intervals[1], self.im_2_2)

    def test_drop(self):
        intvl = self.im_2._intervals[0]
        self.im_2.drop([intvl])
        self.assertEqual(len(self.im_2._intervals), 1)
        self.assertEqual(self.im_2._intervals[0], self.im_2_2)
        # test the intvl was set to dropped
        self.assertTrue(intvl.dropped)

    def test_drop_all(self):
        self.im_2.drop(self.im_2._intervals)
        self.assertEqual(self.im_2, self.im_empty)

    def test_drop_negate(self):
        intvl = self.im_2._intervals[0]
        self.im_2.drop([intvl], negate=True)
        self.assertEqual(len(self.im_2._intervals), 1)
        self.assertEqual(self.im_2._intervals[0], intvl)
        # test the dropped intvl was set to dropped
        self.assertTrue(self.im_2_2.dropped)

    def test_reverse(self):
        self.im_2._reverse()
        # The Interval objects are created only for their side effect:
        # they populate im_empty with the expected reversed intervals.
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(0, 6), (8, 9)],
            metadata={'gene': 'sagA', 'bound': 0})
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(5, 7)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})
        self.assertEqual(self.im_2, self.im_empty)

    def test_eq_ne(self):
        im1 = IntervalMetadata(10)
        im1.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])
        im1.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        # The ordering shouldn't matter
        im2 = IntervalMetadata(10)
        im2.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])
        im2.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])

        im3 = IntervalMetadata(10)
        im3.add(metadata={'gene': 'sagA', 'bound': '3'},
                bounds=[(0, 2), (4, 7)])
        im3.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        self.assertReallyEqual(im1, im2)
        self.assertReallyNotEqual(im1, im3)

    def test_ne_diff_bounds(self):
        # identical intervals but different upper bounds
        im1 = IntervalMetadata(10)
        im2 = IntervalMetadata(9)
        intvl = {'bounds': [(0, 1)], 'metadata': {'spam': 'foo'}}
        im1.add(**intvl)
        im2.add(**intvl)
        self.assertReallyNotEqual(im1, im2)

    def test_repr(self):
        def interval_regex(start, end, gene):
            # Raw strings keep the regex escapes intact without relying
            # on invalid string escape sequences such as "\(".
            return (r"Interval\(interval_metadata=<[0-9]+>, "
                    r"bounds=\[\(%d, %d\)\], "
                    r"fuzzy=\[\(False, False\)\], "
                    r"metadata={'gene': '%s'}\)" % (start, end, gene))

        exp = '0 interval features\n-------------------'
        self.assertEqual(repr(self.im_empty), exp)

        self.im_empty.add([(1, 2)], metadata={'gene': 'sagA'})
        exp = '\n'.join([
            '1 interval feature',
            '------------------',
            interval_regex(1, 2, 'sagA'),
        ])
        self.assertRegex(repr(self.im_empty), exp)

        for gene in 'sagB', 'sagC', 'sagD', 'sagE', 'sagF':
            self.im_empty.add([(3, 4)], metadata={'gene': gene})
        # the repr is expected to elide the middle intervals with '...'
        exp = '\n'.join([
            '6 interval features',
            '-------------------',
            interval_regex(1, 2, 'sagA'),
            interval_regex(3, 4, 'sagB'),
            '...',
            interval_regex(3, 4, 'sagE'),
            interval_regex(3, 4, 'sagF'),
        ])
        self.assertRegex(repr(self.im_empty), exp)
class TestIntervalMetadata(unittest.TestCase, ReallyEqualMixin):
    """Tests for ``IntervalMetadata``.

    ``setUp`` builds three fixtures that share one upper bound: an empty
    object (``im_empty``), one holding a single interval (``im_1``) and
    one holding two intervals (``im_2``).
    """

    def setUp(self):
        self.upper_bound = 10
        self.im_empty = IntervalMetadata(self.upper_bound)
        self.im_1 = IntervalMetadata(self.upper_bound)
        self.im_1_1 = Interval(
            interval_metadata=self.im_1,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2 = IntervalMetadata(self.upper_bound)
        self.im_2_1 = Interval(
            interval_metadata=self.im_2,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2_2 = Interval(
            interval_metadata=self.im_2,
            bounds=[(3, 5)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_copy_empty(self):
        obs = copy(self.im_empty)
        self.assertEqual(obs, self.im_empty)
        self.assertIsNot(obs._intervals, self.im_empty._intervals)
        self.assertIsNot(obs._interval_tree, self.im_empty._interval_tree)

    def test_copy(self):
        obs = copy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for copied, original in zip(obs._intervals, self.im_2._intervals):
            self.assertIsNot(copied, original)
            self.assertIsNot(copied.bounds, original.bounds)
            self.assertIsNot(copied.fuzzy, original.fuzzy)
            self.assertIsNot(copied._interval_metadata,
                             original._interval_metadata)
            self.assertIsNot(copied.metadata, original.metadata)
            # shallow copy: the metadata values themselves are shared
            for key in copied.metadata:
                self.assertIs(copied.metadata[key], original.metadata[key])

    def test_deepcopy(self):
        obs = deepcopy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for copied, original in zip(obs._intervals, self.im_2._intervals):
            self.assertIsNot(copied, original)
            self.assertIsNot(copied.bounds, original.bounds)
            self.assertIsNot(copied.fuzzy, original.fuzzy)
            self.assertIsNot(copied.metadata, original.metadata)

        # Only the last interval carries the mutable 'spam' list; mutating
        # it on the original must not show up in the deep copy.
        copied, original = obs._intervals[-1], self.im_2._intervals[-1]
        original.metadata['spam'].append(1)
        self.assertEqual(original.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0, 1]})
        self.assertEqual(copied.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_deepcopy_memo_is_respected(self):
        # the caller-supplied memo dict must be filled in by deepcopy
        memo = {}
        deepcopy(self.im_1, memo)
        self.assertGreater(len(memo), 2)

    def test_init(self):
        self.assertFalse(self.im_empty._is_stale_tree)
        self.assertEqual(self.im_empty._intervals, [])

    def test_init_upper_bound_lt_lower_bound(self):
        # test that no exception is raised
        IntervalMetadata(0)

        with self.assertRaises(ValueError):
            IntervalMetadata(-1)

    def test_num_interval_features(self):
        self.assertEqual(self.im_empty.num_interval_features, 0)
        self.assertEqual(self.im_1.num_interval_features, 1)
        self.assertEqual(self.im_2.num_interval_features, 2)

    def test_duplicate(self):
        '''Test query and drop methods on duplicate Intervals.'''
        intvl_1 = self.im_empty.add([(1, 2)])
        intvl_2 = self.im_empty.add([(1, 2)])
        self.assertEqual(len(list(self.im_empty.query([(1, 2)]))), 2)
        self.im_empty.drop([intvl_1])
        self.assertEqual(len(self.im_empty._intervals), 1)
        self.assertIs(self.im_empty._intervals[0], intvl_2)

    def test_duplicate_bounds(self):
        # repeated bounds within one interval must yield a single hit
        intvl = self.im_empty.add([(1, 2), (1, 2)])
        intvls = list(self.im_empty.query([(1, 2)]))
        self.assertEqual(len(intvls), 1)
        self.assertIs(intvl, intvls[0])

    def test_concat_empty(self):
        for i in 0, 1, 2:
            obs = IntervalMetadata.concat([self.im_empty] * i)
            exp = IntervalMetadata(self.upper_bound * i)
            self.assertEqual(obs, exp)

        obs = IntervalMetadata.concat([])
        self.assertEqual(obs, IntervalMetadata(0))

    def test_concat(self):
        im1 = IntervalMetadata(3)
        im2 = IntervalMetadata(4)
        im3 = IntervalMetadata(5)
        im1.add([(0, 2)], [(True, True)])
        im2.add([(0, 3)], [(True, False)], {'gene': 'sagA'})
        im2.add([(2, 4)], metadata={'gene': 'sagB'})
        im3.add([(1, 5)], [(False, True)], {'gene': 'sagC'})
        obs = IntervalMetadata.concat([im1, im2, im3])

        # expected bounds are shifted by the upper bounds of the
        # preceding objects (3, then 3 + 4)
        exp = IntervalMetadata(12)
        exp.add(bounds=[(0, 2)], fuzzy=[(True, True)])
        exp.add(bounds=[(3, 6)], fuzzy=[(True, False)],
                metadata={'gene': 'sagA'})
        exp.add(bounds=[(5, 7)], metadata={'gene': 'sagB'})
        exp.add(bounds=[(8, 12)], fuzzy=[(False, True)],
                metadata={'gene': 'sagC'})
        self.assertEqual(obs, exp)

    def test_sort(self):
        interval = Interval(
            self.im_2,
            [(1, 2), (3, 8)],
            metadata={'gene': 'sagA', 'bound': 0})
        im = deepcopy(self.im_2)
        self.im_2.sort(False)
        # check sorting does not have other side effects
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [self.im_2_2, self.im_2_1, interval])

        self.im_2.sort()
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [interval, self.im_2_1, self.im_2_2])

        self.im_empty.sort()
        self.assertEqual(self.im_empty, IntervalMetadata(self.upper_bound))

    def test_add_eq_upper_bound(self):
        self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound)],
                          metadata={'gene': 'sagA', 'bound': 0})
        self.assertTrue(self.im_empty._is_stale_tree)
        interval = self.im_empty._intervals[0]
        self.assertEqual(interval.bounds, [(1, 2), (4, self.upper_bound)])
        self.assertEqual(interval.metadata, {'gene': 'sagA', 'bound': 0})
        self.assertIsInstance(self.im_empty._interval_tree, IntervalTree)

    def test_add_gt_upper_bound(self):
        with self.assertRaises(ValueError):
            self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound + 1)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_add_eq_start_end_bound(self):
        for i in 0, 1, self.upper_bound:
            # test that no exception is raised
            self.im_empty.add(bounds=[(i, i)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_query_attribute(self):
        # Compare whole lists: a zip-based comparison would pass
        # vacuously if the query returned too few intervals.
        intervals = list(self.im_2._query_attribute({}))
        self.assertEqual(intervals, self.im_2._intervals)

        intervals = list(self.im_2._query_attribute(None))
        self.assertEqual(len(intervals), 0)

        for i in self.im_2._intervals:
            intervals = list(self.im_2._query_attribute(i.metadata))
            self.assertEqual(len(intervals), 1)
            self.assertEqual(intervals[0], i)

    def test_query_interval(self):
        intervals = list(self.im_2._query_interval((1, 2)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

        intervals = list(self.im_2._query_interval((3, 4)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        # compare by repr so the result order does not matter
        intervals = {repr(i) for i in self.im_2._query_interval((1, 7))}
        self.assertEqual(len(intervals), 2)
        self.assertSetEqual(intervals,
                            {repr(i) for i in self.im_2._intervals})

    def test_query_interval_upper_bound(self):
        intervals = list(
            self.im_2._query_interval(
                (self.upper_bound - 1, self.upper_bound)))
        self.assertEqual(intervals, [self.im_2_1])

    def test_query(self):
        intervals = list(self.im_2.query(bounds=[(1, 5)],
                                         metadata={'gene': 'sagA'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

    def test_query_empty(self):
        intervals = list(self.im_1.query())
        self.assertEqual(len(intervals), 0)

    def test_query_no_hits(self):
        intervals = list(self.im_2.query(bounds=[(self.upper_bound, 200)]))
        self.assertEqual(len(intervals), 0)

        intervals = list(self.im_2.query(metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

        intervals = list(self.im_2.query(bounds=[(1, 2)],
                                         metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

    def test_query_interval_only(self):
        for loc in [[(1, 7)], [(1, 2), (3, 4)]]:
            intervals = list(self.im_2.query(bounds=loc))
            self.assertEqual(len(intervals), 2)
            self.assertEqual(intervals[0], self.im_2_1)
            self.assertEqual(intervals[1], self.im_2_2)

    def test_query_metadata_only(self):
        intervals = list(self.im_2.query(metadata={'gene': 'sagB'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        intervals = list(self.im_2.query(metadata={'bound': 0}))
        self.assertEqual(len(intervals), 2)
        self.assertEqual(intervals[0], self.im_2_1)
        self.assertEqual(intervals[1], self.im_2_2)

    def test_drop(self):
        intvl = self.im_2._intervals[0]
        self.im_2.drop([intvl])
        self.assertEqual(len(self.im_2._intervals), 1)
        self.assertEqual(self.im_2._intervals[0], self.im_2_2)
        # test the intvl was set to dropped
        self.assertTrue(intvl.dropped)

    def test_drop_all(self):
        self.im_2.drop(self.im_2._intervals)
        self.assertEqual(self.im_2, self.im_empty)

    def test_reverse(self):
        self.im_2._reverse()
        # The Interval objects are created only for their side effect:
        # they populate im_empty with the expected reversed intervals.
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(0, 6), (8, 9)],
            metadata={'gene': 'sagA', 'bound': 0})
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(5, 7)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})
        self.assertEqual(self.im_2, self.im_empty)

    def test_eq_ne(self):
        im1 = IntervalMetadata(10)
        im1.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])
        im1.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        # The ordering shouldn't matter
        im2 = IntervalMetadata(10)
        im2.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])
        im2.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])

        im3 = IntervalMetadata(10)
        im3.add(metadata={'gene': 'sagA', 'bound': '3'},
                bounds=[(0, 2), (4, 7)])
        im3.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        self.assertReallyEqual(im1, im2)
        self.assertReallyNotEqual(im1, im3)

    def test_ne_diff_bounds(self):
        # identical intervals but different upper bounds
        im1 = IntervalMetadata(10)
        im2 = IntervalMetadata(9)
        intvl = {'bounds': [(0, 1)], 'metadata': {'spam': 'foo'}}
        im1.add(**intvl)
        im2.add(**intvl)
        self.assertReallyNotEqual(im1, im2)

    def test_repr(self):
        def interval_regex(start, end, gene):
            # Raw strings keep the regex escapes intact without relying
            # on invalid string escape sequences such as "\(".
            return (r"Interval\(interval_metadata=<[0-9]+>, "
                    r"bounds=\[\(%d, %d\)\], "
                    r"fuzzy=\[\(False, False\)\], "
                    r"metadata={'gene': '%s'}\)" % (start, end, gene))

        exp = '0 interval features\n-------------------'
        self.assertEqual(repr(self.im_empty), exp)

        self.im_empty.add([(1, 2)], metadata={'gene': 'sagA'})
        exp = '\n'.join([
            '1 interval feature',
            '------------------',
            interval_regex(1, 2, 'sagA'),
        ])
        self.assertRegex(repr(self.im_empty), exp)

        for gene in 'sagB', 'sagC', 'sagD', 'sagE', 'sagF':
            self.im_empty.add([(3, 4)], metadata={'gene': gene})
        # the repr is expected to elide the middle intervals with '...'
        exp = '\n'.join([
            '6 interval features',
            '-------------------',
            interval_regex(1, 2, 'sagA'),
            interval_regex(3, 4, 'sagB'),
            '...',
            interval_regex(3, 4, 'sagE'),
            interval_regex(3, 4, 'sagF'),
        ])
        self.assertRegex(repr(self.im_empty), exp)
class TestIntervalMetadata(unittest.TestCase, ReallyEqualMixin):
    """Tests for ``IntervalMetadata``.

    ``setUp`` builds three fixtures that share one upper bound: an empty
    object (``im_empty``), one holding a single interval (``im_1``) and
    one holding two intervals (``im_2``).
    """

    def setUp(self):
        self.upper_bound = 10
        self.im_empty = IntervalMetadata(self.upper_bound)
        self.im_1 = IntervalMetadata(self.upper_bound)
        self.im_1_1 = Interval(
            interval_metadata=self.im_1,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2 = IntervalMetadata(self.upper_bound)
        self.im_2_1 = Interval(
            interval_metadata=self.im_2,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2_2 = Interval(
            interval_metadata=self.im_2,
            bounds=[(3, 5)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_copy_empty(self):
        obs = copy(self.im_empty)
        self.assertEqual(obs, self.im_empty)
        self.assertIsNot(obs._intervals, self.im_empty._intervals)
        self.assertIsNot(obs._interval_tree, self.im_empty._interval_tree)

    def test_copy(self):
        obs = copy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for copied, original in zip(obs._intervals, self.im_2._intervals):
            self.assertIsNot(copied, original)
            self.assertIsNot(copied.bounds, original.bounds)
            self.assertIsNot(copied.fuzzy, original.fuzzy)
            self.assertIsNot(copied._interval_metadata,
                             original._interval_metadata)
            self.assertIsNot(copied.metadata, original.metadata)
            # shallow copy: the metadata values themselves are shared
            for key in copied.metadata:
                self.assertIs(copied.metadata[key], original.metadata[key])

    def test_deepcopy(self):
        obs = deepcopy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        for copied, original in zip(obs._intervals, self.im_2._intervals):
            self.assertIsNot(copied, original)
            self.assertIsNot(copied.bounds, original.bounds)
            self.assertIsNot(copied.fuzzy, original.fuzzy)
            self.assertIsNot(copied.metadata, original.metadata)

        # Only the last interval carries the mutable 'spam' list; mutating
        # it on the original must not show up in the deep copy.
        copied, original = obs._intervals[-1], self.im_2._intervals[-1]
        original.metadata['spam'].append(1)
        self.assertEqual(original.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0, 1]})
        self.assertEqual(copied.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_deepcopy_memo_is_respected(self):
        # the caller-supplied memo dict must be filled in by deepcopy
        memo = {}
        deepcopy(self.im_1, memo)
        self.assertGreater(len(memo), 2)

    def test_init(self):
        self.assertFalse(self.im_empty._is_stale_tree)
        self.assertEqual(self.im_empty._intervals, [])

    def test_init_upper_bound_lt_lower_bound(self):
        # an upper bound of zero is valid; only negative bounds are not
        try:
            IntervalMetadata(0)
        except ValueError:
            self.fail('`IntervalMetadata` raised ValueError unexpectedly')

        with self.assertRaises(ValueError):
            IntervalMetadata(-1)

    def test_num_interval_features(self):
        self.assertEqual(self.im_empty.num_interval_features, 0)
        self.assertEqual(self.im_1.num_interval_features, 1)
        self.assertEqual(self.im_2.num_interval_features, 2)

    def test_duplicate(self):
        '''Test query and drop methods on duplicate Intervals.'''
        intvl_1 = self.im_empty.add([(1, 2)])
        intvl_2 = self.im_empty.add([(1, 2)])
        self.assertEqual(len(list(self.im_empty.query([(1, 2)]))), 2)
        self.im_empty.drop([intvl_1])
        self.assertEqual(len(self.im_empty._intervals), 1)
        self.assertIs(self.im_empty._intervals[0], intvl_2)

    def test_duplicate_bounds(self):
        # repeated bounds within one interval must yield a single hit
        intvl = self.im_empty.add([(1, 2), (1, 2)])
        intvls = list(self.im_empty.query([(1, 2)]))
        self.assertEqual(len(intvls), 1)
        self.assertIs(intvl, intvls[0])

    def test_concat_empty(self):
        for i in 0, 1, 2:
            obs = IntervalMetadata.concat([self.im_empty] * i)
            exp = IntervalMetadata(self.upper_bound * i)
            self.assertEqual(obs, exp)

        obs = IntervalMetadata.concat([])
        self.assertEqual(obs, IntervalMetadata(0))

    def test_concat(self):
        im1 = IntervalMetadata(3)
        im2 = IntervalMetadata(4)
        im3 = IntervalMetadata(5)
        im1.add([(0, 2)], [(True, True)])
        im2.add([(0, 3)], [(True, False)], {'gene': 'sagA'})
        im2.add([(2, 4)], metadata={'gene': 'sagB'})
        im3.add([(1, 5)], [(False, True)], {'gene': 'sagC'})
        obs = IntervalMetadata.concat([im1, im2, im3])

        # expected bounds are shifted by the upper bounds of the
        # preceding objects (3, then 3 + 4)
        exp = IntervalMetadata(12)
        exp.add(bounds=[(0, 2)], fuzzy=[(True, True)])
        exp.add(bounds=[(3, 6)], fuzzy=[(True, False)],
                metadata={'gene': 'sagA'})
        exp.add(bounds=[(5, 7)], metadata={'gene': 'sagB'})
        exp.add(bounds=[(8, 12)], fuzzy=[(False, True)],
                metadata={'gene': 'sagC'})
        self.assertEqual(obs, exp)

    def test_sort(self):
        interval = Interval(
            self.im_2,
            [(1, 2), (3, 8)],
            metadata={'gene': 'sagA', 'bound': 0})
        im = deepcopy(self.im_2)
        self.im_2.sort(False)
        # check sorting does not have other side effects
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [self.im_2_2, self.im_2_1, interval])

        self.im_2.sort()
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [interval, self.im_2_1, self.im_2_2])

        self.im_empty.sort()
        self.assertEqual(self.im_empty, IntervalMetadata(self.upper_bound))

    def test_add_eq_upper_bound(self):
        self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound)],
                          metadata={'gene': 'sagA', 'bound': 0})
        self.assertTrue(self.im_empty._is_stale_tree)
        interval = self.im_empty._intervals[0]
        self.assertEqual(interval.bounds, [(1, 2), (4, self.upper_bound)])
        self.assertEqual(interval.metadata, {'gene': 'sagA', 'bound': 0})
        self.assertIsInstance(self.im_empty._interval_tree, IntervalTree)

    def test_add_gt_upper_bound(self):
        with self.assertRaises(ValueError):
            self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound + 1)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_add_eq_start_end_bound(self):
        # a bound whose start equals its end is rejected, and the error
        # message is expected to mention both coordinates
        for i in 0, 1, self.upper_bound:
            with self.assertRaisesRegex(ValueError,
                                        '{i}.*{i}'.format(i=i)):
                self.im_empty.add(bounds=[(i, i)],
                                  metadata={'gene': 'sagA', 'bound': 0})

    def test_query_attribute(self):
        # Compare whole lists: a zip-based comparison would pass
        # vacuously if the query returned too few intervals.
        intervals = list(self.im_2._query_attribute({}))
        self.assertEqual(intervals, self.im_2._intervals)

        intervals = list(self.im_2._query_attribute(None))
        self.assertEqual(len(intervals), 0)

        for i in self.im_2._intervals:
            intervals = list(self.im_2._query_attribute(i.metadata))
            self.assertEqual(len(intervals), 1)
            self.assertEqual(intervals[0], i)

    def test_query_interval(self):
        intervals = list(self.im_2._query_interval((1, 2)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

        intervals = list(self.im_2._query_interval((3, 4)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        # compare by repr so the result order does not matter
        intervals = {repr(i) for i in self.im_2._query_interval((1, 7))}
        self.assertEqual(len(intervals), 2)
        self.assertSetEqual(intervals,
                            {repr(i) for i in self.im_2._intervals})

    def test_query_interval_upper_bound(self):
        intervals = list(
            self.im_2._query_interval(
                (self.upper_bound - 1, self.upper_bound)))
        self.assertEqual(intervals, [self.im_2_1])

    def test_query(self):
        intervals = list(self.im_2.query(bounds=[(1, 5)],
                                         metadata={'gene': 'sagA'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

    def test_query_empty(self):
        intervals = list(self.im_1.query())
        self.assertEqual(len(intervals), 0)

    def test_query_no_hits(self):
        intervals = list(self.im_2.query(bounds=[(self.upper_bound, 200)]))
        self.assertEqual(len(intervals), 0)

        intervals = list(self.im_2.query(metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

        intervals = list(self.im_2.query(bounds=[(1, 2)],
                                         metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

    def test_query_interval_only(self):
        for loc in [[(1, 7)], [(1, 2), (3, 4)]]:
            intervals = list(self.im_2.query(bounds=loc))
            self.assertEqual(len(intervals), 2)
            self.assertEqual(intervals[0], self.im_2_1)
            self.assertEqual(intervals[1], self.im_2_2)

    def test_query_metadata_only(self):
        intervals = list(self.im_2.query(metadata={'gene': 'sagB'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        intervals = list(self.im_2.query(metadata={'bound': 0}))
        self.assertEqual(len(intervals), 2)
        self.assertEqual(intervals[0], self.im_2_1)
        self.assertEqual(intervals[1], self.im_2_2)

    def test_drop(self):
        intvl = self.im_2._intervals[0]
        self.im_2.drop([intvl])
        self.assertEqual(len(self.im_2._intervals), 1)
        self.assertEqual(self.im_2._intervals[0], self.im_2_2)
        # test the intvl was set to dropped
        self.assertTrue(intvl.dropped)

    def test_drop_all(self):
        self.im_2.drop(self.im_2._intervals)
        self.assertEqual(self.im_2, self.im_empty)

    def test_reverse(self):
        self.im_2._reverse()
        # The Interval objects are created only for their side effect:
        # they populate im_empty with the expected reversed intervals.
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(0, 6), (8, 9)],
            metadata={'gene': 'sagA', 'bound': 0})
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(5, 7)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})
        self.assertEqual(self.im_2, self.im_empty)

    def test_eq_ne(self):
        im1 = IntervalMetadata(10)
        im1.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])
        im1.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        # The ordering shouldn't matter
        im2 = IntervalMetadata(10)
        im2.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])
        im2.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])

        im3 = IntervalMetadata(10)
        im3.add(metadata={'gene': 'sagA', 'bound': '3'},
                bounds=[(0, 2), (4, 7)])
        im3.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        self.assertReallyEqual(im1, im2)
        self.assertReallyNotEqual(im1, im3)

    def test_ne_diff_bounds(self):
        # identical intervals but different upper bounds
        im1 = IntervalMetadata(10)
        im2 = IntervalMetadata(9)
        intvl = {'bounds': [(0, 1)], 'metadata': {'spam': 'foo'}}
        im1.add(**intvl)
        im2.add(**intvl)
        self.assertReallyNotEqual(im1, im2)

    def test_repr(self):
        def interval_regex(start, end, gene):
            # Raw strings keep the regex escapes intact without relying
            # on invalid string escape sequences such as "\(".
            return (r"Interval\(interval_metadata=<[0-9]+>, "
                    r"bounds=\[\(%d, %d\)\], "
                    r"fuzzy=\[\(False, False\)\], "
                    r"metadata={'gene': '%s'}\)" % (start, end, gene))

        exp = '0 interval features\n-------------------'
        self.assertEqual(repr(self.im_empty), exp)

        self.im_empty.add([(1, 2)], metadata={'gene': 'sagA'})
        exp = '\n'.join([
            '1 interval feature',
            '------------------',
            interval_regex(1, 2, 'sagA'),
        ])
        self.assertRegex(repr(self.im_empty), exp)

        for gene in 'sagB', 'sagC', 'sagD', 'sagE', 'sagF':
            self.im_empty.add([(3, 4)], metadata={'gene': gene})
        # the repr is expected to elide the middle intervals with '...'
        exp = '\n'.join([
            '6 interval features',
            '-------------------',
            interval_regex(1, 2, 'sagA'),
            interval_regex(3, 4, 'sagB'),
            '...',
            interval_regex(3, 4, 'sagE'),
            interval_regex(3, 4, 'sagF'),
        ])
        self.assertRegex(repr(self.im_empty), exp)
class TestIntervalMetadata(unittest.TestCase, ReallyEqualMixin):
    """Tests for ``IntervalMetadata``.

    Covers copying, construction, merging, concatenation, sorting, adding,
    querying, dropping, reversing, equality and ``repr``.
    """

    def setUp(self):
        # Fixtures shared by the tests: an empty container, a container with
        # one interval, and a container with two intervals. All three use
        # the same upper bound.
        self.upper_bound = 10
        self.im_empty = IntervalMetadata(self.upper_bound)
        self.im_1 = IntervalMetadata(self.upper_bound)
        self.im_1_1 = Interval(
            interval_metadata=self.im_1,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2 = IntervalMetadata(self.upper_bound)
        self.im_2_1 = Interval(
            interval_metadata=self.im_2,
            bounds=[(1, 2), (4, self.upper_bound)],
            metadata={'gene': 'sagA', 'bound': 0})
        self.im_2_2 = Interval(
            interval_metadata=self.im_2,
            bounds=[(3, 5)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def _assert_shallow_copy(self, obs, src):
        # Shared assertions for a shallow copy of `src`: the containers and
        # every per-interval attribute are distinct objects, while the
        # metadata *values* are the very same objects.
        self.assertIsNot(obs._intervals, src._intervals)
        self.assertIsNot(obs._interval_tree, src._interval_tree)
        # Guard against zip() silently truncating on a length mismatch.
        self.assertEqual(len(obs._intervals), len(src._intervals))
        for i1, i2 in zip(obs._intervals, src._intervals):
            self.assertIsNot(i1, i2)
            self.assertIsNot(i1.bounds, i2.bounds)
            self.assertIsNot(i1.fuzzy, i2.fuzzy)
            self.assertIsNot(i1._interval_metadata, i2._interval_metadata)
            self.assertIsNot(i1.metadata, i2.metadata)
            for k in i1.metadata:
                self.assertIs(i1.metadata[k], i2.metadata[k])

    def test_copy_empty(self):
        obs = copy(self.im_empty)
        self.assertEqual(obs, self.im_empty)
        self.assertIsNot(obs._intervals, self.im_empty._intervals)
        self.assertIsNot(obs._interval_tree, self.im_empty._interval_tree)

    def test_copy(self):
        obs = copy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self._assert_shallow_copy(obs, self.im_2)

    def test_deepcopy(self):
        obs = deepcopy(self.im_2)
        self.assertEqual(obs, self.im_2)
        self.assertIsNot(obs._intervals, self.im_2._intervals)
        self.assertIsNot(obs._interval_tree, self.im_2._interval_tree)

        self.assertEqual(len(obs._intervals), len(self.im_2._intervals))
        for i1, i2 in zip(obs._intervals, self.im_2._intervals):
            self.assertIsNot(i1, i2)
            self.assertIsNot(i1.bounds, i2.bounds)
            self.assertIsNot(i1.fuzzy, i2.fuzzy)
            self.assertIsNot(i1.metadata, i2.metadata)

        # Only the last interval (sagB) carries the mutable 'spam' list.
        # Mutating the original must not leak into the deep copy.
        copied, original = obs._intervals[-1], self.im_2._intervals[-1]
        original.metadata['spam'].append(1)
        self.assertEqual(original.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0, 1]})
        self.assertEqual(copied.metadata,
                         {'gene': 'sagB', 'bound': 0, 'spam': [0]})

    def test_deepcopy_memo_is_respected(self):
        memo = {}
        deepcopy(self.im_1, memo)
        self.assertGreater(len(memo), 2)

    def test_init(self):
        self.assertFalse(self.im_empty._is_stale_tree)
        self.assertEqual(self.im_empty._intervals, [])

    def test_init_upper_bound_lt_lower_bound(self):
        # test that no exception is raised
        IntervalMetadata(0)

        with self.assertRaises(ValueError):
            IntervalMetadata(-1)

    def test_upper_bound_is_none(self):
        im = IntervalMetadata(None)
        # should not raise error
        im.add([(0, 1000000000)])
        self.assertIsNone(im.upper_bound)
        with self.assertRaisesRegex(
                TypeError, r'upper bound is `None`'):
            im._reverse()
        with self.assertRaisesRegex(
                TypeError, r'upper bound is `None`'):
            IntervalMetadata.concat([self.im_1, im])

    def test_init_copy_from(self):
        for i in [None, 99, 999]:
            obs = IntervalMetadata(i, self.im_1)
            exp = IntervalMetadata(i)
            exp.add(bounds=[(1, 2), (4, self.upper_bound)],
                    metadata={'gene': 'sagA', 'bound': 0})
            self.assertEqual(obs, exp)

    def test_init_copy_from_empty(self):
        for i in [None, 0, 9, 99, 999]:
            obs = IntervalMetadata(i, self.im_empty)
            exp = IntervalMetadata(i)
            self.assertEqual(obs, exp)
            # test it is shallow copy
            self.assertIsNot(obs._intervals, self.im_empty._intervals)
            self.assertIsNot(obs._interval_tree,
                             self.im_empty._interval_tree)

    def test_init_copy_from_shallow_copy(self):
        obs = IntervalMetadata(self.upper_bound, self.im_2)
        self.assertEqual(self.im_2, obs)
        # test it is shallow copy
        self._assert_shallow_copy(obs, self.im_2)

    def test_init_copy_from_error(self):
        i = self.upper_bound - 1
        with self.assertRaisesRegex(
                ValueError, r'larger than upper bound \(%r\)' % i):
            IntervalMetadata(i, self.im_2)

    def test_num_interval_features(self):
        self.assertEqual(self.im_empty.num_interval_features, 0)
        self.assertEqual(self.im_1.num_interval_features, 1)
        self.assertEqual(self.im_2.num_interval_features, 2)

    def test_duplicate(self):
        '''Test query and drop methods on duplicate Intervals.'''
        intvl_1 = self.im_empty.add([(1, 2)])
        intvl_2 = self.im_empty.add([(1, 2)])
        self.assertEqual(len(list(self.im_empty.query([(1, 2)]))), 2)
        self.im_empty.drop([intvl_1])
        self.assertEqual(len(self.im_empty._intervals), 1)
        # assertIs reports both objects on failure, unlike assertTrue(a is b)
        self.assertIs(self.im_empty._intervals[0], intvl_2)

    def test_duplicate_bounds(self):
        intvl = self.im_empty.add([(1, 2), (1, 2)])
        intvls = list(self.im_empty.query([(1, 2)]))
        self.assertEqual(len(intvls), 1)
        self.assertIs(intvl, intvls[0])

    def test_concat_empty(self):
        for i in 0, 1, 2:
            obs = IntervalMetadata.concat([self.im_empty] * i)
            exp = IntervalMetadata(self.upper_bound * i)
            self.assertEqual(obs, exp)

        obs = IntervalMetadata.concat([])
        self.assertEqual(obs, IntervalMetadata(0))

    def test_concat(self):
        im1 = IntervalMetadata(3)
        im2 = IntervalMetadata(4)
        im3 = IntervalMetadata(5)
        im1.add([(0, 2)], [(True, True)])
        im2.add([(0, 3)], [(True, False)], {'gene': 'sagA'})
        im2.add([(2, 4)], metadata={'gene': 'sagB'})
        im3.add([(1, 5)], [(False, True)], {'gene': 'sagC'})
        obs = IntervalMetadata.concat([im1, im2, im3])

        # Expected bounds are the inputs' bounds offset by the summed upper
        # bounds of the preceding objects (3, then 3 + 4).
        exp = IntervalMetadata(12)
        exp.add(bounds=[(0, 2)], fuzzy=[(True, True)])
        exp.add(bounds=[(3, 6)], fuzzy=[(True, False)],
                metadata={'gene': 'sagA'})
        exp.add(bounds=[(5, 7)], metadata={'gene': 'sagB'})
        exp.add(bounds=[(8, 12)], fuzzy=[(False, True)],
                metadata={'gene': 'sagC'})
        self.assertEqual(obs, exp)

    def test_merge(self):
        # empty + empty
        im = IntervalMetadata(self.upper_bound)
        self.im_empty.merge(im)
        self.assertEqual(self.im_empty, im)
        # empty + non-empty
        self.im_empty.merge(self.im_1)
        self.assertEqual(self.im_empty, self.im_1)
        # non-empty + non-empty
        self.im_empty.merge(self.im_2)
        self.im_2.merge(self.im_1)
        self.assertEqual(self.im_empty, self.im_2)

    def test_merge_unequal_upper_bounds(self):
        n = 3
        im1 = IntervalMetadata(n)
        for im in [self.im_empty, self.im_1]:
            with self.assertRaisesRegex(
                    ValueError,
                    r'not equal \(%d != %d\)' % (self.upper_bound, n)):
                im.merge(im1)

    def test_merge_to_unbounded(self):
        for im in [self.im_empty, self.im_1, IntervalMetadata(None)]:
            obs = IntervalMetadata(None)
            obs.merge(im)
            self.assertIsNone(obs.upper_bound)
            self.assertEqual(obs._intervals, im._intervals)

    def test_merge_unbounded_to_bounded(self):
        im = IntervalMetadata(None)
        with self.assertRaisesRegex(
                ValueError,
                r'Cannot merge an unbound IntervalMetadata object '
                'to a bounded one'):
            self.im_1.merge(im)
        # original im is not changed
        self.assertIsNone(im.upper_bound)
        self.assertEqual(im._intervals, [])

    def test_sort(self):
        interval = Interval(
            self.im_2,
            [(1, 2), (3, 8)],
            metadata={'gene': 'sagA', 'bound': 0})
        im = deepcopy(self.im_2)
        self.im_2.sort(False)
        # check sorting does not have other side effects
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [self.im_2_2, self.im_2_1, interval])

        self.im_2.sort()
        self.assertEqual(im, self.im_2)
        self.assertEqual(self.im_2._intervals,
                         [interval, self.im_2_1, self.im_2_2])

        self.im_empty.sort()
        self.assertEqual(self.im_empty, IntervalMetadata(self.upper_bound))

    def test_add_eq_upper_bound(self):
        self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound)],
                          metadata={'gene': 'sagA', 'bound': 0})
        self.assertTrue(self.im_empty._is_stale_tree)
        interval = self.im_empty._intervals[0]
        self.assertEqual(interval.bounds, [(1, 2), (4, self.upper_bound)])
        self.assertEqual(interval.metadata, {'gene': 'sagA', 'bound': 0})
        self.assertIsInstance(self.im_empty._interval_tree, IntervalTree)

    def test_add_gt_upper_bound(self):
        with self.assertRaises(ValueError):
            self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound+1)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_add_eq_start_end_bound(self):
        for i in 0, 1, self.upper_bound:
            # test that no exception is raised
            self.im_empty.add(bounds=[(i, i)],
                              metadata={'gene': 'sagA', 'bound': 0})

    def test_query_attribute(self):
        # Materialise the result and compare whole lists: a pairwise zip()
        # comparison would pass vacuously if the query yielded too few
        # (or no) intervals.
        intervals = list(self.im_2._query_attribute({}))
        self.assertEqual(intervals, self.im_2._intervals)

        intervals = list(self.im_2._query_attribute(None))
        self.assertEqual(len(intervals), 0)

        for i in self.im_2._intervals:
            intervals = list(self.im_2._query_attribute(i.metadata))
            self.assertEqual(len(intervals), 1)
            self.assertEqual(intervals[0], i)

    def test_query_interval(self):
        intervals = list(self.im_2._query_interval((1, 2)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

        intervals = list(self.im_2._query_interval((3, 4)))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        # Compare by repr in sets so the result order does not matter.
        intervals = {repr(i) for i in self.im_2._query_interval((1, 7))}
        self.assertEqual(len(intervals), 2)
        self.assertSetEqual(intervals,
                            {repr(i) for i in self.im_2._intervals})

    def test_query_interval_upper_bound(self):
        intervals = list(self.im_2._query_interval(
            (self.upper_bound-1, self.upper_bound)))
        self.assertEqual(intervals, [self.im_2_1])

    def test_query(self):
        intervals = list(self.im_2.query(bounds=[(1, 5)],
                                         metadata={'gene': 'sagA'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_1)

    def test_query_empty(self):
        intervals = list(self.im_1.query())
        self.assertEqual(len(intervals), 0)

    def test_query_no_hits(self):
        intervals = list(self.im_2.query(bounds=[(self.upper_bound, 200)]))
        self.assertEqual(len(intervals), 0)

        intervals = list(self.im_2.query(metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

        intervals = list(self.im_2.query(bounds=[(1, 2)],
                                         metadata={'gene': 'sagC'}))
        self.assertEqual(len(intervals), 0)

    def test_query_interval_only(self):
        for loc in [[(1, 7)],
                    [(1, 2), (3, 4)]]:
            intervals = list(self.im_2.query(bounds=loc))
            self.assertEqual(len(intervals), 2)
            self.assertEqual(intervals[0], self.im_2_1)
            self.assertEqual(intervals[1], self.im_2_2)

    def test_query_metadata_only(self):
        intervals = list(self.im_2.query(metadata={'gene': 'sagB'}))
        self.assertEqual(len(intervals), 1)
        self.assertEqual(intervals[0], self.im_2_2)

        intervals = list(self.im_2.query(metadata={'bound': 0}))
        self.assertEqual(len(intervals), 2)
        self.assertEqual(intervals[0], self.im_2_1)
        self.assertEqual(intervals[1], self.im_2_2)

    def test_drop(self):
        intvl = self.im_2._intervals[0]
        self.im_2.drop([intvl])
        self.assertEqual(len(self.im_2._intervals), 1)
        self.assertEqual(self.im_2._intervals[0], self.im_2_2)
        # test the intvl was set to dropped
        self.assertTrue(intvl.dropped)

    def test_drop_all(self):
        self.im_2.drop(self.im_2._intervals)
        self.assertEqual(self.im_2, self.im_empty)

    def test_drop_negate(self):
        intvl = self.im_2._intervals[0]
        self.im_2.drop([intvl], negate=True)
        self.assertEqual(len(self.im_2._intervals), 1)
        self.assertEqual(self.im_2._intervals[0], intvl)
        # test the dropped intvl was set to dropped
        self.assertTrue(self.im_2_2.dropped)

    def test_reverse(self):
        self.im_2._reverse()
        # Build the expected result inside im_empty: constructing an
        # Interval with `interval_metadata=` is how setUp populates the
        # fixtures, so the return values are not needed here.
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(0, 6), (8, 9)],
            metadata={'gene': 'sagA', 'bound': 0})
        Interval(
            interval_metadata=self.im_empty,
            bounds=[(5, 7)],
            metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]})
        self.assertEqual(self.im_2, self.im_empty)

    def test_eq_ne(self):
        im1 = IntervalMetadata(10)
        im1.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])
        im1.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        # The ordering shouldn't matter
        im2 = IntervalMetadata(10)
        im2.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])
        im2.add(metadata={'gene': 'sagA', 'bound': '0'},
                bounds=[(0, 2), (4, 7)])

        im3 = IntervalMetadata(10)
        im3.add(metadata={'gene': 'sagA', 'bound': '3'},
                bounds=[(0, 2), (4, 7)])
        im3.add(metadata={'gene': 'sagB', 'bound': '3'},
                bounds=[(3, 5)])

        self.assertReallyEqual(im1, im2)
        self.assertReallyNotEqual(im1, im3)

    def test_ne_diff_bounds(self):
        im1 = IntervalMetadata(10)
        im2 = IntervalMetadata(9)
        intvl = {'bounds': [(0, 1)], 'metadata': {'spam': 'foo'}}
        im1.add(**intvl)
        im2.add(**intvl)
        self.assertReallyNotEqual(im1, im2)

    def test_repr(self):
        exp = ("0 interval features\n"
               "-------------------")
        self.assertEqual(repr(self.im_empty), exp)

        self.im_empty.add([(1, 2)], metadata={'gene': 'sagA'})
        # The id inside <...> varies between runs, so match it by regex.
        exp = ("1 interval feature\n"
               "------------------\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(1, 2\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagA'}\)")
        self.assertRegex(repr(self.im_empty), exp)

        self.im_empty.add([(3, 4)], metadata={'gene': 'sagB'})
        self.im_empty.add([(3, 4)], metadata={'gene': 'sagC'})
        self.im_empty.add([(3, 4)], metadata={'gene': 'sagD'})
        self.im_empty.add([(3, 4)], metadata={'gene': 'sagE'})
        self.im_empty.add([(3, 4)], metadata={'gene': 'sagF'})
        # The ellipsis line is escaped so it only matches a literal "...",
        # not any three characters.
        exp = ("6 interval features\n"
               "-------------------\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(1, 2\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagA'}\)\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(3, 4\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagB'}\)\n"
               r"\.\.\.\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(3, 4\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagE'}\)\n"
               r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(3, 4\)\], "
               r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagF'}\)")
        self.assertRegex(repr(self.im_empty), exp)