class DissimilarityMatrixTests(DissimilarityMatrixTestData):
    """Unit tests for ``DissimilarityMatrix``.

    The raw ``dm_*_data`` fixtures are read from ``self`` after calling the
    parent ``setUp`` (presumably defined by ``DissimilarityMatrixTestData``;
    verify against that class). ``setUp`` wraps them in
    ``DissimilarityMatrix`` objects and builds parallel lists of expected
    shapes, sizes, transposes and redundant forms that the loop-based tests
    zip together.
    """

    def setUp(self):
        super(DissimilarityMatrixTests, self).setUp()

        self.dm_1x1 = DissimilarityMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DissimilarityMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_2x2_asym = DissimilarityMatrix(self.dm_2x2_asym_data,
                                               ['a', 'b'])
        self.dm_3x3 = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])

        # The lists below are index-aligned with self.dms.
        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_2x2_asym, self.dm_3x3]
        self.dm_shapes = [(1, 1), (2, 2), (2, 2), (3, 3)]
        self.dm_sizes = [1, 4, 4, 9]
        self.dm_transposes = [
            self.dm_1x1, self.dm_2x2,
            DissimilarityMatrix([[0, -2], [1, 0]], ['a', 'b']), self.dm_3x3]
        self.dm_redundant_forms = [np.array(self.dm_1x1_data),
                                   np.array(self.dm_2x2_data),
                                   np.array(self.dm_2x2_asym_data),
                                   np.array(self.dm_3x3_data)]

    def test_io(self):
        # Very basic check that read/write public API is present and appears
        # to be functioning. Roundtrip from memory -> disk -> memory and
        # ensure results match.
        fh = StringIO()
        self.dm_3x3.write(fh)
        fh.seek(0)
        deserialized = DissimilarityMatrix.read(fh)
        self.assertEqual(deserialized, self.dm_3x3)
        # Exact type is required here, so isinstance() would be too lenient.
        self.assertIs(type(deserialized), DissimilarityMatrix)

    def test_deprecated_io(self):
        # Same roundtrip as test_io, but through to_file/from_file, each of
        # which is expected to emit a UserWarning.
        fh = StringIO()
        npt.assert_warns(UserWarning, self.dm_3x3.to_file, fh)
        fh.seek(0)
        deserialized = npt.assert_warns(UserWarning,
                                        DissimilarityMatrix.from_file, fh)
        self.assertEqual(deserialized, self.dm_3x3)
        self.assertIs(type(deserialized), DissimilarityMatrix)

    def test_init_from_dm(self):
        ids = ['foo', 'bar', 'baz']

        # DissimilarityMatrix -> DissimilarityMatrix
        exp = DissimilarityMatrix(self.dm_3x3_data, ids)
        obs = DissimilarityMatrix(self.dm_3x3, ids)
        self.assertEqual(obs, exp)
        # Test that copy of data is not made.
        self.assertIs(obs.data, self.dm_3x3.data)
        obs.data[0, 1] = 424242
        self.assertTrue(np.array_equal(obs.data, self.dm_3x3.data))

        # DistanceMatrix -> DissimilarityMatrix
        exp = DissimilarityMatrix(self.dm_3x3_data, ids)
        obs = DissimilarityMatrix(
            DistanceMatrix(self.dm_3x3_data, ('a', 'b', 'c')), ids)
        self.assertEqual(obs, exp)

        # DissimilarityMatrix -> DistanceMatrix
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(self.dm_2x2_asym, ['foo', 'bar'])

    def test_init_no_ids(self):
        # Omitting IDs should be equivalent to passing '0', '1', '2', ...
        exp = DissimilarityMatrix(self.dm_3x3_data, ('0', '1', '2'))
        obs = DissimilarityMatrix(self.dm_3x3_data)
        self.assertEqual(obs, exp)
        self.assertEqual(obs['1', '2'], 12.0)

    def test_init_invalid_input(self):
        # Empty data.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix([], [])

        # Another type of empty data.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix(np.empty((0, 0)), [])

        # Invalid number of dimensions.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix([1, 2, 3], ['a'])

        # Dimensions don't match.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix([[1, 2, 3]], ['a'])

        data = [[0, 1], [1, 0]]

        # Duplicate IDs.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix(data, ['a', 'a'])

        # Number of IDs don't match dimensions.
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix(data, ['a', 'b', 'c'])

        # Non-hollow.
        data = [[0.0, 1.0], [1.0, 0.01]]
        with self.assertRaises(DissimilarityMatrixError):
            DissimilarityMatrix(data, ['a', 'b'])

    def test_data(self):
        for dm, exp in zip(self.dms, self.dm_redundant_forms):
            obs = dm.data
            self.assertTrue(np.array_equal(obs, exp))

        # The data attribute must not be assignable.
        with self.assertRaises(AttributeError):
            self.dm_3x3.data = 'foo'

    def test_ids(self):
        obs = self.dm_3x3.ids
        self.assertEqual(obs, ('a', 'b', 'c'))

        # Test that we overwrite the existing IDs and that the ID index is
        # correctly rebuilt.
        new_ids = ['foo', 'bar', 'baz']
        self.dm_3x3.ids = new_ids
        obs = self.dm_3x3.ids
        self.assertEqual(obs, tuple(new_ids))
        self.assertTrue(np.array_equal(self.dm_3x3['bar'],
                                       np.array([0.01, 0.0, 12.0])))
        with self.assertRaises(MissingIDError):
            self.dm_3x3['b']

    def test_ids_invalid_input(self):
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.ids = ['foo', 'bar']
        # Make sure that we can still use the dissimilarity matrix after
        # trying to be evil.
        obs = self.dm_3x3.ids
        self.assertEqual(obs, ('a', 'b', 'c'))

    def test_dtype(self):
        for dm in self.dms:
            self.assertEqual(dm.dtype, np.float64)

    def test_shape(self):
        for dm, shape in zip(self.dms, self.dm_shapes):
            self.assertEqual(dm.shape, shape)

    def test_size(self):
        for dm, size in zip(self.dms, self.dm_sizes):
            self.assertEqual(dm.size, size)

    def test_transpose(self):
        for dm, transpose in zip(self.dms, self.dm_transposes):
            self.assertEqual(dm.T, transpose)
            self.assertEqual(dm.transpose(), transpose)
            # We should get a reference to a different object back, even if
            # the transpose is the same as the original.
            self.assertIsNot(dm.transpose(), dm)

    def test_index(self):
        self.assertEqual(self.dm_3x3.index('a'), 0)
        self.assertEqual(self.dm_3x3.index('b'), 1)
        self.assertEqual(self.dm_3x3.index('c'), 2)

        with self.assertRaises(MissingIDError):
            self.dm_3x3.index('d')

        # A non-string lookup key is not an ID either.
        with self.assertRaises(MissingIDError):
            self.dm_3x3.index(1)

    def test_redundant_form(self):
        for dm, redundant in zip(self.dms, self.dm_redundant_forms):
            obs = dm.redundant_form()
            self.assertTrue(np.array_equal(obs, redundant))

    def test_copy(self):
        dm_copy = self.dm_2x2.copy()
        self.assertEqual(dm_copy, self.dm_2x2)
        self.assertIsNot(dm_copy.data, self.dm_2x2.data)
        # deepcopy doesn't actually create a copy of the IDs because it is a
        # tuple of strings, which is fully immutable.
        self.assertIs(dm_copy.ids, self.dm_2x2.ids)

        new_ids = ['hello', 'world']
        dm_copy.ids = new_ids
        self.assertNotEqual(dm_copy.ids, self.dm_2x2.ids)

        # Mutating the copy's data must not leak into the original.
        dm_copy = self.dm_2x2.copy()
        dm_copy.data[0, 1] = 0.0001
        self.assertFalse(np.array_equal(dm_copy.data, self.dm_2x2.data))

    def test_filter_no_filtering(self):
        # Don't actually filter anything -- ensure we get back a different
        # object.
        obs = self.dm_3x3.filter(['a', 'b', 'c'])
        self.assertEqual(obs, self.dm_3x3)
        self.assertIsNot(obs, self.dm_3x3)

    def test_filter_reorder(self):
        # Don't filter anything, but reorder the distance matrix.
        order = ['c', 'a', 'b']
        exp = DissimilarityMatrix(
            [[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]], order)
        obs = self.dm_3x3.filter(order)
        self.assertEqual(obs, exp)

    def test_filter_single_id(self):
        ids = ['b']
        exp = DissimilarityMatrix([[0]], ids)
        obs = self.dm_2x2_asym.filter(ids)
        self.assertEqual(obs, exp)

    def test_filter_asymmetric(self):
        # 2x2
        ids = ['b', 'a']
        exp = DissimilarityMatrix([[0, -2], [1, 0]], ids)
        obs = self.dm_2x2_asym.filter(ids)
        self.assertEqual(obs, exp)

        # 3x3
        dm = DissimilarityMatrix([[0, 10, 53], [42, 0, 22.5], [53, 1, 0]],
                                 ('bro', 'brah', 'breh'))
        ids = ['breh', 'brah']
        exp = DissimilarityMatrix([[0, 1], [22.5, 0]], ids)
        obs = dm.filter(ids)
        self.assertEqual(obs, exp)

    def test_filter_subset(self):
        ids = ('c', 'a')
        exp = DissimilarityMatrix([[0, 4.2], [4.2, 0]], ids)
        obs = self.dm_3x3.filter(ids)
        self.assertEqual(obs, exp)

        ids = ('b', 'a')
        exp = DissimilarityMatrix([[0, 0.01], [0.01, 0]], ids)
        obs = self.dm_3x3.filter(ids)
        self.assertEqual(obs, exp)

        # 4x4
        dm = DissimilarityMatrix([[0, 1, 55, 7], [1, 0, 16, 1],
                                  [55, 16, 0, 23], [7, 1, 23, 0]])
        ids = np.asarray(['3', '0', '1'])
        exp = DissimilarityMatrix([[0, 7, 1], [7, 0, 1], [1, 1, 0]], ids)
        obs = dm.filter(ids)
        self.assertEqual(obs, exp)

    def test_filter_duplicate_ids(self):
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.filter(['c', 'a', 'c'])

    def test_filter_missing_ids(self):
        with self.assertRaises(MissingIDError):
            self.dm_3x3.filter(['c', 'bro'])

    def test_filter_missing_ids_strict_false(self):
        # no exception should be raised
        ids = ('c', 'a')
        exp = DissimilarityMatrix([[0, 4.2], [4.2, 0]], ids)
        obs = self.dm_3x3.filter(['c', 'a', 'not found'], strict=False)
        self.assertEqual(obs, exp)

    def test_filter_empty_ids(self):
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3.filter([])

    def test_str(self):
        for dm in self.dms:
            obs = str(dm)
            # Do some very light testing here to make sure we're getting a
            # non-empty string back. We don't want to test the exact
            # formatting.
            self.assertTrue(obs)

    def test_eq(self):
        # The == operator is spelled out (rather than assertEqual) because
        # the operator itself is what is being tested.
        for dm in self.dms:
            dm_copy = dm.copy()
            self.assertTrue(dm == dm)
            self.assertTrue(dm_copy == dm_copy)
            self.assertTrue(dm == dm_copy)
            self.assertTrue(dm_copy == dm)

        self.assertFalse(self.dm_1x1 == self.dm_3x3)

    def test_ne(self):
        # Wrong class.
        self.assertTrue(self.dm_3x3 != 'foo')

        # Wrong shape.
        self.assertTrue(self.dm_3x3 != self.dm_1x1)

        # Wrong IDs.
        other = self.dm_3x3.copy()
        other.ids = ['foo', 'bar', 'baz']
        self.assertTrue(self.dm_3x3 != other)

        # Wrong data.
        other = self.dm_3x3.copy()
        other.data[1, 0] = 42.42
        self.assertTrue(self.dm_3x3 != other)

        self.assertFalse(self.dm_2x2 != self.dm_2x2)

    def test_contains(self):
        self.assertIn('a', self.dm_3x3)
        self.assertIn('b', self.dm_3x3)
        self.assertIn('c', self.dm_3x3)
        self.assertNotIn('d', self.dm_3x3)

    def test_getslice(self):
        # Slice of first dimension only. Test that __getslice__ defers to
        # __getitem__.
        obs = self.dm_2x2[1:]
        self.assertTrue(np.array_equal(obs, np.array([[0.123, 0.0]])))
        self.assertIs(type(obs), np.ndarray)

    def test_getitem_by_id(self):
        obs = self.dm_1x1['a']
        self.assertTrue(np.array_equal(obs, np.array([0.0])))

        obs = self.dm_2x2_asym['b']
        self.assertTrue(np.array_equal(obs, np.array([-2.0, 0.0])))

        obs = self.dm_3x3['c']
        self.assertTrue(np.array_equal(obs, np.array([4.2, 12.0, 0.0])))

        with self.assertRaises(MissingIDError):
            self.dm_2x2['c']

    def test_getitem_by_id_pair(self):
        # Same object.
        self.assertEqual(self.dm_1x1['a', 'a'], 0.0)

        # Different objects (symmetric).
        self.assertEqual(self.dm_3x3['b', 'c'], 12.0)
        self.assertEqual(self.dm_3x3['c', 'b'], 12.0)

        # Different objects (asymmetric).
        self.assertEqual(self.dm_2x2_asym['a', 'b'], 1.0)
        self.assertEqual(self.dm_2x2_asym['b', 'a'], -2.0)

        with self.assertRaises(MissingIDError):
            self.dm_2x2['a', 'c']

    def test_getitem_ndarray_indexing(self):
        # Single element access.
        obs = self.dm_3x3[0, 1]
        self.assertEqual(obs, 0.01)

        # Single element access (via two __getitem__ calls).
        obs = self.dm_3x3[0][1]
        self.assertEqual(obs, 0.01)

        # Row access.
        obs = self.dm_3x3[1]
        self.assertTrue(np.array_equal(obs, np.array([0.01, 0.0, 12.0])))
        self.assertIs(type(obs), np.ndarray)

        # Grab all data.
        obs = self.dm_3x3[:, :]
        self.assertTrue(np.array_equal(obs, self.dm_3x3.data))
        self.assertIs(type(obs), np.ndarray)

        with self.assertRaises(IndexError):
            self.dm_3x3[:, 3]

    def test_validate_invalid_dtype(self):
        # np.array of Python ints yields an integer dtype, which _validate
        # is expected to reject.
        with self.assertRaises(DissimilarityMatrixError):
            self.dm_3x3._validate(np.array([[0, 42], [42, 0]]), ['a', 'b'])

    def test_pprint_ids(self):
        # No truncation.
        exp = 'a, b, c'
        obs = self.dm_3x3._pprint_ids()
        self.assertEqual(obs, exp)

        # Truncation.
        exp = 'a, b, ...'
        obs = self.dm_3x3._pprint_ids(max_chars=5)
        self.assertEqual(obs, exp)
dm = np.zeros([len(samples),len(samples)]) pm = np.zeros([len(samples),len(samples)]) # fill matrices with values for s1, s2, d, p in zip(mash_vec[0],mash_vec[1],mash_vec[2],mash_vec[3]): i1 = samples.index(s1) i2 = samples.index(s2) print('s1: %s, s2: %s, i1: %s, i2: %s, d: %s, p: %s' % (s1, s2, i1, i2, d, p)) dm[i1,i2] = d dm[i2,i1] = d pm[i1,i2] = p pm[i2,i1] = p ids = [os.path.basename(x) for x in samples] sk_dm = DissimilarityMatrix(dm, ids=ids) sk_pm = DissimilarityMatrix(pm, ids=ids) sk_dm.write(output['dist_matrix']) sk_pm.write(output['p_matrix']) #### Mash rules rule mash: input: expand(mash_dir + '{sample}/mash/{sample}.msh', sample=samples), expand(mash_dir + '{sample}/mash/{sample}.refseq.txt', sample=samples), mash_dir + 'combined_analysis/mash.dist.dm', mash_dir + 'combined_analysis/mash.dist.p'