示例#1
0
    def setUp(self):
        """Create the matrix fixtures and their parallel expectation lists.

        Each list assigned below has four entries, given in the same order
        as ``self.dms``: 1x1, 2x2, asymmetric 2x2, 3x3.
        """
        super(DissimilarityMatrixTests, self).setUp()

        # Matrices under test, built from the raw data supplied by the base
        # class.
        self.dm_1x1 = DissimilarityMatrix(self.dm_1x1_data, ['a'])
        self.dm_2x2 = DissimilarityMatrix(self.dm_2x2_data, ['a', 'b'])
        self.dm_2x2_asym = DissimilarityMatrix(
            self.dm_2x2_asym_data, ['a', 'b'])
        self.dm_3x3 = DissimilarityMatrix(
            self.dm_3x3_data, ['a', 'b', 'c'])
        self.dms = [self.dm_1x1, self.dm_2x2, self.dm_2x2_asym, self.dm_3x3]

        # File-format fixtures associated with each matrix.
        self.dm_f_lines = [
            DM_1x1_F,
            DM_2x2_F,
            self.dm_2x2_asym_lines,
            self.dm_3x3_lines,
        ]
        self.dm_fs = [
            self.dm_1x1_f,
            self.dm_2x2_f,
            self.dm_2x2_asym_f,
            self.dm_3x3_f,
        ]

        # Expected shape of each matrix; the size is rows times columns.
        self.dm_shapes = [(1, 1), (2, 2), (2, 2), (3, 3)]
        self.dm_sizes = [rows * cols for rows, cols in self.dm_shapes]

        # Only the asymmetric matrix gets a separately built transpose; the
        # other three entries reuse the original objects.
        asym_transposed = DissimilarityMatrix([[0, -2], [1, 0]], ['a', 'b'])
        self.dm_transposes = [
            self.dm_1x1, self.dm_2x2, asym_transposed, self.dm_3x3
        ]

        # The raw input data converted to NumPy arrays.
        raw_inputs = (
            self.dm_1x1_data,
            self.dm_2x2_data,
            self.dm_2x2_asym_data,
            self.dm_3x3_data,
        )
        self.dm_redundant_forms = [np.array(raw) for raw in raw_inputs]
示例#2
0
    def test_init_invalid_input(self):
        """Every malformed data/ID combination must raise on construction."""
        square = [[0, 1], [1, 0]]

        rejected_inputs = [
            # Empty data.
            ([], []),
            # Another type of empty data.
            (np.empty((0, 0)), []),
            # Invalid number of dimensions.
            ([1, 2, 3], ['a']),
            # Dimensions don't match.
            ([[1, 2, 3]], ['a']),
            # Duplicate IDs.
            (square, ['a', 'a']),
            # Number of IDs doesn't match dimensions.
            (square, ['a', 'b', 'c']),
            (square, []),
            # Non-hollow.
            ([[0.0, 1.0], [1.0, 0.01]], ['a', 'b']),
        ]

        for bad_data, bad_ids in rejected_inputs:
            with self.assertRaises(DissimilarityMatrixError):
                DissimilarityMatrix(bad_data, bad_ids)
示例#3
0
    def test_filter_asymmetric(self):
        """Filter asymmetric matrices and compare against literal results."""
        # 2x2: request both IDs in reversed order.
        reversed_ids = ['b', 'a']
        expected = DissimilarityMatrix([[0, -2], [1, 0]], reversed_ids)
        self.assertEqual(self.dm_2x2_asym.filter(reversed_ids), expected)

        # 3x3: keep two of the three IDs, in a different order.
        source = DissimilarityMatrix(
            [[0, 10, 53],
             [42, 0, 22.5],
             [53, 1, 0]],
            ('bro', 'brah', 'breh'))
        kept_ids = ['breh', 'brah']
        expected = DissimilarityMatrix([[0, 1], [22.5, 0]], kept_ids)
        self.assertEqual(source.filter(kept_ids), expected)
示例#4
0
    def test_from_iterable_asymmetric_data(self):
        """Build a matrix from a generator using a non-symmetric metric."""
        numbers = (value for value in range(4))

        def signed_gap(a, b):
            # Order-dependent on purpose: swapping the arguments flips the
            # sign.
            return b - a

        expected = DissimilarityMatrix([
            [0, 1, 2, 3],
            [-1, 0, 1, 2],
            [-2, -1, 0, 1],
            [-3, -2, -1, 0],
        ])
        observed = DissimilarityMatrix.from_iterable(numbers, signed_gap)
        self.assertEqual(observed, expected)
示例#5
0
    def test_preprocess_input_raises_error(self):
        """``_preprocess_input`` must reject each unsupported argument set."""
        # Requires a DistanceMatrix.
        with self.assertRaises(TypeError):
            _preprocess_input(
                DissimilarityMatrix([[0, 2], [3, 0]], ['a', 'b']),
                [1, 2],
                None)

        # (grouping, column) pairs that must each raise ValueError when
        # combined with the valid distance matrix fixture.
        value_error_cases = (
            # Requires column if DataFrame.
            (self.df, None),
            # Cannot provide column if not data frame.
            (self.grouping, 'Group'),
            # Column must exist in data frame.
            (self.df, 'foo'),
            # All distance matrix IDs must be in data frame.
            (self.df_missing_id, 'Group'),
            # Grouping vector length must match number of objects in dm.
            ([1, 2], None),
            # Grouping vector cannot have only unique values.
            ([1, 2, 3], None),
            # Grouping vector cannot have only a single group.
            ([1, 1, 1], None),
        )

        for grouping, column in value_error_cases:
            with self.assertRaises(ValueError):
                _preprocess_input(self.dm, grouping, column)
示例#6
0
    def test_init_invalid_input(self):
        """``CategoricalStats`` must reject each unsupported argument set."""
        # Requires a DistanceMatrix.
        with self.assertRaises(TypeError):
            CategoricalStats(
                DissimilarityMatrix([[0, 2], [3, 0]], ['a', 'b']),
                [1, 2])

        # (grouping, extra keyword arguments) pairs that must each raise
        # ValueError when combined with the valid distance matrix fixture.
        value_error_cases = (
            # Requires column if DataFrame.
            (self.df, {}),
            # Cannot provide column if not data frame.
            (self.grouping, {'column': 'Group'}),
            # Column must exist in data frame.
            (self.df, {'column': 'foo'}),
            # All distance matrix IDs must be in data frame.
            (self.df_missing_id, {'column': 'Group'}),
            # Grouping vector length must match number of objects in dm.
            ([1, 2], {}),
            # Grouping vector cannot have only unique values.
            ([1, 2, 3], {}),
            # Grouping vector cannot have only a single group.
            ([1, 1, 1], {}),
        )

        for grouping, extra in value_error_cases:
            with self.assertRaises(ValueError):
                CategoricalStats(self.dm, grouping, **extra)
示例#7
0
 def test_eq(self):
     """Check ``==`` in both operand orders against an equal-content twin."""
     # Compare DistanceMatrix to DissimilarityMatrix, where both have the
     # same data and IDs (self.dm_3x3 is presumably the DistanceMatrix
     # fixture created in setUp).
     twin = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
     for left, right in ((self.dm_3x3, twin), (twin, self.dm_3x3)):
         self.assertTrue(left == right)
示例#8
0
 def test_filter_reorder(self):
     """Filtering by every ID in a new order only rearranges the matrix."""
     # Don't filter anything, but reorder the distance matrix.
     new_order = ['c', 'a', 'b']
     expected = DissimilarityMatrix(
         [[0, 4.2, 12],
          [4.2, 0, 0.01],
          [12, 0.01, 0]],
         new_order)
     self.assertEqual(self.dm_3x3.filter(new_order), expected)
示例#9
0
    def test_from_iterable_non_hollow_data(self):
        """A constant metric of 1 produces a 4x4 matrix filled with ones."""
        numbers = (value for value in range(4))

        def always_one(a, b):
            return 1

        # Four independent rows of four ones, including the diagonal.
        expected = DissimilarityMatrix([[1] * 4 for _ in range(4)])
        observed = DissimilarityMatrix.from_iterable(numbers, always_one)
        self.assertEqual(observed, expected)
示例#10
0
    def test_from_iterable_no_key(self):
        """Build a matrix from a generator without passing a key function."""
        numbers = (value for value in range(4))

        def abs_gap(a, b):
            return abs(b - a)

        expected = DissimilarityMatrix([
            [0, 1, 2, 3],
            [1, 0, 1, 2],
            [2, 1, 0, 1],
            [3, 2, 1, 0],
        ])
        observed = DissimilarityMatrix.from_iterable(numbers, abs_gap)
        self.assertEqual(observed, expected)
示例#11
0
    def test_filter_subset(self):
        """Filter down to a subset of IDs given as a tuple or NumPy array."""
        # Two different pairs taken from the 3x3 fixture.
        pair_cases = (
            (('c', 'a'), [[0, 4.2], [4.2, 0]]),
            (('b', 'a'), [[0, 0.01], [0.01, 0]]),
        )
        for wanted, expected_data in pair_cases:
            expected = DissimilarityMatrix(expected_data, wanted)
            self.assertEqual(self.dm_3x3.filter(wanted), expected)

        # 4x4, built without explicit IDs and filtered with an array of IDs.
        four_by_four = DissimilarityMatrix([
            [0, 1, 55, 7],
            [1, 0, 16, 1],
            [55, 16, 0, 23],
            [7, 1, 23, 0],
        ])
        wanted = np.asarray(['3', '0', '1'])
        expected = DissimilarityMatrix(
            [[0, 7, 1], [7, 0, 1], [1, 1, 0]], wanted)
        self.assertEqual(four_by_four.filter(wanted), expected)
示例#12
0
    def test_from_iterable_with_key(self):
        """IDs of the built matrix come from applying ``key`` to each item."""
        numbers = (value for value in range(4))

        def abs_gap(a, b):
            return abs(b - a)

        def squared_label(x):
            # 0, 1, 2, 3 -> '0', '1', '4', '9'
            return str(x**2)

        expected = DissimilarityMatrix(
            [[0, 1, 2, 3],
             [1, 0, 1, 2],
             [2, 1, 0, 1],
             [3, 2, 1, 0]],
            ['0', '1', '4', '9'])
        observed = DissimilarityMatrix.from_iterable(
            numbers, abs_gap, key=squared_label)
        self.assertEqual(observed, expected)
示例#13
0
    def setUp(self):
        """Create parallel lists of matrix objects, strings and file handles.

        Within each family (``dissim_*`` and ``dist_*``) the three lists are
        given in the same order.
        """
        super(DissimilarityAndDistanceMatrixReaderWriterTests, self).setUp()

        self.lsmat_1x1_data = [[0.0]]
        self.lsmat_2x2_data = [[0.0, 0.123], [0.123, 0.0]]
        self.lsmat_2x2_asym_data = [[0.0, 1.0], [-2.0, 0.0]]
        self.lsmat_3x3_data = [
            [0.0, 0.01, 4.2],
            [0.01, 0.0, 12.0],
            [4.2, 12.0, 0.0],
        ]

        # We repeat the 3x3 example because there are two file format
        # representations of it, one that is messy and one that is not. Both
        # should be read into an equivalent object and written to an equivalent
        # format though, which is why we duplicate the 3x3 objects and strings.
        dissim_inputs = (
            (self.lsmat_1x1_data, ['a']),
            (self.lsmat_2x2_data, ['a', 'b']),
            (self.lsmat_2x2_asym_data, ['a', 'b']),
            (self.lsmat_3x3_data, ['a', 'b', 'c']),
            (self.lsmat_3x3_data, ['a', 'b', 'c']),
        )
        self.dissim_objs = [
            DissimilarityMatrix(data, ids) for data, ids in dissim_inputs
        ]
        self.dissim_strs = [
            LSMat_1x1, LSMat_2x2, LSMat_2x2_ASYM, LSMat_3x3, LSMat_3x3
        ]
        self.dissim_fhs = [
            self.lsmat_1x1_fh,
            self.lsmat_2x2_fh,
            self.lsmat_2x2_asym_fh,
            self.lsmat_3x3_fh,
            self.lsmat_3x3_whitespace_fh,
        ]

        # The DistanceMatrix lists leave out the asymmetric 2x2 example.
        dist_inputs = (
            (self.lsmat_1x1_data, ['a']),
            (self.lsmat_2x2_data, ['a', 'b']),
            (self.lsmat_3x3_data, ['a', 'b', 'c']),
            (self.lsmat_3x3_data, ['a', 'b', 'c']),
        )
        self.dist_objs = [
            DistanceMatrix(data, ids) for data, ids in dist_inputs
        ]
        self.dist_strs = [LSMat_1x1, LSMat_2x2, LSMat_3x3, LSMat_3x3]
        self.dist_fhs = [
            self.lsmat_1x1_fh,
            self.lsmat_2x2_fh,
            self.lsmat_3x3_fh,
            self.lsmat_3x3_whitespace_fh,
        ]
示例#14
0
文件: plotter.py 项目: onocy/nlp_f18
def create_correlation_plot(EDM, artists, cmap='Reds',
                            title='Lyrical Similarity', pause_seconds=500):
    """
    Plot a heatmap of a pairwise matrix labelled with artist names.

    The matrix is wrapped in a scikit-bio ``DissimilarityMatrix`` and drawn
    with its ``plot`` method. The defaults reproduce the previously
    hard-coded colormap, title and pause duration.

    Parameters
    ----------
    EDM : 2-D array-like
        Pairwise values handed to ``DissimilarityMatrix`` as its data.
    artists : sequence of str
        Labels handed to ``DissimilarityMatrix`` as its IDs.
    cmap : str, optional
        Colormap forwarded to ``DissimilarityMatrix.plot``.
    title : str, optional
        Title forwarded to ``DissimilarityMatrix.plot``.
    pause_seconds : float, optional
        Interval passed to ``matplotlib.pyplot.pause`` after the figure is
        shown.

    Returns
    -------
    The figure object returned by ``DissimilarityMatrix.plot``, so that
    callers can save or further customise it.
    """
    from skbio.stats.distance import DissimilarityMatrix
    import matplotlib.pyplot as plt

    dm = DissimilarityMatrix(EDM, artists)
    fig = dm.plot(cmap=cmap, title=title)
    fig.show()
    # Run the GUI event loop for the requested interval before returning.
    plt.pause(pause_seconds)
    return fig
示例#15
0
    def test_init_from_dm(self):
        """Construct matrices from existing matrix objects."""
        new_ids = ['foo', 'bar', 'baz']

        # DissimilarityMatrix -> DissimilarityMatrix
        expected = DissimilarityMatrix(self.dm_3x3_data, new_ids)
        rebuilt = DissimilarityMatrix(self.dm_3x3, new_ids)
        self.assertEqual(rebuilt, expected)
        # Test that copy of data is not made: both objects expose the very
        # same array, so a write through one is visible through the other.
        self.assertIs(rebuilt.data, self.dm_3x3.data)
        rebuilt.data[0, 1] = 424242
        self.assertTrue(np.array_equal(rebuilt.data, self.dm_3x3.data))

        # DistanceMatrix -> DissimilarityMatrix
        expected = DissimilarityMatrix(self.dm_3x3_data, new_ids)
        distance_source = DistanceMatrix(self.dm_3x3_data, ('a', 'b', 'c'))
        rebuilt = DissimilarityMatrix(distance_source, new_ids)
        self.assertEqual(rebuilt, expected)

        # DissimilarityMatrix -> DistanceMatrix
        with self.assertRaises(DistanceMatrixError):
            DistanceMatrix(self.dm_2x2_asym, ['foo', 'bar'])
示例#16
0
    def test_avoid_copy_on_construction(self):
        """Check which inputs are stored as-is and which are replaced.

        Each case pairs an input with a flag saying whether the ``data``
        attribute of the new matrix is expected to be a different object
        from that input.
        """
        square = [[0, 1], [1, 0]]
        # (input data, expect a different object)
        cases = (
            ([[0, 1], [1, 0]], True),
            ([(0, 1), (1, 0)], True),
            (((0, 1), (1, 0)), True),
            (np.array(square, dtype='int'), True),
            (np.array(square, dtype='float'), False),
            (np.array(square, dtype=np.float32), False),
            (np.array(square, dtype=np.float64), False),
            (np.array(square, dtype='double'), False),
        )

        for data, expect_copy in cases:
            built = DissimilarityMatrix(data)
            self.assertEqual(built.data is not data, expect_copy)
示例#17
0
 def test_plot_no_default(self):
     """Plot with an explicit colormap and title, then inspect the figure."""
     labels = ['0', 'one', '2', 'three', '4.000']
     values = (
         [0, 1, 2, 3, 4],
         [1, 0, 1, 2, 3],
         [2, 1, 0, 1, 2],
         [3, 2, 1, 0, 1],
         [4, 3, 2, 1, 0],
     )
     matrix = DissimilarityMatrix(values, labels)
     figure = matrix.plot(cmap='Reds', title='Testplot')
     self.assertIsInstance(figure, mpl.figure.Figure)

     all_axes = figure.get_axes()
     self.assertEqual(len(all_axes), 2)

     # The title and tick labels are checked on the first axes only.
     first_ax = all_axes[0]
     self.assertEqual(first_ax.get_title(), 'Testplot')
     x_labels = [tick.get_text() for tick in first_ax.get_xticklabels()]
     self.assertEqual(x_labels, ['0', 'one', '2', 'three', '4.000'])
     y_labels = [tick.get_text() for tick in first_ax.get_yticklabels()]
     self.assertEqual(y_labels, ['0', 'one', '2', 'three', '4.000'])
示例#18
0
    def test_from_iterable_skbio_hamming_metric_with_metadata(self):
        """Use the hamming metric on sequences that carry metadata."""
        # test for #1254
        plain = Sequence('ACGT')
        tagged_one = Sequence('ACGA', metadata={'id': 'seq1'})
        tagged_two = Sequence('AAAA', metadata={'id': 'seq2'})
        with_positional = Sequence(
            'AAAA', positional_metadata={'qual': range(4)})
        sequences = [plain, tagged_one, tagged_two, with_positional]

        labels = ['a', 'b', 'c', 'd']
        expected = DissimilarityMatrix(
            [[0, 0.25, 0.75, 0.75],
             [0.25, 0.0, 0.5, 0.5],
             [0.75, 0.5, 0.0, 0.0],
             [0.75, 0.5, 0.0, 0.0]],
            labels)

        observed = DissimilarityMatrix.from_iterable(
            sequences,
            metric=skbio.sequence.distance.hamming,
            keys=['a', 'b', 'c', 'd'])

        self.assertEqual(observed, expected)
示例#19
0
 def test_init_no_ids(self):
     """Omitting IDs gives the same matrix as passing '0', '1', '2'."""
     with_explicit_ids = DissimilarityMatrix(
         self.dm_3x3_data, ('0', '1', '2'))
     without_ids = DissimilarityMatrix(self.dm_3x3_data)
     self.assertEqual(without_ids, with_explicit_ids)
     # The generated IDs can be used for element lookup.
     self.assertEqual(without_ids['1', '2'], 12.0)
示例#20
0
 def test_eq(self):
     """Check that ``==`` holds whichever object is the left operand."""
     # Compare DistanceMatrix to DissimilarityMatrix, where both have the
     # same data and IDs (self.dm_3x3 is presumably the DistanceMatrix
     # fixture created in setUp).
     same_content = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c'])
     forward = self.dm_3x3 == same_content
     self.assertTrue(forward)
     backward = same_content == self.dm_3x3
     self.assertTrue(backward)
示例#21
0
 def test_to_data_frame_default_ids(self):
     """``to_data_frame`` on a matrix built without IDs uses '0' and '1'."""
     matrix = DissimilarityMatrix(self.dm_2x2_data)
     observed = matrix.to_data_frame()
     expected = pd.DataFrame(
         [[0.0, 0.123], [0.123, 0.0]],
         index=['0', '1'],
         columns=['0', '1'])
     assert_data_frame_almost_equal(observed, expected)
示例#22
0
 def test_filter_missing_ids_strict_false(self):
     """With ``strict=False`` an unknown ID is dropped from the result."""
     # no exception should be raised
     known_ids = ('c', 'a')
     requested = ['c', 'a', 'not found']
     expected = DissimilarityMatrix([[0, 4.2], [4.2, 0]], known_ids)
     observed = self.dm_3x3.filter(requested, strict=False)
     self.assertEqual(observed, expected)
示例#23
0
        # Two square, zero-initialised matrices with one row and one column
        # per entry of `samples`: `dm` receives the `d` values and `pm` the
        # `p` values (presumably Mash distances and p-values — confirm
        # against the code that builds `mash_vec`).
        dm = np.zeros([len(samples),len(samples)])
        pm = np.zeros([len(samples),len(samples)])

        # fill matrices with values
        # mash_vec[0] and mash_vec[1] hold the two sample names of each pair,
        # mash_vec[2] and mash_vec[3] the values written into dm and pm.
        for s1, s2, d, p in zip(mash_vec[0],mash_vec[1],mash_vec[2],mash_vec[3]):
            # Position of each sample name within `samples`.
            i1 = samples.index(s1)
            i2 = samples.index(s2)
            # Progress/debug output for every pair.
            print('s1: %s, s2: %s, i1: %s, i2: %s, d: %s, p: %s' % (s1, s2, i1, i2, d, p))
            # Write each value to both (i1, i2) and (i2, i1) so that the
            # matrices are symmetric.
            dm[i1,i2] = d
            dm[i2,i1] = d
            pm[i1,i2] = p
            pm[i2,i1] = p

        # Use the base name of each sample entry as its matrix ID.
        ids = [os.path.basename(x) for x in samples]
        sk_dm = DissimilarityMatrix(dm, ids=ids)
        sk_pm = DissimilarityMatrix(pm, ids=ids)

        # Write both matrices to the paths declared as the rule's outputs.
        sk_dm.write(output['dist_matrix'])
        sk_pm.write(output['p_matrix'])

#### Mash rules
# Rule that declares inputs only: for every entry of `samples` it requests
# the `.msh` and `.refseq.txt` files under `mash_dir`, plus the two combined
# matrix files. Presumably used as an aggregate target for the Mash part of
# the workflow — confirm against the rules that produce these files.
rule mash:
    input:
        expand(mash_dir + '{sample}/mash/{sample}.msh',
               sample=samples),
        expand(mash_dir + '{sample}/mash/{sample}.refseq.txt',
               sample=samples),
        mash_dir + 'combined_analysis/mash.dist.dm',
        mash_dir + 'combined_analysis/mash.dist.p'
示例#24
0
 def test_filter_single_id(self):
     """Filtering down to one ID yields a 1x1 matrix holding zero."""
     only_id = ['b']
     expected = DissimilarityMatrix([[0]], only_id)
     self.assertEqual(self.dm_2x2_asym.filter(only_id), expected)
示例#25
0
# Build a dissimilarity matrix for five objects labelled A-E. Entry (r, c)
# is the absolute difference between the row and column positions:

from skbio.stats.distance import DissimilarityMatrix
dm = DissimilarityMatrix(
    [[abs(r - c) for c in range(5)] for r in range(5)],
    ['A', 'B', 'C', 'D', 'E'])

# Draw the matrix as a heatmap and keep the resulting figure:

fig = dm.plot(title='Example heatmap', cmap='Reds')
示例#26
0
 def test_from_iterable_single(self):
     """A one-item iterable gives a 1x1 matrix holding the metric value."""
     def constant_metric(a, b):
         return 100

     expected = DissimilarityMatrix([[100]])
     observed = DissimilarityMatrix.from_iterable(["boo"], constant_metric)
     self.assertEqual(observed, expected)
示例#27
0
 def test_constructor(self):
     """Check that ``randdm`` builds its result with the given constructor."""
     expected = DissimilarityMatrix(np.asarray([[0.0]]), ['1'])
     generated = randdm(1, constructor=DissimilarityMatrix)
     self.assertEqual(generated, expected)
     # The exact class is required, not merely a subclass instance.
     self.assertIs(type(generated), DissimilarityMatrix)
示例#28
0
 def test_constructor(self):
     """A 1x1 ``randdm`` result has the requested type and expected value."""
     single_zero = np.asarray([[0.0]])
     expected = DissimilarityMatrix(single_zero, ['1'])
     generated = randdm(1, constructor=DissimilarityMatrix)
     self.assertEqual(generated, expected)
     # Compare the concrete class itself.
     self.assertIs(type(generated), DissimilarityMatrix)
示例#29
0
 def test_init_non_hollow_dm(self):
     """Data with a non-zero diagonal is accepted and stored as given."""
     all_ones = [[1, 1], [1, 1]]
     built = DissimilarityMatrix(all_ones, ['a', 'b'])
     self.assertTrue(np.array_equal(built.data, all_ones))
     # The stored data must be reported as not hollow.
     self.assertEqual(
         skbio.stats.distance._utils.is_hollow(built.data), False)
示例#30
0
 def test_init_non_hollow_dm(self):
     """Check that data with a non-zero diagonal is stored unchanged."""
     # Every entry, including the diagonal, is 1.
     data = [[1, 1], [1, 1]]
     obs = DissimilarityMatrix(data, ['a', 'b'])
     # The matrix must expose exactly the values it was given.
     self.assertTrue(np.array_equal(obs.data, data))