Пример #1
0
    def testEuclideanDistancesExecution(self):
        """Compare executed ``euclidean_distances`` results with the reference.

        A dense and a sparse input pair, of shapes (30, 10) and (40, 10), are
        each run through four scenarios: the plain two-argument call, a call
        with caller-supplied norm arguments, a ``squared=True`` call on
        float32 data, and the single-argument call. Every result is compared
        with ``sk_euclidean_distances`` applied to the raw data.
        """
        def run(tensor):
            # Execute through the test executor with concat=True and take the
            # first returned item.
            return self.executor.execute_tensor(tensor, concat=True)[0]

        dense_inputs = (np.random.rand(30, 10), np.random.rand(40, 10))
        sparse_inputs = (
            SparseNDArray(sps.random(30, 10, density=0.5, format='csr')),
            SparseNDArray(sps.random(40, 10, density=0.5, format='csr')),
        )

        for raw_x, raw_y in (dense_inputs, sparse_inputs):
            # 9 and 7 do not evenly divide the input dimensions (30, 40, 10);
            # presumably chosen so the tensors span unevenly sized chunks.
            x = mt.tensor(raw_x, chunk_size=9)
            y = mt.tensor(raw_y, chunk_size=7)

            # Scenario 1: plain pairwise distances.
            actual = run(euclidean_distances(x, y))
            reference = sk_euclidean_distances(raw_x, Y=raw_y)
            np.testing.assert_almost_equal(actual, reference)

            # Scenario 2: caller-supplied norm arguments. The values passed
            # are row sums, computed the same way for the tensor call and the
            # reference call.
            actual = run(euclidean_distances(
                x,
                y,
                X_norm_squared=x.sum(axis=1)[..., np.newaxis],
                Y_norm_squared=y.sum(axis=1)[np.newaxis, ...]))
            reference = sk_euclidean_distances(
                raw_x,
                raw_y,
                X_norm_squared=raw_x.sum(axis=1)[..., np.newaxis],
                Y_norm_squared=raw_y.sum(axis=1)[np.newaxis, ...])
            np.testing.assert_almost_equal(actual, reference)

            # Scenario 3: squared distances on float32 data, compared with a
            # looser tolerance (6 decimals).
            squared_x = (x**2).astype(np.float32)
            squared_y = (y**2).astype(np.float32)
            actual = run(euclidean_distances(squared_x, squared_y,
                                             squared=True))
            raw_squared_x = (raw_x**2).astype(np.float32)
            raw_squared_y = (raw_y**2).astype(np.float32)
            reference = sk_euclidean_distances(raw_squared_x, raw_squared_y,
                                               squared=True)
            np.testing.assert_almost_equal(actual, reference, decimal=6)

            # Scenario 4: only X given (x is y).
            actual = run(euclidean_distances(x))
            reference = sk_euclidean_distances(raw_x)
            np.testing.assert_almost_equal(actual, reference)
Пример #2
0
def test_euclidean_distances_op():
    """Check how ``euclidean_distances`` records and validates norm arguments.

    Covers three things: a (1, 10) ``X_norm_squared`` ends up on the op with
    the same key as its checked transpose, float32 norms leave both op norm
    attributes as ``None``, and norms with incompatible shapes raise
    ``ValueError``.
    """
    samples_x = mt.random.rand(10, 3)
    row_shaped_norms = mt.random.rand(1, 10)
    samples_y = mt.random.rand(11, 3)

    # A (1, 10) norm for a 10-row X: the op should hold its transpose.
    dist = euclidean_distances(samples_x, X_norm_squared=row_shaped_norms)
    actual_key = dist.op.x_norm_squared.key
    expected_key = check_array(row_shaped_norms).T.key
    assert actual_key == expected_key

    # float32 norms: the op is expected to carry no precomputed norms.
    norms_x32 = mt.random.rand(10, 1, dtype=mt.float32)
    norms_y32 = mt.random.rand(1, 11, dtype=mt.float32)
    dist = euclidean_distances(samples_x,
                               samples_y,
                               X_norm_squared=norms_x32,
                               Y_norm_squared=norms_y32)
    assert dist.op.x_norm_squared is None
    assert dist.op.y_norm_squared is None

    # X norm given as a 1-d tensor: incompatible shape.
    with pytest.raises(ValueError):
        flat_x_norms = mt.random.rand(10)
        euclidean_distances(samples_x, X_norm_squared=flat_x_norms)

    # X norm with 11 rows while X has 10: incompatible shape.
    with pytest.raises(ValueError):
        long_x_norms = mt.random.rand(11, 1)
        euclidean_distances(samples_x, X_norm_squared=long_x_norms)

    # Y norm given as a 1-d tensor of length 10 while Y has 11 rows:
    # incompatible shape.
    with pytest.raises(ValueError):
        flat_y_norms = mt.random.rand(10)
        euclidean_distances(samples_x, samples_y,
                            Y_norm_squared=flat_y_norms)
Пример #3
0
    def testEuclideanDistancesOp(self):
        """Check how ``euclidean_distances`` records and validates norms.

        Covers three things: a (1, 10) ``X_norm_squared`` ends up on the op
        with the same key as its checked transpose, float32 norms leave both
        op norm attributes as ``None``, and norms with incompatible shapes
        raise ``ValueError``.
        """
        samples_x = mt.random.rand(10, 3)
        row_shaped_norms = mt.random.rand(1, 10)
        samples_y = mt.random.rand(11, 3)

        # A (1, 10) norm for a 10-row X: the op should hold its transpose.
        dist = euclidean_distances(samples_x, X_norm_squared=row_shaped_norms)
        actual_key = dist.op.x_norm_squared.key
        expected_key = check_array(row_shaped_norms).T.key
        self.assertEqual(actual_key, expected_key)

        # float32 norms: the op is expected to carry no precomputed norms.
        norms_x32 = mt.random.rand(10, 1, dtype=mt.float32)
        norms_y32 = mt.random.rand(1, 11, dtype=mt.float32)
        dist = euclidean_distances(samples_x,
                                   samples_y,
                                   X_norm_squared=norms_x32,
                                   Y_norm_squared=norms_y32)
        self.assertIsNone(dist.op.x_norm_squared)
        self.assertIsNone(dist.op.y_norm_squared)

        # X norm given as a 1-d tensor: incompatible shape.
        with self.assertRaises(ValueError):
            flat_x_norms = mt.random.rand(10)
            euclidean_distances(samples_x, X_norm_squared=flat_x_norms)

        # X norm with 11 rows while X has 10: incompatible shape.
        with self.assertRaises(ValueError):
            long_x_norms = mt.random.rand(11, 1)
            euclidean_distances(samples_x, X_norm_squared=long_x_norms)

        # Y norm given as a 1-d tensor of length 10 while Y has 11 rows:
        # incompatible shape.
        with self.assertRaises(ValueError):
            flat_y_norms = mt.random.rand(10)
            euclidean_distances(samples_x, samples_y,
                                Y_norm_squared=flat_y_norms)
Пример #4
0
def test_euclidean_distances_execution(setup):
    """Compare executed ``euclidean_distances`` results with the reference.

    A dense and a sparse input pair, of shapes (30, 10) and (40, 10), are
    each run through four scenarios: the plain two-argument call, a call with
    caller-supplied norm arguments, a ``squared=True`` call on float32 data,
    and the single-argument call. Afterwards, small dense inputs are
    re-checked under two ``chunk_store_limit`` settings. Every result is
    compared with ``sk_euclidean_distances`` applied to the raw data.

    ``setup`` is requested as a test fixture and is not referenced in the
    body.
    """
    def fetch(tensor):
        # Execute the tensor and pull back its result.
        return tensor.execute().fetch()

    dense_inputs = (np.random.rand(30, 10), np.random.rand(40, 10))
    sparse_inputs = (
        SparseNDArray(sps.random(30, 10, density=0.5, format='csr')),
        SparseNDArray(sps.random(40, 10, density=0.5, format='csr')),
    )

    for raw_x, raw_y in (dense_inputs, sparse_inputs):
        # 9 and 7 do not evenly divide the input dimensions (30, 40, 10);
        # presumably chosen so the tensors span unevenly sized chunks.
        x = mt.tensor(raw_x, chunk_size=9)
        y = mt.tensor(raw_y, chunk_size=7)

        # Scenario 1: plain pairwise distances.
        actual = fetch(euclidean_distances(x, y))
        reference = sk_euclidean_distances(raw_x, Y=raw_y)
        np.testing.assert_almost_equal(actual, reference)

        # Scenario 2: caller-supplied norm arguments. The values passed are
        # row sums, computed the same way for the tensor call and the
        # reference call.
        actual = fetch(euclidean_distances(
            x,
            y,
            X_norm_squared=x.sum(axis=1)[..., np.newaxis],
            Y_norm_squared=y.sum(axis=1)[np.newaxis, ...]))
        reference = sk_euclidean_distances(
            raw_x,
            raw_y,
            X_norm_squared=raw_x.sum(axis=1)[..., np.newaxis],
            Y_norm_squared=raw_y.sum(axis=1)[np.newaxis, ...])
        np.testing.assert_almost_equal(actual, reference)

        # Scenario 3: squared distances on float32 data, compared with a
        # looser tolerance (6 decimals).
        squared_x = (x**2).astype(np.float32)
        squared_y = (y**2).astype(np.float32)
        actual = fetch(euclidean_distances(squared_x, squared_y,
                                           squared=True))
        raw_squared_x = (raw_x**2).astype(np.float32)
        raw_squared_y = (raw_y**2).astype(np.float32)
        reference = sk_euclidean_distances(raw_squared_x, raw_squared_y,
                                           squared=True)
        np.testing.assert_almost_equal(actual, reference, decimal=6)

        # Scenario 4: only X given (x is y).
        actual = fetch(euclidean_distances(x))
        reference = sk_euclidean_distances(raw_x)
        np.testing.assert_almost_equal(actual, reference)

    # test size adjust
    raw1 = np.random.rand(12, 4)
    raw2 = np.random.rand(18, 4)

    t1 = mt.tensor(raw1, chunk_size=4)
    t2 = mt.tensor(raw2, chunk_size=6)

    # Each entry: (chunk_store_limit, argument orders to check under it).
    # Every item is (left tensor, right tensor, left raw, right raw).
    forward = (t1, t2, raw1, raw2)
    swapped = (t2, t1, raw2, raw1)
    limit_cases = (
        (80, (forward, swapped)),
        (20, (forward,)),
    )
    for limit, operand_sets in limit_cases:
        # Both building and executing the distance tensor happen while the
        # option is in effect.
        with option_context({'chunk_store_limit': limit}):
            for left, right, raw_left, raw_right in operand_sets:
                actual = fetch(euclidean_distances(left, right))
                reference = sk_euclidean_distances(raw_left, raw_right)
                np.testing.assert_almost_equal(actual, reference)