Exemplo n.º 1
0
def test_map_transformer():
    post_data = np.array([[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8],
                          [7, 9, 7]])
    test_data = np.array([[1, 1, 1], [1, 1, 2], [8, 9, 9], [8, 8, 8]])
    n_gaussians = 2
    n_features = 3
    prior_machine = GMMMachine(n_gaussians)
    prior_machine.means = np.array([[2, 2, 2], [8, 8, 8]])
    prior_machine.variances = np.ones_like(prior_machine.means)
    prior_machine.weights = np.array([0.5, 0.5])

    machine = GMMMachine(
        n_gaussians,
        trainer="map",
        ubm=prior_machine,
        update_means=True,
        update_variances=True,
        update_weights=True,
    )

    for transform in (to_numpy, to_dask_array):
        post_data = transform(post_data)
        machine = machine.fit(post_data)

        expected_means = np.array([[1.83333333, 1.83333333, 2.0],
                                   [7.57142857, 8, 8]])
        np.testing.assert_almost_equal(machine.means, expected_means)
        eps = np.finfo(float).eps
        expected_vars = np.array([[eps, eps, eps], [eps, eps, eps]])
        np.testing.assert_almost_equal(machine.variances, expected_vars)
        expected_weights = np.array([0.46226415, 0.53773585])
        np.testing.assert_almost_equal(machine.weights, expected_weights)

        stats = machine.acc_stats(test_data)

        expected_stats = GMMStats(n_gaussians, n_features)
        expected_stats.init_fields(
            log_likelihood=-1.3837590691807108e16,
            t=test_data.shape[0],
            n=np.array([2, 2], dtype=float),
            sum_px=np.array([[2, 2, 3], [16, 17, 17]], dtype=float),
            sum_pxx=np.array([[2, 2, 5], [128, 145, 145]], dtype=float),
        )
        assert stats.is_similar_to(expected_stats)
Exemplo n.º 2
0
def test_ml_transformer():
    data = np.array([[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8], [7, 9, 7]])
    test_data = np.array([[1, 1, 1], [1, 1, 2], [8, 9, 9], [8, 8, 8]])
    n_gaussians = 2
    n_features = 3

    machine = GMMMachine(
        n_gaussians,
        update_means=True,
        update_variances=True,
        update_weights=True,
    )
    machine.means = np.array([[2, 2, 2], [8, 8, 8]])
    machine.variances = np.ones_like(machine.means)

    for transform in (to_numpy, to_dask_array):
        data = transform(data)
        machine = machine.fit(data)

        expected_means = np.array([[1.5, 1.5, 2.0], [7.0, 8.0, 8.0]])
        np.testing.assert_almost_equal(machine.means, expected_means)
        expected_weights = np.array([2 / 5, 3 / 5])
        np.testing.assert_almost_equal(machine.weights, expected_weights)
        eps = np.finfo(float).eps
        expected_variances = np.array([[1 / 4, 1 / 4, eps],
                                       [eps, 2 / 3, 2 / 3]])
        np.testing.assert_almost_equal(machine.variances, expected_variances)

        stats = machine.acc_stats(test_data)

        expected_stats = GMMStats(n_gaussians, n_features)
        expected_stats.init_fields(
            log_likelihood=-6755399441055685.0,
            t=test_data.shape[0],
            n=np.array([2, 2], dtype=float),
            sum_px=np.array([[2, 2, 3], [16, 17, 17]], dtype=float),
            sum_pxx=np.array([[2, 2, 5], [128, 145, 145]], dtype=float),
        )
        assert stats.is_similar_to(expected_stats)
Exemplo n.º 3
0
def test_GMMStats():
    # Test a GMMStats
    # Initializes a GMMStats
    n_gaussians = 2
    n_features = 3
    gs = GMMStats(n_gaussians, n_features)
    log_likelihood = -3.0
    T = 57
    n = np.array([4.37, 5.31], "float64")
    sumpx = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], "float64")
    sumpxx = np.array([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]], "float64")
    gs.log_likelihood = log_likelihood
    gs.t = T
    gs.n = n
    gs.sum_px = sumpx
    gs.sum_pxx = sumpxx
    np.testing.assert_equal(gs.log_likelihood, log_likelihood)
    np.testing.assert_equal(gs.t, T)
    np.testing.assert_equal(gs.n, n)
    np.testing.assert_equal(gs.sum_px, sumpx)
    np.testing.assert_equal(gs.sum_pxx, sumpxx)
    np.testing.assert_equal(gs.shape, (n_gaussians, n_features))

    # Saves and reads from file using `from_hdf5`
    filename = str(tempfile.mkstemp(".hdf5")[1])
    gs.save(HDF5File(filename, "w"))
    gs_loaded = GMMStats.from_hdf5(HDF5File(filename, "r"))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    assert type(gs_loaded.n_gaussians) is np.int64
    assert type(gs_loaded.n_features) is np.int64
    assert type(gs_loaded.log_likelihood) is np.float64

    # Saves and load from file using `load`
    filename = str(tempfile.mkstemp(".hdf5")[1])
    gs.save(hdf5=HDF5File(filename, "w"))
    gs_loaded = GMMStats(n_gaussians, n_features)
    gs_loaded.load(HDF5File(filename, "r"))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    # Makes them different
    gs_loaded.t = 58
    assert (gs == gs_loaded) is False
    assert gs != gs_loaded
    assert not (gs.is_similar_to(gs_loaded))

    # Accumulates from another GMMStats
    gs2 = GMMStats(n_gaussians, n_features)
    gs2.log_likelihood = log_likelihood
    gs2.t = T
    gs2.n = n.copy()
    gs2.sum_px = sumpx.copy()
    gs2.sum_pxx = sumpxx.copy()
    gs2 += gs
    np.testing.assert_equal(gs2.log_likelihood, 2 * log_likelihood)
    np.testing.assert_equal(gs2.t, 2 * T)
    np.testing.assert_almost_equal(gs2.n, 2 * n, decimal=8)
    np.testing.assert_almost_equal(gs2.sum_px, 2 * sumpx, decimal=8)
    np.testing.assert_almost_equal(gs2.sum_pxx, 2 * sumpxx, decimal=8)

    # Re-init and checks for zeros
    gs_loaded.init_fields()
    np.testing.assert_equal(gs_loaded.log_likelihood, 0)
    np.testing.assert_equal(gs_loaded.t, 0)
    np.testing.assert_equal(gs_loaded.n, np.zeros((n_gaussians, )))
    np.testing.assert_equal(gs_loaded.sum_px,
                            np.zeros((n_gaussians, n_features)))
    np.testing.assert_equal(gs_loaded.sum_pxx,
                            np.zeros((n_gaussians, n_features)))
    # Resize and checks size
    assert gs_loaded.shape == (n_gaussians, n_features)
    gs_loaded.resize(4, 5)
    assert gs_loaded.shape == (4, 5)
    assert gs_loaded.sum_px.shape[0] == 4
    assert gs_loaded.sum_px.shape[1] == 5

    # Clean-up
    os.unlink(filename)