Example #1
def test_GMMMachine_stats():
    """Tests a GMMMachine (statistics)"""

    arrayset = load_array(
        resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5"))
    gmm = GMMMachine(n_gaussians=2)
    gmm.weights = np.array([0.5, 0.5], "float64")
    gmm.means = np.array([[3, 70], [4, 72]], "float64")
    gmm.variances = np.array([[1, 10], [2, 5]], "float64")
    gmm.variance_thresholds = np.array([[0, 0], [0, 0]], "float64")

    stats = gmm_module.e_step(
        arrayset,
        gmm,
    )

    stats_ref = GMMStats(n_gaussians=2, n_features=2)
    stats_ref.load(
        HDF5File(resource_filename("bob.learn.em", "data/stats.hdf5"), "r"))

    np.testing.assert_equal(stats.t, stats_ref.t)
    np.testing.assert_almost_equal(stats.n, stats_ref.n, decimal=10)
    # np.testing.assert_equal(stats.sum_px, stats_ref.sum_px)
    # Note AA: precision error above
    np.testing.assert_almost_equal(stats.sum_px, stats_ref.sum_px, decimal=10)
    np.testing.assert_almost_equal(stats.sum_pxx,
                                   stats_ref.sum_pxx,
                                   decimal=10)
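
The three accumulators checked here are the standard EM sufficient statistics of a diagonal-covariance GMM: n (zeroth order), sum_px (first order) and sum_pxx (second order). As a rough illustration of what e_step accumulates, here is a minimal numpy sketch (a hypothetical helper, not the bob.learn.em implementation):

import numpy as np

def e_step_sketch(data, weights, means, variances):
    # data: (T, D); weights: (C,); means, variances: (C, D), diagonal model.
    log_norm = -0.5 * np.log(2 * np.pi * variances).sum(axis=1)          # (C,)
    diff = data[:, None, :] - means[None, :, :]                          # (T, C, D)
    log_p = np.log(weights) + log_norm - 0.5 * (diff**2 / variances).sum(axis=2)
    # Responsibilities via log-sum-exp, then the four GMMStats fields.
    resp = np.exp(log_p - np.logaddexp.reduce(log_p, axis=1, keepdims=True))
    return data.shape[0], resp.sum(axis=0), resp.T @ data, resp.T @ data**2
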
Example #2
def test_ISVTrainAndEnrol():
    # Train and enroll an 'ISVMachine'

    eps = 1e-10
    d_ref = numpy.array([
        0.39601136, 0.07348469, 0.47712682, 0.44738127, 0.43179856, 0.45086029
    ], 'float64')
    u_ref = numpy.array([[0.855125642430777, 0.563104284748032],
                         [-0.325497865404680, 1.923598985291687],
                         [0.511575659503837, 1.964288663083095],
                         [9.330165761678115, 1.073623827995043],
                         [0.511099245664012, 0.278551249248978],
                         [5.065578541930268, 0.509565618051587]], 'float64')
    z_ref = numpy.array([
        -0.079315777443826, 0.092702428248543, -0.342488761656616,
        -0.059922635809136, 0.133539981073604, 0.213118695516570
    ], 'float64')

    # Calls the train function
    ubm = GMMMachine(2, 3)
    ubm.mean_supervector = UBM_MEAN
    ubm.variance_supervector = UBM_VAR
    mb = ISVBase(ubm, 2)
    t = ISVTrainer(4.)
    t.initialize(mb, TRAINING_STATS)
    mb.u = M_u
    for i in range(10):
        t.e_step(mb, TRAINING_STATS)
        t.m_step(mb)

    assert numpy.allclose(mb.d, d_ref, eps)
    assert numpy.allclose(mb.u, u_ref, eps)

    # Calls the enroll function
    m = ISVMachine(mb)

    Ne = numpy.array([0.1579, 0.9245, 0.1323, 0.2458]).reshape((2, 2))
    Fe = numpy.array([
        0.1579, 0.1925, 0.3242, 0.1234, 0.2354, 0.2734, 0.2514, 0.5874, 0.3345,
        0.2463, 0.4789, 0.5236
    ]).reshape((6, 2))
    gse1 = GMMStats(2, 3)
    gse1.n = Ne[:, 0]
    gse1.sum_px = Fe[:, 0].reshape(2, 3)
    gse2 = GMMStats(2, 3)
    gse2.n = Ne[:, 1]
    gse2.sum_px = Fe[:, 1].reshape(2, 3)

    gse = [gse1, gse2]
    t.enroll(m, gse, 5)
    assert numpy.allclose(m.z, z_ref, eps)

    #Testing exceptions
    nose.tools.assert_raises(RuntimeError, t.initialize, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.initialize, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.e_step, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.e_step, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.enroll, m, [[1, 2, 2]], 5)
Example #3
def test_score():
    gmm1 = GMM(number_of_gaussians=2)
    gmm1.load_model(
        pkg_resources.resource_filename("bob.bio.gmm.test",
                                        "data/gmm_ubm.hdf5"))
    biometric_reference = GMMMachine.from_hdf5(
        pkg_resources.resource_filename("bob.bio.gmm.test",
                                        "data/gmm_enrolled.hdf5"),
        ubm=gmm1.ubm,
    )
    probe = GMMStats.from_hdf5(
        pkg_resources.resource_filename("bob.bio.gmm.test",
                                        "data/gmm_projected.hdf5"))
    probe_data = utils.random_array((20, 45), -5.0, 5.0, seed=seed_value)

    reference_score = 0.6509

    numpy.testing.assert_almost_equal(gmm1.score(biometric_reference, probe),
                                      reference_score,
                                      decimal=5)

    multi_refs = gmm1.score_multiple_biometric_references(
        [biometric_reference, biometric_reference, biometric_reference], probe)
    assert multi_refs.shape == (3, 1), multi_refs.shape
    numpy.testing.assert_almost_equal(multi_refs, reference_score, decimal=5)

    # With unprojected data
    numpy.testing.assert_almost_equal(gmm1.score(biometric_reference,
                                                 probe_data),
                                      reference_score,
                                      decimal=5)
Example #4
def test_map_transformer():
    post_data = np.array([[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8],
                          [7, 9, 7]])
    test_data = np.array([[1, 1, 1], [1, 1, 2], [8, 9, 9], [8, 8, 8]])
    n_gaussians = 2
    n_features = 3
    prior_machine = GMMMachine(n_gaussians)
    prior_machine.means = np.array([[2, 2, 2], [8, 8, 8]])
    prior_machine.variances = np.ones_like(prior_machine.means)
    prior_machine.weights = np.array([0.5, 0.5])

    machine = GMMMachine(
        n_gaussians,
        trainer="map",
        ubm=prior_machine,
        update_means=True,
        update_variances=True,
        update_weights=True,
    )

    for transform in (to_numpy, to_dask_array):
        post_data = transform(post_data)
        machine = machine.fit(post_data)

        expected_means = np.array([[1.83333333, 1.83333333, 2.0],
                                   [7.57142857, 8, 8]])
        np.testing.assert_almost_equal(machine.means, expected_means)
        eps = np.finfo(float).eps
        expected_vars = np.array([[eps, eps, eps], [eps, eps, eps]])
        np.testing.assert_almost_equal(machine.variances, expected_vars)
        expected_weights = np.array([0.46226415, 0.53773585])
        np.testing.assert_almost_equal(machine.weights, expected_weights)

        stats = machine.acc_stats(test_data)

        expected_stats = GMMStats(n_gaussians, n_features)
        expected_stats.init_fields(
            log_likelihood=-1.3837590691807108e16,
            t=test_data.shape[0],
            n=np.array([2, 2], dtype=float),
            sum_px=np.array([[2, 2, 3], [16, 17, 17]], dtype=float),
            sum_pxx=np.array([[2, 2, 5], [128, 145, 145]], dtype=float),
        )
        assert stats.is_similar_to(expected_stats)
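
The expected means above follow from classic relevance-MAP adaptation: each Gaussian interpolates between its prior (UBM) mean and the ML estimate from the accumulated statistics, with a data-dependent coefficient. A minimal sketch, assuming Reynolds-style adaptation with the default relevance factor (the helper name is hypothetical); for the post_data statistics it reproduces expected_means, since the two clusters are well separated:

import numpy as np

def map_means_sketch(n, sum_px, prior_means, relevance_factor=4.0):
    # alpha -> 1 for Gaussians with much data, -> 0 for Gaussians with none.
    alpha = (n / (n + relevance_factor))[:, None]
    ml_means = sum_px / n[:, None]
    return alpha * ml_means + (1 - alpha) * prior_means

The weight update is analogous (interpolate n/t with the prior weights, then renormalise), which is where 0.46226415 and 0.53773585 come from.
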
Example #5
def test_ml_transformer():
    data = np.array([[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8], [7, 9, 7]])
    test_data = np.array([[1, 1, 1], [1, 1, 2], [8, 9, 9], [8, 8, 8]])
    n_gaussians = 2
    n_features = 3

    machine = GMMMachine(
        n_gaussians,
        update_means=True,
        update_variances=True,
        update_weights=True,
    )
    machine.means = np.array([[2, 2, 2], [8, 8, 8]])
    machine.variances = np.ones_like(machine.means)

    for transform in (to_numpy, to_dask_array):
        data = transform(data)
        machine = machine.fit(data)

        expected_means = np.array([[1.5, 1.5, 2.0], [7.0, 8.0, 8.0]])
        np.testing.assert_almost_equal(machine.means, expected_means)
        expected_weights = np.array([2 / 5, 3 / 5])
        np.testing.assert_almost_equal(machine.weights, expected_weights)
        eps = np.finfo(float).eps
        expected_variances = np.array([[1 / 4, 1 / 4, eps],
                                       [eps, 2 / 3, 2 / 3]])
        np.testing.assert_almost_equal(machine.variances, expected_variances)

        stats = machine.acc_stats(test_data)

        expected_stats = GMMStats(n_gaussians, n_features)
        expected_stats.init_fields(
            log_likelihood=-6755399441055685.0,
            t=test_data.shape[0],
            n=np.array([2, 2], dtype=float),
            sum_px=np.array([[2, 2, 3], [16, 17, 17]], dtype=float),
            sum_pxx=np.array([[2, 2, 5], [128, 145, 145]], dtype=float),
        )
        assert stats.is_similar_to(expected_stats)
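
For comparison with the MAP case, the ML updates behind these expected values are just the responsibility-weighted sample moments, with a variance floor that explains the eps entries. A hedged sketch (hypothetical helper, not the library code):

import numpy as np

def ml_m_step_sketch(t, n, sum_px, sum_pxx, floor=np.finfo(float).eps):
    weights = n / t                                # e.g. [2/5, 3/5] above
    means = sum_px / n[:, None]
    variances = sum_pxx / n[:, None] - means**2    # E[x^2] - E[x]^2 per Gaussian
    return weights, means, np.maximum(variances, floor)
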
Example #6
def test_GMMMachine_2():
    # Test a GMMMachine (statistics)

    arrayset = bob.io.base.load(
        datafile("faithful.torch3_f64.hdf5", __name__, path="../data/"))
    gmm = GMMMachine(2, 2)
    gmm.weights = numpy.array([0.5, 0.5], 'float64')
    gmm.means = numpy.array([[3, 70], [4, 72]], 'float64')
    gmm.variances = numpy.array([[1, 10], [2, 5]], 'float64')
    gmm.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

    stats = GMMStats(2, 2)
    gmm.acc_statistics(arrayset, stats)

    stats_ref = GMMStats(
        bob.io.base.HDF5File(datafile("stats.hdf5", __name__,
                                      path="../data/")))

    assert stats.t == stats_ref.t
    assert numpy.allclose(stats.n, stats_ref.n, atol=1e-10)
    #assert numpy.array_equal(stats.sumPx, stats_ref.sumPx)
    #Note AA: precision error above
    assert numpy.allclose(stats.sum_px, stats_ref.sum_px, atol=1e-10)
    assert numpy.allclose(stats.sum_pxx, stats_ref.sum_pxx, atol=1e-10)
Example #7
def test_projector():
    """Tests the projector."""
    # Load the UBM
    gmm1 = GMM(number_of_gaussians=2)
    gmm1.ubm = GMMMachine.from_hdf5(
        pkg_resources.resource_filename("bob.bio.gmm.test",
                                        "data/gmm_ubm.hdf5"))

    # Generate and project random feature
    feature = utils.random_array((20, 45), -5.0, 5.0, seed=seed_value)
    projected = gmm1.project(feature)
    assert isinstance(projected, GMMStats)

    reference_file = pkg_resources.resource_filename(
        "bob.bio.gmm.test", "data/gmm_projected.hdf5")
    if regenerate_refs:
        projected.save(reference_file)

    reference = GMMStats.from_hdf5(reference_file)
    assert projected.is_similar_to(reference)
Example #8
def test_machine():

    # Ubm
    ubm = GMMMachine(2, 3)
    ubm.weights = numpy.array([0.4, 0.6])
    ubm.means = numpy.array([[1., 7, 4], [4, 5, 3]])
    ubm.variances = numpy.array([[0.5, 1., 1.5], [1., 1.5, 2.]])

    # Defines GMMStats
    gs = GMMStats(2, 3)
    log_likelihood = -3.
    T = 1
    n = numpy.array([0.4, 0.6], numpy.float64)
    sumpx = numpy.array([[1., 2., 3.], [2., 4., 3.]], numpy.float64)
    sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], numpy.float64)
    gs.log_likelihood = log_likelihood
    gs.t = T
    gs.n = n
    gs.sum_px = sumpx
    gs.sum_pxx = sumpxx

    # IVector (Python)
    m = IVectorMachinePy(ubm, 2)
    t = numpy.array([[1., 2], [4, 1], [0, 3], [5, 8], [7, 10], [11, 1]])
    m.set_t(t)
    sigma = numpy.array([1., 2., 1., 3., 2., 4.])
    m.set_sigma(sigma)

    wij_ref = numpy.array([-0.04213415, 0.21463343
                           ])  # Reference from original Chris implementation
    wij = m.project(gs)
    assert numpy.allclose(wij_ref, wij, 1e-5)

    # IVector (C++)
    mc = IVectorMachine(ubm, 2)
    mc.t = t
    mc.sigma = sigma

    wij_ref = numpy.array([-0.04213415, 0.21463343
                           ])  # Reference from original Chris implementation
    wij = mc.project(gs)
    assert numpy.allclose(wij_ref, wij, 1e-5)
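
Both the Python and the C++ machine compute the standard i-vector posterior mean w = (I + T' Sigma^-1 N T)^-1 T' Sigma^-1 (F - N m), where N and F are the zeroth- and first-order statistics and m the UBM mean supervector. A numpy sketch of that closed form (hypothetical helper); fed with t, sigma, ubm.means, n and sumpx from this test it returns wij_ref:

import numpy as np

def ivector_project_sketch(t_matrix, sigma, ubm_means, n, sum_px):
    Nd = np.repeat(n, ubm_means.shape[1])          # N on the supervector diagonal
    f_centered = (sum_px - n[:, None] * ubm_means).ravel()   # F - N m
    Tt_Sinv = t_matrix.T / sigma                   # T' Sigma^-1 (diagonal Sigma)
    lhs = np.eye(t_matrix.shape[1]) + Tt_Sinv @ (Nd[:, None] * t_matrix)
    return np.linalg.solve(lhs, Tt_Sinv @ f_centered)
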
Example #10
    def est(self):
        mix_file = util.cache_path(self.ubm)
        ivecdim = self.dim

        gslist = []
        for idx, gfile in self.single_accu_caches.items():
            gs = GMMStats(HDF5File(tk.uncached_path(gfile)))
            gslist.append(gs)

        gmm = sc.MixtureSet(mix_file)
        ubm = convert_gmm(gmm)

        ivm = IVectorMachine(ubm, ivecdim)
        ivm.variance_threshold = 1e-5

        ivtrainer = IVectorTrainer(update_sigma=True)
        ivtrainer.initialize(ivm, gslist)

        for i in range(self.iter):
            ivtrainer.e_step(ivm, gslist)
            ivtrainer.m_step(ivm)

        ivm.save(HDF5File(self.t_matrix.get_path(), "w"))
Example #11
def test_JFAMachine():

    eps = 1e-10

    # Creates a UBM
    ubm = GMMMachine(2, 3)
    ubm.weights = np.array([0.4, 0.6], "float64")
    ubm.means = np.array([[1, 6, 2], [4, 3, 2]], "float64")
    ubm.variances = np.array([[1, 2, 1], [2, 1, 2]], "float64")

    # Defines GMMStats
    gs = GMMStats(2, 3)
    gs.log_likelihood = -3.0
    gs.t = 1
    gs.n = np.array([0.4, 0.6], "float64")
    gs.sum_px = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], "float64")
    gs.sum_pxx = np.array([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]], "float64")

    # Creates a JFAMachine
    m = JFAMachine(2, 2, em_iterations=10, ubm=ubm)
    m.U = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]],
                   "float64")
    m.V = np.array([[6, 5], [4, 3], [2, 1], [1, 2], [3, 4], [5, 6]], "float64")
    m.D = np.array([0, 1, 0, 1, 0, 1], "float64")

    # Preparing the model
    y = np.array([1, 2], "float64")
    z = np.array([3, 4, 1, 2, 0, 1], "float64")
    model = [y, z]

    score_ref = -2.111577181208289
    score = m.score(model, gs)
    np.testing.assert_allclose(score, score_ref, atol=eps)

    # Scoring with numpy array
    np.random.seed(0)
    X = np.random.normal(loc=0.0, scale=1.0, size=(50, 3))
    score_ref = 2.028009315286946
    score = m.score_using_array(model, X)
    np.testing.assert_allclose(score, score_ref, atol=eps)
Example #12
def test_ISVMachine():

    eps = 1e-10

    # Creates a UBM
    ubm = GMMMachine(2, 3)
    ubm.weights = np.array([0.4, 0.6], "float64")
    ubm.means = np.array([[1, 6, 2], [4, 3, 2]], "float64")
    ubm.variances = np.array([[1, 2, 1], [2, 1, 2]], "float64")

    # Creates a ISVMachine
    isv_machine = ISVMachine(ubm=ubm, r_U=2, em_iterations=10)
    isv_machine.U = np.array(
        [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]], "float64")
    # base.v = np.array([[0], [0], [0], [0], [0], [0]], 'float64')
    isv_machine.D = np.array([0, 1, 0, 1, 0, 1], "float64")

    # Defines GMMStats
    gs = GMMStats(2, 3)
    gs.log_likelihood = -3.0
    gs.t = 1
    gs.n = np.array([0.4, 0.6], "float64")
    gs.sum_px = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], "float64")
    gs.sum_pxx = np.array([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]], "float64")

    # Enrolled model
    latent_z = np.array([3, 4, 1, 2, 0, 1], "float64")
    score = isv_machine.score(latent_z, gs)
    score_ref = -3.280498193082100
    np.testing.assert_allclose(score, score_ref, atol=eps)

    # Scoring with numpy array
    np.random.seed(0)
    X = np.random.normal(loc=0.0, scale=1.0, size=(50, 3))
    score_ref = -1.2343813195374242
    score = isv_machine.score_using_array(latent_z, X)
    np.testing.assert_allclose(score, score_ref, atol=eps)
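
The reference scores in this and the previous example share one closed form: the channel factor x is first estimated against the UBM, and the enrolled speaker offset (D z for ISV, V y + D z for JFA) is then linearly scored against the x-compensated first-order statistics. A hedged numpy sketch (hypothetical helper); with the inputs above it reproduces both score_ref values, V=None giving the ISV score and passing V and y the JFA one:

import numpy as np

def fa_score_sketch(ubm_means, ubm_variances, U, D, z, n, sum_px, V=None, y=None):
    m, sigma = ubm_means.ravel(), ubm_variances.ravel()
    Nd = np.repeat(n, ubm_means.shape[1])
    f_centered = sum_px.ravel() - Nd * m                    # F - N m
    # x = (I + U' Sigma^-1 N U)^-1 U' Sigma^-1 (F - N m)
    Ut_Sinv = U.T / sigma
    lhs = np.eye(U.shape[1]) + Ut_Sinv @ (Nd[:, None] * U)
    x = np.linalg.solve(lhs, Ut_Sinv @ f_centered)
    offset = D * z if V is None else V @ y + D * z          # enrolled offset
    return offset / sigma @ (f_centered - Nd * (U @ x))
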
Example #13
def test_GMMStats():
  # Test a GMMStats
  # Initializes a GMMStats
  gs = GMMStats(2,3)
  log_likelihood = -3.
  T = 57
  n = numpy.array([4.37, 5.31], 'float64')
  sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64')
  sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64')
  gs.log_likelihood = log_likelihood
  gs.t = T
  gs.n = n
  gs.sum_px = sumpx
  gs.sum_pxx = sumpxx
  assert gs.log_likelihood == log_likelihood
  assert gs.t == T
  assert (gs.n == n).all()
  assert (gs.sum_px == sumpx).all()
  assert (gs.sum_pxx == sumpxx).all()
  assert gs.shape==(2,3)

  # Saves and reads from file
  filename = str(tempfile.mkstemp(".hdf5")[1])
  gs.save(bob.io.base.HDF5File(filename, 'w'))
  gs_loaded = GMMStats(bob.io.base.HDF5File(filename))
  assert gs == gs_loaded
  assert (gs != gs_loaded ) is False
  assert gs.is_similar_to(gs_loaded)
  
  # Saves and reads from file using the keyword argument
  filename = str(tempfile.mkstemp(".hdf5")[1])
  gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
  gs_loaded = GMMStats(bob.io.base.HDF5File(filename))
  assert gs == gs_loaded
  assert (gs != gs_loaded ) is False
  assert gs.is_similar_to(gs_loaded)

  # Saves and load from file using the keyword argument
  filename = str(tempfile.mkstemp(".hdf5")[1])
  gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
  gs_loaded = GMMStats()
  gs_loaded.load(bob.io.base.HDF5File(filename))
  assert gs == gs_loaded
  assert (gs != gs_loaded ) is False
  assert gs.is_similar_to(gs_loaded)

  # Saves and load from file using the keyword argument
  filename = str(tempfile.mkstemp(".hdf5")[1])
  gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
  gs_loaded = GMMStats()
  gs_loaded.load(hdf5=bob.io.base.HDF5File(filename))
  assert gs == gs_loaded
  assert (gs != gs_loaded ) is False
  assert gs.is_similar_to(gs_loaded)
  
  
  # Makes them different
  gs_loaded.t = 58
  assert (gs == gs_loaded ) is False
  assert gs != gs_loaded
  assert (gs.is_similar_to(gs_loaded)) is False
  # Accumulates from another GMMStats
  gs2 = GMMStats(2,3)
  gs2.log_likelihood = log_likelihood
  gs2.t = T
  gs2.n = n
  gs2.sum_px = sumpx
  gs2.sum_pxx = sumpxx
  gs2 += gs
  eps = 1e-8
  assert gs2.log_likelihood == 2*log_likelihood
  assert gs2.t == 2*T
  assert numpy.allclose(gs2.n, 2*n, eps)
  assert numpy.allclose(gs2.sum_px, 2*sumpx, eps)
  assert numpy.allclose(gs2.sum_pxx, 2*sumpxx, eps)

  # Reinit and checks for zeros
  gs_loaded.init()
  assert gs_loaded.log_likelihood == 0
  assert gs_loaded.t == 0
  assert (gs_loaded.n == 0).all()
  assert (gs_loaded.sum_px == 0).all()
  assert (gs_loaded.sum_pxx == 0).all()
  # Resize and checks size
  assert  gs_loaded.shape==(2,3)
  gs_loaded.resize(4,5)  
  assert  gs_loaded.shape==(4,5)
  assert gs_loaded.sum_px.shape[0] == 4
  assert gs_loaded.sum_px.shape[1] == 5

  # Clean-up
  os.unlink(filename)
Example #14
def test_LinearScoring():

    ubm = GMMMachine(2, 2)
    ubm.weights = numpy.array([0.5, 0.5], 'float64')
    ubm.means = numpy.array([[3, 70], [4, 72]], 'float64')
    ubm.variances = numpy.array([[1, 10], [2, 5]], 'float64')
    ubm.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

    model1 = GMMMachine(2, 2)
    model1.weights = numpy.array([0.5, 0.5], 'float64')
    model1.means = numpy.array([[1, 2], [3, 4]], 'float64')
    model1.variances = numpy.array([[9, 10], [11, 12]], 'float64')
    model1.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

    model2 = GMMMachine(2, 2)
    model2.weights = numpy.array([0.5, 0.5], 'float64')
    model2.means = numpy.array([[5, 6], [7, 8]], 'float64')
    model2.variances = numpy.array([[13, 14], [15, 16]], 'float64')
    model2.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

    stats1 = GMMStats(2, 2)
    stats1.sum_px = numpy.array([[1, 2], [3, 4]], 'float64')
    stats1.n = numpy.array([1, 2], 'float64')
    stats1.t = 1 + 2

    stats2 = GMMStats(2, 2)
    stats2.sum_px = numpy.array([[5, 6], [7, 8]], 'float64')
    stats2.n = numpy.array([3, 4], 'float64')
    stats2.t = 3 + 4

    stats3 = GMMStats(2, 2)
    stats3.sum_px = numpy.array([[5, 6], [7, 3]], 'float64')
    stats3.n = numpy.array([3, 4], 'float64')
    stats3.t = 3 + 4

    test_channeloffset = [
        numpy.array([9, 8, 7, 6], 'float64'),
        numpy.array([5, 4, 3, 2], 'float64'),
        numpy.array([1, 0, 1, 2], 'float64')
    ]

    # Reference scores (from Idiap internal matlab implementation)
    ref_scores_00 = numpy.array(
        [[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], 'float64')
    ref_scores_01 = numpy.array(
        [[790.9666666666667, 743.9571428571428, 753.6714285714285],
         [738.5666666666667, 695.4428571428572, 704.5857142857144]], 'float64')
    ref_scores_10 = numpy.array(
        [[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], 'float64')
    ref_scores_11 = numpy.array(
        [[871.8333333333332, 776.3000000000001, 770.3571428571427],
         [793.8333333333333, 714.1857142857143, 717.5000000000000]], 'float64')

    # 1/ Use GMMMachines
    # 1/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3])
    assert (abs(scores - ref_scores_00) < 1e-7).all()

    # 1/b/ Without test_channelOffset, with frame-length normalisation
    scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3],
                            [], True)
    assert (abs(scores - ref_scores_01) < 1e-7).all()
    #scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], (), True)
    #assert (abs(scores - ref_scores_01) < 1e-7).all()
    #scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], None, True)
    #assert (abs(scores - ref_scores_01) < 1e-7).all()

    # 1/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3],
                            test_channeloffset)
    assert (abs(scores - ref_scores_10) < 1e-7).all()

    # 1/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3],
                            test_channeloffset, True)
    assert (abs(scores - ref_scores_11) < 1e-7).all()

    # 2/ Use mean/variance supervectors
    # 2/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring([model1.mean_supervector, model2.mean_supervector],
                            ubm.mean_supervector, ubm.variance_supervector,
                            [stats1, stats2, stats3])
    assert (abs(scores - ref_scores_00) < 1e-7).all()

    # 2/b/ Without test_channelOffset, with frame-length normalisation
    scores = linear_scoring([model1.mean_supervector, model2.mean_supervector],
                            ubm.mean_supervector, ubm.variance_supervector,
                            [stats1, stats2, stats3], [], True)
    assert (abs(scores - ref_scores_01) < 1e-7).all()

    # 2/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring([model1.mean_supervector, model2.mean_supervector],
                            ubm.mean_supervector, ubm.variance_supervector,
                            [stats1, stats2, stats3], test_channeloffset)
    assert (abs(scores - ref_scores_10) < 1e-7).all()

    # 2/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring([model1.mean_supervector, model2.mean_supervector],
                            ubm.mean_supervector, ubm.variance_supervector,
                            [stats1, stats2, stats3], test_channeloffset, True)
    assert (abs(scores - ref_scores_11) < 1e-7).all()

    # 3/ Using single model/sample
    # 3/a/ without frame-length normalisation
    score = linear_scoring(model1.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats1,
                           test_channeloffset[0])
    assert abs(score - ref_scores_10[0, 0]) < 1e-7
    score = linear_scoring(model1.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats2,
                           test_channeloffset[1])
    assert abs(score - ref_scores_10[0, 1]) < 1e-7
    score = linear_scoring(model1.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats3,
                           test_channeloffset[2])
    assert abs(score - ref_scores_10[0, 2]) < 1e-7
    score = linear_scoring(model2.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats1,
                           test_channeloffset[0])
    assert abs(score - ref_scores_10[1, 0]) < 1e-7
    score = linear_scoring(model2.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats2,
                           test_channeloffset[1])
    assert abs(score - ref_scores_10[1, 1]) < 1e-7
    score = linear_scoring(model2.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats3,
                           test_channeloffset[2])
    assert abs(score - ref_scores_10[1, 2]) < 1e-7

    # 3/b/ with frame-length normalisation
    score = linear_scoring(model1.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats1,
                           test_channeloffset[0], True)
    assert abs(score - ref_scores_11[0, 0]) < 1e-7
    score = linear_scoring(model1.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats2,
                           test_channeloffset[1], True)
    assert abs(score - ref_scores_11[0, 1]) < 1e-7
    score = linear_scoring(model1.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats3,
                           test_channeloffset[2], True)
    assert abs(score - ref_scores_11[0, 2]) < 1e-7
    score = linear_scoring(model2.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats1,
                           test_channeloffset[0], True)
    assert abs(score - ref_scores_11[1, 0]) < 1e-7
    score = linear_scoring(model2.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats2,
                           test_channeloffset[1], True)
    assert abs(score - ref_scores_11[1, 1]) < 1e-7
    score = linear_scoring(model2.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats3,
                           test_channeloffset[2], True)
    assert abs(score - ref_scores_11[1, 2]) < 1e-7
Example #15
def test_LinearScoring():

    ubm = GMMMachine(n_gaussians=2)
    ubm.weights = np.array([0.5, 0.5], "float64")
    ubm.means = np.array([[3, 70], [4, 72]], "float64")
    ubm.variances = np.array([[1, 10], [2, 5]], "float64")
    ubm.variance_thresholds = np.array([[0, 0], [0, 0]], "float64")

    model1 = GMMMachine(n_gaussians=2)
    model1.weights = np.array([0.5, 0.5], "float64")
    model1.means = np.array([[1, 2], [3, 4]], "float64")
    model1.variances = np.array([[9, 10], [11, 12]], "float64")
    model1.variance_thresholds = np.array([[0, 0], [0, 0]], "float64")

    model2 = GMMMachine(n_gaussians=2)
    model2.weights = np.array([0.5, 0.5], "float64")
    model2.means = np.array([[5, 6], [7, 8]], "float64")
    model2.variances = np.array([[13, 14], [15, 16]], "float64")
    model2.variance_thresholds = np.array([[0, 0], [0, 0]], "float64")

    stats1 = GMMStats(2, 2)
    stats1.sum_px = np.array([[1, 2], [3, 4]], "float64")
    stats1.n = np.array([1, 2], "float64")
    stats1.t = 1 + 2

    stats2 = GMMStats(2, 2)
    stats2.sum_px = np.array([[5, 6], [7, 8]], "float64")
    stats2.n = np.array([3, 4], "float64")
    stats2.t = 3 + 4

    stats3 = GMMStats(2, 2)
    stats3.sum_px = np.array([[5, 6], [7, 3]], "float64")
    stats3.n = np.array([3, 4], "float64")
    stats3.t = 3 + 4

    test_channeloffset = [
        np.array([[9, 8], [7, 6]], "float64"),
        np.array([[5, 4], [3, 2]], "float64"),
        np.array([[1, 0], [1, 2]], "float64"),
    ]

    # Reference scores (from Idiap internal matlab implementation)
    ref_scores_00 = np.array(
        [[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], "float64")
    ref_scores_01 = np.array(
        [
            [790.9666666666667, 743.9571428571428, 753.6714285714285],
            [738.5666666666667, 695.4428571428572, 704.5857142857144],
        ],
        "float64",
    )
    ref_scores_10 = np.array(
        [[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], "float64")
    ref_scores_11 = np.array(
        [
            [871.8333333333332, 776.3000000000001, 770.3571428571427],
            [793.8333333333333, 714.1857142857143, 717.5000000000000],
        ],
        "float64",
    )

    # 1/ Use GMMMachines
    # 1/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3])
    np.testing.assert_almost_equal(scores, ref_scores_00, decimal=7)

    # 1/b/ Without test_channelOffset, with frame-length normalisation
    scores = linear_scoring(
        [model1, model2],
        ubm,
        [stats1, stats2, stats3],
        frame_length_normalization=True,
    )
    np.testing.assert_almost_equal(scores, ref_scores_01, decimal=7)
    scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], 0,
                            True)
    np.testing.assert_almost_equal(scores, ref_scores_01, decimal=7)

    # 1/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3],
                            test_channeloffset)
    np.testing.assert_almost_equal(scores, ref_scores_10, decimal=7)

    # 1/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring(
        [model1, model2],
        ubm,
        [stats1, stats2, stats3],
        test_channeloffset,
        frame_length_normalization=True,
    )
    np.testing.assert_almost_equal(scores, ref_scores_11, decimal=7)

    # 2/ Use means instead of models
    # 2/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring([model1.means, model2.means], ubm,
                            [stats1, stats2, stats3])
    assert (abs(scores - ref_scores_00) < 1e-7).all()

    # 2/b/ Without test_channelOffset, with frame-length normalisation
    scores = linear_scoring(
        [model1.means, model2.means],
        ubm,
        [stats1, stats2, stats3],
        frame_length_normalization=True,
    )
    assert (abs(scores - ref_scores_01) < 1e-7).all()

    # 2/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring(
        [model1.means, model2.means],
        ubm,
        [stats1, stats2, stats3],
        test_channeloffset,
    )
    assert (abs(scores - ref_scores_10) < 1e-7).all()

    # 2/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring(
        [model1.means, model2.means],
        ubm,
        [stats1, stats2, stats3],
        test_channeloffset,
        frame_length_normalization=True,
    )
    assert (abs(scores - ref_scores_11) < 1e-7).all()

    # 3/ Using single model/sample
    # 3/a/ without frame-length normalisation
    score = linear_scoring(model1.means, ubm, stats1, test_channeloffset[0])
    np.testing.assert_almost_equal(score, ref_scores_10[0, 0], decimal=7)
    score = linear_scoring(model1.means, ubm, stats2, test_channeloffset[1])
    np.testing.assert_almost_equal(score, ref_scores_10[0, 1], decimal=7)
    score = linear_scoring(model1.means, ubm, stats3, test_channeloffset[2])
    np.testing.assert_almost_equal(score, ref_scores_10[0, 2], decimal=7)
    score = linear_scoring(model2.means, ubm, stats1, test_channeloffset[0])
    np.testing.assert_almost_equal(score, ref_scores_10[1, 0], decimal=7)
    score = linear_scoring(model2.means, ubm, stats2, test_channeloffset[1])
    np.testing.assert_almost_equal(score, ref_scores_10[1, 1], decimal=7)
    score = linear_scoring(model2.means, ubm, stats3, test_channeloffset[2])
    np.testing.assert_almost_equal(score, ref_scores_10[1, 2], decimal=7)

    # 3/b/ with frame-length normalisation
    score = linear_scoring(model1.means, ubm, stats1, test_channeloffset[0],
                           True)
    np.testing.assert_almost_equal(score, ref_scores_11[0, 0], decimal=7)
    score = linear_scoring(model1.means, ubm, stats2, test_channeloffset[1],
                           True)
    np.testing.assert_almost_equal(score, ref_scores_11[0, 1], decimal=7)
    score = linear_scoring(model1.means, ubm, stats3, test_channeloffset[2],
                           True)
    np.testing.assert_almost_equal(score, ref_scores_11[0, 2], decimal=7)
    score = linear_scoring(model2.means, ubm, stats1, test_channeloffset[0],
                           True)
    np.testing.assert_almost_equal(score, ref_scores_11[1, 0], decimal=7)
    score = linear_scoring(model2.means, ubm, stats2, test_channeloffset[1],
                           True)
    np.testing.assert_almost_equal(score, ref_scores_11[1, 1], decimal=7)
    score = linear_scoring(model2.means, ubm, stats3, test_channeloffset[2],
                           True)
    np.testing.assert_almost_equal(score, ref_scores_11[1, 2], decimal=7)
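
Every reference matrix in the two linear-scoring tests follows from a single formula: the UBM-centred first-order statistics are projected onto each model's mean offset, scaled by the UBM variances; the channel offset shifts the centring, and frame-length normalisation divides by stats.t. A sketch for one model/stats pair, with the offset in the (n_gaussians, n_features) layout of this second test (hypothetical helper; e.g. it gives 2372.9 for model1 and stats1, matching ref_scores_00[0, 0]):

import numpy as np

def linear_score_sketch(model_means, ubm_means, ubm_variances,
                        n, sum_px, t, offset=0.0, normalize=False):
    f_centered = sum_px - n[:, None] * (ubm_means + offset)   # F - N (m + offset)
    score = ((model_means - ubm_means) / ubm_variances * f_centered).sum()
    return score / t if normalize else score
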
Example #16
def test_JFATrainAndEnrol():
    # Train and enroll a JFAMachine

    # Calls the train function
    ubm = GMMMachine(2, 3)
    ubm.means = UBM_MEAN.reshape((2, 3))
    ubm.variances = UBM_VAR.reshape((2, 3))
    it = JFAMachine(2, 2, em_iterations=10, enroll_iterations=5, ubm=ubm)

    it.U = copy.deepcopy(M_u)
    it.V = copy.deepcopy(M_v)
    it.D = copy.deepcopy(M_d)
    it.fit(TRAINING_STATS_X, TRAINING_STATS_y)

    v_ref = np.array(
        [
            [0.245364911936476, 0.978133261775424],
            [0.769646805052223, 0.940070736856596],
            [0.310779202800089, 1.456332053893072],
            [0.184760934399551, 2.265139705602147],
            [0.701987784039800, 0.081632150899400],
            [0.074344030229297, 1.090248340917255],
        ],
        "float64",
    )
    u_ref = np.array(
        [
            [0.049424652628448, 0.060480486336896],
            [0.178104127464007, 1.884873813495153],
            [1.204011484266777, 2.281351307871720],
            [7.278512126426286, -0.390966087173334],
            [-0.084424326581145, -0.081725474934414],
            [4.042143689831097, -0.262576386580701],
        ],
        "float64",
    )
    d_ref = np.array(
        [
            9.648467e-18,
            2.63720683155e-12,
            2.11822157653706e-10,
            9.1047243e-17,
            1.41163442535567e-10,
            3.30581e-19,
        ],
        "float64",
    )

    eps = 1e-10
    np.testing.assert_allclose(it.V, v_ref, rtol=eps, atol=1e-8)
    np.testing.assert_allclose(it.U, u_ref, rtol=eps, atol=1e-8)
    np.testing.assert_allclose(it.D, d_ref, rtol=eps, atol=1e-8)

    # Calls the enroll function

    Ne = np.array([0.1579, 0.9245, 0.1323, 0.2458]).reshape((2, 2))
    Fe = np.array([
        0.1579,
        0.1925,
        0.3242,
        0.1234,
        0.2354,
        0.2734,
        0.2514,
        0.5874,
        0.3345,
        0.2463,
        0.4789,
        0.5236,
    ]).reshape((6, 2))
    gse1 = GMMStats(2, 3)
    gse1.n = Ne[:, 0]
    gse1.sum_px = Fe[:, 0].reshape(2, 3)
    gse2 = GMMStats(2, 3)
    gse2.n = Ne[:, 1]
    gse2.sum_px = Fe[:, 1].reshape(2, 3)

    gse = [gse1, gse2]
    latent_y, latent_z = it.enroll(gse)

    y_ref = np.array([0.555991469319657, 0.002773650670010], "float64")
    z_ref = np.array(
        [
            8.2228e-20,
            3.15216909492e-13,
            -1.48616735364395e-10,
            1.0625905e-17,
            3.7150503117895e-11,
            1.71104e-19,
        ],
        "float64",
    )

    np.testing.assert_allclose(latent_y, y_ref, rtol=eps, atol=1e-8)
    np.testing.assert_allclose(latent_z, z_ref, rtol=eps, atol=1e-8)
Example #17
def test_ISVTrainAndEnrol():
    # Train and enroll an 'ISVMachine'

    eps = 1e-10
    d_ref = np.array(
        [
            0.39601136,
            0.07348469,
            0.47712682,
            0.44738127,
            0.43179856,
            0.45086029,
        ],
        "float64",
    )
    u_ref = np.array(
        [
            [0.855125642430777, 0.563104284748032],
            [-0.325497865404680, 1.923598985291687],
            [0.511575659503837, 1.964288663083095],
            [9.330165761678115, 1.073623827995043],
            [0.511099245664012, 0.278551249248978],
            [5.065578541930268, 0.509565618051587],
        ],
        "float64",
    )
    z_ref = np.array(
        [[
            -0.079315777443826,
            0.092702428248543,
            -0.342488761656616,
            -0.059922635809136,
            0.133539981073604,
            0.213118695516570,
        ]],
        "float64",
    )
    """
    Calls the train function
    """
    ubm = GMMMachine(2, 3)
    ubm.means = UBM_MEAN.reshape((2, 3))
    ubm.variances = UBM_VAR.reshape((2, 3))

    it = ISVMachine(
        ubm=ubm,
        r_U=2,
        relevance_factor=4.0,
        em_iterations=10,
        enroll_iterations=5,
    )

    it.U = copy.deepcopy(M_u)
    it = it.fit(TRAINING_STATS_X, TRAINING_STATS_y)

    np.testing.assert_allclose(it.D, d_ref, rtol=eps, atol=1e-8)
    np.testing.assert_allclose(it.U, u_ref, rtol=eps, atol=1e-8)
    """
    Calls the enroll function
    """

    Ne = np.array([0.1579, 0.9245, 0.1323, 0.2458]).reshape((2, 2))
    Fe = np.array([
        0.1579,
        0.1925,
        0.3242,
        0.1234,
        0.2354,
        0.2734,
        0.2514,
        0.5874,
        0.3345,
        0.2463,
        0.4789,
        0.5236,
    ]).reshape((6, 2))
    gse1 = GMMStats(2, 3)
    gse1.n = Ne[:, 0]
    gse1.sum_px = Fe[:, 0].reshape(2, 3)
    gse2 = GMMStats(2, 3)
    gse2.n = Ne[:, 1]
    gse2.sum_px = Fe[:, 1].reshape(2, 3)

    gse = [gse1, gse2]
    latent_z = it.enroll(gse)
    np.testing.assert_allclose(latent_z, z_ref, rtol=eps, atol=1e-8)
Example #18
def test_ISVMachine():

  # Creates a UBM
  weights = numpy.array([0.4, 0.6], 'float64')
  means = numpy.array([[1, 6, 2], [4, 3, 2]], 'float64')
  variances = numpy.array([[1, 2, 1], [2, 1, 2]], 'float64')
  ubm = GMMMachine(2,3)
  ubm.weights = weights
  ubm.means = means
  ubm.variances = variances

  # Creates a ISVBaseMachine
  U = numpy.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]], 'float64')
  #V = numpy.array([[0], [0], [0], [0], [0], [0]], 'float64')
  d = numpy.array([0, 1, 0, 1, 0, 1], 'float64')
  base = ISVBase(ubm,2)
  base.u = U
  #base.v = V
  base.d = d

  # Creates a JFAMachine
  z = numpy.array([3,4,1,2,0,1], 'float64')
  m = ISVMachine(base)
  m.z = z

  n_gaussians,dim,ru    = m.shape
  supervector_length    = m.supervector_length
  assert n_gaussians          == 2
  assert dim                  == 3
  assert supervector_length   == 6
  assert ru                   == 2
  assert (m.z == z).all()

  # Saves and loads
  filename = str(tempfile.mkstemp(".hdf5")[1])
  m.save(bob.io.base.HDF5File(filename, 'w'))
  m_loaded = ISVMachine(bob.io.base.HDF5File(filename))
  m_loaded.isv_base = base
  assert m == m_loaded
  assert (m != m_loaded) is False
  assert m.is_similar_to(m_loaded)

  # Copy constructor
  mc = ISVMachine(m)
  assert m == mc

  # Variant
  mv = ISVMachine(base)
  # Checks for correctness
  #mv.isv_base = base
  m.z = z

  n_gaussians,dim,ru    = m.shape
  supervector_length    = m.supervector_length
  assert n_gaussians        == 2
  assert dim                == 3
  assert supervector_length == 6
  assert ru                 == 2
  assert (m.z == z).all()

  # Defines GMMStats
  gs = GMMStats(2,3)
  log_likelihood = -3.
  T = 1
  n = numpy.array([0.4, 0.6], 'float64')
  sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64')
  sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64')
  gs.log_likelihood = log_likelihood
  gs.t = T
  gs.n = n
  gs.sum_px = sumpx
  gs.sum_pxx = sumpxx

  # Forward GMMStats and check estimated value of the x speaker factor
  eps = 1e-10
  x_ref = numpy.array([0.291042849767692, 0.310273618998444], 'float64')
  score_ref = -3.280498193082100

  score = m(gs)
  assert numpy.allclose(m.x, x_ref, eps)
  assert abs(score_ref-score) < eps

  # Check using alternate forward() method
  supervector_length = m.supervector_length
  Ux = numpy.ndarray(shape=(supervector_length,), dtype=numpy.float64)
  m.estimate_ux(gs, Ux)
  score = m.forward_ux(gs, Ux)
  assert abs(score_ref-score) < eps

  # x and Ux
  x = numpy.ndarray((2,), numpy.float64)
  m.estimate_x(gs, x)
  n_gaussians,dim,_    = m.shape
  x_py = estimate_x(n_gaussians, dim, ubm.mean_supervector, ubm.variance_supervector, U, n, sumpx)
  assert numpy.allclose(x, x_py, eps)

  ux = numpy.ndarray((6,), numpy.float64)
  m.estimate_ux(gs, ux)
  n_gaussians,dim,_    = m.shape
  ux_py = estimate_ux(n_gaussians, dim, ubm.mean_supervector, ubm.variance_supervector, U, n, sumpx)
  assert numpy.allclose(ux, ux_py, eps)
  assert numpy.allclose(m.x, x, eps)

  score = m.forward_ux(gs, ux)
  assert abs(score_ref-score) < eps

  # Clean-up
  os.unlink(filename)
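
The estimate_x helper compared against above has the same closed form as the i-vector projection sketched after Example #8, with U in place of T; estimate_ux is then simply U @ x. A minimal sketch (hypothetical name; it returns x_ref for this test's inputs):

import numpy as np

def estimate_x_sketch(U, ubm_means, ubm_variances, n, sum_px):
    sigma = ubm_variances.ravel()
    Nd = np.repeat(n, ubm_means.shape[1])
    f_centered = sum_px.ravel() - Nd * ubm_means.ravel()    # F - N m
    Ut_Sinv = U.T / sigma
    lhs = np.eye(U.shape[1]) + Ut_Sinv @ (Nd[:, None] * U)
    return np.linalg.solve(lhs, Ut_Sinv @ f_centered)
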
Example #19
    0.9891,
    0.5341,
    0.0669,
    0.8854,
    0.9394,
    0.8990,
    0.0182,
    0.6259,
]).reshape((6, 2))
F = [F1, F2]

N1 = np.array([0.1379, 0.1821, 0.2178, 0.0418]).reshape((2, 2))
N2 = np.array([0.1069, 0.9397, 0.6164, 0.3545]).reshape((2, 2))
N = [N1, N2]

gs11 = GMMStats(2, 3)
gs11.n = N1[:, 0]
gs11.sum_px = F1[:, 0].reshape(2, 3)
gs12 = GMMStats(2, 3)
gs12.n = N1[:, 1]
gs12.sum_px = F1[:, 1].reshape(2, 3)

gs21 = GMMStats(2, 3)
gs21.n = N2[:, 0]
gs21.sum_px = F2[:, 0].reshape(2, 3)
gs22 = GMMStats(2, 3)
gs22.n = N2[:, 1]
gs22.sum_px = F2[:, 1].reshape(2, 3)

TRAINING_STATS_X = [gs11, gs12, gs21, gs22]
TRAINING_STATS_y = [0, 0, 1, 1]
Example #20
def test_JFATrainAndEnrol():
  # Train and enroll a JFAMachine

  # Calls the train function
  ubm = GMMMachine(2,3)
  ubm.mean_supervector = UBM_MEAN
  ubm.variance_supervector = UBM_VAR
  mb = JFABase(ubm, 2, 2)
  t = JFATrainer()
  t.initialize(mb, TRAINING_STATS)
  mb.u = M_u
  mb.v = M_v
  mb.d = M_d
  bob.learn.em.train_jfa(t,mb, TRAINING_STATS, initialize=False)

  v_ref = numpy.array([[0.245364911936476, 0.978133261775424], [0.769646805052223, 0.940070736856596], [0.310779202800089, 1.456332053893072],
        [0.184760934399551, 2.265139705602147], [0.701987784039800, 0.081632150899400], [0.074344030229297, 1.090248340917255]], 'float64')
  u_ref = numpy.array([[0.049424652628448, 0.060480486336896], [0.178104127464007, 1.884873813495153], [1.204011484266777, 2.281351307871720],
        [7.278512126426286, -0.390966087173334], [-0.084424326581145, -0.081725474934414], [4.042143689831097, -0.262576386580701]], 'float64')
  d_ref = numpy.array([9.648467e-18, 2.63720683155e-12, 2.11822157653706e-10, 9.1047243e-17, 1.41163442535567e-10, 3.30581e-19], 'float64')

  eps = 1e-10
  assert numpy.allclose(mb.v, v_ref, eps)
  assert numpy.allclose(mb.u, u_ref, eps)
  assert numpy.allclose(mb.d, d_ref, eps)

  # Calls the enroll function
  m = JFAMachine(mb)

  Ne = numpy.array([0.1579, 0.9245, 0.1323, 0.2458]).reshape((2,2))
  Fe = numpy.array([0.1579, 0.1925, 0.3242, 0.1234, 0.2354, 0.2734, 0.2514, 0.5874, 0.3345, 0.2463, 0.4789, 0.5236]).reshape((6,2))
  gse1 = GMMStats(2,3)
  gse1.n = Ne[:,0]
  gse1.sum_px = Fe[:,0].reshape(2,3)
  gse2 = GMMStats(2,3)
  gse2.n = Ne[:,1]
  gse2.sum_px = Fe[:,1].reshape(2,3)

  gse = [gse1, gse2]
  t.enroll(m, gse, 5)

  y_ref = numpy.array([0.555991469319657, 0.002773650670010], 'float64')
  z_ref = numpy.array([8.2228e-20, 3.15216909492e-13, -1.48616735364395e-10, 1.0625905e-17, 3.7150503117895e-11, 1.71104e-19], 'float64')
  assert numpy.allclose(m.y, y_ref, eps)
  assert numpy.allclose(m.z, z_ref, eps)
  
  #Testing exceptions
  nose.tools.assert_raises(RuntimeError, t.initialize, mb, [1,2,2])  
  nose.tools.assert_raises(RuntimeError, t.initialize, mb, [[1,2,2]])
  nose.tools.assert_raises(RuntimeError, t.e_step_u, mb, [1,2,2])  
  nose.tools.assert_raises(RuntimeError, t.e_step_u, mb, [[1,2,2]])
  nose.tools.assert_raises(RuntimeError, t.m_step_u, mb, [1,2,2])  
  nose.tools.assert_raises(RuntimeError, t.m_step_u, mb, [[1,2,2]])
  
  nose.tools.assert_raises(RuntimeError, t.e_step_v, mb, [1,2,2])  
  nose.tools.assert_raises(RuntimeError, t.e_step_v, mb, [[1,2,2]])  
  nose.tools.assert_raises(RuntimeError, t.m_step_v, mb, [1,2,2])  
  nose.tools.assert_raises(RuntimeError, t.m_step_v, mb, [[1,2,2]])  
    
  nose.tools.assert_raises(RuntimeError, t.e_step_d, mb, [1,2,2])  
  nose.tools.assert_raises(RuntimeError, t.e_step_d, mb, [[1,2,2]])
  nose.tools.assert_raises(RuntimeError, t.m_step_d, mb, [1,2,2])  
  nose.tools.assert_raises(RuntimeError, t.m_step_d, mb, [[1,2,2]])
  
  nose.tools.assert_raises(RuntimeError, t.enroll, m, [[1,2,2]],5)
Example #21
def test_JFAMachine():

  # Creates a UBM
  weights   = numpy.array([0.4, 0.6], 'float64')
  means     = numpy.array([[1, 6, 2], [4, 3, 2]], 'float64')
  variances = numpy.array([[1, 2, 1], [2, 1, 2]], 'float64')
  ubm           = GMMMachine(2,3)
  ubm.weights   = weights
  ubm.means     = means
  ubm.variances = variances

  # Creates a JFABase
  U = numpy.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]], 'float64')
  V = numpy.array([[6, 5], [4, 3], [2, 1], [1, 2], [3, 4], [5, 6]], 'float64')
  d = numpy.array([0, 1, 0, 1, 0, 1], 'float64')
  base = JFABase(ubm,2,2)
  base.u = U
  base.v = V
  base.d = d

  # Creates a JFAMachine
  y = numpy.array([1,2], 'float64')
  z = numpy.array([3,4,1,2,0,1], 'float64')
  m = JFAMachine(base)
  m.y = y
  m.z = z
  n_gaussians,dim,ru,rv = m.shape
  supervector_length    = m.supervector_length

  assert n_gaussians        == 2
  assert dim                == 3
  assert supervector_length == 6
  assert ru                 == 2
  assert rv                 == 2
  assert (m.y == y).all()
  assert (m.z == z).all()

  # Saves and loads
  filename = str(tempfile.mkstemp(".hdf5")[1])
  m.save(bob.io.base.HDF5File(filename, 'w'))
  m_loaded = JFAMachine(bob.io.base.HDF5File(filename))
  m_loaded.jfa_base = base
  assert m == m_loaded
  assert (m != m_loaded) is False
  assert m.is_similar_to(m_loaded)

  # Copy constructor
  mc = JFAMachine(m)
  assert m == mc

  # Variant
  #mv = JFAMachine()
  # Checks for correctness
  #mv.jfa_base = base
  #m.y = y
  #m.z = z
  #assert m.dim_c == 2
  #assert m.dim_d == 3
  #assert m.dim_cd == 6
  #assert m.dim_ru == 2
  #assert m.dim_rv == 2
  #assert (m.y == y).all()
  #assert (m.z == z).all()

  # Defines GMMStats
  gs = GMMStats(2,3)
  log_likelihood = -3.
  T = 1
  n = numpy.array([0.4, 0.6], 'float64')
  sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64')
  sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64')
  gs.log_likelihood = log_likelihood
  gs.t = T
  gs.n = n
  gs.sum_px = sumpx
  gs.sum_pxx = sumpxx

  # Forward GMMStats and check estimated value of the x speaker factor
  eps = 1e-10
  x_ref = numpy.array([0.291042849767692, 0.310273618998444], 'float64')
  score_ref = -2.111577181208289
  score = m.log_likelihood(gs)
  assert numpy.allclose(m.x, x_ref, eps)
  assert abs(score_ref-score) < eps

  # x and Ux
  x = numpy.ndarray((2,), numpy.float64)
  m.estimate_x(gs, x)
  n_gaussians, dim,_,_ = m.shape
  x_py = estimate_x(n_gaussians, dim, ubm.mean_supervector, ubm.variance_supervector, U, n, sumpx)
  assert numpy.allclose(x, x_py, eps)

  ux = numpy.ndarray((6,), numpy.float64)
  m.estimate_ux(gs, ux)
  n_gaussians, dim,_,_ = m.shape
  ux_py = estimate_ux(n_gaussians, dim, ubm.mean_supervector, ubm.variance_supervector, U, n, sumpx)
  assert numpy.allclose(ux, ux_py, eps)
  assert numpy.allclose(m.x, x, eps)

  score = m.forward_ux(gs, ux)

  assert abs(score_ref-score) < eps

  # Clean-up
  os.unlink(filename)
Example #22
def test_trainer_nosigma():
  # Ubm
  ubm = GMMMachine(2,3)
  ubm.weights = numpy.array([0.4,0.6])
  ubm.means = numpy.array([[1.,7,4],[4,5,3]])
  ubm.variances = numpy.array([[0.5,1.,1.5],[1.,1.5,2.]])

  # Defines GMMStats
  gs1 = GMMStats(2,3)
  log_likelihood1 = -3.
  T1 = 1
  n1 = numpy.array([0.4, 0.6], numpy.float64)
  sumpx1 = numpy.array([[1., 2., 3.], [2., 4., 3.]], numpy.float64)
  sumpxx1 = numpy.array([[10., 20., 30.], [40., 50., 60.]], numpy.float64)
  gs1.log_likelihood = log_likelihood1
  gs1.t = T1
  gs1.n = n1
  gs1.sum_px = sumpx1
  gs1.sum_pxx = sumpxx1

  gs2 = GMMStats(2,3)
  log_likelihood2 = -4.
  T2 = 1
  n2 = numpy.array([0.2, 0.8], numpy.float64)
  sumpx2 = numpy.array([[2., 1., 3.], [3., 4.1, 3.2]], numpy.float64)
  sumpxx2 = numpy.array([[12., 15., 25.], [39., 51., 62.]], numpy.float64)
  gs2.log_likelihood = log_likelihood2
  gs2.t = T2
  gs2.n = n2
  gs2.sum_px = sumpx2
  gs2.sum_pxx = sumpxx2

  data = [gs1, gs2]


  acc_Nij_Sigma_wij2_ref1  = {0: numpy.array([[ 0.03202305, -0.02947769], [-0.02947769,  0.0561132 ]]),
                             1: numpy.array([[ 0.07953279, -0.07829414], [-0.07829414,  0.13814242]])}
  acc_Fnorm_Sigma_wij_ref1 = {0: numpy.array([[-0.29622691,  0.61411796], [ 0.09391764, -0.27955961], [-0.39014455,  0.89367757]]),
                             1: numpy.array([[ 0.04695882, -0.13977981], [-0.05718673,  0.24159665], [-0.17098161,  0.47326585]])}
  acc_Snorm_ref1           = numpy.array([16.6, 22.4, 16.6, 61.4, 55., 97.4])
  N_ref1                   = numpy.array([0.6, 1.4])
  t_ref1                   = numpy.array([[  1.59543739, 11.78239235], [ -3.20130371, -6.66379081], [  4.79674111, 18.44618316],
                                          [ -0.91765407, -1.5319461 ], [  2.26805901,  3.03434944], [  2.76600031,  4.9935962 ]])

  acc_Nij_Sigma_wij2_ref2  = {0: numpy.array([[ 0.37558389, -0.15405228], [-0.15405228,  0.1421269 ]]),
                             1: numpy.array([[ 1.02076081, -0.57683953], [-0.57683953,  0.53912239]])}
  acc_Fnorm_Sigma_wij_ref2 = {0: numpy.array([[-1.1261668 ,  1.46496753], [-0.03579289, -0.37875811], [-1.09037391,  1.84372565]]),
                             1: numpy.array([[-0.01789645, -0.18937906], [ 0.35221084,  0.15854126], [-0.10004552,  0.72559036]])}
  acc_Snorm_ref2           = numpy.array([16.6, 22.4, 16.6, 61.4, 55., 97.4])
  N_ref2                   = numpy.array([0.6, 1.4])
  t_ref2                   = numpy.array([[  2.2133685,  12.70654597], [ -2.13959381, -4.98404887], [  4.35296231, 17.69059484],
                                          [ -0.54644055, -0.93594252], [  1.29308324,  1.67762053], [  1.67583072,  3.13894546]])
  acc_Nij_Sigma_wij2_ref = [acc_Nij_Sigma_wij2_ref1, acc_Nij_Sigma_wij2_ref2]
  acc_Fnorm_Sigma_wij_ref = [acc_Fnorm_Sigma_wij_ref1, acc_Fnorm_Sigma_wij_ref2]
  acc_Snorm_ref = [acc_Snorm_ref1, acc_Snorm_ref2]
  N_ref = [N_ref1, N_ref2]
  t_ref = [t_ref1, t_ref2]

  # Python implementation
  # Machine
  m = IVectorMachine(ubm, 2)
  t = numpy.array([[1.,2],[4,1],[0,3],[5,8],[7,10],[11,1]])
  sigma = numpy.array([1.,2.,1.,3.,2.,4.])

  # Initialization
  trainer = IVectorTrainerPy()
  trainer.initialize(m, data)
  m.t = t
  m.sigma = sigma
  for it in range(2):
    # E-Step
    trainer.e_step(m, data)
    for k in acc_Nij_Sigma_wij2_ref[it]:
      assert numpy.allclose(acc_Nij_Sigma_wij2_ref[it][k], trainer.m_acc_Nij_Sigma_wij2[k], 1e-5)
    for k in acc_Fnorm_Sigma_wij_ref[it]:
      assert numpy.allclose(acc_Fnorm_Sigma_wij_ref[it][k], trainer.m_acc_Fnorm_Sigma_wij[k], 1e-5)
    assert numpy.allclose(acc_Snorm_ref[it], trainer.m_acc_Snorm, 1e-5)
    assert numpy.allclose(N_ref[it], trainer.m_N, 1e-5)

    # M-Step
    trainer.m_step(m, data)
    assert numpy.allclose(t_ref[it], m.t, 1e-5)

  # C++ implementation
  # Machine
  m = IVectorMachine(ubm, 2)

  # Initialization
  trainer = IVectorTrainer()
  trainer.initialize(m)
  m.t = t
  m.sigma = sigma
  for it in range(2):
    # E-Step
    trainer.e_step(m, data)
    for k in acc_Nij_Sigma_wij2_ref[it]:
      assert numpy.allclose(acc_Nij_Sigma_wij2_ref[it][k], trainer.acc_nij_wij2[k], 1e-5)
    for k in acc_Fnorm_Sigma_wij_ref[it]:
      assert numpy.allclose(acc_Fnorm_Sigma_wij_ref[it][k], trainer.acc_fnormij_wij[k], 1e-5)

    # M-Step
    trainer.m_step(m)
    assert numpy.allclose(t_ref[it], m.t, 1e-5)


  #testing exceptions
  nose.tools.assert_raises(RuntimeError, trainer.e_step, m, [1,2,2])
Example #23
def test_trainer_update_sigma():
  # Ubm
  dim_c = 2
  dim_d = 3
  ubm = GMMMachine(dim_c,dim_d)
  ubm.weights = numpy.array([0.4,0.6])
  ubm.means = numpy.array([[1.,7,4],[4,5,3]])
  ubm.variances = numpy.array([[0.5,1.,1.5],[1.,1.5,2.]])

  # Defines GMMStats
  gs1 = GMMStats(dim_c,dim_d)
  log_likelihood1 = -3.
  T1 = 1
  n1 = numpy.array([0.4, 0.6], numpy.float64)
  sumpx1 = numpy.array([[1., 2., 3.], [2., 4., 3.]], numpy.float64)
  sumpxx1 = numpy.array([[10., 20., 30.], [40., 50., 60.]], numpy.float64)
  gs1.log_likelihood = log_likelihood1
  gs1.t = T1
  gs1.n = n1
  gs1.sum_px = sumpx1
  gs1.sum_pxx = sumpxx1

  gs2 = GMMStats(dim_c,dim_d)
  log_likelihood2 = -4.
  T2 = 1
  n2 = numpy.array([0.2, 0.8], numpy.float64)
  sumpx2 = numpy.array([[2., 1., 3.], [3., 4.1, 3.2]], numpy.float64)
  sumpxx2 = numpy.array([[12., 15., 25.], [39., 51., 62.]], numpy.float64)
  gs2.log_likelihood = log_likelihood2
  gs2.t = T2
  gs2.n = n2
  gs2.sum_px = sumpx2
  gs2.sum_pxx = sumpxx2

  data = [gs1, gs2]

  # Reference values
  acc_Nij_Sigma_wij2_ref1  = {0: numpy.array([[ 0.03202305, -0.02947769], [-0.02947769,  0.0561132 ]]),
                              1: numpy.array([[ 0.07953279, -0.07829414], [-0.07829414,  0.13814242]])}
  acc_Fnorm_Sigma_wij_ref1 = {0: numpy.array([[-0.29622691,  0.61411796], [ 0.09391764, -0.27955961], [-0.39014455,  0.89367757]]),
                              1: numpy.array([[ 0.04695882, -0.13977981], [-0.05718673,  0.24159665], [-0.17098161,  0.47326585]])}
  acc_Snorm_ref1           = numpy.array([16.6, 22.4, 16.6, 61.4, 55., 97.4])
  N_ref1                   = numpy.array([0.6, 1.4])
  t_ref1                   = numpy.array([[  1.59543739, 11.78239235], [ -3.20130371, -6.66379081], [  4.79674111, 18.44618316],
                                          [ -0.91765407, -1.5319461 ], [  2.26805901,  3.03434944], [  2.76600031,  4.9935962 ]])
  sigma_ref1               = numpy.array([ 16.39472121, 34.72955353,  3.3108037, 43.73496916, 38.85472445, 68.22116903])

  acc_Nij_Sigma_wij2_ref2  = {0: numpy.array([[ 0.50807426, -0.11907756], [-0.11907756,  0.12336544]]),
                              1: numpy.array([[ 1.18602399, -0.2835859 ], [-0.2835859 ,  0.39440498]])}
  acc_Fnorm_Sigma_wij_ref2 = {0: numpy.array([[ 0.07221453,  1.1189786 ], [-0.08681275, -0.35396112], [ 0.15902728,  1.47293972]]),
                              1: numpy.array([[-0.04340637, -0.17698056], [ 0.10662127,  0.21484933],[ 0.13116645,  0.64474271]])}
  acc_Snorm_ref2           = numpy.array([16.6, 22.4, 16.6, 61.4, 55., 97.4])
  N_ref2                   = numpy.array([0.6, 1.4])
  t_ref2                   = numpy.array([[  2.93105054, 11.89961223], [ -1.08988119, -3.92120757], [  4.02093173, 15.82081981],
                                          [ -0.17376634, -0.57366984], [  0.26585634,  0.73589952], [  0.60557877,   2.07014704]])
  sigma_ref2               = numpy.array([5.12154025e+00, 3.48623823e+01, 1.00000000e-05, 4.37792350e+01, 3.91525332e+01, 6.85613258e+01])

  acc_Nij_Sigma_wij2_ref = [acc_Nij_Sigma_wij2_ref1, acc_Nij_Sigma_wij2_ref2]
  acc_Fnorm_Sigma_wij_ref = [acc_Fnorm_Sigma_wij_ref1, acc_Fnorm_Sigma_wij_ref2]
  acc_Snorm_ref = [acc_Snorm_ref1, acc_Snorm_ref2]
  N_ref = [N_ref1, N_ref2]
  t_ref = [t_ref1, t_ref2]
  sigma_ref = [sigma_ref1, sigma_ref2]


  # Python implementation
  # Machine
  m = IVectorMachine(ubm, 2)
  t = numpy.array([[1.,2],[4,1],[0,3],[5,8],[7,10],[11,1]])
  sigma = numpy.array([1.,2.,1.,3.,2.,4.])

  # Initialization
  trainer = IVectorTrainerPy(sigma_update=True)
  trainer.initialize(m, data)
  m.t = t
  m.sigma = sigma
  for it in range(2):
    # E-Step
    trainer.e_step(m, data)
    for k in acc_Nij_Sigma_wij2_ref[it]:
      assert numpy.allclose(acc_Nij_Sigma_wij2_ref[it][k], trainer.m_acc_Nij_Sigma_wij2[k], 1e-5)
    for k in acc_Fnorm_Sigma_wij_ref[it]:
      assert numpy.allclose(acc_Fnorm_Sigma_wij_ref[it][k], trainer.m_acc_Fnorm_Sigma_wij[k], 1e-5)
    assert numpy.allclose(acc_Snorm_ref[it], trainer.m_acc_Snorm, 1e-5)
    assert numpy.allclose(N_ref[it], trainer.m_N, 1e-5)

    # M-Step
    trainer.m_step(m, data)
    assert numpy.allclose(t_ref[it], m.t, 1e-5)
    assert numpy.allclose(sigma_ref[it], m.sigma, 1e-5)


  # C++ implementation
  # Machine
  m = IVectorMachine(ubm, 2)
  m.variance_threshold = 1e-5

  # Initialization
  trainer = IVectorTrainer(update_sigma=True)
  trainer.initialize(m)
  m.t = t
  m.sigma = sigma
  for it in range(2):
    # E-Step
    trainer.e_step(m, data)
    for k in acc_Nij_Sigma_wij2_ref[it]:
      assert numpy.allclose(acc_Nij_Sigma_wij2_ref[it][k], trainer.acc_nij_wij2[k], 1e-5)
    for k in acc_Fnorm_Sigma_wij_ref[it]:
      assert numpy.allclose(acc_Fnorm_Sigma_wij_ref[it][k], trainer.acc_fnormij_wij[k], 1e-5)
    assert numpy.allclose(acc_Snorm_ref[it].reshape(dim_c,dim_d), trainer.acc_snormij, 1e-5)
    assert numpy.allclose(N_ref[it], trainer.acc_nij, 1e-5)

    # M-Step
    trainer.m_step(m)
    assert numpy.allclose(t_ref[it], m.t, 1e-5)
    assert numpy.allclose(sigma_ref[it], m.sigma, 1e-5)
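
For reference, the accumulators checked above derive from the per-session posterior of the latent i-vector. A minimal self-contained sketch, assuming the standard total-variability model (supervector = m + T·w with diagonal covariance Σ); all names below are illustrative and not part of the bob API:

import numpy

# Posterior of w for gs1 under the standard i-vector E-step (illustrative).
T_mat = numpy.array([[1., 2], [4, 1], [0, 3], [5, 8], [7, 10], [11, 1]])  # (C*D, R)
sig = numpy.array([1., 2., 1., 3., 2., 4.])    # diagonal of Sigma
m_sv = numpy.array([1., 7, 4, 4, 5, 3])        # UBM means, flattened
nn = numpy.repeat(numpy.array([0.4, 0.6]), 3)  # n1, one entry per dimension
f_sv = numpy.array([1., 2., 3., 2., 4., 3.])   # sumpx1, flattened

# precision = I + T^t diag(n/sigma) T ; mean = precision^-1 T^t Sigma^-1 (f - n*m)
precision = numpy.eye(2) + numpy.dot(T_mat.T * (nn / sig), T_mat)
w_mean = numpy.linalg.solve(precision, numpy.dot(T_mat.T, (f_sv - nn * m_sv) / sig))
print(w_mean)  # roughly [-0.042, 0.215] for this data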
Exemplo n.º 24
0
def test_GMMStats():
    # Test a GMMStats
    # Initializes a GMMStats
    gs = GMMStats(2, 3)
    log_likelihood = -3.
    T = 57
    n = numpy.array([4.37, 5.31], 'float64')
    sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64')
    sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64')
    gs.log_likelihood = log_likelihood
    gs.t = T
    gs.n = n
    gs.sum_px = sumpx
    gs.sum_pxx = sumpxx
    assert gs.log_likelihood == log_likelihood
    assert gs.t == T
    assert (gs.n == n).all()
    assert (gs.sum_px == sumpx).all()
    assert (gs.sum_pxx == sumpxx).all()
    assert gs.shape == (2, 3)

    # Saves and reads from file
    filename = str(tempfile.mkstemp(".hdf5")[1])
    gs.save(bob.io.base.HDF5File(filename, 'w'))
    gs_loaded = GMMStats(bob.io.base.HDF5File(filename))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    # Saves and reads from file using the keyword argument
    os.unlink(filename)  # remove the previous temporary file
    filename = str(tempfile.mkstemp(".hdf5")[1])
    gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
    gs_loaded = GMMStats(bob.io.base.HDF5File(filename))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    # Saves with the keyword argument and loads via the `load` method
    os.unlink(filename)  # remove the previous temporary file
    filename = str(tempfile.mkstemp(".hdf5")[1])
    gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
    gs_loaded = GMMStats()
    gs_loaded.load(bob.io.base.HDF5File(filename))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    # Saves and loads using the `hdf5` keyword argument for both calls
    os.unlink(filename)  # remove the previous temporary file
    filename = str(tempfile.mkstemp(".hdf5")[1])
    gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
    gs_loaded = GMMStats()
    gs_loaded.load(hdf5=bob.io.base.HDF5File(filename))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    # Makes them different
    gs_loaded.t = 58
    assert (gs == gs_loaded) is False
    assert gs != gs_loaded
    assert (gs.is_similar_to(gs_loaded)) is False
    # Accumulates from another GMMStats
    gs2 = GMMStats(2, 3)
    gs2.log_likelihood = log_likelihood
    gs2.t = T
    gs2.n = n
    gs2.sum_px = sumpx
    gs2.sum_pxx = sumpxx
    gs2 += gs
    eps = 1e-8
    assert gs2.log_likelihood == 2 * log_likelihood
    assert gs2.t == 2 * T
    assert numpy.allclose(gs2.n, 2 * n, eps)
    assert numpy.allclose(gs2.sum_px, 2 * sumpx, eps)
    assert numpy.allclose(gs2.sum_pxx, 2 * sumpxx, eps)

    # Re-initializes and checks for zeros
    gs_loaded.init()
    assert gs_loaded.log_likelihood == 0
    assert gs_loaded.t == 0
    assert (gs_loaded.n == 0).all()
    assert (gs_loaded.sum_px == 0).all()
    assert (gs_loaded.sum_pxx == 0).all()
    # Resize and checks size
    assert gs_loaded.shape == (2, 3)
    gs_loaded.resize(4, 5)
    assert gs_loaded.shape == (4, 5)
    assert gs_loaded.sum_px.shape[0] == 4
    assert gs_loaded.sum_px.shape[1] == 5

    # Clean-up
    os.unlink(filename)
Exemplo n.º 25
0
def test_LinearScoring():

  ubm = GMMMachine(2, 2)
  ubm.weights   = numpy.array([0.5, 0.5], 'float64')
  ubm.means     = numpy.array([[3, 70], [4, 72]], 'float64')
  ubm.variances = numpy.array([[1, 10], [2, 5]], 'float64')
  ubm.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

  model1 = GMMMachine(2, 2)
  model1.weights   = numpy.array([0.5, 0.5], 'float64')
  model1.means     = numpy.array([[1, 2], [3, 4]], 'float64')
  model1.variances = numpy.array([[9, 10], [11, 12]], 'float64')
  model1.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

  model2 = GMMMachine(2, 2)
  model2.weights   = numpy.array([0.5, 0.5], 'float64')
  model2.means     = numpy.array([[5, 6], [7, 8]], 'float64')
  model2.variances = numpy.array([[13, 14], [15, 16]], 'float64')
  model2.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

  stats1 = GMMStats(2, 2)
  stats1.sum_px = numpy.array([[1, 2], [3, 4]], 'float64')
  stats1.n = numpy.array([1, 2], 'float64')
  stats1.t = 1+2

  stats2 = GMMStats(2, 2)
  stats2.sum_px = numpy.array([[5, 6], [7, 8]], 'float64')
  stats2.n = numpy.array([3, 4], 'float64')
  stats2.t = 3+4

  stats3 = GMMStats(2, 2)
  stats3.sum_px = numpy.array([[5, 6], [7, 3]], 'float64')
  stats3.n = numpy.array([3, 4], 'float64')
  stats3.t = 3+4

  test_channeloffset = [numpy.array([9, 8, 7, 6], 'float64'), numpy.array([5, 4, 3, 2], 'float64'), numpy.array([1, 0, 1, 2], 'float64')]

  # Reference scores (from an Idiap-internal Matlab implementation)
  ref_scores_00 = numpy.array([[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], 'float64')
  ref_scores_01 = numpy.array( [[790.9666666666667, 743.9571428571428, 753.6714285714285], [738.5666666666667, 695.4428571428572, 704.5857142857144]], 'float64')
  ref_scores_10 = numpy.array([[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], 'float64')
  ref_scores_11 = numpy.array([[871.8333333333332, 776.3000000000001, 770.3571428571427], [793.8333333333333, 714.1857142857143, 717.5000000000000]], 'float64')


  # 1/ Use GMMMachines
  # 1/a/ Without test_channelOffset, without frame-length normalisation
  scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3])
  assert (abs(scores - ref_scores_00) < 1e-7).all()

  # 1/b/ Without test_channelOffset, with frame-length normalisation
  scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], [], True)
  assert (abs(scores - ref_scores_01) < 1e-7).all()
  #scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], (), True)
  #assert (abs(scores - ref_scores_01) < 1e-7).all()
  #scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], None, True)
  #assert (abs(scores - ref_scores_01) < 1e-7).all()

  # 1/c/ With test_channelOffset, without frame-length normalisation
  scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], test_channeloffset)
  assert (abs(scores - ref_scores_10) < 1e-7).all()

  # 1/d/ With test_channelOffset, with frame-length normalisation
  scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], test_channeloffset, True)
  assert (abs(scores - ref_scores_11) < 1e-7).all()


  # 2/ Use mean/variance supervectors
  # 2/a/ Without test_channelOffset, without frame-length normalisation
  scores = linear_scoring([model1.mean_supervector, model2.mean_supervector], ubm.mean_supervector, ubm.variance_supervector, [stats1, stats2, stats3])
  assert (abs(scores - ref_scores_00) < 1e-7).all()

  # 2/b/ Without test_channelOffset, with frame-length normalisation
  scores = linear_scoring([model1.mean_supervector, model2.mean_supervector], ubm.mean_supervector, ubm.variance_supervector, [stats1, stats2, stats3], [], True)
  assert (abs(scores - ref_scores_01) < 1e-7).all()

  # 2/c/ With test_channelOffset, without frame-length normalisation
  scores = linear_scoring([model1.mean_supervector, model2.mean_supervector], ubm.mean_supervector, ubm.variance_supervector, [stats1, stats2, stats3], test_channeloffset)
  assert (abs(scores - ref_scores_10) < 1e-7).all()

  # 2/d/ With test_channelOffset, with frame-length normalisation
  scores = linear_scoring([model1.mean_supervector, model2.mean_supervector], ubm.mean_supervector, ubm.variance_supervector, [stats1, stats2, stats3], test_channeloffset, True)
  assert (abs(scores - ref_scores_11) < 1e-7).all()


  # 3/ Using single model/sample
  # 3/a/ With test_channelOffset, without frame-length normalisation
  score = linear_scoring(model1.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats1, test_channeloffset[0])
  assert abs(score - ref_scores_10[0,0]) < 1e-7
  score = linear_scoring(model1.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats2, test_channeloffset[1])
  assert abs(score - ref_scores_10[0,1]) < 1e-7
  score = linear_scoring(model1.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats3, test_channeloffset[2])
  assert abs(score - ref_scores_10[0,2]) < 1e-7
  score = linear_scoring(model2.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats1, test_channeloffset[0])
  assert abs(score - ref_scores_10[1,0]) < 1e-7
  score = linear_scoring(model2.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats2, test_channeloffset[1])
  assert abs(score - ref_scores_10[1,1]) < 1e-7
  score = linear_scoring(model2.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats3, test_channeloffset[2])
  assert abs(score - ref_scores_10[1,2]) < 1e-7


  # 3/b/ With test_channelOffset, with frame-length normalisation
  score = linear_scoring(model1.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats1, test_channeloffset[0], True)
  assert abs(score - ref_scores_11[0,0]) < 1e-7
  score = linear_scoring(model1.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats2, test_channeloffset[1], True)
  assert abs(score - ref_scores_11[0,1]) < 1e-7
  score = linear_scoring(model1.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats3, test_channeloffset[2], True)
  assert abs(score - ref_scores_11[0,2]) < 1e-7
  score = linear_scoring(model2.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats1, test_channeloffset[0], True)
  assert abs(score - ref_scores_11[1,0]) < 1e-7
  score = linear_scoring(model2.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats2, test_channeloffset[1], True)
  assert abs(score - ref_scores_11[1,1]) < 1e-7
  score = linear_scoring(model2.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats3, test_channeloffset[2], True)
  assert abs(score - ref_scores_11[1,2]) < 1e-7
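
The first few reference values above can be reproduced by hand. A minimal sketch, assuming linear scoring computes (model − ubm)ᵀ Σ⁻¹ (F − N·(ubm + offset)) over mean supervectors, optionally divided by the frame count; the names below are illustrative only:

import numpy

u = numpy.array([3., 70, 4, 72])            # UBM mean supervector
s2 = numpy.array([1., 10, 2, 5])            # UBM variance supervector
m1 = numpy.array([1., 2, 3, 4])             # model1 mean supervector
f = numpy.array([1., 2, 3, 4])              # stats1.sum_px, flattened
n = numpy.repeat(numpy.array([1., 2]), 2)   # stats1.n, one entry per dimension

score = numpy.dot(m1 - u, (f - n * u) / s2)
print(score)       # 2372.9, i.e. ref_scores_00[0, 0]
print(score / 3.)  # 790.9666..., i.e. ref_scores_01[0, 0] (divided by stats1.t)

offset = numpy.array([9., 8, 7, 6])         # test_channeloffset[0]
print(numpy.dot(m1 - u, (f - n * (u + offset)) / s2))  # 2615.5 == ref_scores_10[0, 0]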
Exemplo n.º 26
0
def test_GMMStats():
    # Test a GMMStats
    # Initializes a GMMStats
    n_gaussians = 2
    n_features = 3
    gs = GMMStats(n_gaussians, n_features)
    log_likelihood = -3.0
    T = 57
    n = np.array([4.37, 5.31], "float64")
    sumpx = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], "float64")
    sumpxx = np.array([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]], "float64")
    gs.log_likelihood = log_likelihood
    gs.t = T
    gs.n = n
    gs.sum_px = sumpx
    gs.sum_pxx = sumpxx
    np.testing.assert_equal(gs.log_likelihood, log_likelihood)
    np.testing.assert_equal(gs.t, T)
    np.testing.assert_equal(gs.n, n)
    np.testing.assert_equal(gs.sum_px, sumpx)
    np.testing.assert_equal(gs.sum_pxx, sumpxx)
    np.testing.assert_equal(gs.shape, (n_gaussians, n_features))

    # Saves and reads from file using `from_hdf5`
    filename = str(tempfile.mkstemp(".hdf5")[1])
    gs.save(HDF5File(filename, "w"))
    gs_loaded = GMMStats.from_hdf5(HDF5File(filename, "r"))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    assert type(gs_loaded.n_gaussians) is np.int64
    assert type(gs_loaded.n_features) is np.int64
    assert type(gs_loaded.log_likelihood) is np.float64

    # Saves and loads from file using `load`
    os.unlink(filename)  # remove the previous temporary file
    filename = str(tempfile.mkstemp(".hdf5")[1])
    gs.save(hdf5=HDF5File(filename, "w"))
    gs_loaded = GMMStats(n_gaussians, n_features)
    gs_loaded.load(HDF5File(filename, "r"))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    # Makes them different
    gs_loaded.t = 58
    assert (gs == gs_loaded) is False
    assert gs != gs_loaded
    assert not (gs.is_similar_to(gs_loaded))

    # Accumulates from another GMMStats
    gs2 = GMMStats(n_gaussians, n_features)
    gs2.log_likelihood = log_likelihood
    gs2.t = T
    gs2.n = n.copy()
    gs2.sum_px = sumpx.copy()
    gs2.sum_pxx = sumpxx.copy()
    gs2 += gs
    np.testing.assert_equal(gs2.log_likelihood, 2 * log_likelihood)
    np.testing.assert_equal(gs2.t, 2 * T)
    np.testing.assert_almost_equal(gs2.n, 2 * n, decimal=8)
    np.testing.assert_almost_equal(gs2.sum_px, 2 * sumpx, decimal=8)
    np.testing.assert_almost_equal(gs2.sum_pxx, 2 * sumpxx, decimal=8)

    # Re-initializes and checks for zeros
    gs_loaded.init_fields()
    np.testing.assert_equal(gs_loaded.log_likelihood, 0)
    np.testing.assert_equal(gs_loaded.t, 0)
    np.testing.assert_equal(gs_loaded.n, np.zeros((n_gaussians, )))
    np.testing.assert_equal(gs_loaded.sum_px,
                            np.zeros((n_gaussians, n_features)))
    np.testing.assert_equal(gs_loaded.sum_pxx,
                            np.zeros((n_gaussians, n_features)))
    # Resize and checks size
    assert gs_loaded.shape == (n_gaussians, n_features)
    gs_loaded.resize(4, 5)
    assert gs_loaded.shape == (4, 5)
    assert gs_loaded.sum_px.shape[0] == 4
    assert gs_loaded.sum_px.shape[1] == 5

    # Clean-up
    os.unlink(filename)
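
The n/sum_px/sum_pxx fields exercised above are the standard zeroth-, first- and second-order EM sufficient statistics of a diagonal-covariance GMM. A minimal self-contained sketch of how they would be accumulated from raw data, assuming those standard definitions (none of the names below are bob API):

import numpy as np

def accumulate_stats(X, weights, means, variances):
    # Per-frame, per-component log w_c + log N(x; mu_c, diag(sigma_c^2)).
    log_p = -0.5 * (
        np.log(2 * np.pi * variances).sum(axis=1)
        + ((X[:, None, :] - means) ** 2 / variances).sum(axis=2)
    ) + np.log(weights)
    # Responsibilities via a numerically stable softmax over components.
    log_p -= log_p.max(axis=1, keepdims=True)
    resp = np.exp(log_p)
    resp /= resp.sum(axis=1, keepdims=True)
    n = resp.sum(axis=0)         # zeroth order, shape (C,)
    sum_px = resp.T @ X          # first order, shape (C, D)
    sum_pxx = resp.T @ (X ** 2)  # second order, shape (C, D)
    return n, sum_px, sum_pxx

X = np.random.RandomState(0).randn(57, 3)
n, sum_px, sum_pxx = accumulate_stats(
    X, np.array([0.5, 0.5]), np.zeros((2, 3)), np.ones((2, 3)))
assert np.isclose(n.sum(), len(X))  # responsibilities sum to one per frame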
Exemplo n.º 27
0
    def acc(self, task_id):
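        """Accumulates GMM sufficient statistics over the aligned features of
        one task, skipping ignored allophones, and saves them to HDF5."""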
        mix_file = util.cache_path(self.ubm)
        align_file = util.cache_path(self.alignment[task_id])
        feat_file = util.cache_path(self.features[task_id])
        allo_file = util.cache_path(self.allophones)

        logging.info("Reading mixture file from '%s'..." % mix_file)
        gmm = sc.MixtureSet(mix_file)
        logging.info("Read %d means and %d covariances of dimension %d" %
                     (gmm.nMeans, gmm.nCovs, gmm.dim))

        ubm = convert_gmm(gmm)

        ivm = IVectorMachine(ubm, self.dim)
        ivm.variance_threshold = 1e-5

        gs = GMMStats(gmm.nMeans, gmm.dim)

        logging.info(
            "Opening alignment cache '%s' with allophones from '%s'; ignoring '%s'"
            % (align_file, allo_file, ",".join(self.allophones_to_ignore)))
        aligncache = sc.FileArchive(align_file)
        aligncache.setAllophones(allo_file)

        cache = sc.FileArchive(feat_file)

        for a in cache.ft.keys():
            if a.endswith(".attribs"):
                continue
            logging.info("Reading '%s'..." % a)

            time, data = cache.read(a, "feat")

            align = aligncache.read(a, "align")
            if len(align) < 1:
                logging.warning("No data for segment: '%s' in alignment." % a)
                continue
            allos = list(aligncache.allophones[i] for (t, i, s, w) in align)
            T = len(
                list(
                    filter(lambda al: al not in self.allophones_to_ignore,
                           allos)))

            feat = np.empty((T, len(data[0])))
            k = 0

            for t in range(len(data)):
                (_, allo, state, weight) = align[t]
                if aligncache.allophones[
                        allo] not in self.allophones_to_ignore:
                    feat[k, :] = data[t]
                    k += 1

            ivm.ubm.acc_statistics(feat, gs)

        logging.info("Writing Gaussian statistics to '%s'" %
                     self.single_accu_caches[task_id].get_path())
        gs.save(HDF5File(self.single_accu_caches[task_id].get_path(), "w"))
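
A minimal sketch of the merge step that would typically follow, assuming the additive `+=` semantics demonstrated in the GMMStats tests above; `accumulator_paths`, `n_means` and `dim` are hypothetical stand-ins for the per-task HDF5 files and the UBM dimensions:

total = GMMStats(n_means, dim)          # hypothetical dimensions
for path in accumulator_paths:          # hypothetical list of accumulator files
    part = GMMStats(n_means, dim)
    part.load(HDF5File(path, "r"))      # same layout as written by `acc` above
    total += part                       # t, n, sum_px and sum_pxx add element-wise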
Exemplo n.º 28
0
def test_JFATrainAndEnrol():
    # Train and enroll a JFAMachine

    # Calls the train function
    ubm = GMMMachine(2, 3)
    ubm.mean_supervector = UBM_MEAN
    ubm.variance_supervector = UBM_VAR
    mb = JFABase(ubm, 2, 2)
    t = JFATrainer()
    t.initialize(mb, TRAINING_STATS)
    mb.u = M_u
    mb.v = M_v
    mb.d = M_d
    bob.learn.em.train_jfa(t, mb, TRAINING_STATS, initialize=False)

    v_ref = numpy.array([[0.245364911936476, 0.978133261775424],
                         [0.769646805052223, 0.940070736856596],
                         [0.310779202800089, 1.456332053893072],
                         [0.184760934399551, 2.265139705602147],
                         [0.701987784039800, 0.081632150899400],
                         [0.074344030229297, 1.090248340917255]], 'float64')
    u_ref = numpy.array([[0.049424652628448, 0.060480486336896],
                         [0.178104127464007, 1.884873813495153],
                         [1.204011484266777, 2.281351307871720],
                         [7.278512126426286, -0.390966087173334],
                         [-0.084424326581145, -0.081725474934414],
                         [4.042143689831097, -0.262576386580701]], 'float64')
    d_ref = numpy.array([
        9.648467e-18, 2.63720683155e-12, 2.11822157653706e-10, 9.1047243e-17,
        1.41163442535567e-10, 3.30581e-19
    ], 'float64')

    eps = 1e-10
    assert numpy.allclose(mb.v, v_ref, eps)
    assert numpy.allclose(mb.u, u_ref, eps)
    assert numpy.allclose(mb.d, d_ref, eps)

    # Calls the enroll function
    m = JFAMachine(mb)

    Ne = numpy.array([0.1579, 0.9245, 0.1323, 0.2458]).reshape((2, 2))
    Fe = numpy.array([
        0.1579, 0.1925, 0.3242, 0.1234, 0.2354, 0.2734, 0.2514, 0.5874, 0.3345,
        0.2463, 0.4789, 0.5236
    ]).reshape((6, 2))
    gse1 = GMMStats(2, 3)
    gse1.n = Ne[:, 0]
    gse1.sum_px = Fe[:, 0].reshape(2, 3)
    gse2 = GMMStats(2, 3)
    gse2.n = Ne[:, 1]
    gse2.sum_px = Fe[:, 1].reshape(2, 3)

    gse = [gse1, gse2]
    t.enroll(m, gse, 5)

    y_ref = numpy.array([0.555991469319657, 0.002773650670010], 'float64')
    z_ref = numpy.array([
        8.2228e-20, 3.15216909492e-13, -1.48616735364395e-10, 1.0625905e-17,
        3.7150503117895e-11, 1.71104e-19
    ], 'float64')
    assert numpy.allclose(m.y, y_ref, eps)
    assert numpy.allclose(m.z, z_ref, eps)
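
    # For intuition, a minimal sketch (assuming the standard JFA decomposition
    # M = m + V y + U x + D z, with the session term U x dropped at enrollment)
    # of how the enrolled factors map back to a speaker-adapted supervector:
    enrolled_mean = UBM_MEAN + numpy.dot(mb.v, m.y) + mb.d * m.z
    assert enrolled_mean.shape == (6,)  # 2 Gaussians x 3 features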

    # Testing exceptions
    nose.tools.assert_raises(RuntimeError, t.initialize, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.initialize, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.e_step_u, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.e_step_u, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.m_step_u, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.m_step_u, mb, [[1, 2, 2]])

    nose.tools.assert_raises(RuntimeError, t.e_step_v, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.e_step_v, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.m_step_v, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.m_step_v, mb, [[1, 2, 2]])

    nose.tools.assert_raises(RuntimeError, t.e_step_d, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.e_step_d, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.m_step_d, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.m_step_d, mb, [[1, 2, 2]])

    nose.tools.assert_raises(RuntimeError, t.enroll, m, [[1, 2, 2]], 5)
Exemplo n.º 29
0
    def forward(self, task_id):
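        """Extracts one i-vector per recording by pooling the aligned features
        of all its segments; optionally length-normalizes the result."""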
        mixfile = util.cache_path(self.ubm)
        ivmfile = tk.uncached_path(self.t_matrix)
        alignfile = util.cache_path(self.alignment[task_id])
        allofile = tk.uncached_path(self.allophones)
        alloignore = self.allophones_to_ignore
        featfile = util.cache_path(self.features[task_id])
        ivecdim = self.dim
        lengthnorm = bool(self.length_norm)

        gmm = sc.MixtureSet(mixfile)
        ubm = convert_gmm(gmm)

        ivm = IVectorMachine(ubm, ivecdim)
        ivm.load(HDF5File(ivmfile))

        tmp_ivec_file = tempfile.mktemp(suffix=".ivec")

        out = sc.FileArchive(tmp_ivec_file)

        logging.info(
            "Opening alignment cache '%s' with allophones from '%s'; ignoring '%s'"
            % (alignfile, allofile, ",".join(alloignore)))
        aligncache = sc.FileArchive(alignfile)
        aligncache.setAllophones(allofile)

        cache = sc.FileArchive(featfile)
        cur_rec = ""
        tmp_feat = None
        tmp_segs = []

        for a in sorted(cache.ft.keys()):
            if a.endswith(".attribs"):
                continue
            logging.info("Reading '%s'..." % a)
            ncorpus, nrec, nseg = a.split("/")

            try:
                time, data = cache.read(a, "feat")

                align = aligncache.read(a, "align")
                allos = list(aligncache.allophones[i]
                             for (t, i, s, w) in align)
                T = len(list(filter(lambda al: al not in alloignore, allos)))

                feat = np.empty((T, len(data[0])))
                k = 0
                for t in range(len(data)):
                    (_, allo, state, weight) = align[t]
                    if aligncache.allophones[allo] not in alloignore:
                        feat[k, :] = data[t]
                        k += 1

            except Exception as e:
                _, _, exc_tb = sys.exc_info()
                logging.error("failed: %s (line %d)", e, exc_tb.tb_lineno)
                ivector = np.zeros([1, ivecdim])
                out.addFeatureCache(a, [ivector], [[0.0, 999999.0]])
                continue

            if nrec == cur_rec:
                tmp_feat = np.concatenate((tmp_feat, feat), axis=0)
                tmp_segs.append(a)
                continue
            else:
                if cur_rec != "":
                    gs_test = GMMStats(gmm.nMeans, gmm.dim)
                    ivm.ubm.acc_statistics(tmp_feat, gs_test)
                    ivector = ivm.project(gs_test)
                    if lengthnorm:
                        ivector = ivector / np.linalg.norm(ivector)
                    ivector = np.expand_dims(ivector, 0)
                    for seg in tmp_segs:
                        out.addFeatureCache(seg, [ivector], [[0.0, 999999.0]])

                tmp_feat = feat
                tmp_segs = [a]
                cur_rec = nrec

        # last rec
        gs_test = GMMStats(gmm.nMeans, gmm.dim)
        ivm.ubm.acc_statistics(tmp_feat, gs_test)
        ivector = ivm.project(gs_test)
        if lengthnorm:
            ivector = ivector / np.linalg.norm(ivector)
        ivector = np.expand_dims(ivector, 0)
        for seg in tmp_segs:
            out.addFeatureCache(seg, [ivector], [[0.0, 999999.0]])

        out.finalize()

        del out  # delete this to close the file handle. This ensures all data is written.

        shutil.move(tmp_ivec_file, self.single_ivec_caches[task_id].get_path())
Exemplo n.º 30
0
def equals(x, y, epsilon):
  return (abs(x - y) < epsilon).all()

# Define Training set and initial values for tests
F1 = numpy.array( [0.3833, 0.4516, 0.6173, 0.2277, 0.5755, 0.8044, 0.5301,
  0.9861, 0.2751, 0.0300, 0.2486, 0.5357]).reshape((6,2))
F2 = numpy.array( [0.0871, 0.6838, 0.8021, 0.7837, 0.9891, 0.5341, 0.0669,
  0.8854, 0.9394, 0.8990, 0.0182, 0.6259]).reshape((6,2))
F=[F1, F2]

N1 = numpy.array([0.1379, 0.1821, 0.2178, 0.0418]).reshape((2,2))
N2 = numpy.array([0.1069, 0.9397, 0.6164, 0.3545]).reshape((2,2))
N=[N1, N2]

gs11 = GMMStats(2,3)
gs11.n = N1[:,0]
gs11.sum_px = F1[:,0].reshape(2,3)
gs12 = GMMStats(2,3)
gs12.n = N1[:,1]
gs12.sum_px = F1[:,1].reshape(2,3)

gs21 = GMMStats(2,3)
gs21.n = N2[:,0]
gs21.sum_px = F2[:,0].reshape(2,3)
gs22 = GMMStats(2,3)
gs22.n = N2[:,1]
gs22.sum_px = F2[:,1].reshape(2,3)

TRAINING_STATS = [[gs11, gs12], [gs21, gs22]]
UBM_MEAN = numpy.array([0.1806, 0.0451, 0.7232, 0.3474, 0.6606, 0.3839])