示例#1
0
def test_gmm_MAP_3():
    """Fit a GMMMachine with the MAP trainer and compare with stored references.

    The prior machine passed as ``ubm`` is populated from the ``*AfterML``
    fixtures.  The adapted machine is fitted for a single step on
    ``dataforMAP.hdf5`` (first as produced by ``to_numpy``, then by
    ``to_dask_array``) and its parameters are compared with the ``*AfterMAP``
    fixtures.
    """

    def _fixture(relative_path):
        # Load an array from the data shipped with the bob.learn.em package.
        return load_array(resource_filename("bob.learn.em", relative_path))

    samples = _fixture("data/dataforMAP.hdf5")

    # Prior machine, filled with the parameters stored after ML training
    component_count = 5
    variance_floor = 0.001
    ubm_machine = GMMMachine(component_count)
    ubm_machine.means = _fixture("data/meansAfterML.hdf5")
    ubm_machine.variances = _fixture("data/variancesAfterML.hdf5")
    ubm_machine.weights = _fixture("data/weightsAfterML.hdf5")
    ubm_machine.variance_thresholds = variance_floor

    # MAP-trained machine: only the means are updated, in one fitting step
    adapted = GMMMachine(
        component_count,
        trainer="map",
        ubm=ubm_machine,
        convergence_threshold=0.001,
        max_fitting_steps=1,
        update_means=True,
        update_variances=False,
        update_weights=False,
        mean_var_update_threshold=0.00001,
        map_relevance_factor=None,
    )
    adapted.variance_thresholds = variance_floor

    # torch3vision references: (attribute name, expected value, tolerance)
    expectations = (
        ("means", _fixture("data/meansAfterMAP.hdf5"), 2e-1),
        ("variances", _fixture("data/variancesAfterMAP.hdf5"), 1e-4),
        ("weights", _fixture("data/weightsAfterMAP.hdf5"), 1e-4),
    )

    for transform in (to_numpy, to_dask_array):
        # The transforms are chained: the second one receives the output of
        # the first one.
        samples = transform(samples)
        adapted = adapted.fit(samples)

        # Gaps are quite large. This might be explained by the fact that there
        # is no adaptation of a given Gaussian in torch3 when the corresponding
        # responsibilities are below the responsibilities threshold.
        for attribute, reference, tolerance in expectations:
            np.testing.assert_allclose(
                getattr(adapted, attribute), reference, atol=tolerance)
示例#2
0
def test_gmm_test():
    """Score data with a fixed GMMMachine and compare with a reference value.

    The machine parameters come from the ``*AfterML`` fixtures.  The mean
    log-likelihood over ``dataforMAP.hdf5`` is computed once on the output of
    ``to_numpy`` and once on the output of ``to_dask_array`` and must match
    the reference within a relative error of 1e-4.
    """

    ar = load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5"))

    # Initialize GMMMachine
    n_gaussians = 5
    gmm = GMMMachine(n_gaussians)
    gmm.means = load_array(
        resource_filename("bob.learn.em", "data/meansAfterML.hdf5"))
    gmm.variances = load_array(
        resource_filename("bob.learn.em", "data/variancesAfterML.hdf5"))
    gmm.weights = load_array(
        resource_filename("bob.learn.em", "data/weightsAfterML.hdf5"))

    threshold = 0.001
    gmm.variance_thresholds = threshold

    # Test against the model
    score_mean_ref = -1.50379e06
    for transform in (to_numpy, to_dask_array):
        ar = transform(ar)
        score = gmm.log_likelihood(ar).sum()
        score /= len(ar)

        # Compare current results to torch3vision.
        # The reference is negative, so the error is normalised by its
        # magnitude: dividing by the signed value yields a ratio <= 0, which
        # would satisfy the bound whatever the score is.
        assert abs(score - score_mean_ref) / abs(score_mean_ref) < 1e-4
示例#3
0
def test_GMMMachine_stats():
    """Check the statistics returned by ``e_step`` against a stored reference."""

    data_path = resource_filename("bob.learn.em",
                                  "data/faithful.torch3_f64.hdf5")
    arrayset = load_array(data_path)

    # Two-Gaussian machine with hand-set parameters and zero thresholds
    machine = GMMMachine(n_gaussians=2)
    machine.weights = np.array([0.5, 0.5], "float64")
    machine.means = np.array([[3, 70], [4, 72]], "float64")
    machine.variances = np.array([[1, 10], [2, 5]], "float64")
    machine.variance_thresholds = np.array([[0, 0], [0, 0]], "float64")

    computed = gmm_module.e_step(arrayset, machine)

    # Reference statistics stored on disk
    expected = GMMStats(n_gaussians=2, n_features=2)
    expected.load(
        HDF5File(resource_filename("bob.learn.em", "data/stats.hdf5"), "r"))

    # The sample count must match exactly
    np.testing.assert_equal(computed.t, expected.t)
    # The accumulators are compared to 10 decimals only; an exact comparison
    # of sum_px was noted to fail because of precision error.
    for field in ("n", "sum_px", "sum_pxx"):
        np.testing.assert_almost_equal(getattr(computed, field),
                                       getattr(expected, field),
                                       decimal=10)
示例#4
0
    def _voice_activity_detection(self,
                                  energy_array: np.ndarray) -> np.ndarray:
        """Fits a 2 Gaussian GMM on the energy that splits between voice and silence."""
        n_samples = len(energy_array)
        # if energy does not change a lot, it may not be audio?
        if np.std(energy_array) < 10e-5:
            return np.zeros(shape=n_samples)

        # Add an epsilon small Gaussian noise to avoid numerical issues (mainly due to artificial silence).
        energy_array = (1e-6 * np.random.randn(n_samples)) + energy_array

        # Normalize the energy array, make it an array of 1D samples
        normalized_energy = utils.normalize_std_array(energy_array).reshape(
            (-1, 1))

        # Note: self.max_iterations and self.convergence_threshold are used for both
        # k-means and GMM training.
        kmeans_trainer = KMeansMachine(
            n_clusters=2,
            convergence_threshold=self.convergence_threshold,
            max_iter=self.max_iterations,
            init_max_iter=self.max_iterations,
        )
        ubm_gmm = GMMMachine(
            n_gaussians=2,
            trainer="ml",
            update_means=True,
            update_variances=True,
            update_weights=True,
            convergence_threshold=self.convergence_threshold,
            max_fitting_steps=self.max_iterations,
            k_means_trainer=kmeans_trainer,
        )
        ubm_gmm.variance_thresholds = self.variance_threshold

        ubm_gmm.fit(normalized_energy)

        if np.isnan(ubm_gmm.means).any():
            logger.warn("Annotation aborted: File contains NaN's")
            return np.zeros(shape=n_samples, dtype=int)

        # Classify

        # Different behavior dep on which mean represents high energy (higher value)
        labels = ubm_gmm.log_weighted_likelihood(normalized_energy)
        if ubm_gmm.means.argmax() == 0:  # High energy in means[0]
            labels = labels.argmin(axis=0)
        else:  # High energy in means[1]
            labels = labels.argmax(axis=0)

        return labels
示例#5
0
def test_gmm_ML_2():
    """Train a GMMMachine from k-means fixtures and compare with references.

    For the output of ``to_numpy`` and then of ``to_dask_array``, a fresh
    five-Gaussian machine is initialised from the ``*AfterKMeans`` fixtures,
    fitted on ``dataNormalized.hdf5`` and compared with the ``*AfterML``
    fixtures.
    """

    def _fixture(relative_path):
        # Load an array from the data shipped with the bob.learn.em package.
        return load_array(resource_filename("bob.learn.em", relative_path))

    samples = _fixture("data/dataNormalized.hdf5")

    # torch3vision references: (attribute name, expected value, tolerance)
    expectations = (
        ("means", _fixture("data/meansAfterML.hdf5"), 3e-3),
        ("variances", _fixture("data/variancesAfterML.hdf5"), 3e-3),
        ("weights", _fixture("data/weightsAfterML.hdf5"), 1e-4),
    )

    # Training settings, applied in this order to every new machine
    training_settings = (
        ("mean_var_update_threshold", 0.001),
        ("max_fitting_steps", 25),
        ("convergence_threshold", 0.000001),
        ("update_means", True),
        ("update_variances", True),
        ("update_weights", True),
    )

    for transform in (to_numpy, to_dask_array):
        # The transforms are chained: the second one receives the output of
        # the first one.
        samples = transform(samples)

        # Start every run from the k-means initialisation
        machine = GMMMachine(n_gaussians=5)
        machine.means = _fixture(
            "data/meansAfterKMeans.hdf5").astype("float64")
        machine.variances = _fixture(
            "data/variancesAfterKMeans.hdf5").astype("float64")
        # The weights fixture is passed through exp() before being assigned
        machine.weights = np.exp(
            _fixture("data/weightsAfterKMeans.hdf5").astype("float64"))
        machine.variance_thresholds = 0.001

        for setting, value in training_settings:
            setattr(machine, setting, value)

        # Run the training
        machine = machine.fit(samples)

        # Compare to the references
        for attribute, reference, tolerance in expectations:
            np.testing.assert_allclose(
                getattr(machine, attribute), reference, atol=tolerance)
示例#6
0
def test_GMMMachine_2():
  """Accumulate statistics with a GMMMachine and compare with a stored reference."""

  data_path = datafile("faithful.torch3_f64.hdf5", __name__, path="../data/")
  arrayset = bob.io.base.load(data_path)

  # Two-Gaussian, two-feature machine with hand-set parameters
  machine = GMMMachine(2, 2)
  machine.weights = numpy.array([0.5, 0.5], 'float64')
  machine.means = numpy.array([[3, 70], [4, 72]], 'float64')
  machine.variances = numpy.array([[1, 10], [2, 5]], 'float64')
  machine.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

  # Statistics are accumulated into a pre-allocated container
  computed = GMMStats(2, 2)
  machine.acc_statistics(arrayset, computed)

  reference_path = datafile("stats.hdf5", __name__, path="../data/")
  expected = GMMStats(bob.io.base.HDF5File(reference_path))

  # The sample count must match exactly
  assert computed.t == expected.t
  # The accumulators are compared with a tolerance; an exact comparison of
  # sum_px was noted to fail because of precision error.
  for field in ("n", "sum_px", "sum_pxx"):
    assert numpy.allclose(getattr(computed, field), getattr(expected, field),
                          atol=1e-10)
示例#7
0
def test_GMMMachine_2():
    """Accumulate statistics with a GMMMachine and compare with a stored reference."""

    def _data(file_name):
        # Resolve a fixture relative to this module's ../data/ directory.
        return datafile(file_name, __name__, path="../data/")

    arrayset = bob.io.base.load(_data("faithful.torch3_f64.hdf5"))

    # Two-Gaussian, two-feature machine with hand-set parameters
    machine = GMMMachine(2, 2)
    machine.weights = numpy.array([0.5, 0.5], 'float64')
    machine.means = numpy.array([[3, 70], [4, 72]], 'float64')
    machine.variances = numpy.array([[1, 10], [2, 5]], 'float64')
    machine.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

    # Statistics are accumulated into a pre-allocated container
    computed = GMMStats(2, 2)
    machine.acc_statistics(arrayset, computed)

    expected = GMMStats(bob.io.base.HDF5File(_data("stats.hdf5")))

    # The sample count must match exactly
    assert computed.t == expected.t
    # The accumulators are compared with a tolerance; an exact comparison of
    # sum_px was noted to fail because of precision error.
    for field in ("n", "sum_px", "sum_pxx"):
        assert numpy.allclose(getattr(computed, field),
                              getattr(expected, field),
                              atol=1e-10)
示例#8
0
def test_LinearScoring():
    """Check ``linear_scoring`` against reference scores for each call form.

    Three call forms are exercised: lists of GMMMachine models, lists of mean
    supervectors together with the UBM mean/variance supervectors, and a
    single model against a single GMMStats.  Each form is run with and
    without the test channel offsets and with and without frame-length
    normalisation.
    """

    def _machine(means, variances):
        # Two-Gaussian, two-feature machine with equal weights and zero
        # variance thresholds.
        machine = GMMMachine(2, 2)
        machine.weights = numpy.array([0.5, 0.5], 'float64')
        machine.means = numpy.array(means, 'float64')
        machine.variances = numpy.array(variances, 'float64')
        machine.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')
        return machine

    def _statistics(sum_px, n):
        # GMMStats whose frame count ``t`` is the sum of the occupancies.
        stats = GMMStats(2, 2)
        stats.sum_px = numpy.array(sum_px, 'float64')
        stats.n = numpy.array(n, 'float64')
        stats.t = sum(n)
        return stats

    ubm = _machine([[3, 70], [4, 72]], [[1, 10], [2, 5]])
    model1 = _machine([[1, 2], [3, 4]], [[9, 10], [11, 12]])
    model2 = _machine([[5, 6], [7, 8]], [[13, 14], [15, 16]])

    stats1 = _statistics([[1, 2], [3, 4]], [1, 2])
    stats2 = _statistics([[5, 6], [7, 8]], [3, 4])
    stats3 = _statistics([[5, 6], [7, 3]], [3, 4])

    # One channel offset per test statistics object
    test_channeloffset = [
        numpy.array([9, 8, 7, 6], 'float64'),
        numpy.array([5, 4, 3, 2], 'float64'),
        numpy.array([1, 0, 1, 2], 'float64')
    ]

    # Reference scores (from Idiap internal matlab implementation).
    # Rows are models, columns are statistics.  Suffix digits: first = channel
    # offset used, second = frame-length normalisation used.
    ref_scores_00 = numpy.array(
        [[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], 'float64')
    ref_scores_01 = numpy.array(
        [[790.9666666666667, 743.9571428571428, 753.6714285714285],
         [738.5666666666667, 695.4428571428572, 704.5857142857144]], 'float64')
    ref_scores_10 = numpy.array(
        [[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], 'float64')
    ref_scores_11 = numpy.array(
        [[871.8333333333332, 776.3000000000001, 770.3571428571427],
         [793.8333333333333, 714.1857142857143, 717.5000000000000]], 'float64')

    models = [model1, model2]
    all_stats = [stats1, stats2, stats3]

    # 1/ Use GMMMachines
    # 1/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring(models, ubm, all_stats)
    assert (abs(scores - ref_scores_00) < 1e-7).all()

    # 1/b/ Without test_channelOffset, with frame-length normalisation
    # NOTE: only the empty-list form of "no offset" is exercised; the variants
    # passing () or None were disabled in the original test.
    scores = linear_scoring(models, ubm, all_stats, [], True)
    assert (abs(scores - ref_scores_01) < 1e-7).all()

    # 1/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring(models, ubm, all_stats, test_channeloffset)
    assert (abs(scores - ref_scores_10) < 1e-7).all()

    # 1/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring(models, ubm, all_stats, test_channeloffset, True)
    assert (abs(scores - ref_scores_11) < 1e-7).all()

    # 2/ Use mean/variance supervectors
    model_means = [model1.mean_supervector, model2.mean_supervector]
    ubm_mean = ubm.mean_supervector
    ubm_variance = ubm.variance_supervector

    # 2/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring(model_means, ubm_mean, ubm_variance, all_stats)
    assert (abs(scores - ref_scores_00) < 1e-7).all()

    # 2/b/ Without test_channelOffset, with frame-length normalisation
    scores = linear_scoring(model_means, ubm_mean, ubm_variance, all_stats,
                            [], True)
    assert (abs(scores - ref_scores_01) < 1e-7).all()

    # 2/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring(model_means, ubm_mean, ubm_variance, all_stats,
                            test_channeloffset)
    assert (abs(scores - ref_scores_10) < 1e-7).all()

    # 2/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring(model_means, ubm_mean, ubm_variance, all_stats,
                            test_channeloffset, True)
    assert (abs(scores - ref_scores_11) < 1e-7).all()

    # 3/ Using single model/sample: every (model, statistics) pair is scored
    # on its own and compared with the matching entry of the reference matrix.
    # 3/a/ without frame-length normalisation
    for row, model_mean in enumerate(model_means):
        for col, (stats, offset) in enumerate(
                zip(all_stats, test_channeloffset)):
            score = linear_scoring(model_mean, ubm_mean, ubm_variance, stats,
                                   offset)
            assert abs(score - ref_scores_10[row, col]) < 1e-7

    # 3/b/ with frame-length normalisation
    for row, model_mean in enumerate(model_means):
        for col, (stats, offset) in enumerate(
                zip(all_stats, test_channeloffset)):
            score = linear_scoring(model_mean, ubm_mean, ubm_variance, stats,
                                   offset, True)
            assert abs(score - ref_scores_11[row, col]) < 1e-7
def test_LinearScoring():
    """Check ``linear_scoring`` against reference scores for each call form.

    Three call forms are exercised: lists of GMMMachine models, lists of
    model means, and a single model's means against a single GMMStats.  Each
    form is run with and without the test channel offsets and with and
    without frame-length normalisation.
    """

    def _machine(means, variances):
        # Two-Gaussian machine with equal weights and zero variance
        # thresholds.
        machine = GMMMachine(n_gaussians=2)
        machine.weights = np.array([0.5, 0.5], "float64")
        machine.means = np.array(means, "float64")
        machine.variances = np.array(variances, "float64")
        machine.variance_thresholds = np.array([[0, 0], [0, 0]], "float64")
        return machine

    def _statistics(sum_px, n):
        # GMMStats whose frame count ``t`` is the sum of the occupancies.
        stats = GMMStats(2, 2)
        stats.sum_px = np.array(sum_px, "float64")
        stats.n = np.array(n, "float64")
        stats.t = sum(n)
        return stats

    ubm = _machine([[3, 70], [4, 72]], [[1, 10], [2, 5]])
    model1 = _machine([[1, 2], [3, 4]], [[9, 10], [11, 12]])
    model2 = _machine([[5, 6], [7, 8]], [[13, 14], [15, 16]])

    stats1 = _statistics([[1, 2], [3, 4]], [1, 2])
    stats2 = _statistics([[5, 6], [7, 8]], [3, 4])
    stats3 = _statistics([[5, 6], [7, 3]], [3, 4])

    # One channel offset per test statistics object
    test_channeloffset = [
        np.array([[9, 8], [7, 6]], "float64"),
        np.array([[5, 4], [3, 2]], "float64"),
        np.array([[1, 0], [1, 2]], "float64"),
    ]

    # Reference scores (from Idiap internal matlab implementation).
    # Rows are models, columns are statistics.  Suffix digits: first = channel
    # offset used, second = frame-length normalisation used.
    ref_scores_00 = np.array(
        [[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], "float64")
    ref_scores_01 = np.array(
        [
            [790.9666666666667, 743.9571428571428, 753.6714285714285],
            [738.5666666666667, 695.4428571428572, 704.5857142857144],
        ],
        "float64",
    )
    ref_scores_10 = np.array(
        [[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], "float64")
    ref_scores_11 = np.array(
        [
            [871.8333333333332, 776.3000000000001, 770.3571428571427],
            [793.8333333333333, 714.1857142857143, 717.5000000000000],
        ],
        "float64",
    )

    models = [model1, model2]
    all_stats = [stats1, stats2, stats3]

    # 1/ Use GMMMachines
    # 1/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring(models, ubm, all_stats)
    np.testing.assert_almost_equal(scores, ref_scores_00, decimal=7)

    # 1/b/ Without test_channelOffset, with frame-length normalisation
    scores = linear_scoring(models, ubm, all_stats,
                            frame_length_normalization=True)
    np.testing.assert_almost_equal(scores, ref_scores_01, decimal=7)
    # Same check with an explicit offset of 0 and positional arguments
    scores = linear_scoring(models, ubm, all_stats, 0, True)
    np.testing.assert_almost_equal(scores, ref_scores_01, decimal=7)

    # 1/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring(models, ubm, all_stats, test_channeloffset)
    np.testing.assert_almost_equal(scores, ref_scores_10, decimal=7)

    # 1/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring(models, ubm, all_stats, test_channeloffset,
                            frame_length_normalization=True)
    np.testing.assert_almost_equal(scores, ref_scores_11, decimal=7)

    # 2/ Use means instead of models
    model_means = [model1.means, model2.means]

    # 2/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring(model_means, ubm, all_stats)
    assert (abs(scores - ref_scores_00) < 1e-7).all()

    # 2/b/ Without test_channelOffset, with frame-length normalisation
    scores = linear_scoring(model_means, ubm, all_stats,
                            frame_length_normalization=True)
    assert (abs(scores - ref_scores_01) < 1e-7).all()

    # 2/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring(model_means, ubm, all_stats, test_channeloffset)
    assert (abs(scores - ref_scores_10) < 1e-7).all()

    # 2/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring(model_means, ubm, all_stats, test_channeloffset,
                            frame_length_normalization=True)
    assert (abs(scores - ref_scores_11) < 1e-7).all()

    # 3/ Using single model/sample: every (model, statistics) pair is scored
    # on its own and compared with the matching entry of the reference matrix.
    # 3/a/ without frame-length normalisation
    for row, means in enumerate(model_means):
        for col, (stats, offset) in enumerate(
                zip(all_stats, test_channeloffset)):
            score = linear_scoring(means, ubm, stats, offset)
            np.testing.assert_almost_equal(score,
                                           ref_scores_10[row, col],
                                           decimal=7)

    # 3/b/ with frame-length normalisation
    for row, means in enumerate(model_means):
        for col, (stats, offset) in enumerate(
                zip(all_stats, test_channeloffset)):
            score = linear_scoring(means, ubm, stats, offset, True)
            np.testing.assert_almost_equal(score,
                                           ref_scores_11[row, col],
                                           decimal=7)
示例#10
0
def test_GMMMachine_1():
    """Exercise the basic GMMMachine features.

    Covers parameter setters/getters, supervector views, variance-threshold
    helpers, per-Gaussian access, resizing, copy construction and the
    equality / similarity operators.
    """

    base_weights = numpy.array([0.5, 0.5], 'float64')
    other_weights = numpy.array([0.6, 0.4], 'float64')
    base_means = numpy.array([[3, 70, 0], [4, 72, 0]], 'float64')
    other_means = numpy.array([[3, 7, 0], [4, 72, 0]], 'float64')
    base_variances = numpy.array([[1, 10, 1], [2, 5, 2]], 'float64')
    other_variances = numpy.array([[10, 10, 1], [2, 5, 2]], 'float64')
    zero_thresholds = numpy.array([[0, 0, 0], [0, 0, 0]], 'float64')
    # Only referenced by a disabled assignment in the original test
    other_thresholds = numpy.array([[0.0005, 0.0005, 0.0005], [0, 0, 0]],
                                   'float64')

    # Build a 2-Gaussian, 3-feature machine, set every parameter and read it
    # back
    gmm = GMMMachine(2, 3)
    gmm.weights = base_weights
    gmm.means = base_means
    gmm.variances = base_variances
    gmm.variance_thresholds = zero_thresholds
    assert gmm.shape == (2, 3)
    assert (gmm.weights == base_weights).all()
    assert (gmm.means == base_means).all()
    assert (gmm.variances == base_variances).all()
    assert (gmm.variance_thresholds == zero_thresholds).all()

    # Supervector accessors must equal the flattened 2D parameters
    assert (gmm.mean_supervector == base_means.reshape(-1)).all()
    assert (gmm.variance_supervector == base_variances.reshape(-1)).all()
    replacement_means = numpy.array([[3, 70, 2], [4, 72, 2]], 'float64')
    replacement_variances = numpy.array([[1, 1, 1], [2, 2, 2]], 'float64')

    # A 1D threshold vector is applied to every Gaussian
    thresholds_1d = numpy.array([0.3, 1, 0.5], 'float64')
    gmm.set_variance_thresholds(thresholds_1d)
    for row in (0, 1):
        assert (gmm.variance_thresholds[row, :] == thresholds_1d).all()

    # A scalar threshold is applied to every entry
    gmm.set_variance_thresholds(0.005)
    assert (gmm.variance_thresholds == 0.005).all()

    # Per-Gaussian access reflects the newly assigned parameters
    gmm.means = replacement_means
    gmm.variances = replacement_variances
    for row in (0, 1):
        assert (gmm.get_gaussian(row).mean == replacement_means[row, :]).all()
    for row in (0, 1):
        assert (gmm.get_gaussian(row).variance ==
                replacement_variances[row, :]).all()

    # Resizing changes the reported shape
    gmm.resize(4, 5)
    assert gmm.shape == (4, 5)

    # Comparison: one copy of the machine, plus four machines built from
    # scratch.  The variance thresholds of those four are never assigned
    # (the assignments are disabled in the original test).
    duplicate = GMMMachine(gmm)
    parameter_sets = (
        (other_weights, base_means, base_variances),
        (base_weights, other_means, base_variances),
        (base_weights, base_means, other_variances),
        (base_weights, base_means, base_variances),
    )
    others = []
    for weights, means, variances in parameter_sets:
        candidate = GMMMachine(2, 3)
        candidate.weights = weights
        candidate.means = means
        candidate.variances = variances
        others.append(candidate)

    assert gmm == duplicate
    assert (gmm != duplicate) is False
    assert gmm.is_similar_to(duplicate)
    for candidate in others:
        assert gmm != candidate
        assert (gmm == candidate) is False
        assert gmm.is_similar_to(candidate) is False
示例#11
0
def test_LinearScoring():
  """Check ``linear_scoring`` against reference scores for each call form.

  Three call forms are exercised: lists of GMMMachine models, lists of mean
  supervectors together with the UBM mean/variance supervectors, and a single
  model against a single GMMStats.  Each form is run with and without the
  test channel offsets and with and without frame-length normalisation.
  """

  def _machine(means, variances):
    # Two-Gaussian, two-feature machine with equal weights and zero variance
    # thresholds.
    machine = GMMMachine(2, 2)
    machine.weights = numpy.array([0.5, 0.5], 'float64')
    machine.means = numpy.array(means, 'float64')
    machine.variances = numpy.array(variances, 'float64')
    machine.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')
    return machine

  def _statistics(sum_px, n):
    # GMMStats whose frame count ``t`` is the sum of the occupancies.
    stats = GMMStats(2, 2)
    stats.sum_px = numpy.array(sum_px, 'float64')
    stats.n = numpy.array(n, 'float64')
    stats.t = sum(n)
    return stats

  ubm = _machine([[3, 70], [4, 72]], [[1, 10], [2, 5]])
  model1 = _machine([[1, 2], [3, 4]], [[9, 10], [11, 12]])
  model2 = _machine([[5, 6], [7, 8]], [[13, 14], [15, 16]])

  stats1 = _statistics([[1, 2], [3, 4]], [1, 2])
  stats2 = _statistics([[5, 6], [7, 8]], [3, 4])
  stats3 = _statistics([[5, 6], [7, 3]], [3, 4])

  # One channel offset per test statistics object
  test_channeloffset = [
    numpy.array([9, 8, 7, 6], 'float64'),
    numpy.array([5, 4, 3, 2], 'float64'),
    numpy.array([1, 0, 1, 2], 'float64'),
  ]

  # Reference scores (from Idiap internal matlab implementation).
  # Rows are models, columns are statistics.  Suffix digits: first = channel
  # offset used, second = frame-length normalisation used.
  ref_scores_00 = numpy.array(
    [[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], 'float64')
  ref_scores_01 = numpy.array(
    [[790.9666666666667, 743.9571428571428, 753.6714285714285],
     [738.5666666666667, 695.4428571428572, 704.5857142857144]], 'float64')
  ref_scores_10 = numpy.array(
    [[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], 'float64')
  ref_scores_11 = numpy.array(
    [[871.8333333333332, 776.3000000000001, 770.3571428571427],
     [793.8333333333333, 714.1857142857143, 717.5000000000000]], 'float64')

  models = [model1, model2]
  all_stats = [stats1, stats2, stats3]

  # 1/ Use GMMMachines
  # 1/a/ Without test_channelOffset, without frame-length normalisation
  scores = linear_scoring(models, ubm, all_stats)
  assert (abs(scores - ref_scores_00) < 1e-7).all()

  # 1/b/ Without test_channelOffset, with frame-length normalisation
  # NOTE: only the empty-list form of "no offset" is exercised; the variants
  # passing () or None were disabled in the original test.
  scores = linear_scoring(models, ubm, all_stats, [], True)
  assert (abs(scores - ref_scores_01) < 1e-7).all()

  # 1/c/ With test_channelOffset, without frame-length normalisation
  scores = linear_scoring(models, ubm, all_stats, test_channeloffset)
  assert (abs(scores - ref_scores_10) < 1e-7).all()

  # 1/d/ With test_channelOffset, with frame-length normalisation
  scores = linear_scoring(models, ubm, all_stats, test_channeloffset, True)
  assert (abs(scores - ref_scores_11) < 1e-7).all()

  # 2/ Use mean/variance supervectors
  model_means = [model1.mean_supervector, model2.mean_supervector]
  ubm_mean = ubm.mean_supervector
  ubm_variance = ubm.variance_supervector

  # 2/a/ Without test_channelOffset, without frame-length normalisation
  scores = linear_scoring(model_means, ubm_mean, ubm_variance, all_stats)
  assert (abs(scores - ref_scores_00) < 1e-7).all()

  # 2/b/ Without test_channelOffset, with frame-length normalisation
  scores = linear_scoring(model_means, ubm_mean, ubm_variance, all_stats,
                          [], True)
  assert (abs(scores - ref_scores_01) < 1e-7).all()

  # 2/c/ With test_channelOffset, without frame-length normalisation
  scores = linear_scoring(model_means, ubm_mean, ubm_variance, all_stats,
                          test_channeloffset)
  assert (abs(scores - ref_scores_10) < 1e-7).all()

  # 2/d/ With test_channelOffset, with frame-length normalisation
  scores = linear_scoring(model_means, ubm_mean, ubm_variance, all_stats,
                          test_channeloffset, True)
  assert (abs(scores - ref_scores_11) < 1e-7).all()

  # 3/ Using single model/sample: every (model, statistics) pair is scored on
  # its own and compared with the matching entry of the reference matrix.
  # 3/a/ without frame-length normalisation
  for row, model_mean in enumerate(model_means):
    for col, (stats, offset) in enumerate(zip(all_stats, test_channeloffset)):
      score = linear_scoring(model_mean, ubm_mean, ubm_variance, stats, offset)
      assert abs(score - ref_scores_10[row, col]) < 1e-7

  # 3/b/ with frame-length normalisation
  for row, model_mean in enumerate(model_means):
    for col, (stats, offset) in enumerate(zip(all_stats, test_channeloffset)):
      score = linear_scoring(model_mean, ubm_mean, ubm_variance, stats, offset,
                             True)
      assert abs(score - ref_scores_11[row, col]) < 1e-7
示例#12
0
def test_GMMMachine():
    """Exercise the basic features of ``GMMMachine``.

    Covers, in order: the parameter setters/getters and ``shape``; the
    ``variance_thresholds`` setter with 2D, 1D and scalar values; comparison
    (``!=`` and ``is_similar_to``) against a deep copy and against machines
    built from other parameter sets; HDF5 save/load round trips; and the
    ``weights`` / ``log_weights`` pair.
    """

    weights = np.array([0.5, 0.5], "float64")
    weights2 = np.array([0.6, 0.4], "float64")
    means = np.array([[3, 70, 0], [4, 72, 0]], "float64")
    means2 = np.array([[3, 7, 0], [4, 72, 0]], "float64")
    variances = np.array([[1, 10, 1], [2, 5, 2]], "float64")
    variances2 = np.array([[10, 10, 1], [2, 5, 2]], "float64")
    varianceThresholds = np.array([[0, 0, 0], [0, 0, 0]], "float64")
    varianceThresholds2 = np.array([[0.0005, 0.0005, 0.0005], [0, 0, 0]],
                                   "float64")

    def _make_machine(w, m, v, thresholds):
        # Build a 2-Gaussian machine, assigning the parameters in the same
        # order everywhere (weights, means, variances, variance thresholds).
        built = GMMMachine(n_gaussians=2)
        built.weights = w
        built.means = m
        built.variances = v
        built.variance_thresholds = thresholds
        return built

    def _check_reloaded(loaded):
        # A machine read back from HDF5 must expose the expected attribute
        # types, carry no UBM, and be equal to the machine that was saved.
        assert type(loaded.n_gaussians) is np.int64
        assert type(loaded.update_means) is np.bool_
        assert type(loaded.update_variances) is np.bool_
        assert type(loaded.update_weights) is np.bool_
        assert type(loaded.trainer) is str
        assert loaded.ubm is None
        assert_gmm_equal(gmm, loaded)

    # Initializes a GMMMachine, sets the weights, means, variances and
    # varianceThresholds, and checks that they are read back unchanged
    gmm = _make_machine(weights, means, variances, varianceThresholds)
    assert gmm.shape == (2, 3)
    np.testing.assert_equal(gmm.weights, weights)
    np.testing.assert_equal(gmm.means, means)
    np.testing.assert_equal(gmm.variances, variances)
    np.testing.assert_equal(gmm.variance_thresholds, varianceThresholds)

    newMeans = np.array([[3, 70, 2], [4, 72, 2]], "float64")
    newVariances = np.array([[1, 1, 1], [2, 2, 2]], "float64")

    # Checks particular varianceThresholds-related methods: the setter also
    # accepts a 1D array and a scalar
    varianceThresholds1D = np.array([0.3, 1, 0.5], "float64")
    gmm.variance_thresholds = varianceThresholds1D
    np.testing.assert_equal(gmm.variance_thresholds, varianceThresholds1D)

    gmm.variance_thresholds = 0.005
    np.testing.assert_equal(gmm.variance_thresholds, 0.005)

    gmm.means = newMeans
    gmm.variances = newVariances
    np.testing.assert_equal(gmm.means, newMeans)
    np.testing.assert_equal(gmm.variances, newVariances)

    # Checks comparison
    # NOTE(review): at this point gmm holds newMeans, newVariances and a
    # threshold of 0.005, so each of gmm3..gmm6 differs from gmm in more than
    # the single parameter it varies relative to the initial setup. Confirm
    # whether the comparisons were meant to isolate one parameter at a time.
    gmm2 = deepcopy(gmm)
    gmm3 = _make_machine(weights2, means, variances, varianceThresholds)
    gmm4 = _make_machine(weights, means2, variances, varianceThresholds)
    gmm5 = _make_machine(weights, means, variances2, varianceThresholds)
    gmm6 = _make_machine(weights, means, variances, varianceThresholds2)

    assert_gmm_equal(gmm, gmm2)
    assert (gmm != gmm2) is False
    assert gmm.is_similar_to(gmm2)
    for other in (gmm3, gmm4, gmm5, gmm6):
        assert gmm != other
        assert gmm.is_similar_to(other) is False

    # Saving and loading through HDF5File objects
    with tempfile.NamedTemporaryFile(suffix=".hdf5") as f:
        filename = f.name
        gmm.save(HDF5File(filename, "w"))
        # Using from_hdf5
        gmm1 = GMMMachine.from_hdf5(HDF5File(filename, "r"))
        _check_reloaded(gmm1)
        # Using load
        gmm1 = GMMMachine(n_gaussians=gmm.n_gaussians)
        gmm1.load(HDF5File(filename, "r"))
        _check_reloaded(gmm1)

    # Saving and loading through a plain file name
    with tempfile.NamedTemporaryFile(suffix=".hdf5") as f:
        filename = f.name
        gmm.save(filename)
        gmm1 = GMMMachine.from_hdf5(filename)
        assert_gmm_equal(gmm, gmm1)

    # Weights
    n_gaussians = 5
    machine = GMMMachine(n_gaussians)

    default_weights = np.full(shape=(n_gaussians, ),
                              fill_value=1.0 / n_gaussians)
    default_log_weights = np.full(shape=(n_gaussians, ),
                                  fill_value=np.log(1.0 / n_gaussians))

    # Test weights getting and setting
    np.testing.assert_almost_equal(machine.weights, default_weights)
    np.testing.assert_almost_equal(machine.log_weights, default_log_weights)

    # Work on a copy so the in-place edits below do not overwrite the
    # default_weights fixture through an alias.
    modified_weights = default_weights.copy()
    # Halve the first half of the weights and scale the last half by 1.5;
    # with an odd n_gaussians the middle entry keeps its default value.
    modified_weights[:n_gaussians // 2] = (1 / n_gaussians) / 2
    modified_weights[n_gaussians // 2 +
                     n_gaussians % 2:] = (1 / n_gaussians) * 1.5

    # Ensure setter works (log_weights is updated correctly)
    machine.weights = modified_weights
    np.testing.assert_almost_equal(machine.weights, modified_weights)
    np.testing.assert_almost_equal(machine.log_weights,
                                   np.log(modified_weights))
示例#13
0
def test_GMMMachine_1():
  # Test a GMMMachine basic features: parameter setters/getters, supervector
  # accessors, variance-threshold setters, per-Gaussian access, resize and
  # the comparison operators.

  # Reference parameters; the "*2" variants each change one value set so
  # that differing machines can be built for the comparison checks below.
  weights   = numpy.array([0.5, 0.5], 'float64')
  weights2   = numpy.array([0.6, 0.4], 'float64')
  means     = numpy.array([[3, 70, 0], [4, 72, 0]], 'float64')
  means2     = numpy.array([[3, 7, 0], [4, 72, 0]], 'float64')
  variances = numpy.array([[1, 10, 1], [2, 5, 2]], 'float64')
  variances2 = numpy.array([[10, 10, 1], [2, 5, 2]], 'float64')
  varianceThresholds = numpy.array([[0, 0, 0], [0, 0, 0]], 'float64')
  varianceThresholds2 = numpy.array([[0.0005, 0.0005, 0.0005], [0, 0, 0]], 'float64')

  # Initializes a GMMMachine
  # (the two arguments match the (2, 3) shape asserted below; presumably
  # number of Gaussians and feature dimension - TODO confirm)
  gmm = GMMMachine(2,3)
  # Sets the weights, means, variances and varianceThresholds and
  # Checks correctness
  gmm.weights = weights
  gmm.means = means
  gmm.variances = variances
  gmm.variance_thresholds = varianceThresholds
  assert gmm.shape == (2,3)
  assert (gmm.weights == weights).all()
  assert (gmm.means == means).all()
  assert (gmm.variances == variances).all()
  assert (gmm.variance_thresholds == varianceThresholds).all()

  # Checks supervector-like accesses: each supervector must equal the
  # corresponding 2D parameter array flattened to 1D
  assert (gmm.mean_supervector == means.reshape(means.size)).all()
  assert (gmm.variance_supervector == variances.reshape(variances.size)).all()
  newMeans = numpy.array([[3, 70, 2], [4, 72, 2]], 'float64')
  newVariances = numpy.array([[1, 1, 1], [2, 2, 2]], 'float64')


  # Checks particular varianceThresholds-related methods
  # A 1D array is expected to end up in every row of the thresholds
  varianceThresholds1D = numpy.array([0.3, 1, 0.5], 'float64')
  gmm.set_variance_thresholds(varianceThresholds1D)
  assert (gmm.variance_thresholds[0,:] == varianceThresholds1D).all()
  assert (gmm.variance_thresholds[1,:] == varianceThresholds1D).all()

  # A scalar is expected to end up in every entry of the thresholds
  gmm.set_variance_thresholds(0.005)
  assert (gmm.variance_thresholds == 0.005).all()

  # Checks Gaussians access: get_gaussian(i) must expose row i of the
  # means and variances that were just assigned
  gmm.means     = newMeans
  gmm.variances = newVariances
  assert (gmm.get_gaussian(0).mean == newMeans[0,:]).all()
  assert (gmm.get_gaussian(1).mean == newMeans[1,:]).all()
  assert (gmm.get_gaussian(0).variance == newVariances[0,:]).all()
  assert (gmm.get_gaussian(1).variance == newVariances[1,:]).all()

  # Checks resize
  gmm.resize(4,5)
  assert gmm.shape == (4,5)

  # Checks comparison
  # gmm2 is constructed from gmm and must compare equal to it; gmm3..gmm6
  # are fresh (2, 3) machines built from the reference parameters above.
  # NOTE(review): gmm was resized to (4, 5) just above, so the inequality
  # checks against the (2, 3) machines may hold on shape alone - confirm
  # whether that is intended.
  # NOTE(review): the variance-threshold assignments below are commented
  # out, so gmm6 is given the same weights/means/variances as the initial
  # setup and varianceThresholds2 is never used - confirm.
  gmm2 = GMMMachine(gmm)
  gmm3 = GMMMachine(2,3)
  gmm3.weights = weights2
  gmm3.means = means
  gmm3.variances = variances
  #gmm3.varianceThresholds = varianceThresholds
  gmm4 = GMMMachine(2,3)
  gmm4.weights = weights
  gmm4.means = means2
  gmm4.variances = variances
  #gmm4.varianceThresholds = varianceThresholds
  gmm5 = GMMMachine(2,3)
  gmm5.weights = weights
  gmm5.means = means
  gmm5.variances = variances2
  #gmm5.varianceThresholds = varianceThresholds
  gmm6 = GMMMachine(2,3)
  gmm6.weights = weights
  gmm6.means = means
  gmm6.variances = variances
  #gmm6.varianceThresholds = varianceThresholds2

  # ==, != and is_similar_to must all agree: equal for the copy,
  # different for every other machine
  assert gmm == gmm2
  assert (gmm != gmm2) is False
  assert gmm.is_similar_to(gmm2)
  assert gmm != gmm3
  assert (gmm == gmm3) is False
  assert gmm.is_similar_to(gmm3) is False
  assert gmm != gmm4
  assert (gmm == gmm4) is False
  assert gmm.is_similar_to(gmm4) is False
  assert gmm != gmm5
  assert (gmm == gmm5) is False
  assert gmm.is_similar_to(gmm5) is False
  assert gmm != gmm6
  assert (gmm == gmm6) is False
  assert gmm.is_similar_to(gmm6) is False