def acc(self, task_id):
    """Accumulate Gaussian statistics for one task and save them to HDF5.

    The mixture set referenced by ``self.ubm`` is loaded and converted with
    ``convert_gmm``. Every segment of the feature cache selected by
    ``task_id`` is then read together with its alignment; frames whose
    aligned allophone is listed in ``self.allophones_to_ignore`` are dropped
    and the remaining frames are accumulated into a single ``GMMStats``
    object. That object is finally written to the path given by
    ``self.single_accu_caches[task_id]``.

    Args:
        task_id: Key selecting the entries of ``self.alignment``,
            ``self.features`` and ``self.single_accu_caches`` to process.

    Raises:
        IndexError: If a segment has more feature frames than alignment
            entries (``align[t]`` is indexed per feature frame).
    """
    mix_file = util.cache_path(self.ubm)
    align_file = util.cache_path(self.alignment[task_id])
    feat_file = util.cache_path(self.features[task_id])
    allo_file = util.cache_path(self.allophones)

    logging.info("Reading mixture file from '%s'..." % mix_file)
    gmm = sc.MixtureSet(mix_file)
    logging.info("Read %d means and %d covariances of dimension %d" %
                 (gmm.nMeans, gmm.nCovs, gmm.dim))

    ubm = convert_gmm(gmm)
    ivm = IVectorMachine(ubm, self.dim)
    ivm.variance_threshold = 1e-5

    # One accumulator shared by all segments of this task.
    gs = GMMStats(gmm.nMeans, gmm.dim)

    logging.info(
        "Opening alignment cache '%s' with allophones from '%s'; ignoring '%s'"
        % (align_file, allo_file, ",".join(self.allophones_to_ignore)))
    aligncache = sc.FileArchive(align_file)
    aligncache.setAllophones(allo_file)

    cache = sc.FileArchive(feat_file)
    for a in cache.ft.keys():
        # Attribute entries carry no feature data.
        if a.endswith(".attribs"):
            continue

        logging.info("Reading '%s'..." % a)
        _times, data = cache.read(a, "feat")
        align = aligncache.read(a, "align")

        if len(align) < 1:
            logging.warning("No data for segment: '%s' in alignment." % a)
            continue

        n_dims = len(data[0])

        # Select the frames to keep in a single pass over the features.
        kept_frames = []
        for t, frame in enumerate(data):
            (_, allo, _state, _weight) = align[t]
            if aligncache.allophones[allo] not in self.allophones_to_ignore:
                kept_frames.append(frame)

        # Size the matrix from the frames actually kept so that no
        # uninitialised rows can reach the accumulator when the alignment
        # holds more entries than there are feature frames.
        feat = np.empty((len(kept_frames), n_dims))
        for k, frame in enumerate(kept_frames):
            feat[k, :] = frame

        ivm.ubm.acc_statistics(feat, gs)

    out_path = self.single_accu_caches[task_id].get_path()
    logging.info("Writing Gaussian statistics to '%s'" % out_path)
    gs.save(HDF5File(out_path, "w"))
def test_GMMStats():
    """Test GMMStats: attributes, HDF5 round-trips, comparison, accumulation,
    re-initialisation and resizing."""
    # Every temporary file is tracked so that all of them are removed at the
    # end, even when an assertion fails half-way through.
    temp_files = []

    def new_temp_hdf5():
        # mkstemp hands back an *open* OS-level descriptor along with the
        # path; close it immediately, only the path is needed here.
        fd, path = tempfile.mkstemp(".hdf5")
        os.close(fd)
        temp_files.append(path)
        return path

    try:
        # Initializes a GMMStats
        n_gaussians = 2
        n_features = 3
        gs = GMMStats(n_gaussians, n_features)
        log_likelihood = -3.0
        T = 57
        n = np.array([4.37, 5.31], "float64")
        sumpx = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], "float64")
        sumpxx = np.array([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]], "float64")
        gs.log_likelihood = log_likelihood
        gs.t = T
        gs.n = n
        gs.sum_px = sumpx
        gs.sum_pxx = sumpxx
        np.testing.assert_equal(gs.log_likelihood, log_likelihood)
        np.testing.assert_equal(gs.t, T)
        np.testing.assert_equal(gs.n, n)
        np.testing.assert_equal(gs.sum_px, sumpx)
        np.testing.assert_equal(gs.sum_pxx, sumpxx)
        np.testing.assert_equal(gs.shape, (n_gaussians, n_features))

        # Saves and reads from file using `from_hdf5`
        filename = new_temp_hdf5()
        gs.save(HDF5File(filename, "w"))
        gs_loaded = GMMStats.from_hdf5(HDF5File(filename, "r"))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        assert type(gs_loaded.n_gaussians) is np.int64
        assert type(gs_loaded.n_features) is np.int64
        assert type(gs_loaded.log_likelihood) is np.float64

        # Saves and load from file using `load`
        filename = new_temp_hdf5()
        gs.save(hdf5=HDF5File(filename, "w"))
        gs_loaded = GMMStats(n_gaussians, n_features)
        gs_loaded.load(HDF5File(filename, "r"))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Makes them different
        gs_loaded.t = 58
        assert (gs == gs_loaded) is False
        assert gs != gs_loaded
        assert not (gs.is_similar_to(gs_loaded))

        # Accumulates from another GMMStats
        gs2 = GMMStats(n_gaussians, n_features)
        gs2.log_likelihood = log_likelihood
        gs2.t = T
        gs2.n = n.copy()
        gs2.sum_px = sumpx.copy()
        gs2.sum_pxx = sumpxx.copy()
        gs2 += gs
        np.testing.assert_equal(gs2.log_likelihood, 2 * log_likelihood)
        np.testing.assert_equal(gs2.t, 2 * T)
        np.testing.assert_almost_equal(gs2.n, 2 * n, decimal=8)
        np.testing.assert_almost_equal(gs2.sum_px, 2 * sumpx, decimal=8)
        np.testing.assert_almost_equal(gs2.sum_pxx, 2 * sumpxx, decimal=8)

        # Re-init and checks for zeros
        gs_loaded.init_fields()
        np.testing.assert_equal(gs_loaded.log_likelihood, 0)
        np.testing.assert_equal(gs_loaded.t, 0)
        np.testing.assert_equal(gs_loaded.n, np.zeros((n_gaussians, )))
        np.testing.assert_equal(gs_loaded.sum_px,
                                np.zeros((n_gaussians, n_features)))
        np.testing.assert_equal(gs_loaded.sum_pxx,
                                np.zeros((n_gaussians, n_features)))

        # Resize and checks size
        assert gs_loaded.shape == (n_gaussians, n_features)
        gs_loaded.resize(4, 5)
        assert gs_loaded.shape == (4, 5)
        assert gs_loaded.sum_px.shape[0] == 4
        assert gs_loaded.sum_px.shape[1] == 5
    finally:
        # Clean-up
        for path in temp_files:
            os.unlink(path)
def test_GMMStats():
    """Test GMMStats: attributes, HDF5 round-trips (positional and keyword
    forms), comparison, accumulation, re-initialisation and resizing."""
    # Every temporary file is tracked so that all of them are removed at the
    # end, even when an assertion fails half-way through.
    temp_files = []

    def new_temp_hdf5():
        # mkstemp hands back an *open* OS-level descriptor along with the
        # path; close it immediately, only the path is needed here.
        fd, path = tempfile.mkstemp(".hdf5")
        os.close(fd)
        temp_files.append(path)
        return path

    try:
        # Initializes a GMMStats
        gs = GMMStats(2, 3)
        log_likelihood = -3.
        T = 57
        n = numpy.array([4.37, 5.31], 'float64')
        sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64')
        sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64')
        gs.log_likelihood = log_likelihood
        gs.t = T
        gs.n = n
        gs.sum_px = sumpx
        gs.sum_pxx = sumpxx
        assert gs.log_likelihood == log_likelihood
        assert gs.t == T
        assert (gs.n == n).all()
        assert (gs.sum_px == sumpx).all()
        assert (gs.sum_pxx == sumpxx).all()
        assert gs.shape == (2, 3)

        # Saves and reads from file
        filename = new_temp_hdf5()
        gs.save(bob.io.base.HDF5File(filename, 'w'))
        gs_loaded = GMMStats(bob.io.base.HDF5File(filename))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Saves and reads from file using the keyword argument
        filename = new_temp_hdf5()
        gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
        gs_loaded = GMMStats(bob.io.base.HDF5File(filename))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Saves with the keyword argument, loads with a positional argument
        filename = new_temp_hdf5()
        gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
        gs_loaded = GMMStats()
        gs_loaded.load(bob.io.base.HDF5File(filename))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Saves and loads from file, both using the keyword argument
        filename = new_temp_hdf5()
        gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
        gs_loaded = GMMStats()
        gs_loaded.load(hdf5=bob.io.base.HDF5File(filename))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Makes them different
        gs_loaded.t = 58
        assert (gs == gs_loaded) is False
        assert gs != gs_loaded
        assert (gs.is_similar_to(gs_loaded)) is False

        # Accumulates from another GMMStats. Copies are assigned so that the
        # in-place addition below can never alter the reference arrays the
        # expected values are computed from.
        gs2 = GMMStats(2, 3)
        gs2.log_likelihood = log_likelihood
        gs2.t = T
        gs2.n = n.copy()
        gs2.sum_px = sumpx.copy()
        gs2.sum_pxx = sumpxx.copy()
        gs2 += gs
        eps = 1e-8
        assert gs2.log_likelihood == 2 * log_likelihood
        assert gs2.t == 2 * T
        assert numpy.allclose(gs2.n, 2 * n, eps)
        assert numpy.allclose(gs2.sum_px, 2 * sumpx, eps)
        assert numpy.allclose(gs2.sum_pxx, 2 * sumpxx, eps)

        # Reinit and checks for zeros
        gs_loaded.init()
        assert gs_loaded.log_likelihood == 0
        assert gs_loaded.t == 0
        assert (gs_loaded.n == 0).all()
        assert (gs_loaded.sum_px == 0).all()
        assert (gs_loaded.sum_pxx == 0).all()

        # Resize and checks size
        assert gs_loaded.shape == (2, 3)
        gs_loaded.resize(4, 5)
        assert gs_loaded.shape == (4, 5)
        assert gs_loaded.sum_px.shape[0] == 4
        assert gs_loaded.sum_px.shape[1] == 5
    finally:
        # Clean-up
        for path in temp_files:
            os.unlink(path)
def test_GMMStats():
    """Test GMMStats: attributes, HDF5 round-trips (positional and keyword
    forms), comparison, accumulation, re-initialisation and resizing."""
    # Paths of all temporary files created by this test; each one is removed
    # in the ``finally`` clause regardless of the test outcome.
    created_paths = []

    def make_hdf5_path():
        # Only the path is needed: close the descriptor that mkstemp opened
        # so it does not leak for the rest of the test session.
        handle, path = tempfile.mkstemp(".hdf5")
        os.close(handle)
        created_paths.append(path)
        return path

    try:
        # Initializes a GMMStats
        gs = GMMStats(2, 3)
        log_likelihood = -3.
        T = 57
        n = numpy.array([4.37, 5.31], 'float64')
        sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64')
        sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64')
        gs.log_likelihood = log_likelihood
        gs.t = T
        gs.n = n
        gs.sum_px = sumpx
        gs.sum_pxx = sumpxx
        assert gs.log_likelihood == log_likelihood
        assert gs.t == T
        assert (gs.n == n).all()
        assert (gs.sum_px == sumpx).all()
        assert (gs.sum_pxx == sumpxx).all()
        assert gs.shape == (2, 3)

        # Saves and reads from file
        filename = make_hdf5_path()
        gs.save(bob.io.base.HDF5File(filename, 'w'))
        gs_loaded = GMMStats(bob.io.base.HDF5File(filename))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Saves and reads from file using the keyword argument
        filename = make_hdf5_path()
        gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
        gs_loaded = GMMStats(bob.io.base.HDF5File(filename))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Saves with the keyword argument, loads with a positional argument
        filename = make_hdf5_path()
        gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
        gs_loaded = GMMStats()
        gs_loaded.load(bob.io.base.HDF5File(filename))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Saves and loads from file, both using the keyword argument
        filename = make_hdf5_path()
        gs.save(hdf5=bob.io.base.HDF5File(filename, 'w'))
        gs_loaded = GMMStats()
        gs_loaded.load(hdf5=bob.io.base.HDF5File(filename))
        assert gs == gs_loaded
        assert (gs != gs_loaded) is False
        assert gs.is_similar_to(gs_loaded)

        # Makes them different
        gs_loaded.t = 58
        assert (gs == gs_loaded) is False
        assert gs != gs_loaded
        assert (gs.is_similar_to(gs_loaded)) is False

        # Accumulates from another GMMStats. Copies are assigned so that the
        # in-place addition below can never alter the reference arrays the
        # expected values are computed from.
        gs2 = GMMStats(2, 3)
        gs2.log_likelihood = log_likelihood
        gs2.t = T
        gs2.n = n.copy()
        gs2.sum_px = sumpx.copy()
        gs2.sum_pxx = sumpxx.copy()
        gs2 += gs
        eps = 1e-8
        assert gs2.log_likelihood == 2 * log_likelihood
        assert gs2.t == 2 * T
        assert numpy.allclose(gs2.n, 2 * n, eps)
        assert numpy.allclose(gs2.sum_px, 2 * sumpx, eps)
        assert numpy.allclose(gs2.sum_pxx, 2 * sumpxx, eps)

        # Reinit and checks for zeros
        gs_loaded.init()
        assert gs_loaded.log_likelihood == 0
        assert gs_loaded.t == 0
        assert (gs_loaded.n == 0).all()
        assert (gs_loaded.sum_px == 0).all()
        assert (gs_loaded.sum_pxx == 0).all()

        # Resize and checks size
        assert gs_loaded.shape == (2, 3)
        gs_loaded.resize(4, 5)
        assert gs_loaded.shape == (4, 5)
        assert gs_loaded.sum_px.shape[0] == 4
        assert gs_loaded.sum_px.shape[1] == 5
    finally:
        # Clean-up
        for path in created_paths:
            os.unlink(path)