示例#1
0
def _extract_zero_and_first_stats(X, sad, indices, gmm, z_path, f_path,
                                  name_path):
    """Extract zero- and first-order GMM statistics of `X` and write them to disk.

    Two modes are supported, selected by `indices`:

    * `indices is None`: every row of `X` is treated as one sample. Each row
      is passed to `gmm.transform(..., zero=True, first=True, device='cpu')`
      and the results are stored at the same row position in two float32
      memory-mapped arrays: `z_path` with shape `(n_samples, gmm.nmix)` and
      `f_path` with shape `(n_samples, gmm.feat_dim * gmm.nmix)`.
    * otherwise: the work is delegated to `gmm.transform_to_disk`.

    Parameters
    ----------
    X : array-like
        Input features; `X.shape[0]` is used as the number of samples in the
        single-sample mode.
    sad : indexable or None
        Optional per-sample mask. In single-sample mode a row `i` is skipped
        when `sad[i]` is falsy; skipped rows are never written.
        NOTE(review): the content of unwritten rows depends on how
        `MmapArrayWriter` initialises the file -- confirm.
    indices : object or None
        Forwarded to `gmm.transform_to_disk`; `None` selects the
        single-sample mode.
    gmm : object
        Model providing `nmix`, `feat_dim`, `transform` and
        `transform_to_disk`.
    z_path, f_path : str
        Output paths for the zero- and first-order statistics. Existing files
        at these paths are removed in single-sample mode.
    name_path : str
        Only used when `indices` is given; forwarded to
        `gmm.transform_to_disk`.

    Returns
    -------
    None
    """
    # Indices available: use the GMM's own streaming implementation directly.
    if indices is not None:
        gmm.transform_to_disk(X,
                              indices=indices,
                              sad=sad,
                              pathZ=z_path,
                              pathF=f_path,
                              name_path=name_path,
                              dtype='float32',
                              device=None,
                              ncpu=None,
                              override=True)
        return

    # indices is None: every row is a single sample (utterance, image, ...)
    n_samples = X.shape[0]

    # Start from a clean slate. NOTE(review): `remove_exist=True` below
    # presumably does the same thing; the explicit removal is kept as-is.
    for stale_path in (z_path, f_path):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    Z = MmapArrayWriter(path=z_path,
                        dtype='float32',
                        shape=(n_samples, gmm.nmix),
                        remove_exist=True)
    # Nested try/finally so both writers are closed even when the extraction
    # fails half-way (previously they were leaked on any exception).
    try:
        F = MmapArrayWriter(path=f_path,
                            dtype='float32',
                            shape=(n_samples, gmm.feat_dim * gmm.nmix),
                            remove_exist=True)
        try:
            jobs, _ = _split_jobs(n_samples,
                                  ncpu=mpi.cpu_count(),
                                  device='cpu',
                                  gpu_factor=1)

            def map_transform(start_end):
                # Each job is a (start, end) range of row indices.
                start, end = start_end
                for i in range(start, end):
                    # removed by SAD: emit a placeholder so the consumer can
                    # still advance the progress bar
                    if sad is not None and not bool(sad[i]):
                        yield None, None, None
                    else:
                        z, f = gmm.transform(X[i][np.newaxis, :],
                                             zero=True,
                                             first=True,
                                             device='cpu')
                        yield i, z, f

            prog = Progbar(target=n_samples,
                           print_report=True,
                           print_summary=False,
                           name="Extracting zero and first order statistics")
            for i, z, f in mpi.MPI(jobs, map_transform, ncpu=None, batch=1):
                if i is not None:  # i None means removed by SAD
                    Z[i] = z
                    F[i] = f
                prog.add(1)
            Z.flush()
            F.flush()
        finally:
            F.close()
    finally:
        Z.close()
示例#2
0
# ====== writing: time how long each backend takes to store X ====== #
# NumPy .npy file (opening the file is part of the measured time)
start = timeit.default_timer()
with open(numpy_path, 'wb') as f:
    np.save(f, X)
elapsed = timeit.default_timer() - start
print('Numpy save in:', elapsed, 's')

# HDF5 dataset assignment
start = timeit.default_timer()
hdf5['X'] = X
elapsed = timeit.default_timer() - start
print('Writing data to HDF5  :', elapsed, 's')

# Memory-mapped array writer
start = timeit.default_timer()
mmap.write(X)
elapsed = timeit.default_timer() - start
print('Writing data to Memmap:', elapsed, 's')

# Flush and close both stores (HDF5 first, then Memmap) so the on-disk
# files are complete before they are inspected.
for store in (hdf5, mmap):
    store.flush()
    store.close()

# ====== check file size: report each output file in megabytes ====== #
print()
for size_template, saved_path in (
        ("Numpy saved size: %.2f (MB)", numpy_path),
        ("HDF5 saved size: %.2f (MB)", hdf5_path),
        ("Mmap saved size: %.2f (MB)", mmap_path),
):
    size_bytes = os.stat(saved_path).st_size
    # bytes -> KB -> MB
    print(size_template % (size_bytes / 1024 / 1024))

# ====== reading ====== #
print()

start = timeit.default_timer()