def _extract_zero_and_first_stats(X, sad, indices, gmm, z_path, f_path, name_path):
    """Extract zero- and first-order statistics of `X` with `gmm` and store them on disk.

    Parameters
    ----------
    X : array-like
        Input data; `X.shape[0]` is used as the number of rows.
    sad : indexable or None
        Optional per-row flags. When given and `indices` is None, a row `i`
        with a falsy `sad[i]` is skipped (nothing is written for that row).
    indices : object or None
        When None, every row of `X` is treated as a single sample
        (utterance, image, ...) and the statistics are computed row by row.
        Otherwise it is forwarded unchanged to `gmm.transform_to_disk`.
    gmm : object
        Must provide `nmix`, `feat_dim`, `transform` and `transform_to_disk`.
    z_path : str
        Output path of the zero-order statistics, shape `(n_samples, nmix)`.
    f_path : str
        Output path of the first-order statistics,
        shape `(n_samples, feat_dim * nmix)`.
    name_path : str
        Only used when `indices` is not None; forwarded to
        `gmm.transform_to_disk`.

    Returns
    -------
    None
    """
    n_samples = X.shape[0]

    # Indices are provided: use the GMM's own `transform_to_disk` directly.
    if indices is not None:
        gmm.transform_to_disk(X, indices=indices, sad=sad,
                              pathZ=z_path, pathF=f_path, name_path=name_path,
                              dtype='float32', device=None, ncpu=None,
                              override=True)
        return

    # indices is None: every row is a single sample.
    # Start from a clean slate so stale outputs never survive a re-run.
    for stale_path in (z_path, f_path):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    def map_transform(start_end):
        # Worker: yields (row, z, f) for each row of its job range, or
        # (None, None, None) when the row was removed by SAD, so the
        # consumer still sees exactly one item per row.
        start, end = start_end
        for i in range(start, end):
            if sad is not None and not bool(sad[i]):
                yield None, None, None
            else:
                z, f = gmm.transform(X[i][np.newaxis, :],
                                     zero=True, first=True, device='cpu')
                yield i, z, f

    Z = MmapArrayWriter(path=z_path, dtype='float32',
                        shape=(n_samples, gmm.nmix),
                        remove_exist=True)
    F = None
    try:
        F = MmapArrayWriter(path=f_path, dtype='float32',
                            shape=(n_samples, gmm.feat_dim * gmm.nmix),
                            remove_exist=True)
        jobs, _ = _split_jobs(n_samples, ncpu=mpi.cpu_count(),
                              device='cpu', gpu_factor=1)
        prog = Progbar(target=n_samples,
                       print_report=True, print_summary=False,
                       name="Extracting zero and first order statistics")
        for i, z, f in mpi.MPI(jobs, map_transform, ncpu=None, batch=1):
            if i is not None:  # i None means removed by SAD
                Z[i] = z
                F[i] = f
            prog.add(1)
        Z.flush()
        F.flush()
    finally:
        # Always release both writers, even when extraction fails part-way,
        # so the memory-mapped files are not left open. The close order
        # (Z, then F) matches the success path of the previous version.
        try:
            Z.close()
        finally:
            if F is not None:
                F.close()
# ====== writing ====== #
# Time how long each backend takes to persist the same array `X`.
# NOTE: `start` and `f` keep their original names because code outside this
# excerpt may still read them.
start = timeit.default_timer()
with open(numpy_path, 'wb') as f:
    np.save(f, X)
elapsed = timeit.default_timer() - start
print('Numpy save in:', elapsed, 's')

start = timeit.default_timer()
hdf5['X'] = X
elapsed = timeit.default_timer() - start
print('Writing data to HDF5 :', elapsed, 's')

start = timeit.default_timer()
mmap.write(X)
elapsed = timeit.default_timer() - start
print('Writing data to Memmap:', elapsed, 's')

# Push everything to disk and release the handles (HDF5 first, then Memmap)
# before the file sizes are measured.
for writer in (hdf5, mmap):
    writer.flush()
    writer.close()

# ====== check file size ====== #
print()
for size_template, saved_path in (
        ("Numpy saved size: %.2f (MB)", numpy_path),
        ("HDF5 saved size: %.2f (MB)", hdf5_path),
        ("Mmap saved size: %.2f (MB)", mmap_path),
):
    # bytes -> megabytes
    size_mb = os.stat(saved_path).st_size / 1024 / 1024
    print(size_template % size_mb)

# ====== reading ====== #
print()
start = timeit.default_timer()