def __init__(self, kind_data: str, **kwargs: Channel):
    """Collect mel feature files for `kind_data` and configure which
    data channels this dataset needs to load.

    Args:
        kind_data: dataset split key; selects `hp.dict_path[f'mel_{kind_data}']`.
        **kwargs: per-item Channel overrides forwarded to `set_needs`.
    """
    self._PATH = hp.dict_path[f'mel_{kind_data}']

    # default needs
    self._needs = dict(
        path_speech=Channel.ALL,
        wav=Channel.ALL,
        y=Channel.ALL,
        length=Channel.ALL,
    )
    self.set_needs(**kwargs)

    # gather feature files, deterministically ordered; optionally cap the
    # dataset size via hp.n_data (<= 0 means "use everything")
    files = sorted(
        f for f in self._PATH.glob('*.*') if hp.is_featurefile(f)
    )
    if hp.n_data > 0:
        files = files[:hp.n_data]
    self._all_files = files

    print(
        f'{len(self._all_files)} files are prepared from {kind_data.upper()}.'
    )
def __init__(self, kind_data: str, norm_modules: Dict[str, Normalization] = None, **kwargs: Channel):
    """Collect feature files for `kind_data` and set up input/output normalization.

    For 'train', normalization constants are loaded from the saved
    normconst file when it exists (and `hp.refresh_const` is False),
    otherwise computed from the feature files and persisted as both
    .npz and .mat. For any other split, pre-computed `norm_modules`
    containing 'x' and 'y' entries must be supplied.

    Args:
        kind_data: dataset split key; selects `hp.dict_path[f'feature_{kind_data}']`.
        norm_modules: required for non-train splits; dict with 'x' and 'y'
            Normalization objects (typically taken from the train dataset).
        **kwargs: per-item Channel overrides forwarded to `set_needs`.

    Raises:
        ValueError: non-train split without valid `norm_modules`.
    """
    self._PATH = hp.dict_path[f'feature_{kind_data}']

    # default needs
    self._needs = dict(x=Channel.ALL, y=Channel.LAST,
                       x_mag=Channel.NONE,
                       x_phase=Channel.NONE, y_phase=Channel.NONE,
                       path_speech=Channel.ALL)
    self.set_needs(**kwargs)

    self._all_files = sorted(
        f for f in self._PATH.glob('*.*') if hp.is_featurefile(f)
    )
    if hp.n_data > 0:
        self._all_files = self._all_files[:hp.n_data]

    self.norm_modules = dict()
    if kind_data == 'train':
        # path_normconst: path of the file that has information about mean, std, ...
        path_normconst = hp.dict_path[f'normconst_{kind_data}']

        if not hp.refresh_const and path_normconst.exists():
            # reuse previously saved constants instead of recomputing
            npz_normconst = np.load(path_normconst, allow_pickle=True)
            self.norm_modules['x'] = Normalization(
                *npz_normconst['normconst_x'])
            self.norm_modules['y'] = Normalization(
                *npz_normconst['normconst_y'])
        else:
            print('calculate normalization consts for input')
            self.norm_modules['x'] \
                = Normalization.calc_const(self._all_files,
                                           key=hp.spec_data_names['x'])
            print('calculate normalization consts for output')
            self.norm_modules['y'] \
                = Normalization.calc_const(self._all_files,
                                           key=hp.spec_data_names['y'])
            # persist for later runs: .npz for Python, .mat for MATLAB tooling
            np.savez(path_normconst,
                     normconst_x=self.norm_modules['x'].astuple(),
                     normconst_y=self.norm_modules['y'].astuple())
            scio.savemat(
                path_normconst.with_suffix('.mat'),
                dict(normconst_x=self.norm_modules['x'].astuple(),
                     normconst_y=self.norm_modules['y'].astuple()))
        print(f'normalization consts for input: {self.norm_modules["x"]}')
        print(f'normalization consts for output: {self.norm_modules["y"]}')
    else:
        # Was `assert 'x' in norm_modules and 'y' in norm_modules`, which is
        # stripped under -O and raises an opaque TypeError when norm_modules
        # is None; validate explicitly with a clear error instead.
        if norm_modules is None or 'x' not in norm_modules or 'y' not in norm_modules:
            raise ValueError(
                "norm_modules with 'x' and 'y' entries is required "
                f"for kind_data={kind_data!r}")
        self.norm_modules = norm_modules

    print(
        f'{len(self._all_files)} files are prepared from {kind_data.upper()}.'
    )
def search_files(self, n_file: int, random_by_utterance=False, n_loc=1):
    """Return a selection of feature file names found in `self._PATH`.

    Args:
        n_file: number of files to return; -1 returns all files (sorted).
        random_by_utterance: if True, draw random utterance indices and
            keep every file whose name starts with one of them; otherwise
            keep a random permutation truncated to `n_file`.
        n_loc: number of files (locations) per utterance.

    Returns:
        A list (or, in the permutation branch, a numpy array) of file names.
    """
    s_all_files = [
        f.name for f in os.scandir(self._PATH) if hp.is_featurefile(f)
    ]
    s_all_files = sorted(s_all_files)
    if n_file != -1:
        if random_by_utterance:
            utterances = np.random.randint(len(s_all_files) // n_loc,
                                           size=n_file // n_loc)
            # NOTE(review): '{u:4d}_' pads with spaces; if file names use
            # zero-padded utterance indices this should be '{u:04d}_' — confirm.
            utterances = [f'{u:4d}_' for u in utterances]
            # BUG FIX: str.startswith accepts a str or a *tuple* of strings;
            # passing a list raised TypeError at runtime.
            prefixes = tuple(utterances)
            s_all_files = [
                f for f in s_all_files if f.startswith(prefixes)
            ]
        else:
            s_all_files = np.random.permutation(s_all_files)
            s_all_files = s_all_files[:n_file]
    return s_all_files