def __init__(self, cfg: DictConfig):
    """Build the vocabulary, embedding, backbone and two 1x1 conv heads.

    Args:
        cfg: Configuration forwarded to the parent constructor. After that
            call the constructor works from ``self._cfg`` (presumably
            populated by the parent -- confirm) rather than from the raw
            argument.
    """
    super().__init__(cfg=cfg)
    # NOTE(review): this looks like a process-wide switch rather than a
    # per-instance one -- confirm before constructing other models alongside.
    typecheck.set_typecheck_enabled(enabled=False)

    conf = self._cfg

    # Vocabulary is built from the training dataset's vocab settings.
    vocab_kwargs = conf.train_ds.dataset.vocab
    self.vocab = AudioToCharWithDursF0Dataset.make_vocab(**vocab_kwargs)
    self.embed = GaussianEmbedding(self.vocab, conf.d_char)

    self.model = instantiate(conf.model)

    # The heads consume as many channels as the last `jasper` entry of the
    # model config declares in `filters`.
    head_in_channels = conf.model.jasper[-1].filters
    self.sil_proj = nn.Conv1d(
        in_channels=head_in_channels, out_channels=1, kernel_size=1
    )
    self.body_proj = nn.Conv1d(
        in_channels=head_in_channels, out_channels=1, kernel_size=1
    )
def __init__(self, cfg: DictConfig, trainer: 'Trainer' = None):
    """Build the vocabulary, embedding, encoder, two 1x1 conv heads and F0 stats.

    Args:
        cfg: Configuration forwarded to the parent constructor. After that
            call the constructor works from ``self._cfg`` (presumably
            populated by the parent -- confirm) rather than from the raw
            argument.
        trainer: Forwarded unchanged to the parent constructor; defaults to
            ``None``.
    """
    super().__init__(cfg=cfg, trainer=trainer)

    conf = self._cfg

    # Vocabulary is built from the training dataset's vocab settings.
    vocab_kwargs = conf.train_ds.dataset.vocab
    self.vocab = AudioToCharWithDursF0Dataset.make_vocab(**vocab_kwargs)
    self.embed = GaussianEmbedding(self.vocab, conf.d_char)

    self.encoder = instantiate(conf.encoder)

    # The heads consume as many channels as the last `jasper` entry of the
    # encoder config declares in `filters`.
    head_in_channels = conf.encoder.jasper[-1].filters
    self.sil_proj = nn.Conv1d(
        in_channels=head_in_channels, out_channels=1, kernel_size=1
    )
    self.body_proj = nn.Conv1d(
        in_channels=head_in_channels, out_channels=1, kernel_size=1
    )

    # Read both statistics from the config before storing either one.
    f0_mean = conf.f0_mean
    f0_std = conf.f0_std
    self.f0_mean = f0_mean
    self.f0_std = f0_std
def __init__(self, cfg: DictConfig):
    """Build the vocabulary, preprocessor, embedding, F0 modules, backbone and output projection.

    Args:
        cfg: Configuration forwarded to the parent constructor. After that
            call the constructor works from ``self._cfg`` (presumably
            populated by the parent -- confirm) rather than from the raw
            argument.
    """
    super().__init__(cfg=cfg)
    # NOTE(review): this looks like a process-wide switch rather than a
    # per-instance one -- confirm before constructing other models alongside.
    typecheck.set_typecheck_enabled(enabled=False)

    conf = self._cfg

    # Vocabulary is built from the training dataset's vocab settings.
    vocab_kwargs = conf.train_ds.dataset.vocab
    self.vocab = AudioToCharWithDursF0Dataset.make_vocab(**vocab_kwargs)

    self.preprocessor = instantiate(conf.preprocessor)
    self.embed = GaussianEmbedding(self.vocab, conf.d_char)

    # F0 branch: a one-channel masked instance norm and a style residual
    # built with (d_char, 1) and a kernel of 3.
    self.norm_f0 = MaskedInstanceNorm1d(1)
    self.res_f0 = StyleResidual(conf.d_char, 1, kernel_size=3)

    self.model = instantiate(conf.model)

    # The 1x1 convolution maps the channel count declared by the last
    # `jasper` entry of the model config (`filters`) to `n_mels` channels.
    proj_in_channels = conf.model.jasper[-1].filters
    self.proj = nn.Conv1d(
        in_channels=proj_in_channels,
        out_channels=conf.n_mels,
        kernel_size=1,
    )
def __init__(self, cfg: DictConfig, trainer: 'Trainer' = None):
    """Build the vocabulary, preprocessor, embedding, F0 modules, encoder and output projection.

    Args:
        cfg: Configuration forwarded to the parent constructor. After that
            call the constructor works from ``self._cfg`` (presumably
            populated by the parent -- confirm) rather than from the raw
            argument.
        trainer: Forwarded unchanged to the parent constructor; defaults to
            ``None``.
    """
    super().__init__(cfg=cfg, trainer=trainer)

    conf = self._cfg

    # Vocabulary is built from the training dataset's vocab settings.
    vocab_kwargs = conf.train_ds.dataset.vocab
    self.vocab = AudioToCharWithDursF0Dataset.make_vocab(**vocab_kwargs)
    # Keep the training dataset's `blanking` setting on the instance.
    self.blanking = conf.train_ds.dataset.blanking

    self.preprocessor = instantiate(conf.preprocessor)
    self.embed = GaussianEmbedding(self.vocab, conf.d_char)

    # F0 branch: a one-channel masked instance norm and a style residual
    # built with (d_char, 1) and a kernel of 3.
    self.norm_f0 = MaskedInstanceNorm1d(1)
    self.res_f0 = StyleResidual(conf.d_char, 1, kernel_size=3)

    self.encoder = instantiate(conf.encoder)

    # The 1x1 convolution maps the channel count declared by the last
    # `jasper` entry of the encoder config (`filters`) to `n_mels` channels.
    proj_in_channels = conf.encoder.jasper[-1].filters
    self.proj = nn.Conv1d(
        in_channels=proj_in_channels,
        out_channels=conf.n_mels,
        kernel_size=1,
    )