Exemplo n.º 1
0
    def _store_features(self, batch, outputs, cv_spkr_name, tdir):
        """Gather decoded features and converted F0 for every item in a batch.

        Args:
            batch: Mapping read at keys "org_spkr_name", "flbl", "flen",
                "lcf0" and "uv" (plus "cap" when the feature type is
                "mcep"); every entry is indexed per item.
            outputs: Mapping whose "decoded" entry holds the model output;
                its first dimension is walked item by item.
            cv_spkr_name: Target speaker name, or None to reuse each item's
                own speaker name.
            tdir: Output directory, joined with the file name via ``/``
                (presumably a ``pathlib.Path`` -- confirm against callers).

        Returns:
            Dict keyed by output wav path. Each value is a dict holding
            "feats", "normed_feat", "lcf0", "normed_lcf0", "uv", "f0" and,
            when the feature type is "mcep", "cap".

        Note:
            The parent directory of every wav path is created as a side
            effect.
        """
        feat_type = self.conf["feat_type"]
        decoded = outputs["decoded"]
        feats = {}
        for idx in range(decoded.size(0)):
            src_spkr = batch["org_spkr_name"][idx]
            # Without an explicit target the item is "converted" to itself.
            tgt_spkr = cv_spkr_name if cv_spkr_name is not None else src_spkr
            wav_name = "{}_org-{}_cv-{}.wav".format(
                batch["flbl"][idx], src_spkr, tgt_spkr)
            wavf = tdir / wav_name
            wavf.parent.mkdir(parents=True, exist_ok=True)

            # Decoded features, cut to this item's frame count.
            n_frames = batch["flen"][idx]
            normed = to_numpy(decoded[idx][:n_frames])
            entry = {
                "feats": self.scaler[feat_type].inverse_transform(normed),
                "normed_feat": normed,
            }

            # F0: undo the scaling, convert between speakers, re-scale.
            src_lcf0 = self.scaler["lcf0"].inverse_transform(
                to_numpy(batch["lcf0"][idx][:n_frames]))
            cv_lcf0 = convert_f0(self.scaler, src_lcf0, src_spkr, tgt_spkr)
            entry["lcf0"] = cv_lcf0
            entry["normed_lcf0"] = self.scaler["lcf0"].transform(cv_lcf0)
            uv = to_numpy(batch["uv"][idx][:n_frames])
            entry["uv"] = uv
            entry["f0"] = np.exp(cv_lcf0) * uv

            if feat_type == "mcep":
                entry["cap"] = to_numpy(batch["cap"][idx][:n_frames])

            feats[wavf] = entry
        return feats
Exemplo n.º 2
0
    def _store_features(self, batch, outputs, cv_spkr_name, tdir):
        """Gather decoded features and converted F0 for every item in a batch.

        Args:
            batch: Mapping read at keys "org_spkr_name", "flbl", "flen",
                "lcf0" and "uv"; for the "mcep" feature type also "cap" and,
                when ``conf["use_mcep_0th"]`` is falsy, "mcep_0th".
            outputs: Mapping whose "decoded" entry holds the model output;
                its first dimension is walked item by item.
            cv_spkr_name: Target speaker name, or None to reuse each item's
                own speaker name.
            tdir: Output directory, joined with the file name via ``/``.

        Returns:
            Dict keyed by output wav path. Each value is a dict holding
            "feats", "normed_feat", "lcf0", "normed_lcf0", "uv", "f0" and,
            when the feature type is "mcep", "cap".
        """
        feat_type = self.conf["feat_type"]
        decoded = outputs["decoded"]
        feats = {}
        for idx in range(decoded.size(0)):
            src_spkr = batch["org_spkr_name"][idx]
            # Without an explicit target the item is "converted" to itself.
            tgt_spkr = cv_spkr_name if cv_spkr_name is not None else src_spkr
            flbl = batch["flbl"][idx]
            wavf = tdir / f"{flbl}_org-{src_spkr}_cv-{tgt_spkr}.wav"

            # Decoded features, cut to this item's frame count.
            n_frames = batch["flen"][idx]
            normed = to_numpy(decoded[idx][:n_frames])
            if feat_type == "mcep" and not self.conf["use_mcep_0th"]:
                # The batch's "mcep_0th" columns are put back in front of
                # the decoded ones before the scaler is inverted.
                leading = to_numpy(batch["mcep_0th"][idx][:n_frames])
                normed = np.hstack([leading, normed])
            entry = {
                "feats": self.scaler[feat_type].inverse_transform(normed),
                "normed_feat": normed,
            }

            # F0: undo the scaling, convert between speakers, re-scale.
            src_lcf0 = self.scaler["lcf0"].inverse_transform(
                to_numpy(batch["lcf0"][idx][:n_frames]))
            cv_lcf0 = convert_f0(self.scaler, src_lcf0, src_spkr, tgt_spkr)
            entry["lcf0"] = cv_lcf0
            entry["normed_lcf0"] = self.scaler["lcf0"].transform(cv_lcf0)
            uv = to_numpy(batch["uv"][idx][:n_frames])
            entry["uv"] = uv
            entry["f0"] = np.exp(cv_lcf0) * uv

            if feat_type == "mcep":
                entry["cap"] = to_numpy(batch["cap"][idx][:n_frames])

            feats[wavf] = entry
        return feats
Exemplo n.º 3
0
 def _get_cvf0(self, batch, spkr_name):
     """Return the scaled, speaker-converted lcf0 for every item in a batch.

     Args:
         batch: Mapping read at keys "feats" (only its first dimension is
             used, as the item count), "lcf0" and "org_spkr_name".
         spkr_name: Target speaker name handed to ``convert_f0``.

     Returns:
         Float tensor on ``self.device`` stacking the per-item results
         along a new first dimension.
     """
     def _convert(idx):
         # Undo the scaling, convert between speakers, then re-scale.
         src_lcf0 = self.scaler["lcf0"].inverse_transform(
             to_numpy(batch["lcf0"][idx]))
         converted = convert_f0(self.scaler, src_lcf0,
                                batch["org_spkr_name"][idx], spkr_name)
         return torch.tensor(self.scaler["lcf0"].transform(converted))

     n_items = batch["feats"].size(0)
     stacked = torch.stack([_convert(idx) for idx in range(n_items)], dim=0)
     return stacked.float().to(self.device)
    def _store_features(self, batch, outputs, cv_spkr_name, tdir):
        """Gather decoded features and converted F0 for every item in a batch.

        Args:
            batch: Mapping read at keys "org_spkr_name", "flbl", "flen",
                "lcf0" and "uv"; for the "mcep" output type also "cap" and,
                when ``conf["use_mcep_0th"]`` is falsy, "mcep_0th" and
                "in_feats".
            outputs: Mapping whose "decoded" entry holds the model output;
                its first dimension is walked item by item.
            cv_spkr_name: Target speaker name, or None to reuse each item's
                own speaker name.
            tdir: Output directory, joined with the file name via ``/``.

        Returns:
            Dict keyed by output wav path. Each value is a dict holding
            "feats", "lcf0", "uv", "f0", "normed_lcf0" and "normed_feat".
            For the "mcep" output type it also holds "cap" and "rmcep"
            ("rmcep" is None when ``conf["use_mcep_0th"]`` is truthy).
        """
        def _denormalize(key, values):
            # Keys listed in conf["ignore_scaler"] pass through unchanged.
            if key in self.conf["ignore_scaler"]:
                return values
            return self.scaler[key].inverse_transform(values)

        feat_type = self.conf["output_feat_type"]
        decoded = outputs["decoded"]
        feats = {}
        for idx in range(decoded.size(0)):
            src_spkr = batch["org_spkr_name"][idx]
            # Without an explicit target the item is "converted" to itself.
            tgt_spkr = cv_spkr_name if cv_spkr_name is not None else src_spkr
            flbl = batch["flbl"][idx]
            wavf = tdir / f"{flbl}_org-{src_spkr}_cv-{tgt_spkr}.wav"

            # Decoded features, cut to this item's frame count.
            entry = {}
            n_frames = batch["flen"][idx]
            normed = to_numpy(decoded[idx][:n_frames])
            if feat_type == "mcep":
                entry["cap"] = to_numpy(batch["cap"][idx][:n_frames])
                if self.conf["use_mcep_0th"]:
                    entry["rmcep"] = None
                else:
                    # Put the batch's "mcep_0th" columns in front of both
                    # the decoded features and the input features.
                    leading = to_numpy(batch["mcep_0th"][idx][:n_frames])
                    src_mcep = to_numpy(batch["in_feats"][idx][:n_frames])
                    normed = np.ascontiguousarray(
                        np.hstack([leading, normed]))
                    src_full = np.ascontiguousarray(
                        np.hstack([leading, src_mcep]))
                    entry["rmcep"] = _denormalize(feat_type, src_full)
            entry["feats"] = _denormalize(feat_type, normed)

            # F0: undo the scaling, then convert between speakers.
            src_lcf0 = _denormalize(
                "lcf0", to_numpy(batch["lcf0"][idx][:n_frames]))
            cv_lcf0 = convert_f0(self.scaler, src_lcf0, src_spkr, tgt_spkr)
            uv = to_numpy(batch["uv"][idx][:n_frames])
            entry["lcf0"] = cv_lcf0
            entry["uv"] = uv
            entry["f0"] = np.exp(cv_lcf0) * uv

            # Keep the normalized variants alongside the de-normalized ones.
            # NOTE(review): this always applies the "lcf0" scaler, even if
            # "lcf0" is listed in conf["ignore_scaler"] -- confirm intended.
            entry["normed_lcf0"] = self.scaler["lcf0"].transform(cv_lcf0)
            entry["normed_feat"] = normed

            feats[wavf] = entry
        return feats