示例#1
0
    def transform(self, XY):
        """Align every pair of sequences in ``X`` and ``Y`` with DTW.

        Each pair is trimmed with ``trim_zeros_frames``, aligned with
        ``fastdtw`` (using ``self.radius`` and ``self.dist``), and both
        sequences are re-indexed along the resulting warping path so they
        end up with the same number of frames.

        Args:
            XY (tuple): Pair ``(X, Y)`` of 3-d arrays. The first axis is
                iterated pairwise and axis 1 is treated as the frame axis.

        Returns:
            tuple: ``(X_aligned, Y_aligned)``, zero-padded along axis 1.
            The frame axis grows if a warping path is longer than the
            longer of the two inputs.
        """
        X, Y = XY
        assert X.ndim == 3 and Y.ndim == 3

        longer_features = X if X.shape[1] > Y.shape[1] else Y

        X_aligned = np.zeros_like(longer_features)
        Y_aligned = np.zeros_like(longer_features)
        for idx, (x, y) in enumerate(zip(X, Y)):
            x, y = trim_zeros_frames(x), trim_zeros_frames(y)
            dist, path = fastdtw(x, y, radius=self.radius, dist=self.dist)
            # Normalize the accumulated distance by the total frame count.
            dist /= (len(x) + len(y))
            pathx = [step[0] for step in path]
            pathy = [step[1] for step in path]
            x, y = x[pathx], y[pathy]
            max_len = max(len(x), len(y))
            # The warping path may be longer than the current buffers; grow
            # both by the same amount so they keep an identical shape.
            # (The original compared with ``>`` here, yielding a bool
            # instead of a frame count for the second operand.)
            pad_size = max(max_len - X_aligned.shape[1],
                           max_len - Y_aligned.shape[1])
            if pad_size > 0:
                pad_width = [(0, 0), (0, pad_size), (0, 0)]
                X_aligned = np.pad(X_aligned, pad_width,
                                   mode="constant",
                                   constant_values=0)
                Y_aligned = np.pad(Y_aligned, pad_width,
                                   mode="constant",
                                   constant_values=0)
            X_aligned[idx][:len(x)] = x
            Y_aligned[idx][:len(y)] = y
            if self.verbose > 0:
                print("{}, distance: {}".format(idx, dist))
        return X_aligned, Y_aligned
示例#2
0
    def transform(self, XY):
        """Iteratively align ``X`` to ``Y`` with DTW and GMM-based conversion.

        For ``self.n_iter`` rounds: every pair is aligned with ``fastdtw``,
        a joint ``GaussianMixture`` is fitted on the aligned frames, and the
        working copy of ``X`` is replaced by its ``MLPG`` conversion so that
        the next round aligns converted features against ``Y``. At the end
        the warping paths of the last round are applied to the *original*
        ``X``.

        Args:
            XY (tuple): Pair ``(X, Y)`` of 3-d arrays. The first axis is
                iterated pairwise and axis 1 is treated as the frame axis.

        Returns:
            tuple: ``(X_aligned, Y_aligned)``, zero-padded along axis 1.
        """
        X, Y = XY
        assert X.ndim == 3 and Y.ndim == 3

        longer_features = X if X.shape[1] > Y.shape[1] else Y

        Xc = X.copy()  # this will be updated iteratively
        X_aligned = np.zeros_like(longer_features)
        Y_aligned = np.zeros_like(longer_features)
        # ``np.object`` was removed from NumPy; the builtin is equivalent.
        refined_paths = np.empty(len(X), dtype=object)

        for _ in range(self.n_iter):
            for idx, (x, y) in enumerate(zip(Xc, Y)):
                x, y = trim_zeros_frames(x), trim_zeros_frames(y)
                dist, path = fastdtw(x, y, radius=self.radius, dist=self.dist)
                # Normalize the accumulated distance by the total frame count.
                dist /= (len(x) + len(y))
                pathx = [step[0] for step in path]
                pathy = [step[1] for step in path]

                refined_paths[idx] = pathx
                x, y = x[pathx], y[pathy]

                max_len = max(len(x), len(y))
                # Grow both buffers by the same amount when the warping path
                # is longer than the current frame axis. (The original used
                # ``>`` for the second operand, producing a bool.)
                pad_size = max(max_len - X_aligned.shape[1],
                               max_len - Y_aligned.shape[1])
                if pad_size > 0:
                    pad_width = [(0, 0), (0, pad_size), (0, 0)]
                    X_aligned = np.pad(X_aligned, pad_width,
                                       mode="constant",
                                       constant_values=0)
                    Y_aligned = np.pad(Y_aligned, pad_width,
                                       mode="constant",
                                       constant_values=0)

                # Clear the rows first: a path shorter than the one from a
                # previous round would otherwise leave stale frames behind.
                X_aligned[idx] = 0
                Y_aligned[idx] = 0
                X_aligned[idx][:len(x)] = x
                Y_aligned[idx][:len(y)] = y
                if self.verbose > 0:
                    print("{}, distance: {}".format(idx, dist))

            # Fit
            gmm = GaussianMixture(n_components=self.n_components_gmm,
                                  covariance_type="full",
                                  max_iter=self.max_iter_gmm)
            joint = np.concatenate((X_aligned, Y_aligned),
                                   axis=-1).reshape(-1, X.shape[-1] * 2)
            gmm.fit(joint)
            windows = [(0, 0, np.array([1.0]))]  # no delta
            paramgen = MLPG(gmm, windows=windows)
            for xc in Xc:
                # ``xc`` is a view into ``Xc``; the write updates it in place.
                x = trim_zeros_frames(xc)
                xc[:len(x)] = paramgen.transform(x)

        # Finally we can get aligned X
        for idx in range(len(X_aligned)):
            x = X[idx][refined_paths[idx]]
            X_aligned[idx][:len(x)] = x

        return X_aligned, Y_aligned
示例#3
0
def plot_parallel(x, y):
    """Draw ``x`` and ``y`` as two stacked spectrogram panels.

    Each input is trimmed with ``trim_zeros_frames``, transposed and shown
    with ``librosa.display.specshow`` plus a colorbar, using the
    module-level ``fs`` and ``hop_length`` for the time axis.
    """
    figure(figsize=(16, 7))
    for row, feats in enumerate((x, y), start=1):
        subplot(2, 1, row)
        trimmed = trim_zeros_frames(feats)
        librosa.display.specshow(trimmed.T,
                                 sr=fs,
                                 hop_length=hop_length,
                                 x_axis="time")
        colorbar()
示例#4
0
 def collect_features(self, path):
     """Load the audio at ``path`` and return its mel-cepstrum.

     The waveform is loaded at ``config.fs``, analysed with pyworld
     (``dio`` -> ``stonemask`` -> ``cheaptrick``), the spectrogram is
     passed through ``trim_zeros_frames`` and converted with
     ``pysptk.sp2mc`` using ``config.order`` and ``config.alpha``.
     """
     wav, fs = librosa.load(path, sr=config.fs)
     wav = wav.astype(np.float64)
     coarse_f0, timeaxis = pyworld.dio(wav, fs,
                                       frame_period=config.frame_period)
     f0 = pyworld.stonemask(wav, coarse_f0, timeaxis, fs)
     envelope = pyworld.cheaptrick(wav, f0, timeaxis, fs)
     envelope = trim_zeros_frames(envelope)
     return pysptk.sp2mc(envelope, order=config.order, alpha=config.alpha)
示例#5
0
def _get_mcep(x, fs, frame_period=5, order=24):
    """Return the mel-cepstrum of waveform ``x`` sampled at ``fs``.

    Args:
        x: Waveform array; it is cast to ``float64`` before analysis.
        fs: Sampling rate, also used to pick ``alpha`` through
            ``pysptk.util.mcepalpha``.
        frame_period: Value forwarded to ``pyworld.dio``.
        order: Order forwarded to ``pysptk.sp2mc``.

    Returns:
        The result of ``pysptk.sp2mc`` on the trimmed spectrogram.
    """
    alpha = pysptk.util.mcepalpha(fs)
    signal = x.astype(np.float64)
    coarse_f0, timeaxis = pyworld.dio(signal, fs, frame_period=frame_period)
    refined_f0 = pyworld.stonemask(signal, coarse_f0, timeaxis, fs)
    envelope = trim_zeros_frames(
        pyworld.cheaptrick(signal, refined_f0, timeaxis, fs))
    return pysptk.sp2mc(envelope, order=order, alpha=alpha)
示例#6
0
 def collect_features(self, path):
     """Read the wav file at ``path`` and return its mel-cepstrum.

     Uses the ``frame_period``, ``order`` and ``alpha`` names from the
     enclosing scope. The spectrogram is passed through
     ``trim_zeros_frames`` before conversion with ``pysptk.sp2mc``.
     """
     fs, samples = wavfile.read(path)
     samples = samples.astype(np.float64)
     coarse_f0, timeaxis = pyworld.dio(samples, fs, frame_period=frame_period)
     f0 = pyworld.stonemask(samples, coarse_f0, timeaxis, fs)
     envelope = trim_zeros_frames(
         pyworld.cheaptrick(samples, f0, timeaxis, fs))
     return pysptk.sp2mc(envelope, order=order, alpha=alpha)
示例#7
0
def get_features(x, fs):
    """Extract ``(f0, mcep, bap)`` from waveform ``x`` sampled at ``fs``.

    NOTE(review): only the spectral envelope goes through
    ``trim_zeros_frames``; ``f0`` and ``bap`` are returned untrimmed, so
    the three outputs may differ in frame count -- confirm with callers.
    """
    # f0: pw.dio estimate refined by pw.stonemask
    coarse_f0, t = pw.dio(x, fs)
    f0 = pw.stonemask(x, coarse_f0, t, fs)

    # mel-cepstrum (order 24) from the trimmed cheaptrick envelope
    envelope = pw.cheaptrick(x, f0, t, fs)
    sp = trim_zeros_frames(envelope)
    alpha = pysptk.util.mcepalpha(fs)
    mcep = pysptk.sp2mc(sp, order=24, alpha=alpha)

    # coded aperiodicity from pw.d4c
    aperiodicity = pw.d4c(x, f0, t, fs)
    bap = pw.code_aperiodicity(aperiodicity, fs)

    return f0, mcep, bap
示例#8
0
def test_trim_zeros_frames():
    """Check ``trim_zeros_frames`` for the default and each ``trim`` mode."""
    arr = np.array(((0, 0), (0, 0), (1, 1), (2, 2), (0, 0)))
    back_trimmed = ((0, 0), (0, 0), (1, 1), (2, 2))

    # (keyword arguments, expected rows) for the array with zero frames at
    # both ends; the default is expected to behave like trim='b'.
    cases = (
        ({}, back_trimmed),
        ({"trim": 'b'}, back_trimmed),
        ({"trim": 'f'}, ((1, 1), (2, 2), (0, 0))),
        ({"trim": 'fb'}, ((1, 1), (2, 2))),
    )
    for kwargs, rows in cases:
        desired = np.array(rows)
        actual = trim_zeros_frames(arr, **kwargs)

        assert desired.shape[1] == actual.shape[1]
        np.testing.assert_array_equal(actual, desired)

    # An array without any all-zero frame must come back unchanged.
    non_zeros = np.array(((1, 1), (2, 2), (3, 3), (4, 4), (5, 5)))
    desired_non_zeros = np.array(
        ((1, 1), (2, 2), (3, 3), (4, 4), (5, 5)))
    for mode in ('b', 'fb'):
        actual = trim_zeros_frames(non_zeros, trim=mode)
        np.testing.assert_array_equal(actual, desired_non_zeros)
示例#9
0
def apply_each2d_trim(func2d, X, *args, **kwargs):
    """Apply function for each trimmed 2d slice.

    Every slice ``X[i]`` is passed through ``trim_zeros_frames`` before
    ``func2d`` is called on it; the result is written to the start of the
    corresponding output slice and the remainder stays zero.

    Args:
        func2d (Function): Function applied once per 2d slice. It must
            return a 2d array.
        X (numpy.ndarray): Input 3d array of shape (``N x T x D``)
        *args: Extra positional arguments forwarded to ``func2d``.
        **kwargs: Extra keyword arguments forwarded to ``func2d``.

    Returns:
        numpy.ndarray: Output array (``N x T x D'``), created with
        ``np.zeros`` and therefore of the default float dtype.
    """
    assert X.ndim == 3
    N, T, _ = X.shape

    # The first result is needed to learn the output feature size D'.
    # Keep it instead of recomputing it inside the loop, so ``func2d`` is
    # called exactly once per slice.
    first = func2d(trim_zeros_frames(X[0]), *args, **kwargs)
    assert first.ndim == 2
    _, D = first.shape

    Y = np.zeros((N, T, D))
    Y[0][:len(first)] = first
    for idx in range(1, N):
        y = func2d(trim_zeros_frames(X[idx]), *args, **kwargs)
        Y[idx][:len(y)] = y
    return Y
示例#10
0
def test_trim_remove_zeros_frames():
    """Both zero-frame helpers must keep the feature dimension intact."""
    fs, x = wavfile.read(example_audio_file())
    x = x.astype(np.float64)

    f0, timeaxis = pyworld.dio(x, fs, frame_period=5)
    mats = (
        pyworld.cheaptrick(x, f0, timeaxis, fs),
        pyworld.d4c(x, f0, timeaxis, fs),
    )

    # Only axis 1 is checked; the number of frames is allowed to change.
    for strip in (trim_zeros_frames, remove_zeros_frames):
        for mat in mats:
            stripped = strip(mat)
            assert stripped.shape[1] == mat.shape[1]
示例#11
0
def collect_features(emotion):
    """Collect padded mel-cepstra for every numbered wav file of ``emotion``.

    Files are read from ``'_<emotion>/NN.wav'`` for ``NN`` in
    ``1..num_files`` (single digits get a leading zero). Each mel-cepstrum
    is converted to a list of frames and extended with ``vuoto`` until it
    holds at least 1000 frames.

    Returns:
        numpy.ndarray: ``np.array`` built from the per-file frame lists.
    """
    features = []
    for file_no in range(1, num_files + 1):
        prefix = '0' if file_no < 10 else ''
        path = '_' + str(emotion) + '/' + prefix + str(file_no) + '.wav'

        x, fs_ = sf.read(path)
        x = x.astype(np.float64)
        coarse_f0, time_axis = pyworld.dio(x, fs_, frame_period=frame_period)
        f0 = pyworld.stonemask(x, coarse_f0, time_axis, fs_)
        spectrogram = trim_zeros_frames(
            pyworld.cheaptrick(x, f0, time_axis, fs_))
        frames = pysptk.sp2mc(spectrogram, order=order, alpha=alpha).tolist()

        # Pad short utterances up to 1000 frames with the filler frame.
        missing = 1000 - len(frames)
        if missing > 0:
            frames.extend([vuoto] * missing)
        features.append(frames)
    return np.array(features)
示例#12
0
文件: waveform.py 项目: qxde01/CTTS
def gen_waveform(y_predicted, do_postfilter=False):
    """Synthesize a waveform from predicted acoustic features.

    Args:
        y_predicted: Predicted feature frames; passed through
            ``trim_zeros_frames`` and then split by ``gen_parameters``.
        do_postfilter: When true, ``merlin_post_filter`` is applied to the
            mel-generalized cepstrum before synthesis.

    Returns:
        The output of ``pyworld.synthesize``. ``alpha``, ``fftlen``, ``fs``
        and ``frame_period`` come from the enclosing module.
    """
    # Generate parameters and split streams
    mgc, lf0, vuv, bap = gen_parameters(trim_zeros_frames(y_predicted))
    if do_postfilter:
        mgc = merlin_post_filter(mgc, alpha)

    spectrogram = pysptk.mc2sp(mgc, fftlen=fftlen, alpha=alpha)
    aperiodicity = pyworld.decode_aperiodicity(bap.astype(np.float64), fs,
                                               fftlen)

    # Zero out frames flagged unvoiced, then leave the log domain for the
    # remaining non-zero entries.
    f0 = lf0.copy()
    f0[vuv < 0.5] = 0
    voiced = np.nonzero(f0)
    f0[voiced] = np.exp(f0[voiced])

    return pyworld.synthesize(f0.flatten().astype(np.float64),
                              spectrogram.astype(np.float64),
                              aperiodicity.astype(np.float64),
                              fs, frame_period)
示例#13
0
def _process_utterance(out_dir, index, speaker_id, wav_path, text):
    """Extract features for one utterance and save its mel-cepstrum.

    The mel-cepstrum is written to ``<out_dir>/<wav_id>-mc.npy`` where
    ``wav_id`` is the file name of ``wav_path`` up to its first dot.
    ``index`` is accepted but not used by this function.

    Returns:
        tuple: ``(mc_name, timesteps, text, speaker_id, lf0)`` where
        ``timesteps`` is the number of mel-cepstrum frames and ``lf0`` is
        the log-f0 contour as a list (zero entries of f0 stay zero).

    NOTE(review): the spectrogram is trimmed but ``f0`` is not, so ``lf0``
    may be longer than ``timesteps`` -- confirm this is intended.
    """
    wav, fs = librosa.load(wav_path, sr=config.fs)
    wav = wav.astype(np.float64)
    coarse_f0, timeaxis = pyworld.dio(wav, fs,
                                      frame_period=config.frame_period)
    f0 = pyworld.stonemask(wav, coarse_f0, timeaxis, fs)
    spectrogram = trim_zeros_frames(
        pyworld.cheaptrick(wav, f0, timeaxis, fs))
    mc = pysptk.sp2mc(spectrogram, order=config.order, alpha=config.alpha)

    file_name = wav_path.rsplit("/", 1)[-1]
    wav_id = file_name.partition('.')[0]
    mc_name = '{}-mc.npy'.format(wav_id)
    np.save(os.path.join(out_dir, mc_name), mc, allow_pickle=False)

    # compute lf0
    lf0 = f0.copy()
    voiced = np.nonzero(f0)
    lf0[voiced] = np.log(f0[voiced])

    # Return a tuple describing this training example:
    return mc_name, mc.shape[0], text, speaker_id, lf0.tolist()
示例#14
0
 def collect_features(self, wav_path):
     """Return smoothed mel-cepstrum features with deltas as ``float32``.

     ``self.alpha`` is filled in from ``pysptk.util.mcepalpha(fs)`` the
     first time it is found to be ``None`` and reused afterwards.
     """
     fs, wav = wavfile.read(wav_path)
     wav = wav.astype(np.float64)
     coarse_f0, timeaxis = pyworld.dio(wav, fs, frame_period=hp.frame_period)
     f0 = pyworld.stonemask(wav, coarse_f0, timeaxis, fs)
     spectrogram = P.trim_zeros_frames(
         pyworld.cheaptrick(wav, f0, timeaxis, fs))

     if self.alpha is None:
         self.alpha = pysptk.util.mcepalpha(fs)
     # Drop 0-th coefficient
     mgc = pysptk.sp2mc(spectrogram, order=hp.order,
                        alpha=self.alpha)[:, 1:]

     # 50Hz cut-off MS smoothing
     hop_length = int(fs * (hp.frame_period * 0.001))
     mgc = P.modspec_smoothing(mgc, fs / hop_length, cutoff=50)

     # Add delta
     return P.delta_features(mgc, hp.windows).astype(np.float32)
示例#15
0
 def collect_features(self, wav_path):
     """Compute delta-augmented, smoothed mel-cepstrum for one wav file.

     Side effect: when ``self.alpha`` is ``None`` it is set from
     ``pysptk.util.mcepalpha(fs)``. The result is cast to ``float32``.
     """
     fs, samples = wavfile.read(wav_path)
     samples = samples.astype(np.float64)

     raw_f0, timeaxis = pyworld.dio(samples, fs,
                                    frame_period=hp.frame_period)
     f0 = pyworld.stonemask(samples, raw_f0, timeaxis, fs)
     envelope = pyworld.cheaptrick(samples, f0, timeaxis, fs)
     envelope = P.trim_zeros_frames(envelope)

     if self.alpha is None:
         self.alpha = pysptk.util.mcepalpha(fs)
     cepstrum = pysptk.sp2mc(envelope, order=hp.order, alpha=self.alpha)
     # Drop 0-th coefficient
     cepstrum = cepstrum[:, 1:]

     # 50Hz cut-off MS smoothing
     hop_length = int(fs * (hp.frame_period * 0.001))
     modfs = fs / hop_length
     smoothed = P.modspec_smoothing(cepstrum, modfs, cutoff=50)

     # Add delta
     with_delta = P.delta_features(smoothed, hp.windows)
     return with_delta.astype(np.float32)
示例#16
0
    def _generate_parameters(self, path, var):
        """Generate a feature sequence for ``path`` and split it by stream.

        The sequence from ``self.parameter_generator.generate`` is trimmed
        with ``trim_zeros_frames`` and sliced at the column indices given
        by ``self.feature_config.get_indices()``. The ``mgc``, ``lf0`` and
        ``bap`` slices are passed through ``paramgen.mlpg`` with ``var``
        tiled over all frames; ``vuv`` is returned as the raw column.

        Returns:
            tuple: ``(mgc, lf0, vuv, bap)``.
        """
        seq = trim_zeros_frames(self.parameter_generator.generate(path))
        n_frames = seq.shape[0]

        indices = self.feature_config.get_indices()
        lf0_at, vuv_at, bap_at = indices['lf0'], indices['vuv'], indices['bap']
        window = self.analysis_config.window

        def smooth(start, stop):
            # Run MLPG on columns [start:stop] with per-frame variances.
            means = seq[:, start:stop]
            variances = np.tile(var[start:stop], (n_frames, 1))
            return paramgen.mlpg(means, variances, window)

        mgc = smooth(None, lf0_at)
        lf0 = smooth(lf0_at, vuv_at)
        bap = smooth(bap_at, None)
        vuv = seq[:, vuv_at]

        return mgc, lf0, vuv, bap
示例#17
0
 def collect_features(self, path):
     """Return the mel-cepstrum of ``path`` cut to the trimmed envelope length.

     The file is analysed with ``kwiiyatta.analyze_wav``; the number of
     frames kept is the length of the spectrum envelope after
     ``trim_zeros_frames``.
     """
     analyzed = kwiiyatta.analyze_wav(path)
     n_kept = len(trim_zeros_frames(analyzed.spectrum_envelope))
     # NOTE(review): the original author asks whether the trimmed frame
     # range might be shifted toward the front.
     return analyzed.mel_cepstrum.data[:n_kept]
示例#18
0
def remove_zero_frames_spectrogram(spectrogram):
	"""Return ``spectrogram`` after passing it through ``trim_zeros_frames``.

	NOTE(review): the helper is called with its default arguments, which
	presumably trims all-zero frames at the edge(s) only rather than
	removing every all-zero frame -- confirm against the helper.
	"""
	trimmed = trim_zeros_frames(spectrogram)
	return trimmed
示例#19
0
        print("Destination dir for {}: {}".format(speaker, d))
        if not exists(d):
            os.makedirs(d)

    # Convert to arrays: materialize both datasets through ``asarray``.
    print("Convert datasets to arrays")
    X, Y = X_dataset.asarray(verbose=1), Y_dataset.asarray(verbose=1)

    # Alignment: X and Y are replaced by their DTW-aligned versions.
    print("Perform alignment")
    X, Y = DTWAligner().transform((X, Y))

    print("Save features to disk")
    for idx, (x, y) in tqdm(enumerate(zip(X, Y))):
        # Output paths: source file name without extension, placed under
        # the "X" and "Y" sub-directories of dst_dir.
        src_name = splitext(basename(X_dataset.collected_files[idx][0]))[0]
        tgt_name = splitext(basename(Y_dataset.collected_files[idx][0]))[0]
        src_path = join(dst_dir, "X", src_name)
        tgt_path = join(dst_dir, "Y", tgt_name)

        # Trim zero frames, then adjust both sequences to a common frame
        # count (padding enabled, length divisible by 2).
        x = P.trim_zeros_frames(x)
        y = P.trim_zeros_frames(y)
        x, y = P.adjust_frame_lengths(x, y, pad=True, divisible_by=2)

        # Save
        np.save(src_path, x)
        np.save(tgt_path, y)

    # Terminate the script with a success status once everything is saved.
    sys.exit(0)
示例#20
0
        print("Destination dir for {}: {}".format(speaker, d))
        if not exists(d):
            os.makedirs(d)

    # Convert to arrays: materialize both datasets through ``asarray``.
    print("Convert datasets to arrays")
    X, Y = X_dataset.asarray(verbose=1), Y_dataset.asarray(verbose=1)

    # Alignment: X and Y are replaced by their DTW-aligned versions.
    print("Perform alignment")
    X, Y = DTWAligner().transform((X, Y))

    print("Save features to disk")
    for idx, (x, y) in tqdm(enumerate(zip(X, Y))):
        # Output paths: source file name without extension, placed under
        # the "X" and "Y" sub-directories of dst_dir.
        src_name = splitext(basename(X_dataset.collected_files[idx][0]))[0]
        tgt_name = splitext(basename(Y_dataset.collected_files[idx][0]))[0]
        src_path = join(dst_dir, "X", src_name)
        tgt_path = join(dst_dir, "Y", tgt_name)

        # Trim zero frames, then adjust both sequences to a common frame
        # count (padding enabled, length divisible by 2).
        x = P.trim_zeros_frames(x)
        y = P.trim_zeros_frames(y)
        x, y = P.adjust_frame_lengths(x, y, pad=True, divisible_by=2)

        # Save
        np.save(src_path, x)
        np.save(tgt_path, y)

    # Terminate the script with a success status once everything is saved.
    sys.exit(0)
示例#21
0
def TrimmedDataset(feature):
    """Return ``feature`` sliced to the length of its trimmed envelope.

    The slice length is the number of frames left after applying
    ``trim_zeros_frames`` to ``feature.spectrum_envelope``.
    """
    n_kept = len(trim_zeros_frames(feature.spectrum_envelope))
    # NOTE(review): the original author asks whether the trimmed frame
    # range might be shifted toward the front.
    return feature[:n_kept]