Exemplo n.º 1
0
def gen_parameters(y_predicted, Y_mean, Y_std):
    """Turn a normalized acoustic prediction into static parameter streams.

    The prediction is split into mgc / lf0 / vuv / bap columns according to
    ``hp_acoustic.stream_sizes``. MLPG is run with unit variances on the
    mgc, lf0 and bap streams first; afterwards every stream is passed
    through ``P.inv_scale`` with the matching slice of the ``"acoustic"``
    statistics.

    Args:
        y_predicted: Predicted features, indexed as ``[frame, feature]``.
        Y_mean: Container whose ``"acoustic"`` entry is sliced per stream
            and handed to ``P.inv_scale`` as the mean argument.
        Y_std: Same as ``Y_mean`` for the scale argument of ``P.inv_scale``.

    Returns:
        tuple: ``(mgc, lf0, vuv, bap)``.
    """
    mgc_dim, lf0_dim, vuv_dim, bap_dim = hp_acoustic.stream_sizes
    windows = hp_acoustic.windows

    # Column offset of every stream along the feature axis
    mgc_start_idx = 0
    lf0_start_idx = mgc_start_idx + mgc_dim
    vuv_start_idx = lf0_start_idx + lf0_dim
    bap_start_idx = vuv_start_idx + vuv_dim

    # Split acoustic features
    mgc = y_predicted[:, mgc_start_idx:lf0_start_idx]
    lf0 = y_predicted[:, lf0_start_idx:vuv_start_idx]
    vuv = y_predicted[:, vuv_start_idx]
    bap = y_predicted[:, bap_start_idx:]

    # MLPG on the normalized features, one unit variance per column
    mgc, lf0, bap = (
        paramgen.mlpg(stream, np.ones(stream.shape[-1]), windows)
        for stream in (mgc, lf0, bap)
    )

    def denormalize(stream, start, dim):
        # Only the first ``dim // len(windows)`` statistics of a stream are
        # applied to the MLPG output.
        stop = start + dim // len(windows)
        return P.inv_scale(stream,
                           Y_mean["acoustic"][start:stop],
                           Y_std["acoustic"][start:stop])

    # When we use MGE training, denormalization should be done after MLPG.
    mgc = denormalize(mgc, mgc_start_idx, mgc_dim)
    lf0 = denormalize(lf0, lf0_start_idx, lf0_dim)
    bap = denormalize(bap, bap_start_idx, bap_dim)
    vuv = P.inv_scale(vuv,
                      Y_mean["acoustic"][vuv_start_idx],
                      Y_std["acoustic"][vuv_start_idx])

    return mgc, lf0, vuv, bap
Exemplo n.º 2
0
def test_mlpg():
    """Check ``mlpg`` output shape and that 1-d variances match tiled ones."""
    from nnmnkwii import paramgen as G

    static_dim = 2
    num_frames = 10
    windows_set = _get_windows_set()

    # The output must hold one static vector per frame
    for windows in windows_set:
        feature_dim = static_dim * len(windows)
        means = np.random.rand(num_frames, feature_dim)
        variances = np.tile(np.random.rand(feature_dim), (num_frames, 1))

        assert G.mlpg(means, variances, windows).shape == (num_frames,
                                                           static_dim)

    # A single variance vector must give the same result as that vector
    # repeated explicitly for every frame
    for windows in windows_set:
        feature_dim = static_dim * len(windows)
        for dtype in (np.float32, np.float64):
            means = np.random.rand(num_frames, feature_dim).astype(dtype)
            global_variances = np.random.rand(feature_dim).astype(dtype)
            framewise_variances = np.tile(global_variances, (num_frames, 1))

            from_framewise = G.mlpg(means, framewise_variances, windows)
            from_global = G.mlpg(means, global_variances, windows)

            assert from_framewise.dtype == dtype
            assert np.allclose(from_framewise, from_global)
Exemplo n.º 3
0
def test_functional_mlpg():
    """Compare ``AF.mlpg`` / ``AF.unit_variance_mlpg`` with numpy ``G.mlpg``.

    Each variant is also pushed through an MSE loss and ``backward()`` to
    make sure the backward pass runs.
    """
    static_dim, num_frames = 2, 5

    for windows in _get_windows_set():
        torch.manual_seed(1234)
        feature_dim = static_dim * len(windows)
        means = torch.rand(num_frames, feature_dim)
        variances = torch.ones(feature_dim)

        # Reference result from the numpy implementation
        reference = G.mlpg(means.numpy(), variances.numpy(), windows)
        reference = Variable(torch.from_numpy(reference), requires_grad=False)
        expected = reference.data.numpy()

        means = Variable(means, requires_grad=True)

        # mlpg
        generated = AF.mlpg(means, variances, windows)
        assert np.allclose(expected, generated.data.numpy())
        nn.MSELoss()(generated, reference).backward()

        # unit_variance_mlpg on 2-d input
        mlpg_matrix = torch.from_numpy(
            G.unit_variance_mlpg_matrix(windows, num_frames))
        generated = AF.unit_variance_mlpg(mlpg_matrix, means)
        assert np.allclose(expected, generated.data.numpy())
        nn.MSELoss()(generated, reference).backward()

        # unit_variance_mlpg on 3-d input (batch of one)
        batched_means = means.view(1, -1, means.size(-1))
        generated = AF.unit_variance_mlpg(mlpg_matrix, batched_means)
        assert np.allclose(
            expected, generated.data.view(-1, static_dim).numpy())
        nn.MSELoss()(generated.view(-1, static_dim), reference).backward()
Exemplo n.º 4
0
def multi_stream_mlpg(
    inputs,
    variances,
    windows,
    stream_sizes=None,
    has_dynamic_features=None,
    streams=None,
):
    """Run MLPG stream by stream and concatenate the results.

    The feature axis of ``inputs`` is cut into consecutive streams. A stream
    flagged in ``has_dynamic_features`` is replaced by the output of
    ``paramgen.mlpg``; any other stream is passed through unchanged.

    Args:
        inputs (array like): 2-d array, ``(T, sum(stream_sizes))``.
        variances (array like): either the same shape as ``inputs`` or a
            1-d array that is repeated for each of the ``T`` frames.
        windows (list): windows forwarded to ``paramgen.mlpg``.
        stream_sizes (list, optional): width of each stream. Defaults to
            ``[180, 3, 1, 3]``.
        has_dynamic_features (list, optional): per-stream flag selecting
            MLPG. Defaults to ``[True, True, False, True]``.
        streams (list, optional): per-stream flag; streams flagged false are
            left out of the result. Defaults to all streams.

    Raises:
        RuntimeError: if the feature dimension of ``inputs`` differs from
            ``sum(stream_sizes)``.

    Returns:
        array like: selected streams concatenated along the last axis.
    """
    stream_sizes = [180, 3, 1, 3] if stream_sizes is None else stream_sizes
    if has_dynamic_features is None:
        has_dynamic_features = [True, True, False, True]
    streams = [True] * len(stream_sizes) if streams is None else streams

    num_frames, feature_dim = inputs.shape
    if feature_dim != sum(stream_sizes):
        raise RuntimeError("You probably have specified wrong dimension params.")

    # e.g. sizes [180, 3, 1, 3] -> stops [180, 183, 184, 187],
    #                              starts [0, 180, 183, 184]
    stops = np.cumsum(stream_sizes)
    starts = np.hstack(([0], stops[:-1]))

    generated = []
    for begin, end, is_dynamic, wanted in zip(
        starts, stops, has_dynamic_features, streams
    ):
        if not wanted:
            continue
        stream = inputs[:, begin:end]
        if inputs.shape == variances.shape:
            stream_var = variances[:, begin:end]
        else:
            # Global variances: repeat them for every frame
            stream_var = np.tile(variances[begin:end], (num_frames, 1))
        if is_dynamic:
            generated.append(paramgen.mlpg(stream, stream_var, windows))
        else:
            generated.append(stream)

    return np.concatenate(generated, -1)
Exemplo n.º 5
0
def gen_parameters(y_predicted):
    """Split a prediction into streams and run MLPG on mgc, lf0 and bap.

    Stream boundaries (``lf0_start_idx``, ``vuv_start_idx``,
    ``bap_start_idx``), ``windows`` and ``Y_acoustic_std`` are read from the
    enclosing scope. The variances come from ``Y_acoustic_std.var_`` and are
    repeated for every frame.

    Args:
        y_predicted: Predicted features, indexed as ``[frame, feature]``.

    Returns:
        tuple: ``(mgc, lf0, vuv, bap)``; ``vuv`` is the raw predicted column.
    """
    num_frames = y_predicted.shape[0]

    # Column ranges of the streams that go through MLPG
    mgc_cols = slice(None, lf0_start_idx)
    lf0_cols = slice(lf0_start_idx, vuv_start_idx)
    bap_cols = slice(bap_start_idx, None)

    # Split acoustic features
    mgc = y_predicted[:, mgc_cols]
    lf0 = y_predicted[:, lf0_cols]
    vuv = y_predicted[:, vuv_start_idx]
    bap = y_predicted[:, bap_cols]

    def run_mlpg(stream, cols):
        # Same variance vector for every frame
        framewise_var = np.tile(Y_acoustic_std.var_[cols], (num_frames, 1))
        return paramgen.mlpg(stream, framewise_var, windows)

    mgc = run_mlpg(mgc, mgc_cols)
    lf0 = run_mlpg(lf0, lf0_cols)
    bap = run_mlpg(bap, bap_cols)
    return mgc, lf0, vuv, bap
    def gen_parameters(self, utt_id, labels):
        """Predict acoustic features for ``labels`` and return static ones.

        Pipeline: linguistic features -> input normalization -> speaker code
        -> acoustic model (mean and variance) -> output denormalization ->
        MLPG on the mgc / lf0 / bap streams.

        Args:
            utt_id: Passed to ``self.add_speaker_code``.
            labels: Passed to ``fe.linguistic_features``.

        Returns:
            Array with columns ``[mgc, lf0, vuv, bap]`` stacked horizontally.
        """
        linguistic = fe.linguistic_features(
            labels, self.binary_dict, self.continuous_dict,
            add_frame_features=True,
            subphone_features='coarse_coding')
        linguistic = linguistic.astype(np.float32)

        # normalize
        # NOTE(review): this uses a bare ``scaler`` while denormalization
        # below uses ``self.scaler`` - confirm that both are intended.
        normalized = scaler['X']['acoustic'].transform(linguistic)

        # add speaker information
        model_input = self.add_speaker_code(utt_id, normalized)

        # predict acoustic features
        model_input = torch.from_numpy(model_input).to(device)
        pred = self.acoustic_model.predict(model_input)
        pred_mean = pred['mean'].data.cpu().numpy()
        pred_var = pred['var'].data.cpu().numpy()

        # denormalize: means via the scaler, variances by the squared scale
        output_scaler = self.scaler['Y']['acoustic']
        scale = output_scaler.scale_
        pred_mean = output_scaler.inverse_transform(pred_mean)
        pred_var *= scale ** 2

        # column ranges of the streams that go through MLPG
        mgc_cols = slice(None, self.lf0_start_idx)
        lf0_cols = slice(self.lf0_start_idx, self.vuv_start_idx)
        bap_cols = slice(self.bap_start_idx, None)

        vuv = pred_mean[:, self.vuv_start_idx]

        # Maximum Likelihood Parameter Generation (MLPG) per stream, using
        # the predicted frame-wise variances
        mgc = mlpg(pred_mean[:, mgc_cols], pred_var[:, mgc_cols],
                   self.windows)
        lf0 = mlpg(pred_mean[:, lf0_cols], pred_var[:, lf0_cols],
                   self.windows)
        bap = mlpg(pred_mean[:, bap_cols], pred_var[:, bap_cols],
                   self.windows)

        return np.hstack([mgc, lf0, vuv.reshape(-1, 1), bap])
Exemplo n.º 7
0
    def _generate_parameters(self, path, var):
        """Generate a feature sequence for ``path`` and run MLPG per stream.

        Args:
            path: Passed to ``self.parameter_generator.generate``.
            var: 1-d variances over the feature axis; each stream's slice is
                repeated for every frame.

        Returns:
            tuple: ``(mgc, lf0, vuv, bap)``; ``vuv`` is taken from the
            generated sequence without MLPG.
        """
        seq = trim_zeros_frames(self.parameter_generator.generate(path))
        num_frames = seq.shape[0]

        feat_index = self.feature_config.get_indices()
        mgc_cols = slice(None, feat_index['lf0'])
        lf0_cols = slice(feat_index['lf0'], feat_index['vuv'])
        bap_cols = slice(feat_index['bap'], None)

        vuv = seq[:, feat_index['vuv']]

        def framewise(cols):
            # Repeat the stream's variance vector for every frame
            return np.tile(var[cols], (num_frames, 1))

        mgc_var = framewise(mgc_cols)
        lf0_var = framewise(lf0_cols)
        bap_var = framewise(bap_cols)

        window = self.analysis_config.window
        mgc = paramgen.mlpg(seq[:, mgc_cols], mgc_var, window)
        lf0 = paramgen.mlpg(seq[:, lf0_cols], lf0_var, window)
        bap = paramgen.mlpg(seq[:, bap_cols], bap_var, window)

        return mgc, lf0, vuv, bap
Exemplo n.º 8
0
def test_unit_variance_mlpg():
    """Check ``mlpg`` with unit variances against the precomputed matrix."""
    static_dim, num_frames = 2, 10

    for windows in _get_windows_set():
        feature_dim = static_dim * len(windows)
        means = np.random.rand(num_frames, feature_dim)
        expected = G.mlpg(means, np.ones(feature_dim), windows)

        # Matrix form: R applied to the reshaped means
        mlpg_matrix = G.unit_variance_mlpg_matrix(windows, num_frames)
        actual = mlpg_matrix.dot(G.reshape_means(means, static_dim))
        assert np.allclose(actual, expected)
Exemplo n.º 9
0
def gen_parameters(y_predicted, Y_var):
    """Split a prediction into streams and run MLPG on mgc, lf0 and bap.

    Stream boundaries come from ``hp``; ``windows`` is read from the
    enclosing scope.

    Args:
        y_predicted: Predicted features, indexed as ``[frame, feature]``.
        Y_var: Container whose ``"acoustic"`` entry holds the variances over
            the feature axis; each stream's slice is repeated per frame.

    Returns:
        tuple: ``(mgc, lf0, vuv, bap)``; ``vuv`` is the raw predicted column.
    """
    num_frames = y_predicted.shape[0]

    # Column ranges of the streams that go through MLPG
    mgc_cols = slice(None, hp.lf0_start_idx)
    lf0_cols = slice(hp.lf0_start_idx, hp.vuv_start_idx)
    bap_cols = slice(hp.bap_start_idx, None)

    # Split acoustic features
    mgc = y_predicted[:, mgc_cols]
    lf0 = y_predicted[:, lf0_cols]
    vuv = y_predicted[:, hp.vuv_start_idx]
    bap = y_predicted[:, bap_cols]

    def run_mlpg(stream, cols):
        # Same variance vector for every frame
        framewise_var = np.tile(Y_var["acoustic"][cols], (num_frames, 1))
        return paramgen.mlpg(stream, framewise_var, windows)

    mgc = run_mlpg(mgc, mgc_cols)
    lf0 = run_mlpg(lf0, lf0_cols)
    bap = run_mlpg(bap, bap_cols)

    return mgc, lf0, vuv, bap
Exemplo n.º 10
0
    def forward(self, means):
        """Run numpy MLPG on ``means`` using ``self.variances``.

        ``means`` is saved for the backward pass. The result of ``G.mlpg``
        is cast to float32 and returned as a torch tensor.
        """
        assert means.dim() == 2  # we cannot do MLPG on minibatch
        variances = self.variances
        self.save_for_backward(means)

        # Unpacking into two names only succeeds for a 2-d input
        num_frames, feature_dim = means.size()
        assert means.size() == variances.size()

        generated = G.mlpg(means.detach().numpy(),
                           variances.detach().numpy(),
                           self.windows)
        return torch.from_numpy(generated.astype(np.float32))
Exemplo n.º 11
0
def gen_parameters(y_predicted, verbose=True):
    """Split a prediction into streams and run MLPG on mgc, lf0 and bap.

    Stream boundaries (``lf0_start_idx``, ``vuv_start_idx``,
    ``bap_start_idx``), ``windows`` and ``y_stats`` are read from the
    enclosing scope. Variances are taken from ``y_stats['var']`` and
    repeated for every frame.

    Args:
        y_predicted: Predicted features, indexed as ``[frame, feature]``.
        verbose: Unused; kept so existing callers keep working.

    Returns:
        tuple: ``(mgc, lf0, vuv, bap)``; ``vuv`` is the raw predicted column.
    """
    # Number of time frames
    T = y_predicted.shape[0]

    # Split acoustic features
    mgc = y_predicted[:, :lf0_start_idx]
    lf0 = y_predicted[:, lf0_start_idx:vuv_start_idx]
    vuv = y_predicted[:, vuv_start_idx]
    bap = y_predicted[:, bap_start_idx:]

    # Perform MLPG with the same variance vector for every frame
    mgc_variances = np.tile(y_stats['var'][:lf0_start_idx], (T, 1))
    mgc = paramgen.mlpg(mgc, mgc_variances, windows)
    lf0_variances = np.tile(y_stats['var'][lf0_start_idx:vuv_start_idx],
                            (T, 1))
    lf0 = paramgen.mlpg(lf0, lf0_variances, windows)
    bap_variances = np.tile(y_stats['var'][bap_start_idx:], (T, 1))
    bap = paramgen.mlpg(bap, bap_variances, windows)

    return mgc, lf0, vuv, bap
Exemplo n.º 12
0
def multi_stream_mlpg(inputs,
                      variances,
                      windows,
                      stream_sizes=None,
                      has_dynamic_features=None,
                      streams=None):
    """Split streams and do apply MLPG if stream has dynamic features.

    Args:
        inputs (array like): 2-d array, ``(T, sum(stream_sizes))``.
        variances (array like): either the same shape as ``inputs`` or a
            1-d array that is repeated for each of the ``T`` frames.
        windows (list): windows forwarded to ``paramgen.mlpg``.
        stream_sizes (list, optional): width of each stream. Defaults to
            ``[180, 3, 1, 3]``.
        has_dynamic_features (list, optional): per-stream flag selecting
            MLPG. Defaults to ``[True, True, False, True]``.
        streams (list, optional): per-stream flag; streams flagged false are
            left out of the result. Defaults to
            ``[True, True, True, True]``.

    Raises:
        RuntimeError: if the feature dimension of ``inputs`` differs from
            ``sum(stream_sizes)``.

    Returns:
        array like: selected streams concatenated along the last axis.
    """
    # ``None`` sentinels instead of list literals in the signature, so no
    # list object is shared between calls.
    if stream_sizes is None:
        stream_sizes = [180, 3, 1, 3]
    if has_dynamic_features is None:
        has_dynamic_features = [True, True, False, True]
    if streams is None:
        streams = [True, True, True, True]

    T, D = inputs.shape
    if D != sum(stream_sizes):
        raise RuntimeError(
            "You probably have specified wrong dimention params.")

    # Stream indices for static+delta features
    # [0,   180, 183, 184]
    start_indices = np.hstack(([0], np.cumsum(stream_sizes)[:-1]))
    # [180, 183, 184, 187]
    end_indices = np.cumsum(stream_sizes)

    ret = []
    for in_start_idx, in_end_idx, v, enabled in zip(
            start_indices, end_indices, has_dynamic_features, streams):
        if not enabled:
            continue
        x = inputs[:, in_start_idx:in_end_idx]
        if inputs.shape == variances.shape:
            var_ = variances[:, in_start_idx:in_end_idx]
        else:
            # Global variances: repeat them for every frame
            var_ = np.tile(variances[in_start_idx:in_end_idx], (T, 1))
        y = paramgen.mlpg(x, var_, windows) if v else x
        ret.append(y)

    return np.concatenate(ret, -1)
Exemplo n.º 13
0
    def gen_parameters(self, y_predicted, mge_training=False):
        """Generate static mgc / lf0 / vuv / bap from a normalized prediction.

        ``stream_sizes`` and ``windows`` are read from the enclosing scope;
        ``self.mean`` / ``self.std`` are the statistics given to
        ``P.inv_scale``.

        Args:
            y_predicted: Predicted features, indexed as ``[frame, feature]``.
            mge_training: If true, run unit-variance MLPG on the normalized
                streams and denormalize afterwards. Otherwise denormalize
                first and run MLPG with ``self.std ** 2`` as variances.

        Returns:
            tuple: ``(mgc, lf0, vuv, bap)``.
        """
        mgc_dim, lf0_dim, vuv_dim, bap_dim = stream_sizes

        # Column offset of every stream along the feature axis
        mgc_start_idx = 0
        lf0_start_idx = mgc_start_idx + mgc_dim
        vuv_start_idx = lf0_start_idx + lf0_dim
        bap_start_idx = vuv_start_idx + vuv_dim

        def split(features):
            return (features[:, mgc_start_idx:lf0_start_idx],
                    features[:, lf0_start_idx:vuv_start_idx],
                    features[:, vuv_start_idx],
                    features[:, bap_start_idx:])

        if not mge_training:
            # Denormalization first
            mgc, lf0, vuv, bap = split(
                P.inv_scale(y_predicted, self.mean, self.std))

            # Perform MLPG with the squared std as variances
            Y_var = self.std * self.std
            mgc = paramgen.mlpg(mgc, Y_var[:lf0_start_idx], windows)
            lf0 = paramgen.mlpg(lf0, Y_var[lf0_start_idx:vuv_start_idx],
                                windows)
            bap = paramgen.mlpg(bap, Y_var[bap_start_idx:], windows)
            return mgc, lf0, vuv, bap

        # MGE training: MLPG on normalized features with unit variances
        mgc, lf0, vuv, bap = split(y_predicted)
        mgc, lf0, bap = (
            paramgen.mlpg(stream, np.ones(stream.shape[-1]), windows)
            for stream in (mgc, lf0, bap)
        )

        def denormalize(stream, start, dim):
            # Only the first ``dim // len(windows)`` statistics of a stream
            # are applied to the MLPG output.
            stop = start + dim // len(windows)
            return P.inv_scale(stream, self.mean[start:stop],
                               self.std[start:stop])

        # When we use MGE training, denormalization should be done after MLPG.
        mgc = denormalize(mgc, mgc_start_idx, mgc_dim)
        lf0 = denormalize(lf0, lf0_start_idx, lf0_dim)
        bap = denormalize(bap, bap_start_idx, bap_dim)
        vuv = P.inv_scale(vuv, self.mean[vuv_start_idx],
                          self.std[vuv_start_idx])

        return mgc, lf0, vuv, bap
Exemplo n.º 14
0
def gen_parameters(y_predicted, Y_mean, Y_std, mge_training=True):
    """Generate static mgc / lf0 / vuv / bap from a normalized prediction.

    Stream sizes and windows are read from ``audio_world_config``.

    Args:
        y_predicted: Predicted features, indexed as ``[frame, feature]``.
        Y_mean: Statistics over the feature axis, used as the mean argument
            of ``P.inv_scale``.
        Y_std: Statistics over the feature axis, used as the scale argument
            of ``P.inv_scale``.
        mge_training: If true, run unit-variance MLPG on the normalized
            streams and denormalize afterwards. Otherwise denormalize first
            and run MLPG with ``Y_std * Y_std`` as variances.

    Returns:
        tuple: ``(mgc, lf0, vuv, bap)``.
    """
    mgc_dim, lf0_dim, vuv_dim, bap_dim = audio_world_config.stream_sizes

    # Column offset of every stream along the feature axis
    mgc_start_idx = 0
    lf0_start_idx = mgc_start_idx + mgc_dim
    vuv_start_idx = lf0_start_idx + lf0_dim
    bap_start_idx = vuv_start_idx + vuv_dim

    windows = audio_world_config.windows

    def split(features):
        return (features[:, mgc_start_idx:lf0_start_idx],
                features[:, lf0_start_idx:vuv_start_idx],
                features[:, vuv_start_idx],
                features[:, bap_start_idx:])

    if not mge_training:
        # Denormalization first
        mgc, lf0, vuv, bap = split(P.inv_scale(y_predicted, Y_mean, Y_std))

        # Perform MLPG with the squared std as variances
        Y_var = Y_std * Y_std
        mgc = paramgen.mlpg(mgc, Y_var[:lf0_start_idx], windows)
        lf0 = paramgen.mlpg(lf0, Y_var[lf0_start_idx:vuv_start_idx], windows)
        bap = paramgen.mlpg(bap, Y_var[bap_start_idx:], windows)
        return mgc, lf0, vuv, bap

    # MGE training: MLPG on normalized features with unit variances
    mgc, lf0, vuv, bap = split(y_predicted)
    mgc, lf0, bap = (
        paramgen.mlpg(stream, np.ones(stream.shape[-1]), windows)
        for stream in (mgc, lf0, bap)
    )

    def denormalize(stream, start, dim):
        # Only the first ``dim // len(windows)`` statistics of a stream are
        # applied to the MLPG output.
        stop = start + dim // len(windows)
        return P.inv_scale(stream, Y_mean[start:stop], Y_std[start:stop])

    # When we use MGE training, denormalization should be done after MLPG.
    mgc = denormalize(mgc, mgc_start_idx, mgc_dim)
    lf0 = denormalize(lf0, lf0_start_idx, lf0_dim)
    bap = denormalize(bap, bap_start_idx, bap_dim)
    vuv = P.inv_scale(vuv, Y_mean[vuv_start_idx], Y_std[vuv_start_idx])
    return mgc, lf0, vuv, bap
Exemplo n.º 15
0
def gen_parameters(y_predicted, Y_mean, Y_std, mge_training=True):
    """Generate static mgc / lf0 / vuv / bap from a normalized prediction.

    Stream sizes and windows are read from ``hp_acoustic``. Both branches
    use the ``"acoustic"`` entry of ``Y_mean`` / ``Y_std``.

    Args:
        y_predicted: Predicted features, indexed as ``[frame, feature]``.
        Y_mean: Container whose ``"acoustic"`` entry is used as the mean
            argument of ``P.inv_scale``.
        Y_std: Container whose ``"acoustic"`` entry is used as the scale
            argument of ``P.inv_scale``.
        mge_training: If true, run unit-variance MLPG on the normalized
            streams and denormalize afterwards. Otherwise denormalize first
            and run MLPG with the squared ``"acoustic"`` std as variances.

    Returns:
        tuple: ``(mgc, lf0, vuv, bap)``.
    """
    mgc_dim, lf0_dim, vuv_dim, bap_dim = hp_acoustic.stream_sizes

    # Column offset of every stream along the feature axis
    lf0_start_idx = mgc_dim
    vuv_start_idx = lf0_start_idx + lf0_dim
    bap_start_idx = vuv_start_idx + vuv_dim

    windows = hp_acoustic.windows

    ty = "acoustic"

    # MGE training
    if mge_training:
        # Split acoustic features
        mgc = y_predicted[:, :lf0_start_idx]
        lf0 = y_predicted[:, lf0_start_idx:vuv_start_idx]
        vuv = y_predicted[:, vuv_start_idx]
        bap = y_predicted[:, bap_start_idx:]

        # Perform MLPG on normalized features
        mgc = paramgen.mlpg(mgc, np.ones(mgc.shape[-1]), windows)
        lf0 = paramgen.mlpg(lf0, np.ones(lf0.shape[-1]), windows)
        bap = paramgen.mlpg(bap, np.ones(bap.shape[-1]), windows)

        # When we use MGE training, denormalization should be done after MLPG.
        # Only the first ``dim // len(windows)`` statistics of each stream
        # are applied to the MLPG output.
        mgc = P.inv_scale(mgc, Y_mean[ty][:mgc_dim // len(windows)],
                          Y_std[ty][:mgc_dim // len(windows)])
        lf0 = P.inv_scale(
            lf0,
            Y_mean[ty][lf0_start_idx:lf0_start_idx + lf0_dim // len(windows)],
            Y_std[ty][lf0_start_idx:lf0_start_idx + lf0_dim // len(windows)])
        bap = P.inv_scale(
            bap,
            Y_mean[ty][bap_start_idx:bap_start_idx + bap_dim // len(windows)],
            Y_std[ty][bap_start_idx:bap_start_idx + bap_dim // len(windows)])
        vuv = P.inv_scale(vuv, Y_mean[ty][vuv_start_idx],
                          Y_std[ty][vuv_start_idx])
    else:
        # Denormalization first. The statistics are stored per feature type,
        # so the "acoustic" entries must be selected here as well, exactly
        # as for ``Y_var`` below.
        y_predicted = P.inv_scale(y_predicted, Y_mean[ty], Y_std[ty])

        # Split acoustic features
        mgc = y_predicted[:, :lf0_start_idx]
        lf0 = y_predicted[:, lf0_start_idx:vuv_start_idx]
        vuv = y_predicted[:, vuv_start_idx]
        bap = y_predicted[:, bap_start_idx:]

        # Perform MLPG with the squared std as variances
        Y_var = Y_std[ty] * Y_std[ty]
        mgc = paramgen.mlpg(mgc, Y_var[:lf0_start_idx], windows)
        lf0 = paramgen.mlpg(lf0, Y_var[lf0_start_idx:vuv_start_idx], windows)
        bap = paramgen.mlpg(bap, Y_var[bap_start_idx:], windows)

    return mgc, lf0, vuv, bap
Exemplo n.º 16
0
    def transform(self, src):
        """Map source features to target features, then apply MLPG.

        For every frame the conditional mean and a diagonal approximation of
        the conditional variance are computed for the mixture selected by
        ``self.px.predict``; the two sequences are then handed to ``mlpg``.

        Args:
            src (array): shape (`the number of frames`, `the order of spectral
                feature`) a sequence of source speaker's spectral feature that
                will be transformed.

        Returns:
            array: a sequence of transformed features
        """
        num_frames, feature_dim = src.shape[0], src.shape[1]

        # Input with exactly ``static_dim`` columns is handled by the parent
        # implementation.
        if feature_dim == self.static_dim:
            return super(MLPG, self).transform(src)

        # A suboptimum mixture sequence  (eq.37)
        optimum_mix = self.px.predict(src)

        E = np.empty((num_frames, feature_dim))  # conditional means, eq.(40)
        D = np.empty((num_frames, feature_dim))  # diagonal variances, eq.(23)
        for t in range(num_frames):
            m = optimum_mix[t]  # estimated mixture index at time t

            # Eq. (22)
            centered = np.linalg.solve(self.covarXX[m],
                                       src[t] - self.src_means[m])
            E[t] = self.tgt_means[m] + np.dot(self.covarYX[m], centered)

            # Eq. (23), with covariances approximated by their diagonals so
            # that MLPG can be done dimension-wise
            diag_yy = np.diag(self.covarYY[m])
            diag_yx = np.diag(self.covarYX[m])
            diag_xx = np.diag(self.covarXX[m])
            diag_xy = np.diag(self.covarXY[m])
            D[t] = diag_yy - diag_yx / diag_xx * diag_xy

        # Once we have mean and variance over frames, then we can do MLPG
        return mlpg(E, D, self.windows)
Exemplo n.º 17
0
def test_minibatch_unit_variance_mlpg_gradcheck():
    """Check ``AF.unit_variance_mlpg`` on 2-d vs. batched 3-d inputs.

    Outputs and gradients must agree between a single utterance and every
    item of a batch built by expanding that utterance, both for reshaped
    means and for non-reshaped means.
    """
    static_dim = 2
    T = 5

    for windows in _get_windows_set():
        batch_size = 5
        torch.manual_seed(1234)

        # Prepare inputs
        means = torch.rand(T, static_dim * len(windows))
        means_expanded = means.expand(
            batch_size, means.shape[0], means.shape[1])
        reshaped_means = torch.from_numpy(
            G.reshape_means(means.numpy(), static_dim))
        reshaped_means_expanded = reshaped_means.expand(
            batch_size, reshaped_means.shape[0], reshaped_means.shape[1])

        # Target from the numpy implementation
        target = G.mlpg(
            means.numpy(), np.ones(static_dim * len(windows)), windows)
        target = Variable(torch.from_numpy(target), requires_grad=False)
        target_expanded = target.expand(
            batch_size, target.size(0), target.size(1))

        # Pack into variables
        means = Variable(means, requires_grad=True)
        means_expanded = Variable(means_expanded, requires_grad=True)
        reshaped_means = Variable(reshaped_means, requires_grad=True)
        reshaped_means_expanded = Variable(
            reshaped_means_expanded, requires_grad=True)

        R = torch.from_numpy(G.unit_variance_mlpg_matrix(windows, T))

        # Case 1: 2d with reshaped means
        out_2d_reshaped = AF.unit_variance_mlpg(R, reshaped_means)

        # Case 2: 3d with reshaped means
        out_3d_reshaped = AF.unit_variance_mlpg(R, reshaped_means_expanded)
        expected = out_2d_reshaped.data.numpy()
        for item in range(batch_size):
            assert np.allclose(expected, out_3d_reshaped[item].data.numpy())

        nn.MSELoss()(out_2d_reshaped, target).backward()
        nn.MSELoss()(out_3d_reshaped, target_expanded).backward()

        # Check grad consistency
        grad_single = reshaped_means.grad.data.numpy()
        for item in range(batch_size):
            grad_item = reshaped_means_expanded.grad[item].data.numpy()
            assert np.allclose(grad_single, grad_item)

        # Case 3: 2d with non-reshaped input
        out_2d = AF.unit_variance_mlpg(R, means)

        # Case 4: 3d with non-reshaped input
        out_3d = AF.unit_variance_mlpg(R, means_expanded)

        assert np.allclose(out_2d_reshaped.data.numpy(), out_2d.data.numpy())
        for item in range(batch_size):
            assert np.allclose(out_2d.data.numpy(),
                               out_3d[item].data.numpy())

        nn.MSELoss()(out_2d, target).backward()
        nn.MSELoss()(out_3d, target_expanded).backward()

        # Check grad consistency
        grad_single = means.grad.data.numpy()
        for item in range(batch_size):
            grad_item = means_expanded.grad[item].data.numpy()
            assert np.allclose(grad_single, grad_item)
Exemplo n.º 18
0
    ### Get action
    # The policy returns two tensors, used below as mean and variance.
    mean, var = policy(inputs, length.tolist())
    m = mean.detach().to('cpu').numpy()
    v = var.detach().to('cpu').numpy()
    # First- and second-order deltas of the mean along axis 1
    dm = librosa.feature.delta(m, width=9, order=1, axis=1)
    ddm = librosa.feature.delta(m, width=9, order=2, axis=1)
    # Variance for the delta stream: 2 * v plus the delta of v, with
    # non-positive entries replaced by 1e-10
    dv = librosa.feature.delta(v, width=9, order=1, axis=1)
    dv = 2 * v + dv
    dv = np.where(dv <= 0, 1e-10, dv)
    # Same construction again, starting from dv, for the delta-delta stream
    ddv = librosa.feature.delta(dv, width=9, order=1, axis=1)
    ddv = 2 * dv + ddv
    ddv = np.where(ddv <= 0, 1e-10, ddv)

    # Stack static, delta and delta-delta along the last axis
    m = np.concatenate((m, dm, ddm), axis=2)
    v = np.concatenate((v, dv, ddv), axis=2)
    # MLPG on the first item only
    action = G.mlpg(m[0], v[0], windows)
    action = torch.from_numpy(np.asarray(action, dtype=np.float32)).to(device)
    action = utils.trans_param(action).unsqueeze(dim=0)

    ### Store parameters
    # First 24 columns -> tract parameters, last 6 columns -> glottis
    # parameters, both cut to length[0] frames
    tractParams = action[0, :length[0], :24].reshape(1, length[0], 24)
    glottisParams = action[0, :length[0], -6:].reshape(1, length[0], 6)
    t_param = tractParams.to('cpu')
    # Prepend 5 frames: two zero frames, then 1/3, 2/3 and 1x the first frame
    t_tmp = torch.zeros(1, 5, 24)
    t_tmp[:, 2, :] = t_param[:, 0, :] / 3
    t_tmp[:, 3, :] = t_param[:, 0, :] * 2 / 3
    t_tmp[:, 4, :] = t_param[:, 0, :]
    t_param = torch.cat((t_tmp, t_param), dim=1)
    # Append 5 copies of the last frame
    for i in range(5):
        t_param = torch.cat((t_param, t_param[:, -1, :].unsqueeze(dim=1)),
                            dim=1)
Exemplo n.º 19
0
    # Per-sequence scores returned by EvalMetric: raw prediction, MLPG,
    # and (judging by the names) Kalman-smoothed and low-pass filtered output
    tCC=[]
    trMSE=[]
    tCC_MLPG=[]
    trMSE_MLPG=[]
    tCC_KM=[]
    trMSE_KM=[]
    tCC_LPF=[]
    trMSE_LPF=[]
    for i in np.arange(0,len(Youttest)):
        s_in=X_testseq[i]
        #s_in=s_in[np.newaxis,:,0:inputDim]
        # Raw model prediction for sequence i
        val=np.squeeze(model.predict(s_in));
        predSeq_wom[0,i]=val
        
        # Variance of the prediction over axis 0, repeated for every row
        Dvar=np.tile(np.var(val,axis=0),(val.shape[0],1))
        predSeq_wm[0,i]=mlpg(val, Dvar, windows) # MLPG
        
        k_smth = kalmansmooth(val.transpose()).transpose() # Kalman filtering
        predSeq_kf[0,i]=k_smth
        #InSeq[0,i]=s_in
        # Zero-phase filtering with coefficients fb / fa
        yLPF = filtfilt(fb, fa, val.transpose()).transpose()
        predSeq_lpf[0,i]=yLPF        
        
        YtestOrg[0,i]=Youttest[i]

        # Scores of the raw prediction
        iCC,irMSE=EvalMetric(val,np.squeeze(Youttest[i]))
        tCC.append(iCC)
        trMSE.append(irMSE)
        

        # Scores of the MLPG output (MLPG is recomputed here)
        iCC,irMSE=EvalMetric(mlpg(val, Dvar, windows),np.squeeze(Youttest[i]))
Exemplo n.º 20
0
    # Per-sequence scores returned by EvalMetric: raw prediction, MLPG,
    # and (judging by the names) Kalman-smoothed and low-pass filtered output
    tCC=[]
    trMSE=[]
    tCC_MLPG=[]
    trMSE_MLPG=[]
    tCC_KM=[]
    trMSE_KM=[]
    tCC_LPF=[]
    trMSE_LPF=[]
    paramgen = GMM_M(gmm, windows=windows) # Inherit the GMM class
    for i in np.arange(0,len(Youttest)):
        s_in=np.squeeze(X_testseq[i])
        #s_in=s_in[np.newaxis,:,0:inputDim]
        #val=model.predict(s_in);
        val, D, W=paramgen.transform(s_in) # val: Conditional Expectation; D: Conditional Variance; W: windows 
        predSeq_wom[0,i]=val
        predSeq_wm[0,i]=mlpg(val, D, W) # MLPG
        k_smth = kalmansmooth(val.transpose()).transpose() # Kalman filtering
        predSeq_kf[0,i]=k_smth
        #InSeq[0,i]=s_in
        # Zero-phase filtering with coefficients fb / fa
        yLPF = filtfilt(fb, fa, val.transpose()).transpose()
        predSeq_lpf[0,i]=yLPF        
        
        YtestOrg[0,i]=Youttest[i]

        # Scores of the conditional expectation without MLPG
        iCC,irMSE=EvalMetric(val,np.squeeze(Youttest[i]))
        tCC.append(iCC)
        trMSE.append(irMSE)

        # Scores of the MLPG output (MLPG is recomputed here)
        iCC,irMSE=EvalMetric(mlpg(val, D, W),np.squeeze(Youttest[i]))
        tCC_MLPG.append(iCC)
        trMSE_MLPG.append(irMSE)  
Exemplo n.º 21
0
def benchmark_mlpg(static_dim=59, T=100, batch_size=10, use_cuda=True):
    """Time ``AF.mlpg`` against ``AF.unit_variance_mlpg`` and print the ratio.

    Both variants run forward + MSE loss + backward ``batch_size`` times on
    the same random means. Returns without doing anything when ``use_cuda``
    is set but CUDA is unavailable.

    Args:
        static_dim: Number of static dimensions of the random input.
        T: Number of frames of the random input.
        batch_size: Number of forward/backward repetitions per variant.
        use_cuda: Run the unit-variance variant on the GPU.
    """
    if use_cuda and not torch.cuda.is_available():
        return

    windows = _get_windows_set()[-1]
    np.random.seed(1234)
    torch.manual_seed(1234)
    feature_dim = static_dim * len(windows)
    means = np.random.rand(T, feature_dim).astype(np.float32)
    variances = np.ones(feature_dim)
    reshaped_means = G.reshape_means(means, static_dim)

    # Pseudo target
    y = G.mlpg(means, variances, windows).astype(np.float32)

    # Pack into variables
    means = Variable(torch.from_numpy(means), requires_grad=True)
    reshaped_means = Variable(torch.from_numpy(reshaped_means),
                              requires_grad=True)
    y = Variable(torch.from_numpy(y), requires_grad=False)
    criterion = nn.MSELoss()

    # Case 1: MLPG
    started = time.time()
    for _ in range(batch_size):
        y_hat = AF.mlpg(means, torch.from_numpy(variances), windows)
        loss = criterion(y_hat, y)
        assert np.allclose(y_hat.data.numpy(), y.data.numpy())
        loss.backward()  # slow!
    elapsed_mlpg = time.time() - started

    # Case 2: UnitVarianceMLPG
    started = time.time()
    if use_cuda:
        y = y.cuda()
    # Assuming minibatches are zero-padded, we only need to create the MLPG
    # matrix per-minibatch, not per-utterance.
    R = torch.from_numpy(G.unit_variance_mlpg_matrix(windows, T))
    if use_cuda:
        R = R.cuda()
    for _ in range(batch_size):
        if use_cuda:
            means = means.cpu().cuda()

        y_hat = AF.unit_variance_mlpg(R, means)
        loss = criterion(y_hat, y)
        assert np.allclose(y_hat.cpu().data.numpy(),
                           y.cpu().data.numpy(),
                           atol=1e-5)
        loss.backward()
    elapsed_unit_variance_mlpg = time.time() - started

    ratio = elapsed_mlpg / elapsed_unit_variance_mlpg

    print(
        "MLPG vs UnitVarianceMLPG (static_dim, T, batch_size, use_cuda) = ({}):"
        .format((static_dim, T, batch_size, use_cuda)))
    # Colour the verdict line depending on which variant was faster
    verdict, colour = ("faster", OKGREEN) if ratio > 1 else ("slower", FAIL)
    sys.stdout.write(colour)
    print(
        "UnitVarianceMLPG, {:4f} times {}. Elapsed times {:4f} / {:4f}".format(
            ratio, verdict, elapsed_mlpg, elapsed_unit_variance_mlpg))

    print(ENDC)
Exemplo n.º 22
0
# Evaluation script: generate actions from the policy output with MLPG and
# append a per-dimension RMSE row to log/eval.csv.
rmse = 0
with torch.no_grad():
    policy.eval()
    # The policy returns two tensors, used below as mean and variance.
    mean, var = policy(inputs, length)
    m = mean.detach().to('cpu').numpy()
    v = var.detach().to('cpu').numpy()
    # First- and second-order deltas of the mean along axis 1
    dm = librosa.feature.delta(m, width=9, order=1, axis=1)
    ddm = librosa.feature.delta(m, width=9, order=2, axis=1)
    # Variance for the delta stream: 2 * v plus the delta of v, with
    # non-positive entries replaced by 1e-10
    dv = librosa.feature.delta(v, width=9, order=1, axis=1)
    dv = 2 * v + dv
    dv = np.where(dv <= 0, 1e-10, dv)
    # Same construction again, starting from dv, for the delta-delta stream
    ddv = librosa.feature.delta(dv, width=9, order=1, axis=1)
    ddv = 2 * dv + ddv
    ddv = np.where(ddv <= 0, 1e-10, ddv)

    # Stack static, delta and delta-delta along the last axis
    m = np.concatenate((m, dm, ddm), axis=2)
    v = np.concatenate((v, dv, ddv), axis=2)
    # MLPG item by item; every item is stored with length[0] frames
    action = np.zeros((target.shape[0], length[0], OUT_SIZE))
    for i in range(target.shape[0]):
        action[i] = G.mlpg(m[i], v[i], windows)
    action = torch.from_numpy(np.asarray(action, dtype=np.float32)).to(device)
    # Clamp to [0, 1] and keep the first 24 output columns
    action = torch.clamp(action, min=0.0, max=1.0)[:, :, :24]

    # Squared error averaged over dim 1, then over dim 0, then square root:
    # one value per remaining column
    loss = F.mse_loss(action, target, reduction='none')
    loss = loss.mean(dim=1)
    #print(torch.sqrt(loss.mean(dim=1)))
    #print(torch.sqrt(loss.mean()).item())
    act_dist = torch.sqrt(loss.mean(dim=0)).to('cpu').unsqueeze(dim=0).numpy()
    # Append the result as one CSV row
    with open('log/eval.csv', 'a') as f:
        np.savetxt(f, act_dist, delimiter=',')
Exemplo n.º 23
0
    # Per-sequence scores returned by EvalMetric: raw prediction, MLPG,
    # and (judging by the names) Kalman-smoothed and low-pass filtered output
    tCC = []
    trMSE = []
    tCC_MLPG = []
    trMSE_MLPG = []
    tCC_KM = []
    trMSE_KM = []
    tCC_LPF = []
    trMSE_LPF = []
    for i in np.arange(0, len(Youttest)):
        s_in = X_testseq[i]
        #s_in=s_in[np.newaxis,:,0:inputDim]
        # Raw model prediction for sequence i
        val = np.squeeze(model.predict(s_in))
        predSeq_wom[0, i] = val

        # Variance of the prediction over axis 0, repeated for every row
        Dvar = np.tile(np.var(val, axis=0), (val.shape[0], 1))
        predSeq_wm[0, i] = mlpg(val, Dvar, windows)  # MLPG

        k_smth = kalmansmooth(val.transpose()).transpose()  # Kalman filtering
        predSeq_kf[0, i] = k_smth
        #InSeq[0,i]=s_in
        # Zero-phase filtering with coefficients fb / fa
        yLPF = filtfilt(fb, fa, val.transpose()).transpose()
        predSeq_lpf[0, i] = yLPF

        YtestOrg[0, i] = Youttest[i]

        # Scores of the raw prediction
        iCC, irMSE = EvalMetric(val, np.squeeze(Youttest[i]))
        tCC.append(iCC)
        trMSE.append(irMSE)

        # Scores of the MLPG output (MLPG is recomputed here)
        iCC, irMSE = EvalMetric(mlpg(val, Dvar, windows),
                                np.squeeze(Youttest[i]))