示例#1
0
# Raw output file: every generated sample is appended to it as int16.
fout = open(out_file, 'wb')

# Only the very first frame starts at sample order + 1, so that the reversed
# history slice below has a non-negative stop index; skip is reset to 0 after
# the first frame has been processed.
skip = order + 1
for c in range(nb_frames):
    # Run the encoder once per chunk on the used features and the periods.
    enc_inputs = [features[c:c+1, :, :nb_used_features], periods[c:c+1, :, :]]
    cfeat = enc.predict(enc_inputs)
    for fr in range(feature_chunk_size):
        f = c*feature_chunk_size + fr
        # The last `order` feature values of the frame are the LPC coefficients.
        a = features[c, fr, nb_features-order:]
        for i in range(skip, frame_size):
            sample_pos = f*frame_size + i

            # Linear prediction from the previous `order` samples, newest first.
            history = pcm[sample_pos - 1:sample_pos - order - 1:-1]
            pred = -sum(a*history)
            fexc[0, 0, 1] = lin2ulaw(pred)

            dec_inputs = [fexc, iexc, cfeat[:, fr:fr+1, :], state1, state2]
            p, state1, state2 = dec.predict(dec_inputs)

            # Lower the temperature for voiced frames to reduce noisiness.
            sharpening = np.maximum(0, 1.5*features[c, fr, 37] - .5)
            p *= np.power(p, sharpening)
            p = p/(1e-18 + np.sum(p))
            # Cut off the tail of the remaining distribution and renormalize.
            p = np.maximum(p-0.002, 0).astype('float64')
            p = p/(1e-8 + np.sum(p))

            # Draw one excitation index from the distribution.
            draw = np.random.multinomial(1, p[0, 0, :], 1)
            iexc[0, 0, 0] = np.argmax(draw)

            # Output sample = prediction + decoded excitation; its mu-law
            # value is fed back to the decoder on the next step.
            pcm[sample_pos] = pred + ulaw2lin(iexc[0, 0, 0])
            fexc[0, 0, 0] = lin2ulaw(pcm[sample_pos])

            # First-order recursive filter on the output before writing it.
            mem = coef*mem + pcm[sample_pos]
            np.array([np.round(mem)], dtype='int16').tofile(fout)
        skip = 0


示例#2
0
# Raw output file: every generated sample is appended to it as int16.
fout = open(out_file, 'wb')

# Only the very first frame starts at sample order + 1, so that the reversed
# history slice below has a non-negative stop index; skip is reset to 0 after
# the first frame has been processed.
skip = order + 1
for c in range(nb_frames):
    # Run the encoder once per chunk on the used features and the periods.
    enc_inputs = [features[c:c + 1, :, :nb_used_features],
                  periods[c:c + 1, :, :]]
    cfeat = enc.predict(enc_inputs)
    for fr in range(feature_chunk_size):
        f = c * feature_chunk_size + fr
        # The last `order` feature values of the frame are the LPC coefficients.
        a = features[c, fr, nb_features - order:]
        for i in range(skip, frame_size):
            sample_pos = f * frame_size + i

            # Linear prediction from the previous `order` samples, newest first.
            history = pcm[sample_pos - 1:sample_pos - order - 1:-1]
            pred = -sum(a * history)
            fexc[0, 0, 1] = lin2ulaw(pred)

            dec_inputs = [fexc, cfeat[:, fr:fr + 1, :], state1, state2]
            p, state1, state2 = dec.predict(dec_inputs)

            # Lower the temperature for voiced frames to reduce noisiness.
            sharpening = np.maximum(0, 1.5 * features[c, fr, 37] - .5)
            p *= np.power(p, sharpening)
            p = p / (1e-18 + np.sum(p))
            # Cut off the tail of the remaining distribution and renormalize.
            p = np.maximum(p - 0.002, 0).astype('float64')
            p = p / (1e-8 + np.sum(p))

            # Draw one excitation index; this variant keeps it in fexc[0, 0, 2].
            draw = np.random.multinomial(1, p[0, 0, :], 1)
            fexc[0, 0, 2] = np.argmax(draw)

            # Output sample = prediction + decoded excitation; its mu-law
            # value is fed back to the decoder on the next step.
            pcm[sample_pos] = pred + ulaw2lin(fexc[0, 0, 2])
            fexc[0, 0, 0] = lin2ulaw(pcm[sample_pos])

            # First-order recursive filter on the output before writing it.
            mem = coef * mem + pcm[sample_pos]
            np.array([np.round(mem)], dtype='int16').tofile(fout)
        skip = 0
示例#3
0
])
#noise = np.round(np.concatenate([np.zeros((len(data)*1//5)), np.random.laplace(0, 1.2, len(data)*1//5), np.random.laplace(0, .77, len(data)*1//5), np.random.laplace(0, .33, len(data)*1//5), np.random.randint(-1, 1, len(data)*1//5)]))
# Add the noise to the input and clamp the result to the 0..255 range.
in_data = np.clip(in_data + noise, 0, 255)

# One row of features per frame.
features = np.reshape(features, (nb_frames * feature_chunk_size, nb_features))

# Note: the LPC predictor output is now calculated by the loop below; this
# code was for an earlier version that implemented the prediction filter in C.

upred = np.zeros((nb_frames * pcm_chunk_size, ), dtype='int16')

# Use 16th order LPC to generate the LPC prediction output upred[] and (in
# mu-law form) pred[].

pred_in = ulaw2lin(in_data)
for i in range(2, nb_frames * feature_chunk_size):
    frame_start = i * frame_size
    frame_stop = frame_start + frame_size
    upred[frame_start:frame_stop] = 0
    for k in range(16):
        # The last 16 feature values of frame i are used as filter taps.
        tap = features[i, nb_features - 16 + k]
        # Plain assignment (not "-=") on purpose: the result is cast back to
        # upred's int16 dtype on every tap, which an in-place subtraction of a
        # non-integer array would refuse to do.
        upred[frame_start:frame_stop] = (
            upred[frame_start:frame_stop]
            - pred_in[frame_start - k:frame_stop - k] * tap
        )

pred = lin2ulaw(upred)

# Regroup the input into (nb_frames, pcm_chunk_size, 1) and store it as bytes.
in_data = np.reshape(in_data, (nb_frames, pcm_chunk_size, 1)).astype('uint8')

# LPC residual: the difference between the input speech and the
# predictor output. With a slight time shift, this is also the
# ideal excitation in_exc.
示例#4
0
parser.add_argument('--nb_samples',
                    type=int,
                    default=-1,
                    help='Optional number of samples to plot')
args = parser.parse_args()

# The input file is read as raw bytes.
data = np.fromfile(args.file1, dtype='uint8')
nb_samples = args.nb_samples
# -1 is the "option not given" default. Using it directly as a slice stop
# would silently drop the last byte of the file, so only truncate when a
# limit was actually supplied.
if nb_samples != -1:
    data = data[:nb_samples]

# The file interleaves four byte streams: sig, pred, in_exc, out_exc.
sig = np.array(data[0::4], dtype='float')
pred = np.array(data[1::4], dtype='float')
in_exc = np.array(data[2::4], dtype='float')
out_exc = np.array(data[3::4], dtype='float')

print("exc var: %4.3e" % (np.var(ulaw.ulaw2lin(in_exc))))

# Figure 1: the signal (top panel) and the prediction (bottom panel), each
# passed through ulaw.ulaw2lin and drawn with the same fixed y-range.
plt.figure(1)
plt.subplot(211)
plt.plot(ulaw.ulaw2lin(sig), label='sig')
plt.ylim((-30000, 30000))
plt.legend()
plt.subplot(212)
plt.plot(ulaw.ulaw2lin(pred), label='pred')
plt.ylim((-30000, 30000))
plt.legend()
# Non-blocking show so the script can go on to build the next figure.
plt.show(block=False)

# Figure 2: top panel shows the input excitation.
plt.figure(2)
plt.subplot(211)
plt.plot(ulaw.ulaw2lin(in_exc), label='in_exc')
示例#5
0
def synthesis(args, hparams):
    """Synthesize audio from a feature file with a trained LPCNet model.

    Reads float32 features from ``args.feature_file`` (rows of
    ``hparams.nb_features`` values), generates the signal one sample at a
    time and writes every sample to ``args.out_file`` as raw int16.

    Args:
        args: Object providing the ``feature_file`` and ``out_file`` paths.
        hparams: Object providing ``frame_size``, ``nb_features``,
            ``pitch_idx``, ``checkpoint_file``, ``rnn_units1`` and
            ``rnn_units2``.

    Returns:
        None. Returns early, without creating the output file, when
        ``hparams.checkpoint_file`` is unset or is not an existing file.
    """
    model = LPCNet(hparams).cuda()
    feature_file = args.feature_file
    out_file = args.out_file

    frame_size = hparams.frame_size
    nb_features = hparams.nb_features

    features = np.fromfile(feature_file, dtype='float32')
    features = features.reshape(-1, nb_features)
    # np.resize is deliberately not used here: it caused the last row of
    # data to be lost.

    # The whole file is processed as a single chunk.
    nb_frames = 1

    feature_chunk_size = features.shape[0]
    pcm_chunk_size = frame_size * feature_chunk_size
    features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
    periods = (.1 + 50*features[:,:,hparams.pitch_idx:hparams.pitch_idx+1]+100).astype('int16')

    # Without a usable checkpoint there is nothing to synthesize with.
    if hparams.checkpoint_file is None or not os.path.isfile(hparams.checkpoint_file):
        return
    checkpoint_dict = torch.load(hparams.checkpoint_file)
    model.load_state_dict(checkpoint_dict['state_dict'])

    model.eval()

    order = 16

    pcm = np.zeros((nb_frames * pcm_chunk_size,))
    fexc = np.zeros((1, 1, 2), dtype='float32')
    iexc = np.zeros((1, 1, 1), dtype='int16')
    state1 = torch.Tensor(np.zeros((1, 1, hparams.rnn_units1), dtype='float32')).cuda()
    state2 = torch.Tensor(np.zeros((1, 1, hparams.rnn_units2), dtype='float32')).cuda()

    # State and coefficient of the first-order recursive filter applied to
    # the output samples (presumably de-emphasis -- confirm against training).
    mem = 0
    coef = 0.85

    # Only the very first frame starts at sample order + 1, so that the
    # reversed history slice below has a non-negative stop index.
    skip = order + 1

    # The file is closed even if generation fails part-way. no_grad() is
    # needed because state1/state2 are fed back into the decoder for every
    # sample: with autograd enabled each new state would keep the history of
    # all previous steps alive, so memory would grow with the output length.
    with open(out_file, "wb") as fout, torch.no_grad():
        for c in range(0, nb_frames):
            cfeat = model.encoder(features[c:c+1, :, :nb_features], periods[c:c+1, :, :])
            fexc[0, 0, 0] = 128  # 0 mulaw
            iexc[0, 0, 0] = 128
            for fr in range(0, feature_chunk_size):
                f = c * feature_chunk_size + fr
                # The last `order` feature values of the frame are the LPC
                # coefficients.
                a = features[c, fr, nb_features - order:]
                for i in range(skip, frame_size):
                    # Linear prediction from the previous `order` samples,
                    # newest first.
                    pred = -sum(a*pcm[f*frame_size + i - 1:f*frame_size + i - order-1:-1])
                    fexc[0, 0, 1] = lin2ulaw(pred)

                    p_tensor, state1, state2 = model.decoder(fexc, iexc, cfeat[:, fr:fr+1, :], state1, state2)
                    p = p_tensor.clone().cpu().detach().numpy()
                    # Lower the temperature for voiced frames to reduce noisiness
                    p *= np.power(p, np.maximum(0, 1.5 * features[c, fr, hparams.pitch_idx+1] - .5))
                    p = p / (1e-18 + np.sum(p))
                    # Cut off the tail of the remaining distribution
                    p = np.maximum(p - 0.002, 0).astype('float64')
                    p = p / (1e-8 + np.sum(p))

                    # Draw one excitation index, rebuild the sample from it
                    # and feed the sample's mu-law value back to the decoder.
                    iexc[0, 0, 0] = np.argmax(np.random.multinomial(1, p[0, 0, :], 1))
                    pcm[f * frame_size + i] = pred + ulaw2lin(iexc[0, 0, 0])
                    fexc[0, 0, 0] = lin2ulaw(pcm[f * frame_size + i])
                    mem = coef * mem + pcm[f * frame_size + i]
                    np.array([np.round(mem)], dtype='int16').tofile(fout)
                skip = 0