# Imports assumed by the snippets below (path setup copied from the test script at the end;
# "data" is assumed to be the repository module providing onehot_pixel_image / save_audio_file):
import os, sys, time
import numpy as np
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../../")))
from args import args
from model import params, wavenet
import data


def generate_audio():
    receptive_field_width_steps = 5
    batch_size = 1
    max_dilation = max(params.residual_conv_dilations)
    target_width = receptive_field_width_steps
    padded_input_width = receptive_field_width_steps + max_dilation

    # quantized signals generated by WaveNet
    generated_quantized_audio = np.zeros((padded_input_width,), dtype=np.int32)

    for time_step in xrange(200):
        # quantized signals in receptive field
        padded_quantized_x_batch = generated_quantized_audio[-padded_input_width:].reshape((1, -1))

        # convert to image
        padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantized_channels=params.audio_channels)

        # generate next signal
        softmax = wavenet.forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
        softmax = softmax[0, :, 0, -1]
        generated_quantized_signal = np.random.choice(np.arange(params.audio_channels), p=softmax)
        generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)

    print generated_quantized_audio
def main():
    # compute required input width
    num_layers = len(params.residual_conv_channels)
    receptive_steps_per_unit = params.residual_conv_filter_width ** num_layers
    receptive_steps = (receptive_steps_per_unit - 1) * params.residual_num_blocks + 1
    input_width = receptive_steps
    # padding for causal conv block
    input_width += len(params.causal_conv_channels)

    # quantized signals generated by WaveNet
    generated_quantized_audio = np.zeros((input_width,), dtype=np.int32)

    for time_step in xrange(1000):
        # quantized signals in receptive field
        padded_quantized_x_batch = generated_quantized_audio[-input_width:].reshape((1, -1))

        # convert to image
        padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantization_steps=params.quantization_steps)

        # generate next signal
        softmax = wavenet.forward_one_step(padded_x_batch, apply_softmax=True, as_numpy=True)
        softmax = softmax[0, :, 0, -1]
        generated_quantized_signal = np.random.choice(np.arange(params.quantization_steps), p=softmax)
        generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)

        print generated_quantized_signal,
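# A standalone sanity check of the input-width arithmetic in main() above.
# The hyperparameter values here (filter width 2, 10 residual layers per block,
# 2 blocks, 4 causal layers) are illustrative assumptions, not the repository's
# actual defaults.
residual_conv_filter_width = 2
residual_conv_channels = [32] * 10   # 10 residual layers per block
residual_num_blocks = 2
causal_conv_channels = [128] * 4     # 4 layers in the causal conv block

num_layers = len(residual_conv_channels)
receptive_steps_per_unit = residual_conv_filter_width ** num_layers          # 2 ** 10 = 1024
receptive_steps = (receptive_steps_per_unit - 1) * residual_num_blocks + 1   # 1023 * 2 + 1 = 2047
input_width = receptive_steps + len(causal_conv_channels)                    # 2047 + 4 = 2051
print input_width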
def generate_audio():
    # compute receptive field width
    learnable_steps = 1
    batch_size = 1
    num_layers = len(params.residual_conv_channels)
    receptive_steps_per_unit = params.residual_conv_filter_width ** num_layers
    receptive_steps = (receptive_steps_per_unit - 1) * params.residual_num_blocks + 1
    target_width = learnable_steps
    input_width = receptive_steps
    # to compute all learnable targets
    input_width += learnable_steps - 1
    # padding for causal conv block
    input_width += len(params.causal_conv_channels)

    # quantized signals generated by WaveNet
    generated_quantized_audio = np.zeros((input_width,), dtype=np.int32)

    start = time.time()
    for time_step in xrange(9):
        # quantized signals in receptive field
        padded_quantized_x_batch = generated_quantized_audio[-input_width:].reshape((1, -1))

        # convert to image
        padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantization_steps=params.quantization_steps)

        # generate next signal
        softmax = wavenet.forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
        softmax = softmax[0, :, 0, -1]
        generated_quantized_signal = np.argmax(softmax)
        generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)
        print generated_quantized_signal,

    print generated_quantized_audio
    print time.time() - start

    # reset the cached intermediate outputs, then repeat the run with the
    # faster incremental forward pass for comparison
    wavenet.prev_causal_outputs = None
    wavenet.prev_residual_outputs_out = None
    wavenet.prev_residual_outputs_z = None

    generated_quantized_audio = np.zeros((input_width,), dtype=np.int32)

    start = time.time()
    for time_step in xrange(9):
        # quantized signals in receptive field
        padded_quantized_x_batch = generated_quantized_audio[-input_width:].reshape((1, -1))

        # convert to image
        padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantization_steps=params.quantization_steps)

        # generate next signal
        softmax = wavenet._forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
        softmax = softmax[0, :, 0, -1]
        generated_quantized_signal = np.argmax(softmax)
        generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)
        print generated_quantized_signal,

    print generated_quantized_audio
    print time.time() - start
def generate_audio(sampling_rate=48000, generate_sec=1, remove_silence_frames=False):
    batch_size = 1

    # compute required input width
    num_layers = len(params.residual_conv_channels)
    receptive_steps_per_unit = params.residual_conv_filter_width ** num_layers
    receptive_steps = (receptive_steps_per_unit - 1) * params.residual_num_blocks + 1
    input_width = receptive_steps
    # padding for causal conv block
    input_width += len(params.causal_conv_channels)

    # quantized signals generated by WaveNet
    generated_quantized_audio = np.zeros((input_width,), dtype=np.int32)

    start_time = time.time()
    for time_step in xrange(1, int(sampling_rate * generate_sec)):
        # quantized signals in receptive field
        padded_quantized_x_batch = generated_quantized_audio[-input_width:].reshape((1, -1))

        # convert to image
        padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantization_steps=params.quantization_steps)

        # generate next signal
        if args.use_faster_wavenet:
            softmax = wavenet._forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
        else:
            softmax = wavenet.forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
        softmax = softmax[0, :, 0, -1]
        generated_quantized_signal = np.random.choice(np.arange(params.quantization_steps), p=softmax)

        if generated_quantized_signal == 0 and remove_silence_frames:
            pass
        else:
            generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)

        if time_step % 10 == 0:
            sys.stdout.write("\rgenerating {:.2f} msec / {:.2f} msec".format(time_step * 1000.0 / sampling_rate, generate_sec * 1000.0))
            sys.stdout.flush()

    print "\ndone in {:.3f} sec".format(time.time() - start_time)

    # remove zero paddings
    generated_quantized_audio = generated_quantized_audio[input_width:]

    try:
        os.mkdir(args.generate_dir)
    except:
        pass

    filename = "{}/generated.wav".format(args.generate_dir)
    data.save_audio_file(filename, generated_quantized_audio, params.quantization_steps, format="16bit_pcm", sampling_rate=sampling_rate)
def generate_audio(sampling_rate=48000, generate_sec=1, remove_silence_frames=False):
    # compute required input width
    num_layers = len(params.residual_conv_channels)
    receptive_steps_per_unit = params.residual_conv_filter_width ** num_layers
    receptive_steps = (receptive_steps_per_unit - 1) * params.residual_num_blocks + 1
    input_width = receptive_steps
    # add padding for the causal conv block
    input_width += len(params.causal_conv_channels)

    # pad with silence signals
    generated_signals = np.full((input_width,), 127, dtype=np.int32)

    start_time = time.time()
    for time_step in xrange(1, int(sampling_rate * generate_sec)):
        # signals in receptive field
        input_signals = generated_signals[-input_width:].reshape((1, -1))

        # convert to image
        input_signals = data.onehot_pixel_image(input_signals, quantization_steps=params.quantization_steps)

        # generate next signal
        if args.fast:
            softmax = wavenet._forward_one_step(input_signals, apply_softmax=True, as_numpy=True)
        else:
            softmax = wavenet.forward_one_step(input_signals, apply_softmax=True, as_numpy=True)
        softmax = softmax[0, :, 0, -1]
        signal = np.random.choice(np.arange(params.quantization_steps), p=softmax)

        if signal == 127 and remove_silence_frames:
            pass
        else:
            generated_signals = np.append(generated_signals, [signal], axis=0)

        if time_step % 10 == 0:
            sys.stdout.write("\rgenerating {:.2f} msec / {:.2f} msec".format(time_step * 1000.0 / sampling_rate, generate_sec * 1000.0))
            sys.stdout.flush()

    print "\ndone in {:.3f} sec".format(time.time() - start_time)

    # remove paddings
    generated_signals = generated_signals[input_width:]

    try:
        os.mkdir(args.output_dir)
    except:
        pass

    filename = "{}/generated.wav".format(args.output_dir)
    data.save_audio_file(filename, generated_signals, params.quantization_steps, format="16bit_pcm", sampling_rate=sampling_rate)
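# Why 127 is used as the silence value above: assuming the standard 256-level
# mu-law companding from the WaveNet paper (which the repository's data module
# is assumed to implement), a zero-amplitude sample quantizes to the middle bin.
# A minimal sketch:
import numpy as np

def mulaw_quantize(x, quantization_steps=256):
    # mu-law companding followed by uniform quantization to [0, quantization_steps - 1]
    mu = quantization_steps - 1.0
    compressed = np.sign(x) * np.log(1.0 + mu * np.abs(x)) / np.log(1.0 + mu)
    return int((compressed + 1.0) / 2.0 * mu)

print mulaw_quantize(0.0)  # -> 127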
def generate_audio():
    receptive_field_width_steps = 5
    batch_size = 1
    max_dilation = max(params.residual_conv_dilations)
    target_width = receptive_field_width_steps
    padded_input_width = receptive_field_width_steps + max_dilation

    # quantized signals generated by WaveNet, seeded with a repeating test pattern
    generated_quantized_audio = np.mod(np.arange(1, padded_input_width + 1), 6).astype(np.int32)

    start = time.time()
    for time_step in xrange(500):
        # quantized signals in receptive field
        padded_quantized_x_batch = generated_quantized_audio[-padded_input_width:].reshape((1, -1))

        # convert to image
        padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantized_channels=params.audio_channels)

        # generate next signal
        softmax = wavenet.forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
        softmax = softmax[0, :, 0, -1]
        generated_quantized_signal = np.argmax(softmax)
        generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)

    print generated_quantized_audio
    print time.time() - start

    # reset the cached intermediate outputs, then repeat the run with the
    # faster incremental forward pass for comparison
    wavenet.prev_causal_outputs = None
    wavenet.prev_residual_outputs_out = None
    wavenet.prev_residual_outputs_z = None

    generated_quantized_audio = np.mod(np.arange(1, padded_input_width + 1), 6).astype(np.int32)

    start = time.time()
    for time_step in xrange(500):
        # quantized signals in receptive field
        padded_quantized_x_batch = generated_quantized_audio[-padded_input_width:].reshape((1, -1))

        # convert to image
        padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantized_channels=params.audio_channels)

        # generate next signal
        softmax = wavenet._forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
        softmax = softmax[0, :, 0, -1]
        generated_quantized_signal = np.argmax(softmax)
        generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)

    print generated_quantized_audio
    print time.time() - start
def generate_audio(receptive_field_width_ms=25, sampling_rate=48000, generate_duration_sec=1):
    # e.g. 48000 Hz * 0.25 sec = 12000 time steps (= 250 millisecond receptive field)
    receptive_steps = int(sampling_rate * receptive_field_width_ms / 1000.0)

    # compute required input width
    batch_size = 1
    max_dilation = max(params.residual_conv_dilations)
    target_width = receptive_steps
    padded_input_width = receptive_steps + max_dilation * (params.residual_conv_kernel_width - 1)

    # quantized signals generated by WaveNet
    generated_quantized_audio = np.zeros((padded_input_width,), dtype=np.int32)

    start_time = time.time()
    for time_step in xrange(1, int(sampling_rate * generate_duration_sec)):
        # quantized signals in receptive field
        padded_quantized_x_batch = generated_quantized_audio[-padded_input_width:].reshape((1, -1))

        # convert to image
        padded_x_batch = data.onehot_pixel_image(padded_quantized_x_batch, quantized_channels=params.audio_channels)

        # generate next signal
        if args.use_faster_wavenet:
            softmax = wavenet._forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
        else:
            softmax = wavenet.forward_one_step(padded_x_batch, softmax=True, return_numpy=True)
        softmax = softmax[0, :, 0, -1]
        generated_quantized_signal = np.random.choice(np.arange(params.audio_channels), p=softmax)
        generated_quantized_audio = np.append(generated_quantized_audio, [generated_quantized_signal], axis=0)

        if time_step % 10 == 0:
            sys.stdout.write("\rgenerating {:.2f} msec / {:.2f} msec".format(time_step * 1000.0 / sampling_rate, generate_duration_sec * 1000.0))
            sys.stdout.flush()

    print "\ndone in {:.3f} sec".format(time.time() - start_time)

    # remove zero paddings
    generated_quantized_audio = generated_quantized_audio[padded_input_width:]

    try:
        os.mkdir(args.generate_dir)
    except:
        pass

    filename = "{}/generated.wav".format(args.generate_dir)
    data.save_audio_file(filename, generated_quantized_audio, params.audio_channels, format="16bit_pcm", sampling_rate=sampling_rate)
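# In this version the padding is derived from the largest dilation rather than from
# the layer-count formula used in the other snippets. A standalone sketch of the
# arithmetic with illustrative (assumed) values: dilations doubling up to 512 and a
# kernel width of 2.
residual_conv_dilations = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
residual_conv_kernel_width = 2
sampling_rate = 48000
receptive_field_width_ms = 25

receptive_steps = int(sampling_rate * receptive_field_width_ms / 1000.0)                                 # 1200
padded_input_width = receptive_steps + max(residual_conv_dilations) * (residual_conv_kernel_width - 1)   # 1200 + 512 = 1712
print padded_input_width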
from scipy.io import wavfile
import numpy as np
import os, sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "../../")))
from args import args
from model import params, wavenet

input_size = 5
batchsize = 3
data = np.arange(1, batchsize * params.audio_channels * input_size + 1).reshape((batchsize, params.audio_channels, 1, input_size)).astype(np.float32)
wavenet.forward_one_step(data)
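# The generation loops above all index the network output as softmax[0, :, 0, -1],
# which implies a (batch, channels, 1, width) layout. A hedged shape check on the
# dummy batch built above; the exact keyword arguments depend on which revision of
# forward_one_step is loaded:
output = wavenet.forward_one_step(data, softmax=True, return_numpy=True)
print output.shape  # expected: (batchsize, params.audio_channels, 1, input_size)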