Example #1
def get_subseq(dataset, batch_size, seq_len, overlap, q_type, q_levels):
    """Yield (input, target) subsequence pairs from each batch in dataset."""
    for batch in dataset:
        batch = quantize(batch, q_type, q_levels)
        num_samps = len(batch[0])
        # Step through each sequence in seq_len chunks; every input keeps
        # `overlap` samples of left context, and the target is the same
        # seq_len samples without that context.
        for i in range(overlap, num_samps, seq_len):
            x = batch[:, i - overlap:i + seq_len]
            y = x[:, overlap:overlap + seq_len]
            yield (x, y)
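A minimal smoke-test sketch for the iterator above, assuming the module's quantize helper is importable; the batch shape, q_type value, and q_levels are illustrative assumptions, not the project's defaults:

import tensorflow as tf

# Hypothetical smoke test: one batch of 2 sequences of 16 samples,
# sliced into inputs with 4 samples of left context. The 'linear'
# q_type and q_levels=256 are assumptions.
fake_batch = tf.random.uniform((2, 16), minval=-1.0, maxval=1.0)
dataset = [fake_batch]  # stands in for a tf.data.Dataset of audio batches
for x, y in get_subseq(dataset, batch_size=2, seq_len=8, overlap=4,
                       q_type='linear', q_levels=256):
    print(x.shape, y.shape)  # (2, 12) (2, 8); the final chunk is shorter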
Example #2
def generate(path,
             ckpt_path,
             config,
             num_seqs=NUM_SEQS,
             dur=OUTPUT_DUR,
             sample_rate=SAMPLE_RATE,
             temperature=SAMPLING_TEMPERATURE,
             seed=None,
             seed_offset=None):
    model = create_inference_model(ckpt_path, num_seqs, config)
    q_type = model.q_type
    q_levels = model.q_levels
    q_zero = q_levels // 2
    num_samps = dur * sample_rate
    temperature = get_temperature(temperature, num_seqs)
    # Precompute sample sequences, initialised to q_zero.
    samples = []
    init_samples = np.full((model.batch_size, model.big_frame_size, 1), q_zero)
    # Set seed if provided.
    if seed is not None:
        seed_audio = load_seed_audio(seed, seed_offset, model.big_frame_size)
        seed_audio = tf.convert_to_tensor(seed_audio)
        init_samples[:, :model.big_frame_size, :] = quantize(
            seed_audio, q_type, q_levels)
    init_samples = tf.constant(init_samples, dtype=tf.int32)
    samples.append(init_samples)
    print_progress_every = NUM_FRAMES_TO_PRINT * model.big_frame_size
    start_time = time.time()
    for i in range(0, num_samps // model.big_frame_size):
        t = i * model.big_frame_size
        # Generate samples
        frame_samples = model(samples[i],
                              training=False,
                              temperature=temperature)
        samples.append(frame_samples)
        # Monitor progress
        if t % print_progress_every == 0:
            end = min(t + print_progress_every, num_samps)
            step_dur = time.time() - start_time
            print(
                f'Generated samples {t+1} - {end} of {num_samps} (time elapsed: {step_dur:.3f} seconds)'
            )
    samples = tf.concat(samples, axis=1)
    samples = samples[:, model.big_frame_size:, :]
    # Save sequences to disk
    path = path.split('.wav')[0]
    for i in range(model.batch_size):
        seq = np.reshape(samples[i], (-1, 1))[model.big_frame_size:].tolist()
        audio = dequantize(seq, q_type, q_levels)
        file_name = '{}_{}'.format(path, i) if model.batch_size > 1 else path
        file_name = '{}.wav'.format(file_name)
        write_wav(file_name, audio, sample_rate)
        print('Generated sample output to {}'.format(file_name))
    print('Done')
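A hedged invocation sketch; every argument value below (output path, checkpoint path, durations) is a placeholder, and `config` is assumed to be the model-config object the project loads elsewhere:

# Hypothetical call: render four 10-second sequences at 16 kHz from a
# trained checkpoint. Paths and parameter values are placeholders.
generate('out/generated.wav',
         ckpt_path='ckpts/model.ckpt-100000',
         config=config,
         num_seqs=4,
         dur=10,
         sample_rate=16000,
         temperature=0.95)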
Example #3
def train_step(inputs):
    with tf.GradientTape() as tape:
        # Quantize the raw audio and get per-sample logits over q_levels classes.
        inputs = quantize(inputs, q_type, q_levels)
        raw_output = model(inputs, training=True)
        prediction = tf.reshape(raw_output, [-1, q_levels])
        # Targets are the samples beyond the initial conditioning frame.
        target = tf.reshape(inputs[:, model.big_frame_size:, :], [-1])
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=prediction, labels=target))
    # Clip gradients by global norm before applying them.
    grads = tape.gradient(loss, model.trainable_variables)
    grads, _ = tf.clip_by_global_norm(grads, 5.0)
    opt.apply_gradients(list(zip(grads, model.trainable_variables)))
    train_accuracy.update_state(target, prediction)
    return loss
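A minimal outer-loop sketch around train_step, assuming it closes over model, opt, train_accuracy, and the quantization settings as above; the tf.function wrapping, epoch count, and train_dataset iterable are all assumptions:

train_step = tf.function(train_step)  # graph compilation is an assumed optimisation
for epoch in range(10):  # epoch count is a placeholder
    train_accuracy.reset_states()
    for batch in train_dataset:  # assumed tf.data.Dataset of audio batches
        loss = train_step(batch)
    print(f'epoch {epoch}: loss={float(loss):.4f}, '
          f'accuracy={float(train_accuracy.result()):.4f}')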
Example #4
def generate_and_save_samples(model, path, seed, seed_offset=0, dur=OUTPUT_DUR,
                              sample_rate=SAMPLE_RATE, temperature=SAMPLING_TEMPERATURE):
    q_type = model.q_type
    q_levels = model.q_levels
    q_zero = q_levels // 2
    num_samps = dur * sample_rate

    # Precompute sample sequences, initialised to q_zero.
    samples = np.full((model.batch_size, model.big_frame_size + num_samps, 1), q_zero, dtype='int32')

    # Set seed if provided.
    if seed is not None:
        seed_audio = load_seed_audio(seed, seed_offset, model.big_frame_size)
        samples[:, :model.big_frame_size, :] = quantize(seed_audio, q_type, q_levels)

    print_progress_every = 250
    start_time = time.time()

    # Run the model tiers. Generates a single sample per step. Each frame-level tier
    # consumes one frame of samples per step.
    for t in range(model.big_frame_size, model.big_frame_size + num_samps):

        # Top tier (runs every big_frame_size steps)
        if t % model.big_frame_size == 0:
            inputs = samples[:, t - model.big_frame_size : t, :].astype('float32')
            big_frame_outputs = model.big_frame_rnn(inputs)

        # Middle tier (runs every frame_size steps)
        if t % model.frame_size == 0:
            inputs = samples[:, t - model.frame_size : t, :].astype('float32')
            big_frame_output_idx = (t // model.frame_size) % (
                model.big_frame_size // model.frame_size
            )
            frame_outputs = model.frame_rnn(
                inputs,
                conditioning_frames=unsqueeze(big_frame_outputs[:, big_frame_output_idx, :], 1))

        # Sample level tier (runs once per step)
        inputs = samples[:, t - model.frame_size : t, :]
        frame_output_idx = t % model.frame_size
        sample_outputs = model.sample_mlp(
            inputs,
            conditioning_frames=unsqueeze(frame_outputs[:, frame_output_idx, :], 1))

        # Generate
        sample_outputs = tf.reshape(sample_outputs, [-1, q_levels])
        generated = sample(sample_outputs, temperature)

        # Monitor progress
        start = t - model.big_frame_size
        if start % print_progress_every == 0:
            end = min(start + print_progress_every, num_samps)
            duration = time.time() - start_time
            template = 'Generating samples {} - {} of {} (time elapsed: {:.3f} seconds)'
            print(template.format(start+1, end, num_samps, duration))

        # Update sequences
        samples[:, t] = np.array(generated).reshape([-1, 1])

    # Save sequences to disk
    path = path.split('.wav')[0]
    for i in range(model.batch_size):
        seq = samples[i].reshape([-1, 1])[model.big_frame_size :].tolist()
        audio = dequantize(seq, q_type, q_levels)
        file_name = '{}_{}'.format(path, str(i)) if model.batch_size > 1 else path
        file_name = '{}.wav'.format(file_name)
        write_wav(file_name, audio, sample_rate)
        print('Generated sample output to {}'.format(file_name))
    print('Done')
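The tier schedule above can be sanity-checked in isolation: the top tier fires every big_frame_size steps, the middle tier every frame_size steps, and the sample MLP every step. A small sketch printing which tiers fire at each step; the frame sizes of 64 and 16 are illustrative assumptions, not this model's actual configuration:

# Illustrative only: big_frame_size=64 and frame_size=16 are assumptions.
big_frame_size, frame_size = 64, 16
for t in range(big_frame_size, big_frame_size + 2 * frame_size + 1):
    tiers = ['sample_mlp']
    if t % frame_size == 0:
        tiers.insert(0, 'frame_rnn')
    if t % big_frame_size == 0:
        tiers.insert(0, 'big_frame_rnn')
    print(t, ' + '.join(tiers))
# t=64 -> big_frame_rnn + frame_rnn + sample_mlp; t=80 -> frame_rnn + sample_mlp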
Example #5
def generate(model,
             num_seqs=NUM_SEQS,
             dur=OUTPUT_DUR,
             sample_rate=SAMPLE_RATE,
             temperature=SAMPLING_TEMPERATURE,
             seed=None,
             seed_offset=None):
    q_type = model.q_type
    q_levels = model.q_levels
    q_zero = q_levels // 2
    num_samps = dur * sample_rate
    # print("generate()")
    # print(f" num_samps={num_samps}") # 128000
    # print(f" temperature={temperature}")
    temperature = get_temperature(temperature, num_seqs, num_samps, dur)
    # print(f" temperature'.shape={temperature.shape}")
    # Precompute sample sequences, initialised to q_zero.
    samples = []
    init_samples = np.full((model.batch_size, model.big_frame_size, 1), q_zero)
    # Set seed if provided.
    if seed is not None:
        seed_audio = tf.convert_to_tensor(seed)
        init_samples[:, :model.big_frame_size, :] = quantize(
            seed_audio, q_type, q_levels)
    init_samples = tf.constant(init_samples, dtype=tf.int32)
    samples.append(init_samples)
    # print(f" len(samples)={len(samples)}")
    # print(f" samples[0].shape={samples[0].shape}") # (1,64,1)
    print_progress_every = NUM_FRAMES_TO_PRINT * model.big_frame_size
    start_time = time.time()
    # Rolling window of the most recent per-frame generation times,
    # used to estimate the remaining time.
    stats = [0.0] * 10
    for i in range(0, num_samps // model.big_frame_size):
        t = i * model.big_frame_size
        # Generate samples
        # Slice this frame's portion of a per-sample temperature schedule,
        # or pass the scalar temperature through unchanged.
        temp = temperature
        if temp.shape[-1] > 1:
            start = i * model.big_frame_size
            stop = (i + 1) * model.big_frame_size
            temp = temperature[:, start:stop]
        gen_start_time = time.time()
        frame_samples = model(samples[i], training=False, temperature=temp)
        gen_end_time = time.time()
        samples.append(frame_samples)
        del stats[0]
        stats.append(gen_end_time - gen_start_time)
        # Monitor progress
        if t % print_progress_every == 0:
            end = min(t + print_progress_every, num_samps)
            step_dur = time.time() - start_time
            stats_num = min(i + 1, len(stats)) * model.big_frame_size
            stats_dur = sum(stats)
            time_rem = 0
            if stats_dur > 0:
                rate = stats_num / stats_dur
                num_rem = num_samps - t
                time_rem = int(round(num_rem / rate))
            remaining = format_dur(time_rem)
            print(
                f'Generated samples {t+1} - {end} of {num_samps} (time elapsed: {step_dur:.3f} seconds, remaining: {remaining})'
            )
    samples = tf.concat(samples, axis=1)
    samples = samples[:, model.big_frame_size:, :]
    # Save sequences to disk
    for i in range(model.batch_size):
        seq = np.reshape(samples[i], (-1, 1))[model.big_frame_size:].tolist()
        audio = dequantize(seq, q_type, q_levels)
        yield audio.numpy()
    print('Done')
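A hedged consumer sketch for this generator variant, which yields audio arrays instead of writing files; the soundfile dependency, num_seqs, and the 16 kHz rate are assumptions:

import numpy as np
import soundfile as sf  # assumed dependency for writing WAV files

# Hypothetical consumer: write each yielded sequence to disk as it arrives.
for i, audio in enumerate(generate(model, num_seqs=2, dur=5,
                                   sample_rate=16000)):
    sf.write(f'generated_{i}.wav', np.asarray(audio), 16000)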