Exemplo n.º 1
0
# --- Command-line configuration ---------------------------------------------
# Network hyper-parameters and clip duration come from the command line.
# (sys.argv[1] is not read here — presumably consumed elsewhere in the
# script, e.g. a model/checkpoint path; confirm against the full file.)
num_blocks = int(sys.argv[2])   # number of dilated-convolution blocks
num_layers = int(sys.argv[3])   # layers per block
num_hidden = int(sys.argv[4])   # hidden units per layer
duration = int(sys.argv[5])     # clip duration in whole seconds

Quantification = 256  # number of output classes (amplitude quantization levels)
# `sample_rate` is defined elsewhere in the file.
# NOTE(review): the parity test on duration*sample_rate/2 presumably keeps
# the sample count compatible with the model's expected input width — confirm.
if (duration * sample_rate / 2) % 2 == 0:
    num_time_samples = int(sample_rate * duration)
else:
    num_time_samples = int(sample_rate * duration) - 1

# Initialize the model before restoring it (the architecture must match the
# checkpoint being loaded).
model = Model(num_time_samples=num_time_samples,
              num_channels=1,
              gpu_fraction=1.0,
              num_classes=Quantification,
              num_blocks=num_blocks,
              num_layers=num_layers,
              num_hidden=num_hidden)

# Restore the trained weights into the freshly built model.
model.restore()

# Create the Generator used to run autoregressive prediction.
generator = Generator(model)

# Random scalar in [-1.0, 1.0) used to seed generation.
seed = np.random.uniform(low=-1.0, high=1.0)

random_input = [[seed]]
random_inputs = []
#for i in range(num_hidden-1):
Exemplo n.º 2
0
                        # NOTE(review): argparse `type=bool` is a known trap —
                        # bool() of any non-empty string (including "False")
                        # is True, so this option cannot be disabled from the
                        # command line except with an empty string. The usual
                        # fix is action='store_true'. Left as-is for review.
                        type=bool,
                        default=False,
                        help='whether to resume training existing models')

    # Parse and hand the namespace back to the caller.
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    # Parse command-line arguments (set_args is defined earlier in the file).
    args = set_args()

    # construct model
    # NOTE(review): assumes Model's first positional parameter is the input
    # length (x_len) — confirm against the Model definition.
    wave_model = Model(args.x_len,
                       num_channels=1,
                       num_classes=args.num_classes,
                       num_blocks=args.num_blocks,
                       num_layers=args.num_layers,
                       num_hidden=args.num_hidden,
                       kernel_size=args.kernel_size)

    # Move the model to an explicitly requested device; 'default' means
    # leave it wherever Model() placed it.
    if not (args.device == 'default'):
        wave_model.set_device(torch.device(args.device))

    # create dataset and dataloader
    filelist = list_files(args.data)
    # y_len is derived from the model itself (one less than its output
    # width), so targets line up with what the network actually emits.
    dataset = AudioData(filelist,
                        args.x_len,
                        y_len=wave_model.output_width - 1,
                        num_classes=args.num_classes,
                        store_tracks=True)
Exemplo n.º 3
0
# Last CLI option; `parser` is created earlier in the file.
parser.add_argument('--stopping_loss',
                    type=float,
                    default=0.1,
                    help='loss at which training stops')
# parse_known_args ignores unrecognised options (they land in `unparsed`).
FLAGS, unparsed = parser.parse_known_args()

SAMPLE_RATE = 24000  # Hz (not referenced within this excerpt)

# Build one training batch from the vocal stem (project helper).
inputs, targets = make_batch('assets/SMvocals.wav')
num_time_samples = inputs.shape[1]  # samples per example (axis 1 of the batch)
num_channels = 1
gpu_fraction = 1

# NOTE(review): `FLAGS.num_layers or 5` also replaces an explicit 0 with 5
# (0 is falsy) — confirm the fallback is only meant for None/unset.
model = Model(num_time_samples=num_time_samples,
              num_channels=num_channels,
              gpu_fraction=gpu_fraction,
              num_layers=FLAGS.num_layers or 5,
              learning_rate=FLAGS.learning_rate,
              stopping_loss=FLAGS.stopping_loss)

# Time the full training run.
tic = time()
model.train(inputs, targets)
toc = time()

print('Training took {} seconds.'.format(toc - tic))

generator = Generator(model)

# Get first sample of input to seed autoregressive generation.
input_ = inputs[:, 0:1, 0]

tic = time()
Exemplo n.º 4
0
# Jupyter-notebook export: Audio renders an inline audio player.
from IPython.display import Audio

#get_ipython().magic(u'matplotlib inline')


# In[ ]:


# Build one training batch from the example recording (project helper).
inputs, targets = make_batch('assets/voice.wav')
num_time_samples = inputs.shape[1]  # samples per example (axis 1 of the batch)
num_channels = 1
gpu_fraction = 1.0

model = Model(num_time_samples=num_time_samples,
              num_channels=num_channels,
              gpu_fraction=gpu_fraction)

# Listen to the input batch (only renders inside a notebook).
# NOTE(review): playback rate is hard-coded to 44100 Hz — confirm it matches
# the file's actual sample rate.
Audio(inputs.reshape(inputs.shape[1]), rate=44100)


# In[ ]:


# Time the full training run.
tic = time()
model.train(inputs, targets)
toc = time()

print('Training took {} seconds.'.format(toc-tic))
Exemplo n.º 5
0
# NOTE(review): this snippet is Python 2 (`print` statement below).
from time import time

from wavenet.utils import make_batch
from wavenet.models import Model, Generator

num_channels = 1
gpu_fraction = 1.0
num_classes = 2048  # quantization levels, shared by make_batch and the model

# Build one training batch quantized into num_classes levels.
inputs, targets = make_batch('assets/voice.wav', num_classes)
num_time_samples = inputs.shape[1]

print inputs.shape, targets.shape, num_time_samples
# NOTE(review): num_time_samples is computed above but the kwarg is
# commented out — confirm Model's default input width is intended here.
model = Model(  #num_time_samples=num_time_samples,
    num_channels=num_channels,
    gpu_fraction=gpu_fraction,
    num_classes=num_classes,
    prob_model_type='softmax')

# Time the full training run.
tic = time()
model.train(inputs, targets)
toc = time()
print('Training took {} seconds.'.format(toc - tic))
Exemplo n.º 6
0
duration = 2  # seconds of audio per training example
# `sample_rate`, `Quantification` and `WavList` are defined earlier in the
# file.  NOTE(review): the parity test on duration*sample_rate/2 presumably
# keeps the sample count compatible with the model's input width — confirm.
if (duration * sample_rate / 2) % 2 == 0:
    num_time_samples = int(sample_rate * duration)
else:
    num_time_samples = int(sample_rate * duration) - 1
num_channels = 1
gpu_fraction = 1.0
num_classes = Quantification
num_blocks = 2
num_layers = 12
num_hidden = 256

model = Model(num_time_samples=num_time_samples,
              num_channels=num_channels,
              gpu_fraction=gpu_fraction,
              num_classes=num_classes,
              num_blocks=num_blocks,
              num_layers=num_layers,
              num_hidden=num_hidden)

# Build one (inputs, targets) batch per wav file listed in WavList.
inputlist = []
targetlist = []

for w in WavList:

    path = 'assets/' + w + '.wav'
    inputs, targets = make_batch(path, sample_rate, duration=duration)
    inputlist.append(inputs)
    targetlist.append(targets)

# Stack the per-file batches into one array along a new leading axis.
inputlist = np.stack(inputlist)
Exemplo n.º 7
0
def main():
    """End-to-end WaveNet training script.

    Workflow:
      1. Parse CLI arguments.
      2. Build (or wait for) the .npy chunk database derived from the raw
         audio directory; a lock file guards against concurrent builders.
      3. Initialise log/wav directories.
      4. Shuffle the chunk list with a fixed seed and split 90/10 into
         training / validation sets.
      5. Build the model, train it, and generate 32000 samples.
    """
    # Hoisted from lower in the function: a function-scope `import time`
    # makes `time` local for the WHOLE function body in Python, so the
    # original `time.sleep(1)` below raised UnboundLocalError because the
    # import only executed later. Importing at the top fixes that.
    import time

    ##############################
    # Get args
    args = get_arguments()
    ##############################

    ##############################
    # Build data chunk
    # The preprocessing parameters are encoded in the directory name so
    # several configurations can coexist under the same data dir.
    config_str = "_".join([
        str(args.sample_rate),
        str(args.sample_size),
        str(args.sliding_ratio),
        str(args.silence_threshold)
    ])
    files_dir = args.data_dir
    npy_dir = files_dir + '/' + config_str
    lock_file_db = files_dir + '/lock'
    # If another process is currently building the database, wait for it.
    while os.path.isfile(lock_file_db):
        time.sleep(1)
    if not os.path.isdir(npy_dir):
        try:
            # Take the lock, then build the chunk database.
            ff = open(lock_file_db, 'w')
            build_db.main(files_dir, npy_dir, args.sample_rate,
                          args.sample_size, args.sliding_ratio,
                          args.silence_threshold)
            ff.close()
        except Exception:
            # Was a bare `except:` that swallowed everything (including
            # KeyboardInterrupt) and then carried on with no data. A failed
            # build leaves a half-written directory: remove it and propagate
            # the error. ignore_errors covers the case where npy_dir was
            # never created.
            shutil.rmtree(npy_dir, ignore_errors=True)
            raise
        finally:
            # Always release the lock, success or failure.
            os.remove(lock_file_db)
        # data_statistics.bar_activations(save_dir, save_dir, sample_size_padded)
    ##############################

    ##############################
    # Init dirs
    utils.init_directory(args.logdir_root)
    if args.summary:
        logdir_summary = os.path.join(args.logdir_root, 'summary')
        utils.init_directory(logdir_summary)
    # Save
    # NOTE(review): logdir_save is computed but never used below —
    # checkpoint saving was probably intended; confirm.
    logdir_save = os.path.join(args.logdir_root, 'save')
    # Wave
    logdir_wav = os.path.join(args.logdir_root, 'wav')
    utils.init_directory(logdir_wav)
    ##############################

    ##############################
    # Get Data and Split them
    # Get list of data chunks
    chunk_list = build_db.find_files(npy_dir, pattern="*.npy")
    # Fixed seed => the train/validation split is reproducible across runs.
    random.seed(210691)
    random.shuffle(chunk_list)
    # Clamp batch_size when there are fewer chunks than the requested batch.
    num_chunk = len(chunk_list)
    batch_size = min(args.batch_size, num_chunk)
    # Split 90 / 10
    training_chunks = chunk_list[:int(0.9 * num_chunk)]
    valid_chunks = chunk_list[int(0.9 * num_chunk):]
    ##############################

    ##############################
    # Create network
    ttt = time.time()
    model = Model(num_time_samples=args.sample_size,
                  num_channels=1,
                  num_classes=args.q_levels,
                  num_blocks=args.num_blocks,
                  num_layers=args.num_layers,
                  num_hidden=args.num_hidden,
                  filter_width=args.filter_width,
                  gpu_fraction=0.9)
    print("TTT: Instanciate network : {}".format(time.time() - ttt))
    ##############################

    ##############################
    # Train
    tic = time.time()
    # Pass the clamped batch_size (was args.batch_size, which ignored the
    # min() computed above when very few chunks exist).
    model.train(training_chunks, valid_chunks, batch_size,
                args.valid_freq, args.generate_freq)
    toc = time.time()
    print('Training took {} seconds.'.format(toc - tic))
    ##############################

    ##############################
    # Generate
    generator = Generator(model)
    # NOTE(review): `inputs` is not defined anywhere in this function — the
    # generation seed was probably meant to come from a training batch.
    # Left as-is pending confirmation.
    input_ = inputs[:, 0:1, 0]
    # Was `tic = time()`: after `import time`, `time` is the module object,
    # so calling it raised TypeError. time.time() is the working form.
    tic = time.time()
    predictions = generator.run(input_, 32000)
    toc = time.time()
    print('Generating took {} seconds.'.format(toc - tic))
    ##############################
    return
Exemplo n.º 8
0
# NOTE(review): this snippet is Python 2 (`print` statement and xrange below).
model_path = 'networks/snare_20/'                # trained-model checkpoint dir
wav_template = 'rendered/snare_20/final/{}.wav'  # output path pattern
samples_path = 'data/drum_samples'               # pickled sample bank

len_sample = 0.2 # [s] # 0.1s hi-hat_20, 0.2s snare
len_sample = int(16000 * len_sample)  # seconds -> samples at 16 kHz

# Load samples
# NOTE(review): unpickling executes arbitrary code — only load sample banks
# you control.
sorted_f = pickle.load(open(samples_path, 'rb'))
samples = sorted_f['Snare']

# Offset into the sample bank for the chosen drum type:
# j = 450   # kick_20
# j = 840 # hihat_20
j = 948 # snare_20

# First element of each loaded batch is the input waveform.
init_values = [load_sample(s, len_sample)[0] for s in samples[j:j+20]]

# Collapse to a 1-D array of scalar seed values.
init_values = np.array(init_values)[:,0,0,0]

print init_values.shape
# Create the model (architecture must match the checkpoint being loaded).
model = Model(num_time_samples=len_sample,
              num_channels=1,
              gpu_fraction=1.0)
last_epoch = model.load_model(model_path)

# Render 1000 clips, each seeded with a random value from the bank.
for i in xrange(1000):
    # Pick a random start
    init_value = np.random.choice(init_values)
    generator = Generator(model)
    generate_sample(generator, len_sample, wav_template.format(i), init_value = init_value)
Exemplo n.º 9
0
# Sample random samples
# NOTE(review): Python 2 snippet (`print` statement, xrange). `samples`,
# `len_sample`, `model_path`, `load_sample` and `save_sample` are defined
# earlier in the file.
for i in range(1):
    # Offset into the sample bank for the chosen drum type:
    # j = 450   # kick_20
    # j = 840 # hihat_20
    j = 948  # snare_20

    batches = [load_sample(s, len_sample) for s in samples[j:j + 20]]
    inputs = np.array([b[0] for b in batches])   # input waveforms
    targets = np.array([b[1] for b in batches])  # training targets
    print 'Loaded {} samples'.format(len(batches))

    # Concatenate the raw inputs and save them for reference listening.
    all_inputs = inputs[:, 0, :, 0].flatten()
    save_sample(all_inputs, 'rendered/snare_20/inputs_{}.wav'.format(j))

# Create the model (architecture must match the checkpoint being loaded).
model = Model(num_time_samples=len_sample, num_channels=1, gpu_fraction=1.0)
# Resume from the last checkpoint if one exists.
last_epoch = model.load_model(model_path)
if last_epoch:
    start = last_epoch + 1
else:
    start = 0

inds = np.arange(len(batches))

# Training loop (body continues beyond this excerpt).
for i in xrange(start, 10000000):
    # Shuffle inputs and targets with the same permutation each epoch.
    random.shuffle(inds)

    inputs = inputs[inds]
    targets = targets[inds]
Exemplo n.º 10
0
                     num_classes=args.num_classes,
                     store_tracks=False,
                     class_label=idx)
    return temp


if __name__ == '__main__':
    # Load hyper-parameters / paths (load_parameters is defined earlier
    # in the file).
    args = load_parameters()

    # construct model
    wave_model = Model(
        layers=args.num_layers,
        num_classes=args.num_classes,
        blocks=args.num_blocks,
        kernel_size=args.kernel_size,
        dilation_channels=args.dilation_channels,
        residual_channels=args.residual_channels,
        skip_channels=args.skip_channels,
        end_channels=args.end_channels,
        bias=args.bias,
        output_length=args.output_width,
    )
    # NOTE(review): this overrides any user-requested device whenever CUDA
    # is available (even an explicit --device cpu) — confirm intended.
    if torch.cuda.is_available():
        args.device = 'cuda'

    if not (args.device == 'default'):
        wave_model.set_device(torch.device(args.device))

    # create dataset and dataloader
    # Build one dataset per class label (loop continues beyond this excerpt).
    datasets = []
    meta = []
    for idx, label in enumerate(args.class_labels):