Example No. 1
def char_to_audio(batch_size=1):
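    # Text-to-audio sampling: map the text conditioning to phoneme ids with nmt_fn,
    # then generate audio with predict_fn and save it alongside the ground-truth test_X.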
    lib.save_params('vctk_nmt_best.pkl')
    phon_to_idx = pickle.load(open('/data/lisa/exp/kumarrit/vctk/phon2code.pkl'))
    idx_to_phon = {x:y for y,x in phon_to_idx.iteritems()}
    itr = vctk_loader.data_loader('test',batch_size,append_tokens=True,conditioning='text')
    count=0
    batch = np.random.choice(500)
    for j in xrange(3):
        for test_spk,test_X,test_X_mask,test_ctx,test_ctx_mask in itr:
            if test_X.shape[1]>400:
                break
    # for i in xrange(random.choice(range(20))):
    #     test_ctx,test_ctx_mask,_,_ = itr.next()

    for i in tqdm(xrange(batch_size)):
        try:
            end_ctx = np.where(test_ctx[i]==0)[0][0]
        except IndexError:
            end_ctx = -1
        try:
            end_X = np.where(test_X_mask[i]==0)[0][0]
        except IndexError:
            end_X = -1
        phons = nmt_fn(test_ctx[i,:end_ctx][None,:],test_ctx_mask[i,:end_ctx][None,:]).flatten()
        try:
            end_idx = np.where(phons==0)[0][0]
            phons = phons[:end_idx].tolist()
        except IndexError:
            phons = phons.tolist()
        print ' '.join([idx_to_phon[x] for x in phons])
        pred_X = predict_fn(
              test_spk[i].reshape((1,)),
              np.asarray(phons,dtype=np.int32).reshape((1,-1))
            )
        save(pred_X[0],test_X[i,:end_X],i)
Example No. 2
def generate_and_save_samples(tag):

    costs = []
    for (images,) in test_data():
        out = eval_fn(images, ALPHA_ITERS+1)
        print out
        costs.append(out)
    print "test cost: {}".format(np.mean(costs))
    # return
    lib.save_params(os.path.join(OUT_DIR, tag + "_params.pkl"))
    def save_images(images, filename, i = None):
        """images.shape: (batch, n channels, height, width)"""
        if i is not None:
            new_tag = "{}_{}".format(tag, i)
        else:
            new_tag = tag

        images = images.reshape((10,10,28,28))

        # pickle.save("{}/{}_{}.pkl".format(OUT_DIR, filename, tag))
        # rowx, rowy, height, width -> rowy, height, rowx, width
        images = images.transpose(1,2,0,3)
        images = images.reshape((10*28, 10*28))

        image = scipy.misc.toimage(images, cmin=0.0, cmax=1.0)
        image.save('{}/{}_{}.jpg'.format(OUT_DIR, filename, new_tag))

    latents = np.random.normal(size=(10, LATENT_DIM))
    latents = np.repeat(latents, 10, axis=0)

    latents = latents.astype(theano.config.floatX)

    samples = np.zeros(
        (100, N_CHANNELS, HEIGHT, WIDTH),
        dtype=theano.config.floatX
    )

    next_sample = samples.copy()

    t0 = time.time()
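    # Autoregressive sampling: one channel/row/column position per pass; the model's
    # binarized predictions are fed back in as next_sample and the p-value for the
    # current position is stored in samples.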
    for j in xrange(HEIGHT):
        for k in xrange(WIDTH):
            for i in xrange(N_CHANNELS):
                samples_p_value = sample_fn(latents, next_sample)
                next_sample = binarize(samples_p_value)
                samples[:, i, j, k] = samples_p_value[:, i, j, k]

    t1 = time.time()
    print("Time taken for generation normally {:.4f}".format(t1 - t0))

    save_images(samples_p_value, 'samples_pval_repeated_')
Example No. 3
def generate_and_save_samples(tag):

    lib.save_params(os.path.join(OUT_DIR, tag + "_params.pkl"))

    def save_images(images, filename, i=None):
        """images.shape: (batch, n channels, height, width)"""
        if i is not None:
            new_tag = "{}_{}".format(tag, i)
        else:
            new_tag = tag

        images = images.reshape((10, 10, 28, 28))

        images = images.transpose(1, 2, 0, 3)
        images = images.reshape((10 * 28, 10 * 28))

        # image = scipy.misc.toimage(images, cmin=0.0, cmax=1.0)
        image = Image.fromarray((images * 255).astype(np.uint8))
        image.save('{}/{}_{}.jpg'.format(OUT_DIR, filename, new_tag))

    latents = np.random.normal(size=(100, LATENT_DIM))

    latents = latents.astype(theano.config.floatX)

    samples = np.zeros((100, N_CHANNELS, HEIGHT, WIDTH),
                       dtype=theano.config.floatX)

    next_sample = samples.copy()

    t0 = time.time()
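    # Autoregressive sampling conditioned on the latents: only the binarized value of
    # the position currently being generated is written back into next_sample.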
    for j in range(HEIGHT):
        for k in range(WIDTH):
            for i in range(N_CHANNELS):
                samples_p_value = sample_fn(latents, next_sample)
                next_sample[:, i, j, k] = binarize(samples_p_value)[:, i, j, k]
                samples[:, i, j, k] = samples_p_value[:, i, j, k]

    t1 = time.time()
    print("Time taken for generation {:.4f}".format(t1 - t0))

    save_images(samples_p_value, 'samples')
Example No. 4
def generate_and_save_samples(tag):

    costs = []
    # for (images,) in test_data():
    #     costs.append(eval_fn(images))
    # print "test cost: {}".format(np.mean(costs))
    lib.save_params(os.path.join(OUT_DIR, tag + "_params.pkl"))

    def save_images(images, filename):
        """images.shape: (batch, n channels, height, width)"""
        images = images.reshape((10,10,28,28))
        # rowx, rowy, height, width -> rowy, height, rowx, width
        images = images.transpose(1,2,0,3)
        images = images.reshape((10*28, 10*28))

        image = scipy.misc.toimage(images, cmin=0.0, cmax=1.0)
        image.save('{}/{}_{}.jpg'.format(OUT_DIR, filename, tag))

    samples = np.zeros(
        (100, N_CHANNELS, HEIGHT, WIDTH), 
        dtype=theano.config.floatX
    )

    next_sample = samples.copy()

    t0 = time.time()
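    # Unconditional autoregressive sampling (no latent input): binarize each generated
    # position before moving on to the next one.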
    for j in xrange(HEIGHT):
        for k in xrange(WIDTH):
            for i in xrange(N_CHANNELS):
                samples_p_value = sample_fn(next_sample)
                next_sample[:,i,j,k] = binarize(samples_p_value)[:,i,j,k]
                samples[:, i, j, k] = samples_p_value[:, i, j, k]

    t1 = time.time()
    save_images(samples, 'samples')
    print("Time taken with slowest generation is {:.4f}s".format(t1 - t0))
Example No. 5
                for images, targets in dev_data():
                    images = images.reshape((-1, HEIGHT, WIDTH, 1))
                    binarized = binarize(images)
                    dev_cost = eval_fn(binarized)
                    dev_costs.append(dev_cost)
            else:
                dev_costs.append(0.)

            print "epoch:{}\ttotal iters:{}\ttrain cost:{}\tdev cost:{}\ttotal time:{}\ttime per iter:{}".format(
                epoch, total_iters, numpy.mean(costs), numpy.mean(dev_costs),
                total_time, total_time / total_iters)

            tag = "iters{}_time{}".format(total_iters, total_time)
            if GEN_SAMPLES:
                generate_and_save_samples(tag)
            lib.save_params('params_{}.pkl'.format(tag))

            costs = []
            last_print_time += PRINT_TIME
            last_print_iters += PRINT_ITERS

        if (TRAIN_MODE=='iters' and total_iters == STOP_ITERS) or \
            (TRAIN_MODE=='time' and total_time >= STOP_TIME):

            print "Done!"

            try:  # This only matters on Ishaan's computer
                import experiment_tools
                experiment_tools.send_sms("done!")
            except ImportError:
                pass
Example No. 6
            epoch, total_iters, (time() - exp_start) / 3600, lowest_valid_cost,
            corresponding_test_cost, numpy.mean(costs), total_time / 3600,
            total_time / total_iters, valid_cost, valid_time / 3600, test_cost,
            test_time / 3600)
        print print_info

        tag = "e{}_i{}_t{:.2f}_tr{:.4f}_v{:.4f}"
        tag = tag.format(epoch, total_iters, total_time / 3600,
                         numpy.mean(cost), valid_cost)
        tag += ("_best" if new_lowest_cost else "")

        # 3. Save params of model (IO bound, time consuming)
        # If saving params is not successful, there shouldn't be any trace of
        # successful monitoring step in train_log as well.
        print "Saving params!",
        lib.save_params(os.path.join(PARAMS_PATH, 'params_{}.pkl'.format(tag)))
        print "Done!"

        # 4. Save and graph training progress (fast)
        training_info = {
            epoch_str: epoch,
            iter_str: total_iters,
            train_nll_str: numpy.mean(costs),
            valid_nll_str: valid_cost,
            test_nll_str: test_cost,
            lowest_valid_str: lowest_valid_cost,
            corresp_test_str: corresponding_test_cost,
            'train time': total_time,
            'valid time': valid_time,
            'test time': test_time,
            'wall clock time': time() - exp_start
Example No. 7
        start_time = time.time()
        cost, h0 = train_fn(seqs, h0, reset)
        total_time += time.time() - start_time
        total_iters += 1

        costs.append(cost)

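        # Report and checkpoint either every PRINT_ITERS iterations or every PRINT_TIME
        # seconds, depending on TRAIN_MODE.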
        if (TRAIN_MODE=='iters' and total_iters-last_print_iters == PRINT_ITERS) or \
            (TRAIN_MODE=='time' and total_time-last_print_time >= PRINT_TIME):
            
            print "epoch:{}\ttotal iters:{}\ttrain cost:{}\ttotal time:{}\ttime per iter:{}".format(
                epoch,
                total_iters,
                numpy.mean(costs),
                total_time,
                total_time / total_iters
            )
            tag = "iters{}_time{}".format(total_iters, total_time)
            generate_and_save_samples(tag)
            lib.save_params('params_{}.pkl'.format(tag))

            costs = []
            last_print_time += PRINT_TIME
            last_print_iters += PRINT_ITERS

        if (TRAIN_MODE=='iters' and total_iters == STOP_ITERS) or \
            (TRAIN_MODE=='time' and total_time >= STOP_TIME):

            print "Done!"
            sys.exit()
Example No. 8
    itr = vctk_loader.data_loader('train',BATCH_SIZE)
    for train_spk,train_X,train_mask,train_ctx,_ in itr:
        iter += 1
        start = time.time()

        _loss = train_fn(
            noise_arr[i],
            train_X,
            train_spk,
            train_ctx,
            train_mask,
            LR
            )

        times.append(time.time()-start)
        costs.append(_loss)

        if iter%50==0:
            print "Iter: {} (Epoch {}) Cost: {} Time: {}".format(iter,i+1,np.mean(np.asarray(costs),axis=0),np.mean(times))

    print "\n\nEpoch %d Completed!"%(i+1)
    print "\tMean train cost: ",np.mean(np.asarray(costs),axis=0)
    print "\tMean time: ",np.mean(times)
    print ""

    cost = score()
    if cost<best_cost:
        best_cost = cost
        lib.save_params(SAVE_FILE_NAME)
        print "Saving Model {}!\n".format(SAVE_FILE_NAME)
Example No. 9
        # Generate and save samples
        print "Sampling!",
        tag = "e{}_i{}_t{:.2f}_tr{:.4f}_v{:.4f}"
        tag = tag.format(epoch,
                         total_iters,
                         total_time/3600,
                         numpy.mean(cost),
                         valid_cost)
        tag += ("_best" if new_lowest_cost else "")
        # Generate samples
        generate_and_save_samples(tag)
        print "Done!"

        # Save params of model
        lib.save_params(
                os.path.join(PARAMS_PATH, 'params_{}.pkl'.format(tag))
        )
        print "Params saved!"

        if total_iters-last_print_iters == PRINT_ITERS \
            or total_time-last_print_time >= PRINT_TIME:
                # If we are here b/c of onom_end_of_batch, we shouldn't mess
                # with costs and last_print_iters
            costs = []
            last_print_time += PRINT_TIME
            last_print_iters += PRINT_ITERS

        end_of_batch = False
        new_lowest_cost = False

        print "Validation Done!\nBack to Training..."
Example No. 10
            cost, big_h0 = ip_train_fn(seqs, big_h0, reset)
            total_time += time.time() - start_time
            total_iters += 1

            costs.append(cost)



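            # Pretraining bookkeeping: log and checkpoint on the PRE_PRINT_ITERS /
            # PRE_PRINT_TIME schedule selected by PRE_TRAIN_MODE.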
            if (PRE_TRAIN_MODE=='iters' and total_iters-last_print_iters == PRE_PRINT_ITERS) or \
                (PRE_TRAIN_MODE=='time' and total_time-last_print_time >= PRE_PRINT_TIME):

                print "epoch:{}\ttotal iters:{}\ttrain cost:{}\ttotal time:{}\ttime per iter:{}".format(
                    epoch, total_iters, numpy.mean(costs), total_time,
                    total_time / total_iters)
                tag = "iters{}_time{}".format(total_iters, total_time)
                lib.save_params('params_pretrain_{}.pkl'.format(tag))

                costs = []
                last_print_time += PRE_PRINT_TIME
                last_print_iters += PRE_PRINT_ITERS

            if (PRE_TRAIN_MODE=='iters' and total_iters == PRE_STOP_ITERS) or \
                (PRE_TRAIN_MODE=='time' and total_time >= PRE_STOP_TIME):

                print "Done!"

                pretrain_finished = True

print "Training!"
total_iters = 0
total_time = 0.
Example No. 11
def train_loop(inputs,
               cost,
               train_data,
               times,
               prints=None,
               inject_total_iters=False,
               test_data=None,
               callback=None,
               optimizer=lasagne.updates.adam,
               save_params=False,
               nan_guard=False):

    params = lib.search(cost, lambda x: hasattr(x, 'param'))
    lib.print_params_info(params)

    grads = T.grad(cost, wrt=params, disconnected_inputs='warn')

    grads = [T.clip(g, lib.floatX(-1), lib.floatX(1)) for g in grads]

    updates = optimizer(grads, params)

    if prints is None:
        prints = [('cost', cost)]
    else:
        prints = [('cost', cost)] + prints

    print "Compiling train function..."
    if nan_guard:
        from theano.compile.nanguardmode import NanGuardMode
        mode = NanGuardMode(nan_is_error=True,
                            inf_is_error=True,
                            big_is_error=True)
    else:
        mode = None
    train_fn = theano.function(inputs, [p[1] for p in prints],
                               updates=updates,
                               on_unused_input='warn',
                               mode=mode)

    print "Compiling eval function..."
    eval_fn = theano.function(inputs, [p[1] for p in prints],
                              on_unused_input='warn')

    print "Training!"

    total_iters = 0
    total_seconds = 0.
    last_print = 0
    last_gen = 0

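    # times layout: (mode, print_interval, stop_threshold[, gen_interval[, early_stop[, early_stop_min]]])
    # where mode is either 'iters' or 'seconds'.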
    if len(times) >= 4:
        gen_every = times[3]
    else:
        gen_every = times[1]

    if len(times) >= 5:
        early_stop = times[4]
        if len(times) >= 6:
            early_stop_min = times[5]
        else:
            early_stop_min = 0
    else:
        early_stop = None
        early_stop_min = None

    best_test_cost = np.inf
    best_test_cost_iter = 0.

    all_outputs = []
    all_stats = []
    for epoch in itertools.count():

        generator = train_data()
        while True:
            try:
                inputs = next(generator)
            except StopIteration:
                break

            if inject_total_iters:
                inputs = [np.int32(total_iters)] + list(inputs)

            start_time = time.time()
            outputs = train_fn(*inputs)
            total_seconds += time.time() - start_time
            total_iters += 1

            all_outputs.append(outputs)

            if total_iters == 1:
                try:  # This only matters on Ishaan's computer
                    import experiment_tools
                    experiment_tools.register_crash_notifier()
                except ImportError:
                    pass

            if (times[0]=='iters' and total_iters-last_print == times[1]) or \
                (times[0]=='seconds' and total_seconds-last_print >= times[1]):

                mean_outputs = np.array(all_outputs).mean(axis=0)

                if test_data is not None:
                    if inject_total_iters:
                        test_outputs = [
                            eval_fn(np.int32(total_iters), *inputs)
                            for inputs in test_data()
                        ]
                    else:
                        test_outputs = [
                            eval_fn(*inputs) for inputs in test_data()
                        ]
                    test_mean_outputs = np.array(test_outputs).mean(axis=0)

                stats = collections.OrderedDict()
                stats['epoch'] = epoch
                stats['iters'] = total_iters
                for i, p in enumerate(prints):
                    stats['train ' + p[0]] = mean_outputs[i]
                if test_data is not None:
                    for i, p in enumerate(prints):
                        stats['test ' + p[0]] = test_mean_outputs[i]
                stats['secs'] = total_seconds
                stats['secs/iter'] = total_seconds / total_iters

                if test_data is not None and (
                        stats['test cost'] < best_test_cost or
                        (early_stop_min is not None and total_iters <= early_stop_min)):
                    best_test_cost = stats['test cost']
                    best_test_cost_iter = total_iters

                print_str = ""
                for k, v in stats.items():
                    if isinstance(v, int):
                        print_str += "{}:{}\t".format(k, v)
                    else:
                        print_str += "{}:{:.4f}\t".format(k, v)
                print print_str[:-1]  # omit the last \t

                all_stats.append(stats)

                all_outputs = []
                last_print += times[1]

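            # Every gen_every iters/seconds: run the sampling callback and, if requested,
            # checkpoint the parameters.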
            if (times[0]=='iters' and total_iters-last_gen==gen_every) or \
                (times[0]=='seconds' and total_seconds-last_gen >= gen_every):
                tag = "iters{}_time{}".format(total_iters, total_seconds)
                if callback is not None:
                    callback(tag)
                if save_params:
                    lib.save_params('params_{}.pkl'.format(tag))

                last_gen += gen_every

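            # Stop once the budget in times[2] is spent, or early-stop when the test cost
            # has not improved for early_stop iterations (after a 3*early_stop warm-up).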
            if (times[0]=='iters' and total_iters == times[2]) or \
                (times[0]=='seconds' and total_seconds >= times[2]) or \
                (test_data is not None and early_stop is not None and total_iters > (3*early_stop) and (total_iters-best_test_cost_iter) > early_stop):

                if (test_data is not None and early_stop is not None
                        and total_iters > (3 * early_stop)
                        and (total_iters - best_test_cost_iter) > early_stop):
                    print "Early stop! Best test cost was {} at iter {}".format(
                        best_test_cost, best_test_cost_iter)

                print "Done!"

                try:  # This only matters on Ishaan's computer
                    import experiment_tools
                    experiment_tools.send_sms("done!")
                except ImportError:
                    pass

                return all_stats
Example No. 12
    save_images(images, "samples")


if args.resume:
    lib.load_params(os.path.join(OUT_DIR_PARAMS, "params_last.pkl"))

print "Creating data loader.."
train_data, dev_data, test_data = lib.celeba.load(BATCH_SIZE, TEST_BATCH_SIZE)

generate_and_save_samples("initial")

print "Running Train loop.."

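# Any exception during training is caught below and the current parameters are saved
# as params_last.pkl, so a later run started with args.resume can reload them above.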
try:
    lib.train_loop.train_loop(inputs=[total_iters, images],
                              inject_total_iters=True,
                              cost=cost,
                              prints=[('KL', kl_cost),
                                      ('reconst', reconst_cost),
                                      ('alpha/beta', alpha)],
                              optimizer=functools.partial(lasagne.updates.adam,
                                                          learning_rate=0.001),
                              train_data=train_data,
                              test_data=dev_data,
                              callback=generate_and_save_samples,
                              times=TIMES)
except:
    print "Training interrupted. Saving params.. Please wait..."
    lib.save_params(os.path.join(OUT_DIR_PARAMS, "params_last.pkl"))
Example No. 13
def train_loop(
    inputs,
    cost,
    train_data,
    times,
    prints=None,
    inject_total_iters=False,
    test_data=None,
    callback=None,
    optimizer=lasagne.updates.adam,
    save_params=False,
    nan_guard=False
    ):

    params = lib.search(cost, lambda x: hasattr(x, 'param'))
    lib.print_params_info(params)

    grads = T.grad(cost, wrt=params, disconnected_inputs='warn')
    grads = [T.clip(g, lib.floatX(-1), lib.floatX(1)) for g in grads]

    updates = optimizer(grads, params)

    if prints is None:
        prints = [('cost', cost)]
    else:
        prints = [('cost', cost)] + prints

    print "Compiling train function..."
    if nan_guard:
        from theano.compile.nanguardmode import NanGuardMode
        mode = NanGuardMode(
            nan_is_error=True, 
            inf_is_error=True, 
            big_is_error=True
        )
    else:
        mode = None
    train_fn = theano.function(
        inputs,
        [p[1] for p in prints],
        updates=updates,
        on_unused_input='warn',
        mode=mode
    )

    print "Compiling eval function..."
    eval_fn = theano.function(
        inputs,
        [p[1] for p in prints],
        on_unused_input='warn'
    )

    print "Training!"
    total_iters = 0
    total_seconds = 0.
    last_print = 0
    all_outputs = []
    all_stats = []
    for epoch in itertools.count():

        for inputs in train_data():

            if inject_total_iters:
                inputs = [np.int32(total_iters)] + list(inputs)

            start_time = time.time()
            outputs = train_fn(*inputs)
            total_seconds += time.time() - start_time
            total_iters += 1

            all_outputs.append(outputs)

            if total_iters == 1:
                try: # This only matters on Ishaan's computer
                    import experiment_tools
                    experiment_tools.register_crash_notifier()
                except ImportError:
                    pass

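            # Every times[1] iters/seconds: print mean train (and test) stats, run the
            # sampling callback, and optionally save the parameters.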
            if (times[0]=='iters' and total_iters-last_print == times[1]) or \
                (times[0]=='seconds' and total_seconds-last_print >= times[1]):

                mean_outputs = np.array(all_outputs).mean(axis=0)

                if test_data is not None:
                    if inject_total_iters:
                        test_outputs = [
                            eval_fn(np.int32(total_iters), *inputs)
                            for inputs in test_data()
                        ]
                    else:
                        test_outputs = [
                            eval_fn(*inputs) 
                            for inputs in test_data()
                        ]
                    test_mean_outputs = np.array(test_outputs).mean(axis=0)

                stats = collections.OrderedDict()
                stats['epoch'] = epoch
                stats['iters'] = total_iters
                for i,p in enumerate(prints):
                    stats['train '+p[0]] = mean_outputs[i]
                if test_data is not None:
                    for i,p in enumerate(prints):
                        stats['test '+p[0]] = test_mean_outputs[i]
                stats['secs'] = total_seconds
                stats['secs/iter'] = total_seconds / total_iters

                print_str = ""
                for k,v in stats.items():
                    if isinstance(v, int):
                        print_str += "{}:{}\t".format(k,v)
                    else:
                        print_str += "{}:{:.4f}\t".format(k,v)
                print print_str[:-1] # omit the last \t

                all_stats.append(stats)

                tag = "iters{}_time{}".format(total_iters, total_seconds)
                if callback is not None:
                    callback(tag)
                if save_params:
                    lib.save_params('params_{}.pkl'.format(tag))

                all_outputs = []
                last_print += times[1]

            if (times[0]=='iters' and total_iters == times[2]) or \
                (times[0]=='seconds' and total_seconds >= times[2]):

                print "Done!"

                try: # This only matters on Ishaan's computer
                    import experiment_tools
                    experiment_tools.send_sms("done!")
                except ImportError:
                    pass

                return all_stats
Example No. 14
            costs.append(cost)



            if (PRE_TRAIN_MODE=='iters' and total_iters-last_print_iters == PRE_PRINT_ITERS) or \
                (PRE_TRAIN_MODE=='time' and total_time-last_print_time >= PRE_PRINT_TIME):
                
                print "epoch:{}\ttotal iters:{}\ttrain cost:{}\ttotal time:{}\ttime per iter:{}".format(
                    epoch,
                    total_iters,
                    numpy.mean(costs),
                    total_time,
                    total_time / total_iters
                )
                tag = "iters{}_time{}".format(total_iters, total_time)
                lib.save_params('params_pretrain_{}.pkl'.format(tag))

                costs = []
                last_print_time += PRE_PRINT_TIME
                last_print_iters += PRE_PRINT_ITERS

            if (PRE_TRAIN_MODE=='iters' and total_iters == PRE_STOP_ITERS) or \
                (PRE_TRAIN_MODE=='time' and total_time >= PRE_STOP_TIME):

                print "Done!"

                pretrain_finished = True

print "Training!"
total_iters = 0
total_time = 0.