# NOTE: the imports below are reconstructed assumptions based on the
# Blocks/Fuel APIs this snippet uses; StepSwitcher is not a stock Blocks
# step rule (a hypothetical sketch of it follows this function).
from theano import tensor
from blocks.bricks import Linear, Tanh, Softmax
from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate
from blocks.graph import ComputationGraph
from blocks.initialization import IsotropicGaussian, Constant
from blocks.algorithms import GradientDescent, CompositeRule, Scale
from blocks.extensions import FinishAfter, Printing
from blocks.extensions.monitoring import (DataStreamMonitoring,
                                          TrainingDataMonitoring)
from blocks.extensions.plot import Plot  # lives in blocks_extras in newer releases
from blocks.main_loop import MainLoop
from fuel.datasets import MNIST
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme


def main():
    print("Build the network")
    input_of_image = tensor.matrix('features')
    input_to_hidden = Linear(name='input_to_hidden', input_dim=784,
                             output_dim=100)
    h = Tanh().apply(input_to_hidden.apply(input_of_image))
    hidden_to_output = Linear(name='hidden_to_output', input_dim=100,
                              output_dim=10)
    output_hat = Softmax().apply(hidden_to_output.apply(h))
    output = tensor.lmatrix('targets')
    cost = CategoricalCrossEntropy().apply(output.flatten(), output_hat)
    correct_rate = 1 - MisclassificationRate().apply(output.flatten(),
                                                     output_hat)
    correct_rate.name = 'correct_rate'
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    # Initialize the parameters
    input_to_hidden.weights_init = hidden_to_output.weights_init = \
        IsotropicGaussian(0.01)
    input_to_hidden.biases_init = hidden_to_output.biases_init = Constant(0)
    input_to_hidden.initialize()
    hidden_to_output.initialize()

    # Train
    print("Prepare the data.")
    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    # Carve the data into lots of batches.
    data_stream_train = DataStream(mnist_train,
                                   iteration_scheme=SequentialScheme(
                                       mnist_train.num_examples,
                                       batch_size=256))

    # Set the algorithm for the training.
    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=CompositeRule(
                                    [Scale(0.9), StepSwitcher(0.05, 0.1)]))

    # Add monitoring extensions for the training.
    data_stream_test = DataStream(mnist_test,
                                  iteration_scheme=SequentialScheme(
                                      mnist_test.num_examples,
                                      batch_size=1024))
    test_monitor = DataStreamMonitoring(variables=[cost, correct_rate],
                                        data_stream=data_stream_test,
                                        prefix="test",
                                        after_every_epoch=True)
    train_monitor = TrainingDataMonitoring(
        variables=[cost, correct_rate, algorithm.total_step_norm],
        prefix='train',
        after_every_batch=True)

    # Add a plot monitor.
    plot = Plot(document='new',
                channels=[['train_correct_rate', 'test_correct_rate']],
                start_server=True,
                after_every_batch=True)

    print("Start training")
    main_loop = MainLoop(algorithm=algorithm,
                         data_stream=data_stream_train,
                         extensions=[plot,
                                     test_monitor,
                                     train_monitor,
                                     FinishAfter(after_n_epochs=20),
                                     Printing()])
    main_loop.run()
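# Hypothetical reconstruction of the StepSwitcher rule composed into the
# algorithm above. It is not part of Blocks, so this is only a guess at its
# behaviour from its two arguments: scale steps by `first_rate` for the first
# `switch_after` updates, then by `second_rate`. The Blocks StepRule API
# (compute_step returning a (step, updates) pair) is real.
import theano
from blocks.algorithms import StepRule


class StepSwitcher(StepRule):
    def __init__(self, first_rate, second_rate, switch_after=1000):
        self.first_rate = first_rate
        self.second_rate = second_rate
        self.switch_after = switch_after

    def compute_step(self, parameter, previous_step):
        # Per-parameter update counter; switches the scaling factor once it
        # passes the threshold.
        counter = theano.shared(0, name='iterations')
        rate = tensor.switch(counter < self.switch_after,
                             self.first_rate, self.second_rate)
        return rate * previous_step, [(counter, counter + 1)]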
# The fragment opens mid-function: this return is the tail of the
# _zero_for_unvoiced mapping used below (its def line is reconstructed here
# from how the pipeline calls it).
def _zero_for_unvoiced(data):
    # Multiply the spectral frames by the voiced mask; f0 and the mask pass
    # through untouched.
    return tuple([data[0] * data[2], data[1], data[2]])


data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'sp_standardize.npz')

data_stats = numpy.load(data_dir)
sp_mean = data_stats['sp_mean']
sp_std = data_stats['sp_std']
f0_mean = data_stats['f0_mean']
f0_std = data_stats['f0_std']

dataset = Blizzard(which_sets=('train',), filename="sp_blizzard.hdf5")
# Trim the dataset to a whole number of batches.
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(
        batch_size * (dataset.num_examples // batch_size), batch_size))
data_stream = Mapping(data_stream, _is_nonzero, add_sources=('voiced',))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / sp_std,
                            shift=-sp_mean / sp_std,
                            which_sources=('sp',))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / f0_std,
                            shift=-f0_mean / f0_std,
                            which_sources=('f0',))
data_stream = Mapping(data_stream, _zero_for_unvoiced)
data_stream = Mapping(data_stream, _transpose)
data_stream = SegmentSequence(data_stream, seq_size, add_flag=True)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream
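# Hypothetical sketches of the two remaining mapping helpers the pipeline
# references; in the original project they are defined elsewhere (and would
# have to appear before the pipeline runs), so these are guesses from usage,
# assuming the stream yields (sp, f0) in that order. With
# add_sources=('voiced',), Fuel appends the mapping's return value as a new
# source, so _is_nonzero derives the voiced mask from f0; _transpose flips
# each source to time-major layout.
def _is_nonzero(data):
    sp, f0 = data
    return ((f0 != 0).astype('float32'),)


def _transpose(data):
    return tuple(array.swapaxes(0, 1) for array in data)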
CUDA = True
print(MODEL_PATH_BEST)
batch_size = 1

train_data = H5PYDataset(DATASET_PATH,
                         which_sets=('train',),
                         sources=('s_transition_obs', 'r_transition_obs',
                                  'obs', 'actions'))
stream_train = DataStream(train_data,
                          iteration_scheme=ShuffledScheme(
                              train_data.num_examples, batch_size))

valid_data = H5PYDataset(DATASET_PATH,
                         which_sets=('valid',),
                         sources=('s_transition_obs', 'r_transition_obs',
                                  'obs', 'actions'))
stream_valid = DataStream(valid_data,
                          iteration_scheme=SequentialScheme(
                              valid_data.num_examples, batch_size))

net = LstmSimpleNet2Pusher(15, 10)
print(net)
# import ipdb; ipdb.set_trace()

if CUDA:
    net.cuda()

viz = VisdomExt([["loss", "validation loss"], ["diff"]],
                [dict(title='LSTM loss', xlabel='iteration', ylabel='loss'),
                 dict(title='Diff loss', xlabel='iteration', ylabel='error')])

means = {
    'o': np.array([
def train(self, n_datap, subj_idx, training_type):
    path = 'data/subj{1}/leftright_{0}.hdf5'.format(training_type, subj_idx)
    n0, split = 0, .8
    n_train = int(n_datap * split)

    train_set = H5PYDataset(path, which_sets=('train',),
                            subset=slice(n0, n0 + n_train))
    train_stream = DataStream.default_stream(
        train_set,
        iteration_scheme=SequentialScheme(train_set.num_examples,
                                          self.batch_size))
    train_epoch_it = train_stream.get_epoch_iterator()

    test_set = H5PYDataset(path, which_sets=('train',),
                           subset=slice(n0 + n_train, n0 + n_datap))
    features_test, labels_test = test_set.get_data(
        test_set.open(), slice(0, n_datap - n_train))

    # For every test sample i, build all windows of length j <= seq_max_len
    # that end at i; the label of a window is the label of its last sample.
    feat_test_lstm = [
        features_test[(i - min(j, self.seq_max_len)):i]
        for i in range(1, n_datap - n_train + 1)
        for j in range(1, min(i + 1, self.seq_max_len + 1))
    ]
    xlen_lstm_test = np.asarray(
        [feat_test_lstm[i].shape[0] for i in range(len(feat_test_lstm))])
    labels_test_lstm = np.concatenate([
        labels_test[i - 1][np.newaxis]
        for i in range(1, n_datap - n_train + 1)
        for j in range(1, min(i + 1, self.seq_max_len + 1))
    ], axis=0)
    feat_test_lstm = apply_to_zeros(feat_test_lstm,
                                    self.seq_max_len)  # zero padding

    # Keep only the windows that span the full seq_max_len.
    idx_test_seq_max_len = np.where(xlen_lstm_test == self.seq_max_len)[0]
    feat_test_lstm = feat_test_lstm[idx_test_seq_max_len]
    xlen_lstm_test = xlen_lstm_test[idx_test_seq_max_len]
    labels_test_lstm = labels_test_lstm[idx_test_seq_max_len]

    print('starting training \n')
    train_loss = 0
    nb_epoch = 0
    train_acc = 0
    best_acc = -1
    nb_batch = 0
    patience = 0
    while nb_epoch < 150 and patience < 100:
        try:
            stdout.write(".")
            stdout.flush()
            features, labels = train_epoch_it.__next__()
            nb_batch += 1
            nb_features = features.shape[0]
            feat_lstm = [
                features[(i - min(j, self.seq_max_len)):i]
                for i in range(1, nb_features + 1)
                for j in range(1, min(i + 1, self.seq_max_len + 1))
            ]
            xlen_lstm = np.asarray(
                [feat_lstm[i].shape[0] for i in range(len(feat_lstm))])
            lab_lstm = np.concatenate([
                labels[i - 1][np.newaxis]
                for i in range(1, nb_features + 1)
                for j in range(1, min(i + 1, self.seq_max_len + 1))
            ], axis=0)
            feat_lstm = apply_to_zeros(feat_lstm, self.seq_max_len)

            _, c, p = self.sess.run([self.optimizer, self.loss, self.pred],
                                    feed_dict={
                                        self.X: feat_lstm,
                                        self.Y: lab_lstm,
                                        self.seqlen: xlen_lstm
                                    })
            # Running averages over the batches seen so far this epoch.
            train_loss = (c / nb_batch
                          + train_loss * (nb_batch - 1) / nb_batch)
            train_acc = (np.mean(np.argmax(p, 1) == np.argmax(lab_lstm, 1))
                         / nb_batch
                         + train_acc * (nb_batch - 1) / nb_batch)
        except StopIteration:
            nb_epoch += 1
            tc, tp = self.sess.run([self.loss, self.pred],
                                   feed_dict={
                                       self.X: feat_test_lstm,
                                       self.Y: labels_test_lstm,
                                       self.seqlen: xlen_lstm_test
                                   })
            pred_acc = np.mean(
                np.argmax(tp, 1) == np.argmax(labels_test_lstm, 1))
            print('\n')
            print('finished {0} epoch'.format(nb_epoch))
            print('number of ones prediction is {0}'.format(
                np.sum(np.argmax(tp, 1) == 1)))
            print('test loss is {0}'.format(tc))
            print('test accuracy is {0}'.format(pred_acc))
            print('train loss is {0}'.format(train_loss))
            print('train accuracy is {0}'.format(train_acc))
            print('patience is {0}'.format(patience))
            if pred_acc > best_acc:
                print('new best accuracy {0}'.format(pred_acc))
                patience = 0
                best_acc = pred_acc
                save_path = self.saver.save(
                    self.sess,
                    "data/subj{1}/lstm_leftright_{0}.ckpt".format(
                        training_type, subj_idx))
            else:
                patience += 1
            # Start the next epoch with fresh running averages.
            train_epoch_it = train_stream.get_epoch_iterator()
            train_loss = 0
            train_acc = 0
            nb_batch = 0

    print('\ntraining finished with accuracy {0}'.format(best_acc))
    self.saver.restore(
        self.sess,
        "./data/subj{1}/lstm_leftright_{0}.ckpt".format(
            training_type, subj_idx))
    self.learning_done = True
    print('proportion of ones in test set : {0}'.format(
        np.sum(labels_test_lstm[:, 0] == 1) / len(labels_test_lstm[:, 0])))
    return [np.argmax(tp, 1), np.argmax(labels_test_lstm, 1)]
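# Hypothetical sketch of the apply_to_zeros helper used above for zero
# padding; the real implementation lives elsewhere in the project. It packs a
# list of variable-length windows into one fixed-size array, padding the tail
# of each window with zeros up to seq_max_len.
import numpy as np


def apply_to_zeros(list_of_arrays, seq_max_len):
    n_features = list_of_arrays[0].shape[-1]
    padded = np.zeros((len(list_of_arrays), seq_max_len, n_features),
                      dtype=list_of_arrays[0].dtype)
    for k, window in enumerate(list_of_arrays):
        padded[k, :window.shape[0]] = window
    return padded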
print "got " + str(numSamplesTest) + " test examples" print "batch size for training", batch_size print "batch size for testing", 1 if iterationSeed != -1: data_stream = DataStream(train_set, iteration_scheme=ShuffledExampleSchemeBatch( numTrainingBatches * batch_size, batch_size, iterationSeed)) else: data_stream = DataStream(train_set, iteration_scheme=ShuffledExampleSchemeBatch( numTrainingBatches * batch_size, batch_size)) data_stream_test = DataStream(test_set, iteration_scheme=SequentialScheme( numTestBatches, 1)) ################################ # allocate symbolic variables for the data x = T.matrix('x') # the data is presented as rasterized images y = T.imatrix('y') # the labels are presented as 1D vector of # [int] labels length = T.imatrix('length') time2 = time.time() print "time for preparing data structures: " + str(time2 - time1) ###################### # BUILD ACTUAL MODEL # ######################
def main(num_epochs=1000):
    # MODEL
    # defining the data
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')

    # defining the model
    softmax_regressor = SoftmaxRegressor(input_dim=784, n_classes=10)

    # defining the cost to learn on
    probs = softmax_regressor.get_probs(features=x)
    cost = softmax_regressor.get_cost(probs=probs, targets=y).mean()
    cost.name = 'cost'

    # defining the cost to monitor
    misclassification = softmax_regressor.get_misclassification(
        probs=probs, targets=y).mean()
    misclassification.name = 'misclassification'

    # DATASETS
    # defining the datasets
    train_dataset = MNIST('train')
    test_dataset = MNIST('test')

    # TRAINING ALGORITHM
    # defining the algorithm
    params = softmax_regressor.get_params()
    algorithm = GradientDescent(cost=cost,
                                params=params,
                                step_rule=Momentum(learning_rate=0.1,
                                                   momentum=0.1))

    # defining the data streams: how the datasets are read
    train_data_stream = ForceFloatX(
        data_stream=DataStream(dataset=train_dataset,
                               iteration_scheme=ShuffledScheme(
                                   examples=train_dataset.num_examples,
                                   batch_size=100)))
    test_data_stream = ForceFloatX(
        data_stream=DataStream(dataset=test_dataset,
                               iteration_scheme=SequentialScheme(
                                   examples=test_dataset.num_examples,
                                   batch_size=1000)))

    # MONITORING
    # defining the extensions
    extensions = []
    # timing the training and each epoch
    extensions.append(Timing())
    # ending the training after a certain number of epochs
    extensions.append(FinishAfter(after_n_epochs=num_epochs))
    # monitoring the test set
    extensions.append(DataStreamMonitoring([cost, misclassification],
                                           test_data_stream,
                                           prefix='test'))
    # monitoring the training set while training
    extensions.append(TrainingDataMonitoring([cost, misclassification],
                                             prefix='train',
                                             after_every_epoch=True))
    # printing quantities
    extensions.append(Printing())

    # MERGING IT ALL TOGETHER
    # defining the model
    model = Model(cost)
    # defining the training main loop
    main_loop = MainLoop(model=model,
                         data_stream=train_data_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
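# Hypothetical stand-in for the SoftmaxRegressor used above (the real class
# lives elsewhere in this project); it only pins down the interface main()
# relies on: get_probs, get_cost, get_misclassification and get_params.
import numpy
import theano
from theano import tensor


class SoftmaxRegressor(object):
    def __init__(self, input_dim, n_classes):
        # Zero initialisation, the textbook choice for softmax regression.
        self.W = theano.shared(
            numpy.zeros((input_dim, n_classes), dtype=theano.config.floatX),
            name='W')
        self.b = theano.shared(
            numpy.zeros(n_classes, dtype=theano.config.floatX), name='b')

    def get_probs(self, features):
        return tensor.nnet.softmax(tensor.dot(features, self.W) + self.b)

    def get_params(self):
        return [self.W, self.b]

    def get_cost(self, probs, targets):
        # Per-example cross-entropy; the caller takes the mean.
        return tensor.nnet.categorical_crossentropy(probs, targets.flatten())

    def get_misclassification(self, probs, targets):
        # Per-example 0/1 error; the caller takes the mean.
        return tensor.neq(tensor.argmax(probs, axis=1), targets.flatten())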
def train(args, model_args):
    #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_'
    model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    # load the training data
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features',))
        dataset_test = MNIST(['test'], sources=('features',))
        n_colors = 1
        spatial_width = 28
    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features',))
        dataset_test = CIFAR10(['test'], sources=('features',))
        n_colors = 3
        spatial_width = 32
    elif args.dataset == "lsun" or args.dataset == "lsunsmall":
        print "loading lsun class!"
        from load_lsun import load_lsun
        print "loading lsun data!"
        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64
        n_colors = 3
    elif args.dataset == "celeba":
        print "loading celeba data"
        from fuel.datasets.celeba import CelebA
        dataset_train = CelebA(which_sets=['train'], which_format="64",
                               sources=('features',), load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'], which_format="64",
                              sources=('features',), load_in_memory=False)
        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)
        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)
        dataset_train = train_stream
        dataset_test = test_stream
        #epoch_it = train_stream.get_epoch_iterator()
    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000, classes=1,
                           cycles=2., noise=0.01, sources=('features',))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))
    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    model_options = locals().copy()

    if args.dataset != 'lsun' and args.dataset != 'celeba':
        # Trim to a whole number of batches before flattening the images.
        train_stream = Flatten(DataStream.default_stream(
            dataset_train,
            iteration_scheme=ShuffledScheme(
                examples=(dataset_train.num_examples
                          - (dataset_train.num_examples % args.batch_size)),
                batch_size=args.batch_size)))
    else:
        train_stream = dataset_train
        test_stream = dataset_test

    print "Width", WIDTH, spatial_width

    shp = next(train_stream.get_epoch_iterator())[0].shape
    print "got epoch iterator"

    # Normalisation constants estimated from the first batch.
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)

    print 'Building model'
    params = init_params(model_options)

    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)

    tparams = init_tparams(params)
    print tparams

    x, cost, start_temperature, step_chain = build_model(tparams,
                                                         model_options)
    inps = [x.astype('float32'), start_temperature, step_chain]

    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    step_chain_part = T.scalar('step_chain_part', dtype='int32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature, step_chain_part)

    grads = T.grad(cost, wrt=itemlist(tparams))
    #get_grads = theano.function(inps, grads)

    # Replace NaN gradients with zeros before the update.
    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]),
                            T.zeros_like(grads[j]),
                            grads[j])

    # Compile the optimizer; the actual computational graph is built here.
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer
    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps,
                                                             cost)
    print 'Done'

    print 'Building sampler...'
    f_sample = sample(tparams, model_options)
    print 'Done'

    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 1
    print 'Number of steps....'
    print args.num_steps
    print "Number of metasteps...."
    print args.meta_steps
    print 'Done'
    count_sample = 1

    for eidx in xrange(max_epochs):
        n_samples = 0
        print 'Starting Next Epoch ', eidx
        for data in train_stream.get_epoch_iterator():
            if args.dataset == 'CIFAR10':
                if data[0].shape[0] == args.batch_size:
                    data_use = (data[0].reshape(args.batch_size,
                                                3 * 32 * 32),)
                else:
                    continue
            t0 = time.time()
            batch_index += 1
            n_samples += len(data_use[0])
            uidx += 1
            if data_use[0] is None:
                print 'No data '
                uidx -= 1
                continue

            ud_start = time.time()
            t1 = time.time()
            data_run = data_use[0]
            temperature_forward = args.temperature
            meta_cost = []
            for meta_step in range(0, args.meta_steps):
                data_run = data_run.astype('float32')
                meta_cost.append(f_grad_shared(data_run, temperature_forward,
                                               meta_step))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(
                        data_run, temperature_forward, meta_step)
                    temperature_forward *= args.temperature_factor
            cost = sum(meta_cost) / len(meta_cost)
            ud = time.time() - ud_start
            #gradient_updates_ = get_grads(data_use[0], args.temperature)

            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            logger.log({'epoch': eidx,
                        'batch_index': batch_index,
                        'uidx': uidx,
                        'training_error': cost})

            if batch_index % 20 == 0:
                print batch_index, "cost", cost

            if batch_index % 1000 == 0:
                print 'saving params'
                params = unzip(tparams)
                save_params(params, model_dir + '/' + 'params_' +
                            str(batch_index) + '.npz')

            if batch_index % 200 == 0:
                count_sample += 1
                '''
                temperature = args.temperature * (args.temperature_factor **
                    (args.num_steps * args.meta_steps - 1))
                temperature_forward = args.temperature
                for num_step in range(args.num_steps * args.meta_steps):
                    print "Forward temperature", temperature_forward
                    if num_step == 0:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            data[0].astype('float32'), temperature_forward, num_step)
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(x_temp, model_dir + '/' + "batch_" +
                                    str(batch_index) + '_corrupted' +
                                    'epoch_' + str(count_sample) +
                                    '_time_step_' + str(num_step))
                    else:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            x_data.astype('float32'), temperature_forward, num_step)
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(x_temp, model_dir + '/batch_' +
                                    str(batch_index) + '_corrupted' +
                                    '_epoch_' + str(count_sample) +
                                    '_time_step_' + str(num_step))
                    temperature_forward = temperature_forward * args.temperature_factor

                x_temp2 = data_use[0].reshape(args.batch_size, n_colors,
                                              WIDTH, WIDTH)
                plot_images(x_temp2, model_dir + '/' + 'orig_' + 'epoch_' +
                            str(eidx) + '_batch_index_' + str(batch_index))

                temperature = args.temperature * (args.temperature_factor **
                    (args.num_steps * args.meta_steps - 1))
                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        x_data.astype('float32'), temperature,
                        args.num_steps * args.meta_steps - i - 1)
                    print 'On backward step number, using temperature', i, temperature
                    reverse_time(scl, shft, x_data, model_dir + '/' +
                                 "batch_" + str(batch_index) +
                                 '_samples_backward_' + 'epoch_' +
                                 str(count_sample) + '_time_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor
                '''
                if args.noise == "gaussian":
                    x_sampled = np.random.normal(
                        0.5, 2.0,
                        size=(args.batch_size, INPUT_SIZE)).clip(0.0, 1.0)
                else:
                    # NOTE: the original drew into an unused variable `s`
                    # with shape (INPUT_SIZE,); the loop below needs
                    # x_sampled with a batch dimension, so this is a
                    # presumed fix.
                    x_sampled = np.random.binomial(
                        1, 0.5, size=(args.batch_size, INPUT_SIZE))

                temperature = args.temperature * (
                    args.temperature_factor **
                    (args.num_steps * args.meta_steps - 1))
                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, \
                        sampled_preactivation = f_sample(
                            x_data.astype('float32'), temperature,
                            args.num_steps * args.meta_steps - i - 1)
                    print 'On step number, using temperature', i, temperature
                    reverse_time(
                        scl, shft, x_data,
                        model_dir + '/batch_index_' + str(batch_index) +
                        '_inference_' + 'epoch_' + str(count_sample) +
                        '_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    # Anneal unless we are already at the base temperature.
                    if temperature != args.temperature:
                        temperature /= args.temperature_factor

                # ipdb.set_trace()  # leftover debugging breakpoint
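# Hypothetical sketches of the parameter (de)serialisation helpers the loop
# above relies on (unzip, save_params and load_params are project utilities);
# these follow the common Theano-codebase convention of a name -> ndarray
# dict, matching the load_params(filename, params) call used during reload:
import numpy as np


def save_params(params, filename):
    # params: dict mapping parameter names to numpy arrays, as produced by
    # unzip-ing the theano shared variables.
    np.savez(filename, **params)


def load_params(filename, params):
    loaded = np.load(filename)
    for name in params:
        if name in loaded:
            params[name] = loaded[name]
    return params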