def save_model(self, save_path):
    # Pickle the main network's parameters, then one record per tracker.
    with open(save_path, 'wb') as f:
        data = L.get_all_param_values(self.network)
        pkl.dump(data, f)
        for item in self.trackers:
            data = L.get_all_param_values(item)
            pkl.dump(data, f)
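# Hedged counterpart sketch (not part of the source): save_model above writes
# one pickle record for the main network followed by one per tracker, so a
# loader can read them back in the same order. Assumes self.network and
# self.trackers are already built with matching shapes.
def load_model(self, load_path):
    with open(load_path, 'rb') as f:
        L.set_all_param_values(self.network, pkl.load(f))
        for item in self.trackers:
            L.set_all_param_values(item, pkl.load(f))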
def get_param_values(self):
    '''Return a list of the trainable parameters' *values*, as np.ndarrays.'''
    return (get_all_param_values(self.l_embed_query, trainable=True) +
            get_all_param_values(self.l_embed_context, trainable=True))
def train(data_train, data_val, train_fn, val_fn, network,
          max_epochs=100, patience=20, save_run=True, eval_fn=None):
    """Generic training strategy for neural networks (batch training,
    train/val sets, patience).

    Trains a neural network on some data (lists of inputs and targets),
    given a train function and an eval function on that data."""
    print("training...")
    run = []
    best_model = None
    if patience <= 0:
        patience = max_epochs
    patience_val = 0
    best_val = None
    for epoch in range(max_epochs):
        start_time = time.time()
        train_err, val_err = train_iteration(data_train, data_val,
                                             train_fn, val_fn)
        run.append(layers.get_all_param_values(network))
        if np.isnan(val_err) or np.isnan(train_err):
            print("Train error or validation error is NaN, "
                  "stopping now.")
            break
        # Calculating patience
        if best_val is None or val_err < best_val:
            best_val = val_err
            patience_val = 0
            best_model = layers.get_all_param_values(network)
        else:
            patience_val += 1
            if patience_val > patience:
                print("No improvements after {} iterations, "
                      "stopping now".format(patience))
                break
        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, max_epochs, time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err))
        print(" validation loss:\t\t{:.6f}".format(val_err))
        if eval_fn is not None:
            acc = eval_fn(*data_val)
            print(" validation accuracy:\t\t{:.2f} %".format(acc))
    return best_model, run
def parameter_analysis(layer):
    all_params = ll.get_all_param_values(layer, trainable=True)
    param_names = [p.name for p in ll.get_all_params(layer, trainable=True)]
    print_gradinfo(param_names,
                   {'nneg': [np.count_nonzero(p < 0) / np.product(p.shape)
                             for p in all_params],
                    'norm': [np.linalg.norm(p) for p in all_params],
                    'shape': [p.shape for p in all_params]})
def multi_driver_training(options):
    """Train a model using all drivers' data in the given datapath."""
    print("Loading data...")
    dataset = load_all(options)

    print("Building model and compiling functions...")
    net = models.cnn_1(options, output_size=len(dataset['label_map']))
    # net = models.softmax_only(output_size=len(dataset['label_map']))

    print("Starting training...")
    start_time = time.time()
    try:
        train_loop(dataset['train_data'], dataset['val_data'], net, options)
        print('Training Complete...')
    except KeyboardInterrupt:
        print('Keyboard Interrupt...')
    end_time = time.time()

    print('--------------------')
    print(' Saving model, check logs for results.\n'
          ' Time taken: {0}\n'.format(end_time - start_time))
    utils.save_model(model=get_all_param_values(net), options=options)
    return net
def load_model_predict(PATH_simresult, test_set_X):
    # Load sim results
    print 'loading', PATH_simresult, '\n'
    with open(PATH_simresult, "rb") as f:
        temp = pickle.load(f)
    network = temp[-1]
    best_network_params = get_all_param_values(network)

    # extract input var
    print 'extract input var \n'
    X = get_all_layers(network)[0].input_var

    # build test function
    print 'build test function and reinit network \n'
    test_fn = build_test_func(test_set_X, network, X)
    reinitiate_set_params(network, weights=best_network_params)

    print 'test set shape', test_set_X.shape, 'type:', type(test_set_X), '\n'
    print 'make prediction \n'
    # predictedy = test_fn(test_set_X)

    # batched implementation
    batch_size = 128
    n_test_batches = test_set_X.shape[0] // batch_size + 1
    test_set_x_size = test_set_X.shape[0]
    predictedy = [test_fn(
        test_set_X[index * batch_size: min((index + 1) * batch_size,
                                           test_set_x_size)])
        for index in range(n_test_batches)]
    predictedy = np.vstack(predictedy)
    return predictedy
def summary(self, light=False):
    """Print a summary of the network architecture."""
    layer_list = get_all_layers(self.output_layer)

    def filter_function(layer):
        """We only display the layers in the list below."""
        return np.any([isinstance(layer, layer_type) for layer_type in
                       [InputLayer, Conv2DLayer, Pool2DLayer,
                        Deconv2DLayer, ConcatLayer]])

    layer_list = filter(filter_function, layer_list)
    output_shape_list = map(get_output_shape, layer_list)
    layer_name_function = lambda s: str(s).split('.')[3].split('Layer')[0]

    if not light:
        print('-' * 75)
        print 'Warning : all the layers are not displayed \n'
        print ' {:<15} {:<20} {:<20}'.format('Layer', 'Output shape', 'W shape')
        for i, (layer, output_shape) in enumerate(zip(layer_list,
                                                      output_shape_list)):
            if hasattr(layer, 'W'):
                input_shape = layer.W.get_value().shape
            else:
                input_shape = ''
            print '{:<3} {:<15} {:<20} {:<20}'.format(
                i + 1, layer_name_function(layer), output_shape, input_shape)
            if isinstance(layer, Pool2DLayer) | isinstance(layer, Deconv2DLayer):
                print('')

    print '\nNumber of Convolutional layers : {}'.format(
        len(filter(lambda x: isinstance(x, Conv2DLayer) |
                   isinstance(x, Deconv2DLayer), layer_list)))
    print 'Number of parameters : {}'.format(
        np.sum(map(np.size, get_all_param_values(self.output_layer))))
    print('-' * 75)
def insert_weights(self, regr):
    '''
    In order the following operations are done:
      - Update mask main part: activate another node
      - Copy the 'new_node' weights in the main part of the net
      - Copy the regr weights in the 'new_node' part
      - Recompile the net

    Structure of parameters:
      - W1: (num_classes*num_filters1*num_nodes, num_inputs, filter_length1)
      - b1: (num_classes*num_filters1*num_nodes, )
      - W2: (num_classes, num_classes*num_filters1*num_nodes, filter_length2)
      - b2: (num_classes,)
    '''
    # ------------------
    # Update mask:
    # ------------------
    self.net.layers_['mask'].add_node()
    actNode = self.net.layers_['mask'].active_nodes
    self.active_nodes = actNode

    # ------------------
    # Get weights:
    # ------------------
    W1, b1, maskParam, W2, b2 = layers.get_all_param_values(
        self.net.layers_['conv2'])
    newNode_W1, newNode_b1, newNode_W2, newNode_b2 = layers.get_all_param_values(
        self.net.layers_['conv2_newNode'])
    reg_W1, reg_b1, reg_W2, reg_b2 = layers.get_all_param_values(
        regr.net.layers_['conv2'])
    # boost_const = self.net.layers_['boosting_merge'].boosting_constant.get_value()

    # --------------------
    # Update main part:
    # --------------------
    if actNode > 0:
        nNodes = self.num_filters1  ### ReLU MOD
        start = nNodes * (actNode - 1)
        stop = nNodes * actNode
        slice_weights = slice(start, stop)
        W1[slice_weights, :, :], b1[slice_weights] = newNode_W1, newNode_b1
        # For the moment I don't touch b2... Not sure about this...
        W2[:, slice_weights, :], b2 = newNode_W2, b2 + newNode_b2
        layers.set_all_param_values(self.net.layers_['conv2'],
                                    [W1, b1, maskParam, W2, b2])

    # --------------------
    # Insert new node:
    # --------------------
    newNode_W1, newNode_b1, newNode_W2, newNode_b2 = reg_W1, reg_b1, reg_W2, reg_b2
    layers.set_all_param_values(self.net.layers_['conv2_newNode'],
                                [newNode_W1, newNode_b1, newNode_W2, newNode_b2])
def train(data, train_fn, val_fn, network, max_epochs=4000, patience=100):
    (train_words, train_clusters), (test_words, test_clusters) = data
    run = []
    best_model = None
    if patience <= 0:
        patience = max_epochs
    patience_val = 0
    best_val = None
    for epoch in range(max_epochs):
        data_train = sample_data.generate_abnet_batch(
            train_words, train_clusters, epoch, features_getter,
            input_features_getter, return_indexes=False)
        data_val = sample_data.generate_abnet_batch(
            test_words, test_clusters, epoch, features_getter,
            input_features_getter, return_indexes=False)
        start_time = time.time()
        train_err, val_err = abnet2.train_iteration(
            data_train, data_val, train_fn, val_fn)
        if epoch % 20 == 0:
            run.append(layers.get_all_param_values(network))
        if np.isnan(val_err) or np.isnan(train_err):
            print("Train error or validation error is NaN, "
                  "stopping now.")
            break
        # Calculating patience
        if best_val is None or val_err < best_val:
            best_val = val_err
            patience_val = 0
            best_model = layers.get_all_param_values(network)
        else:
            patience_val += 1
            if patience_val > patience:
                print("No improvements after {} iterations, "
                      "stopping now".format(patience))
                break
        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, max_epochs, time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err))
        print(" validation loss:\t\t{:.6f}".format(val_err))
        acc = nnet.eer(*data_val)
        print(" score eer:\t\t{:.2f} %".format(acc))
        auc = nnet.auc(*data_val)
        print(" score auc:\t\t{:.2f} %".format(auc))
    return best_model, run
def save_params(self, filename, quiet=False):
    if not quiet:
        print "Saving network weights to " + filename + "..."
    self._prepare_for_save()
    params = get_all_param_values(self.approximator.network)
    pickle.dump(params, open(filename, "wb"))
    if not quiet:
        print "Saving finished."
def parameter_analysis(layer):
    all_params = ll.get_all_param_values(layer, regularizable=True)
    for param in all_params:
        print(param.shape)
        nneg_w = np.count_nonzero(param < 0) / np.product(param.shape)
        normed_norm = np.linalg.norm(param) / np.product(param.shape)
        print("Number of negative weights: %0.2f" % nneg_w)
        print("Weight norm (normalized by size): %0.10f" % normed_norm)
def test_get_all_param_values(self):
    from lasagne.layers import (InputLayer, DenseLayer, get_all_param_values)
    l1 = InputLayer((10, 20))
    l2 = DenseLayer(l1, 30)
    l3 = DenseLayer(l2, 40)
    pvs = get_all_param_values(l3)
    assert len(pvs) == 4
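# A possible companion check (an assumption, not from the source): for the toy
# network above, get_all_param_values returns W and b of each DenseLayer in
# topological order, so the shapes are fully determined.
def test_get_all_param_values_shapes(self):
    from lasagne.layers import (InputLayer, DenseLayer, get_all_param_values)
    l1 = InputLayer((10, 20))
    l2 = DenseLayer(l1, 30)
    l3 = DenseLayer(l2, 40)
    pvs = get_all_param_values(l3)
    assert [p.shape for p in pvs] == [(20, 30), (30,), (30, 40), (40,)]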
def save(self, filename, quiet=False):
    if not quiet:
        print "Saving qengine to " + filename + "..."
    self._prepare_for_save()
    network_params = get_all_param_values(self._evaluator.network)
    params = [self.setup, network_params]
    pickle.dump(params, open(filename, "wb"))
    if not quiet:
        print "Saving finished."
def save_model(self, epoch=None):
    if epoch is not None:
        fname = self.conf.save_model.replace('%e', str(epoch).zfill(5))
    else:
        fname = self.conf.save_model.replace('%e', 'final')
    if self.conf.verbosity > 1:
        print "Saving model to", fname
    np.savez(fname, *get_all_param_values(self.autoencoder))
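# Hedged counterpart sketch (not from the source): np.savez stores the arrays
# as arr_0, arr_1, ..., so they can be read back in order and pushed into a
# network rebuilt with the same architecture as self.autoencoder.
def load_model(self, fname):
    with np.load(fname) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    set_all_param_values(self.autoencoder, param_values)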
def save_model(network, params, fname):
    weights = get_all_param_values(network)
    params['input_dim'] = weights[0].shape[0]
    params['output_dim'] = weights[-1].shape[0]
    params = {k: v for k, v in params.iteritems() if k in _standard_config}
    p = _standard_config.copy()
    p.update(params)
    with open(fname, 'wb') as fout:
        pickle.dump((p, weights), fout, -1)
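# Hedged sketch of a matching loader for the (config, weights) tuple pickled
# above. `build_network` is a hypothetical stand-in for whatever rebuilds the
# Lasagne graph from the saved config dict.
def load_model(fname, build_network):
    with open(fname, 'rb') as fin:
        config, weights = pickle.load(fin)
    network = build_network(config)
    set_all_param_values(network, weights)
    return network, config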
def build_treatment_model(self, n_vars, **kwargs):
    input_vars = TT.matrix()
    instrument_vars = TT.matrix()
    targets = TT.vector()

    inputs = layers.InputLayer((None, n_vars), input_vars)
    inputs = layers.DropoutLayer(inputs, p=0.2)

    dense_layer = layers.DenseLayer(inputs, 2 * kwargs['dense_size'],
                                    nonlinearity=nonlinearities.rectify)
    dense_layer = layers.batch_norm(dense_layer)
    dense_layer = layers.DropoutLayer(dense_layer, p=0.2)

    for _ in xrange(kwargs['n_dense_layers'] - 1):
        dense_layer = layers.DenseLayer(dense_layer, kwargs['dense_size'],
                                        nonlinearity=nonlinearities.rectify)
        dense_layer = layers.batch_norm(dense_layer)

    self.treatment_output = layers.DenseLayer(dense_layer, 1,
                                              nonlinearity=nonlinearities.linear)
    init_params = layers.get_all_param_values(self.treatment_output)

    prediction = layers.get_output(self.treatment_output, deterministic=False)
    test_prediction = layers.get_output(self.treatment_output, deterministic=True)

    l2_cost = regularization.regularize_network_params(self.treatment_output,
                                                       regularization.l2)
    loss = gmm_loss(prediction, targets, instrument_vars) + 1e-4 * l2_cost

    params = layers.get_all_params(self.treatment_output, trainable=True)
    param_updates = updates.adadelta(loss, params)

    self._train_fn = theano.function(
        [input_vars, targets, instrument_vars],
        loss,
        updates=param_updates
    )

    self._loss_fn = theano.function(
        [input_vars, targets, instrument_vars],
        loss,
    )

    self._output_fn = theano.function(
        [input_vars],
        test_prediction,
    )

    return init_params
def insert_weights(self, boostedPerceptron):
    '''
    In order the following operations are done:
      - Update mask main part: activate another perceptron
      - Copy the boostedPerceptron weights in the greedyLayer

    Structure of parameters: (check lasagne doc)
      - W1: (num_classes*num_filters1*num_nodes, num_inputs, filter_length1)
      - b1: (num_classes*num_filters1*num_nodes, )
      - W2: (num_classes, num_classes*num_filters1*num_nodes, filter_length2)
      - b2: (num_classes,)
    '''
    # ------------------
    # Update mask:
    # ------------------
    self.net.layers_['mask'].add_perceptron()
    self.active_perceptrons = self.net.layers_['mask'].active_perceptrons

    # ------------------
    # Get weights:
    # ------------------
    all_net_params = layers.get_all_param_values(self.net.layers_['greedyConv_2'])
    W1, b1, maskParam, W2, b2 = all_net_params[-5:]
    perc_W1, perc_b1, perc_W2, perc_b2 = layers.get_all_param_values(
        boostedPerceptron.net.layers_['greedyConv_2'])[-4:]

    # --------------------
    # Update main part:
    # --------------------
    start = self.nodes_partition[self.active_perceptrons - 1]
    stop = self.nodes_partition[self.active_perceptrons]
    slice_weights = slice(start, stop)
    # !!! For the moment I don't touch b2... !!!
    # b1[slice_weights] = perc_b1
    if self.layer_type == "conv":
        W1[slice_weights, :, :] = perc_W1
        W2[:, slice_weights, :] = perc_W2
    if self.layer_type == "trans_conv":
        W1[:, slice_weights, :] = perc_W1
        W2[slice_weights, :, :] = perc_W2
    layers.set_all_param_values(self.net.layers_['greedyConv_2'],
                                all_net_params[:-5] + [W1, b1, maskParam, W2, b2])
def build_instrument_model(self, n_vars, **kwargs):
    targets = TT.vector()
    instrument_vars = TT.matrix()

    instruments = layers.InputLayer((None, n_vars), instrument_vars)
    instruments = layers.DropoutLayer(instruments, p=0.2)

    dense_layer = layers.DenseLayer(instruments, kwargs['dense_size'],
                                    nonlinearity=nonlinearities.tanh)
    dense_layer = layers.DropoutLayer(dense_layer, p=0.2)

    for _ in xrange(kwargs['n_dense_layers'] - 1):
        dense_layer = layers.DenseLayer(dense_layer, kwargs['dense_size'],
                                        nonlinearity=nonlinearities.tanh)
        dense_layer = layers.DropoutLayer(dense_layer, p=0.5)

    self.instrument_output = layers.DenseLayer(dense_layer, 1,
                                               nonlinearity=nonlinearities.linear)
    init_params = layers.get_all_param_values(self.instrument_output)

    prediction = layers.get_output(self.instrument_output, deterministic=False)
    test_prediction = layers.get_output(self.instrument_output, deterministic=True)

    # flexible here, endog variable can be categorical, continuous, etc.
    l2_cost = regularization.regularize_network_params(self.instrument_output,
                                                       regularization.l2)
    loss = objectives.squared_error(prediction.flatten(),
                                    targets.flatten()).mean() + 1e-4 * l2_cost
    loss_total = objectives.squared_error(prediction.flatten(),
                                          targets.flatten()).mean()

    params = layers.get_all_params(self.instrument_output, trainable=True)
    param_updates = updates.adadelta(loss, params)

    self._instrument_train_fn = theano.function(
        [targets, instrument_vars],
        loss,
        updates=param_updates
    )

    self._instrument_loss_fn = theano.function(
        [targets, instrument_vars],
        loss_total
    )

    self._instrument_output_fn = theano.function([instrument_vars],
                                                 test_prediction)

    return init_params
def save(self, prefix):
    import json
    # save weights
    np.savez(prefix + "_weights.npz", *get_all_param_values(self.output))
    # save network config params
    with open(prefix + "_config.json", "w") as f:
        f.write(json.dumps(
            {"emb_dim": self.emb_dim,
             "rnn_dim": self.rnn_dim,
             "hid_dim": self.hid_dim,
             "vocab_size": self.vocab_size,
             "context": self.context,
             "cell": self.cell,
             "add_dense": self.add_dense,
             "depth": self.depth,
             "cell_args": self.cell_args}))
def print_weight_distribution(net, layer_name=None):
    n_layers = len(net.layers)
    layers_names = [net.layers[i][1]['name'] for i in range(1, n_layers)]
    mean, std, weights = {}, {}, {}
    for name in layers_names:
        if "conv" in name:
            layer = net.layers_[name]
            W, _ = get_all_param_values(layer)[-2:]
            mean[name], std[name], weights[name] = W.mean(), W.std(), W

    if layer_name:
        # print "Mean: %g; \tstd: %g" % (mean[layer_name], std[layer_name])
        return mean[layer_name], std[layer_name]
    else:
        for name in mean:
            print "Layer %s: \tMean: %g; \tstd: %g" % (name, mean[name], std[name])
def per_driver_train_predict(options):
    """Predict output probabilities for each driver.
    If `options.train` is true, this trains a model."""
    # TODO: support for predict only
    # TODO: support for pretrained weights
    print("Loading data...")
    dataset = load_all(options)
    all_outputs = deque([])
    for driver_index in range(len(dataset['label_map'])):
        driver_id = dataset['label_map'][driver_index]
        print('Working on Driver {0}'.format(driver_id))
        net = _train_single(driver_index, dataset, options)
        utils.save_model(model=get_all_param_values(net), options=options,
                         driver_id=driver_id)
        print('Training complete for driver {0}, making predictions...'.format(driver_id))
        outputs = _predict_single(net, driver_id, options)
        utils.save_outputs(outputs, options, driver_id=driver_id)
        all_outputs.extend(outputs)
    utils.save_outputs(all_outputs, options)
    return
def convert(dataset_name):
    dataset = Dataset(dataset_name)

    # Create theano graph
    input_var = T.tensor4('input')
    net = build_model(input_var)

    # Load caffe model
    net_caffe = caffe.Net(dataset.model_path, dataset.pretrained_path, caffe.TEST)

    # Set the parameters from caffe into lasagne
    load_caffe_model(net, net_caffe)

    # Save the parameters
    p = join(dirname(__file__), 'pretrained', dataset.model_name + '.pkl')
    output = open(p, 'wb')
    params = get_all_param_values(net['prob'])
    pickle.dump(params, output)
    output.close()
def reinitiate_set_params(network, weights=None):
    # Change the weights of a trained network to a random set or to
    # user-defined values. Useful for big networks and cross-validation:
    # instead of spending a long time recompiling, just re-init the weights.
    if not weights:
        old = get_all_param_values(network)
        weights = []
        for layer in old:
            shape = layer.shape
            if len(shape) < 2:
                shape = (shape[0], 1)
            W = GlorotUniform()(shape)
            if W.shape != layer.shape:
                W = np.squeeze(W, axis=1)
            weights.append(W)
    set_all_param_values(network, weights)
    return network
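# Rough usage sketch (hypothetical names, not from the source): with a single
# compiled network, reinitiate_set_params avoids recompilation between
# cross-validation folds by resetting the weights in place.
initial_weights = get_all_param_values(network)
for train_idx, val_idx in folds:
    reinitiate_set_params(network, weights=initial_weights)  # same start point
    fit(network, X[train_idx], y[train_idx])                 # placeholder training call
# or pass weights=None to draw fresh Glorot-uniform values for each fold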
def save_model(network, epoch, model_name, learning_rate=0.0, directory='models'):
    """Saves network parameters, epoch, learning_rate."""
    params = layers.get_all_param_values(network)
    file_name = model_name + "-ep" + str(epoch) + ".pickle"
    file_path = directory + '/' + file_name
    print "==> Saving model to %s" % file_path
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(file_path, 'wb') as save_file:
        pickle.dump(
            obj={
                'params': params,
                'epoch': epoch,
                'learning_rate': learning_rate,
            },
            file=save_file,
            protocol=-1,
        )
def test_batch_size():
    input_var01, input_var16 = T.tensor3s('input01', 'input16')
    l_output01 = model(input_var01, batch_size=1)
    l_output16 = model(input_var16, batch_size=16)

    # Share the parameters for both models
    params01 = get_all_param_values(l_output01)
    set_all_param_values(l_output16, params01)

    posterior_fn01 = theano.function([input_var01], get_output(l_output01))
    posterior_fn16 = theano.function([input_var16], get_output(l_output16))

    example_input = np.random.rand(16, 30, 8)
    example_output16 = posterior_fn16(example_input)
    example_output01 = np.zeros_like(example_output16)

    for i in range(16):
        example_output01[i] = posterior_fn01(example_input[i][np.newaxis, :, :])

    assert example_output16.shape == (16, 30, 8)
    assert np.allclose(example_output16, example_output01, atol=1e-3)
def __init__(self, rc):
    # steal stuff from world/initialize
    self.rc = rc
    self.world = mj.MJCWorld(rc['model_file'])

    self.model = self.world.get_model()
    self.dX = self.model['nq'] + self.model['nv']
    self.dU = self.model['nu']

    # compute dO
    # TODO: put this somewhere else, reorganize observation module
    dO = 0
    ndims = np.sum(rc['obs_dims'])
    fields = rc['obs_fields']
    if 'qpos' in fields:
        dO += self.model['nq']
    if 'qvel' in fields:
        dO += self.model['nv']
    if 'xipos' in fields:
        dO += ndims * (self.model['nbody'] - 1)
    if 'ximat' in fields:
        dO += ndims * ndims * (self.model['nbody'] - 1)
    if 'site_xpos' in fields:
        dO += ndims * self.model['nsite']
    if 'to_target' in fields:
        dO += ndims
    self.dO = dO

    # TODO: start with synthetic data included/reorganize buffer storage
    self.buf = ReplayBuffer(self.dO, self.dU)

    # create nets and copy train net to target net
    self.net = ActorCriticNet(self.dO, self.dU, rc['num_units'], rc['ctrl_limits'])
    self.target_net = ActorCriticNet(self.dO, self.dU, rc['num_units'], rc['ctrl_limits'])
    values = ll.get_all_param_values(self.net.critic)
    ll.set_all_param_values(self.target_net.critic, values)

    # compile all the theano functions
    self._compile()
def train_model(data_logs, runtime_configuration):
    print("********************************************************")
    print("Setting up inputs & targets. This could take a while ...")
    print("")
    packaged_data = set_inputs_n_targets(data_logs, runtime_configuration)
    neural_net = conv_net_X(runtime_configuration,
                            num_output_units=len(packaged_data['class_labels']))

    print("********************************************************")
    print("Now training ...")
    tick = time.time()
    try:
        train_using(packaged_data['train_with'], packaged_data['val_with'],
                    neural_net, data_logs, runtime_configuration)
        print('Finished training without interruption ...')
    except KeyboardInterrupt:
        print('Terminating ...')
    tock = time.time()

    saved_to = os.path.abspath(data_logs['record_stats_location'])
    save_it(model=get_all_param_values(neural_net), data_logs=data_logs,
            runtime_configuration=runtime_configuration)

    mins, secs = divmod(int(tock - tick), 60)
    hrs, mins = divmod(mins, 60)
    print('===============================')
    print(' Network\'s "state" saved to '
          ' "{0}".\n'
          '===============================\n'
          'Training time: {1[0]:02d} hrs, {1[1]:02d} mins, {1[2]:02d} secs \n'
          .format(saved_to, (hrs, mins, secs)))
    return neural_net
def learn(self, render_training=False, render_test=False,
          learning_steps_per_epoch=10000, test_episodes_per_epoch=1,
          epochs=200, max_test_steps=2000):
    print "Starting the training!"
    train_results = []
    test_results = []
    time_start = time()

    for epoch in range(epochs):
        print "\nEpoch %d\n-------" % (epoch + 1)
        eps = self.exploration_rate(epoch + 1, epochs)
        print "Eps = %.2f" % eps
        train_episodes_finished = 0
        train_scores = []

        print "Training..."
        s1 = env.reset()
        s1 = self.preprocess(s1)
        score = 0
        for learning_step in trange(learning_steps_per_epoch):
            s2, reward, isterminal = self.perform_learning_step(
                epoch, epochs, s1)
            '''
            a = self.get_best_action(s1)
            (s2, reward, isterminal, _) = env.step(a)  # TODO: Check a
            s2 = self.preprocess(s2) if not isterminal else None
            '''
            score += reward
            s1 = s2
            if render_training:
                env.render()
            if isterminal:
                train_scores.append(score)
                s1 = env.reset()
                s1 = self.preprocess(s1)
                train_episodes_finished += 1
                score = 0

        print "%d training episodes played." % train_episodes_finished
        train_scores = np.array(train_scores)
        print "Results: mean: %.1f±%.1f," % (train_scores.mean(), train_scores.std()), \
            "min: %.1f," % train_scores.min(), "max: %.1f," % train_scores.max()
        train_results.append((train_scores.mean(), train_scores.std()))

        print("Saving training results...")
        with open("train_results.txt", "w") as train_result_file:
            train_result_file.write(str(train_results))

        print "\nTesting..."
        test_scores = []
        for test_episode in trange(test_episodes_per_epoch):
            s1 = env.reset()
            s1 = self.preprocess(s1)
            score = 0
            isterminal = False
            frame = 0
            while not isterminal and frame < max_test_steps:
                a = self.get_best_action(s1)
                (s2, reward, isterminal, _) = env.step(a)  # TODO: Check a
                s2 = self.preprocess(s2) if not isterminal else None
                score += reward
                s1 = s2
                if render_test:
                    env.render()
                frame += 1
            test_scores.append(score)

        test_scores = np.array(test_scores)
        print "Results: mean: %.1f±%.1f," % (test_scores.mean(), test_scores.std()), \
            "min: %.1f" % test_scores.min(), "max: %.1f" % test_scores.max()
        test_results.append((test_scores.mean(), test_scores.std()))

        print("Saving test results...")
        with open("test_results.txt", "w") as test_result_file:
            test_result_file.write(str(test_results))

        print "Saving the network weights..."
        pickle.dump(get_all_param_values(self.dqn), open('weights.dump', "w"))

    print "Total elapsed time: %.2f minutes" % ((time() - time_start) / 60.0)
    env.render(close=True)
    print "======================================"
    print "Training finished. It's time to watch!"
def save_model(self, save_path):
    data = L.get_all_param_values([self.e_net, self.q_net])
    with open(save_path, 'wb') as f:
        pkl.dump(data, f)
def train_loop(output_layer, iter_funcs, dataset, batch_size, max_epochs,
               patience=100,
               learning_rate_start=theano.shared(float32(0.03)),
               learning_rate_stop=theano.shared(float32(0.001)),
               momentum_start=theano.shared(float32(0.9)),
               momentum_stop=theano.shared(float32(0.999)),
               verbose=True):
    best_valid_loss = np.inf
    best_valid_epoch = 0
    best_train_loss = np.inf
    best_weights = None
    learning_rates = np.logspace(
        np.log10(learning_rate_start.get_value()),
        np.log10(learning_rate_stop.get_value()),
        max_epochs)
    momentums = np.linspace(
        momentum_start.get_value(), momentum_stop.get_value(), max_epochs)
    now = time.time()
    history = []

    if verbose:
        printer = ProgressPrinter(color=True)

    try:
        for epoch in train(iter_funcs, dataset, batch_size=batch_size):
            epoch_number = epoch['number']
            train_loss = epoch['train_loss']
            valid_loss = epoch['valid_loss']
            valid_acc = epoch['valid_accuracy']

            info = OrderedDict([
                ('epoch', epoch_number),
                ('train_loss', train_loss),
                ('train_loss_best', train_loss <= best_train_loss),
                ('train_loss_worse', train_loss > history[-1]['train_loss']
                 if len(history) > 0 else False),
                ('valid_loss', valid_loss),
                ('valid_loss_best', valid_loss <= best_valid_loss),
                ('valid_loss_worse', valid_loss > history[-1]['valid_loss']
                 if len(history) > 0 else False),
                ('valid_accuracy', valid_acc),
                ('duration', time.time() - now)])
            history.append(info)
            now = time.time()
            if verbose:
                printer(history)

            # early stopping
            if epoch['valid_loss'] < best_valid_loss:
                best_valid_loss = valid_loss
                best_valid_epoch = epoch_number
                best_weights = get_all_param_values(output_layer)
            elif epoch['number'] >= max_epochs:
                break
            elif best_valid_epoch + patience < epoch_number:
                if verbose:
                    print(" stopping early")
                    print(" best validation loss was {:.6f} at epoch {}."
                          .format(best_valid_loss, best_valid_epoch))
                break

            if epoch['number'] >= max_epochs:
                if verbose:
                    print(' last epoch')
                    print(' best validation loss was {:.6f} at epoch {}.'
                          .format(best_valid_loss, best_valid_epoch))
                break

            # adjust learning rate and momentum
            new_learning_rate = float32(learning_rates[epoch_number - 1])
            learning_rate_start.set_value(new_learning_rate)
            new_momentum = float32(momentums[epoch_number - 1])
            momentum_start.set_value(new_momentum)
    except KeyboardInterrupt:
        pass

    return best_valid_loss, best_valid_epoch, best_weights, history
def get_model_parameters(self, layer='output'):
    """Return all the parameters of the network."""
    return layers.get_all_param_values(self.network[layer])
def weights(self): return layers.get_all_param_values(self.outputs)
            best_action_index = get_best_action(state)
            game.make_action(actions[best_action_index], skiprate + 1)

        r = game.get_total_reward()
        test_rewards.append(r)

    test_end = time()
    test_time = test_end - test_start

    print "Test results:"
    test_rewards = np.array(test_rewards)
    print "mean:", test_rewards.mean(), "std:", test_rewards.std(), \
        "max:", test_rewards.max(), "min:", test_rewards.min()
    print "t:", str(round(test_time, 2)) + "s"

    if params_savefile:
        print "Saving network weights to:", params_savefile
        pickle.dump(get_all_param_values(net), open(params_savefile, "w"))

print "========================="
print "Training finished! Time to watch!"

game.close()
game.set_window_visible(True)
game.set_mode(Mode.ASYNC_PLAYER)
game.init()

# Sleeping time between episodes, for convenience.
episode_sleep = 0.0

for i in range(episodes_to_watch):
    game.new_episode()
    while not game.is_episode_finished():
        print(('[Epoch %03i][trn] cost %9.6f (cla %6.4f, reg %6.4f), '
               '|grad| = %.06f, acc = %7.5f %% (%.2fsec)') %
              (it_count, epoch_cost[0], epoch_cost[1], epoch_cost[2],
               epoch_cost[3], epoch_cost[4] * 100, time.time() - tic))

        if np.isnan(epoch_cost[0]):
            print("NaN in the loss function...let's stop here")
            break

        if (it_count % eval_freq) == 0:
            v_c, v_a = [], []
            for x_ in ds.test_iter():
                tmp = funcs['acc_loss'](*x_)
                v_a.append(tmp[0])
                v_c.append(tmp[1])
            test_cost = [np.mean(v_c), np.mean(v_a)]
            print((' [tst] cost %9.6f, acc = %7.5f %%') %
                  (test_cost[0], test_cost[1] * 100))
            if epoch_cost[0] < best_trn:
                kvs.store('best_train_params',
                          [it_count, LL.get_all_param_values(ffn)])
                best_trn = epoch_cost[0]
            if test_cost[0] < best_tst:
                kvs.store('best_test_params',
                          [it_count, LL.get_all_param_values(ffn)])
                best_tst = test_cost[0]

    print("...done training %f" % (time.time() - start_time))

    rewrite = True
    out_path = '../output/'
    print "Saving output to: %s" % out_path

    if not os.path.isdir(out_path) or rewrite == True:
        try:
            os.makedirs(out_path)
        except:
def train(self, dataset, save_name='Best_model', num_epochs=100, batch_size=1,
          LR_start=1e-4, LR_decay=1, compute_confusion=False, justTest=False,
          debug=False, roundParams=False, withNoise=False, noiseType='white',
          ratio_dB=0, logger=logger_RNNtools):
    X_train, y_train, valid_frames_train, X_val, y_val, valid_frames_val, \
        X_test, y_test, valid_frames_test = dataset

    confusion_matrices = []

    # try to load performance metrics of stored model
    # (stores old_train_info into self.network_train_info)
    best_val_acc, test_acc, old_train_info = self.loadPreviousResults(save_name)
    logger.info("Initial best Val acc: %s", best_val_acc)
    logger.info("Initial best test acc: %s\n", test_acc)
    self.best_val_acc = best_val_acc

    logger.info("Pass over Test Set")
    test_cost, test_acc, test_topk_acc = self.run_epoch(
        X=X_test, y=y_test, valid_frames=valid_frames_test)
    logger.info("Test cost:\t\t{:.6f}".format(test_cost))
    logger.info("Test accuracy:\t\t{:.6f} %".format(test_acc))
    logger.info("Test Top 3 accuracy:\t{:.6f} %".format(test_topk_acc))

    self.network_train_info['nb_params'] = lasagne.layers.count_params(
        self.network_lout_batch)

    if justTest:
        if os.path.exists(save_name + ".npz"):
            self.saveFinalResults(logger, noiseType, ratio_dB, roundParams,
                                  save_name, test_acc, test_cost,
                                  test_topk_acc, withNoise)
            return 0
        # else do nothing and train anyway
    else:
        self.network_train_info['test_cost'].append(test_cost)
        self.network_train_info['test_acc'].append(test_acc)
        self.network_train_info['test_topk_acc'].append(test_topk_acc)

    logger.info("\n* Starting training...")
    LR = LR_start
    self.best_cost = 100
    for epoch in range(num_epochs):
        self.curr_epoch += 1
        epoch_time = time.time()
        logger.info("\n\nCURRENT EPOCH: %s", self.curr_epoch)

        logger.info("Pass over Training Set")
        train_cost, train_acc, train_topk_acc = self.run_epoch(
            X=X_train, y=y_train, valid_frames=valid_frames_train, LR=LR)

        logger.info("Pass over Validation Set")
        val_cost, val_acc, val_topk_acc = self.run_epoch(
            X=X_val, y=y_val, valid_frames=valid_frames_val)

        # Print epoch summary
        logger.info("Epoch {} of {} took {:.3f}s.".format(
            epoch + 1, num_epochs, time.time() - epoch_time))
        logger.info("Learning Rate:\t\t{:.6f} %".format(LR))
        logger.info("Training cost:\t{:.6f}".format(train_cost))
        logger.info("Validation cost:\t{:.6f}".format(val_cost))
        logger.info("Validation accuracy:\t\t{:.6f} %".format(val_acc))
        logger.info("Validation Top 3 accuracy:\t{:.6f} %".format(val_topk_acc))

        # better model, so save parameters
        if val_acc > self.best_val_acc:
            # only reset if significant improvement
            if val_acc - self.best_val_acc > 0.2:
                self.epochsNotImproved = 0
            # store new parameters
            self.best_cost = val_cost
            self.best_val_acc = val_acc
            self.best_epoch = self.curr_epoch
            self.best_param = L.get_all_param_values(self.network_lout)
            self.best_updates = [p.get_value() for p in self.updates.keys()]
            logger.info("New best model found!")
            if save_name is not None:
                logger.info("Model saved as " + save_name)
                self.save_model(save_name)

            logger.info("Pass over Test Set")
            test_cost, test_acc, test_topk_acc = self.run_epoch(
                X=X_test, y=y_test, valid_frames=valid_frames_test)
            logger.info("Test cost:\t\t{:.6f}".format(test_cost))
            logger.info("Test accuracy:\t\t{:.6f} %".format(test_acc))
            logger.info("Test Top 3 accuracy:\t{:.6f} %".format(test_topk_acc))

        # save the training info
        self.network_train_info['train_cost'].append(train_cost)
        self.network_train_info['val_cost'].append(val_cost)
        self.network_train_info['val_acc'].append(val_acc)
        self.network_train_info['val_topk_acc'].append(val_topk_acc)
        self.network_train_info['test_cost'].append(test_cost)
        self.network_train_info['test_acc'].append(test_acc)
        self.network_train_info['test_topk_acc'].append(test_topk_acc)
        saveToPkl(save_name + '_trainInfo.pkl', self.network_train_info)
        logger.info("Train info written to:\t %s", save_name + '_trainInfo.pkl')

        if compute_confusion:
            confusion_matrices.append(self.create_confusion(X_val, y_val)[0])
            logger.info(' Confusion matrix computed')
            with open(save_name + '_conf.pkl', 'wb') as cPickle_file:
                cPickle.dump([confusion_matrices], cPickle_file,
                             protocol=cPickle.HIGHEST_PROTOCOL)

        # update LR, see if we can stop training
        LR = self.updateLR(LR, LR_decay, logger=logger_RNNtools)

        if self.epochsNotImproved >= 3:
            logging.warning("\n\nNo more improvements, stopping training...")
            logger.info("Pass over Test Set")
            test_cost, test_acc, test_topk_acc = self.run_epoch(
                X=X_test, y=y_test, valid_frames=valid_frames_test)
            logger.info("Test cost:\t\t{:.6f}".format(test_cost))
            logger.info("Test accuracy:\t\t{:.6f} %".format(test_acc))
            logger.info("Test Top 3 accuracy:\t{:.6f} %".format(test_topk_acc))

            self.network_train_info['test_cost'][-1] = test_cost
            self.network_train_info['test_acc'][-1] = test_acc
            self.network_train_info['test_topk_acc'][-1] = test_topk_acc
            self.saveFinalResults(logger, noiseType, ratio_dB, roundParams,
                                  save_name, test_acc, test_cost,
                                  test_topk_acc, withNoise)
            break
"datatype: {}".format(Recurrent_output_value.shape, type(Recurrent_output_value))) print("network output" "Shape: {}" "datatype: {}".format(network_output_value.shape, type(network_output_value))) print("Pointwise Cost: {}" "Cost: {}".format(cost_values_pointwise.shape, cost_value)) cost_vector = [] for epoch in range(NUM_EPOCHS): #pdb.set_trace() shuffle_order = np.random.permutation(x.shape[0]) x = x[shuffle_order, :] y = y[shuffle_order, :] y_merged = y.reshape([-1]) mask = mask[shuffle_order, :] mask_merged = mask.reshape([-1]) cost = train(x,y_merged,mask,mask_merged) print("Epoch: {}" "\tcost = {}".format(epoch,cost) ) cost_vector.append(cost) if epoch % 10 == 0: np.savez('CLM_model.npz', *get_all_param_values(l_dense, trainable=True)) # plt.plot(np.arange(NUM_EPOCHS),cost_vector) # plt.show()
def save_all_params(self, agent, key=None):
    """Saves agent params into the database under the given name.
    Overwrites by default."""
    key = key or self.default_params_key
    all_params = get_all_param_values(
        list(agent.agent_states) + agent.policy + agent.action_layers)
    self.redis.set(key, self.dumps(all_params))
def save_model(filename, model): save(filename, LL.get_all_param_values(model))
    grad, params, learning_rate=0.05
)   # you can change the learning rate; 0.05 was used in the tutorial

f_train = t.function([x_sym, y_sym], [loss, acc], updates=updates)
f_predict = t.function([x_sym], pred)

batch_size = 100  # you can change the batch size according to your number of images
max_epoch = 5     # number of training cycles; you can change this too
n_batches = len(x_train) // batch_size  # try to make this an integer,
                                        # otherwise the remaining images won't be trained

train_batches = batch_gen(x_train, y_train, batch_size)

for epoch in range(max_epoch):
    train_loss = 0
    train_acc = 0
    for _ in range(n_batches):
        x, y = next(train_batches)
        loss, acc = f_train(x, y)
        train_loss += loss
        train_acc += acc
    train_loss /= n_batches
    train_acc /= n_batches
    print(epoch, train_loss, train_acc)
    np.savez('c:/users/Microsoft/desktop/trained_parameters' + str(epoch) + '.npz',
             *L.get_all_param_values(l_output))

# np.savez('c:/users/Microsoft/desktop/trained_parameters.npz',
#          *L.get_all_param_values(l_output))
# This will save the trained parameters in a .npz file to use for testing.
# f = open('c:/users/microsoft/desktop/trained_parameters1.pkl', 'wb')
# pickle.dump(L.get_all_param_values(l_output), f, protocol=pickle.HIGHEST_PROTOCOL)
# This will save the trained parameters in a .pkl file; you can use either file.
# f.close()
def train():
    """Training model."""
    # Compile training and testing functions
    [model, train_fn, val_fn, predict_fn] = get_model()

    # Load training data
    print('Loading training data...')
    X, y = load_train_data()

    # print('Pre-processing images...')
    # X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_epoch = 200
    batch_size = 32
    calc_crps = 0  # calculate CRPS every n-th iteration (set to 0 if CRPS
                   # estimation is not needed) NOT IMPLEMENTED YET

    print('-' * 50)
    print('Training...')
    print('-' * 50)

    min_val_err = sys.float_info.max
    patience = 0
    for i in range(nb_epoch):
        print('-' * 50)
        print('Iteration {0}/{1}'.format(i + 1, nb_epoch))
        print('-' * 50)

        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        # In each epoch, we do a full pass over the training data:
        print('Fitting model...')
        train_err = 0
        train_batches = 0
        for batch in iterate_minibatches(X_train_aug, y_train, batch_size,
                                         shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_batches = 0
        for batch in iterate_minibatches(X_test, y_test, batch_size,
                                         shuffle=False):
            inputs, targets = batch
            val_err += val_fn(inputs, targets)
            val_batches += 1

        print('Saving weights...')
        # save weights so they can be loaded later
        # np.savez('weights.npz', *get_all_param_values(model))

        # for best (lowest) val losses, save weights
        if val_err < min_val_err:
            patience = 0
            min_val_err = val_err
            np.savez('weights_best.npz', *get_all_param_values(model))
        else:
            patience += 1

        print('error on validation set: ' + str(val_err))
        print('patience variable is: ' + str(patience))
        print('\n')

        # save best (lowest) val losses in file (to be later used for
        # generating submission)
        with open('val_loss.txt', mode='a') as f:
            f.write(str(val_err))
            f.write('\n')

        if patience == 8:
            break