Example #1
    def trainFunction(self):

        trainPrediction = get_output(self.output_layer)
        trainLoss = categorical_crossentropy(trainPrediction,
                                             self.target_var).mean()

        params = get_all_params(self.output_layer, trainable=True)
        update = momentum(trainLoss, params, learning_rate=0.001, momentum=0.9)
        trainFunc = theano.function([self.input_var, self.target_var],
                                    [trainLoss],
                                    updates=update)

        return trainFunc
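
A minimal sketch of how a compiled training function like the one returned above is typically driven. The names `net`, `X_train`, `y_train`, and the batch size are placeholders assumed for illustration and are not part of the original example:

import numpy as np

# Hypothetical driver loop for the function returned by trainFunction().
train_fn = net.trainFunction()      # `net` is assumed to be an instance of the class above
batch_size = 128                    # assumed minibatch size

for epoch in range(10):
    epoch_losses = []
    for start in range(0, len(X_train), batch_size):
        xb = X_train[start:start + batch_size]
        yb = y_train[start:start + batch_size]
        loss, = train_fn(xb, yb)    # the compiled function returns [trainLoss]
        epoch_losses.append(loss)
    print('epoch %d: mean loss %.4f' % (epoch, np.mean(epoch_losses)))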
Example #2
    def __init__(self, feature_size, lr, beta):
        self.beta = beta
        self.input_var = T.matrix('inputs', dtype=floatX)
        target_var = T.vector('targets', dtype=floatX)
        network = self._build_mlp(feature_size, 6)
        prediction = lasagne.layers.get_output(network)
        loss = lasagne.objectives.squared_error(prediction, target_var)
        mean_loss = loss.mean()
        var_loss = loss.var()
        params = lasagne.layers.get_all_params(network, trainable=True)
        updates = momentum(mean_loss, params, learning_rate=lr, momentum=0.2)

        self.predict_f = theano.function([self.input_var], prediction)
        self.train_f = theano.function([self.input_var, target_var],
                                       [mean_loss, var_loss], updates=updates)
Example #3
def get_updates(nnet, train_obj, trainable_params, solver=None):

    implemented_solvers = ("sgd", "momentum", "nesterov", "adagrad", "rmsprop",
                           "adadelta", "adam", "adamax")

    if solver not in implemented_solvers:
        nnet.sgd_solver = "adam"
    else:
        nnet.sgd_solver = solver

    if nnet.sgd_solver == "sgd":
        updates = l_updates.sgd(train_obj,
                                trainable_params,
                                learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "momentum":
        updates = l_updates.momentum(train_obj,
                                     trainable_params,
                                     learning_rate=Cfg.learning_rate,
                                     momentum=Cfg.momentum)
    elif nnet.sgd_solver == "nesterov":
        updates = l_updates.nesterov_momentum(train_obj,
                                              trainable_params,
                                              learning_rate=Cfg.learning_rate,
                                              momentum=Cfg.momentum)
    elif nnet.sgd_solver == "adagrad":
        updates = l_updates.adagrad(train_obj,
                                    trainable_params,
                                    learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "rmsprop":
        updates = l_updates.rmsprop(train_obj,
                                    trainable_params,
                                    learning_rate=Cfg.learning_rate,
                                    rho=Cfg.rho)
    elif nnet.sgd_solver == "adadelta":
        updates = l_updates.adadelta(train_obj,
                                     trainable_params,
                                     learning_rate=Cfg.learning_rate,
                                     rho=Cfg.rho)
    elif nnet.sgd_solver == "adam":
        updates = l_updates.adam(train_obj,
                                 trainable_params,
                                 learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "adamax":
        updates = l_updates.adamax(train_obj,
                                   trainable_params,
                                   learning_rate=Cfg.learning_rate)

    return updates
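
A hedged sketch of how the updates returned by a dispatcher like `get_updates` are usually consumed. The attributes on `nnet` (`output_layer`, `input_var`, `target_var`) are assumptions made for illustration; the original snippet only shows that `nnet` carries the solver configuration:

import theano
import lasagne

# Assumed symbolic graph; in the original code these live on the `nnet` object.
prediction = lasagne.layers.get_output(nnet.output_layer)
loss = lasagne.objectives.categorical_crossentropy(prediction, nnet.target_var).mean()
params = lasagne.layers.get_all_params(nnet.output_layer, trainable=True)

updates = get_updates(nnet, loss, params, solver="momentum")
train_fn = theano.function([nnet.input_var, nnet.target_var], loss, updates=updates)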
Example #4
    def __init__(self, *args, **kwargs):
        super(TrainerMixin, self).__init__(*args, **kwargs)
        input_var = tensor.tensor4('inputs')
        target_var = tensor.ivector('targets')

        loss, _ = loss_acc(self.model,
                           input_var,
                           target_var,
                           deterministic=False)
        layers = get_all_layers(self.model)
        decay = regularize_layer_params(layers, l2) * 0.0001
        loss = loss + decay

        params = get_all_params(self.model, trainable=True)
        updates = momentum(loss,
                           params,
                           momentum=0.9,
                           learning_rate=self.learning_rate)
        self.set_training(input_var, target_var, loss, updates)
Example #5
    def trainFunction(self):

        startTime = time.time()

        trainPrediction = get_output(self.sectorNet)
        trainLoss = categorical_crossentropy(trainPrediction,
                                             self.targetVar).mean()
        trainACC = T.mean(T.eq(T.argmax(trainPrediction, axis=1),
                               self.targetVar),
                          dtype=theano.config.floatX)

        params = get_all_params(self.sectorNet, trainable=True)
        update = momentum(trainLoss, params, learning_rate=0.001, momentum=0.9)
        trainFunc = theano.function([self.inputVar, self.targetVar],
                                    [trainLoss, trainACC],
                                    updates=update)
        self.logger.info(
            'Compiling the train function took {:.2f}s.'.format(
                time.time() - startTime))

        return trainFunc
Example #6
def Train(options,init_params,build_model,DataHandler):
	load=options['load'];
	loadHis=options['loadHis'];
	saveto=options['saveto'];
	loadfrom=options['loadfrom'];
	dataset=options['dataset'];
	last_n=options['last_n'];
	fsize=options['videosize'];


	print ">>>init params & build graph";
	tparams=init_params(options);
	cost,preds,inner_state,inps,use_noise=build_model(options,tparams);
	print "build done"

	print ">>>compile cost&updates function";
	start=time.time();
	f=theano.function(inps,[cost,preds],allow_input_downcast=True,on_unused_input='ignore');

	print "cost function ready"
	if options['finetune']:
		updates=momentum(cost, itemlist(tparams), options['lrate'], momentum=options['momentum']);
	else:
		updates=adam(cost, itemlist(tparams), learning_rate=options['lrate'], beta1=0.9, beta2=0.999, epsilon=1e-08); 
	print len(itemlist(tparams))
	print "updates ready",len(updates)
	f_update=theano.function(inps,[cost,preds],updates=updates,allow_input_downcast=True,on_unused_input='ignore');
	print "update function ready"
	print "compile finish, use %.1fmin"%((time.time()-start)/60);

	print '>>>Optimization'
	# ready dataset
	dh_train = DataHandler(options['dataset'],datatype=0,fps=options['fps']); dh_train.SetMode('source');
	dh_valid = DataHandler(options['dataset'],datatype=1,fps=options['fps']); dh_valid.SetMode('source');
	
	train_log=np.empty((0,4),dtype='float32');
	min_valid_cost=1e8;
	max_valid_acc=0;
	if loadHis and os.path.exists(loadfrom):
		print "load log history from",loadfrom
		train_log = np.load(loadfrom)['train_log'];
		min_valid_cost=train_log[:,2].min();
	 	max_valid_acc=train_log[:,3].max();

	train_num=dh_train.batch_num;  # should be set to dh_train.batch_num
	for epochidx in xrange(options['max_epochs']):
		use_noise.set_value(1.0);
		dh_train.Reset();
		print 'Epoch ', epochidx
		start=time.time();
		for vidx in xrange(train_num):
			x,mask,y=dh_train.GetSingleVideoFromSource(size=fsize,scale=1);
			x=x.reshape([x.shape[0],x.shape[1],fsize,fsize,3]);
			x=x.transpose([0,1,4,2,3]);
			x=x.reshape([x.shape[0],x.shape[1],-1]);
			cost,preds=f_update(x,mask,y);

			acc=((y.mean(0)).argmax(1)==preds).mean();
			print cost,acc;
			# print tparams['recog/cnn_conv2_w'].get_value().sum(),tparams['recog/cnn_conv3_w'].get_value().sum(),tparams['recog/cnn_conv4_w'].get_value().sum(),tparams['recog/cnn_conv5_w'].get_value().sum(),(tparams['recog/cnn_conv5_w'].get_value()**2).sum()
			if ((vidx+1)%100==0):
				print "%d/%d, use %.1fmin"%(vidx+1,dh_train.batch_num,(time.time()-start)/60.0);
				start=time.time();


		use_noise.set_value(0.0);
		#compute train error
		dh_train.Reset(); 
		print ">>train cost";
		tcost,tacc=Predict(options,f,dh_train,verbose=True,train_num=200);
		print "cost: %.3f, acc: %.3f"%(tcost,tacc);

		#compute valid error
		dh_valid.Reset();
		print ">>valid cost";
		vcost,vacc=Predict(options,f,dh_valid,verbose=True);
		print "cost: %.3f, acc: %.3f"%(vcost,vacc);

		print ">>save point:",options['saveto'];
		train_log=np.append(train_log,np.array([tcost,tacc,vcost,vacc])[None,...],axis=0);
		# train_log.append([tcost,tacc,vcost,vacc]);
		params = unzip(tparams);
		np.savez(saveto, train_log=train_log, options=options, **params);

		if (vcost<min_valid_cost):
			min_valid_cost=vcost;
			max_valid_acc=max(max_valid_acc,vacc);
			print ">>save best:",options['bestsaveto'];
			np.savez(options['bestsaveto'], train_log=train_log, options=options, **params);
		elif (vacc>max_valid_acc):
			max_valid_acc=vacc;
			min_valid_cost=min(min_valid_cost,vcost);
			print ">>save best:",options['bestsaveto'];
			np.savez(options['bestsaveto'], train_log=train_log, options=options, **params);
Example #7
    def update(all_grads, all_params, learning_rate):
        """ Compute updates from gradients """
        return momentum(all_grads, all_params, learning_rate, momentum=m)
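
This wrapper works because lasagne's update rules take `loss_or_grads` as their first argument, so a list of precomputed gradients is accepted just like a scalar loss; `m` here is the momentum coefficient from the enclosing scope. A toy, self-contained sketch of both call forms:

import numpy as np
import theano
import theano.tensor as T
from lasagne.updates import momentum

# Tiny symbolic problem: fit a shared weight vector to an input vector.
x = T.vector('x')
w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='w')
loss = T.sum((x - w) ** 2)
params = [w]

# Equivalent calls: pass the scalar loss, or pass precomputed gradients.
updates_from_loss = momentum(loss, params, learning_rate=0.01, momentum=0.9)
updates_from_grads = momentum(T.grad(loss, params), params,
                              learning_rate=0.01, momentum=0.9)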
Example #8
def main(cf):

    ########
    # DATA #
    ########

    print 'Creating data generators...'
    train_iterator, valid_iterator, test_iterator = create_data_generators(cf)

    ##############################
    # COST, GRADIENT AND UPDATES #
    ##############################

    print 'Building model...'

    cost, accuracy = cf.model.compute_cost(deterministic=False)
    cost_val, accuracy_val = cf.model.compute_cost(deterministic=True)

    params = get_all_params(cf.model.net, trainable=True)

    if cf.algo == 'adam':
        updates = adam(cost, params, cf.learning_rate)
    elif cf.algo == 'sgd':
        updates = sgd(cost, params, cf.learning_rate)
    elif cf.algo == 'momentum':
        updates = momentum(cost, params, cf.learning_rate)
    else:
        raise ValueError('Specified algo does not exist')

    ##############
    # MONITORING #
    ##############

    print 'Creating extensions and compiling functions...',

    train_monitor = TrainMonitor(cf.train_freq_print, cf.model.vars,
                                 [cost, accuracy], updates)

    monitoring_vars = [cost_val, accuracy_val]
    valid_monitor = ValMonitor('Validation', cf.valid_freq_print,
                               cf.model.vars, monitoring_vars, valid_iterator)

    test_monitor = ValMonitor('Test', cf.valid_freq_print, cf.model.vars,
                              monitoring_vars, test_iterator)

    train_saver = VariableSaver(train_monitor, cf.dump_every_batches,
                                cf.dump_path, 'train')

    valid_saver = VariableSaver(valid_monitor, cf.dump_every_batches,
                                cf.dump_path, 'valid')

    test_saver = VariableSaver(test_monitor, None, cf.dump_path, 'test')

    # Ending conditions
    end_conditions = []
    if hasattr(cf, 'max_iter'):
        end_conditions.append(MaxIteration(cf.max_iter))
    if hasattr(cf, 'max_time'):
        end_conditions.append(MaxTime(cf.max_time))

    extensions = [
        valid_monitor, test_monitor, train_saver, valid_saver, test_saver
    ]

    train_m = Trainer(train_monitor, train_iterator, extensions,
                      end_conditions)

    ############
    # TRAINING #
    ############

    train_m.train()
Example #9
    def _prepare(self, X, y, X_valid=None, y_valid=None, sample_weight=None,
                 whole_dataset_in_device=True):

        self._stats = []
        self._class_label_encoder = LabelEncoder()
        if self.is_classification is True:
            self._class_label_encoder.fit(y)
            self.classes_ = self._class_label_encoder.classes_
            y = self._class_label_encoder.transform(y).astype(y.dtype)
            self.y_train_transformed = y
            if y_valid is not None:
                y_valid_transformed = self._class_label_encoder.transform(
                    y_valid).astype(y_valid.dtype)

        self._l_x_in = layers.InputLayer(shape=(None, X.shape[1]))
        batch_index, X_batch, y_batch, batch_slice = get_theano_batch_variables(
            self.batch_size, y_softmax=self.is_classification)

        if sample_weight is not None:
            t_sample_weight = T.vector('sample_weight')
            sample_weight = sample_weight.astype(theano.config.floatX)
        else:
            t_sample_weight = T.scalar('sample_weight')

        if self.is_classification is True:
            y_dim = len(set(y.flatten().tolist()))
        else:
            y_dim = y.shape[1]

        self._prediction_layer = self._build_model(y_dim)
        self._layers = layers.get_all_layers(self._prediction_layer)
        self._build_prediction_functions(X_batch, self._prediction_layer)

        if self.input_noise_function is None:
            output = layers.get_output(self._prediction_layer, X_batch)

        else:
            X_batch_noisy = self.input_noise_function(X_batch)
            output = layers.get_output(self._prediction_layer, X_batch_noisy)

        if self.is_classification:
            loss = -T.mean(t_sample_weight * T.log(output)
                           [T.arange(y_batch.shape[0]), y_batch])
        else:
            loss = T.mean(
                t_sample_weight * T.sum((output - y_batch) ** 2, axis=1))

        loss_unreg = loss

        all_params = layers.get_all_params(self._prediction_layer)
        if self._output_softener_coefs is not None:
            all_params.append(self._output_softener_coefs)

        W_params = layers.get_all_param_values(
            self._prediction_layer, regularizable=True)

        # regularization
        if self.L1_factor is not None:
            for L1_factor_layer, W in zip(self.L1_factor, W_params):
                loss = loss + L1_factor_layer * T.sum(abs(W))

        if self.L2_factor is not None:
            for L2_factor_layer, W in zip(self.L2_factor, W_params):
                loss = loss + L2_factor_layer * T.sum(W**2)

        if self.optimization_method == 'nesterov_momentum':
            gradient_updates = updates.nesterov_momentum(loss, all_params, learning_rate=self.learning_rate,
                                                         momentum=self.momentum)
        elif self.optimization_method == 'adadelta':
            # don't need momentum there
            gradient_updates = updates.adadelta(
                loss, all_params, learning_rate=self.learning_rate)
        elif self.optimization_method == 'adam':
            gradient_updates = updates.adam(
                loss, all_params, learning_rate=self.learning_rate)
        elif self.optimization_method == 'momentum':
            gradient_updates = updates.momentum(
                loss, all_params, learning_rate=self.learning_rate,
                momentum=self.momentum
            )
        elif self.optimization_method == 'adagrad':
            gradient_updates = updates.adagrad(
                loss, all_params, learning_rate=self.learning_rate)
        elif self.optimization_method == 'rmsprop':
            gradient_updates = updates.rmsprop(
                loss, all_params, learning_rate=self.learning_rate)
        elif self.optimization_method == 'sgd':
            gradient_updates = updates.sgd(
                loss, all_params, learning_rate=self.learning_rate,
            )
        else:
            raise Exception("wrong optimization method")

        nb_batches = X.shape[0] // self.batch_size
        if (X.shape[0] % self.batch_size) != 0:
            nb_batches += 1

        X = X.astype(theano.config.floatX)
        if self.is_classification == True:
            y = y.astype(np.int32)
        else:
            y = y.astype(theano.config.floatX)

        if whole_dataset_in_device == True:
            X_shared = theano.shared(X, borrow=True)
            y_shared = theano.shared(y, borrow=True)

            givens = {
                X_batch: X_shared[batch_slice],
                y_batch: y_shared[batch_slice]
            }

            if sample_weight is not None:
                sample_weight_shared = theano.shared(
                    sample_weight, borrow=True)
                givens[t_sample_weight] = sample_weight_shared[batch_slice]
            else:
                givens[t_sample_weight] = T.as_tensor_variable(
                    np.array(1., dtype=theano.config.floatX))

            iter_update_batch = theano.function(
                [batch_index], loss,
                updates=gradient_updates,
                givens=givens,

            )
        else:
            if sample_weight is None:
                iter_update_gradients = theano.function(
                    [X_batch, y_batch],
                    loss,
                    updates=gradient_updates,
                    givens={t_sample_weight: T.as_tensor_variable(
                        np.array(1., dtype=theano.config.floatX))},

                )

                def iter_update_batch(batch_index):
                    sl = slice(batch_index * self.batch_size,
                               (batch_index + 1) * self.batch_size)
                    return iter_update_gradients(X[sl], y[sl])

            else:
                iter_update_gradients = theano.function(
                    [X_batch, y_batch, t_sample_weight],
                    loss,
                    updates=gradient_updates
                )

                def iter_update_batch(batch_index):
                    sl = slice(batch_index * self.batch_size,
                               (batch_index + 1) * self.batch_size)
                    return iter_update_gradients(X[sl], y[sl], sample_weight[sl])
        self._iter_update_batch = iter_update_batch
        self._get_loss = theano.function(
            [X_batch, y_batch, t_sample_weight], loss_unreg, allow_input_downcast=True)

        def iter_update(epoch):
            losses = []
            #self.learning_rate.set_value(self.learning_rate.get_value() * np.array(0.99, dtype=theano.config.floatX))
            for i in xrange(nb_batches):
                losses.append(self._iter_update_batch(i))
                # max norm
                if self.max_norm is not None:
                    for max_norm_layer, layer in zip(self.max_norm, self._layers):
                        layer.W = updates.norm_constraint(
                            layer.W, max_norm_layer)

            losses = np.array(losses)

            d = OrderedDict()
            d["epoch"] = epoch
            #d["loss_train_std"] = losses.std()

            #d["loss_train"] = losses.mean()
            d["loss_train"] = self._get_loss(
                self.X_train, self.y_train_transformed, 1.)

            d["accuracy_train"] = (
                self.predict(self.X_train) == self.y_train).mean()

            if X_valid is not None and y_valid is not None:
                d["loss_valid"] = self._get_loss(
                    X_valid, y_valid_transformed, 1.)

                if self.is_classification == True:
                    d["accuracy_valid"] = (
                        self.predict(X_valid) == y_valid).mean()

            if self.verbose > 0:
                if (epoch % self.report_each) == 0:
                    print(tabulate([d], headers="keys"))
            self._stats.append(d)
            return d

        def quitter(update_status):
            cur_epoch = len(self._stats) - 1
            if self.patience_nb_epochs > 0:
                # patience heuristic (for early stopping)
                cur_patience_stat = update_status[self.patience_stat]

                if self.cur_best_patience_stat is None:
                    self.cur_best_patience_stat = cur_patience_stat
                    first_time = True
                else:
                    first_time = False

                thresh = self.patience_progression_rate_threshold
                if cur_patience_stat < self.cur_best_patience_stat * thresh or first_time:

                    if self.verbose >= 2:
                        fmt = "--Early stopping-- good we have a new best value : {0}={1}, last best : epoch {2}, value={3}"
                        print(fmt.format(self.patience_stat, cur_patience_stat,
                                         self.cur_best_epoch, self.cur_best_patience_stat))
                    self.cur_best_epoch = cur_epoch
                    self.cur_best_patience_stat = cur_patience_stat
                    if hasattr(self, "set_state") and hasattr(self, "get_state"):
                        self.cur_best_model = self.get_state()
                    else:
                        self.cur_best_model = pickle.dumps(
                            self.__dict__, protocol=pickle.HIGHEST_PROTOCOL)
                if (cur_epoch - self.cur_best_epoch) >= self.patience_nb_epochs:
                    finish = True
                    if hasattr(self, "set_state") and hasattr(self, "get_state"):
                        self.set_state(self.cur_best_model)
                    else:
                        self.__dict__.update(pickle.loads(self.cur_best_model))

                    self._stats = self._stats[0:self.cur_best_epoch + 1]
                    if self.verbose >= 2:
                        print("out of patience...take the model at epoch {0} and quit".format(
                            self.cur_best_epoch + 1))
                else:
                    finish = False
                return finish
            else:
                return False

        def monitor(update_status):
            pass

        def observer(monitor_output):
            pass

        return (iter_update, quitter, monitor, observer)
Example #10
def Train(options,init_params,build_model,DataHandler):
	load=options['load'];
	loadHis=options['loadHis'];
	saveto=options['saveto'];
	loadfrom=options['loadfrom'];
	dataset=options['dataset'];
	last_n=options['last_n'];

	print ">>>init params & build graph";
	tparams=init_params(options);
	cost,preds,inner_state,inps,use_noise=build_model(options,tparams);
	print "build done"

	print ">>>compile cost&updates function";
	start=time.time();
	f=theano.function(inps,[cost,preds],allow_input_downcast=True,on_unused_input='ignore');

	
	constraint_params=ParamsFilter(tparams,prefix='recog/saliencyFgbg_w');



	if options['finetune']:
		updates=momentum(cost, itemlist(tparams), options['lrate'], momentum=options['momentum']);
	else:
		updates=adam(cost, itemlist(tparams), learning_rate=options['lrate'], beta1=0.9, beta2=0.999, epsilon=1e-08); 
	f_update=theano.function(inps,[cost,preds],updates=updates,allow_input_downcast=True,on_unused_input='ignore');
	print "compile finish, use %.1fmin"%((time.time()-start)/60);

	print '>>>Optimization'
	# ready dataset
	dh_train = DataHandler(options['dataset'],datatype=0,fps=options['fps']); dh_train.SetMode('single');
	dh_valid = DataHandler(options['dataset'],datatype=1,fps=options['fps']); dh_valid.SetMode('single');
	
	train_log=np.empty((0,4),dtype='float32');
	min_valid_cost=1e8;
	max_valid_acc=0;
	if load and loadHis and os.path.exists(loadfrom):
		print "load log history from",loadfrom
		train_log = np.load(loadfrom)['train_log'];
		min_valid_cost=train_log[:,2].min();
	 	max_valid_acc=train_log[:,3].max();

	train_num=dh_train.batch_num;  # should be set to dh_train.batch_num
	for epochidx in xrange(options['max_epochs']):
		use_noise.set_value(1.0);
		dh_train.Reset();
		print 'Epoch ', epochidx
		start=time.time();
		for vidx in xrange(train_num):
			x,mask,y=dh_train.GetSingleVideoFeature();
			# switch the last two  feature dim
			x=x.reshape([x.shape[0],x.shape[1],options['featureMaps'],options['locations']]); #1024*49
			x=x.transpose([0,1,3,2]);
			x=x.reshape([x.shape[0],x.shape[1],-1]);

			cost,preds=f_update(x,mask,y);
			if math.isnan(cost):
				print "cost is nan, exit";
				exit(-1);
			ParamsConstraint(constraint_params);  # make contraint
			# acc=((y.mean(0)).argmax(1)==preds).mean();
			#print "%.3f %.3f, use %.0fs"%(cost,acc,(time.time()-start));
			# print cost,acc #,preds.reshape([preds.shape[0]])
			if ((vidx+1)%100==0):
				print "%d/%d, use %.1fmin"%(vidx+1,dh_train.batch_num,(time.time()-start)/60.0);
				start=time.time();

		# if (epochidx%10!=0):
		# 	continue
		use_noise.set_value(0.0);
		#compute train error
		dh_train.Reset(); 
		print ">>train cost";
		tcost,tacc=Predict(options,f,dh_train,verbose=True,train_num=200);
		print "cost: %.3f, acc: %.3f"%(tcost,tacc);

		#compute valid error
		dh_valid.Reset();
		print ">>valid cost";
		vcost,vacc=Predict(options,f,dh_valid,verbose=True);
		print "cost: %.3f, acc: %.3f"%(vcost,vacc);

		print ">>save point:",options['saveto'];
		train_log=np.append(train_log,np.array([tcost,tacc,vcost,vacc])[None,...],axis=0);
		# train_log.append([tcost,tacc,vcost,vacc]);
		params = unzip(tparams);
		np.savez(saveto, train_log=train_log, options=options, **params);

		if (vcost<min_valid_cost):
			min_valid_cost=vcost;
			max_valid_acc=max(max_valid_acc,vacc);
			print ">>save best:",options['bestsaveto'];
			np.savez(options['bestsaveto'], train_log=train_log, options=options, **params);
		elif (vacc>max_valid_acc):
			max_valid_acc=vacc;
			min_valid_cost=min(min_valid_cost,vcost);
			print ">>save best:",options['bestsaveto'];
			np.savez(options['bestsaveto'], train_log=train_log, options=options, **params);
Example #11
def model_class(ds, paths, param_arch, param_cost, param_updates, param_train):

    # create a log file containing the architecture configuration
    formatter = logging.Formatter('%(message)s')
    logger = logging.getLogger('log_config')
    if 'start_from_epoch' in param_train:
        name_tmp = 'config_from_epoch=%04d.log' % (
            param_train['start_from_epoch'])
    else:
        name_tmp = 'config.log'
    path_tmp = os.path.join(paths['exp'], name_tmp)
    if not os.path.isfile(path_tmp):
        handler = logging.FileHandler(
            path_tmp,
            mode='w')  # to append at the end of the file use: mode='a'
    else:
        raise Exception('[e] the log file ', name_tmp, ' already exists!')
    handler.setFormatter(formatter)
    handler.setLevel(logging.INFO)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    # input dimensions
    dim_desc = ds.descs_train[0].shape[1]
    dim_labels = ds.labels_train[0].shape[0]
    print(dim_labels)

    # architecture definition:
    print(("[i] architecture definition... "), end=' ')
    tic = time.time()
    if param_arch['type'] == 0:
        desc, patch_op, cla, net, logger = arch_class_00(
            dim_desc, dim_labels, param_arch, logger)
    elif param_arch['type'] == 1:
        desc, patch_op, cla, net, logger = arch_class_01(
            dim_desc, dim_labels, param_arch, logger)
    elif param_arch['type'] == 2:
        desc, patch_op, cla, net, logger = arch_class_02(
            dim_desc, dim_labels, param_arch, logger)
    else:
        raise Exception('[e] architecture not supported!')
    print(("%02.2fs" % (time.time() - tic)))

    # cost function definition:
    print(("[i] cost function definition... "), end=' ')
    tic = time.time()
    pred = LL.get_output(cla, deterministic=True)  # in case we use dropout
    feat = LL.get_output(net)
    target = T.ivector('target')
    # data term
    if param_cost['cost_func'] == 'cross_entropy':
        if param_arch['non_linearity'] == 'softmax':
            cost_dataterm = T.mean(
                LO.categorical_crossentropy(pred, target)
            )  # in the original code we were using *.mean() instead of T.mean(*)
        elif param_arch['non_linearity'] == 'log_softmax':
            cost_dataterm = T.mean(
                categorical_crossentropy_logdomain(pred, target))
    elif param_cost['cost_func'] == 'cross_entropy_stable':
        if param_arch['non_linearity'] == 'softmax':
            cost_dataterm = T.mean(
                categorical_crossentropy_stable(pred, target))
        else:
            raise Exception(
                '[e] the chosen cost function is not implemented for the chosen non-linearity!'
            )
    else:
        raise Exception('[e] the chosen cost function is not supported!')
    # classification accuracy
    acc = LO.categorical_accuracy(pred, target).mean()
    # regularization
    cost_reg = param_cost['mu'] * LR.regularize_network_params(cla, LR.l2)
    # cost function
    cost = cost_dataterm + cost_reg
    # get params
    params = LL.get_all_params(cla)
    # gradient definition
    grad = T.grad(cost, params)
    grad_norm = T.nlinalg.norm(T.concatenate([g.flatten() for g in grad]), 2)
    print(("%02.2fs" % (time.time() - tic)))

    # updates definition:
    print(("[i] gradient updates definition... "), end=' ')
    tic = time.time()
    if param_updates['method'] == 'momentum':
        if param_updates.get('learning_rate') is not None:
            learning_rate = param_updates['learning_rate']  # default: 1.0
        else:
            raise Exception('[e] missing learning_rate parameter!')
        if param_updates.get('momentum') is not None:
            momentum = param_updates['momentum']  # default: 0.9
        else:
            raise Exception('[e] missing momentum parameter!')
        updates = LU.momentum(grad, params, learning_rate, momentum)
    elif param_updates['method'] == 'adagrad':
        if param_updates.get('learning_rate') is not None:
            learning_rate = param_updates['learning_rate']  # default: 1.0
        else:
            raise Exception('[e] missing learning_rate parameter!')
        updates = LU.adagrad(grad, params, learning_rate)
    elif param_updates['method'] == 'adadelta':
        if param_updates.get('learning_rate') is not None:
            learning_rate = param_updates['learning_rate']  # default: 1.0
        else:
            raise Exception('[e] missing learning_rate parameter!')
        updates = LU.adadelta(grad, params, learning_rate)
    elif param_updates['method'] == 'adam':
        if param_updates.get('learning_rate') is not None:
            learning_rate = param_updates['learning_rate']  # default: 1e-03
        else:
            raise Exception('[e] missing learning_rate parameter!')
        if param_updates.get('beta1') is not None:
            beta1 = param_updates['beta1']  # default: 0.9
        else:
            raise Exception('[e] missing beta1 parameter!')
        if param_updates.get('beta2') is not None:
            beta2 = param_updates['beta2']  # default: 0.999
        else:
            raise Exception('[e] missing beta2 parameter!')
        if param_updates.get('epsilon') is not None:
            epsilon = param_updates['epsilon']  # default: 1e-08
        else:
            raise Exception('[e] missing epsilon parameter!')
        updates = LU.adam(grad, params, learning_rate, beta1, beta2, epsilon)
    else:
        raise Exception('[e] updates method not supported!')
    print(("%02.2fs" % (time.time() - tic)))

    # train / test functions:
    funcs = dict()
    print(("[i] compiling function 'train'... "), end=' ')
    tic = time.time()
    funcs['train'] = theano.function(
        [desc.input_var, patch_op.input_var, target],
        [cost, cost_dataterm, cost_reg, grad_norm, acc],
        updates=updates,
        allow_input_downcast=True,
        on_unused_input='warn')
    print(("%02.2fs" % (time.time() - tic)))
    print(("[i] compiling function 'fwd'... "), end=' ')
    tic = time.time()
    funcs['fwd'] = theano.function(
        [desc.input_var, patch_op.input_var, target], [cost, grad_norm, acc],
        allow_input_downcast=True,
        on_unused_input='ignore')
    print(("%02.2fs" % (time.time() - tic)))
    print(("[i] compiling function 'pred'... "), end=' ')
    tic = time.time()
    funcs['pred'] = theano.function(
        [desc.input_var, patch_op.input_var, target], [pred],
        allow_input_downcast=True,
        on_unused_input='ignore')
    print(("%02.2fs" % (time.time() - tic)))
    print(("[i] compiling function 'feat'... "), end=' ')
    tic = time.time()
    funcs['feat'] = theano.function(
        [desc.input_var, patch_op.input_var, target], [feat],
        allow_input_downcast=True,
        on_unused_input='ignore')
    print(("%02.2fs" % (time.time() - tic)))

    # save cost function parameters to a config file
    logger.info('\nCost function parameters:')
    logger.info('   cost function = %s' % param_cost['cost_func'])
    logger.info('   mu            = %e' % param_cost['mu'])

    # save updates parameters to a config file
    logger.info('\nUpdates parameters:')
    logger.info('   method        = %s' % param_updates['method'])
    logger.info('   learning rate = %e' % param_updates['learning_rate'])
    if param_updates['method'] == 'momentum':
        logger.info('   momentum      = %e' % param_updates['momentum'])
    if param_updates['method'] == 'adam':
        logger.info('   beta1         = %e' % param_updates['beta1'])
        logger.info('   beta2         = %e' % param_updates['beta2'])
        logger.info('   epsilon       = %e' % param_updates['epsilon'])

    # save training parameters to a config file
    logger.info('\nTraining parameters:')
    logger.info('   epoch size = %d' % ds.epoch_size)

    return funcs, cla, updates
Example #12
def main(cf):

    ########
    # DATA #
    ########

    print 'Creating data generators...'
    train_iterator, valid_iterator, test_iterator = create_data_generators(cf)

    ##############################
    # COST, GRADIENT AND UPDATES #
    ##############################

    print 'Building model...'

    cost, accuracy = cf.model.compute_cost(deterministic=False)
    cost_val, accuracy_val = cf.model.compute_cost(deterministic=True)

    params = get_all_params(cf.model.net, trainable=True)

    if cf.algo == 'adam':
        updates = adam(cost, params, cf.learning_rate)
    elif cf.algo == 'sgd':
        updates = sgd(cost, params, cf.learning_rate)
    elif cf.algo == 'momentum':
        updates = momentum(cost, params, cf.learning_rate)
    else:
        raise ValueError('Specified algo does not exist')

    ##############
    # MONITORING #
    ##############

    print 'Creating extensions and compiling functions...',

    train_monitor = TrainMonitor(
        cf.train_freq_print, cf.model.vars, [cost, accuracy], updates)

    monitoring_vars = [cost_val, accuracy_val]
    valid_monitor = ValMonitor(
        'Validation', cf.valid_freq_print, cf.model.vars, monitoring_vars,
        valid_iterator)

    test_monitor = ValMonitor(
        'Test', cf.valid_freq_print, cf.model.vars, monitoring_vars,
        test_iterator)

    train_saver = VariableSaver(
        train_monitor, cf.dump_every_batches, cf.dump_path, 'train')

    valid_saver = VariableSaver(
        valid_monitor, cf.dump_every_batches, cf.dump_path, 'valid')

    test_saver = VariableSaver(test_monitor, None, cf.dump_path, 'test')

    # Ending conditions
    end_conditions = []
    if hasattr(cf, 'max_iter'):
        end_conditions.append(MaxIteration(cf.max_iter))
    if hasattr(cf, 'max_time'):
        end_conditions.append(MaxTime(cf.max_time))

    extensions = [
        valid_monitor,
        test_monitor,

        train_saver,
        valid_saver,
        test_saver
    ]

    train_m = Trainer(train_monitor, train_iterator,
                      extensions, end_conditions)

    ############
    # TRAINING #
    ############

    train_m.train()