def test_adagrad():
    max_epoch = 15

    # Create Nd gaussian functions to optimize. These functions are not
    # well-conditioned and there exists no perfect gradient step to converge in
    # only one iteration.
    for N in range(1, 5):
        center = 5*np.ones((1, N)).astype(floatX)
        param = sharedX(np.zeros((1, N)))
        cost = T.sum(0.5*T.dot(T.dot((param-center), np.diag(1./np.arange(1, N+1))), (param-center).T))
        loss = DummyLossWithGradient(cost, param)

        # Even with a really high gradient step, AdaGrad can still converge.
        # Actually, it is faster than using the optimal gradient step with SGD.
        optimizer = AdaGrad(loss, lr=100, eps=1e-1)
        trainer = Trainer(optimizer, DummyBatchScheduler())
        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Monitor the gradient of `loss` w.r.t. `param`.
        tracker = tasks.Tracker(loss.gradients[param])
        trainer.append_task(tracker)
        trainer.train()

        # After 15 epochs, param should be around the center and the gradients near 0.
        assert_array_almost_equal(param.get_value(), center)
        assert_array_almost_equal(tracker[0], 0.)
def test_max_epoch_stopping():
    max_epoch = 7
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))
    trainer.train()

    assert_equal(trainer.status.current_epoch, max_epoch)
def _build_experiment(self):
    # Create an Nd gaussian function to optimize. This function is not
    # well-conditioned and there exists no perfect gradient step to converge in
    # only one iteration.
    N = 4
    center = 5 * np.ones((1, N)).astype(floatX)
    param = sharedX(np.zeros((1, N)))
    cost = T.sum(0.5 * T.dot(T.dot((param - center), np.diag(1. / np.arange(1, N + 1))),
                             (param - center).T))
    loss = DummyLossWithGradient(cost, param)

    optimizer = SGD(loss)
    direction_modifier = DecreasingLearningRate(lr=self.lr, dc=self.dc)
    optimizer.append_direction_modifier(direction_modifier)
    trainer = Trainer(optimizer, DummyBatchScheduler())

    # Monitor the learning rate.
    logger = tasks.Logger(
        views.MonitorVariable(list(direction_modifier.parameters.values())[0]))
    trainer.append_task(logger)
    return trainer, logger, direction_modifier
def _build_experiment(self, threshold=1):
    # Create an Nd gaussian function to optimize. This function is not
    # well-conditioned and there exists no perfect gradient step to converge in
    # only one iteration.
    N = 4
    center = 5 * np.ones((1, N)).astype(floatX)
    param = sharedX(np.zeros((1, N)))
    cost = T.sum(0.5 * T.dot(T.dot((param - center), np.diag(1. / np.arange(1, N + 1))),
                             (param - center).T))
    loss = DummyLossWithGradient(cost, param)

    gradient_clipping = DirectionClipping(threshold=threshold)
    loss.append_gradient_modifier(gradient_clipping)

    optimizer = SGD(loss)
    trainer = Trainer(optimizer, DummyBatchScheduler())

    # Monitor the update direction, the (clipped and original) gradients and the gradient norm.
    logger = tasks.Logger(
        views.MonitorVariable(list(optimizer.directions.values())[0]),
        views.MonitorVariable(list(loss.gradients.values())[0]),
        views.MonitorVariable(list(loss.orig_gradients.values())[0]),
        views.MonitorVariable(gradient_clipping.grad_norm))
    trainer.append_task(logger)
    return trainer, logger, gradient_clipping
def test_simple_perceptron():
    with Timer("Loading dataset"):
        trainset, validset, testset = load_mnist()

    with Timer("Creating model"):
        # TODO: We should use the number of different targets in the dataset,
        # but I'm not sure how to do it right (keep in mind the regression case).
        output_size = 10
        model = Perceptron(trainset.input_size, output_size)
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        optimizer = SGD(loss=NLL(model, trainset))
        optimizer.append_update_rule(ConstantLearningRate(0.0001))

    with Timer("Building trainer"):
        # Train for 10 epochs
        batch_scheduler = MiniBatchScheduler(trainset, 100)
        trainer = Trainer(optimizer, batch_scheduler)
        trainer.append_task(stopping_criteria.MaxEpochStopping(10))

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Print mean/stderror of classification errors.
        classif_error = views.ClassificationError(model.use, validset)
        trainer.append_task(tasks.Print("Validset - Classif error: {0:.1%} ± {1:.1%}",
                                        classif_error.mean, classif_error.stderror))

    with Timer("Training"):
        trainer.train()
def test_simple_perceptron():
    with Timer("Loading dataset"):
        trainset, validset, testset = load_mnist()

    with Timer("Creating model"):
        # TODO: We should use the number of different targets in the dataset,
        # but I'm not sure how to do it right (keep in mind the regression case).
        output_size = 10
        model = Perceptron(trainset.input_size, output_size)
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        optimizer = SGD(loss=NLL(model, trainset))
        optimizer.append_direction_modifier(ConstantLearningRate(0.0001))

    with Timer("Building trainer"):
        # Train for 10 epochs
        batch_scheduler = MiniBatchScheduler(trainset, 100)
        trainer = Trainer(optimizer, batch_scheduler)
        trainer.append_task(stopping_criteria.MaxEpochStopping(10))

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Print mean/stderror of classification errors.
        classif_error = views.ClassificationError(model.use, validset)
        trainer.append_task(tasks.Print("Validset - Classif error: {0:.1%} ± {1:.1%}",
                                        classif_error.mean, classif_error.stderror))

    with Timer("Training"):
        trainer.train()
def _build_experiment(self):
    # Create an Nd gaussian function to optimize. This function is not
    # well-conditioned and there exists no perfect gradient step to converge in
    # only one iteration.
    N = 4
    center = 5*np.ones((1, N)).astype(floatX)
    param = sharedX(np.zeros((1, N)))
    cost = T.sum(0.5*T.dot(T.dot((param-center), np.diag(1./np.arange(1, N+1))),
                           (param-center).T))
    loss = DummyLossWithGradient(cost, param)

    optimizer = SGD(loss)
    direction_modifier = ConstantLearningRate(lr=self.lr)
    optimizer.append_direction_modifier(direction_modifier)
    trainer = Trainer(optimizer, DummyBatchScheduler())

    # Monitor the learning rate.
    logger = tasks.Logger(views.MonitorVariable(list(direction_modifier.parameters.values())[0]))
    trainer.append_task(logger)
    return trainer, logger, direction_modifier
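# Hypothetical usage sketch (method name, epoch count and assertions are illustrative,
# not part of the original file): train for a few epochs and check that the learning
# rate monitored by the logger built above stays equal to `self.lr` for every update.
def test_learning_rate_stays_constant(self):
    trainer, logger, direction_modifier = self._build_experiment()
    trainer.append_task(stopping_criteria.MaxEpochStopping(3))
    trainer.train()

    lr_history = np.array(logger.get_variable_history(0)).flatten()
    assert_array_almost_equal(lr_history, self.lr * np.ones_like(lr_history))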
def test_sgd():
    # Create simple Nd gaussian functions to optimize. These functions are
    # (perfectly) well-conditioned so it should take only one gradient step
    # to converge using 1/L, where L is the largest eigenvalue of the hessian.
    max_epoch = 2
    for N in range(1, 5):
        center = np.arange(1, N+1)[None, :].astype(floatX)
        param = sharedX(np.zeros((1, N)))
        cost = T.sum(0.5*T.dot(T.dot((param-center), T.eye(N)), (param-center).T))
        loss = DummyLossWithGradient(cost, param)

        trainer = Trainer(SGD(loss), DummyBatchScheduler())
        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Monitor the gradient of `loss` w.r.t. `param`.
        gparam = tasks.MonitorVariable(loss.gradients[param])
        trainer.append_task(gparam)
        trainer.train()

        # Since the problem is well-conditioned and we use the optimal gradient step 1/L,
        # two epochs should be enough for `param` to be around `center` and the gradients near 0.
        assert_array_almost_equal(param.get_value(), center)
        assert_array_almost_equal(gparam.value, 0.)

    # Create an Nd gaussian function to optimize. This function is not
    # well-conditioned and there exists no perfect gradient step to converge in
    # only one iteration.
    #cost = T.sum(N*0.5*T.dot(T.dot((param-center), np.diag(1./np.arange(1, N+1))), ((param-center).T)))
    max_epoch = 80
    N = 4
    center = 5*np.ones((1, N)).astype(floatX)
    param = sharedX(np.zeros((1, N)))
    cost = T.sum(0.5*T.dot(T.dot((param-center), np.diag(1./np.arange(1, N+1))), (param-center).T))
    loss = DummyLossWithGradient(cost, param)

    trainer = Trainer(SGD(loss), DummyBatchScheduler())
    trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))
    #trainer.append_task(tasks.PrintVariable("Loss param : {}", param))
    #trainer.append_task(tasks.PrintVariable("Loss gradient: {}", loss.gradients[param]))

    # Monitor the gradient of `loss` w.r.t. `param`.
    gparam = tasks.MonitorVariable(loss.gradients[param])
    trainer.append_task(gparam)
    trainer.train()

    # The problem is ill-conditioned here, so a fixed gradient step needs many more
    # epochs before `param` gets close to `center` and the gradients get near 0.
    assert_array_almost_equal(param.get_value(), center, decimal=6)
    assert_array_almost_equal(gparam.value, 0.)
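# Illustrative helper (not part of the original test): for the ill-conditioned
# quadratic above, the Hessian is diag(1, 1/2, ..., 1/N), so its largest eigenvalue
# is L = 1 and the fixed step 1/L = 1 only contracts the error along the flattest
# direction by a factor (1 - 1/N) per epoch, which is why the second part of the
# test needs ~80 epochs instead of 2.
def optimal_step_for_quadratic(N=4):
    hessian = np.diag(1. / np.arange(1, N + 1))
    L = np.max(np.linalg.eigvalsh(hessian))
    return 1. / L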
def _build_trainer(nb_epochs, optimizer_cls):
    print("Will build a trainer that is going to train a Perceptron for {0} epochs.".format(nb_epochs))

    print("Building model")
    model = Perceptron(trainset.input_size, nb_classes)
    model.initialize(initer.UniformInitializer(random_seed=1234))

    print("Building optimizer")
    loss = NLL(model, trainset)
    optimizer = optimizer_cls(loss=loss)
    print("Optimizer: {}".format(type(optimizer).__name__))
    #optimizer = SGD(loss=loss)
    #optimizer.append_direction_modifier(ConstantLearningRate(0.1))

    # Use mini batches of 100 examples.
    batch_scheduler = MiniBatchScheduler(trainset, 100)

    print("Building trainer")
    trainer = Trainer(optimizer, batch_scheduler)

    # Print time for one epoch
    trainer.append_task(tasks.PrintEpochDuration())
    trainer.append_task(tasks.PrintTrainingDuration())

    # Log training error
    loss_monitor = views.MonitorVariable(loss.loss)
    avg_loss = tasks.AveragePerEpoch(loss_monitor)

    # Print NLL mean/stderror.
    nll = views.LossView(loss=NLL(model, validset),
                         batch_scheduler=FullBatchScheduler(validset))

    logger = tasks.Logger(loss_monitor, avg_loss, nll.mean)
    trainer.append_task(logger, avg_loss)

    # Train for `nb_epochs` epochs (stopping criteria should be added at the end).
    trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

    return trainer, nll, logger
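# Hypothetical usage sketch (function name is illustrative, not from the original
# file): assuming `trainset`, `validset` and `nb_classes` are defined at module
# level as `_build_trainer` expects, one trainer per optimizer class can be built
# and trained, keeping the returned `nll` view and `logger` around for inspection.
def _compare_optimizers(nb_epochs=10, optimizer_classes=(SGD, AdaGrad)):
    experiments = {}
    for optimizer_cls in optimizer_classes:
        trainer, nll, logger = _build_trainer(nb_epochs, optimizer_cls)
        trainer.train()
        experiments[optimizer_cls.__name__] = (trainer, nll, logger)
    return experiments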
def test_simple_perceptron():
    # Loading dataset
    trainset, validset, testset = load_mnist()

    # Creating model
    nb_classes = 10
    model = Perceptron(trainset.input_size, nb_classes)
    model.initialize()  # By default, uniform initialization.

    # Building optimizer
    loss = NLL(model, trainset)
    optimizer = SGD(loss=loss)
    optimizer.append_direction_modifier(ConstantLearningRate(0.1))

    # Train for 10 epochs
    batch_scheduler = MiniBatchScheduler(trainset, 100)
    trainer = Trainer(optimizer, batch_scheduler)
    trainer.append_task(stopping_criteria.MaxEpochStopping(10))

    # Print time for one epoch
    trainer.append_task(tasks.PrintEpochDuration())
    trainer.append_task(tasks.PrintTrainingDuration())

    # Log training error
    loss_monitor = views.MonitorVariable(loss.loss)
    avg_loss = tasks.AveragePerEpoch(loss_monitor)
    accum = tasks.Accumulator(loss_monitor)
    logger = tasks.Logger(loss_monitor, avg_loss)
    trainer.append_task(logger, avg_loss, accum)

    # Print NLL mean/stderror.
    nll = views.LossView(loss=NLL(model, validset),
                         batch_scheduler=FullBatchScheduler(validset))
    trainer.append_task(tasks.Print("Validset - NLL : {0:.1%} ± {1:.1%}",
                                    nll.mean, nll.stderror))

    # Print mean/stderror of classification errors.
    classif_error = views.LossView(loss=ClassificationError(model, validset),
                                   batch_scheduler=FullBatchScheduler(validset))
    trainer.append_task(tasks.Print("Validset - Classif error: {0:.1%} ± {1:.1%}",
                                    classif_error.mean, classif_error.stderror))

    trainer.train()
def test_early_stopping():
    MAX_EPOCH = 100  # Add a max epoch just in case we get an infinite loop.

    class DummyCost(View):
        def __init__(self, initial_cost, costs):
            super().__init__()
            self.initial_cost = initial_cost
            self.costs = costs
            self.cpt = 0

        def update(self, status):
            if status.current_update == 0:
                return self.initial_cost

            cost = self.costs[self.cpt]
            self.cpt += 1
            return cost

    # 20 identical costs but should stop after 9 unchanged epochs.
    constant_cost = DummyCost(1, np.ones(20))
    lookahead = 9

    def callback(task, status):
        # This callback function should not be called.
        raise NameError("This callback function should not be called.")

    early_stopping = stopping_criteria.EarlyStopping(constant_cost, lookahead, callback=callback)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead)
    assert_equal(early_stopping.best_epoch, 0)
    assert_equal(early_stopping.best_cost, 1.)
    assert_equal(constant_cost.cpt, lookahead)

    # `lookahead` identical costs followed by `lookahead` lower identical costs.
    lookahead = 9
    costs = np.r_[np.ones(lookahead - 1), np.zeros(lookahead + 1)]
    simple_cost = DummyCost(1, costs)

    def callback(task, status):
        # This callback function should be called once after `lookahead` epochs.
        if status.current_epoch != lookahead:
            msg = "Callback should be fired up at epoch #{} not #{}.".format(lookahead, status.current_epoch)
            raise NameError(msg)

    early_stopping = stopping_criteria.EarlyStopping(simple_cost, lookahead, callback=callback)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, 2 * lookahead)
    assert_equal(early_stopping.best_epoch, lookahead)
    assert_equal(early_stopping.best_cost, 0.)

    # 20 increasing costs but should stop after 9 increasing epochs.
    lookahead = 9
    costs = range(20)
    increasing_cost = DummyCost(0, costs)

    def callback(task, status):
        # This callback function should not be called.
        raise NameError("This callback function should not be called.")

    early_stopping = stopping_criteria.EarlyStopping(increasing_cost, lookahead, callback=callback)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead)
    assert_equal(early_stopping.best_epoch, 0)
    assert_equal(early_stopping.best_cost, 0.)

    # Test `min_nb_epochs`
    lookahead = 9
    min_nb_epochs = 5
    costs = range(20)
    increasing_cost = DummyCost(0, costs)

    early_stopping = stopping_criteria.EarlyStopping(increasing_cost, lookahead, min_nb_epochs=min_nb_epochs)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead + min_nb_epochs)

    # Test that at the end the model is the best one.
    # `lookahead` decreasing costs followed by `lookahead+1` constant identical costs.
    lookahead = 9
    costs = np.r_[-np.arange(lookahead), np.zeros(lookahead + 1)]
    simple_cost = DummyCost(1, costs)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    model = trainer._optimizer.loss.model

    # Add some parameters to the model.
    model.parameters.extend([sharedX(np.zeros(4)), sharedX(np.zeros((3, 5)))])

    # Callback that will change model parameters after each epoch.
    def callback(task, status):
        for param in model.parameters:
            param.set_value(param.get_value() + 1)

    trainer.append_task(tasks.Callback(callback))

    early_stopping = stopping_criteria.EarlyStopping(simple_cost, lookahead)
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    for param in model.parameters:
        assert_array_equal(param.get_value(), lookahead * np.ones_like(param.get_value()))
def main():
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view']
    # Use this for hyperparams added in a new version, but nonexistent in older versions
    retrocompatibility_defaults = {'use_layer_normalization': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(
        args, exclude=hyperparams_to_exclude,
        retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_mask_classifier_dataset(args.train_subjects, trainset_volume_manager,
                                                         name="trainset", use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_mask_classifier_dataset(args.valid_subjects, validset_volume_manager,
                                                         name="validset", use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        batch_scheduler = MaskClassifierBatchScheduler(trainset, hyperparams['batch_size'],
                                                       seed=hyperparams['seed'])
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension
        model = FFNN_Classification(trainset_volume_manager, input_size, hyperparams['hidden_sizes'])
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropy(model, trainset)

        if args.clip_gradient is not None:
            loss.append_gradient_modifier(DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: : {}", avg_loss))

        # HACK: To make sure all subjects in the volume_manager are used in a batch,
        # we have to split the trainset/validset in 2 volume managers.
        model.volume_manager = validset_volume_manager
        valid_loss = BinaryCrossEntropy(model, validset)
        valid_batch_scheduler = MaskClassifierBatchScheduler(validset, hyperparams['batch_size'],
                                                             seed=hyperparams['seed'])

        valid_error = views.LossView(loss=valid_loss, batch_scheduler=valid_batch_scheduler)
        trainer.append_task(tasks.Print("Validset - Error : {0:.2f} | {1:.2f}",
                                        valid_error.sum, valid_error.mean))

        # HACK: Restore trainset volume manager
        model.volume_manager = trainset_volume_manager

        lookahead_loss = valid_error.sum

        direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(),
                                                              loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm))

        # logger = tasks.Logger(train_error.mean, valid_error.mean, valid_error.sum, direction_norm)
        logger = tasks.Logger(valid_error.mean, valid_error.sum, direction_norm)
        trainer.append_task(logger)

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(lookahead_loss,
                                                         lookahead=args.lookahead,
                                                         eps=args.lookahead_eps,
                                                         callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    # Extract experiment hyperparameters
    hyperparams = dict(vars(args))

    # Remove hyperparams that should not be part of the hash
    del hyperparams['max_epoch']
    del hyperparams['keep']
    del hyperparams['force']
    del hyperparams['name']

    # Get/generate experiment name
    experiment_name = args.name
    if experiment_name is None:
        experiment_name = utils.generate_uid_from_string(repr(hyperparams))

    # Create experiment folder
    experiment_path = pjoin(".", "experiments", experiment_name)
    resuming = False
    if os.path.isdir(experiment_path) and not args.force:
        resuming = True
        print("### Resuming experiment ({0}). ###\n".format(experiment_name))
        # Check if provided hyperparams match those in the experiment folder
        hyperparams_loaded = utils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
        if hyperparams != hyperparams_loaded:
            print("{\n" + "\n".join(["{}: {}".format(k, hyperparams[k]) for k in sorted(hyperparams.keys())]) + "\n}")
            print("{\n" + "\n".join(["{}: {}".format(k, hyperparams_loaded[k]) for k in sorted(hyperparams_loaded.keys())]) + "\n}")
            print("The provided arguments differ from the ones saved. Use --force if you are certain.\nQuitting.")
            sys.exit(1)
    else:
        if os.path.isdir(experiment_path):
            shutil.rmtree(experiment_path)

        os.makedirs(experiment_path)
        utils.save_dict_to_json_file(pjoin(experiment_path, "hyperparams.json"), hyperparams)

    with Timer("Loading dataset"):
        trainset, validset, testset = datasets.load(args.dataset)

        image_shape = (28, 28)
        nb_channels = 1 + (args.use_mask_as_input is True)

        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset, args.batch_size,
                                                                   use_mask_as_input=args.use_mask_as_input,
                                                                   seed=args.ordering_seed)
        print("{} updates per epoch.".format(len(batch_scheduler)))

    with Timer("Building model"):
        if args.use_lasagne:
            if args.with_residual:
                model = DeepConvNadeWithResidualUsingLasagne(image_shape=image_shape,
                                                             nb_channels=nb_channels,
                                                             convnet_blueprint=args.convnet_blueprint,
                                                             fullnet_blueprint=args.fullnet_blueprint,
                                                             hidden_activation=args.hidden_activation,
                                                             use_mask_as_input=args.use_mask_as_input)
            else:
                model = DeepConvNadeUsingLasagne(image_shape=image_shape,
                                                 nb_channels=nb_channels,
                                                 convnet_blueprint=args.convnet_blueprint,
                                                 fullnet_blueprint=args.fullnet_blueprint,
                                                 hidden_activation=args.hidden_activation,
                                                 use_mask_as_input=args.use_mask_as_input,
                                                 use_batch_norm=args.batch_norm)
        elif args.with_residual:
            model = DeepConvNADEWithResidual(image_shape=image_shape,
                                             nb_channels=nb_channels,
                                             convnet_blueprint=args.convnet_blueprint,
                                             fullnet_blueprint=args.fullnet_blueprint,
                                             hidden_activation=args.hidden_activation,
                                             use_mask_as_input=args.use_mask_as_input)
        else:
            builder = DeepConvNADEBuilder(image_shape=image_shape,
                                          nb_channels=nb_channels,
                                          hidden_activation=args.hidden_activation,
                                          use_mask_as_input=args.use_mask_as_input)

            if args.blueprints_seed is not None:
                convnet_blueprint, fullnet_blueprint = generate_blueprints(args.blueprint_seed, image_shape[0])
                builder.build_convnet_from_blueprint(convnet_blueprint)
                builder.build_fullnet_from_blueprint(fullnet_blueprint)
            else:
                if args.convnet_blueprint is not None:
                    builder.build_convnet_from_blueprint(args.convnet_blueprint)

                if args.fullnet_blueprint is not None:
                    builder.build_fullnet_from_blueprint(args.fullnet_blueprint)

            model = builder.build()
            # print(str(model.convnet))
            # print(str(model.fullnet))

        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

        print(str(model))

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)
        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        if args.max_epoch is not None:
            trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: : {}", avg_loss))

        # Print NLL mean/stderror.
        model.deterministic = True  # For batch normalization, see https://github.com/Lasagne/Lasagne/blob/master/lasagne/layers/normalization.py#L198
        nll = views.LossView(loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, validset),
                             batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(validset,
                                                                                      batch_size=0.1*len(validset),
                                                                                      use_mask_as_input=args.use_mask_as_input,
                                                                                      keep_mask=True,
                                                                                      seed=args.ordering_seed+1))
        # trainer.append_task(tasks.Print("Validset - NLL : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror, each_k_update=100))
        trainer.append_task(tasks.Print("Validset - NLL : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror))

        # direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(), loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm, each_k_update=50))

        # Save training progression
        def save_model(*args):
            trainer.save(experiment_path)

        trainer.append_task(stopping_criteria.EarlyStopping(nll.mean,
                                                            lookahead=args.lookahead,
                                                            eps=args.lookahead_eps,
                                                            callback=save_model))

    trainer.build_theano_graph()

    if resuming:
        with Timer("Loading"):
            trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()

    trainer.save(experiment_path)
    model.save(experiment_path)
def test_simple_convnade():
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    use_mask_as_input = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 3
    nb_orderings = 1

    print("Will train Convolutional Deep NADE for a total of {0} epochs.".format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [348, 349, 350, 351,
                           376, 377, 378, 379,
                           404, 405, 406, 407,
                           432, 433, 434, 435]

        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      use_mask_as_input=use_mask_as_input)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)
        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset, batch_size)

        trainer = Trainer(optimizer, batch_scheduler)
        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: : {}", avg_loss))

        # Print NLL mean/stderror.
        nll = views.LossView(loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, validset),
                             batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(validset,
                                                                                      batch_size=len(validset)))
        trainer.append_task(tasks.Print("Validset - NLL : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    with Timer("Checking the probs for all possible inputs sum to 1"):
        rng = np.random.RandomState(ordering_seed)
        D = np.prod(image_shape)
        inputs = cartesian([[0, 1]] * int(D), dtype=np.float32)
        ordering = np.arange(D, dtype=np.int32)
        rng.shuffle(ordering)

        symb_input = T.vector("input")
        symb_input.tag.test_value = inputs[-len(inputs) // 4]
        symb_ordering = T.ivector("ordering")
        symb_ordering.tag.test_value = ordering
        nll_of_x_given_o = theano.function([symb_input, symb_ordering],
                                           model.nll_of_x_given_o(symb_input, symb_ordering),
                                           name="nll_of_x_given_o")
        #theano.printing.pydotprint(nll_of_x_given_o, '{0}_nll_of_x_given_o_{1}'.format(model.__class__.__name__, theano.config.device), with_ids=True)

        for i in range(nb_orderings):
            print("Ordering:", ordering)
            ordering = np.arange(D, dtype=np.int32)
            rng.shuffle(ordering)

            nlls = []
            for no, input in enumerate(inputs):
                print("{}/{}".format(no, len(inputs)), end='\r')
                nlls.append(nll_of_x_given_o(input, ordering))

            print("{}/{} Done".format(len(inputs), len(inputs)))
            p_x = np.exp(np.logaddexp.reduce(-np.array(nlls)))
            print("Sum of p(x) for all x:", p_x)
            assert_almost_equal(p_x, 1., decimal=5)
def _build_trainer(nb_epochs):
    print("Will train Convolutional Deep NADE for a total of {0} epochs.".format(nb_epochs))

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      use_mask_as_input=use_mask_as_input)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize(initer.UniformInitializer(random_seed=1234))

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)
        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset, batch_size)

        trainer = Trainer(optimizer, batch_scheduler)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: : {}", avg_loss))

        # Print NLL mean/stderror.
        nll = views.LossView(loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, validset),
                             batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(validset,
                                                                                      batch_size=len(validset),
                                                                                      keep_mask=True))
        trainer.append_task(tasks.Print("Validset - NLL : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror))

        trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

    return trainer, nll, logger
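# Hypothetical save/load sketch (function name and checkpoint path are illustrative,
# not from the original file): the trainer returned above can be checkpointed after
# a short run and reloaded into a freshly built trainer, mirroring the
# `trainer.save(...)` / `trainer.load(...)` pattern used by the `main()` training scripts.
def _train_with_checkpoint(nb_epochs, checkpoint_dir="/tmp/convnade_checkpoint"):
    trainer, nll, logger = _build_trainer(nb_epochs)
    trainer.build_theano_graph()
    trainer.train()
    trainer.save(checkpoint_dir)

    resumed_trainer, _, _ = _build_trainer(nb_epochs)
    resumed_trainer.build_theano_graph()
    resumed_trainer.load(checkpoint_dir)
    return resumed_trainer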
def main():
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view', 'shuffle_streamlines']
    # Use this for hyperparams added in a new version, but nonexistent in older versions
    retrocompatibility_defaults = {'feed_previous_direction': False,
                                   'normalize': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(
        args, exclude=hyperparams_to_exclude,
        retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_tractography_dataset(args.train_subjects, trainset_volume_manager,
                                                      name="trainset", use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_tractography_dataset(args.valid_subjects, validset_volume_manager,
                                                      name="validset", use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        if args.view:
            tsne_view(trainset, trainset_volume_manager)
            sys.exit(0)

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset=trainset, train_mode=True)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension
        if hyperparams['feed_previous_direction']:
            input_size += 3

        model = model_factory(hyperparams, input_size=input_size,
                              output_size=batch_scheduler.target_size,
                              volume_manager=trainset_volume_manager)
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = loss_factory(hyperparams, model, trainset)

        if args.clip_gradient is not None:
            loss.append_gradient_modifier(DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: : {}", avg_loss))

        # if args.learn_to_stop:
        #     l2err_monitor = views.MonitorVariable(T.mean(loss.mean_sqr_error))
        #     avg_l2err = tasks.AveragePerEpoch(l2err_monitor)
        #     trainer.append_task(avg_l2err)
        #
        #     crossentropy_monitor = views.MonitorVariable(T.mean(loss.cross_entropy))
        #     avg_crossentropy = tasks.AveragePerEpoch(crossentropy_monitor)
        #     trainer.append_task(avg_crossentropy)
        #
        #     trainer.append_task(tasks.Print("Avg. training L2 err: : {}", avg_l2err))
        #     trainer.append_task(tasks.Print("Avg. training stopping: : {}", avg_crossentropy))
        #     trainer.append_task(tasks.Print("L2 err : {0:.4f}", l2err_monitor, each_k_update=100))
        #     trainer.append_task(tasks.Print("stopping : {0:.4f}", crossentropy_monitor, each_k_update=100))

        # Print NLL mean/stderror.
        # train_loss = L2DistanceForSequences(model, trainset)
        # train_batch_scheduler = StreamlinesBatchScheduler(trainset, batch_size=1000,
        #                                                   noisy_streamlines_sigma=None,
        #                                                   nb_updates_per_epoch=None,
        #                                                   seed=1234)
        # train_error = views.LossView(loss=train_loss, batch_scheduler=train_batch_scheduler)
        # trainer.append_task(tasks.Print("Trainset - Error : {0:.2f} | {1:.2f}", train_error.sum, train_error.mean))

        # HACK: To make sure all subjects in the volume_manager are used in a batch,
        # we have to split the trainset/validset in 2 volume managers.
        model.volume_manager = validset_volume_manager
        valid_loss = loss_factory(hyperparams, model, validset)
        valid_batch_scheduler = batch_scheduler_factory(hyperparams, dataset=validset, train_mode=False)

        valid_error = views.LossView(loss=valid_loss, batch_scheduler=valid_batch_scheduler)
        trainer.append_task(tasks.Print("Validset - Error : {0:.2f} | {1:.2f}",
                                        valid_error.sum, valid_error.mean))

        # HACK: Restore trainset volume manager
        model.volume_manager = trainset_volume_manager

        lookahead_loss = valid_error.sum

        direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(),
                                                              loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm))

        # logger = tasks.Logger(train_error.mean, valid_error.mean, valid_error.sum, direction_norm)
        logger = tasks.Logger(valid_error.mean, valid_error.sum, direction_norm)
        trainer.append_task(logger)

        if args.view:
            import pylab as plt

            def _plot(*args, **kwargs):
                plt.figure(1)
                plt.clf()
                plt.show(False)
                plt.subplot(121)
                plt.plot(np.array(logger.get_variable_history(0)).flatten(), label="Train")
                plt.plot(np.array(logger.get_variable_history(1)).flatten(), label="Valid")
                plt.legend()
                plt.subplot(122)
                plt.plot(np.array(logger.get_variable_history(3)).flatten(), label="||d'||")
                plt.draw()

            trainer.append_task(tasks.Callback(_plot))

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(lookahead_loss,
                                                         lookahead=args.lookahead,
                                                         eps=args.lookahead_eps,
                                                         callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()
def test_early_stopping():
    MAX_EPOCH = 100  # Add a max epoch just in case we get an infinite loop.

    class DummyCost(View):
        def __init__(self, initial_cost, costs):
            super().__init__()
            self.initial_cost = initial_cost
            self.costs = costs
            self.cpt = 0

        def update(self, status):
            if status.current_update == 0:
                return self.initial_cost

            cost = self.costs[self.cpt]
            self.cpt += 1
            return cost

    # 20 identical costs but should stop after 9 unchanged epochs.
    constant_cost = DummyCost(1, np.ones(20))
    lookahead = 9

    def callback(task, status):
        # This callback function should not be called.
        raise NameError("This callback function should not be called.")

    early_stopping = stopping_criteria.EarlyStopping(constant_cost, lookahead, callback=callback)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead)
    assert_equal(early_stopping.best_epoch, 0)
    assert_equal(early_stopping.best_cost, 1.)
    assert_equal(constant_cost.cpt, lookahead)

    # `lookahead` identical costs followed by `lookahead` lower identical costs.
    lookahead = 9
    costs = np.r_[np.ones(lookahead-1), np.zeros(lookahead+1)]
    simple_cost = DummyCost(1, costs)

    def callback(task, status):
        # This callback function should be called once after `lookahead` epochs.
        if status.current_epoch != lookahead:
            msg = "Callback should be fired up at epoch #{} not #{}.".format(lookahead, status.current_epoch)
            raise NameError(msg)

    early_stopping = stopping_criteria.EarlyStopping(simple_cost, lookahead, callback=callback)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, 2*lookahead)
    assert_equal(early_stopping.best_epoch, lookahead)
    assert_equal(early_stopping.best_cost, 0.)

    # 20 increasing costs but should stop after 9 increasing epochs.
    lookahead = 9
    costs = range(20)
    increasing_cost = DummyCost(0, costs)

    def callback(task, status):
        # This callback function should not be called.
        raise NameError("This callback function should not be called.")

    early_stopping = stopping_criteria.EarlyStopping(increasing_cost, lookahead, callback=callback)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead)
    assert_equal(early_stopping.best_epoch, 0)
    assert_equal(early_stopping.best_cost, 0.)

    # Test `min_nb_epochs`
    lookahead = 9
    min_nb_epochs = 15
    costs = range(20)
    increasing_cost = DummyCost(0, costs)

    early_stopping = stopping_criteria.EarlyStopping(increasing_cost, lookahead, min_nb_epochs=min_nb_epochs)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, min_nb_epochs)

    # Test that at the end the model is the best one.
    # `lookahead` decreasing costs followed by `lookahead+1` constant identical costs.
    lookahead = 9
    costs = np.r_[-np.arange(lookahead), np.zeros(lookahead+1)]
    simple_cost = DummyCost(1, costs)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    model = trainer._optimizer.loss.model

    # Add some parameters to the model.
    model.parameters.extend([sharedX(np.zeros(4)), sharedX(np.zeros((3, 5)))])

    # Callback that will change model parameters after each epoch.
    def callback(task, status):
        for param in model.parameters:
            param.set_value(param.get_value() + 1)

    trainer.append_task(tasks.Callback(callback))

    early_stopping = stopping_criteria.EarlyStopping(simple_cost, lookahead)
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    for param in model.parameters:
        assert_array_equal(param.get_value(), lookahead*np.ones_like(param.get_value()))
def test_new_fprop_matches_old_fprop():
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    use_mask_as_input = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 10
    nb_orderings = 1

    print("Will train Convolutional Deep NADE for a total of {0} epochs.".format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [348, 349, 350, 351,
                           376, 377, 378, 379,
                           404, 405, 406, 407,
                           432, 433, 434, 435]

        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1 + (use_mask_as_input is True)

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      use_mask_as_input=use_mask_as_input)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)
        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset, batch_size,
                                                                   use_mask_as_input=use_mask_as_input)

        trainer = Trainer(optimizer, batch_scheduler)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: : {}", avg_loss))

        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    mask_o_lt_d = batch_scheduler._shared_batch_mask
    fprop_output, fprop_pre_output = model.fprop(trainset.inputs, mask_o_lt_d,
                                                 return_output_preactivation=True)
    model_output = model.get_output(T.concatenate([trainset.inputs * mask_o_lt_d, mask_o_lt_d], axis=1))
    assert_array_equal(model_output.eval(), fprop_pre_output.eval())
    print(np.sum(abs(model_output.eval() - fprop_pre_output.eval())))
def test_simple_perceptron():
    # Loading dataset
    trainset, validset, testset = load_mnist()

    # Creating model
    nb_classes = 10
    model = Perceptron(trainset.input_size, nb_classes)
    model.initialize()  # By default, uniform initialization.

    # Building optimizer
    loss = NLL(model, trainset)
    optimizer = SGD(loss=loss)
    optimizer.append_direction_modifier(ConstantLearningRate(0.1))

    # Use mini batches of 100 examples.
    batch_scheduler = MiniBatchScheduler(trainset, 100)

    # Build trainer and add some tasks.
    trainer = Trainer(optimizer, batch_scheduler)

    # Print time for one epoch
    trainer.append_task(tasks.PrintEpochDuration())
    trainer.append_task(tasks.PrintTrainingDuration())

    # Log training error
    loss_monitor = views.MonitorVariable(loss.loss)
    avg_loss = tasks.AveragePerEpoch(loss_monitor)
    accum = tasks.Accumulator(loss_monitor)
    logger = tasks.Logger(loss_monitor, avg_loss)
    trainer.append_task(logger, avg_loss, accum)

    # Print NLL mean/stderror.
    nll = views.LossView(loss=NLL(model, validset),
                         batch_scheduler=FullBatchScheduler(validset))
    trainer.append_task(tasks.Print("Validset - NLL : {0:.1%} ± {1:.1%}",
                                    nll.mean, nll.stderror))

    # Print mean/stderror of classification errors.
    classif_error = views.LossView(loss=ClassificationError(model, validset),
                                   batch_scheduler=FullBatchScheduler(validset))
    trainer.append_task(tasks.Print("Validset - Classif error: {0:.1%} ± {1:.1%}",
                                    classif_error.mean, classif_error.stderror))

    # Train for 10 epochs (stopping criteria should be added at the end).
    trainer.append_task(stopping_criteria.MaxEpochStopping(10))
    trainer.train()
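# Hypothetical extension (helper name is illustrative, testset evaluation is not in
# the original test): the same LossView/FullBatchScheduler pattern used for the
# validset above can be reused to report a final testset classification error.
def append_testset_error_task(trainer, model, testset):
    test_classif_error = views.LossView(loss=ClassificationError(model, testset),
                                        batch_scheduler=FullBatchScheduler(testset))
    trainer.append_task(tasks.Print("Testset - Classif error: {0:.1%} ± {1:.1%}",
                                    test_classif_error.mean, test_classif_error.stderror))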
def test_simple_convnade():
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    consider_mask_as_channel = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 3
    nb_orderings = 1

    print("Will train Convolutional Deep NADE for a total of {0} epochs.".format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [348, 349, 350, 351,
                           376, 377, 378, 379,
                           404, 405, 406, 407,
                           432, 433, 434, 435]

        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      consider_mask_as_channel=True)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)
        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset, batch_size)

        trainer = Trainer(optimizer, batch_scheduler)
        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: : {}", avg_loss))

        # Print NLL mean/stderror.
        nll = views.LossView(loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, validset),
                             batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(validset,
                                                                                      batch_size=len(validset)))
        trainer.append_task(tasks.Print("Validset - NLL : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    with Timer("Checking the probs for all possible inputs sum to 1"):
        # rng = np.random.RandomState(ordering_seed)
        D = np.prod(image_shape)

        batch_scheduler = BatchSchedulerWithAutoregressiveMasks(validset,
                                                                batch_size=len(validset),
                                                                batch_id=0,
                                                                ordering_id=0,
                                                                concatenate_mask=model.nb_channels == 2,
                                                                seed=42)
        nll = views.LossView(loss=NllUsingBinaryCrossEntropyWithAutoRegressiveMask(model, validset,
                                                                                   batch_scheduler.mod),
                             batch_scheduler=batch_scheduler)
        nlls_xod_given_xoltd = nll.losses.view(Status())
        nlls = np.sum(nlls_xod_given_xoltd.reshape(-1, len(validset)), axis=0)
        nll_validset = np.mean(nlls)
        print("Sum of NLL for validset:", nll_validset)

        inputs = cartesian([[0, 1]] * int(D), dtype=np.float32)
        dataset = ReconstructionDataset(inputs)
        batch_scheduler = BatchSchedulerWithAutoregressiveMasks(dataset,
                                                                batch_size=len(dataset),
                                                                batch_id=0,
                                                                ordering_id=0,
                                                                concatenate_mask=model.nb_channels == 2,
                                                                seed=42)
        nll = views.LossView(loss=NllUsingBinaryCrossEntropyWithAutoRegressiveMask(model, dataset,
                                                                                   batch_scheduler.mod),
                             batch_scheduler=batch_scheduler)
        nlls_xod_given_xoltd = nll.losses.view(Status())
        nlls = np.sum(nlls_xod_given_xoltd.reshape(-1, len(dataset)), axis=0)
        p_x = np.exp(np.logaddexp.reduce(-nlls))
        print("Sum of p(x) for all x:", p_x)
        assert_almost_equal(p_x, 1., decimal=5)
def main():
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view', 'shuffle_streamlines']
    # Use this for hyperparams added in a new version, but nonexistent in older versions
    retrocompatibility_defaults = {'feed_previous_direction': False,
                                   'predict_offset': False,
                                   'normalize': False,
                                   'sort_streamlines': False,
                                   'keep_step_size': False,
                                   'use_layer_normalization': False,
                                   'drop_prob': 0.,
                                   'use_zoneout': False,
                                   'skip_connections': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(
        args, exclude=hyperparams_to_exclude,
        retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_tractography_dataset(args.train_subjects, trainset_volume_manager,
                                                      name="trainset", use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_tractography_dataset(args.valid_subjects, validset_volume_manager,
                                                      name="validset", use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset=trainset, train_mode=True)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension
        if hyperparams['feed_previous_direction']:
            input_size += 3

        model = model_factory(hyperparams, input_size=input_size,
                              output_size=batch_scheduler.target_size,
                              volume_manager=trainset_volume_manager)
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = loss_factory(hyperparams, model, trainset)

        if args.clip_gradient is not None:
            loss.append_gradient_modifier(DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: : {}", avg_loss))

        # if args.learn_to_stop:
        #     l2err_monitor = views.MonitorVariable(T.mean(loss.mean_sqr_error))
        #     avg_l2err = tasks.AveragePerEpoch(l2err_monitor)
        #     trainer.append_task(avg_l2err)
        #
        #     crossentropy_monitor = views.MonitorVariable(T.mean(loss.cross_entropy))
        #     avg_crossentropy = tasks.AveragePerEpoch(crossentropy_monitor)
        #     trainer.append_task(avg_crossentropy)
        #
        #     trainer.append_task(tasks.Print("Avg. training L2 err: : {}", avg_l2err))
        #     trainer.append_task(tasks.Print("Avg. training stopping: : {}", avg_crossentropy))
        #     trainer.append_task(tasks.Print("L2 err : {0:.4f}", l2err_monitor, each_k_update=100))
        #     trainer.append_task(tasks.Print("stopping : {0:.4f}", crossentropy_monitor, each_k_update=100))

        # Print NLL mean/stderror.
        # train_loss = L2DistanceForSequences(model, trainset)
        # train_batch_scheduler = StreamlinesBatchScheduler(trainset, batch_size=1000,
        #                                                   noisy_streamlines_sigma=None,
        #                                                   nb_updates_per_epoch=None,
        #                                                   seed=1234)
        # train_error = views.LossView(loss=train_loss, batch_scheduler=train_batch_scheduler)
        # trainer.append_task(tasks.Print("Trainset - Error : {0:.2f} | {1:.2f}", train_error.sum, train_error.mean))

        # HACK: To make sure all subjects in the volume_manager are used in a batch,
        # we have to split the trainset/validset in 2 volume managers.
        model.volume_manager = validset_volume_manager
        model.drop_prob = 0.  # Do not use dropout/zoneout for evaluation
        valid_loss = loss_factory(hyperparams, model, validset)
        valid_batch_scheduler = batch_scheduler_factory(hyperparams, dataset=validset, train_mode=False)

        valid_error = views.LossView(loss=valid_loss, batch_scheduler=valid_batch_scheduler)
        trainer.append_task(tasks.Print("Validset - Error : {0:.2f} | {1:.2f}",
                                        valid_error.sum, valid_error.mean))

        if hyperparams['model'] == 'ffnn_regression':
            valid_batch_scheduler2 = batch_scheduler_factory(hyperparams, dataset=validset, train_mode=False)
            valid_l2 = loss_factory(hyperparams, model, validset, loss_type="expected_value")
            valid_l2_error = views.LossView(loss=valid_l2, batch_scheduler=valid_batch_scheduler2)
            trainer.append_task(tasks.Print("Validset - {}".format(valid_l2.__class__.__name__) +
                                            "\t: {0:.2f} | {1:.2f}",
                                            valid_l2_error.sum, valid_l2_error.mean))

        # HACK: Restore trainset volume manager
        model.volume_manager = trainset_volume_manager
        model.drop_prob = hyperparams['drop_prob']  # Restore dropout

        lookahead_loss = valid_error.sum

        direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(),
                                                              loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm))

        # logger = tasks.Logger(train_error.mean, valid_error.mean, valid_error.sum, direction_norm)
        logger = tasks.Logger(valid_error.mean, valid_error.sum, direction_norm)
        trainer.append_task(logger)

        if args.view:
            import pylab as plt

            def _plot(*args, **kwargs):
                plt.figure(1)
                plt.clf()
                plt.show(False)
                plt.subplot(121)
                plt.plot(np.array(logger.get_variable_history(0)).flatten(), label="Train")
                plt.plot(np.array(logger.get_variable_history(1)).flatten(), label="Valid")
                plt.legend()
                plt.subplot(122)
                plt.plot(np.array(logger.get_variable_history(3)).flatten(), label="||d'||")
                plt.draw()

            trainer.append_task(tasks.Callback(_plot))

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(lookahead_loss,
                                                         lookahead=args.lookahead,
                                                         eps=args.lookahead_eps,
                                                         callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()