def setUp(self):
    # Build model
    net = self._make_test_model()
    # Build trainer
    self.trainer = Trainer(net)\
        .build_logger(TensorboardLogger(send_image_at_batch_indices=0,
                                        send_image_at_channel_indices='all',
                                        log_images_every=(20, 'iterations')),
                      log_directory=os.path.join(self.ROOT_DIR, 'logs'))\
        .build_criterion('CrossEntropyLoss')\
        .build_metric('CategoricalError')\
        .build_optimizer('Adam')\
        .validate_every((1, 'epochs'))\
        .save_every((2, 'epochs'), to_directory=os.path.join(self.ROOT_DIR, 'saves'))\
        .save_at_best_validation_score()\
        .set_max_num_epochs(2)\
        .cuda().set_precision(self.PRECISION)
    # Load CIFAR10 data
    train_loader, test_loader = \
        get_cifar10_loaders(root_directory=os.path.join(self.ROOT_DIR, 'data'),
                            download=self.DOWNLOAD_CIFAR)
    # Bind loaders
    self.trainer.bind_loader('train', train_loader).bind_loader('validate', test_loader)
def train_net_with_inferno(config_dict, net, criterion, optimizer, trainloader, valloader):
    """
    Trains the NeuralNet with inferno.

    :param config_dict: dict with configs
    :param net: NeuralNet
    :param criterion: criterion for NN
    :param optimizer: optimizer for NN
    :param trainloader: dataloader with train data
    :param valloader: dataloader with validation data
    """
    print("Start training with inferno!")
    from inferno.trainers.basic import Trainer
    from inferno.trainers.callbacks.essentials import SaveAtBestValidationScore

    model_folder = os.path.join(config_dict["project_folder"], "model/")
    if not os.path.exists(model_folder):
        os.mkdir(model_folder)

    trainer = Trainer(net) \
        .save_every((1, 'epochs'), to_directory=model_folder) \
        .build_criterion(criterion) \
        .build_optimizer(optimizer) \
        .build_metric(sorensen_dice_metric) \
        .evaluate_metric_every('never') \
        .validate_every((1, 'epochs'), for_num_iterations=50) \
        .register_callback(SaveAtBestValidationScore(smoothness=.5))
    trainer.set_max_num_epochs(config_dict['max_train_epochs'])
    trainer.bind_loader('train', trainloader).bind_loader('validate', valloader)
    trainer.cuda()
    trainer.fit()
def set_up_training(project_directory, config, data_config, load_pretrained_model):
    # Get model
    if load_pretrained_model:
        model = Trainer().load(from_directory=project_directory,
                               filename='Weights/checkpoint.pytorch').model
    else:
        model_name = config.get('model_name')
        model = getattr(models, model_name)(**config.get('model_kwargs'))

    criterion = SorensenDiceLoss()
    loss_train = LossWrapper(criterion=criterion,
                             transforms=Compose(ApplyAndRemoveMask(), InvertTarget()))
    loss_val = LossWrapper(criterion=criterion,
                           transforms=Compose(RemoveSegmentationFromTarget(),
                                              ApplyAndRemoveMask(), InvertTarget()))

    # Build trainer and validation metric
    logger.info("Building trainer.")
    smoothness = 0.95
    offsets = data_config['volume_config']['segmentation']['affinity_config']['offsets']
    metric = ArandErrorFromMulticut(average_slices=False,
                                    use_2d_ws=True, n_threads=8,
                                    weight_edges=True, offsets=offsets)

    trainer = Trainer(model)\
        .save_every((1000, 'iterations'),
                    to_directory=os.path.join(project_directory, 'Weights'))\
        .build_criterion(loss_train)\
        .build_validation_criterion(loss_val)\
        .build_optimizer(**config.get('training_optimizer_kwargs'))\
        .evaluate_metric_every('never')\
        .validate_every((100, 'iterations'), for_num_iterations=1)\
        .register_callback(SaveAtBestValidationScore(smoothness=smoothness, verbose=True))\
        .build_metric(metric)\
        .register_callback(AutoLR(factor=0.98,
                                  patience='100 iterations',
                                  monitor_while='validating',
                                  monitor_momentum=smoothness,
                                  consider_improvement_with_respect_to='previous'))\
        .register_callback(GarbageCollection())

    logger.info("Building logger.")
    # Build logger
    tensorboard = TensorboardLogger(log_scalars_every=(1, 'iteration'),
                                    log_images_every=(100, 'iterations'),
                                    log_histograms_every='never').observe_states(
        ['validation_input', 'validation_prediction', 'validation_target'],
        observe_while='validating')

    trainer.build_logger(tensorboard, log_directory=os.path.join(project_directory, 'Logs'))
    return trainer
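# Hedged usage sketch (not from the source): driving the trainer returned by
# set_up_training above, following the bind_loader/fit pattern used by the
# other snippets in this file. `train_loader` and `val_loader` are
# hypothetical placeholders for project-specific data loaders.
trainer = set_up_training(project_directory, config, data_config,
                          load_pretrained_model=False)
trainer.bind_loader('train', train_loader).bind_loader('validate', val_loader)
trainer.cuda()
trainer.fit()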
def test_cifar(self):
    from inferno.trainers.basic import Trainer
    from inferno.io.box.cifar10 import get_cifar10_loaders
    # Build cifar10 loaders
    trainloader, testloader = get_cifar10_loaders(
        root_directory=join(self.ROOT_DIR, 'data'), download=self.DOWNLOAD_CIFAR)
    # Make model
    net = self._make_test_model()
    tic = time.time()
    # Make trainer
    trainer = Trainer(model=net)\
        .build_optimizer('Adam')\
        .build_criterion('CrossEntropyLoss')\
        .build_metric('CategoricalError')\
        .validate_every((1, 'epochs'))\
        .save_every((1, 'epochs'), to_directory=join(self.ROOT_DIR, 'saves'))\
        .save_at_best_validation_score()\
        .set_max_num_epochs(2)
    # Bind trainer to datasets
    trainer.bind_loader('train', trainloader).bind_loader('validate', testloader)
    # Check device and fit
    if self.CUDA:
        if self.HALF_PRECISION:
            trainer.cuda().set_precision('half').fit()
        else:
            trainer.cuda().fit()
    else:
        trainer.fit()
    toc = time.time()
    print("[*] Elapsed time: {} seconds.".format(toc - tic))
def set_up_training(project_directory, config, data_config,
                    criterion, balance, load_pretrained_model):
    # Get model
    if load_pretrained_model:
        model = Trainer().load(from_directory=project_directory,
                               filename='Weights/checkpoint.pytorch').model
    else:
        model_name = config.get('model_name')
        model = getattr(models, model_name)(**config.get('model_kwargs'))

    # TODO
    logger.info("Using criterion: %s" % criterion)

    # TODO this should go somewhere more prominent
    affinity_offsets = data_config['volume_config']['segmentation']['affinity_offsets']

    # TODO implement affinities on gpu again ?!
    criterion = CRITERIA[criterion]
    loss = LossWrapper(criterion=criterion(),
                       transforms=Compose(MaskTransitionToIgnoreLabel(affinity_offsets),
                                          RemoveSegmentationFromTarget(),
                                          InvertTarget()),
                       weight_function=BalanceAffinities(
                           ignore_label=0, offsets=affinity_offsets) if balance else None)

    # Build trainer and validation metric
    logger.info("Building trainer.")
    smoothness = 0.95
    # use multicut pipeline for validation
    metric = ArandErrorFromSegmentationPipeline(
        local_affinity_multicut_from_wsdt2d(n_threads=10, time_limit=120))

    trainer = Trainer(model)\
        .save_every((1000, 'iterations'),
                    to_directory=os.path.join(project_directory, 'Weights'))\
        .build_criterion(loss)\
        .build_optimizer(**config.get('training_optimizer_kwargs'))\
        .evaluate_metric_every('never')\
        .validate_every((100, 'iterations'), for_num_iterations=1)\
        .register_callback(SaveAtBestValidationScore(smoothness=smoothness, verbose=True))\
        .build_metric(metric)\
        .register_callback(AutoLR(factor=0.98,
                                  patience='100 iterations',
                                  monitor_while='validating',
                                  monitor_momentum=smoothness,
                                  consider_improvement_with_respect_to='previous'))

    logger.info("Building logger.")
    # Build logger
    tensorboard = TensorboardLogger(log_scalars_every=(1, 'iteration'),
                                    log_images_every=(100, 'iterations')).observe_states(
        ['validation_input', 'validation_prediction', 'validation_target'],
        observe_while='validating')

    trainer.build_logger(tensorboard, log_directory=os.path.join(project_directory, 'Logs'))
    return trainer
def train(net, dataset, criterion, num_epochs, batch_size, learn_rate, dir_name):
    dir_name = os.path.join('net/', dir_name)
    trainer = Trainer(net[0])
    if os.path.exists(os.path.join(dir_name, 'model.pytorch')):
        net_temp = trainer.load_model(dir_name).model
        net[0].load_state_dict(net_temp.state_dict())
        print("Loaded checkpoint directly")
    else:
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        data_loader = torch.utils.data.DataLoader(dataset, shuffle=True,
                                                  batch_size=batch_size)
        net[0].train()
        trainer \
            .build_criterion(LossPrinter(criterion)) \
            .bind_loader('train', data_loader) \
            .build_optimizer('Adam', lr=learn_rate) \
            .set_max_num_epochs(num_epochs)
        if torch.cuda.is_available():
            trainer.cuda()
        trainer.fit()
        trainer.save_model(dir_name)
    net[0].cpu()
    net[0].eval()
def set_up_training(project_directory, config, data_config, load_pretrained_model):
    # Get model
    if load_pretrained_model:
        model = Trainer().load(from_directory=project_directory,
                               filename='Weights/checkpoint.pytorch').model
    else:
        model_name = config.get('model_name')
        model = getattr(models, model_name)(**config.get('model_kwargs'))

    affinity_offsets = data_config['volume_config']['segmentation']['affinity_offsets']
    loss = MultiOutputLossWrapper(criterion=SorensenDiceLoss(),
                                  transforms=Compose(MaskTransitionToIgnoreLabel(affinity_offsets),
                                                     RemoveSegmentationFromTarget(),
                                                     InvertTarget()))

    # Build trainer and validation metric
    logger.info("Building trainer.")
    smoothness = 0.95

    # use multicut pipeline for validation
    # metric = ArandErrorFromSegmentationPipeline(local_affinity_multicut_from_wsdt2d(n_threads=10,
    #                                                                                 time_limit=120))
    # use damws for validation
    stride = [2, 10, 10]
    metric = ArandErrorFromSegmentationPipeline(
        DamWatershed(affinity_offsets, stride, randomize_bounds=False))

    trainer = Trainer(model)\
        .save_every((1000, 'iterations'),
                    to_directory=os.path.join(project_directory, 'Weights'))\
        .build_criterion(loss)\
        .build_optimizer(**config.get('training_optimizer_kwargs'))\
        .evaluate_metric_every('never')\
        .validate_every((100, 'iterations'), for_num_iterations=1)\
        .register_callback(SaveAtBestValidationScore(smoothness=smoothness, verbose=True))\
        .build_metric(metric)\
        .register_callback(AutoLR(factor=0.98,
                                  patience='100 iterations',
                                  monitor_while='validating',
                                  monitor_momentum=smoothness,
                                  consider_improvement_with_respect_to='previous'))

    # FIXME some issues with conda tf for torch0.3 env
    # logger.info("Building logger.")
    # # Build logger
    # tensorboard = TensorboardLogger(log_scalars_every=(1, 'iteration'),
    #                                 log_images_every=(100, 'iterations')).observe_states(
    #     ['validation_input', 'validation_prediction', 'validation_target'],
    #     observe_while='validating'
    # )
    # trainer.build_logger(tensorboard, log_directory=os.path.join(project_directory, 'Logs'))

    return trainer
def load_model(args):
    trainer = Trainer()
    trainer.load(from_directory=args.load_directory, best=False)
    trainer.set_max_num_epochs(args.epochs + trainer.epoch_count)
    model = trainer.model
    trainer.build_logger(TensorboardLogger(log_scalars_every=(1, 'iteration'),
                                           log_images_every='never'),
                         log_directory=args.save_directory)
    trainer.save_to_directory(args.save_directory)
    return model, trainer
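# Hedged usage sketch (assumption, not from the source): resuming training with
# the (model, trainer) pair returned by load_model above. `train_loader` and
# `validate_loader` are hypothetical placeholders for the caller's loaders.
model, trainer = load_model(args)
trainer.bind_loader('train', train_loader).bind_loader('validate', validate_loader)
if args.cuda:
    trainer.cuda()
trainer.fit()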
def set_up_training(project_directory, config, data_config):
    # Get model
    model_name = config.get('model_name')
    model = getattr(models, model_name)(**config.get('model_kwargs'))

    criterion = SorensenDiceLoss()
    loss_train = LossWrapper(criterion=criterion, transforms=InvertTarget())
    loss_val = LossWrapper(criterion=criterion,
                           transforms=Compose(RemoveSegmentationFromTarget(),
                                              InvertTarget()))

    # Build trainer and validation metric
    logger.info("Building trainer.")
    smoothness = 0.75
    offsets = data_config['volume_config']['segmentation']['affinity_config']['offsets']
    strides = [1, 10, 10]
    metric = ArandErrorFromMWS(average_slices=False, offsets=offsets,
                               strides=strides, randomize_strides=False)

    trainer = Trainer(model)\
        .save_every((1000, 'iterations'),
                    to_directory=os.path.join(project_directory, 'Weights'))\
        .build_criterion(loss_train)\
        .build_validation_criterion(loss_val)\
        .build_optimizer(**config.get('training_optimizer_kwargs'))\
        .evaluate_metric_every('never')\
        .validate_every((100, 'iterations'), for_num_iterations=1)\
        .register_callback(SaveAtBestValidationScore(smoothness=smoothness, verbose=True))\
        .build_metric(metric)\
        .register_callback(AutoLR(factor=0.99,
                                  patience='100 iterations',
                                  monitor_while='validating',
                                  monitor_momentum=smoothness,
                                  consider_improvement_with_respect_to='previous'))

    logger.info("Building logger.")
    # Build logger
    tensorboard = TensorboardLogger(log_scalars_every=(1, 'iteration'),
                                    log_images_every=(100, 'iterations'),
                                    log_histograms_every='never').observe_states(
        ['validation_input', 'validation_prediction', 'validation_target'],
        observe_while='validating')

    trainer.build_logger(tensorboard, log_directory=os.path.join(project_directory, 'Logs'))
    return trainer
def test_multi_gpu(self):
    import torch
    if not torch.cuda.is_available():
        return
    from inferno.trainers.basic import Trainer
    from inferno.io.box.cifar10 import get_cifar10_loaders
    import os
    # Make model
    net = self._make_test_model()
    # Make trainer
    trainer = Trainer(model=net) \
        .build_optimizer('Adam') \
        .build_criterion('CrossEntropyLoss') \
        .build_metric('CategoricalError') \
        .validate_every((1, 'epochs')) \
        .save_every((1, 'epochs'), to_directory=os.path.join(self.ROOT_DIR, 'saves')) \
        .save_at_best_validation_score() \
        .set_max_num_epochs(2)\
        .cuda(devices=[0, 1, 2, 3])
    train_loader, validate_loader = get_cifar10_loaders(
        root_directory=self.ROOT_DIR, download=True)
    trainer.bind_loader('train', train_loader)
    trainer.bind_loader('validate', validate_loader)
    trainer.fit()
def set_up_training(project_directory, config, data_config,
                    load_pretrained_model, max_iters):
    # Get model
    if load_pretrained_model:
        model = Trainer().load(from_directory=project_directory,
                               filename='Weights/checkpoint.pytorch').model
    else:
        model_name = config.get('model_name')
        model = getattr(models, model_name)(**config.get('model_kwargs'))

    loss = LossWrapper(criterion=SorensenDiceLoss(),
                       transforms=Compose(MaskIgnoreLabel(),
                                          RemoveSegmentationFromTarget()))
    # TODO loss transforms:
    # - Invert Target ???

    # Build trainer and validation metric
    logger.info("Building trainer.")
    # smoothness = 0.95

    # TODO set up validation ?!
    trainer = Trainer(model)\
        .save_every((1000, 'iterations'),
                    to_directory=os.path.join(project_directory, 'Weights'))\
        .build_criterion(loss)\
        .build_optimizer(**config.get('training_optimizer_kwargs'))\
        .evaluate_metric_every('never')\
        .register_callback(ManualLR(decay_specs=[((k * 100, 'iterations'), 0.99)
                                                 for k in range(1, max_iters // 100)]))
    # .validate_every((100, 'iterations'), for_num_iterations=1)\
    # .register_callback(SaveAtBestValidationScore(smoothness=smoothness, verbose=True))\
    # .build_metric(metric)\
    # .register_callback(AutoLR(factor=0.98,
    #                           patience='100 iterations',
    #                           monitor_while='validating',
    #                           monitor_momentum=smoothness,
    #                           consider_improvement_with_respect_to='previous'))

    logger.info("Building logger.")
    # Build logger
    tensorboard = TensorboardLogger(log_scalars_every=(1, 'iteration'),
                                    log_images_every=(100, 'iterations'))
    # .observe_states(
    #     ['validation_input', 'validation_prediction', 'validation_target'],
    #     observe_while='validating'
    # )

    trainer.build_logger(tensorboard, log_directory=os.path.join(project_directory, 'Logs'))
    return trainer
def set_up_training(project_directory, config, data_config, load_pretrained_model):
    # Get model
    if load_pretrained_model:
        model = Trainer().load(from_directory=project_directory,
                               filename='Weights/checkpoint.pytorch').model
    else:
        model_name = config.get('model_name')
        model = getattr(models, model_name)(**config.get('model_kwargs'))

    loss = dice_loss()
    loss_val = dice_loss(is_val=True)
    metric = mws_metric()
    # metric = loss_val

    # Build trainer and validation metric
    logger.info("Building trainer.")
    smoothness = 0.9

    trainer = Trainer(model)\
        .save_every((1000, 'iterations'),
                    to_directory=os.path.join(project_directory, 'Weights'))\
        .build_criterion(loss)\
        .build_validation_criterion(loss_val)\
        .build_optimizer(**config.get('training_optimizer_kwargs'))\
        .evaluate_metric_every('never')\
        .validate_every((100, 'iterations'), for_num_iterations=5)\
        .register_callback(SaveAtBestValidationScore(smoothness=smoothness, verbose=True))\
        .build_metric(metric)\
        .register_callback(AutoLR(factor=0.98,
                                  patience='100 iterations',
                                  monitor_while='validating',
                                  monitor_momentum=smoothness,
                                  consider_improvement_with_respect_to='previous'))\
        .register_callback(GarbageCollection())

    logger.info("Building logger.")
    # Build logger
    tensorboard = TensorboardLogger(log_scalars_every=(1, 'iteration'),
                                    log_images_every=(100, 'iterations'),
                                    log_histograms_every='never').observe_states(
        ['validation_input', 'validation_prediction', 'validation_target'],
        observe_while='validating')

    trainer.build_logger(tensorboard, log_directory=os.path.join(project_directory, 'Logs'))
    return trainer
def __init__(self, model_path, halo, gpu=0, use_best=True,
             prep_model=None, **augmentation_kwargs):
    # load the model and prep it if specified
    assert os.path.exists(model_path), model_path
    trainer = Trainer().load(from_directory=model_path, best=use_best)
    self.model = trainer.model.cuda(gpu)
    self.model.eval()
    if prep_model is not None:
        self.model = prep_model(self.model)
    self.gpu = gpu
    self.halo = halo
    self.lock = threading.Lock()
    # build the test-time-augmenter if we have augmentation kwargs
    if augmentation_kwargs:
        assert TestTimeAugmenter is not None, "Need neurofire for test-time-augmentation"
        self.offsets = augmentation_kwargs.pop('offsets', None)
        print(augmentation_kwargs)
        self.augmenter = self.build_augmenter(**augmentation_kwargs)
    else:
        self.augmenter = None
def predict(project_folder, sample, only_nn_channels=True):
    gpu = 0
    checkpoint = os.path.join(project_folder, 'Weights')
    data_config_file = './template_config/prediction_configs/sample%s.yml' % sample
    print("[*] Loading CREMI with Configuration at: {}".format(data_config_file))

    # Load CREMI sample
    cremi = RawVolumeWithDefectAugmentation.from_config(data_config_file)

    # Load model
    trainer = Trainer().load(from_directory=checkpoint, best=True).cuda(gpu)
    model = trainer.model

    inference_config_file = './template_config/prediction_configs/inference_config.yml'
    inference_engine = SimpleInferenceEngine.from_config(inference_config_file, model)
    output = inference_engine.infer(cremi)
    print("[*] Output has shape {}".format(str(output.shape)))

    save_folder = os.path.join(project_folder, 'Predictions')
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    save_path = os.path.join(save_folder, 'prediction_sample%s.h5' % sample)

    if only_nn_channels:
        output = output[:3]
        save_path = save_path[:-3] + '_nnaffinities.h5'

    toh5(output.astype('float32'), save_path, compression='lzf')
def test_multi_io(self):
    from torch.utils.data.dataset import Dataset
    from torch.utils.data.dataloader import DataLoader
    from inferno.trainers.basic import Trainer
    import torch

    class DummyDataset(Dataset):
        def __len__(self):
            return 42

        def __getitem__(self, item):
            # 2 inputs and 3 targets (say)
            return torch.rand(3, 32, 32), \
                   torch.rand(3, 32, 32), \
                   torch.rand(1).uniform_(), \
                   torch.rand(1).uniform_(), \
                   torch.rand(1).uniform_()

    class DummyNetwork(torch.nn.Module):
        def __init__(self):
            super(DummyNetwork, self).__init__()
            self.conv = torch.nn.Conv2d(3, 1, 3, padding=1)

        def forward(self, *inputs):
            assert len(inputs) == 2
            out = self.conv(inputs[0])
            return out.view(inputs[0].size(0), -1).mean(1), \
                   out.view(inputs[0].size(0), -1).mean(1), \
                   out.view(inputs[0].size(0), -1).mean(1)

    class DummyCriterion(torch.nn.Module):
        def forward(self, predictions, targets):
            assert len(predictions) == len(targets) == 3
            return predictions[0].mean()

    loader = DataLoader(DummyDataset())
    net = DummyNetwork()
    trainer = Trainer(net)\
        .build_criterion(DummyCriterion)\
        .build_optimizer('Adam')\
        .set_max_num_iterations(50)\
        .bind_loader('train', loader, num_inputs=2, num_targets=3)
    trainer.fit()
def test_auto_registry(self):
    callback_engine = CallbackEngine().bind_trainer(Trainer())
    callback_engine.register_callback(DummyCallback())
    self.assertIsInstance(next(iter(callback_engine
                                    ._callback_registry
                                    .get('end_of_training_iteration'))),
                          DummyCallback)
    with self.assertRaises(AssertionError):
        callback_engine.register_callback(WrongDummyCallback())
def __init__(self, model_path, halo, gpu=0, use_best=True, prep_model=None):
    assert os.path.exists(model_path), model_path
    trainer = Trainer().load(from_directory=model_path, best=use_best)
    self.model = trainer.model.cuda(gpu)
    self.model.eval()
    if prep_model is not None:
        self.model = prep_model(self.model)
    self.gpu = gpu
    self.halo = halo
    self.lock = threading.Lock()
def __init__(self, features: bool = False, out_size: int = 28):
    '''
    :param features: If True -> returns map of features for each window
                     If False -> returns map of responses for each window
    :param out_size: Size of the output image. Influences the stride.
    '''
    trainer = Trainer(ICL_DenseNet_3fc)
    if torch.cuda.is_available():
        trainer = trainer.load(
            from_directory='../centrioles/models/ICL_DenseNet_3fc/true_save/weights/',
            best=True)
    else:
        trainer = trainer.load(
            from_directory='../centrioles/models/ICL_DenseNet_3fc/true_save/weights/',
            best=True, map_location='cpu')
    self.model = trainer.model
    self.model.features_needed = True
    self.features = features
    self.out_size = out_size
    self.to_torch = inftransforms.generic.AsTorchBatch(dimensionality=2)
def test_serialization(self):
    if not hasattr(self, 'trainer'):
        self.setUp()
    # Serialize
    self.trainer.save()
    # Unserialize
    trainer = Trainer().load(os.path.join(self.ROOT_DIR, 'saves'))
    train_loader, test_loader = \
        get_cifar10_loaders(root_directory=os.path.join(self.ROOT_DIR, 'data'),
                            download=self.DOWNLOAD_CIFAR)
    trainer.bind_loader('train', train_loader).bind_loader('validate', test_loader)
    trainer.fit()
    trainer.print("Inspect logs at: {}".format(self.trainer.log_directory))
def test_multi_gpu_setup(self):
    from torch.nn import CrossEntropyLoss
    from inferno.trainers.basic import Trainer
    # Test base_device = 'cpu'
    # Build model
    net = self._make_test_model()
    # Make dummy criterion
    criterion = CrossEntropyLoss(weight=torch.rand(10))
    # Make trainer
    trainer = Trainer(net).build_criterion(criterion).cuda([0, 1], base_device='cpu')
    self.assertIsInstance(trainer.criterion.weight, torch.FloatTensor)
    # Test base_device = 'cuda'
    # Build model
    net = self._make_test_model()
    criterion = CrossEntropyLoss(weight=torch.rand(10))
    # Make trainer
    trainer = Trainer(net).build_criterion(criterion).cuda([0, 1], base_device='cuda')
    self.assertIsInstance(trainer.criterion.weight, torch.cuda.FloatTensor)
def __init__(self, model_path, crop=None, gpu=0, use_best=True):
    from inferno.trainers.basic import Trainer
    assert os.path.exists(model_path), model_path
    trainer = Trainer().load(from_directory=model_path, best=use_best)
    self.model = trainer.model.cuda(gpu)
    self.gpu = gpu
    # validate cropping
    if crop is not None:
        assert isinstance(crop, (list, tuple))
        assert len(crop) == 3
    self.crop = crop
    self.lock = threading.Lock()
def train_model(args):
    model = MNISTCNNModel()
    train_loader, validate_loader = mnist_data_loaders(args)

    # Build trainer
    trainer = Trainer(model) \
        .build_criterion('CrossEntropyLoss') \
        .build_metric('CategoricalError') \
        .build_optimizer('Adam') \
        .validate_every((2, 'epochs')) \
        .save_every((5, 'epochs')) \
        .save_to_directory(args.save_directory) \
        .set_max_num_epochs(args.epochs) \
        .build_logger(TensorboardLogger(log_scalars_every=(1, 'iteration'),
                                        log_images_every='never'),
                      log_directory=args.save_directory)

    # Bind loaders
    trainer \
        .bind_loader('train', train_loader) \
        .bind_loader('validate', validate_loader)

    if args.cuda:
        trainer.cuda()

    # Go!
    trainer.fit()
def train_model(model, train_dataset, valid_dataset, args):
    kw = {'num_workers': 2, 'pin_memory': True} if args.cuda else {}
    train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                              shuffle=True, **kw)
    validate_loader = DataLoader(dataset=valid_dataset, batch_size=args.batch_size, **kw)

    trainer = Trainer(model) \
        .build_criterion(CrossEntropyLoss3D) \
        .build_metric(CategoricalError3D) \
        .build_optimizer('Adam', weight_decay=1e-6) \
        .save_every((1, 'epochs')) \
        .validate_every((100, 'iteration')) \
        .save_to_directory(args.save_directory) \
        .set_max_num_epochs(args.epochs) \
        .build_logger(TensorboardLogger(log_scalars_every=(1, 'iteration'),
                                        log_images_every='never'),
                      log_directory='tb_log/') \
        .bind_loader('train', train_loader) \
        .bind_loader('validate', validate_loader)

    if args.cuda:
        trainer.cuda()

    # Go!
    trainer.fit()
def test_serialization(self):
    from inferno.trainers.basic import Trainer
    import os
    # Make model
    net = self._make_test_model()
    # Make trainer
    trainer = Trainer(model=net) \
        .build_optimizer('Adam') \
        .build_criterion('CrossEntropyLoss') \
        .build_metric('CategoricalError') \
        .validate_every((1, 'epochs')) \
        .save_every((1, 'epochs'), to_directory=os.path.join(self.ROOT_DIR, 'saves')) \
        .save_at_best_validation_score() \
        .set_max_num_epochs(2)
    # Try to serialize
    trainer.save()
    # Try to unserialize
    trainer = Trainer(net).save_to_directory(os.path.join(self.ROOT_DIR, 'saves')).load()
    # Make sure everything survived (especially the logger)
    self.assertEqual(trainer._logger.__class__.__name__, 'BasicTensorboardLogger')
def train_model(args):
    model = model_fn()
    train_loader, validate_loader = mnist_data_loaders(args)

    # Build trainer
    trainer = Trainer(model) \
        .build_criterion('RegularizedCrossEntropyLoss') \
        .build_metric('CategoricalError') \
        .build_optimizer('Adam') \
        .validate_every((1, 'epochs')) \
        .save_every((1, 'epochs')) \
        .save_to_directory(args.save_directory) \
        .set_max_num_epochs(args.epochs) \
        .build_logger(TensorboardLogger(log_scalars_every=(1, 'iteration'),
                                        log_images_every='never'),
                      log_directory=args.save_directory)

    # Record regularization losses
    trainer.logger.observe_training_and_validation_states([
        'main_loss',
        'total_regularization_loss',
        'activity_regularization',
        'l1_weight_regularization'
    ])

    # Bind loaders
    trainer \
        .bind_loader('train', train_loader) \
        .bind_loader('validate', validate_loader)

    if args.cuda:
        trainer.cuda()

    # Go!
    trainer.fit()
def test_save(self):
    from inferno.trainers.basic import Trainer
    trainer = Trainer().save_to_directory(to_directory=self.ROOT_DIR,
                                          checkpoint_filename='dummy.pytorch')
    trainer.save()
    # Instantiate new trainer and load
    trainer = Trainer().load(from_directory=self.ROOT_DIR, filename='dummy.pytorch')
def test_serialization(self):
    # Build engine and trainer
    callback_engine = CallbackEngine().bind_trainer(Trainer())
    callback_engine.register_callback(DummyCallback())
    # Serialize
    torch.save(callback_engine, join(self.ROOT_DIR, 'callback_engine.pkl'))
    # Unserialize
    callback_engine = torch.load(join(self.ROOT_DIR, 'callback_engine.pkl'))
    # Make sure the trainer is detached
    self.assertIsNone(callback_engine._trainer)
    self.assertIsInstance(next(iter(callback_engine
                                    ._callback_registry
                                    .get('end_of_training_iteration'))),
                          DummyCallback)
def get_trainer(self, input_channels):
    # Build model
    net = self._make_test_model(input_channels)
    # Build trainer
    trainer = Trainer(net)\
        .build_logger(TensorboardLogger(send_image_at_batch_indices=0,
                                        send_image_at_channel_indices='all',
                                        log_images_every=(20, 'iterations')),
                      log_directory=self.LOG_DIRECTORY)\
        .build_criterion('CrossEntropyLoss')\
        .build_metric('CategoricalError')\
        .build_optimizer('Adam')\
        .validate_every((1, 'epochs'))\
        .save_every((2, 'epochs'), to_directory=self.SAVE_DIRECTORY)\
        .save_at_best_validation_score()\
        .set_max_num_epochs(2)\
        .set_precision(self.PRECISION)
    # Bind loaders
    train_loader, test_loader = self.get_random_dataloaders(input_channels=input_channels)
    trainer.bind_loader('train', train_loader).bind_loader('validate', test_loader)
    return trainer
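# Hedged usage sketch (assumption, not from the source): a minimal smoke test
# built on get_trainer above; the loaders are already bound, so fitting is a
# single call. The method name `test_fit_smoke` is hypothetical.
def test_fit_smoke(self):
    trainer = self.get_trainer(input_channels=3)
    trainer.fit()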
def test_training_cpu(self):
    """Test Trainer."""
    # Build model
    model = self.build_graph_model()
    # Build callbacks
    # save_info_recorder = RecordSaveInfo()
    # Build trainer
    trainer = Trainer(model)\
        .save_every((2, 'epochs'), to_directory=join(self.WORKING_DIRECTORY, 'Weights'))\
        .validate_every((100, 'iterations'), for_num_iterations=10)\
        .set_max_num_epochs(10)\
        .save_at_best_validation_score()\
        .build_optimizer('RMSprop')\
        .build_criterion('CrossEntropyLoss')\
        .build_metric('CategoricalError')\
        .register_callback(NaNDetector)
    # Bind datasets
    trainer\
        .bind_loader('train', self.train_loader)\
        .bind_loader('validate', self.validate_loader)
    # Go
    trainer.fit()
def run_inference(project_dir, out_file, inference_config):
    print("Loading model...")
    model = Trainer().load(from_directory=os.path.join(project_dir, "Weights"),
                           best=True).model
    print("Loading dataset...")
    dataset = load_volume(inference_config)
    engine = SimpleInferenceEngine.from_config(inference_config, model)
    print("Run prediction...")
    out = engine.infer(dataset)
    if out_file != '':
        print("Save prediction to %s ..." % out_file)
        with h5py.File(out_file, 'w') as f:
            f.create_dataset('data', data=out, compression='gzip')
    return out
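# Hedged usage sketch (hypothetical paths, not from the source): calling
# run_inference above; the prediction is returned and, since out_file is
# non-empty, also written to HDF5.
prediction = run_inference(project_dir='path/to/project',
                           out_file='path/to/prediction.h5',
                           inference_config='path/to/inference_config.yml')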
def task(env):
    dataset_name = 'MNIST'
    dataset_path = env.dataset(dataset_name)
    batch_size = 128
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    mnist_set = torchvision.datasets.MNIST(str(dataset_path), train=True,
                                           download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(mnist_set, batch_size=batch_size,
                                               shuffle=True, num_workers=2)
    mnist_test_set = torchvision.datasets.MNIST(str(dataset_path), train=False,
                                                download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(mnist_test_set, batch_size=512,
                                              shuffle=True, num_workers=2)

    model = Model(n_classes=10, in_channels=1, layers=4)
    exp = TrainLog(env, dataset_name, model, log_time=True)
    logging.info(' saving to %s', exp.save_directory)
    logging.info(' logging to %s', exp.log_directory)

    # Load loaders
    # train_loader, validate_loader = get_cifar10_loaders(DATASET_DIRECTORY,
    #                                                     download=DOWNLOAD_CIFAR)

    # Build trainer
    iterations = 5000
    trainer = Trainer(model) \
        .build_criterion('NLLLoss') \
        .build_metric('CategoricalError') \
        .build_optimizer('Adam', lr=0.001) \
        .save_every((1, 'epochs')) \
        .save_to_directory(str(exp.save_directory)) \
        .validate_every((1, 'epochs'))\
        .set_max_num_iterations(iterations) \
        .build_logger(TensorboardLogger(log_scalars_every=(1, 'iteration'),
                                        log_images_every='never'),
                      log_directory=str(exp.log_directory))

    # Bind loaders
    trainer.bind_loader('train', train_loader)
    trainer.bind_loader('validate', test_loader)
    trainer.cuda()

    # Go!
    logging.info('start training')
    trainer.fit()

    trainer.set_max_num_iterations(trainer.iteration_count + iterations)
    trainer.build_optimizer('Adam', lr=0.0001)
    logging.info('slower lr')
    trainer.fit()
def task(env):
    dataset_name = 'MNIST'
    dataset_path = env.dataset(dataset_name)
    dataset = MNIST(str(dataset_path), train=True, download=True,
                    transform=mnist_to_tensor32)
    testset = MNIST(str(dataset_path), train=False, download=True,
                    transform=mnist_to_tensor32)
    test_loader = DataLoader(testset, batch_size=512, shuffle=True, num_workers=2)
    train_loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=2)
    triplets = dis.TripelDataset(dataset, dataset.train_labels, K=1)
    train_loader_triple = DataLoader(triplets, batch_size=16,
                                     collate_fn=dis.stack_triplets)

    model = Model(in_channels=1, n_classes=10, shortcut=True)
    criterion = dis.TripletLoss()
    exp = TrainLog(env, dataset_name, model.info, log_time=True)
    logging.info(' saving to %s', exp.save_directory)
    logging.info(' logging to %s', exp.log_directory)

    # Load loaders
    # train_loader, validate_loader = get_cifar10_loaders(DATASET_DIRECTORY,
    #                                                     download=DOWNLOAD_CIFAR)

    iterations = 0
    trainer = Trainer(model)
    set_log_and_save(trainer, exp)
    trainer.build_criterion(criterion) \
        .build_optimizer('SGD', lr=0.001, weight_decay=0.0005) \
        .set_max_num_iterations(iterations)\
        .bind_loader('train', train_loader_triple)
    trainer.cuda()
    logging.info('start training')
    trainer.fit()

    print(model.forward(Variable(next(iter(train_loader_triple))[0]).cuda()))
    model.use_shortcut = False
    model.classify = True
    print(model.forward(Variable(next(iter(train_loader_triple))[0]).cuda()))

    trainer = Trainer(model)
    set_log_and_save(trainer, exp)
    trainer.build_criterion('CrossEntropyLoss') \
        .build_metric('CategoricalError') \
        .build_optimizer('Adam', lr=0.001) \
        .set_max_num_iterations(5000) \
        .bind_loader('train', train_loader) \
        .bind_loader('test', test_loader)
    trainer.cuda()
    logging.info('start training')
    trainer.fit()
model = Model(n_classes=4, in_channels=1, layers=2)
# model = dis.BaseNet(in_channels=1, n_classes=4)
model.classify = True
exp = TrainLog(env, 'img_folder', model.info, log_time=True)
logging.info(' saving to %s', exp.save_directory)
logging.info(' logging to %s', exp.log_directory)

# Build trainer
max_num_iterations = 10000
trainer = Trainer(model) \
    .build_criterion('NLLLoss') \
    .build_metric('CategoricalError') \
    .build_optimizer('Adam') \
    .save_every((1, 'epochs')) \
    .save_to_directory(str(exp.save_directory))\
    .validate_every((2, 'epochs'))\
    .set_max_num_epochs(100) \
    .build_logger(TensorboardLogger(log_scalars_every=(1, 'iterations'),
                                    log_images_every='never'),
                  log_directory=str(exp.log_directory))
# .save_every((2000, 'iterations'), to_directory=str(exp.save_directory), checkpoint_filename='latest') \

# Bind loaders
trainer.bind_loader('train', train_loader)
trainer.bind_loader('validate', test_loader)
trainer.cuda()

# Go!
trainer.fit()