def main():
    """CLI entry point: train the WaveRNN vocoder.

    Parses command-line overrides, loads hyperparameters, builds the
    WaveRNN model on the selected device, restores (or creates) a
    checkpoint, and runs the vocoder training loop.
    """
    # Parse Arguments
    parser = argparse.ArgumentParser(description='Train WaveRNN Vocoder')
    parser.add_argument('--lr', '-l', type=float, help='[float] override hparams.py learning rate')
    parser.add_argument('--batch_size', '-b', type=int, help='[int] override hparams.py batch size')
    parser.add_argument('--force_train', '-f', action='store_true', help='Forces the model to train past total steps')
    parser.add_argument('--gta', '-g', action='store_true', help='train wavernn on GTA features')
    parser.add_argument(
        '--force_cpu', '-c', action='store_true',
        help='Forces CPU-only training, even when in CUDA capable environment')
    parser.add_argument('--hp_file', metavar='FILE', default='hparams.py',
                        help='The file to use for the hyperparameters')
    args = parser.parse_args()

    hp.configure(args.hp_file)  # load hparams from file
    # CLI flags take precedence; fall back to hparams defaults when absent.
    if args.lr is None:
        args.lr = hp.voc_lr
    if args.batch_size is None:
        args.batch_size = hp.voc_batch_size

    paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

    batch_size = args.batch_size
    force_train = args.force_train
    train_gta = args.gta
    lr = args.lr

    if not args.force_cpu and torch.cuda.is_available():
        device = torch.device('cuda')
        # Multi-GPU data-parallel training needs the batch to split evenly.
        if batch_size % torch.cuda.device_count() != 0:
            raise ValueError(
                '`batch_size` must be evenly divisible by n_gpus!')
    else:
        device = torch.device('cpu')
    print('Using device:', device)

    print('\nInitialising Model...\n')

    # Instantiate WaveRNN Model
    voc_model = WaveRNN(rnn_dims=hp.voc_rnn_dims,
                        fc_dims=hp.voc_fc_dims,
                        bits=hp.bits,
                        pad=hp.voc_pad,
                        upsample_factors=hp.voc_upsample_factors,
                        feat_dims=hp.num_mels,
                        compute_dims=hp.voc_compute_dims,
                        res_out_dims=hp.voc_res_out_dims,
                        res_blocks=hp.voc_res_blocks,
                        hop_length=hp.hop_length,
                        sample_rate=hp.sample_rate,
                        mode=hp.voc_mode).to(device)

    # Check to make sure the hop length is correctly factorised
    # (the product of the upsample factors must reproduce hop_length).
    assert np.cumprod(hp.voc_upsample_factors)[-1] == hp.hop_length

    optimizer = optim.Adam(voc_model.parameters())
    restore_checkpoint('voc', paths, voc_model, optimizer,
                       create_if_missing=True)

    train_set, test_set = get_vocoder_datasets(paths.data, batch_size,
                                               train_gta)

    # --force_train effectively removes the step cap.
    total_steps = 10_000_000 if force_train else hp.voc_total_steps

    simple_table([
        ('Remaining', str(
            (total_steps - voc_model.get_step()) // 1000) + 'k Steps'),
        ('Batch Size', batch_size),
        ('LR', lr),
        ('Sequence Len', hp.voc_seq_len),
        ('GTA Train', train_gta)
    ])

    # RAW mode is a categorical output -> cross entropy; otherwise the
    # model emits mixture-of-logistics parameters.
    loss_func = F.cross_entropy if voc_model.mode == 'RAW' else discretized_mix_logistic_loss

    voc_train_loop(paths, voc_model, loss_func, optimizer, train_set,
                   test_set, lr, total_steps)

    print('Training Complete.')
    print(
        'To continue training increase voc_total_steps in hparams.py or use --force_train'
    )
def main():
    """CLI entry point: train the Tacotron TTS model.

    Runs the scheduled training sessions from ``hp.tts_schedule`` (unless
    ``--force_gta`` is given), then creates the Ground-Truth-Aligned
    feature set used to train the WaveRNN vocoder.
    """
    # Parse Arguments
    parser = argparse.ArgumentParser(description='Train Tacotron TTS')
    parser.add_argument('--force_train', '-f', action='store_true', help='Forces the model to train past total steps')
    parser.add_argument('--force_gta', '-g', action='store_true', help='Force the model to create GTA features')
    parser.add_argument(
        '--force_cpu', '-c', action='store_true',
        help='Forces CPU-only training, even when in CUDA capable environment')
    parser.add_argument('--hp_file', metavar='FILE', default='hparams.py',
                        help='The file to use for the hyperparameters')
    args = parser.parse_args()

    hp.configure(args.hp_file)  # Load hparams from file
    paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

    force_train = args.force_train
    force_gta = args.force_gta

    if not args.force_cpu and torch.cuda.is_available():
        device = torch.device('cuda')
        # Every scheduled batch size must split evenly across the GPUs.
        for session in hp.tts_schedule:
            _, _, _, batch_size = session
            if batch_size % torch.cuda.device_count() != 0:
                raise ValueError(
                    '`batch_size` must be evenly divisible by n_gpus!')
    else:
        device = torch.device('cpu')
    print('Using device:', device)

    # Instantiate Tacotron Model
    print('\nInitialising Tacotron Model...\n')
    model = Tacotron(embed_dims=hp.tts_embed_dims,
                     num_chars=len(symbols),
                     encoder_dims=hp.tts_encoder_dims,
                     decoder_dims=hp.tts_decoder_dims,
                     n_mels=hp.num_mels,
                     fft_bins=hp.num_mels,
                     postnet_dims=hp.tts_postnet_dims,
                     encoder_K=hp.tts_encoder_K,
                     lstm_dims=hp.tts_lstm_dims,
                     postnet_K=hp.tts_postnet_K,
                     num_highways=hp.tts_num_highways,
                     dropout=hp.tts_dropout,
                     stop_threshold=hp.tts_stop_threshold).to(device)

    optimizer = optim.Adam(model.parameters())
    restore_checkpoint('tts', paths, model, optimizer, create_if_missing=True)

    if not force_gta:
        for i, session in enumerate(hp.tts_schedule):
            current_step = model.get_step()

            r, lr, max_step, batch_size = session

            training_steps = max_step - current_step

            # Do we need to change to the next session?
            if current_step >= max_step:
                # Are there no further sessions than the current one?
                if i == len(hp.tts_schedule) - 1:
                    # There are no more sessions. Check if we force training.
                    if force_train:
                        # Don't finish the loop - train forever
                        training_steps = 999_999_999
                    else:
                        # We have completed training. Breaking is same as continue
                        break
                else:
                    # There is a following session, go to it
                    continue

            model.r = r

            # BUGFIX: the original called the undefined name `repr1(r)`,
            # which raised NameError as soon as a session started; `repr`
            # is the intended builtin.
            simple_table([('Steps with r=%s' % (repr(r)),
                           str(training_steps // 1000) + 'k Steps'),
                          ('Batch Size', batch_size),
                          ('Learning Rate', lr),
                          ('Outputs/Step (r)', model.r)])

            train_set, attn_example = get_tts_datasets(paths.data, batch_size, r)
            tts_train_loop(paths, model, optimizer, train_set, lr,
                           training_steps, attn_example)

        print('Training Complete.')
        print(
            'To continue training increase tts_total_steps in hparams.py or use --force_train\n'
        )

    # Always finish by generating GTA features for vocoder training.
    print('Creating Ground Truth Aligned Dataset...\n')
    train_set, attn_example = get_tts_datasets(paths.data, 8, model.r)
    create_gta_features(model, train_set, paths.gta)
    print(
        '\n\nYou can now train WaveRNN on GTA features - use python train_wavernn.py --gta\n'
    )
postnet_k=hp.forward_postnet_K, postnet_dims=hp.forward_postnet_dims, prenet_k=hp.forward_prenet_K, prenet_dims=hp.forward_prenet_dims, highways=hp.forward_num_highways, dropout=hp.forward_dropout, n_mels=hp.num_mels).to(device) model_parameters = filter(lambda p: p.requires_grad, model.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) print(f'num params {params}') optimizer = optim.Adam(model.parameters()) restore_checkpoint('forward', paths, model, optimizer, create_if_missing=True) if force_gta: print('Creating Ground Truth Aligned Dataset...\n') train_set, val_set = get_tts_datasets(paths.data, 8, r=1, model_type='forward') create_gta_features(model, train_set, val_set, paths.gta) print( '\n\nYou can now train WaveRNN on GTA features - use python train_wavernn.py --gta\n' ) else: trainer = ForwardTrainer(paths)
fft_bins=hp.num_mels, postnet_dims=hp.tts_postnet_dims, encoder_K=hp.tts_encoder_K, lstm_dims=hp.tts_lstm_dims, postnet_K=hp.tts_postnet_K, num_highways=hp.tts_num_highways, dropout=hp.tts_dropout, stop_threshold=hp.tts_stop_threshold).to(device) model_parameters = filter(lambda p: p.requires_grad, model.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) print(f'Num Params: {params}') optimizer = optim.Adam(model.parameters()) restore_checkpoint('tts', paths, model, optimizer, create_if_missing=True, device=device) if args.force_gta: print('Creating Ground Truth Aligned Dataset...\n') train_set, val_set = get_tts_datasets(paths.data, 8, model.r) create_gta_features(model, train_set, val_set, paths.gta) print( '\n\nYou can now train WaveRNN on GTA features - use python train_wavernn.py --gta\n' ) elif args.force_align: print('Creating Attention Alignments and Pitch Values...') train_set, val_set = get_tts_datasets(paths.data, 1, model.r) create_align_features(model, train_set, val_set, paths.alg) # paths.phon_pitch)
def main(argv):
    """Training entry point: configure a run, build the runner, and train.

    Order matters here: the run directory must exist before logging is
    set up, and nothing is written to disk when ``--dry`` is given.
    """
    args = parser.parse_args(argv)

    # Load configuration
    conf = Configuration.from_json(args.config)
    conf.args = args
    # `--conf key=value` entries override the JSON configuration.
    if args.conf:
        new_conf_entries = {}
        for arg in args.conf:
            key, value = arg.split('=')
            new_conf_entries[key] = value
        conf.update(new_conf_entries)

    # Setup log directory: explicit --run_dir wins, then the directory of
    # a resumed checkpoint, then a freshly generated run dir.
    if args.run_dir:
        conf.run_dir = args.run_dir
    elif args.resume:
        if os.path.exists(args.resume):
            conf.run_dir = os.path.dirname(args.resume)
    if not conf.has_attr('run_dir'):
        run_name = conf.get_attr('run_name', default='unnamed_run')
        conf.run_dir = get_run_dir(args.log_dir, run_name)

    if not args.dry:
        if not os.path.isdir(conf.run_dir):
            os.mkdir(conf.run_dir)
    setup_logging(conf.run_dir, 'train', args.verbose, args.dry)
    logging.info('Commandline arguments: {}'.format(' '.join(argv)))

    if not args.dry:
        logging.info('This run is saved to: {}'.format(conf.run_dir))
        config_path = get_config_path(conf.run_dir)
        conf.serialize(config_path)

    # Empty --cuda string means CPU-only; otherwise claim a free GPU.
    if args.cuda != '':
        try:
            args.cuda = utils.set_cuda_env(args.cuda)
        except Exception:
            logging.critical('No free GPU on this machine. Aborting run.')
            return
        logging.info('Running on GPU {}'.format(args.cuda))

    if args.verbose:
        logging.debug(str(conf))

    utils.set_random_seeds(conf.seed)

    # Setup model
    logging.info('Setting up training runner {}'.format(conf.runner_type))
    runner = build_runner(conf, conf.runner_type, args.cuda, mode='train')
    if args.print_model:
        print(str(runner))
    if args.print_parameters:
        print_model_parameters(runner)

    # Handle resuming from checkpoint
    restore_state = None
    if args.resume:
        if os.path.exists(args.resume):
            restore_state = restore_checkpoint(args.resume, runner)
            logging.info('Restored checkpoint from {}'.format(args.resume))
        else:
            logging.critical(('Checkpoint {} to restore '
                              'from not found').format(args.resume))
            return

    use_tensorboard = conf.get_attr('use_tensorboard',
                                    default=DEFAULT_USE_TENSORBOARD)
    if use_tensorboard and not args.dry:
        # Imported lazily so tensorboardX is only required when enabled.
        from tensorboardX import SummaryWriter
        summary_writer = SummaryWriter(conf.run_dir)
        logging.debug('Using tensorboardX summary writer')
    else:
        summary_writer = None

    # Load datasets
    num_workers = conf.get_attr('num_data_workers',
                                default=DEFAULT_NUM_WORKERS)
    num_train_samples = conf.get_attr('num_train_subset_samples',
                                      default=None)
    num_val_samples = conf.get_attr('num_validation_subset_samples',
                                    default=None)

    train_dataset_name = conf.get_attr('train_dataset', alternative='dataset')
    logging.info('Loading training dataset {}'.format(train_dataset_name))
    train_dataset = load_dataset(conf, args.data_dir, train_dataset_name,
                                 'train')
    train_sampler = maybe_get_subset_sampler(num_train_samples, train_dataset)
    # Shuffle only when no subset sampler is active (they are exclusive).
    train_loader = DataLoader(dataset=train_dataset,
                              num_workers=num_workers,
                              batch_size=conf.batch_size,
                              sampler=train_sampler,
                              shuffle=train_sampler is None,
                              worker_init_fn=utils.set_worker_seeds)

    val_dataset_name = conf.get_attr('validation_dataset',
                                     alternative='dataset')
    logging.info('Loading validation dataset {}'.format(val_dataset_name))
    val_dataset = load_dataset(conf, args.data_dir, val_dataset_name, 'val')
    val_sampler = maybe_get_subset_sampler(num_val_samples, val_dataset)
    val_loader = DataLoader(dataset=val_dataset,
                            num_workers=num_workers,
                            batch_size=conf.get_attr('validation_batch_size',
                                                     default=conf.batch_size),
                            sampler=val_sampler,
                            shuffle=False,
                            worker_init_fn=utils.set_worker_seeds)

    # Setup validation checkpoints: one "best_<metric>" dir per metric.
    chkpt_metrics = conf.get_attr('validation_checkpoint_metrics', default=[])
    chkpt_metric_dirs = {
        metric: os.path.join(conf.run_dir, 'best_' + metric)
        for metric in chkpt_metrics
    }
    for metric_dir in chkpt_metric_dirs.values():
        if not args.dry and not os.path.isdir(metric_dir):
            os.mkdir(metric_dir)

    # Setup early stopping: either a single `early_stopping` entry or a
    # list under `early_stoppers`; neither configured means no stoppers.
    if conf.has_attr('early_stopping'):
        from training.early_stopping import EarlyStopper
        early_stoppers = [
            EarlyStopper(conf.early_stopping['metric_name'],
                         conf.early_stopping['patience'],
                         conf.early_stopping.get('min_value', None),
                         conf.early_stopping.get('max_difference', None))
        ]
    elif conf.has_attr('early_stoppers'):
        from training.early_stopping import EarlyStopper
        early_stoppers = []
        for early_stopping_conf in conf.early_stoppers:
            min_value = early_stopping_conf.get('min_value', None)
            max_diff = early_stopping_conf.get('max_difference', None)
            early_stoppers.append(
                EarlyStopper(early_stopping_conf['metric_name'],
                             early_stopping_conf['patience'],
                             min_value, max_diff))
    else:
        early_stoppers = []

    logging.info('Starting training run of {} epochs'.format(conf.num_epochs))

    # Train
    try:
        train_net(conf, runner, train_loader, val_loader, args.cuda,
                  chkpt_metric_dirs, restore_state, summary_writer,
                  early_stoppers)
    except KeyboardInterrupt:
        # Flush tensorboard events on manual interrupt.
        if summary_writer is not None:
            summary_writer.close()
def main(argv):
    """Evaluation/inference entry point.

    Evaluates a restored runner either on named datasets (printing average
    metrics) or on folders of images (pure inference), optionally writing
    input/target/prediction images to disk.
    """
    args = parser.parse_args(argv)

    # Empty --cuda string means CPU-only; otherwise claim a free GPU.
    if args.cuda != '':
        try:
            args.cuda = utils.set_cuda_env(args.cuda)
        except Exception:
            print('No free GPU on this machine. Aborting run.')
            return
        print('Running on GPU {}'.format(args.cuda))

    # Load configuration
    conf = Configuration.from_json(args.config)
    conf.args = args
    # `--conf key=value` entries override the JSON configuration.
    if args.conf:
        new_conf_entries = {}
        for arg in args.conf:
            key, value = arg.split('=')
            new_conf_entries[key] = value
        conf.update(new_conf_entries)

    if args.verbose:
        print(conf)

    utils.set_random_seeds(conf.seed)

    # Setup model
    runner = build_runner(conf, conf.runner_type, args.cuda, mode='test')

    # Handle resuming from checkpoint
    if args.checkpoint != 'NONE':
        if os.path.exists(args.checkpoint):
            _ = restore_checkpoint(args.checkpoint, runner, cuda=args.cuda)
            print('Restored checkpoint from {}'.format(args.checkpoint))
        else:
            print('Checkpoint {} to restore from not found'.format(args.checkpoint))
            return

    # Evaluate on full image, not crops
    conf.full_image = True

    # Load datasets: no positional args -> configured validation dataset;
    # otherwise each arg is either a dataset name or an image folder.
    mode = 'dataset'
    if len(args.files_or_dirs) == 0:
        datasets = [load_dataset(conf, args.data_dir,
                                 conf.validation_dataset, args.fold)]
    else:
        datasets = []
        for f in args.files_or_dirs:
            if is_dataset(f):
                dataset = load_dataset(conf, args.data_dir, f, args.fold)
                datasets.append(dataset)
            else:
                # NOTE(review): this rebuilds `datasets` from ALL
                # files_or_dirs on the first non-dataset entry — presumably
                # intentional (all-or-nothing image mode); verify.
                mode = 'image'
                transform = get_sr_transform(conf, 'test', downscale=False)
                datasets = [make_sr_dataset_from_folder(conf, f, transform,
                                                        inference=True)
                            for f in args.files_or_dirs]

    num_workers = conf.get_attr('num_data_workers',
                                default=DEFAULT_NUM_WORKERS)

    # Evaluate all datasets
    for dataset in datasets:
        loader = DataLoader(dataset=dataset, num_workers=num_workers,
                            batch_size=1, shuffle=False)
        if mode == 'dataset':
            data, _, val_metrics = runner.validate(loader, len(loader))
            print('Average metrics for {}'.format(dataset.name))
            for metric_name, metric in val_metrics.items():
                print(' {}: {}'.format(metric_name, metric))
        else:
            data = runner.infer(loader)

        if args.infer or args.dump:
            if mode == 'dataset':
                output_dir = get_run_dir(args.out_dir, dataset.name)
                if not os.path.isdir(output_dir):
                    os.mkdir(output_dir)

            file_idx = 0
            for batch in data:
                # In image mode, write outputs next to the source image.
                if mode == 'image':
                    output_dir = os.path.dirname(dataset.images[file_idx])
                named_batch = runner.get_named_outputs(batch)
                inputs = named_batch['input']
                predictions = named_batch['prediction']
                targets = named_batch['target']
                for (inp, target, prediction) in zip(inputs, targets,
                                                     predictions):
                    image_file = os.path.basename(dataset.images[file_idx])
                    name, _ = os.path.splitext(image_file)
                    file_idx += 1
                    # --dump also saves input/target; prediction is
                    # always saved when this block runs.
                    if args.dump:
                        input_file = os.path.join(output_dir,
                                                  '{}_input.png'.format(name))
                        save_image(inp.data, input_file)
                        target_file = os.path.join(output_dir,
                                                   '{}_target.png'.format(name))
                        save_image(target.data, target_file)
                    pred_file = os.path.join(output_dir,
                                             '{}_pred.png'.format(name))
                    save_image(prediction.data, pred_file)
parser.add_argument('--gta', '-g', action='store_true', help='train wavernn on GTA features') parser.add_argument('--config', metavar='FILE', default='config.yaml', help='The config containing all hyperparams.') args = parser.parse_args() config = read_config(args.config) paths = Paths(config['data_path'], config['voc_model_id'], config['tts_model_id']) device = torch.device( 'cuda') if torch.cuda.is_available() else torch.device('cpu') print('Using device:', device) print('\nInitialising Model...\n') voc_model = WaveRNN.from_config(config).to(device) dsp = DSP.from_config(config) assert np.cumprod( config['vocoder']['model']['upsample_factors'])[-1] == dsp.hop_length optimizer = optim.Adam(voc_model.parameters()) restore_checkpoint(model=voc_model, optim=optimizer, path=paths.voc_checkpoints / 'latest_model.pt', device=device) voc_trainer = VocTrainer(paths=paths, dsp=dsp, config=config) voc_trainer.train(voc_model, optimizer, train_gta=args.gta)
config['tts_model_id']) assert len(os.listdir(paths.alg)) > 0, f'Could not find alignment files in {paths.alg}, please predict ' \ f'alignments first with python train_tacotron.py --force_align!' force_gta = args.force_gta device = torch.device( 'cuda') if torch.cuda.is_available() else torch.device('cpu') print('Using device:', device) # Instantiate Forward TTS Model print('\nInitialising Forward TTS Model...\n') model = ForwardTacotron.from_config(config).to(device) optimizer = optim.Adam(model.parameters()) restore_checkpoint(model=model, optim=optimizer, path=paths.forward_checkpoints / 'latest_model.pt', device=device) if force_gta: print('Creating Ground Truth Aligned Dataset...\n') train_set, val_set = get_tts_datasets(paths.data, 8, r=1, model_type='forward', filter_attention=False, max_mel_len=None) create_gta_features(model, train_set, val_set, paths.gta) print( '\n\nYou can now train WaveRNN on GTA features - use python train_wavernn.py --gta\n' ) else:
def main(argv):
    """Training entry point (print-based variant, no early stopping).

    Builds the runner, optionally restores a checkpoint, prepares the run
    directory and data loaders, then trains until completion or Ctrl-C.
    """
    args = parser.parse_args(argv)

    # Empty --cuda string means CPU-only; otherwise claim a free GPU.
    if args.cuda != '':
        try:
            args.cuda = utils.set_cuda_env(args.cuda)
        except Exception:
            print('No free GPU on this machine. Aborting run.')
            return
        print('Running on GPU {}'.format(args.cuda))

    # Load configuration
    conf = Configuration.from_json(args.config)
    conf.args = args
    # `--conf key=value` entries override the JSON configuration.
    if args.conf:
        new_conf_entries = {}
        for arg in args.conf:
            key, value = arg.split('=')
            new_conf_entries[key] = value
        conf.update(new_conf_entries)

    if args.verbose:
        print(conf)

    utils.set_random_seeds(conf.seed)

    # Setup model
    runner = build_runner(conf, conf.runner_type, args.cuda, mode='train',
                          resume=args.resume is not None)
    if args.print_model:
        print(str(runner))

    # Handle resuming from checkpoint
    restore_state = None
    if args.resume:
        if os.path.exists(args.resume):
            restore_state = restore_checkpoint(args.resume, runner)
            # Resuming reuses the checkpoint's run directory.
            conf.run_dir = os.path.dirname(args.resume)
            print('Restored checkpoint from {}'.format(args.resume))
        else:
            print('Checkpoint {} to restore from not found'.format(
                args.resume))
            return

    # Setup log directory: explicit --run_dir wins over the resumed dir,
    # and a fresh run dir is generated if neither is set.
    if args.run_dir:
        conf.run_dir = args.run_dir
    if not conf.has_attr('run_dir'):
        run_name = conf.get_attr('run_name', default='unnamed_run')
        conf.run_dir = get_run_dir(args.log_dir, run_name)

    # Nothing is written to disk on a --dry run.
    if not args.dry:
        if not os.path.isdir(conf.run_dir):
            os.mkdir(conf.run_dir)
        print('This run is saved to: {}'.format(conf.run_dir))
        config_path = get_config_path(conf.run_dir)
        conf.serialize(config_path)

    use_tensorboard = conf.get_attr('use_tensorboard',
                                    default=DEFAULT_USE_TENSORBOARD)
    if use_tensorboard and not args.dry:
        # Imported lazily so tensorboardX is only required when enabled.
        from tensorboardX import SummaryWriter
        summary_writer = SummaryWriter(conf.run_dir)
    else:
        summary_writer = None

    # Load datasets
    num_workers = conf.get_attr('num_data_workers',
                                default=DEFAULT_NUM_WORKERS)
    num_train_samples = conf.get_attr('num_train_subset_samples',
                                      default=None)
    num_val_samples = conf.get_attr('num_validation_subset_samples',
                                    default=None)

    train_dataset_name = conf.get_attr('train_dataset', alternative='dataset')
    train_dataset = load_dataset(conf, args.data_dir, train_dataset_name,
                                 'train')
    train_sampler = maybe_get_subset_sampler(num_train_samples, train_dataset)
    # Shuffle only when no subset sampler is active (they are exclusive).
    train_loader = DataLoader(dataset=train_dataset,
                              num_workers=num_workers,
                              batch_size=conf.batch_size,
                              sampler=train_sampler,
                              shuffle=train_sampler is None)

    val_dataset_name = conf.get_attr('validation_dataset',
                                     alternative='dataset')
    val_dataset = load_dataset(conf, args.data_dir, val_dataset_name, 'val')
    val_sampler = maybe_get_subset_sampler(num_val_samples, val_dataset)
    val_loader = DataLoader(dataset=val_dataset,
                            num_workers=num_workers,
                            batch_size=conf.get_attr('validation_batch_size',
                                                     default=conf.batch_size),
                            sampler=val_sampler,
                            shuffle=False)

    # One "best_<metric>" checkpoint dir per configured validation metric.
    chkpt_metrics = conf.get_attr('validation_checkpoint_metrics', default=[])
    chkpt_metric_dirs = {
        metric: os.path.join(conf.run_dir, 'best_' + metric)
        for metric in chkpt_metrics
    }
    for metric_dir in chkpt_metric_dirs.values():
        if not args.dry and not os.path.isdir(metric_dir):
            os.mkdir(metric_dir)

    # Train
    try:
        train_net(conf, runner, train_loader, val_loader, args.cuda,
                  chkpt_metric_dirs, restore_state, summary_writer)
    except KeyboardInterrupt:
        # Flush tensorboard events on manual interrupt.
        if summary_writer is not None:
            summary_writer.close()
def main():
    """CLI entry point: train the ForwardTacotron TTS model.

    Runs the sessions from ``hp.forward_schedule`` (unless ``--force_gta``
    is given), then creates the GTA feature set for vocoder training.
    """
    # Parse Arguments
    parser = argparse.ArgumentParser(description='Train Tacotron TTS')
    # NOTE(review): --force_train is accepted but never read below —
    # unlike the Tacotron trainer it does not extend the schedule; confirm
    # whether this is intentional.
    parser.add_argument('--force_train', '-f', action='store_true', help='Forces the model to train past total steps')
    parser.add_argument('--force_gta', '-g', action='store_true', help='Force the model to create GTA features')
    parser.add_argument(
        '--force_cpu', '-c', action='store_true',
        help='Forces CPU-only training, even when in CUDA capable environment')
    parser.add_argument('--hp_file', metavar='FILE', default='hparams.py',
                        help='The file to use for the hyperparameters')
    args = parser.parse_args()

    hp.configure(args.hp_file)  # Load hparams from file
    paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

    force_gta = args.force_gta

    if not args.force_cpu and torch.cuda.is_available():
        device = torch.device('cuda')
        # Every scheduled batch size must split evenly across the GPUs.
        for session in hp.forward_schedule:
            _, _, batch_size = session
            if batch_size % torch.cuda.device_count() != 0:
                raise ValueError(
                    '`batch_size` must be evenly divisible by n_gpus!')
    else:
        device = torch.device('cpu')
    print('Using device:', device)

    # Instantiate Forward TTS Model
    print('\nInitialising Forward TTS Model...\n')
    model = ForwardTacotron(embed_dims=hp.forward_embed_dims,
                            num_chars=len(symbols),
                            durpred_rnn_dims=hp.forward_durpred_rnn_dims,
                            durpred_conv_dims=hp.forward_durpred_conv_dims,
                            rnn_dim=hp.forward_rnn_dims,
                            postnet_k=hp.forward_postnet_K,
                            postnet_dims=hp.forward_postnet_dims,
                            prenet_k=hp.forward_prenet_K,
                            prenet_dims=hp.forward_prenet_dims,
                            highways=hp.forward_num_highways,
                            dropout=hp.forward_dropout,
                            n_mels=hp.num_mels).to(device)

    # Report the number of trainable parameters.
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print(f'num params {params}')

    optimizer = optim.Adam(model.parameters())
    restore_checkpoint('forward', paths, model, optimizer,
                       create_if_missing=True)

    if not force_gta:
        for i, session in enumerate(hp.forward_schedule):
            current_step = model.get_step()
            lr, max_step, batch_size = session
            training_steps = max_step - current_step
            simple_table([(f'Steps',
                           str(training_steps // 1000) + 'k Steps'),
                          ('Batch Size', batch_size),
                          ('Learning Rate', lr)])
            # r=1 with alignments=True: durations come from precomputed
            # attention alignments, one output frame per step.
            train_set, mel_example = get_tts_datasets(paths.data, batch_size,
                                                      1, alignments=True)
            train_loop(paths, model, optimizer, train_set, lr,
                       training_steps, mel_example)

    # Always finish by generating GTA features for vocoder training.
    train_set, mel_example = get_tts_datasets(paths.data, 8, 1,
                                              alignments=True)
    create_gta_features(model, train_set, paths.gta)
    print('Training Complete.')
if torch.cuda.is_available(): device = torch.device('cuda') else: device = torch.device('cpu') print('Using device:', device) print('\nInitialising Model...\n') # Instantiate WaveRNN Model voc_model = WaveRNN(rnn_dims=hp.voc_rnn_dims, fc_dims=hp.voc_fc_dims, bits=hp.bits, pad=hp.voc_pad, upsample_factors=hp.voc_upsample_factors, feat_dims=hp.num_mels, compute_dims=hp.voc_compute_dims, res_out_dims=hp.voc_res_out_dims, res_blocks=hp.voc_res_blocks, hop_length=hp.hop_length, sample_rate=hp.sample_rate, mode=hp.voc_mode).to(device) # Check to make sure the hop length is correctly factorised assert np.cumprod(hp.voc_upsample_factors)[-1] == hp.hop_length optimizer = optim.Adam(voc_model.parameters()) restore_checkpoint('voc', paths, voc_model, optimizer, create_if_missing=True) voc_trainer = VocTrainer(paths) voc_trainer.train(voc_model, optimizer, train_gta=args.gta)
def main(argv):
    """Evaluation entry point (logging-based variant with raw mode).

    Evaluates a restored runner on one or more datasets, optionally
    suppressing the output transform (``--raw``) and optionally writing
    output images per dataset.
    """
    args = parser.parse_args(argv)

    # Log next to the checkpoint being evaluated.
    setup_logging(os.path.dirname(args.checkpoint), 'eval', args.verbose,
                  args.dry)
    logging.info('Commandline arguments: {}'.format(' '.join(argv)))

    # Empty --cuda string means CPU-only; otherwise claim a free GPU.
    if args.cuda != '':
        try:
            args.cuda = utils.set_cuda_env(args.cuda)
        except Exception:
            logging.critical('No free GPU on this machine. Aborting run.')
            return
        logging.info('Running on GPU {}'.format(args.cuda))

    # Load configuration
    conf = Configuration.from_json(args.config)
    conf.args = args
    # `--conf key=value` entries override the JSON configuration.
    if args.conf:
        new_conf_entries = {}
        for arg in args.conf:
            key, value = arg.split('=')
            new_conf_entries[key] = value
        conf.update(new_conf_entries)

    if args.verbose:
        logging.debug(conf)

    utils.set_random_seeds(conf.seed)

    if args.raw:
        # This is a hack to suppress the output transform when we request raw data
        conf.application = 'none'
        if conf.has_attr('tasks'):
            for name, task in conf.tasks.items():
                if 'application' in task:
                    logging.debug(('Changing output transform in task {} '
                                   'from {} to none').format(
                                       name, task['application']))
                    task['application'] = 'none'

    # Setup model
    runner = build_runner(conf, conf.runner_type, args.cuda, mode='test')

    # Handle resuming from checkpoint
    if args.checkpoint != 'NONE':
        if os.path.exists(args.checkpoint):
            _ = restore_checkpoint(args.checkpoint, runner, cuda=args.cuda)
            logging.info('Restored checkpoint from {}'.format(args.checkpoint))
        else:
            logging.critical(('Checkpoint {} to restore '
                              'from not found').format(args.checkpoint))
            return

    # Load datasets: no positional args -> configured validation dataset.
    mode = 'dataset'
    if len(args.files_or_dirs) == 0:
        datasets = [load_dataset(conf, args.data_dir,
                                 conf.validation_dataset, args.fold)]
    else:
        datasets = []
        for f in args.files_or_dirs:
            if is_dataset(f):
                dataset = load_dataset(conf, args.data_dir, f, args.fold)
                datasets.append(dataset)

    if args.raw:
        mode = 'raw'

    num_samples = conf.get_attr('num_validation_subset_samples',
                                default=None)

    # Evaluate all datasets
    for dataset in datasets:
        logging.info('Evaluating dataset {}'.format(dataset.name))
        sampler = maybe_get_subset_sampler(num_samples, dataset)
        loader = DataLoader(dataset=dataset,
                            num_workers=DEFAULT_NUM_WORKERS,
                            batch_size=1,
                            sampler=sampler,
                            shuffle=False)
        if mode == 'dataset':
            data, _, val_metrics = runner.validate(loader, len(loader))
            res_str = 'Average metrics for {}\n'.format(dataset.name)
            for metric_name, metric in val_metrics.items():
                res_str += ' {}: {}\n'.format(metric_name, metric)
            logging.info(res_str)
        else:
            data = runner.infer(loader)

        if not args.dry and (args.infer or args.dump):
            if mode == 'dataset' or mode == 'raw':
                # Name the output dir after both dataset and config file.
                conf_name = os.path.splitext(os.path.basename(conf.file))[0]
                output_dir = get_run_dir(args.out_dir,
                                         '{}_{}'.format(dataset.name,
                                                        conf_name))
                if not os.path.isdir(output_dir):
                    os.mkdir(output_dir)
                logging.info('Writing images to {}'.format(output_dir))

            file_idx = 0
            for batch in data:
                # In image mode, write outputs next to the source image.
                if mode == 'image':
                    output_dir = os.path.dirname(dataset.images[file_idx])
                named_batch = runner.get_named_outputs(batch)
                inp = named_batch['input']
                if 'prediction' in named_batch:
                    batch_size = named_batch['prediction'].shape[0]
                    filenames = [dataset.get_filename(idx)
                                 for idx in range(file_idx,
                                                  file_idx + batch_size)]
                    save_output_images(dataset, inp,
                                       named_batch['prediction'],
                                       named_batch['target'], output_dir,
                                       filenames, 'default', args.dump,
                                       args.raw)
                    file_idx += len(filenames)
            logging.info(('Finished writing images for '
                          'dataset {}').format(dataset.name))