def test_new_root(self):
    """A ``new_root`` aggregator must not feed metrics back into its parent."""
    with metrics.aggregate() as outer:
        metrics.log_scalar('loss', 1)
        # the inner aggregator is detached, so its value stays out of `outer`
        with metrics.aggregate(new_root=True) as inner:
            metrics.log_scalar('loss', 2)
    self.assertEqual(outer.get_smoothed_values()['loss'], 1)
    self.assertEqual(inner.get_smoothed_values()['loss'], 2)
def test_nesting(self):
    """Metrics logged inside a child aggregator also flow into its parent."""
    with metrics.aggregate() as parent:
        metrics.log_scalar("loss", 1)
        with metrics.aggregate() as child:
            metrics.log_scalar("loss", 2)
    # parent saw both values (mean of 1 and 2); child saw only its own
    self.assertEqual(parent.get_smoothed_values()["loss"], 1.5)
    self.assertEqual(child.get_smoothed_values()["loss"], 2)
def test_nested_duplicate_names(self):
    """Re-activating the same named aggregator at a deeper level is safe."""
    name = str(uuid.uuid4())
    metrics.reset_meters(name)

    with metrics.aggregate(name):
        metrics.log_scalar('loss', 1)
        with metrics.aggregate() as unnamed:
            with metrics.aggregate(name):
                metrics.log_scalar('loss', 2)
        metrics.log_scalar('loss', 6)

    # the named meters accumulated 1, 2 and 6: mean is 3
    self.assertEqual(metrics.get_smoothed_values(name)['loss'], 3)
    # the anonymous aggregator only ever saw the innermost value
    self.assertEqual(unnamed.get_smoothed_values()['loss'], 2)
def test_named(self):
    """A named aggregator keeps accumulating across separate activations."""
    name = str(uuid.uuid4())
    metrics.reset_meters(name)

    with metrics.aggregate(name):
        metrics.log_scalar('loss', 1)

    # logged while `name` is inactive: must not be recorded under it
    metrics.log_scalar('loss', 3)

    with metrics.aggregate(name):
        metrics.log_scalar('loss', 2)

    # only 1 and 2 were logged under `name`, so the mean is 1.5
    self.assertEqual(metrics.get_smoothed_values(name)['loss'], 1.5)
def test_nested_new_root(self):
    """``new_root`` detaches a subtree from its ancestors at any depth."""
    with metrics.aggregate() as root:
        metrics.log_scalar('loss', 1)
        with metrics.aggregate(new_root=True) as detached:
            metrics.log_scalar('loss', 2)
            with metrics.aggregate() as inner:
                metrics.log_scalar('loss', 3)
                with metrics.aggregate(new_root=True) as deepest:
                    metrics.log_scalar('loss', 4)
        # logged after the detached subtree exits: reaches `root` only
        metrics.log_scalar('loss', 1.5)

    # deepest saw only its own value (detached from everything above it)
    self.assertEqual(deepest.get_smoothed_values()['loss'], 4)
    # inner saw 3 but not the detached 4
    self.assertEqual(inner.get_smoothed_values()['loss'], 3)
    # detached saw 2 and 3 (mean 2.5), but nothing from root's scope
    self.assertEqual(detached.get_smoothed_values()['loss'], 2.5)
    # root saw 1 and 1.5 (mean 1.25); the detached subtree never reached it
    self.assertEqual(root.get_smoothed_values()['loss'], 1.25)
def reduce_metrics(self, logging_outputs, criterion, logging_output_keys=None):
    """Reduce logging outputs per language pair, then flatten them.

    Each language pair's outputs are reduced inside their own scratch
    aggregator; the smoothed per-pair values are then flattened into a
    single dict keyed '<lang_pair>:<stat>', plus summed totals.
    """
    keys = logging_output_keys or self.eval_lang_pairs

    # reduce each language pair's logging outputs in isolation
    per_pair = {}
    for lang_pair in keys:
        pair_outputs = [
            logging_output.get(lang_pair, {})
            for logging_output in logging_outputs
        ]
        with metrics.aggregate() as agg:
            for stat in ['sample_size', 'nsentences', 'ntokens']:
                metrics.log_scalar(stat, sum(o[stat] for o in pair_outputs))
            super().reduce_metrics(pair_outputs, criterion)
        per_pair[lang_pair] = agg.get_smoothed_values()

    def total(stat):
        # sum a single stat over all language pairs
        return sum(values[stat] for values in per_pair.values())

    # flatten: one '<lang_pair>:<stat>' entry per pair/stat combination
    flat_logging_output = {
        '{}:{}'.format(lang_pair, stat): value
        for lang_pair, values in per_pair.items()
        for stat, value in values.items()
    }
    flat_logging_output['loss'] = total('loss')
    if any('nll_loss' in values for values in per_pair.values()):
        flat_logging_output['nll_loss'] = total('nll_loss')
    flat_logging_output['sample_size'] = total('sample_size')
    flat_logging_output['nsentences'] = total('nsentences')
    flat_logging_output['ntokens'] = total('ntokens')
    return flat_logging_output
def aggregate_logging_outputs(self, logging_outputs, criterion):
    """[deprecated] Aggregate logging outputs from data parallel training."""
    utils.deprecation_warning(
        'The aggregate_logging_outputs API is deprecated. '
        'Please use the reduce_metrics API instead.')
    # route the modern reduce_metrics API through a scratch aggregator to
    # emulate the legacy dict-of-smoothed-values return format
    with metrics.aggregate() as acc:
        self.reduce_metrics(logging_outputs, criterion)
    return acc.get_smoothed_values()
def validate(args, trainer, task, epoch_itr, subsets):
    """Evaluate the model on the validation set(s) and return the losses."""
    if args.fixed_validation_seed is not None:
        # make every validation pass deterministic
        utils.set_torch_seed(args.fixed_validation_seed)

    hypotheses, references = [], []
    for subset in subsets:
        # build a fresh iterator over this validation subset
        subset_itr = task.get_batch_iterator(
            dataset=task.dataset(subset),
            max_tokens=args.max_tokens_valid,
            max_sentences=args.max_sentences_valid,
            max_positions=utils.resolve_max_positions(
                task.max_positions(),
                trainer.get_model().max_positions(),
            ),
            ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
            required_batch_size_multiple=args.required_batch_size_multiple,
            seed=args.seed,
            num_shards=args.distributed_world_size,
            shard_id=args.distributed_rank,
            num_workers=args.num_workers,
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.build_progress_bar(
            args, subset_itr, epoch_itr.epoch,
            prefix='valid on \'{}\' subset'.format(subset),
            no_progress_bar='simple')

        # create a new root metrics aggregator so validation metrics
        # don't pollute other aggregators (e.g., train meters)
        with metrics.aggregate(new_root=True) as agg:
            subset_hyps, subset_refs = [], []
            for sample in progress:
                logging_output, h, r = trainer.valid_step(sample, generate=True)
                subset_hyps.extend(h)
                subset_refs.extend(r)

        # log validation stats
        stats = get_valid_stats(args, trainer, agg.get_smoothed_values())
        progress.print(stats, tag=subset, step=trainer.get_num_updates())

        hypotheses.append(subset_hyps)
        references.append(subset_refs)
    return hypotheses, references
def train(args, trainer, task, epoch_itr): """Train the model for one epoch.""" # Initialize data iterator itr = epoch_itr.next_epoch_itr( fix_batches_to_gpus=args.fix_batches_to_gpus, shuffle=(epoch_itr.epoch >= args.curriculum), ) update_freq = (args.update_freq[epoch_itr.epoch - 1] if epoch_itr.epoch <= len(args.update_freq) else args.update_freq[-1]) itr = iterators.GroupedIterator(itr, update_freq) progress = progress_bar.build_progress_bar( args, itr, epoch_itr.epoch, no_progress_bar='simple', ) valid_subsets = args.valid_subset.split(',') max_update = args.max_update or math.inf if hasattr(trainer.criterion, 'set_epoch'): trainer.criterion.set_epoch(epoch_itr.epoch) for samples in progress: if hasattr(trainer.criterion, 'set_num_updates'): trainer.criterion.set_num_updates(trainer.get_num_updates()) with metrics.aggregate('train_inner'): log_output = trainer.train_step(samples) num_updates = trainer.get_num_updates() if log_output is None: continue # log mid-epoch stats stats = get_training_stats('train_inner') progress.log(stats, tag='train', step=num_updates) if (not args.disable_validation and args.save_interval_updates > 0 and num_updates % args.save_interval_updates == 0 and num_updates > 0): valid_losses = validate(args, trainer, task, epoch_itr, valid_subsets) checkpoint_utils.save_checkpoint(args, trainer, epoch_itr, valid_losses[0]) if num_updates >= max_update: break # log end-of-epoch stats stats = get_training_stats('train') progress.print(stats, tag='train', step=num_updates) # reset epoch-level meters metrics.reset_meters('train')
def downstream_train_pytorch(args, trainer, task, epoch_itr, train_prefix):
    """Fine-tune PyTorch classifier on downstream training set for one epoch.

    Trains until the epoch iterator is exhausted or `args.max_update` is
    reached, logging smoothed stats under `train_prefix`, then prints
    end-of-epoch stats and resets the `train_prefix` meters.
    """
    task.split = 'train'
    num_updates = trainer.get_num_updates()

    # Initialize data iterator
    itr = epoch_itr.next_epoch_itr(
        fix_batches_to_gpus=args.fix_batches_to_gpus,
        shuffle=(epoch_itr.next_epoch_idx > args.curriculum),
    )
    # per-epoch update frequency; last entry applies to all later epochs
    update_freq = (args.update_freq[epoch_itr.epoch - 1]
                   if epoch_itr.epoch <= len(args.update_freq)
                   else args.update_freq[-1])
    itr = iterators.GroupedIterator(itr, update_freq)
    progress = progress_bar.build_progress_bar(
        args, itr, epoch_itr.epoch, no_progress_bar='simple',
    )
    progress = maybe_wrap_neptune_logging(progress, args)

    # Task specific setup per epoch
    task.begin_epoch(epoch_itr.epoch, trainer.get_model())

    max_update = args.max_update or math.inf
    with metrics.aggregate() as agg:
        for samples in progress:
            # Train for one step
            log_output = trainer.train_step(samples)
            num_updates = trainer.get_num_updates()
            # a None output signals a skipped step; nothing to log
            if log_output is None:
                continue

            # log mid-epoch stats
            stats = get_ft_train_stats(agg.get_smoothed_values())
            progress.log(stats, tag=train_prefix, step=num_updates)

            if num_updates >= max_update:
                break

    # log end-of-epoch stats
    stats = get_ft_train_stats(agg.get_smoothed_values())
    try:
        progress.print(stats, tag=train_prefix, step=num_updates, log=False)
    except TypeError:
        # FIX: was a bare `except:` that swallowed every exception
        # (including KeyboardInterrupt). Only a signature mismatch — the
        # progress bar implementation not accepting the `log` kwarg — is
        # expected here, so catch TypeError and retry without it.
        progress.print(stats, tag=train_prefix, step=num_updates)

    # Reset epoch-level meters
    metrics.reset_meters(train_prefix)
def validate(args, trainer, task, epoch_for_logging, valid_name, ckpt_idx):
    """Evaluate the model on the validation set(s) and return the losses.

    Aggregates validation metrics under the `valid_name` meters and
    returns a 3-tuple: (stats[args.best_checkpoint_metric],
    stats[args.eval_metric], progress).
    """
    task.split = 'valid'
    if args.fixed_validation_seed is not None:
        # Set fixed seed for every validation
        utils.set_torch_seed(args.fixed_validation_seed)

    # Initialize data iterator
    itr = task.get_batch_iterator(
        dataset=task.dataset('valid'),
        max_tokens=args.max_tokens_valid,
        max_sentences=args.max_sentences_valid,
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            trainer.get_model().max_positions(),
        ),
        ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=args.required_batch_size_multiple,
        seed=args.seed,
        num_shards=args.distributed_world_size,
        shard_id=args.distributed_rank,
        num_workers=args.num_workers,
        # epoch is only used for logging/bookkeeping here — TODO confirm
        epoch=epoch_for_logging,
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.build_progress_bar(
        args, itr, epoch_for_logging,
        prefix='valid on \'{}\' subset'.format(valid_name),
        no_progress_bar='simple')
    progress = maybe_wrap_neptune_logging(progress, args)

    # Reset validation meters
    metrics.reset_meters(valid_name)

    # accumulate all valid-step metrics under the named aggregator
    with metrics.aggregate(valid_name) as agg:
        for sample in progress:
            trainer.valid_step(sample)

    # Log validation stats
    stats = get_valid_stats(args, trainer, agg.get_smoothed_values())
    if args.log_valid_progress:
        # per-checkpoint progress line, tagged '<valid_name>_ckpt<idx>'
        valid_progress_prefix = '{}_ckpt{}'.format(valid_name, ckpt_idx)
        progress.print({args.eval_metric: stats[args.eval_metric]},
                       tag=valid_progress_prefix,
                       step=epoch_for_logging)

    # Return validations score
    return stats[args.best_checkpoint_metric], stats[
        args.eval_metric], progress
def train(args, trainer, task, epoch_itr):
    """Train the model for one epoch.

    Aggregates metrics anonymously for the whole epoch, logging smoothed
    stats after each update, and stops early at `args.max_update`.
    """
    task.split = 'train'

    # Initialize data iterator
    itr = epoch_itr.next_epoch_itr(
        fix_batches_to_gpus=args.fix_batches_to_gpus,
        shuffle=(epoch_itr.next_epoch_idx > args.curriculum),
    )
    # per-epoch update frequency; last entry applies to all later epochs
    update_freq = (args.update_freq[epoch_itr.epoch - 1]
                   if epoch_itr.epoch <= len(args.update_freq)
                   else args.update_freq[-1])
    itr = iterators.GroupedIterator(itr, update_freq)
    progress = maybe_wrap_neptune_logging(
        progress_bar.build_progress_bar(
            args, itr, epoch_itr.epoch, no_progress_bar='simple',
        ),
        args=args,
    )

    # task specific setup per epoch
    task.begin_epoch(epoch_itr.epoch, trainer.get_model())

    valid_subsets = args.valid_subset.split(',')
    max_update = args.max_update or math.inf
    with metrics.aggregate() as agg:
        for samples in progress:
            log_output = trainer.train_step(samples)
            num_updates = trainer.get_num_updates()
            # None signals a skipped step; nothing to log
            if log_output is None:
                continue

            # log mid-epoch stats
            stats = get_training_stats(agg.get_smoothed_values())
            progress.log(stats, tag='train', step=num_updates)

            if num_updates >= max_update:
                break

    # log end-of-epoch stats
    stats = get_training_stats(agg.get_smoothed_values())
    try:
        progress.print(stats, tag='train', step=num_updates, log=False)
    except TypeError:
        # FIX: was a bare `except:` that swallowed every exception
        # (including KeyboardInterrupt). The only expected failure is the
        # progress bar implementation not accepting the `log` kwarg, which
        # raises TypeError — retry without it in that case only.
        progress.print(stats, tag='train', step=num_updates)

    # reset epoch-level meters
    metrics.reset_meters('train')
def _reduce_and_log_stats(self, logging_outputs, sample_size):
    """Reduce logging outputs via the task and return the smoothed stats."""
    with metrics.aggregate() as acc:
        # move logging_outputs to CPU up front to avoid unnecessary
        # device-to-host transfers inside reduce_metrics
        logging_outputs = utils.apply_to_sample(
            lambda t: t.to(device='cpu', non_blocking=True),
            logging_outputs,
        )
        self.task.reduce_metrics(logging_outputs, self.get_criterion())

    # support legacy interface
    output = acc.get_smoothed_values()
    output["sample_size"] = sample_size
    # drop derived meters; they are recomputed from the raw stats elsewhere
    for derived_key in ("ppl", "wps", "wpb", "bsz"):
        output.pop(derived_key, None)
    return output
def validate(args, trainer, task, epoch_itr, subsets):
    """Evaluate the model on the validation set(s) and return the losses."""
    if args.fixed_validation_seed is not None:
        # make every validation pass deterministic
        utils.set_torch_seed(args.fixed_validation_seed)

    valid_losses = []
    for subset in subsets:
        # build a fresh iterator over this validation subset
        subset_itr = task.get_batch_iterator(
            dataset=task.dataset(subset),
            max_tokens=args.max_tokens_valid,
            max_sentences=args.max_sentences_valid,
            max_positions=utils.resolve_max_positions(
                task.max_positions(),
                trainer.get_model().max_positions(),
            ),
            ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
            required_batch_size_multiple=args.required_batch_size_multiple,
            seed=args.seed,
            num_shards=args.distributed_world_size,
            shard_id=args.distributed_rank,
            num_workers=args.num_workers,
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.build_progress_bar(
            args, subset_itr, epoch_itr.epoch,
            prefix='valid on \'{}\' subset'.format(subset),
            no_progress_bar='simple')

        # reset validation meters
        metrics.reset_meters('valid')

        with metrics.aggregate() as agg:
            for sample in progress:
                trainer.valid_step(sample)

        # log validation stats
        stats = get_valid_stats(args, trainer, agg.get_smoothed_values())
        progress.print(stats, tag=subset, step=trainer.get_num_updates())

        valid_losses.append(stats[args.best_checkpoint_metric])
    return valid_losses
def reduce_metrics(self, logging_outputs, criterion):
    """Reduce parent metrics, then log summed size stats at this level."""
    with metrics.aggregate():
        # pass 'sample_size', 'nsentences', 'ntokens' stats to fairseq_task
        super().reduce_metrics(logging_outputs, criterion)
    for stat in ['sample_size', 'nsentences', 'ntokens']:
        metrics.log_scalar(stat, sum(out[stat] for out in logging_outputs))
def downstream_validate_pytorch(args, task, model, criterion,
                                epoch_for_logging, subsets, valid_name,
                                num_updates, global_epoch=None):
    """Evaluate the model on the validation set(s) and return the losses.

    Returns a list with one entry per subset:
    stats[args.best_checkpoint_metric].
    """
    task.split = 'valid'
    # normalize the meter name: fall back to 'valid' when none is given
    valid_name_ = valid_name if valid_name is not None else 'valid'
    if args.fixed_validation_seed is not None:
        # Set fixed seed for every validation
        utils.set_torch_seed(args.fixed_validation_seed)

    valid_losses = []
    for subset in subsets:
        # Initialize data iterator
        itr = task.get_batch_iterator(
            dataset=task.dataset(subset),
            max_tokens=args.max_tokens_valid,
            max_sentences=args.max_sentences_valid,
            max_positions=utils.resolve_max_positions(
                task.max_positions(),
                model.max_positions(),
            ),
            ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
            required_batch_size_multiple=args.required_batch_size_multiple,
            seed=args.seed,
            num_shards=args.distributed_world_size,
            shard_id=args.distributed_rank,
            num_workers=args.num_workers,
            epoch=1,
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.build_progress_bar(
            args, itr, epoch_for_logging,
            prefix='valid on \'{}\' subset'.format(valid_name_),
            no_progress_bar='simple')

        # Add global epoch to beginning of progress bar description
        if global_epoch is not None:
            try:
                progress.wrapped_bar.tqdm.set_description(
                    desc='epoch {:03d} | \'{}\' {}'.format(
                        global_epoch, valid_name_,
                        progress.wrapped_bar.prefix),
                    refresh=True)
            except AttributeError:
                # FIX: was a bare `except:`. Only the absence of a
                # `wrapped_bar` attribute is expected here — fall back to
                # the plain tqdm bar in that case.
                progress.tqdm.set_description(
                    desc='epoch {:03d} | \'{}\' {}'.format(
                        global_epoch, valid_name_, progress.tqdm.desc),
                    refresh=True)
        progress = maybe_wrap_neptune_logging(progress, args)

        # Reset validation meters
        metrics.reset_meters(valid_name_)
        # FIX: aggregate under the normalized name. Previously this used
        # `valid_name`, so when valid_name was None the aggregation was
        # anonymous while the reset above targeted the 'valid' meters.
        with metrics.aggregate(valid_name_) as agg:
            # keep the first real sample around as a fallback batch
            dummy_batch = "DUMMY"
            for sample in progress:
                dummy_batch = sample if dummy_batch == "DUMMY" else dummy_batch
                valid_step(args, sample, task, model, criterion,
                           dummy_batch, logger)

        # Log validation stats
        stats = get_ft_valid_stats(args, agg.get_smoothed_values(),
                                   num_updates)
        progress.print(stats, tag=valid_name_, step=num_updates)

        valid_losses.append(stats[args.best_checkpoint_metric])
    return valid_losses
def main(args):
    """Load a model (optionally from a checkpoint) and evaluate it on the
    validation subset(s), printing smoothed metrics per subset."""
    utils.import_user_module(args)
    utils.set_torch_seed(args.seed)

    assert args.max_tokens is not None or args.max_sentences is not None, \
        'Must specify batch size either with --max-tokens or --max-sentences'

    use_fp16 = args.fp16
    use_cuda = torch.cuda.is_available() and not args.cpu

    # Setup task, e.g., translation, language modeling, etc.
    task = tasks.setup_task(args)
    task.split = 'valid'

    # Load model
    # FIX: default to None so a missing attribute doesn't raise
    # AttributeError (consistent with load_component_prefix below).
    load_checkpoint = getattr(args, 'load_checkpoint', None)
    if load_checkpoint:
        logger.info('loading model(s) from {}'.format(load_checkpoint))
        if not os.path.exists(load_checkpoint):
            raise IOError("Model file not found: {}".format(load_checkpoint))
        state = checkpoint_utils.load_checkpoint_to_cpu(load_checkpoint)
        # NOTE: the redundant re-setup of `task` (it is assigned above and
        # never None) and the unused `checkpoint_args` local were removed.
        load_component_prefix = getattr(args, 'load_component_prefix', None)
        model_state = state["model"]
        if load_component_prefix:
            # keep only the sub-module states under the given prefix
            model_state = select_component_state(model_state,
                                                 load_component_prefix)
        # build model for ensemble
        model = task.build_model(args)
        missing_keys, unexpected_keys = model.load_state_dict(
            model_state, strict=False, args=args)
        handle_state_dict_keys(missing_keys, unexpected_keys)
    else:
        model = task.build_model(args)

    # Move model to GPU
    if use_fp16:
        model.half()
    if use_cuda:
        model.cuda()

    # Print args
    logger.info(args)

    # Build criterion
    criterion = task.build_criterion(args)
    criterion.eval()

    # Load valid dataset (we load training data below, based on the latest checkpoint)
    for subset in args.valid_subset.split(','):
        try:
            task.load_dataset(subset, combine=False, epoch=0)
            dataset = task.dataset(subset)
        except KeyError:
            raise Exception('Cannot find dataset: ' + subset)

        # Initialize data iterator
        itr = task.get_batch_iterator(
            dataset=dataset,
            max_tokens=args.max_tokens,
            max_sentences=args.max_sentences,
            max_positions=utils.resolve_max_positions(
                task.max_positions(),
                model.max_positions(),
            ),
            ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
            required_batch_size_multiple=args.required_batch_size_multiple,
            seed=args.seed,
            num_workers=args.num_workers,
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.build_progress_bar(
            args, itr,
            prefix='valid on \'{}\' subset'.format(subset),
            no_progress_bar='simple')

        log_outputs = []
        for i, sample in enumerate(progress):
            sample = utils.move_to_cuda(sample) if use_cuda else sample
            _loss, _sample_size, log_output = task.valid_step(
                sample, model, criterion)
            progress.log(log_output, step=i)
            log_outputs.append(log_output)

        # NOTE(review): `i` is undefined if the subset is empty — the
        # print below would raise NameError in that case; confirm datasets
        # are always non-empty here.
        with metrics.aggregate() as agg:
            task.reduce_metrics(log_outputs, criterion)
            log_output = agg.get_smoothed_values()
        progress.print(log_output, tag=subset, step=i)
def main(args, override_args=None):
    """Load a model ensemble from a checkpoint and evaluate it on the
    validation subset(s), printing smoothed metrics per subset.

    `override_args`, when given, supplies attribute overrides plus an
    optional `model_overrides` string that is evaluated into a dict.
    """
    utils.import_user_module(args)

    assert args.max_tokens is not None or args.max_sentences is not None, \
        'Must specify batch size either with --max-tokens or --max-sentences'

    use_fp16 = args.fp16
    use_cuda = torch.cuda.is_available() and not args.cpu

    if override_args is not None:
        overrides = vars(override_args)
        # SECURITY NOTE: `model_overrides` is passed through eval(); it
        # must come from a trusted command line, never untrusted input.
        overrides.update(eval(getattr(override_args, 'model_overrides', '{}')))
    else:
        overrides = None

    # Load ensemble
    logger.info('loading model(s) from {}'.format(args.path))
    models, model_args, task = checkpoint_utils.load_model_ensemble_and_task(
        [args.path],
        arg_overrides=overrides,
    )
    # NOTE(review): `model` is rebound by the loop below; this initial
    # binding matters only if the ensemble has exactly one model.
    model = models[0]

    # Move models to GPU
    for model in models:
        if use_fp16:
            model.half()
        if use_cuda:
            model.cuda()

    # Print args
    logger.info(model_args)

    # Build criterion
    criterion = task.build_criterion(model_args)
    criterion.eval()

    # Load valid dataset (we load training data below, based on the latest checkpoint)
    for subset in args.valid_subset.split(','):
        try:
            task.load_dataset(subset, combine=False, epoch=0)
            dataset = task.dataset(subset)
        except KeyError:
            raise Exception('Cannot find dataset: ' + subset)

        # Initialize data iterator
        itr = task.get_batch_iterator(
            dataset=dataset,
            max_tokens=args.max_tokens,
            max_sentences=args.max_sentences,
            max_positions=utils.resolve_max_positions(
                task.max_positions(),
                # the tightest limit across all ensemble members wins
                *[m.max_positions() for m in models],
            ),
            ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
            required_batch_size_multiple=args.required_batch_size_multiple,
            seed=args.seed,
            num_workers=args.num_workers,
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.build_progress_bar(
            args, itr,
            prefix='valid on \'{}\' subset'.format(subset),
            no_progress_bar='simple')

        log_outputs = []
        for i, sample in enumerate(progress):
            sample = utils.move_to_cuda(sample) if use_cuda else sample
            _loss, _sample_size, log_output = task.valid_step(
                sample, model, criterion)
            progress.log(log_output, step=i)
            log_outputs.append(log_output)

        # reduce all per-batch outputs into smoothed metrics for the subset
        with metrics.aggregate() as agg:
            task.reduce_metrics(log_outputs, criterion)
            log_output = agg.get_smoothed_values()
        progress.print(log_output, tag=subset, step=i)