def load_state(model_path):
    state = load_checkpoint_to_cpu(model_path, arg_overrides={})
    args = state["args"]
    args = recursive_contractuser(args)
    args = recursive_expanduser(args)
    task = tasks.setup_task(args)  # load src/tgt dicts
    model = task.build_model(args)
    model.load_state_dict(state["model"])
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if args['common']['fp16'] and use_cuda:
        model.half()
    if use_cuda:
        torch.cuda.empty_cache()
        torch.cuda.set_device(torch.cuda.device_count() - 1)
        model.cuda()
    model.eval()
    del state
    return args, task, model, use_cuda
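
# Hypothetical usage sketch for load_state (the checkpoint path below is an
# illustrative assumption, not a file shipped with the repo): restore the model
# once, then reuse the returned task/model across inference calls.
def _demo_load_state():
    args, task, model, use_cuda = load_state('~/.ncc/demo/checkpoints/checkpoint_best.pt')
    assert not model.training  # load_state leaves the model in eval mode
    return args, task, model, use_cuda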
def main(model_path, input):
    LOGGER.info('Load model from {}'.format(model_path))
    state = load_checkpoint_to_cpu(model_path, arg_overrides={})
    args = state["args"]
    task = tasks.setup_task(args)  # load src/tgt dicts
    model = task.build_model(args)
    model.load_state_dict(state["model"])
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        torch.cuda.empty_cache()
        torch.cuda.set_device(torch.cuda.device_count() - 1)
        model.cuda()
    model.eval()
    if args['common']['fp16'] and use_cuda:
        model.half()

    # TODO: source tensor should be handled in corresponding task scripts.
    #  Here we only use the seq2seq pipeline as an instance.
    input_ids = task.target_dictionary.encode_string(input, line_tokenizer=None, add_if_not_exist=False)
    src_input_ids = input_ids.long().unsqueeze(dim=0)
    sample = {
        'net_input': {
            'src_tokens': src_input_ids,
        },
    }
    sample = utils.move_to_cuda(sample) if use_cuda else sample
    generator = task.sequence_completor
    net_output = generator.complete(models=[model], sample=sample)

    # distribution over the next token
    pred_prob = torch.softmax(net_output[0][0, -1, :], dim=-1)
    topk_prob, topk_idx = pred_prob.topk(k=10, dim=-1)
    # remove unk/eos/bos/pad; keep the top-5 candidates
    topk_info = [(round(prob.item(), 6), idx.item()) for prob, idx in zip(topk_prob, topk_idx)][:5]
    topk_info = [(task.target_dictionary[idx], prob) for prob, idx in topk_info]
    pred_sentence = [
        (input[:-1] + [topk_token], topk_prob)
        for topk_token, topk_prob in topk_info
    ]
    return topk_info, pred_sentence
def main(model_path, input):
    state = load_checkpoint_to_cpu(model_path, arg_overrides={})
    args = state["args"]
    task = tasks.setup_task(args)  # load src/tgt dicts
    model = task.build_model(args)
    model.load_state_dict(state["model"])
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        torch.cuda.empty_cache()
        torch.cuda.set_device(torch.cuda.device_count() - 1)
        model.cuda()
    if args['common']['fp16'] and use_cuda:
        model.half()
    model.eval()

    # TODO: source tensor should be handled in corresponding task scripts.
    #  Here we only use the seq2seq pipeline as an instance.
    src_input_ids = task.src_dict.encode_line(input, line_tokenizer=None, add_if_not_exist=False)
    # truncate to max_source_positions and append eos
    src_input_ids = torch.cat([
        src_input_ids[:args['task']['max_source_positions'] - 1],
        torch.Tensor([task.src_dict.eos()]).long(),
    ])
    # right-pad to max_source_positions
    padding_size = args['task']['max_source_positions'] - len(src_input_ids)
    if padding_size > 0:
        src_input_ids = torch.cat([
            src_input_ids,
            torch.Tensor([task.src_dict.pad()] * padding_size).long(),
        ])
    # add the batch dimension on CPU as well, then move to GPU if available
    src_input_ids = src_input_ids.unsqueeze(dim=0)
    if use_cuda:
        src_input_ids = src_input_ids.cuda()
    sample = {
        'net_input': {
            'src_tokens': src_input_ids,
            'src_lengths': torch.LongTensor([s.numel() for s in src_input_ids]),
        },
    }
    sample = utils.move_to_cuda(sample) if use_cuda else sample
    generator = task.build_generator(args)
    pred_sentence_ids = generator.generate(models=[model], sample=sample)
    pred_sentence = task.tgt_dict.string(pred_sentence_ids[0][0]['tokens'])
    return pred_sentence
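
# A minimal, self-contained sketch (plain torch with toy ids; not part of the
# original pipeline) of the truncate-then-pad invariant used above: sequences
# longer than max_source_positions are clipped to leave room for eos, shorter
# ones are right-padded, so every encoded source ends up the same length.
def _demo_truncate_and_pad():
    max_source_positions, eos, pad = 8, 2, 1
    ids = torch.arange(10, 22).long()  # a 12-token "sentence"
    ids = torch.cat([ids[:max_source_positions - 1], torch.LongTensor([eos])])
    padding_size = max_source_positions - len(ids)
    if padding_size > 0:  # no-op for this example
        ids = torch.cat([ids, torch.LongTensor([pad] * padding_size)])
    assert len(ids) == max_source_positions
    return ids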
def load_model_ensemble_and_task(filenames, arg_overrides=None, task=None, strict=True, suffix=''):
    from ncc import tasks

    ensemble = []
    for filename in filenames:
        filename = filename.replace(".pt", suffix + ".pt")
        if not PathManager.exists(filename):
            raise IOError("Model file not found: {}".format(filename))
        state = load_checkpoint_to_cpu(filename, arg_overrides)
        args = state["args"]
        if task is None:
            task = tasks.setup_task(args)
        # build model for ensemble
        model = task.build_model(args)
        model.load_state_dict(state["model"], strict=strict, args=args)
        ensemble.append(model)
    return ensemble, args, task
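
# Hypothetical usage sketch for load_model_ensemble_and_task (the checkpoint
# paths are illustrative assumptions): load several checkpoints of one task so
# that their predictions can be ensembled at inference time.
def _demo_load_ensemble():
    models, args, task = load_model_ensemble_and_task(
        ['checkpoints/checkpoint_last.pt', 'checkpoints/checkpoint_best.pt'],
        arg_overrides={},
    )
    return models, args, task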
def main(model_path, input):
    LOGGER.info('Load model from {}'.format(model_path))
    state = load_checkpoint_to_cpu(model_path, arg_overrides={})
    args = state["args"]
    args = recursive_contractuser(args, old_cache_name='.ncc')
    args = recursive_expanduser(args)
    task = tasks.setup_task(args)  # load src/tgt dicts
    model = task.build_model(args)
    model.load_state_dict(state["model"])
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        torch.cuda.empty_cache()
        torch.cuda.set_device(torch.cuda.device_count() - 1)
        model.cuda()
    model.eval()
    if args['common']['fp16'] and use_cuda:
        model.half()

    sample = task.encode_input(input)
    sample = utils.move_to_cuda(sample) if use_cuda else sample
    generator = task.sequence_completor
    net_output = generator.complete(models=[model], sample=sample)
    out = task.decode_output(net_output)
    return out
def single_main(args, init_distributed=False):
    assert args['dataset']['max_tokens'] is not None or args['dataset']['max_sentences'] is not None, \
        'Must specify batch size either with --max-tokens or --max-sentences'
    metrics.reset()

    # 0. Initialize CUDA and distributed training
    if torch.cuda.is_available() and not args['common']['cpu']:
        torch.cuda.set_device(args['distributed_training']['device_id'])
    set_seed.set_seed(args['common']['seed'])
    if init_distributed:
        args['distributed_training']['distributed_rank'] = distributed_utils.distributed_init(args)

    # Verify checkpoint directory
    if distributed_utils.is_master(args):
        save_dir = args['checkpoint']['save_dir']
        checkpoint_utils.verify_checkpoint_directory(save_dir)
        PathManager.rm(os.path.join(save_dir, '*.pt'))  # removes any existing (pre-trained) checkpoints

    # 1. Setup task, e.g., translation, language modeling, etc.
    task = tasks.setup_task(args)

    # 2. Load valid dataset (we load training data below, based on the latest checkpoint)
    task.load_dataset(args['dataset']['valid_subset'], combine=False, epoch=1)

    # 3. Build model and criterion
    model = task.build_model(args)
    criterion = task.build_criterion(args)
    LOGGER.info(model)
    LOGGER.info('model {}, criterion {}'.format(args['model']['arch'], criterion.__class__.__name__))
    LOGGER.info('num. model params: {} (num. trained: {})'.format(
        sum(p.numel() for p in model.parameters()),
        sum(p.numel() for p in model.parameters() if p.requires_grad),
    ))

    # 4. Build trainer
    trainer = Trainer(args, task, model, criterion)
    LOGGER.info('training on {} GPUs'.format(args['distributed_training']['distributed_world_size']))
    LOGGER.info('max tokens per GPU = {} and max sentences per GPU = {}'.format(
        args['dataset']['max_tokens'],
        args['dataset']['max_sentences'],
    ))

    # 5. Load the latest checkpoint if one is available and restore the corresponding train iterator
    extra_state, epoch_itr = checkpoint_utils.load_checkpoint(args, trainer, combine=False)

    # 6. Train until the learning rate gets too small
    max_epoch = args['optimization']['max_epoch'] or math.inf
    max_update = args['optimization']['max_update'] or math.inf
    lr = trainer.get_lr()
    train_meter = meters.StopwatchMeter()
    train_meter.start()
    valid_subsets = args['dataset']['valid_subset'].split(',')
    while (
        lr > args['optimization']['min_lr']
        and epoch_itr.next_epoch_idx <= max_epoch
        and trainer.get_num_updates() < max_update
    ):
        # train for one epoch
        train(args, trainer, task, epoch_itr)

        if not args['dataset']['disable_validation'] \
                and epoch_itr.epoch % args['dataset']['validate_interval'] == 0:
            valid_losses = validate(args, trainer, task, epoch_itr, valid_subsets)
        else:
            valid_losses = [None]

        # only use first validation loss to update the learning rate
        lr = trainer.lr_step(epoch_itr.epoch, valid_losses[0])

        # save checkpoint
        if epoch_itr.epoch % args['checkpoint']['save_interval'] == 0:
            checkpoint_utils.save_checkpoint(args, trainer, epoch_itr, valid_losses[0])

        # early stop
        if should_stop_early(args, valid_losses[0]):
            LOGGER.info('early stop since valid performance hasn\'t improved for last {} runs'.format(
                args['checkpoint']['patience']))
            break

        epoch_itr = trainer.get_train_iterator(
            epoch_itr.next_epoch_idx,
            combine=False,  # TODO to be checked
            # sharded data: get train iterator for next epoch
            load_dataset=(os.pathsep in args['task']['data']),
        )
    train_meter.stop()
    LOGGER.info('done training in {:.1f} seconds'.format(train_meter.sum))
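
# A minimal sketch of what a patience-based should_stop_early could look like
# (an assumption for illustration only; the repo's own implementation may
# differ): stop once the validation loss has failed to improve for
# args['checkpoint']['patience'] consecutive validations. The mutable default
# argument is a deliberate shortcut to keep the sketch self-contained.
def _should_stop_early_sketch(args, valid_loss, _state={'best': None, 'bad_runs': 0}):
    patience = args['checkpoint']['patience']
    if valid_loss is None or patience <= 0:
        return False
    if _state['best'] is None or valid_loss < _state['best']:
        _state['best'], _state['bad_runs'] = valid_loss, 0
        return False
    _state['bad_runs'] += 1
    return _state['bad_runs'] >= patience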
def main(args, out_file=None):
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args['dataset']['gen_subset'])

    # Set dictionaries
    src_dict = task.source_dictionary
    tgt_dict = task.target_dictionary

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _ = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args['eval']['no_beamable_mm'] else args['eval']['beam'],
            need_attn=args['eval']['print_alignment'],
        )
        if use_cuda:
            # under CUDA_VISIBLE_DEVICES, torch renumbers visible devices from 0,
            # so the first visible device is always cuda:0
            torch.cuda.set_device('cuda:0')
            model = model.cuda()
        if args['common']['fp16'] and use_cuda:
            model.half()

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args['dataset']['gen_subset']),
        max_tokens=args['dataset']['max_tokens'],
        max_sentences=args['eval']['max_sentences'],
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]),
        ignore_invalid_inputs=args['dataset']['skip_invalid_size_inputs_valid_test'],
        required_batch_size_multiple=args['dataset']['required_batch_size_multiple'],
        num_shards=args['dataset']['num_shards'],
        shard_id=args['dataset']['shard_id'],
        num_workers=args['dataset']['num_workers'],
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=args['common']['log_format'],
        log_interval=args['common']['log_interval'],
        default_log_format=('tqdm' if not args['common']['no_progress_bar'] else 'none'),
    )

    # Initialize generator
    gen_timer = StopwatchMeter()
    generator = task.build_generator(models, args)

    sources, hypotheses, references = dict(), dict(), dict()
    for sample in progress:
        torch.cuda.empty_cache()
        sample = move_to_cuda(sample) if use_cuda else sample
        if 'net_input' not in sample:
            continue

        gen_timer.start()
        hypos = task.inference_step(generator, models, sample, bos_token=tgt_dict.bos())
        num_generated_tokens = sum(len(h[0]['tokens']) for h in hypos)  # TODO: warning
        gen_timer.stop(num_generated_tokens)

        for i, sample_id in enumerate(sample['id'].tolist()):
            has_target = sample['target'] is not None

            # Remove padding
            src_tokens = utils.strip_pad(sample['net_input']['src_tokens'][i, :], tgt_dict.pad())
            target_tokens = None
            if has_target:
                target_tokens = utils.strip_pad(sample['target'][i, :], tgt_dict.pad()).int().cpu()
            hypos_tokens = utils.strip_eos(hypos[i][0]['tokens'], tgt_dict.eos()).int().cpu()

            # Either retrieve the original sentences or regenerate them from tokens.
            if src_dict is not None:
                src_str = src_dict.string(src_tokens, args['eval']['remove_bpe'])
            else:
                src_str = ""
            if has_target:
                target_str = tgt_dict.string(target_tokens, args['eval']['remove_bpe'], escape_unk=True)
            hypo_str = tgt_dict.string(hypos_tokens, args['eval']['remove_bpe'])

            sources[sample_id] = [src_str]
            hypotheses[sample_id] = [hypo_str]
            references[sample_id] = [target_str]

    bleu, rouge_l, meteor = summarization_metrics.eval_accuracies(
        hypotheses, references, filename=out_file, mode='test')
    LOGGER.info('BLEU: {:.2f}\t ROUGE-L: {:.2f}\t METEOR: {:.2f}'.format(bleu, rouge_l, meteor))
def _main(args, output_file):
    if args['dataset']['max_tokens'] is None and args['dataset']['max_sentences'] is None:
        args['dataset']['max_tokens'] = 12000
    LOGGER.info(args)
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args['dataset']['gen_subset'])

    # Set dictionaries
    try:
        src_dict = getattr(task, 'source_dictionary', None)
    except NotImplementedError:
        src_dict = None
    tgt_dict = task.target_dictionary

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args['eval']['no_beamable_mm'] else args['eval']['beam'],
            need_attn=args['eval']['print_alignment'],
        )
        if _model_args['common']['fp16']:
            model.half()
        if use_cuda:
            model.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args['eval']['replace_unk'])

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args['dataset']['gen_subset']),
        max_tokens=args['dataset']['max_tokens'],
        max_sentences=args['eval']['max_sentences'],
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]),
        ignore_invalid_inputs=_model_args['dataset']['skip_invalid_size_inputs_valid_test'],
        required_batch_size_multiple=_model_args['dataset']['required_batch_size_multiple'],
        num_shards=_model_args['dataset']['num_shards'],
        shard_id=_model_args['dataset']['shard_id'],
        num_workers=_model_args['dataset']['num_workers'],
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=_model_args['common']['log_format'],
        log_interval=_model_args['common']['log_interval'],
        default_log_format=('tqdm' if not _model_args['common']['no_progress_bar'] else 'none'),
    )

    # Initialize generator
    gen_timer = StopwatchMeter()
    generator = task.build_generator(args)

    num_sentences = 0
    has_target = True
    wps_meter = TimeMeter()
    sources, hypotheses, references = dict(), dict(), dict()
    for sample in progress:
        torch.cuda.empty_cache()
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        if 'net_input' not in sample:
            continue

        gen_timer.start()
        hypos = task.inference_step(generator, models, sample)
        num_generated_tokens = sum(len(h[0]['tokens']) for h in hypos)  # TODO: warning
        gen_timer.stop(num_generated_tokens)

        for i, sample_id in enumerate(sample['id'].tolist()):
            has_target = sample['target'] is not None

            # Remove padding
            src_tokens = utils.strip_pad(sample['net_input']['src_tokens'][i, :], tgt_dict.pad())
            target_tokens = None
            if has_target:
                target_tokens = utils.strip_pad(sample['target'][i, :], tgt_dict.pad()).int().cpu()
            hypos_tokens = utils.strip_eos(hypos[i][0]['tokens'], tgt_dict.eos()).int().cpu()

            # Either retrieve the original sentences or regenerate them from tokens.
            if src_dict is not None:
                src_str = src_dict.string(src_tokens, args['eval']['remove_bpe'])
            else:
                src_str = ""
            if has_target:
                target_str = tgt_dict.string(target_tokens, args['eval']['remove_bpe'], escape_unk=True)
            hypo_str = tgt_dict.string(hypos_tokens, args['eval']['remove_bpe'])

            sources[sample_id] = [src_str]
            hypotheses[sample_id] = [hypo_str]
            references[sample_id] = [target_str]

            if not args['eval']['quiet']:
                if src_dict is not None:
                    print('S-{}\t{}'.format(sample_id, src_str), file=output_file)
                if has_target:
                    print('T-{}\t{}'.format(sample_id, target_str), file=output_file)
                print('H-{}\t{}'.format(sample_id, hypo_str), file=output_file)

    filename = os.path.join(os.path.dirname(__file__), 'config', 'predict.json')
    LOGGER.info('write predicted file at {}'.format(filename))
    bleu, rouge_l, meteor = eval_utils.eval_accuracies(
        hypotheses, references, filename=filename, mode='test')
    LOGGER.info('BLEU: {:.2f}\t ROUGE-L: {:.2f}\t METEOR: {:.2f}'.format(bleu, rouge_l, meteor))
def _main(args, output_file):
    if args['dataset']['max_tokens'] is None and args['dataset']['max_sentences'] is None:
        args['dataset']['max_tokens'] = 12000
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        # under CUDA_VISIBLE_DEVICES, torch renumbers visible devices from 0,
        # so the first visible device is always cuda:0
        torch.cuda.set_device('cuda:0')

    # Load dataset splits
    task = tasks.setup_task(args)

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Optimize ensemble for generation
    for model in models:
        if _model_args['common']['fp16']:
            model.half()
        if use_cuda:
            model.cuda()

    sequence_completor = task.build_completor(models, args)

    subsets = [
        args['dataset']['train_subset'],
        args['dataset']['valid_subset'],
        args['dataset']['gen_subset'],
    ]
    for subset in subsets:
        task.load_dataset(subset, shuffle=False)
        task.dataset(subset).shuffle = False

        # Load dataset (possibly sharded)
        itr = task.get_batch_iterator(
            dataset=task.dataset(subset),
            max_tokens=args['dataset']['max_tokens'],
            max_sentences=args['eval']['max_sentences_eval'],
            max_positions=utils.resolve_max_positions(
                task.max_positions(),
                *[model.max_positions() for model in models]),
            ignore_invalid_inputs=_model_args['dataset']['skip_invalid_size_inputs_valid_test'],
            required_batch_size_multiple=_model_args['dataset']['required_batch_size_multiple'],
            num_shards=_model_args['dataset']['num_shards'],
            shard_id=_model_args['dataset']['shard_id'],
            num_workers=_model_args['dataset']['num_workers'],
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.progress_bar(
            itr,
            log_format=_model_args['common']['log_format'],
            log_interval=_model_args['common']['log_interval'],
            default_log_format=('tqdm' if not _model_args['common']['no_progress_bar'] else 'none'),
        )

        topk = args['kd']['gen_topk']
        out_idx, out_prob = [], []
        with torch.no_grad():
            for sample in progress:
                torch.cuda.empty_cache()
                sample = move_to_cuda(sample) if use_cuda else sample
                if 'net_input' not in sample:
                    continue
                net_output = sequence_completor.generate([model], sample, prefix_tokens=None)
                topk_prob, topk_ids = torch.topk(net_output[0], topk, dim=-1)
                # ignore pad
                non_padding_mask = sample['net_input']['src_tokens'] != task.target_dictionary.pad()
                if use_cuda:
                    topk_prob, topk_ids = topk_prob.cpu(), topk_ids.cpu()
                    non_padding_mask = non_padding_mask.cpu()
                for idx in range(topk_prob.size(0)):
                    out_idx.append(topk_ids[idx, ...][non_padding_mask[idx, ...]].view(-1).tolist())
                    out_prob.append(topk_prob[idx, ...][non_padding_mask[idx, ...]].view(-1).tolist())

        assert len(out_idx) == len(out_prob) == len(task.dataset(subset)), \
            Exception(len(out_idx), len(out_prob), len(task.dataset(subset)))

        TeacherOutDataset.save_bin(
            prefix=os.path.join(args['checkpoint']['save_dir'], f'{subset}.top{topk}_idx'),
            data_list=out_idx,
            dtype=np.int32,
        )
        TeacherOutDataset.save_bin(
            prefix=os.path.join(args['checkpoint']['save_dir'], f'{subset}.top{topk}_prob'),
            data_list=out_prob,
            dtype=np.float64,  # np.float is deprecated; float64 matches the original alias
        )
def main(args, **unused_kwargs):
    assert args['eval']['path'] is not None, '--path required for evaluation!'

    if torch.cuda.is_available() and not args['common']['cpu']:
        torch.cuda.set_device(args['distributed_training']['device_id'])
    LOGGER.info(args)

    # while evaluating, set fraction_using_func_name = 0, i.e., do not sample from func_name
    args['task']['fraction_using_func_name'] = 0.
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        # under CUDA_VISIBLE_DEVICES, torch renumbers visible devices from 0,
        # so the first visible device is always cuda:0
        torch.cuda.set_device('cuda:0')

    task = tasks.setup_task(args)

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    for lang in deepcopy(args['dataset']['langs']):
        args['dataset']['langs'] = [lang]
        # Load dataset splits
        LOGGER.info(f'Evaluating {lang} dataset')
        task.load_dataset(args['dataset']['gen_subset'])
        dataset = task.dataset(args['dataset']['gen_subset'])

        # Optimize ensemble for generation and set the source and dest dicts on the model (required by scorer)
        for model in models:
            model.make_generation_fast_()
            if args['common']['fp16']:
                model.half()
            if use_cuda:
                model.cuda()
        assert len(models) > 0
        LOGGER.info('num. model params: {}'.format(sum(p.numel() for p in models[0].parameters())))

        itr = task.get_batch_iterator(
            dataset=dataset,
            max_tokens=args['dataset']['max_tokens'] or 36000,
            max_sentences=args['eval']['max_sentences'],
            max_positions=utils.resolve_max_positions(
                *[model.max_positions() for model in models]),
            ignore_invalid_inputs=True,
            num_shards=args['dataset']['num_shards'],
            shard_id=args['dataset']['shard_id'],
            num_workers=args['dataset']['num_workers'],
        ).next_epoch_itr(shuffle=False)
        progress = progress_bar.progress_bar(
            itr,
            log_format=args['common']['log_format'],
            log_interval=args['common']['log_interval'],
            default_log_format=('tqdm' if not args['common']['no_progress_bar'] else 'none'),
        )

        code_reprs, query_reprs = [], []
        for sample in progress:
            if 'net_input' not in sample:
                continue
            sample = move_to_cuda(sample) if use_cuda else sample
            batch_code_reprs, batch_query_reprs = models[0](**sample['net_input'])
            if use_cuda:
                batch_code_reprs = batch_code_reprs.cpu().detach()
                batch_query_reprs = batch_query_reprs.cpu().detach()
            code_reprs.append(batch_code_reprs)
            query_reprs.append(batch_query_reprs)
        code_reprs = torch.cat(code_reprs, dim=0)
        query_reprs = torch.cat(query_reprs, dim=0)
        assert code_reprs.shape == query_reprs.shape, (code_reprs.shape, query_reprs.shape)

        eval_size = len(code_reprs) if args['eval']['eval_size'] == -1 else args['eval']['eval_size']
        k, MRR, topk_idx, topk_prob = 3, [], [], []
        for idx in range(len(dataset) // eval_size):
            # slice the idx-th block of eval_size examples
            code_emb = code_reprs[idx * eval_size:(idx + 1) * eval_size, :]
            query_emb = query_reprs[idx * eval_size:(idx + 1) * eval_size, :]
            if use_cuda:
                code_emb = code_emb.cuda()
                query_emb = query_emb.cuda()

            if args['criterion'] == 'search_cosine':
                src_emb_norm = torch.norm(code_emb, dim=-1, keepdim=True) + 1e-10
                tgt_emb_norm = torch.norm(query_emb, dim=-1, keepdim=True) + 1e-10
                logits = (query_emb / tgt_emb_norm) @ (code_emb / src_emb_norm).t()
            elif args['criterion'] == 'search_softmax':
                logits = query_emb @ code_emb.t()
            else:
                raise NotImplementedError

            correct_scores = logits.diag()
            compared_scores = logits >= correct_scores.unsqueeze(dim=-1)
            mrr = 1 / compared_scores.sum(dim=-1).float()
            MRR.extend(mrr.tolist())

        if len(dataset) % eval_size:
            # handle the remaining tail block
            code_emb = code_reprs[-eval_size:, :]
            query_emb = query_reprs[-eval_size:, :]
            if use_cuda:
                code_emb = code_emb.cuda()
                query_emb = query_emb.cuda()

            if args['criterion'] == 'search_cosine':
                src_emb_norm = torch.norm(code_emb, dim=-1, keepdim=True) + 1e-10
                tgt_emb_norm = torch.norm(query_emb, dim=-1, keepdim=True) + 1e-10
                logits = (query_emb / tgt_emb_norm) @ (code_emb / src_emb_norm).t()
            elif args['criterion'] == 'search_softmax':
                logits = query_emb @ code_emb.t()
            else:
                raise NotImplementedError

            correct_scores = logits.diag()
            compared_scores = logits >= correct_scores.unsqueeze(dim=-1)
            last_ids = len(code_reprs) % eval_size
            mrr = 1 / compared_scores.sum(dim=-1).float()[-last_ids:]
            MRR.extend(mrr.tolist())

        print('{}, mrr: {:.4f}'.format(lang, np.mean(MRR)))
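
# A tiny worked example (toy numbers, plain torch; not part of the pipeline)
# of the reciprocal-rank trick used above: each query's correct score sits on
# the diagonal, so counting how many candidates score >= it gives the 1-based
# rank, and averaging 1/rank gives MRR.
def _demo_mrr_from_logits():
    logits = torch.tensor([
        [0.9, 0.1, 0.3],  # correct item ranked 1st -> rr = 1.0
        [0.8, 0.2, 0.4],  # correct score 0.2 beaten by 0.8 and 0.4 -> rank 3 -> rr = 1/3
        [0.1, 0.5, 0.7],  # correct item ranked 1st -> rr = 1.0
    ])
    correct_scores = logits.diag()
    ranks = (logits >= correct_scores.unsqueeze(dim=-1)).sum(dim=-1)
    return (1. / ranks.float()).mean().item()  # (1 + 1/3 + 1) / 3 ~= 0.7778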
def main(args, **unused_kwargs):
    assert args['eval']['path'] is not None, '--path required for evaluation!'

    if torch.cuda.is_available() and not args['common']['cpu']:
        torch.cuda.set_device(args['distributed_training']['device_id'])
    LOGGER.info(args)

    # while evaluating, set fraction_using_func_name = 0, i.e., do not sample from func_name
    args['task']['fraction_using_func_name'] = 0.
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    task = tasks.setup_task(args)

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Load dataset splits
    task.load_dataset(args['dataset']['gen_subset'])
    dataset = task.dataset(args['dataset']['gen_subset'])

    # Optimize ensemble for generation and set the source and dest dicts on the model (required by scorer)
    for model in models:
        model.make_generation_fast_()
        if args['common']['fp16']:
            model.half()
        if use_cuda:
            model.cuda()
    assert len(models) > 0
    LOGGER.info('num. model params: {}'.format(sum(p.numel() for p in models[0].parameters())))

    itr = task.get_batch_iterator(
        dataset=dataset,
        max_tokens=args['dataset']['max_tokens'] or 36000,
        max_sentences=args['eval']['max_sentences'],
        max_positions=utils.resolve_max_positions(
            *[model.max_positions() for model in models]),
        ignore_invalid_inputs=True,
        num_shards=args['dataset']['num_shards'],
        shard_id=args['dataset']['shard_id'],
        num_workers=args['dataset']['num_workers'],
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=args['common']['log_format'],
        log_interval=args['common']['log_interval'],
        default_log_format=('tqdm' if not args['common']['no_progress_bar'] else 'none'),
    )

    code_reprs, query_reprs = [], []
    for sample in progress:
        if 'net_input' not in sample:
            continue
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        batch_code_reprs, batch_query_reprs = models[0](**sample['net_input'])
        code_reprs.extend(batch_code_reprs.tolist())
        query_reprs.extend(batch_query_reprs.tolist())
    code_reprs = np.asarray(code_reprs, dtype=np.float32)
    query_reprs = np.asarray(query_reprs, dtype=np.float32)
    assert code_reprs.shape == query_reprs.shape, (code_reprs.shape, query_reprs.shape)

    eval_size = len(code_reprs) if args['eval']['eval_size'] == -1 else args['eval']['eval_size']
    k, MRR, topk_idx, topk_prob = 3, [], [], []
    for idx in range(len(dataset) // eval_size):
        # slice the idx-th block of eval_size examples
        code_emb = torch.from_numpy(code_reprs[idx * eval_size:(idx + 1) * eval_size, :])
        query_emb = torch.from_numpy(query_reprs[idx * eval_size:(idx + 1) * eval_size, :])
        if use_cuda:
            code_emb, query_emb = code_emb.cuda(), query_emb.cuda()
        logits = query_emb @ code_emb.t()
        # cosine alternative:
        # src_emb_norm = torch.norm(code_emb, dim=-1, keepdim=True) + 1e-10
        # tgt_emb_norm = torch.norm(query_emb, dim=-1, keepdim=True) + 1e-10
        # logits = (query_emb / tgt_emb_norm) @ (code_emb / src_emb_norm).t()

        correct_scores = logits.diag()
        compared_scores = logits >= correct_scores.unsqueeze(dim=-1)
        mrr = 1 / compared_scores.sum(dim=-1).float()
        MRR.extend(mrr.tolist())
        batch_topk_prob, batch_topk_idx = logits.softmax(dim=-1).topk(k)
        batch_topk_idx = batch_topk_idx + idx * eval_size
        topk_idx.extend(batch_topk_idx.tolist())
        topk_prob.extend(batch_topk_prob.tolist())

    if len(dataset) % eval_size:
        # handle the remaining tail block
        code_emb = torch.from_numpy(code_reprs[-eval_size:, :])
        query_emb = torch.from_numpy(query_reprs[-eval_size:, :])
        if use_cuda:
            code_emb, query_emb = code_emb.cuda(), query_emb.cuda()
        logits = query_emb @ code_emb.t()

        correct_scores = logits.diag()
        compared_scores = logits >= correct_scores.unsqueeze(dim=-1)
        last_ids = len(code_reprs) % eval_size
        mrr = 1 / compared_scores.sum(dim=-1).float()[-last_ids:]
        MRR.extend(mrr.tolist())
        batch_topk_prob, batch_topk_idx = logits[-last_ids:].softmax(dim=-1).topk(k)
        batch_topk_idx = batch_topk_idx + len(code_reprs) - eval_size
        topk_idx.extend(batch_topk_idx.tolist())
        topk_prob.extend(batch_topk_prob.tolist())

    print('mrr: {:.4f}'.format(np.mean(MRR)))
    for idx, mrr in enumerate(MRR):
        if mrr == 1.0 and topk_prob[idx][0] > 0.8:
            print(np.asarray(topk_idx[idx]) + 1, [round(prob, 4) for prob in topk_prob[idx]])
def _main(args, output_file):
    if args['dataset']['max_tokens'] is None and args['dataset']['max_sentences'] is None:
        args['dataset']['max_tokens'] = 12000
    LOGGER.info(args)
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args['dataset']['gen_subset'])

    # Set dictionaries
    try:
        src_dict = getattr(task, 'source_dictionary', None)
    except NotImplementedError:
        src_dict = None
    tgt_dict = task.target_dictionary

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Optimize ensemble for generation
    for model in models:
        if _model_args['common']['fp16']:
            model.half()
        if use_cuda:
            model.cuda()

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args['dataset']['gen_subset']),
        max_tokens=args['dataset']['max_tokens'],
        max_sentences=args['dataset']['max_sentences'],
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]
        ),
        ignore_invalid_inputs=_model_args['dataset']['skip_invalid_size_inputs_valid_test'],
        required_batch_size_multiple=_model_args['dataset']['required_batch_size_multiple'],
        num_shards=_model_args['dataset']['num_shards'],
        shard_id=_model_args['dataset']['shard_id'],
        num_workers=_model_args['dataset']['num_workers'],
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=_model_args['common']['log_format'],
        log_interval=_model_args['common']['log_interval'],
        default_log_format=('tqdm' if not _model_args['common']['no_progress_bar'] else 'none'),
    )

    """
    nohup python -m run.completion.seqrnn.eval > run/completion/seqrnn/case.log 2>&1 &
    """
    sequence_completor = task.build_completor([model], args)

    for sample in progress:
        torch.cuda.empty_cache()
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        if 'net_input' not in sample:
            continue
        non_pad_idx = sample['net_input']['src_tokens'] > task.target_dictionary.pad()
        with torch.no_grad():
            net_output = sequence_completor.generate([model], sample, prefix_tokens=None)
        lprobs = model.get_normalized_probs(net_output, log_probs=True)
        rank = torch.argmax(lprobs, dim=-1)
        target = model.get_targets(sample, net_output)
        accuracy = 1.0 * ((rank == target) & non_pad_idx).sum(dim=-1) / non_pad_idx.sum(dim=-1)
        for idx, (data_idx, acc) in enumerate(zip(sample['id'], accuracy)):
            if acc > 0.9:
                LOGGER.info(f"{data_idx}: {task.target_dictionary.string(sample['net_input']['src_tokens'][idx, :])}")
def _main(args, output_file):
    if args['dataset']['max_tokens'] is None and args['dataset']['max_sentences'] is None:
        args['dataset']['max_tokens'] = 12000
    LOGGER.info(args)
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args['dataset']['gen_subset'])

    # Set dictionaries
    try:
        src_dict = getattr(task, 'source_dictionary', None)
    except NotImplementedError:
        src_dict = None
    tgt_dict = task.target_dictionary

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Optimize ensemble for generation
    for model in models:
        model.make_generation_fast_(
            beamable_mm_beam_size=None if args['eval']['no_beamable_mm'] else args['eval']['beam'],
            need_attn=args['eval']['print_alignment'],
        )
        if _model_args['common']['fp16']:
            model.half()
        if use_cuda:
            model.cuda()

    # Load alignment dictionary for unknown word replacement
    # (None if no unknown word replacement, empty if no path to align dictionary)
    align_dict = utils.load_align_dict(args['eval']['replace_unk'])

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args['dataset']['gen_subset']),
        max_tokens=args['dataset']['max_tokens'],
        max_sentences=args['eval']['max_sentences'],
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]),
        ignore_invalid_inputs=_model_args['dataset']['skip_invalid_size_inputs_valid_test'],
        required_batch_size_multiple=_model_args['dataset']['required_batch_size_multiple'],
        num_shards=_model_args['dataset']['num_shards'],
        shard_id=_model_args['dataset']['shard_id'],
        num_workers=_model_args['dataset']['num_workers'],
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=_model_args['common']['log_format'],
        log_interval=_model_args['common']['log_interval'],
        default_log_format=('tqdm' if not _model_args['common']['no_progress_bar'] else 'none'),
    )

    # Initialize generator
    gen_timer = StopwatchMeter()
    generator = task.build_generator(args)

    # Generate and compute BLEU score
    scorer = OrderedDict()
    if args['eval']['sacrebleu']:
        scorer['bleu'] = bleu_scorer.SacrebleuScorer()
    elif args['eval']['nltk_bleu']:
        scorer['bleu'] = bleu_scorer.NLTKBleuScorer()
    else:
        scorer['bleu'] = bleu_scorer.Scorer(tgt_dict.pad(), tgt_dict.eos(), tgt_dict.unk())
    # Generate and compute ROUGE score
    if args['eval']['rouge']:
        scorer['rouge'] = rouge_scorer.RougeScorer()

    num_sentences = 0
    has_target = True
    wps_meter = TimeMeter()
    for sample in progress:
        torch.cuda.empty_cache()
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        if 'net_input' not in sample:
            continue

        prefix_tokens = None
        if args['eval']['prefix_size'] > 0:
            prefix_tokens = sample['target'][:, :args['eval']['prefix_size']]

        gen_timer.start()
        hypos = task.inference_step(generator, models, sample, prefix_tokens)
        num_generated_tokens = sum(len(h[0]['tokens']) for h in hypos)
        gen_timer.stop(num_generated_tokens)

        for i, sample_id in enumerate(sample['id'].tolist()):
            has_target = sample['target'] is not None

            # Remove padding
            src_tokens = utils.strip_pad(sample['net_input']['src_tokens'][i, :], tgt_dict.pad())
            target_tokens = None
            if has_target:
                target_tokens = utils.strip_pad(sample['target'][i, :], tgt_dict.pad()).int().cpu()

            # Either retrieve the original sentences or regenerate them from tokens.
            if align_dict is not None:
                src_str = task.dataset(args['dataset']['gen_subset']).src.get_original_text(sample_id)
                target_str = task.dataset(args['dataset']['gen_subset']).tgt.get_original_text(sample_id)
            else:
                if src_dict is not None:
                    src_str = src_dict.string(src_tokens, args['eval']['remove_bpe'])
                else:
                    src_str = ""
                if has_target:
                    target_str = tgt_dict.string(target_tokens, args['eval']['remove_bpe'], escape_unk=True)

            if not args['eval']['quiet']:
                if src_dict is not None:
                    print('S-{}\t{}'.format(sample_id, src_str), file=output_file)
                if has_target:
                    print('T-{}\t{}'.format(sample_id, target_str), file=output_file)

            # Process top predictions
            for j, hypo in enumerate(hypos[i][:args['eval']['nbest']]):
                hypo_tokens, hypo_str, alignment = utils.post_process_prediction(
                    hypo_tokens=hypo['tokens'].int().cpu(),
                    src_str=src_str,
                    alignment=hypo['alignment'],
                    align_dict=align_dict,
                    tgt_dict=tgt_dict,
                    remove_bpe=args['eval']['remove_bpe'],
                )
                if hypo_str == '.':  # rouge cannot handle hypo '.'
                    continue

                if not args['eval']['quiet']:
                    score = hypo['score'] / math.log(2)  # convert to base 2
                    print('H-{}\t{}\t{}'.format(sample_id, score, hypo_str), file=output_file)
                    print('P-{}\t{}'.format(
                        sample_id,
                        ' '.join(map(
                            lambda x: '{:.4f}'.format(x),
                            # convert from base e to base 2
                            hypo['positional_scores'].div_(math.log(2)).tolist(),
                        ))), file=output_file)
                    if args['eval']['print_alignment']:
                        print('A-{}\t{}'.format(
                            sample_id,
                            ' '.join(['{}-{}'.format(src_idx, tgt_idx) for src_idx, tgt_idx in alignment])),
                            file=output_file)
                    if args['eval']['print_step']:
                        print('I-{}\t{}'.format(sample_id, hypo['steps']), file=output_file)
                    if args['eval']['retain_iter_history']:
                        for step, h in enumerate(hypo['history']):
                            _, h_str, _ = utils.post_process_prediction(
                                hypo_tokens=h['tokens'].int().cpu(),
                                src_str=src_str,
                                alignment=None,
                                align_dict=None,
                                tgt_dict=tgt_dict,
                                remove_bpe=None,
                            )
                            print('E-{}_{}\t{}'.format(sample_id, step, h_str), file=output_file)

                # Score only the top hypothesis
                if has_target and j == 0:
                    if align_dict is not None or args['eval']['remove_bpe'] is not None:
                        # Convert back to tokens for evaluation with unk replacement and/or without BPE
                        target_tokens = tgt_dict.encode_line(target_str, add_if_not_exist=True)
                    for metric in scorer:
                        if hasattr(scorer[metric], 'add_string'):
                            scorer[metric].add_string(target_str, hypo_str)
                        else:
                            scorer[metric].add(target_tokens, hypo_tokens)

        wps_meter.update(num_generated_tokens)
        progress.log({'wps': round(wps_meter.avg)})
        num_sentences += sample['nsentences']

    LOGGER.info('NOTE: hypothesis and token scores are output in base 2')
    LOGGER.info('Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)'.format(
        num_sentences, gen_timer.n, gen_timer.sum, num_sentences / gen_timer.sum, 1. / gen_timer.avg))
    if has_target:
        LOGGER.info('Generate {} with beam={}: {}'.format(
            args['dataset']['gen_subset'], args['eval']['beam'],
            {'\n{}:\n{}'.format(str.upper(metric), value.score()) for metric, value in scorer.items()}))
    return scorer
def _main(args, output_file):
    if args['dataset']['max_tokens'] is None and args['dataset']['max_sentences'] is None:
        args['dataset']['max_tokens'] = 12000
    use_cuda = torch.cuda.is_available() and not args['common']['cpu']
    if use_cuda:
        # under CUDA_VISIBLE_DEVICES, torch renumbers visible devices from 0,
        # so the first visible device is always cuda:0
        torch.cuda.set_device('cuda:0')

    # Load dataset splits
    task = tasks.setup_task(args)
    task.load_dataset(args['dataset']['gen_subset'], shuffle=False)

    # Load ensemble
    LOGGER.info('loading model(s) from {}'.format(args['eval']['path']))
    models, _model_args = checkpoint_utils.load_model_ensemble(
        utils.split_paths(args['eval']['path']),
        arg_overrides=eval(args['eval']['model_overrides']),
        task=task,
    )

    # Optimize ensemble for generation
    for model in models:
        if _model_args['common']['fp16']:
            model.half()
        if use_cuda:
            model.cuda()

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(args['dataset']['gen_subset']),
        max_tokens=args['dataset']['max_tokens'],
        max_sentences=args['eval']['max_sentences_eval'],
        max_positions=utils.resolve_max_positions(
            task.max_positions(),
            *[model.max_positions() for model in models]),
        ignore_invalid_inputs=_model_args['dataset']['skip_invalid_size_inputs_valid_test'],
        required_batch_size_multiple=_model_args['dataset']['required_batch_size_multiple'],
        num_shards=_model_args['dataset']['num_shards'],
        shard_id=_model_args['dataset']['shard_id'],
        num_workers=_model_args['dataset']['num_workers'],
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=_model_args['common']['log_format'],
        log_interval=_model_args['common']['log_interval'],
        default_log_format=('tqdm' if not _model_args['common']['no_progress_bar'] else 'none'),
    )

    sequence_completor = task.build_completor([model], args)

    accuracy = {'all': 0.}
    mrr = {'all': 0.}
    sample_num = {'all': 0.}
    if task.dataset('test').attrs is not None:
        for attr in task.dataset('test').attrs:
            accuracy[attr] = 0.
            mrr[attr] = 0.
            sample_num[attr] = 0

    def _eval(lprobs, target, idx, num):
        with torch.no_grad():
            lprobs = lprobs[idx]
            target = target[idx]
            accuracy = (torch.argmax(lprobs, dim=-1) == target).sum().float().item()
            # Ref: Code Prediction by Feeding Trees to Transformers
            # "With this practical perspective and for ease of computation, we only consider
            #  rank_i <= 10 for each location i (all rank_i > 10 will have a score of 0)."
            ranks = (lprobs >= lprobs[:, target].diag().unsqueeze(dim=-1)).sum(-1)
            mrr = 1. / ranks
            mrr[ranks > 10] = 0.
            mrr = mrr.sum().float().item()
        return accuracy, mrr, num

    for sample in progress:
        torch.cuda.empty_cache()
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        if 'net_input' not in sample:
            continue
        with torch.no_grad():
            net_output = sequence_completor.generate([model], sample, prefix_tokens=None)

        # lprobs = model.get_normalized_probs(net_output, log_probs=True)
        lprobs = torch.softmax(net_output[0], dim=-1)
        lprobs = lprobs.view(-1, lprobs.size(-1))
        target = model.get_targets(sample, net_output).view(-1)

        # all: ignore pad and unk
        idx = sample['net_input']['src_tokens'].view(-1) != task.target_dictionary.pad()
        idx[sample['target'].view(-1) == task.target_dictionary.unk()] = 0
        # ignore overlapping tokens
        max_len = sample['target'].size(-1)
        for i, ext_i in enumerate(sample['extends']):
            idx[i * max_len:i * max_len + ext_i] = 0

        batch_acc, batch_mrr, batch_num = _eval(lprobs, target, idx, num=idx.sum().item())
        accuracy['all'] += batch_acc
        mrr['all'] += batch_mrr
        sample_num['all'] += batch_num

        # other attrs
        if sample['attr_masks'] is not None:
            for attr, attr_idx in sample['attr_masks'].items():
                # pick out attr_idx entries that are not unk/pad
                attr_idx = attr_idx[idx[attr_idx].tolist()]
                if len(attr_idx) > 0:
                    batch_acc, batch_mrr, batch_num = _eval(lprobs, target, attr_idx, num=attr_idx.size)
                    accuracy[attr] += batch_acc
                    mrr[attr] += batch_mrr
                    sample_num[attr] += batch_num

    for attr in accuracy.keys():
        avg_acc = round(accuracy[attr] / sample_num[attr], 6) if sample_num[attr] > 0. else None
        avg_mrr = round(mrr[attr] / sample_num[attr], 6) if sample_num[attr] > 0. else None
        print('[{}] tokens, accuracy: {}, MRR: {}'.format(attr, avg_acc, avg_mrr))
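
# A toy check (plain torch; not part of the pipeline) of the rank cutoff used
# in _eval above: following "Code Prediction by Feeding Trees to Transformers",
# any token whose correct id is ranked worse than 10 contributes 0 instead of
# 1/rank.
def _demo_rank_cutoff():
    ranks = torch.tensor([1, 4, 12]).float()
    mrr = 1. / ranks
    mrr[ranks > 10] = 0.
    return mrr.tolist()  # [1.0, 0.25, 0.0]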
def single_main(args, init_distributed=False):
    assert args['dataset']['max_tokens'] is not None or args['dataset']['max_sentences'] is not None, \
        'Must specify batch size either with --max-tokens or --max-sentences'
    metrics.reset()

    # 0. Initialize CUDA and distributed training
    if torch.cuda.is_available() and not args['common']['cpu']:
        torch.cuda.set_device(args['distributed_training']['device_id'])
    random.seed(args['common']['seed'])
    np.random.seed(args['common']['seed'])
    torch.manual_seed(args['common']['seed'])
    torch.cuda.manual_seed(args['common']['seed'])
    if init_distributed:
        args['distributed_training']['distributed_rank'] = distributed_utils.distributed_init(args)

    # Verify checkpoint directory
    if distributed_utils.is_master(args):
        save_dir = args['checkpoint']['save_dir']
        checkpoint_utils.verify_checkpoint_directory(save_dir)
        remove_files(save_dir, 'pt')  # removes any existing (pre-trained) checkpoints

    # 1. Setup task, e.g., translation, language modeling, etc.
    task = tasks.setup_task(args)

    # 2. Load valid dataset (we load training data below, based on the latest checkpoint);
    #    validation accuracy is used to decay the learning rate.
    task.load_dataset(args['dataset']['valid_subset'], combine=False, epoch=1)
    # # compute meteor to select model
    # task.load_dataset(args['dataset']['dev_subset'], combine=False, epoch=1)
    # # load dev/ref.txt
    # dev_refs = load_refs(os.path.join(args['task']['data'], args['dataset']['dev_ref_subset']))

    # 3. Build model and criterion
    model = task.build_model(args)
    criterion = task.build_criterion(args)
    LOGGER.info(model)
    LOGGER.info('model {}, criterion {}'.format(args['model']['arch'], criterion.__class__.__name__))
    LOGGER.info('num. model params: {} (num. trained: {})'.format(
        sum(p.numel() for p in model.parameters()),
        sum(p.numel() for p in model.parameters() if p.requires_grad),
    ))

    # 4. Build trainer
    trainer = Trainer(args, task, model, criterion)
    LOGGER.info('training on {} GPUs'.format(args['distributed_training']['distributed_world_size']))
    LOGGER.info('max tokens per GPU = {} and max sentences per GPU = {}'.format(
        args['dataset']['max_tokens'],
        args['dataset']['max_sentences'],
    ))

    # 5. Load the latest checkpoint if one is available and restore the corresponding train iterator
    extra_state, epoch_itr = checkpoint_utils.load_checkpoint(args, trainer, combine=False)

    # 6. Train until the learning rate gets too small
    max_epoch = args['optimization']['max_epoch'] or math.inf
    max_update = args['optimization']['max_update'] or math.inf
    lr = trainer.get_lr()
    train_meter = meters.StopwatchMeter()
    train_meter.start()
    valid_subsets = args['dataset']['valid_subset'].split(',')
    dev_subsets = args['dataset']['dev_subset'].split(',')
    valid_accs_after_60e = []
    while (
        lr > args['optimization']['min_lr']
        and epoch_itr.next_epoch_idx <= max_epoch
        and trainer.get_num_updates() < max_update
    ):
        # train for one epoch
        train(args, trainer, task, epoch_itr)

        if not args['dataset']['disable_validation'] \
                and epoch_itr.epoch % args['dataset']['validate_interval'] == 0:
            valid_acc, dev_prf = validate(args, trainer, task, epoch_itr, valid_subsets, dev_subsets, dev_refs=None)
        else:
            valid_acc, dev_prf = None, None

        # "We start with a learning rate of 0.5 and start decaying it by a factor of 0.8
        #  after 60 epochs if accuracy on the validation set goes down, and terminate
        #  training when the learning rate goes below 0.001."
        if valid_acc is not None:
            if len(valid_accs_after_60e) > 10 and valid_accs_after_60e[-5] >= valid_acc:
                lr = trainer.set_lr(lr * trainer.args['optimization']['lr_shrink'])
            # track validation accuracy (without this append the decay branch never fires)
            valid_accs_after_60e.append(valid_acc)

        # eval on dev and dev.ref data
        # save checkpoint
        if epoch_itr.epoch % args['checkpoint']['save_interval'] == 0:
            checkpoint_utils.save_checkpoint(args, trainer, epoch_itr, valid_acc)

        epoch_itr = trainer.get_train_iterator(
            epoch_itr.next_epoch_idx,
            combine=False,  # TODO to be checked
            # sharded data: get train iterator for next epoch
            load_dataset=(os.pathsep in args['task']['data']),
        )
    train_meter.stop()
    LOGGER.info('done training in {:.1f} seconds'.format(train_meter.sum))