def infer(args):
    states = {}
    score_manager = ScoreManager(args.model_path, 'evaluation_score')
    if args.cuda:
        logger.info('[PARAM] Enabling CUDA')
        assert torch.cuda.is_available()

    logger.info("[DATASET] Preparing dataset...")
    train_iter, val_iter, test_iter, fields = data_utils.load_dataset(args, is_eval=True)
    if 'src' in fields:
        src_vocab_size, tgt_vocab_size = len(fields.get('src', None).vocab), len(fields['tgt'].vocab)
    else:
        tgt_vocab_size = len(fields['tgt'].vocab)
    logger.info("[VALIDATION]: #Batches=%d (#Cases:%d)" % (len(val_iter), len(val_iter.dataset)))
    logger.info("[TEST]: #Batches=%d (#Cases:%d)" % (len(test_iter), len(test_iter.dataset)))

    logger.info("[PARAM] Setting vocab sizes")
    if args.copy:
        vocab_offset = args.max_copy_token_num
    else:
        vocab_offset = 0
    if args.field_copy:
        vocab_offset += args.max_kw_pairs_num
    if 'src' in fields:
        args.__setattr__('src_vocab_size', src_vocab_size - vocab_offset)
        args.__setattr__('src_tag_vocab_size', len(fields['src_tag'].vocab))
    args.__setattr__('tgt_vocab_size', tgt_vocab_size - vocab_offset)
    args.__setattr__('tgt_vocab_size_with_offsets', tgt_vocab_size)
    if 'attribute_key' in fields:
        args.__setattr__('field_vocab_size', len(fields['attribute_key'].vocab))
        args.__setattr__('field_word_vocab_size', len(fields['attribute_word'].vocab))
        if args.field_tag_usage != 'none':
            args.__setattr__('field_pos_tag_vocab_size', len(fields['attribute_word_tag'].vocab))
    set_dynamic_vocabs(args, fields)

    generation_name = model_helper.name_a_generation(args, 'test')
    file_prefix = os.path.join(args.model_path, generation_name)

    if not args.skip_infer:
        logger.info("[MODEL] Preparing model...")
        seq2seq = InfoSeq2Seq.build_s2s_model(args, fields.get('src', None), fields['tgt'])
        model_helper.show_parameters(seq2seq)
        if args.cuda:
            seq2seq = seq2seq.to('cuda')
        logger.info(seq2seq)

        # Load model
        is_loaded = model_helper.try_restore_model(args.model_path, seq2seq, None, states,
                                                   best_model=args.use_best_model)
        if not is_loaded:
            logger.info("[PARAM] Could not load a trained model!")
            return

        # Test Set
        if args.field_copy:
            raw_queries, raw_responses, raw_fields = data_utils.load_examples(args.test_data_path_prefix,
                                                                              field_words=True)
        else:
            raw_queries, raw_responses = data_utils.load_examples(args.test_data_path_prefix)
            raw_fields = [None] * len(raw_queries)
        output_queries, output_responses, output_scores, output_generations, test_loss, test_ppl_macro = \
            inference(args, seq2seq, test_iter, tgt_vocab_size, fields.get('src', None), fields['tgt'])
        model_helper.write_results(args, file_prefix, raw_queries, raw_responses, output_scores,
                                   output_generations, raw_fields=raw_fields)

    logger.info('[STD Evaluation] Evaluating the test generations...')
    res_dict = std_eval_with_args(args, file_prefix, 'test')
    score_manager.update_group(generation_name, res_dict)

    if args.beam_width == 0 and args.skip_infer is False:
        test_ppl = math.exp(test_loss)
        score_manager.update('infer_b%d_test_loss' % args.beam_width, test_loss)
        score_manager.update('infer_b%d_test_ppl' % args.beam_width, test_ppl)
        score_manager.update('infer_b%d_test_macro_ppl' % args.beam_width, test_ppl_macro)
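# ScoreManager is defined elsewhere in this repo; its implementation is not shown here.
# The sketch below is only an illustration of the interface used above (update() for a
# single scalar, update_group() for a dict of metrics), assuming a JSON file persisted
# under the model directory. It is a hypothetical stand-in, not the repo's actual class.
import json
import os


class JsonScoreStore:
    """Hypothetical JSON-backed score store mirroring the update()/update_group() calls above."""

    def __init__(self, model_path, name):
        self.path = os.path.join(model_path, name + '.json')
        self.scores = {}
        if os.path.exists(self.path):
            with open(self.path, 'r', encoding='utf-8') as f:
                self.scores = json.load(f)

    def _save(self):
        with open(self.path, 'w', encoding='utf-8') as f:
            json.dump(self.scores, f, indent=2, ensure_ascii=False)

    def update(self, key, value):
        # Store a single scalar metric, e.g. 'infer_b0_test_ppl'.
        self.scores[key] = value
        self._save()

    def update_group(self, group_name, score_dict):
        # Store a whole dict of metrics under one generation name.
        self.scores[group_name] = dict(score_dict)
        self._save()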
def train(args):
    states = {
        'val_loss': [],
        'val_ppl': [],
        'lr': args.lr,
        'epoch': 0,
        'best_epoch': 0,
        'best_val_loss': -1,
    }
    if args.cuda:
        logger.info('[PARAM] Enabling CUDA')
        assert torch.cuda.is_available()

    logger.info("[DATASET] Preparing dataset...")
    train_iter, val_iter, test_iter, fields = data_utils.load_dataset(args)
    if 'src' in fields:
        src_vocab_size, tgt_vocab_size = len(fields.get('src', None).vocab), len(fields['tgt'].vocab)
    else:
        tgt_vocab_size = len(fields['tgt'].vocab)
    logger.info("[TRAIN]: #Batches=%d (#Cases:%d)" % (len(train_iter), len(train_iter.dataset)))
    logger.info("[VALIDATION]: #Batches=%d (#Cases:%d)" % (len(val_iter), len(val_iter.dataset)))
    logger.info("[TEST]: #Batches=%d (#Cases:%d)" % (len(test_iter), len(test_iter.dataset)))

    logger.info("[PARAM] Setting vocab sizes")
    if args.copy:
        vocab_offset = args.max_copy_token_num
    else:
        vocab_offset = 0
    if args.field_copy:
        vocab_offset += args.max_kw_pairs_num
    args.__setattr__('tgt_vocab_size', tgt_vocab_size - vocab_offset)
    if 'attribute_key' in fields:
        args.__setattr__('field_vocab_size', len(fields['attribute_key'].vocab))
    if 'src' in fields:
        args.__setattr__('src_vocab_size', src_vocab_size - vocab_offset)
        args.__setattr__('src_tag_vocab_size', len(fields['src_tag'].vocab))
    if 'attribute_word' in fields:
        args.__setattr__('field_word_vocab_size', len(fields['attribute_word'].vocab))
    if 'attribute_word' in fields and args.field_tag_usage != 'none':
        args.__setattr__('field_pos_tag_vocab_size', len(fields['attribute_word_tag'].vocab))
    args.__setattr__('tgt_vocab_size_with_offsets', tgt_vocab_size)
    set_dynamic_vocabs(args, fields)
    logger.info('[VOCAB] tgt_vocab_size_with_offsets = ' + str(tgt_vocab_size))

    logger.info("[MODEL] Preparing model...")
    seq2seq = InfoSeq2Seq.build_s2s_model(args, fields.get('src', None), fields['tgt'])
    if args.cuda:
        seq2seq.to('cuda')
    model_helper.show_parameters(seq2seq)

    if args.init_lr > 0:
        logger.info("[LR] Using init LR %f" % args.init_lr)
        optimizer = optim.Adam(seq2seq.parameters(), lr=args.init_lr)
        states['lr'] = args.init_lr
    else:
        optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
        states['lr'] = args.lr
    logger.info(seq2seq)

    # Load model
    is_loaded = model_helper.try_restore_model(args.model_path, seq2seq, optimizer, states, best_model=False)
    if not is_loaded:
        logger.info("[PARAM] Using fresh params")
        model_helper.init_network(seq2seq, args.init)
        if args.init_word_vecs is True:
            assert args.pre_embed_dim == args.embed_size
            logger.info("[EMBED] Loading the pre-trained word_embeddings")
            # Use the pre-trained embeddings
            assert args.embed_size == args.field_key_embed_size
            if 'attribute_key' in fields:
                data_utils.load_pretrain_embeddings(seq2seq.field_encoder.field_key_embed,
                                                    fields['attribute_key'],
                                                    args.pre_embed_file, args.embed_size, char2word='avg',
                                                    suffix=args.dataset_version + 'attribute_key')
            if args.field_word_vocab_path != 'none':
                data_utils.load_pretrain_embeddings(seq2seq.field_encoder.field_word_embedding,
                                                    fields['attribute_word'],
                                                    args.pre_embed_file, args.embed_size, char2word='avg',
                                                    suffix=args.dataset_version + 'attribute_word')
            if 'sub_attribute_key' in fields:
                data_utils.load_pretrain_embeddings(seq2seq.field_encoder.sub_field_key_char_encoder.sub_embed,
                                                    fields.get('sub_attribute_key', None),
                                                    args.pre_embed_file, args.embed_size,
                                                    suffix=args.dataset_version + 'sub_attribute_key')
            if 'sub_attribute_word' in fields:
                data_utils.load_pretrain_embeddings(seq2seq.field_encoder.sub_field_word_char_encoder.sub_embed,
                                                    fields.get('sub_attribute_word', None),
                                                    args.pre_embed_file, args.embed_size,
                                                    suffix=args.dataset_version + 'sub_attribute_word')
            if 'src' in fields:
                data_utils.load_pretrain_embeddings(seq2seq.src_embed, fields.get('src', None),
                                                    args.pre_embed_file, args.embed_size,
                                                    suffix=args.dataset_version + 'src')
            if 'sub_src' in fields:
                data_utils.load_pretrain_embeddings(seq2seq.sub_src_embed, fields.get('sub_src', None),
                                                    args.pre_embed_file, args.embed_size,
                                                    suffix=args.dataset_version + 'subsrc')
            if not args.share_embedding or 'src' not in fields:
                data_utils.load_pretrain_embeddings(seq2seq.dec_embed, fields['tgt'],
                                                    args.pre_embed_file, args.embed_size,
                                                    suffix=args.dataset_version + 'tgt')

    start_epoch = states['epoch']
    for epoch in range(start_epoch + 1, args.epochs + 1):
        if epoch != start_epoch + 1 and (args.align_dropout > 0.0 and (args.copy or args.field_copy)):
            logger.info("[Dataset] Reloading & Aligning the dataset")
            train_iter, val_iter, test_iter, fields = data_utils.load_dataset(args)
        if epoch - states['best_epoch'] > 2:
            logger.info('[STOP] Early stopped!')
            break
        start_time = time.time()
        num_batches = len(train_iter)
        logger.info('[NEW EPOCH] %d/%d, num of batches : %d' % (epoch, args.epochs, num_batches))

        train_epoch(args, epoch, seq2seq, optimizer, train_iter, tgt_vocab_size,
                    fields.get('src', None), fields['tgt'])
        val_loss, val_ppl_macro = evaluate(args, seq2seq, val_iter, tgt_vocab_size,
                                           fields.get('src', None), fields['tgt'])
        val_ppl = math.exp(val_loss)
        test_loss, test_ppl_macro = evaluate(args, seq2seq, test_iter, tgt_vocab_size,
                                             fields.get('src', None), fields['tgt'])
        test_ppl = math.exp(test_loss)
        logger.info("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2f/%5.2f | test_loss:%5.3f | test_pp:%5.2f/%5.2f"
                    % (epoch, val_loss, val_ppl, val_ppl_macro, test_loss, test_ppl, test_ppl_macro))
        time_diff = (time.time() - start_time) / 3600.0
        logger.info("[Epoch:%d] epoch time:%.2fH, est. remaining time:%.2fH"
                    % (epoch, time_diff, time_diff * (args.epochs - epoch)))

        # Adjusting learning rate
        states['val_ppl'].append(val_ppl)
        states['val_loss'].append(val_loss)
        states['epoch'] = epoch
        if len(states['val_ppl']) >= 2:
            logger.info('[TRAINING] last->now valid ppl : %.3f->%.3f'
                        % (states['val_ppl'][-2], states['val_ppl'][-1]))
        if len(states['val_ppl']) >= 2 and states['val_ppl'][-1] >= states['val_ppl'][-2]:
            logger.info('[TRAINING] Adjusting learning rate due to the increase of val_loss')
            new_lr = model_helper.adjust_learning_rate(optimizer, rate=args.lr_decay_rate)
            states['lr'] = new_lr
        else:
            if args.init_lr > 0 and epoch >= args.init_lr_decay_epoch - 1 and states['lr'] > args.lr:
                logger.info('[TRAINING] Decaying the init LR from epoch %d; the next epoch is %d'
                            % (args.init_lr_decay_epoch, epoch + 1))
                new_lr = model_helper.adjust_learning_rate(optimizer, rate=args.lr_decay_rate,
                                                           min_value=args.lr)
                states['lr'] = new_lr

        # Save the model if the validation loss is the best we've seen so far.
        if states['best_val_loss'] == -1 or val_ppl < states['best_val_loss']:
            logger.info('[CHECKPOINT] New best valid ppl : %.3f->%.2f' % (states['best_val_loss'], val_ppl))
            model_helper.save_model(args.model_path, epoch, val_ppl, seq2seq, optimizer, args, states,
                                    best_model=True, clear_history=True)
            states['best_val_loss'] = val_ppl
            states['best_epoch'] = epoch
        # Saving standard model
        model_helper.save_model(args.model_path, epoch, val_ppl, seq2seq, optimizer, args, states,
                                best_model=False, clear_history=True)
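# model_helper.adjust_learning_rate() is part of model_helper and is not reproduced in this
# file. The sketch below shows one plausible implementation consistent with how it is called
# above: multiply each param group's LR by a decay rate, optionally floor it at a minimum
# value (used to let a large init LR decay back down to args.lr), and return the new LR.
# Treat this as an assumption, not the repo's actual code.
def adjust_learning_rate_sketch(optimizer, rate=0.5, min_value=None):
    """Decay every param group's LR by `rate`, optionally flooring at `min_value`."""
    new_lr = None
    for param_group in optimizer.param_groups:
        new_lr = param_group['lr'] * rate
        if min_value is not None:
            new_lr = max(new_lr, min_value)
        param_group['lr'] = new_lr
    return new_lr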
def eval(args):
    states = {}
    score_manager = ScoreManager(args.model_path, 'evaluation_score')
    if args.cuda:
        logger.info('[PARAM] Enabling CUDA')
        assert torch.cuda.is_available()

    logger.info("[DATASET] Preparing dataset...")
    train_iter, val_iter, test_iter, fields = data_utils.load_dataset(args, is_eval=True)
    if 'src' in fields:
        src_vocab_size, tgt_vocab_size = len(fields.get('src', None).vocab), len(fields['tgt'].vocab)
    else:
        tgt_vocab_size = len(fields['tgt'].vocab)
    logger.info("[VALIDATION]: #Batches=%d (#Cases:%d)" % (len(val_iter), len(val_iter.dataset)))
    logger.info("[TEST]: #Batches=%d (#Cases:%d)" % (len(test_iter), len(test_iter.dataset)))

    logger.info("[PARAM] Setting vocab sizes")
    if args.copy:
        vocab_offset = args.max_copy_token_num
    else:
        vocab_offset = 0
    if args.field_copy:
        vocab_offset += args.max_kw_pairs_num
    if 'src' in fields:
        args.__setattr__('src_vocab_size', src_vocab_size - vocab_offset)
        args.__setattr__('src_tag_vocab_size', len(fields['src_tag'].vocab))
    args.__setattr__('tgt_vocab_size', tgt_vocab_size - vocab_offset)
    args.__setattr__('tgt_vocab_size_with_offsets', tgt_vocab_size)
    if 'attribute_key' in fields:
        args.__setattr__('field_vocab_size', len(fields['attribute_key'].vocab))
        args.__setattr__('field_word_vocab_size', len(fields['attribute_word'].vocab))
        if args.field_tag_usage != 'none':
            args.__setattr__('field_pos_tag_vocab_size', len(fields['attribute_word_tag'].vocab))
    set_dynamic_vocabs(args, fields)

    logger.info("[MODEL] Preparing model...")
    seq2seq = InfoSeq2Seq.build_s2s_model(args, fields.get('src', None), fields['tgt'])
    model_helper.show_parameters(seq2seq)
    if args.cuda:
        seq2seq = seq2seq.to('cuda')
    logger.info(seq2seq)

    # Load model
    is_loaded = model_helper.try_restore_model(args.model_path, seq2seq, None, states,
                                               best_model=args.use_best_model)
    if not is_loaded:
        logger.info("[PARAM] Could not load a trained model!")
        return
    else:
        val_loss, val_ppl_macro = evaluate(args, seq2seq, val_iter, tgt_vocab_size,
                                           fields.get('src', None), fields['tgt'])
        val_ppl = math.exp(val_loss)
        test_loss, test_ppl_macro = evaluate(args, seq2seq, test_iter, tgt_vocab_size,
                                             fields.get('src', None), fields['tgt'])
        test_ppl = math.exp(test_loss)
        logger.info("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2f/%5.2f | test_loss:%5.3f | test_pp:%5.2f/%5.2f"
                    % (states['epoch'], val_loss, val_ppl, val_ppl_macro, test_loss, test_ppl, test_ppl_macro))
        score_manager.update('eval_val_loss', val_loss)
        score_manager.update('eval_val_ppl', val_ppl)
        score_manager.update('eval_val_macro_ppl', val_ppl_macro)
        score_manager.update('eval_test_loss', test_loss)
        score_manager.update('eval_test_ppl', test_ppl)
        score_manager.update('eval_test_macro_ppl', test_ppl_macro)
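# evaluate() above returns a token-averaged loss (turned into PPL via math.exp) plus a
# separately computed "macro" PPL. Its exact definition lives in evaluate(); a common
# convention, assumed here purely for illustration, is: micro PPL = exp(total NLL / total
# tokens), macro PPL = mean over sentences of exp(per-sentence mean NLL).
import math


def micro_macro_ppl_sketch(per_sentence_nll, per_sentence_lengths):
    """per_sentence_nll[i] is the summed negative log-likelihood of sentence i."""
    total_nll = sum(per_sentence_nll)
    total_tokens = sum(per_sentence_lengths)
    micro_ppl = math.exp(total_nll / total_tokens)
    macro_ppl = sum(math.exp(nll / length)
                    for nll, length in zip(per_sentence_nll, per_sentence_lengths)) / len(per_sentence_nll)
    return micro_ppl, macro_ppl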
def train(args):
    states = {
        'val_loss': [],
        'val_ppl': [],
        'lr': args.lr,
        'epoch': 0,
        'best_epoch': 0,
        'best_val_loss': -1,
    }
    if args.cuda:
        logger.info('[PARAM] Enabling CUDA')
        assert torch.cuda.is_available()

    logger.info("[DATASET] Preparing dataset...")
    train_iter, val_iter, test_iter, src_field, tgt_field = data_utils.load_dataset(args)
    src_vocab_size, tgt_vocab_size = len(src_field.vocab), len(tgt_field.vocab)
    logger.info("[TRAIN]: #Batches=%d (#Cases:%d)" % (len(train_iter), len(train_iter.dataset)))
    logger.info("[VALIDATION]: #Batches=%d (#Cases:%d)" % (len(val_iter), len(val_iter.dataset)))
    logger.info("[TEST]: #Batches=%d (#Cases:%d)" % (len(test_iter), len(test_iter.dataset)))

    logger.info("[PARAM] Setting vocab sizes")
    if args.copy:
        vocab_offset = args.max_copy_token_num
    else:
        vocab_offset = 0
    args.__setattr__('src_vocab_size', src_vocab_size - vocab_offset)
    args.__setattr__('tgt_vocab_size', tgt_vocab_size - vocab_offset)
    args.__setattr__('tgt_vocab_size_with_offsets', tgt_vocab_size)

    logger.info("[MODEL] Preparing model...")
    seq2seq = Seq2Seq.build_s2s_model(args, src_field, tgt_field)
    if args.cuda:
        seq2seq.to('cuda')
    model_helper.show_parameters(seq2seq)
    optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
    logger.info(seq2seq)

    # Load model
    is_loaded = model_helper.try_restore_model(args.model_path, seq2seq, optimizer, states, best_model=False)
    if not is_loaded:
        logger.info("[PARAM] Using fresh params")
        model_helper.init_network(seq2seq, args.init)
        if args.init_word_vecs is True:
            assert args.pre_embed_dim == args.embed_size
            logger.info("[EMBED] Loading the pre-trained word_embeddings")
            data_utils.load_pretrain_embeddings(seq2seq.src_embed, src_field,
                                                args.pre_embed_file, args.embed_size)
            if not args.share_embedding:
                data_utils.load_pretrain_embeddings(seq2seq.tgt_embed, tgt_field,
                                                    args.pre_embed_file, args.embed_size)

    start_epoch = states['epoch']
    for epoch in range(start_epoch + 1, args.epochs + 1):
        if epoch - states['best_epoch'] > 2:
            logger.info('[STOP] Early stopped!')
            break
        start_time = time.time()
        num_batches = len(train_iter)
        logger.info('[NEW EPOCH] %d/%d, num of batches : %d' % (epoch, args.epochs, num_batches))

        train_epoch(args, epoch, seq2seq, optimizer, train_iter, tgt_vocab_size, src_field, tgt_field)
        val_loss, val_ppl_macro = evaluate(args, seq2seq, val_iter, tgt_vocab_size, src_field, tgt_field)
        val_ppl = math.exp(val_loss)
        test_loss, test_ppl_macro = evaluate(args, seq2seq, test_iter, tgt_vocab_size, src_field, tgt_field)
        test_ppl = math.exp(test_loss)
        logger.info("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2f/%5.2f | test_loss:%5.3f | test_pp:%5.2f/%5.2f"
                    % (epoch, val_loss, val_ppl, val_ppl_macro, test_loss, test_ppl, test_ppl_macro))
        time_diff = (time.time() - start_time) / 3600.0
        logger.info("[Epoch:%d] epoch time:%.2fH, est. remaining time:%.2fH"
                    % (epoch, time_diff, time_diff * (args.epochs - epoch)))

        # Adjusting learning rate
        states['val_ppl'].append(val_ppl)
        states['val_loss'].append(val_loss)
        states['epoch'] = epoch
        if len(states['val_ppl']) >= 2:
            logger.info('[TRAINING] last->now valid ppl : %.3f->%.3f'
                        % (states['val_ppl'][-2], states['val_ppl'][-1]))
        if len(states['val_ppl']) >= 2:
            if states['val_ppl'][-1] >= states['val_ppl'][-2]:
                logger.info('[TRAINING] Adjusting learning rate due to the increase of val_loss')
                new_lr = model_helper.adjust_learning_rate(optimizer)
                states['lr'] = new_lr

        # Save the model if the validation loss is the best we've seen so far.
        if states['best_val_loss'] == -1 or val_ppl < states['best_val_loss']:
            logger.info('[CHECKPOINT] New best valid ppl : %.3f->%.2f' % (states['best_val_loss'], val_ppl))
            model_helper.save_model(args.model_path, epoch, val_ppl, seq2seq, optimizer, args, states,
                                    best_model=True, clear_history=True)
            states['best_val_loss'] = val_ppl
            states['best_epoch'] = epoch
        # Saving standard model
        model_helper.save_model(args.model_path, epoch, val_ppl, seq2seq, optimizer, args, states,
                                best_model=False, clear_history=True)
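# Both train() variants shrink the generator vocabulary by `vocab_offset` when the copy
# mechanism is enabled, while `tgt_vocab_size_with_offsets` keeps the full size. The sketch
# below illustrates the id layout this implies under one assumption: the last
# `max_copy_token_num` ids of the full vocabulary are positional copy placeholders that are
# resolved back to source tokens at decoding time. This is an illustration of that assumed
# convention, not code from this repo.
def resolve_copy_token_sketch(token_id, generate_vocab_size, src_tokens, itos):
    """Map a predicted id to a word: normal vocab lookup, or a copy of a source position."""
    if token_id < generate_vocab_size:
        return itos[token_id]             # ordinary vocabulary word
    copy_position = token_id - generate_vocab_size
    if copy_position < len(src_tokens):
        return src_tokens[copy_position]  # copy the source token at this position
    return '<unk>'                        # placeholder points past the source length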