def main(): parser = argparse.ArgumentParser() parser.add_argument("--train_file", default=None, type=str, required=True, help="training file") parser.add_argument("--dev_file", default=None, type=str, required=True, help="development file") parser.add_argument("--output_dir", default=None, type=str, required=True, help="output directory for tokenizers and models") parser.add_argument("--num_epochs", default=10, type=int, required=False, help="number of epochs for training") parser.add_argument("--vocab_size", default=50000, type=int, required=False, help="vocabulary size") parser.add_argument("--hidden_size", default=300, type=int, required=False, help="hidden size of GRU") parser.add_argument("--embed_size", default=300, type=int, required=False, help="word embedding size") parser.add_argument("--batch_size", default=64, type=int, required=False, help="batch size for train and eval") parser.add_argument("--loss_function", default="hinge", type=str, required=False, choices=["CrossEntropy", "hinge"], help="which loss function to choose") args = parser.parse_args() # load dataset train_df = pd.read_csv(args.train_file)[["title", "reply"]] dev_df = pd.read_csv(args.dev_file)[["title", "reply"]] texts = list(train_df["title"]) + list(train_df["reply"]) tokenizer = create_tokenizer(texts, args.vocab_size) title_encoder = GRUEncoder(tokenizer.vocab_size, args.embed_size, args.hidden_size) reply_encoder = GRUEncoder(tokenizer.vocab_size, args.embed_size, args.hidden_size) model = DualEncoder(title_encoder, reply_encoder, type=args.loss_function) if args.loss_function == "CrossEntropy": loss_fn = nn.BCEWithLogitsLoss() elif args.loss_function == "hinge": loss_fn = nn.CosineEmbeddingLoss() optimizer = torch.optim.Adam(model.parameters(), lr=1e-4) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") model = model.to(device) if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) pickle.dump(tokenizer, open(os.path.join(args.output_dir, "tokenizer.pickle"), "wb")) best_acc = 0. for epoch in range(args.num_epochs): print("start epoch {}".format(epoch)) train(train_df, model, loss_fn, optimizer, device, tokenizer, args) acc = evaluate(dev_df, model, loss_fn, device, tokenizer, args) if acc > best_acc: best_acc = acc print("saving best model") torch.save(model.state_dict(), os.path.join(args.output_dir, "faq_model.pth"))
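
# --- Hypothetical helper sketch (not part of the original script) ---
# The training script above calls create_tokenizer() and, later, list2tensor();
# neither is defined in this section. Below is a minimal sketch of what they
# might look like, assuming whitespace tokenization and a simple frequency
# cutoff. The project's real helpers (e.g. a jieba- or Keras-based tokenizer)
# may differ; only the call signatures are taken from the code above.
from collections import Counter

import torch


class SimpleTokenizer:
    """Hypothetical whitespace tokenizer with a fixed-size vocabulary."""

    def __init__(self, texts, vocab_size):
        counts = Counter(w for t in texts for w in str(t).split())
        most_common = [w for w, _ in counts.most_common(vocab_size - 2)]
        # index 0 is reserved for padding, index 1 for unknown words
        self.word2idx = {w: i + 2 for i, w in enumerate(most_common)}
        self.vocab_size = len(self.word2idx) + 2

    def encode(self, text):
        return [self.word2idx.get(w, 1) for w in str(text).split()]


def create_tokenizer(texts, vocab_size):
    # assumption: a plain frequency-based vocabulary is enough for this sketch
    return SimpleTokenizer(texts, vocab_size)


def list2tensor(sentences, tokenizer):
    """Pad a batch of sentences to equal length; return id tensor and 0/1 mask."""
    ids = [tokenizer.encode(s) for s in sentences]
    max_len = max(len(seq) for seq in ids)
    x = torch.zeros(len(ids), max_len, dtype=torch.long)
    x_mask = torch.zeros(len(ids), max_len, dtype=torch.float)
    for row, seq in enumerate(ids):
        x[row, :len(seq)] = torch.tensor(seq, dtype=torch.long)
        x_mask[row, :len(seq)] = 1.0
    return x, x_mask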
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_file", default=None, type=str, required=True,
                        help="training file")
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="output directory for tokenizers and models")
    parser.add_argument("--batch_size", default=64, type=int, required=False,
                        help="batch size for train and eval")
    parser.add_argument("--hidden_size", default=300, type=int, required=False,
                        help="hidden size of GRU")
    parser.add_argument("--embed_size", default=300, type=int, required=False,
                        help="word embedding size")
    args = parser.parse_args()

    # load dataset and the tokenizer saved during training
    train_df = pd.read_csv(args.train_file)[["title", "reply"]]
    tokenizer = pickle.load(open(os.path.join(args.output_dir, "tokenizer.pickle"), "rb"))

    # rebuild the dual encoder and load the trained weights
    title_encoder = GRUEncoder(tokenizer.vocab_size, args.embed_size, args.hidden_size)
    reply_encoder = GRUEncoder(tokenizer.vocab_size, args.embed_size, args.hidden_size)
    model = DualEncoder(title_encoder, reply_encoder)
    model.load_state_dict(torch.load(os.path.join(args.output_dir, "faq_model.pth")))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # encode all candidate replies once and cache them on disk
    candidate_file = os.path.join(args.output_dir, "reply_candidates.pickle")
    if not os.path.isfile(candidate_file):
        replies, vectors = prepare_replies(train_df, model, device, tokenizer, args)
        pickle.dump([replies, vectors], open(candidate_file, "wb"))
    else:
        replies, vectors = pickle.load(open(candidate_file, "rb"))

    # interactive loop: encode the question and rank candidates by cosine similarity
    while True:
        title = input("你的问题是?\n")  # "What is your question?"
        if len(title.strip()) == 0:
            continue
        title = [title]
        x, x_mask = list2tensor(title, tokenizer)
        x = x.to(device)
        x_mask = x_mask.to(device)
        x_rep = model.encoder2(x, x_mask).data.cpu().numpy()
        scores = cosine_similarity(x_rep, vectors)[0]
        index = np.argmax(scores)
        print("可能的答案:", replies[index])  # "Possible answer:"
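
# --- Hypothetical helper sketch (not part of the original script) ---
# prepare_replies() is called above but defined elsewhere. A plausible sketch is
# given below: it encodes every candidate reply in batches and returns the raw
# reply strings together with a matrix of vectors for cosine-similarity search.
# Which encoder branch the real project uses for candidates (encoder1 vs.
# encoder2) is an assumption here; only the cached [replies, vectors] format and
# the batch-size argument are taken from the code above.
import numpy as np
import torch


def prepare_replies(train_df, model, device, tokenizer, args, encode_fn=None):
    replies = list(train_df["reply"])
    # assumption: encoder1 is the reply branch of the dual encoder
    encode_fn = encode_fn or model.encoder1
    vectors = []
    model.eval()
    with torch.no_grad():
        for start in range(0, len(replies), args.batch_size):
            batch = replies[start:start + args.batch_size]
            x, x_mask = list2tensor(batch, tokenizer)
            x, x_mask = x.to(device), x_mask.to(device)
            vectors.append(encode_fn(x, x_mask).cpu().numpy())
    return replies, np.concatenate(vectors, axis=0)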
def main(): """ Training and validation. """ global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map if dual_encoder: # this is always initialized with pre-trained models: print("DUAL ENCODER") if dual_encoder_checkpoint is not None: print('Loaded Dual Encoder Checkpoint') dual_branch_checkpoint = torch.load(checkpoint, map_location='cuda:0') encoder = dual_branch_checkpoint['encoder'] decoder = dual_branch_checkpoint['decoder'] decoder_optimizer = torch.optim.Adam(params=filter( lambda p: p.requires_grad, decoder.parameters()), lr=decoder_lr) else: main_branch_checkpoint = torch.load(checkpoint, map_location='cuda:0') encoder = DualEncoder(sketch_resnet=sketch_encoder_resnet) encoder.m_resnet = main_branch_checkpoint['encoder'].resnet print("Use pre-trained resnet") # encoder.m_adaptive_pool = main_branch_checkpoint['encoder'].adaptive_pool decoder = main_branch_checkpoint['decoder'] decoder_optimizer = torch.optim.Adam(params=filter( lambda p: p.requires_grad, decoder.parameters()), lr=decoder_lr) if fine_tune_encoder is True: print("!!! Will fine tune Encoder !!!") encoder.fine_tune(fine_tune_encoder) encoder_optimizer = torch.optim.Adam(params=filter( lambda p: p.requires_grad, encoder.parameters()), lr=encoder_lr) else: encoder_optimizer = torch.optim.Adam(params=filter( lambda p: p.requires_grad, encoder.parameters()), lr=encoder_lr) else: # following method is for One Encoder architecture # Initialize / load checkpoint if checkpoint is None: decoder = DecoderWithAttention(attention_dim=attention_dim, embed_dim=emb_dim, decoder_dim=decoder_dim, vocab_size=len(word_map), dropout=dropout) decoder_optimizer = torch.optim.Adam(params=filter( lambda p: p.requires_grad, decoder.parameters()), lr=decoder_lr) encoder = Encoder(specify_resnet=main_encoder_resnet) encoder.fine_tune(fine_tune_encoder) encoder_optimizer = torch.optim.Adam( params=filter(lambda p: p.requires_grad, encoder.parameters()), lr=encoder_lr) if fine_tune_encoder else None else: checkpoint = torch.load(checkpoint, map_location='cuda:0') # start_epoch = checkpoint['epoch'] + 1 # epochs_since_improvement = checkpoint['epochs_since_improvement'] # best_bleu4 = checkpoint['bleu-4'] this metric is unfair when we switch to a different domain decoder = checkpoint['decoder'] # decoder_optimizer = checkpoint['decoder_optimizer'] decoder_optimizer = torch.optim.Adam(params=filter( lambda p: p.requires_grad, decoder.parameters()), lr=decoder_lr) if main_encoder_resnet is not None: encoder = Encoder( specify_resnet=main_encoder_resnet ) # specify here so the encoder remove the last 2 layers of resnet encoder.adaptive_pool = checkpoint['encoder'].adaptive_pool else: encoder = checkpoint['encoder'] # encoder_optimizer = checkpoint['encoder_optimizer'] # if fine_tune_encoder is True and encoder_optimizer is None: if fine_tune_encoder is True: print("Will fine tune Encoder") encoder.fine_tune(fine_tune_encoder) encoder_optimizer = torch.optim.Adam(params=filter( lambda p: p.requires_grad, encoder.parameters()), lr=encoder_lr) else: encoder_optimizer = torch.optim.Adam(params=filter( lambda p: p.requires_grad, encoder.parameters()), lr=encoder_lr) # Move to GPU, if available decoder = decoder.to(device) encoder = encoder.to(device) # Loss function criterion = nn.CrossEntropyLoss().to(device) # Custom dataloaders normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # data augmention for nycc dataset augment = transforms.Compose([ 
transforms.RandomAffine(20, (0.1, 0.1), (0.8, 1.2)), transforms.RandomHorizontalFlip(p=0.5) ]) train_loader = torch.utils.data.DataLoader(CaptionDataset( data_folder, data_name, 'TRAIN', transform=transforms.Compose([augment, normalize])), batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True) val_loader = torch.utils.data.DataLoader(CaptionDataset( data_folder, data_name, 'VAL', transform=transforms.Compose([normalize])), batch_size=batch_size, shuffle=True, num_workers=workers, pin_memory=True) # Epochs for epoch in range(start_epoch, epochs): # Decay learning rate if there is no improvement for 8 consecutive epochs, and terminate training after 20 # if epochs_since_improvement == 40: # break # if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0: # adjust_learning_rate(decoder_optimizer, 0.8) # if fine_tune_encoder: # adjust_learning_rate(encoder_optimizer, 0.8) # One epoch's training train(train_loader=train_loader, encoder=encoder, decoder=decoder, criterion=criterion, encoder_optimizer=encoder_optimizer, decoder_optimizer=decoder_optimizer, epoch=epoch) # One epoch's validation recent_bleu4 = validate(val_loader=val_loader, encoder=encoder, decoder=decoder, criterion=criterion, epoch=epoch) # Check if there was an improvement is_best = recent_bleu4 > best_bleu4 best_bleu4 = max(recent_bleu4, best_bleu4) if not is_best: epochs_since_improvement += 1 print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement, )) else: epochs_since_improvement = 0 # Save checkpoint print(" *** saving model with bleu score: ", recent_bleu4) save_checkpoint(data_name, epoch, epochs_since_improvement, encoder, decoder, encoder_optimizer, decoder_optimizer, recent_bleu4, is_best) print(" *** LAST EPOCH saving model with bleu score: ", recent_bleu4) save_checkpoint(data_name, epoch, epochs_since_improvement, encoder, decoder, encoder_optimizer, decoder_optimizer, recent_bleu4, is_best)
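
# --- Hypothetical sketch (not part of the original script) ---
# encoder.fine_tune(fine_tune_encoder) is called in several places above, but the
# Encoder class lives elsewhere. The sketch below shows one common way such a
# method is implemented for a ResNet backbone: freeze all parameters, then
# optionally unfreeze the later convolutional blocks. The ResNet variant and the
# exact blocks this project unfreezes are assumptions.
import torch.nn as nn
import torchvision


class EncoderSketch(nn.Module):
    def __init__(self, encoded_image_size=14):
        super().__init__()
        resnet = torchvision.models.resnet101(pretrained=True)
        # drop the final pooling and classification layers; keep the conv features
        self.resnet = nn.Sequential(*list(resnet.children())[:-2])
        self.adaptive_pool = nn.AdaptiveAvgPool2d((encoded_image_size, encoded_image_size))
        self.fine_tune(False)

    def forward(self, images):
        out = self.resnet(images)        # (batch, 2048, H/32, W/32)
        out = self.adaptive_pool(out)    # (batch, 2048, 14, 14)
        return out.permute(0, 2, 3, 1)   # (batch, 14, 14, 2048)

    def fine_tune(self, fine_tune=True):
        # keep the early, generic layers frozen; only the later blocks are
        # (optionally) updated when fine_tune is True
        for p in self.resnet.parameters():
            p.requires_grad = False
        for child in list(self.resnet.children())[5:]:
            for p in child.parameters():
                p.requires_grad = fine_tune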