def __checkpoints__(self):
    try:
        ckpt = training.load_checkpoint('%s/latest.ckpt' % self.args.checkpoint_dir)
        self.start_epoch = ckpt['epoch']
        self.Gab.load_state_dict(ckpt['Gab'])
        self.Gba.load_state_dict(ckpt['Gba'])
    except:
        print('Model is still untrained!')
def load_checkpoint(self, name, path):
    state = load_checkpoint(name, path=path)
    self.config = state["config"]
    self.epoch = state["epoch"]
    self.step = state["step"]
    self.model.load_state_dict(state["model"])
    self.model.__class__.__name__ = state["model_class"]
    # [x.state_dict() for x in self.optimizers] = state["optimizers"]
    _vocab = state["vocab"]
    self.best_f1 = state["f1"]
    self.best_acc = state["acc"]
    return _vocab
def __checkpoints__(self):
    # If the checkpoint directory does not exist yet, create it.
    if not os.path.isdir(self.args.checkpoint_dir):
        os.makedirs(self.args.checkpoint_dir)
    # Try to resume from the latest checkpoint; if there is none, just move on.
    try:
        ckpt = training.load_checkpoint('%s/latest.ckpt' % self.args.checkpoint_dir)
        self.start_epoch = ckpt['epoch']
        self.Da.load_state_dict(ckpt['Da'])
        self.Db.load_state_dict(ckpt['Db'])
        self.Gab.load_state_dict(ckpt['Gab'])
        self.Gba.load_state_dict(ckpt['Gba'])
        self.d_optimizer.load_state_dict(ckpt['d_optimizer'])
        self.g_optimizer.load_state_dict(ckpt['g_optimizer'])
    except:
        print(' [*] No checkpoint!')
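# A minimal save-side sketch (assumption, not from the source) of a method that
# would produce a checkpoint compatible with __checkpoints__ above. It uses
# torch.save and the same keys the resume code reads back; the method name is
# hypothetical.
import torch

def save_latest_checkpoint(self, epoch):
    state = {
        'epoch': epoch,
        'Da': self.Da.state_dict(),
        'Db': self.Db.state_dict(),
        'Gab': self.Gab.state_dict(),
        'Gba': self.Gba.state_dict(),
        'd_optimizer': self.d_optimizer.state_dict(),
        'g_optimizer': self.g_optimizer.state_dict(),
    }
    torch.save(state, '%s/latest.ckpt' % self.args.checkpoint_dir)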
def bcn(config, data_file, embeddings, device, chekpoint, dataset, embeddings_type):
    # Possible extensions: add 2 languages, use a combination of CoVe embeddings (like ELMo).
    inputs = data.Field(lower=True, include_lengths=True, batch_first=True)
    labels = data.Field(sequential=False, unk_token=None)

    print('Generating train, dev, test splits')
    if dataset == 'IWSLT':
        # using the IWSLT 2016 TED talk translation task
        train, dev, test = datasets.IWSLT.splits(root=data_file,
                                                 exts=['.en', '.de'],
                                                 fields=[inputs, inputs])
    elif dataset == 'SST-2':
        train, dev, test = datasets.SST.splits(
            text_field=inputs, label_field=labels, root=data_file,
            fine_grained=False, train_subtrees=True,
            filter_pred=lambda ex: ex.label != 'neutral')
    elif dataset == 'SST-5':
        train, dev, test = datasets.SST.splits(
            text_field=inputs, label_field=labels, root=data_file,
            fine_grained=True, train_subtrees=True)
    elif dataset == 'IMDB':
        train, test = datasets.IMDB.splits(text_field=inputs,
                                           label_field=labels,
                                           root=data_file)
        # 0.9 split ratio in order to stay close to the paper
        train, dev = train.split(split_ratio=0.9, stratified=True)
    elif dataset == 'TREC-6':
        train, test = datasets.TREC.splits(text_field=inputs,
                                           label_field=labels,
                                           root=data_file,
                                           fine_grained=False)
        train, dev = train.split(split_ratio=0.9, stratified=True)
    elif dataset == 'TREC-50':
        train, test = datasets.TREC.splits(text_field=inputs,
                                           label_field=labels,
                                           root=data_file,
                                           fine_grained=True)
        train, dev = train.split()
    elif dataset == 'SNLI':
        train, dev, test = datasets.SNLI.splits(text_field=inputs,
                                                label_field=labels,
                                                root=data_file)
    else:
        print('Invalid dataset name detected...')
        return

    print('Building vocabulary')
    inputs.build_vocab(train, dev, test)
    inputs.vocab.load_vectors(vectors=GloVe(name='840B', dim=300, cache=embeddings))
    labels.build_vocab(train, dev, test)

    train_iter, dev_iter, test_iter = data.BucketIterator.splits(
        (train, dev, test),
        batch_size=config["train_batch_size"],
        device=torch.device(device) if device >= 0 else None,
        sort_within_batch=True)

    model = BCN(config=config,
                n_vocab=len(inputs.vocab),
                vocabulary=inputs.vocab.vectors,
                embeddings=embeddings,
                num_labels=len(labels.vocab.freqs),
                embeddings_type=embeddings_type)

    # Exclude the (frozen) CoVe MT-LSTM from the trainable parameters.
    bcn_params = [p for n, p in model.named_parameters()
                  if "mtlstm" not in n and p.requires_grad]

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(bcn_params, lr=0.001)

    if device != -1:
        model.to(device)
    print(model)

    total_params = sum(p.numel() for p in model.parameters())
    total_trainable_params = sum(p.numel() for p in bcn_params if p.requires_grad)
    print("Total Params:", number_h(total_params))
    print("Total Trainable Params:", number_h(total_trainable_params))

    #####################################
    # Training Pipeline
    #####################################
    trainer = BCNTrainer(model=model,
                         train_loader=None,
                         valid_loader=test_iter,
                         criterion=criterion,
                         device="cpu" if device == -1 else 'cuda',
                         config=config,
                         optimizers=[optimizer])

    # Restore the trained weights and evaluate on the test set.
    state = load_checkpoint(chekpoint)
    model.load_state_dict(state["model"])

    print('Generating CoVe')
    test_loss, y_test, y_pred_test = trainer.test_step()
    print("Test cls loss is {}".format(test_loss))
    print("\n")
    print("F1 on test set is {}".format(f1_macro(y_test, y_pred_test)))
    print("\n")
    print("Accuracy on test set is {}".format(acc(y_test, y_pred_test)))
    print("\n")

    return test_loss, f1_macro(y_test, y_pred_test)
X_train, y_train, X_test, y_test = load_dataset(config["data"]["dataset"], test=True)

# load word embeddings
if config["data"]["embeddings"] == "wiki.en.vec":
    word2idx, idx2word, weights = load_word_vectors_from_fasttext(
        os.path.join(EMB_DIR, config["data"]["embeddings"]),
        config["data"]["embeddings_dim"])
else:
    word2idx, idx2word, weights = load_word_vectors(
        os.path.join(EMB_DIR, config["data"]["embeddings"]),
        config["data"]["embeddings_dim"])

checkpoint_name = "Psych_exp_baseline"
state = load_checkpoint(checkpoint_name)
# features, feat_length = load_features(config["data"]["features"])

test_set = ClfDataset(X_test, y_test, word2idx, name="psych_test")
test_lengths = [len(x) for x in test_set.data]
test_sampler = SortedSampler(test_lengths)
test_loader = DataLoader(test_set, sampler=test_sampler,
                         batch_size=config["batch_size"],
                         num_workers=opts.cores, collate_fn=ClfCollate())

model = Classifier(ntokens=weights.shape[0], nclasses=7, **config["model"])
model.load_state_dict(state["model"])
def sum_clf_test(dataset, config, opts, transfer=False, output_dir=None,
                 checkpoint_name='scv2_aux_ft_gu_last'):
    opts.name = config["name"]
    X_test, y_test, posts_test, pids, human_summaries = dataset
    vocab = None

    if transfer:
        opts.transfer = config["pretrained_lm"]
        checkpoint = load_checkpoint(opts.transfer)
        config["vocab"].update(checkpoint["config"]["vocab"])
        dict_pattern_rename(checkpoint["config"]["model"],
                            {"rnn_": "bottom_rnn_"})
        config["model"].update(checkpoint["config"]["model"])
        vocab = checkpoint["vocab"]

    ####################################################################
    # Load Preprocessed Datasets
    ####################################################################
    if config["preprocessor"] == "twitter":
        preprocessor = twitter_preprocessor()
    else:
        preprocessor = None

    ####################################################################
    # Model
    ####################################################################
    ntokens = 70004
    model = SummarizationClassifier(ntokens, len(set([0, 1])), **config["model"])
    model.to(opts.device)

    clf_criterion = nn.CrossEntropyLoss()
    lm_criterion = nn.CrossEntropyLoss(ignore_index=0)

    embed_parameters = filter(lambda p: p.requires_grad,
                              model.embed.parameters())
    bottom_parameters = filter(
        lambda p: p.requires_grad,
        chain(model.bottom_rnn.parameters(), model.vocab.parameters()))
    if config["model"]["has_att"]:
        top_parameters = filter(
            lambda p: p.requires_grad,
            chain(model.top_rnn.parameters(),
                  model.attention.parameters(),
                  model.classes.parameters()))
    else:
        top_parameters = filter(
            lambda p: p.requires_grad,
            chain(model.top_rnn.parameters(), model.classes.parameters()))

    embed_optimizer = optim.ASGD(embed_parameters, lr=0.0001)
    rnn_optimizer = optim.ASGD(bottom_parameters)
    top_optimizer = Adam(top_parameters, lr=config["top_lr"])

    ####################################################################
    # Training Pipeline
    ####################################################################
    # Trainer: responsible for managing the training process
    trainer = SumClfTrainer(model, None, None,
                            (lm_criterion, clf_criterion),
                            [embed_optimizer, rnn_optimizer, top_optimizer],
                            config, opts.device,
                            valid_loader_train_set=None,
                            unfreeze_embed=config["unfreeze_embed"],
                            unfreeze_rnn=config["unfreeze_rnn"],
                            test_loader=None)

    ####################################################################
    # Resume Training from a previous checkpoint
    ####################################################################
    if transfer:
        print("Transferring Encoder weights ...")
        dict_pattern_rename(checkpoint["model"],
                            {"encoder": "bottom_rnn", "decoder": "vocab"})
        load_state_dict_subset(model, checkpoint["model"])
    print(model)

    _vocab = trainer.load_checkpoint(name=checkpoint_name, path=None)

    test_set = SUMDataset(X_test, posts_test, y_test,
                          seq_len=config['data']['seq_len'],
                          post_len=config['data']['post_len'],
                          preprocess=preprocessor,
                          vocab=_vocab)
    test_lengths = [len(x) for x in test_set.data]
    test_sampler = SortedSampler(test_lengths)
    # test_loader = DataLoader(test_set, sampler=test_sampler,
    #                          batch_size=config["batch_size"],
    #                          num_workers=opts.cores, collate_fn=SumCollate())
    test_loader = DataLoader(test_set, sampler=test_sampler,
                             batch_size=config["batch_size"],
                             num_workers=0, collate_fn=SumCollate())
    trainer.test_loader = test_loader

    _, labels_array, predicted = trainer.test_epoch()

    # Group sentences (and optional human summaries) by post id.
    pids_dic = {}
    if human_summaries is None:
        for x, y, sent, z in zip(y_test, predicted, X_test, pids):
            if z in pids_dic:
                pids_dic[z].append([x, y, sent])
            else:
                pids_dic[z] = [[x, y, sent]]
    else:
        for x, y, sent, z, h_summary in zip(y_test, predicted, X_test, pids,
                                            human_summaries):
            if z in pids_dic:
                pids_dic[z].append([x, y, sent, h_summary])
            else:
                pids_dic[z] = [[x, y, sent, h_summary]]

    # import os
    # if not os.path.exists('{}/ref_abs'.format(output_dir)):
    #     os.mkdir('{}/ref_abs'.format(output_dir))
    # if not os.path.exists('{}/dec'.format(output_dir)):
    #     os.mkdir('{}/dec'.format(output_dir))
    file_index = 0
    all_summaries = []
    for elem_key in pids_dic:
        current_summary = ''
        for pair in pids_dic[elem_key]:
            # Sentences predicted as positive form the extractive summary.
            if pair[1] == 1:
                current_summary += pair[2] + '\n'
        all_summaries.append(current_summary)
    return all_summaries
def sent_clf(dataset, config, opts, transfer=False):
    from logger.experiment import Experiment

    opts.name = config["name"]
    X_train, y_train, _, X_val, y_val, _ = dataset
    vocab = None

    if transfer:
        opts.transfer = config["pretrained_lm"]
        checkpoint = load_checkpoint(opts.transfer)
        config["vocab"].update(checkpoint["config"]["vocab"])
        dict_pattern_rename(checkpoint["config"]["model"],
                            {"rnn_": "bottom_rnn_"})
        config["model"].update(checkpoint["config"]["model"])
        vocab = checkpoint["vocab"]

    ####################################################################
    # Load Preprocessed Datasets
    ####################################################################
    if config["preprocessor"] == "twitter":
        preprocessor = twitter_preprocessor()
    else:
        preprocessor = None

    print("Building training dataset...")
    train_set = ClfDataset(X_train, y_train,
                           vocab=vocab,
                           preprocess=preprocessor,
                           vocab_size=config["vocab"]["size"],
                           seq_len=config["data"]["seq_len"])

    print("Building validation dataset...")
    val_set = ClfDataset(X_val, y_val,
                         seq_len=train_set.seq_len,
                         preprocess=preprocessor,
                         vocab=train_set.vocab)

    src_lengths = [len(x) for x in train_set.data]
    val_lengths = [len(x) for x in val_set.data]

    # select sampler & dataloader
    train_sampler = BucketBatchSampler(src_lengths, config["batch_size"], True)
    val_sampler = SortedSampler(val_lengths)
    val_sampler_train = SortedSampler(src_lengths)

    train_loader = DataLoader(train_set, batch_sampler=train_sampler,
                              num_workers=opts.cores, collate_fn=ClfCollate())
    val_loader = DataLoader(val_set, sampler=val_sampler,
                            batch_size=config["batch_size"],
                            num_workers=opts.cores, collate_fn=ClfCollate())
    val_loader_train_dataset = DataLoader(train_set, sampler=val_sampler_train,
                                          batch_size=config["batch_size"],
                                          num_workers=opts.cores,
                                          collate_fn=ClfCollate())

    ####################################################################
    # Model
    ####################################################################
    ntokens = len(train_set.vocab)
    model = Classifier(ntokens, len(set(train_set.labels)), **config["model"])
    model.to(opts.device)

    clf_criterion = nn.CrossEntropyLoss()
    lm_criterion = nn.CrossEntropyLoss(ignore_index=0)

    embed_parameters = filter(lambda p: p.requires_grad,
                              model.embed.parameters())
    bottom_parameters = filter(
        lambda p: p.requires_grad,
        chain(model.bottom_rnn.parameters(), model.vocab.parameters()))
    if config["model"]["has_att"]:
        top_parameters = filter(
            lambda p: p.requires_grad,
            chain(model.top_rnn.parameters(),
                  model.attention.parameters(),
                  model.classes.parameters()))
    else:
        top_parameters = filter(
            lambda p: p.requires_grad,
            chain(model.top_rnn.parameters(), model.classes.parameters()))

    embed_optimizer = optim.ASGD(embed_parameters, lr=0.0001)
    rnn_optimizer = optim.ASGD(bottom_parameters)
    top_optimizer = Adam(top_parameters, lr=config["top_lr"])

    ####################################################################
    # Training Pipeline
    ####################################################################
    # Trainer: responsible for managing the training process
    trainer = SentClfTrainer(model, train_loader, val_loader,
                             (lm_criterion, clf_criterion),
                             [embed_optimizer, rnn_optimizer, top_optimizer],
                             config, opts.device,
                             valid_loader_train_set=val_loader_train_dataset,
                             unfreeze_embed=config["unfreeze_embed"],
                             unfreeze_rnn=config["unfreeze_rnn"])

    ####################################################################
    # Experiment: logging and visualizing the training process
    ####################################################################
    # exp = Experiment(opts.name, config, src_dirs=opts.source,
    #                  output_dir=EXP_DIR)
    # exp.add_metric("ep_loss_lm", "line", "epoch loss lm", ["TRAIN", "VAL"])
    # exp.add_metric("ep_loss_cls", "line", "epoch loss class", ["TRAIN", "VAL"])
    # exp.add_metric("ep_f1", "line", "epoch f1", ["TRAIN", "VAL"])
    # exp.add_metric("ep_acc", "line", "epoch accuracy", ["TRAIN", "VAL"])
    # exp.add_value("epoch", title="epoch summary")
    # exp.add_value("progress", title="training progress")

    ep_loss_lm = [10000, 10000]
    ep_loss_cls = [10000, 10000]
    ep_f1 = [0, 0]
    ep_acc = [0, 0]
    e_log = 0
    progress = 0

    ####################################################################
    # Resume Training from a previous checkpoint
    ####################################################################
    if transfer:
        print("Transferring Encoder weights ...")
        dict_pattern_rename(checkpoint["model"],
                            {"encoder": "bottom_rnn", "decoder": "vocab"})
        load_state_dict_subset(model, checkpoint["model"])
    print(model)

    ####################################################################
    # Training Loop
    ####################################################################
    best_loss = None
    early_stopping = EarlyStopping("min", config["patience"])

    for epoch in range(0, config["epochs"]):
        train_loss = trainer.train_epoch()
        val_loss, y, y_pred = trainer.eval_epoch(val_set=True)
        _, y_train, y_pred_train = trainer.eval_epoch(train_set=True)

        # Track per-epoch metrics (the exp.update_metric logger calls are disabled).
        ep_loss_lm[0] = train_loss[0]
        ep_loss_lm[1] = val_loss[0]
        ep_loss_cls[0] = train_loss[1]
        ep_loss_cls[1] = val_loss[1]
        ep_f1[0] = f1_macro(y_train, y_pred_train)
        ep_f1[1] = f1_macro(y, y_pred)
        ep_acc[0] = acc(y_train, y_pred_train)
        ep_acc[1] = acc(y, y_pred)

        # epoch_log = exp.log_metrics(["ep_loss_lm", "ep_loss_cls", "ep_f1", "ep_acc"])
        epoch_log = ('Train lm Loss : {}\nVal lm Loss : {}\n'
                     'Train cls Loss : {}\nVal cls Loss : {}\n'
                     'Train f1 : {}\nVal f1 : {}\n'
                     'Train acc : {}\nVal acc : {}').format(
            ep_loss_lm[0], ep_loss_lm[1], ep_loss_cls[0], ep_loss_cls[1],
            ep_f1[0], ep_f1[1], ep_acc[0], ep_acc[1])
        print(epoch_log)
        # exp.update_value("epoch", epoch_log)
        e_log = epoch_log

        # Save a checkpoint every epoch (best-loss gating is disabled).
        # if not best_loss or val_loss[1] < best_loss:
        best_loss = val_loss[1]
        trainer.best_acc = acc(y, y_pred)
        trainer.best_f1 = f1_macro(y, y_pred)
        trainer.checkpoint(name=opts.name, tags=str(epoch))

        # if early_stopping.stop(val_loss[1]):
        #     print("Early Stopping (according to classification loss)....")
        #     break

        print("\n" * 2)

    return best_loss, trainer.best_acc, trainer.best_f1
def compress_seq3(checkpoint, src_file, out_file, device,
                  verbose=False, mode="attention"):
    checkpoint = load_checkpoint(checkpoint)
    config = checkpoint["config"]
    vocab = checkpoint["vocab"]

    def giga_tokenizer(x):
        return x.strip().lower().split()

    dataset = AEDataset(src_file,
                        preprocess=giga_tokenizer,
                        vocab=checkpoint["vocab"],
                        seq_len=config["data"]["seq_len"],
                        return_oov=True,
                        oovs=config["data"]["oovs"])
    data_loader = DataLoader(dataset, batch_size=config["batch_size"],
                             num_workers=0, collate_fn=Seq2SeqOOVCollate())
    n_tokens = len(dataset.vocab)

    model = Seq2Seq2Seq(n_tokens, **config["model"]).to(device)
    model.load_state_dict(checkpoint["model"])
    model.eval()

    ##############################################

    n_batches = math.ceil(len(data_loader.dataset) / data_loader.batch_size)

    if verbose:
        iterator = tqdm(enumerate(data_loader, 1), total=n_batches)
    else:
        iterator = enumerate(data_loader, 1)

    def devect(ids, oov, strip_eos, pp):
        return devectorize(ids.tolist(), vocab.id2tok,
                           vocab.tok2id[vocab.EOS],
                           strip_eos=strip_eos, oov_map=oov, pp=pp)

    def id2txt(ids, oov=None, lengths=None, strip_eos=True):
        if lengths:
            return [" ".join(x[:l]) for l, x in
                    zip(lengths, devect(ids, oov, strip_eos, pp=True))]
        else:
            return [" ".join(x) for x in devect(ids, oov, strip_eos, pp=True)]

    results = []
    with open(out_file, "w") as f:
        with torch.no_grad():
            for i, batch in iterator:
                batch_oov_map = batch[-1]
                batch = batch[:-1]
                batch = list(map(lambda x: x.to(device), batch))
                (inp_src, out_src, inp_trg, out_trg,
                 src_lengths, trg_lengths) = batch

                # Target (compression) length: half the source length, clamped to [5, 30].
                trg_lengths = torch.clamp(src_lengths / 2, min=5, max=30) + 1

                #############################################################
                # Debug
                #############################################################
                if mode in ["attention", "debug"]:
                    outputs = model(inp_src, inp_trg,
                                    src_lengths, trg_lengths, sampling=0)
                    enc1, dec1, enc2, dec2 = outputs

                    if mode == "debug":
                        src = id2txt(inp_src)
                        latent = id2txt(dec1[3].max(-1)[1])
                        rec = id2txt(dec2[0].max(-1)[1])
                        _results = list(zip(src, latent, rec))
                        for sample in _results:
                            f.write("\n".join(sample) + "\n\n")
                    elif mode == "attention":
                        src = devect(inp_src, None, strip_eos=False, pp=False)
                        latent = devect(dec1[3].max(-1)[1],
                                        None, strip_eos=False, pp=False)
                        rec = devect(dec2[0].max(-1)[1],
                                     None, strip_eos=False, pp=False)
                        _results = [src, latent, dec1[4], rec, dec2[4]]
                        results += list(zip(*_results))
                        # keep only the first batch for attention inspection
                        break
                    else:
                        raise ValueError
                else:
                    enc1, dec1 = model.generate(inp_src, src_lengths, trg_lengths)
                    preds = id2txt(dec1[0].max(-1)[1],
                                   batch_oov_map, trg_lengths.tolist())
                    for sample in preds:
                        f.write(sample + "\n")

    return results
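# Hypothetical usage sketch (checkpoint name and file paths are placeholders,
# not from the source). Any mode other than "attention"/"debug" falls through
# to plain generation, writing one compression per line to out_file; the
# returned list is only populated in "attention" mode.
compress_seq3(checkpoint="seq3_giga_checkpoint",
              src_file="data/giga_test.src",
              out_file="outputs/giga_test.hyp",
              device="cuda",
              verbose=True,
              mode="generate")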
train_loader = DataLoader(train_set, config["batch_train"],
                          shuffle=True, drop_last=True)
test_loader = DataLoader(test_set, config["batch_eval"])

classes = label_encoder.classes_.size

model = Classifier(embeddings=weights, out_size=classes, **config).to(DEVICE)

# note: `weights` is reused below to hold the class weights for the loss
weights = class_weigths(train_set.labels, to_pytorch=True)
weights = weights.to(DEVICE)
criterion = CrossEntropyLoss(weight=weights)

if pretrained_classifier:
    # Warm-start from the sentiment baseline, keeping the freshly initialized output layer.
    pretr_model, pretr_optimizer, pretr_vocab, loss, acc = \
        load_checkpoint("sentiment_baseline")
    pretr_model.to(DEVICE)
    pretr_model.output = model.output
    model = pretr_model
    name = "wassa_pretr_clf_with_baseline"
else:
    name = "wassa_rnn_600_bidir_batch32_adam_0.05"

parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = Adam(parameters, amsgrad=True)

print(model)

#############################################################################
# Training Pipeline
from utils.training import epoch_summary, save_checkpoint, load_checkpoint

# load dataset
config = ConfLangModelFT
name = 'LM_FT_GU_3_6'
dataset = 'wassa'

# gradual unfreezing schedule: which layer groups start frozen and at which epoch they unfreeze
unfreeze = True
freeze = {"embed": True, "hidden": True}
unfreeze_epoque = {"embed": 6, "hidden": 3}

# Load Pretrained LM
pretr_model, pretr_optimizer, pretr_vocab, loss, acc = \
    load_checkpoint("emotion2M/emotion_with_2M_18-06-28_18:04:54")
pretr_model.to(DEVICE)

# Load wassa
train_data, val_data, _, _ = load_wassa()

#####################################################################
# Define Dataloaders
#####################################################################
preprocessor = twitter_preprocessor()

if preprocessor is None:
    train_name = "train_simple_split_{}".format(dataset)
    val_name = "valid_simple_split_{}".format(dataset)
else:
    train_name = "train_ekphrasis_{}".format(dataset)
    val_name = "valid_ekphrasis_{}".format(dataset)
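# A minimal sketch (assumption, not from the source) of how the gradual
# unfreezing schedule above could be applied once per epoch: each layer group
# stays frozen until its scheduled epoch is reached. The model attribute names
# (embedding, rnn) are hypothetical.
def apply_unfreeze_schedule(model, epoch, freeze, unfreeze_epoque):
    if freeze["hidden"] and epoch >= unfreeze_epoque["hidden"]:
        for p in model.rnn.parameters():
            p.requires_grad = True
        freeze["hidden"] = False
    if freeze["embed"] and epoch >= unfreeze_epoque["embed"]:
        for p in model.embedding.parameters():
            p.requires_grad = True
        freeze["embed"] = False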
def sent_clf_no_aux(dataset, config, opts, transfer=False):
    from logger.experiment import Experiment

    opts.name = config["name"]
    X_train, y_train, X_val, y_val = dataset
    vocab = None

    if transfer:
        opts.transfer = config["pretrained_lm"]
        checkpoint = load_checkpoint(opts.transfer)
        config["vocab"].update(checkpoint["config"]["vocab"])
        dict_pattern_rename(checkpoint["config"]["model"],
                            {"rnn_": "bottom_rnn_"})
        config["model"].update(checkpoint["config"]["model"])
        vocab = checkpoint["vocab"]

    ####################################################################
    # Data Loading and Preprocessing
    ####################################################################
    if config["preprocessor"] == "twitter":
        preprocessor = twitter_preprocessor()
    else:
        preprocessor = None

    print("Building training dataset...")
    train_set = ClfDataset(X_train, y_train,
                           vocab=vocab,
                           preprocess=preprocessor,
                           vocab_size=config["vocab"]["size"],
                           seq_len=config["data"]["seq_len"])

    print("Building validation dataset...")
    val_set = ClfDataset(X_val, y_val,
                         seq_len=train_set.seq_len,
                         preprocess=preprocessor,
                         vocab=train_set.vocab)

    src_lengths = [len(x) for x in train_set.data]
    val_lengths = [len(x) for x in val_set.data]

    # select sampler & dataloader
    train_sampler = BucketBatchSampler(src_lengths, config["batch_size"], True)
    val_sampler = SortedSampler(val_lengths)
    val_sampler_train = SortedSampler(src_lengths)

    train_loader = DataLoader(train_set, batch_sampler=train_sampler,
                              num_workers=opts.cores, collate_fn=ClfCollate())
    val_loader = DataLoader(val_set, sampler=val_sampler,
                            batch_size=config["batch_size"],
                            num_workers=opts.cores, collate_fn=ClfCollate())
    val_loader_train_dataset = DataLoader(train_set, sampler=val_sampler_train,
                                          batch_size=config["batch_size"],
                                          num_workers=opts.cores,
                                          collate_fn=ClfCollate())

    ####################################################################
    # Model
    ####################################################################
    ntokens = len(train_set.vocab)
    model = NaiveClassifier(ntokens, len(set(train_set.labels)),
                            attention=config["model"]["has_att"],
                            **config["model"])
    model.to(opts.device)

    criterion = nn.CrossEntropyLoss()

    if config["gu"]:
        # Gradual unfreezing: separate optimizers for embedding, RNN and top layers.
        embed_parameters = filter(lambda p: p.requires_grad,
                                  model.embed.parameters())
        bottom_parameters = filter(lambda p: p.requires_grad,
                                   chain(model.bottom_rnn.parameters()))
        if config["model"]["has_att"]:
            top_parameters = filter(
                lambda p: p.requires_grad,
                chain(model.attention.parameters(), model.classes.parameters()))
        else:
            top_parameters = filter(lambda p: p.requires_grad,
                                    model.classes.parameters())
        embed_optimizer = Adam(embed_parameters)
        rnn_optimizer = Adam(bottom_parameters)
        top_optimizer = Adam(top_parameters)

        # Trainer: responsible for managing the training process
        trainer = SentClfNoAuxTrainer(
            model, train_loader, val_loader, criterion,
            [embed_optimizer, rnn_optimizer, top_optimizer],
            config, opts.device,
            valid_loader_train_set=val_loader_train_dataset,
            unfreeze_embed=config["unfreeze_embed"],
            unfreeze_rnn=config["unfreeze_rnn"])
    else:
        parameters = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = optim.Adam(parameters, lr=config["top_lr"])

        # Trainer: responsible for managing the training process
        trainer = SentClfNoAuxTrainer(
            model, train_loader, val_loader, criterion, [optimizer],
            config, opts.device,
            valid_loader_train_set=val_loader_train_dataset)

    ####################################################################
    # Experiment: logging and visualizing the training process
    ####################################################################
    exp = Experiment(opts.name, config, src_dirs=opts.source,
                     output_dir=EXP_DIR)
    exp.add_metric("ep_loss", "line", "epoch loss class", ["TRAIN", "VAL"])
    exp.add_metric("ep_f1", "line", "epoch f1", ["TRAIN", "VAL"])
    exp.add_metric("ep_acc", "line", "epoch accuracy", ["TRAIN", "VAL"])
    exp.add_value("epoch", title="epoch summary")
    exp.add_value("progress", title="training progress")

    ####################################################################
    # Resume Training from a previous checkpoint
    ####################################################################
    if transfer:
        print("Transferring Encoder weights ...")
        dict_pattern_rename(checkpoint["model"], {"encoder": "bottom_rnn"})
        load_state_dict_subset(model, checkpoint["model"])
    print(model)

    ####################################################################
    # Training Loop
    ####################################################################
    best_loss = None
    early_stopping = EarlyStopping("min", config["patience"])

    for epoch in range(1, config["epochs"] + 1):
        train_loss = trainer.train_epoch()
        val_loss, y, y_pred = trainer.eval_epoch(val_set=True)
        _, y_train, y_pred_train = trainer.eval_epoch(train_set=True)

        # Calculate accuracy and f1-macro on the evaluation set
        exp.update_metric("ep_loss", train_loss.item(), "TRAIN")
        exp.update_metric("ep_loss", val_loss.item(), "VAL")
        exp.update_metric("ep_f1", f1_macro(y_train, y_pred_train), "TRAIN")
        exp.update_metric("ep_f1", f1_macro(y, y_pred), "VAL")
        exp.update_metric("ep_acc", acc(y_train, y_pred_train), "TRAIN")
        exp.update_metric("ep_acc", acc(y, y_pred), "VAL")

        print()
        epoch_log = exp.log_metrics(["ep_loss", "ep_f1", "ep_acc"])
        print(epoch_log)
        exp.update_value("epoch", epoch_log)

        ###############################################################
        # Unfreezing the model after X epochs
        ###############################################################

        # Save the model if the val loss is the best we've seen so far.
        if not best_loss or val_loss < best_loss:
            best_loss = val_loss
            trainer.best_acc = acc(y, y_pred)
            trainer.best_f1 = f1_macro(y, y_pred)
            trainer.checkpoint(name=opts.name)

        if early_stopping.stop(val_loss):
            print("Early Stopping (according to cls loss)....")
            break

        print("\n" * 2)

    return best_loss, trainer.best_acc, trainer.best_f1
####################################################################
# Data Loading and Preprocessing
####################################################################
vocab = None
if config["vocab"]["vocab_path"] is not None:
    vocab_path = config["vocab"]["vocab_path"]
    print(f"Loading vocab from '{vocab_path}'...")
    vocab = Vocab()
    vocab.from_file(vocab_path)

if opts.cp_vocab is not None:
    print(f"Loading vocab from checkpoint '{opts.cp_vocab}'...")
    vcp = load_checkpoint(opts.cp_vocab)
    vocab = vcp["vocab"]

if opts.resume:
    checkpoint = load_checkpoint(opts.resume)
    config["vocab"].update(checkpoint["config"]["vocab"])
    if not config["vocab"]["subword"]:
        vocab = checkpoint["vocab"]

def giga_tokenizer(x):
    return x.strip().lower().split()

print("Building training dataset...")
train_set = SentenceLMDataset(config["data"]["train_path"],
####################################################################
# Settings
####################################################################
opts, config = seq2seq2seq_options()

####################################################################
#
# Weight Transfer
#
####################################################################
vocab = None

if config["model"]["prior_loss"] and config["prior"] is not None:
    print("Loading Oracle LM ...")
    oracle_cp = load_checkpoint(config["prior"])
    vocab = oracle_cp["vocab"]
    oracle = SeqReader(len(vocab), **oracle_cp["config"]["model"])
    oracle.load_state_dict(oracle_cp["model"])
    oracle.to(opts.device)
    freeze_module(oracle)
else:
    oracle = None

####################################################################
#
# Data Loading and Preprocessing
#
####################################################################
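# A minimal sketch (assumption, not from the source) of what a helper such as
# `freeze_module` typically does: disable gradients so the oracle LM stays
# fixed while the rest of the pipeline trains against it. The function name
# below is hypothetical and only illustrates the idea.
def freeze_module_sketch(module):
    for param in module.parameters():
        param.requires_grad = False
    module.eval()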