def __call__(self, args):
    super(Predict, self).__call__(args)

    print("Load the dataset")
    corpus = Corpus.load(args.fdata, self.fields)
    dataset = TextDataset(corpus, self.fields[:-1], args.buckets)
    # set the data loader
    dataset.loader = batchify(dataset, args.batch_size)
    print(f"{len(dataset)} sentences, "
          f"{len(dataset.loader)} batches")

    print("Load the model")
    self.model = Model.load(args.model)
    print(f"{self.model}\n")

    print("Make predictions on the dataset")
    start = datetime.now()
    pred_labels = self.predict(dataset.loader)
    total_time = datetime.now() - start
    # restore the order of sentences in the buckets
    indices = torch.tensor([i
                            for bucket in dataset.buckets.values()
                            for i in bucket]).argsort()
    corpus.labels = [pred_labels[i] for i in indices]
    print(f"Save the predicted result to {args.fpred}")
    corpus.save(args.fpred)
    print(f"{total_time}s elapsed, "
          f"{len(dataset) / total_time.total_seconds():.2f} Sents/s")

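# A minimal, runnable sketch (illustrative data, not part of the class above)
# of why the argsort above restores corpus order. Assumption: dataset.buckets
# maps a bucket id to the original sentence indices it holds, and predictions
# come back concatenated bucket by bucket.
import torch

buckets = {0: [2, 0], 1: [1]}  # bucket 0 holds sentences 2 and 0, bucket 1 holds 1
preds = ["pred-for-2", "pred-for-0", "pred-for-1"]  # bucketed prediction order
indices = torch.tensor([i for bucket in buckets.values()
                        for i in bucket]).argsort()
restored = [preds[i] for i in indices]
assert restored == ["pred-for-0", "pred-for-1", "pred-for-2"]
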
def __call__(self, args):
    print("Load the model")
    modelpath = args.mainpath + args.model + args.modelname + "/model_weights"
    vocabpath = args.mainpath + args.vocab + args.modelname + "/vocab.tag"
    config = torch.load(modelpath)['config']
    config.batch_size = 2
    config.buckets = 2
    vocab = torch.load(vocabpath)
    parser = BiaffineParser.load(modelpath)
    model = Model(vocab, parser, config, vocab.n_rels)

    print("Load the dataset")
    if args.input_type == "conllu":
        corpus = UniversalDependenciesDatasetReader()
        corpus.load(args.fdata)
    elif args.input_type == "conllx":
        corpus = Corpus.load(args.fdata)
    elif args.input_type == "raw":
        corpus = UniversalDependenciesRawDatasetReader(args.language)
        corpus.load(args.fdata)
    if args.use_predicted:
        if args.input_type == "conllu":
            corpus_predicted = UniversalDependenciesDatasetReader()
            corpus_predicted.load(args.finit)
        else:
            corpus_predicted = Corpus.load(args.finit)
        dataset = TextDataset(vocab.numericalize(corpus, corpus_predicted))
    else:
        dataset = TextDataset(vocab.numericalize(corpus, training=False))
    # set the data loader
    loader, ids = batchify(dataset, config.batch_size, config.buckets)

    print("Make predictions on the dataset")
    if args.use_predicted:
        heads_pred, rels_pred, metric = model.predict_predicted(loader)
    else:
        heads_pred, rels_pred, metric = model.predict(loader)

    print(f"Save the predicted result to {args.fpred}")
    heads_pred = self.rearange(heads_pred, ids)
    rels_pred = self.rearange(rels_pred, ids)
    corpus.heads = heads_pred
    corpus.rels = rels_pred
    corpus.save(args.fpred)

def __call__(self, args):
    print("Load the model")
    modelpath = args.mainpath + args.model + args.modelname + "/model_weights"
    vocabpath = args.mainpath + args.vocab + args.modelname + "/vocab.tag"
    config = torch.load(modelpath)['config']
    vocab = torch.load(vocabpath)
    parser = Parser.load(modelpath)
    model = Model(vocab, parser, config, vocab.n_rels)

    print("Load the dataset")
    corpus = Corpus.load(args.fdata)
    dataset = TextDataset(vocab.numericalize(corpus))
    # set the data loader
    loader, ids = batchify(dataset, 5 * config.batch_size, config.buckets)

    print("Make predictions on the dataset")
    heads_pred, rels_pred, metric = model.predict(loader)
    print(metric)

    print(f"Save the predicted result to {args.fpred}")
    heads_pred = self.rearange(heads_pred, ids)
    rels_pred = self.rearange(rels_pred, ids)
    corpus.heads = heads_pred
    corpus.rels = rels_pred
    corpus.save(args.fpred)

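# The rearange helper used above is defined elsewhere in the repo. A plausible
# standalone sketch (an assumption about its behavior, not the repo's actual
# code), under the reading that `ids` gives each sentence's original position
# in batched order:
def rearange_sketch(preds, ids):
    # pair each prediction with its original index, then sort by that index
    return [p for _, p in sorted(zip(ids, preds), key=lambda x: x[0])]

assert rearange_sketch(["b", "c", "a"], [1, 2, 0]) == ["a", "b", "c"]
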
def __call__(self, args):
    logger.info("Load the model")
    self.model = Model.load(args.model)
    # override from CLI args
    args = self.model.args.update(vars(args))
    super().__call__(args)

    logger.info("Load the dataset")
    if args.prob:
        self.fields = self.fields._replace(PHEAD=Field('probs'))
    if args.text:
        corpus = TextCorpus.load(args.fdata, self.fields, args.text,
                                 args.tokenizer_dir,
                                 use_gpu=args.device != 1)
    else:
        corpus = Corpus.load(args.fdata, self.fields)
    dataset = TextDataset(corpus, [self.WORD, self.FEAT], args.buckets)
    # set the data loader
    dataset.loader = batchify(dataset, args.batch_size)
    logger.info(f"{len(dataset)} sentences, "
                f"{len(dataset.loader)} batches")

    logger.info("Make predictions on the dataset")
    start = datetime.now()
    pred_arcs, pred_rels, pred_probs = self.predict(dataset.loader)
    total_time = datetime.now() - start
    # restore the order of sentences in the buckets
    indices = torch.tensor([i
                            for bucket in dataset.buckets.values()
                            for i in bucket]).argsort()
    corpus.arcs = [pred_arcs[i] for i in indices]
    corpus.rels = [pred_rels[i] for i in indices]
    if args.prob:
        corpus.probs = [pred_probs[i] for i in indices]

    logger.info(f"Save the predicted result to {args.fpred}")
    corpus.save(args.fpred)
    logger.info(f"{total_time}s elapsed, "
                f"{len(dataset) / total_time.total_seconds():.2f} Sents/s")

def __call__(self, args):
    super(Predict, self).__call__(args)

    print("Load the dataset")
    corpus = Corpus.load(args.fdata, self.fields)
    dataset = TextDataset(corpus, [self.WORD, self.FEAT])
    # set the data loader
    dataset.loader = batchify(dataset, args.batch_size)
    print(f"{len(dataset)} sentences, "
          f"{len(dataset.loader)} batches")

    print("Load the model")
    self.model = Model.load(args.model)
    print(f"{self.model}\n")

    print("Make predictions on the dataset")
    start = datetime.now()
    corpus.heads, corpus.rels = self.predict(dataset.loader)
    print(f"Save the predicted result to {args.fpred}")
    corpus.save(args.fpred)
    total_time = datetime.now() - start
    print(f"{total_time}s elapsed, "
          f"{len(dataset) / total_time.total_seconds():.2f} Sents/s")

def __call__(self, args):
    super(Evaluate, self).__call__(args)

    print("Load the dataset")
    corpus = Corpus.load(args.fdata, self.fields)
    dataset = TextDataset(corpus, self.fields, args.buckets)
    # set the data loader
    dataset.loader = batchify(dataset, args.batch_size)
    print(f"{len(dataset)} sentences, "
          f"{len(dataset.loader)} batches, "
          f"{len(dataset.buckets)} buckets")

    print("Load the model")
    self.model = Model.load(args.model)
    print(f"{self.model}\n")

    print("Evaluate the dataset")
    start = datetime.now()
    loss, metric = self.evaluate(dataset.loader)
    total_time = datetime.now() - start
    print(f"Loss: {loss:.4f} {metric}")
    print(f"{total_time}s elapsed, "
          f"{len(dataset) / total_time.total_seconds():.2f} Sents/s")

def __call__(self, config):
    print("Load the model")
    vocab = torch.load(config.vocab)
    parser = BiaffineParser.load(config.model)
    model = Model(vocab, parser)

    print("Load the dataset")
    corpus = Corpus.load(config.fdata)
    dataset = TextDataset(vocab.numericalize(corpus))
    # set the data loader
    loader = batchify(dataset, config.batch_size, config.buckets)

    print("Evaluate the dataset")
    loss, metric = model.evaluate(loader, config.punct)
    print(f"Loss: {loss:.4f} {metric}")

def __call__(self, config):
    print("Load the model")
    vocab = torch.load(config.vocab)
    parser = BiaffineParser.load(config.model)
    model = Model(config, vocab, parser)

    print("Load the dataset")
    corpus = Corpus.load(config.fdata)
    dataset = TextDataset(vocab.numericalize(corpus), config.buckets)
    # set the data loader
    loader = batchify(dataset, config.batch_size)

    print("Evaluate the dataset")
    _, loss, _, metric_t, metric_p = model.evaluate(None, loader)
    print(f"Loss: {loss:.4f} {metric_t}, {metric_p}")

def __call__(self, config):
    print("Load the model")
    vocab = torch.load(config.vocab)
    parser = BiaffineParser.load(config.model)
    model = Model(vocab, parser)

    print("Load the dataset")
    corpus = Corpus.load(config.fdata)
    dataset = TextDataset(vocab.numericalize(corpus, False))
    # set the data loader
    loader = batchify(dataset, config.batch_size)

    print("Make predictions on the dataset")
    corpus.heads, corpus.rels = model.predict(loader)

    print(f"Save the predicted result to {config.fpred}")
    corpus.save(config.fpred)

def __call__(self, config):
    print("Preprocess the data")
    train = Corpus.load(config.ftrain)
    dev = Corpus.load(config.fdev)
    test = Corpus.load(config.ftest)
    if os.path.exists(config.vocab):
        vocab = torch.load(config.vocab)
    else:
        vocab = Vocab.from_corpus(corpus=train, min_freq=2)
        vocab.read_embeddings(Embedding.load(config.fembed, config.unk))
        torch.save(vocab, config.vocab)
    config.update({
        'n_words': vocab.n_train_words,
        'n_tags': vocab.n_tags,
        'n_rels': vocab.n_rels,
        'pad_index': vocab.pad_index,
        'unk_index': vocab.unk_index
    })
    print(vocab)

    print("Load the dataset")
    trainset = TextDataset(vocab.numericalize(train))
    devset = TextDataset(vocab.numericalize(dev))
    testset = TextDataset(vocab.numericalize(test))
    # set the data loaders
    train_loader = batchify(dataset=trainset,
                            batch_size=config.batch_size,
                            n_buckets=config.buckets,
                            shuffle=True)
    dev_loader = batchify(dataset=devset,
                          batch_size=config.batch_size,
                          n_buckets=config.buckets)
    test_loader = batchify(dataset=testset,
                           batch_size=config.batch_size,
                           n_buckets=config.buckets)
    print(f"{'train:':6} {len(trainset):5} sentences in total, "
          f"{len(train_loader):3} batches provided")
    print(f"{'dev:':6} {len(devset):5} sentences in total, "
          f"{len(dev_loader):3} batches provided")
    print(f"{'test:':6} {len(testset):5} sentences in total, "
          f"{len(test_loader):3} batches provided")

    print("Create the model")
    parser = BiaffineParser(config, vocab.embeddings)
    if torch.cuda.is_available():
        parser = parser.cuda()
    print(f"{parser}\n")
    model = Model(vocab, parser)

    total_time = timedelta()
    best_e, best_metric = 1, Metric()
    model.optimizer = Adam(model.parser.parameters(),
                           config.lr,
                           (config.beta_1, config.beta_2),
                           config.epsilon)
    model.scheduler = ExponentialLR(model.optimizer,
                                    config.decay ** (1 / config.steps))

    for epoch in range(1, config.epochs + 1):
        start = datetime.now()
        # train one epoch and update the parameters
        model.train(train_loader)

        print(f"Epoch {epoch} / {config.epochs}:")
        loss, train_metric = model.evaluate(train_loader, config.punct)
        print(f"{'train:':6} Loss: {loss:.4f} {train_metric}")
        loss, dev_metric = model.evaluate(dev_loader, config.punct)
        print(f"{'dev:':6} Loss: {loss:.4f} {dev_metric}")
        loss, test_metric = model.evaluate(test_loader, config.punct)
        print(f"{'test:':6} Loss: {loss:.4f} {test_metric}")
        t = datetime.now() - start
        # save the model if it is the best so far
        if dev_metric > best_metric and epoch > config.patience:
            best_e, best_metric = epoch, dev_metric
            model.parser.save(config.model + f".{best_e}")
            print(f"{t}s elapsed (saved)\n")
        else:
            print(f"{t}s elapsed\n")
        total_time += t
        if epoch - best_e >= config.patience:
            break
    model.parser = BiaffineParser.load(config.model + f".{best_e}")
    loss, metric = model.evaluate(test_loader, config.punct)

    print(f"max score of dev is {best_metric.score:.2%} at epoch {best_e}")
    print(f"the score of test at epoch {best_e} is {metric.score:.2%}")
    print(f"average time of each epoch is {total_time / epoch}s")
    print(f"{total_time}s elapsed")

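# A standalone sanity check (illustrative values, not the config above) of the
# scheduler gamma used there: gamma = decay ** (1 / steps) shrinks the learning
# rate by a factor of `decay` every `steps` optimizer steps, since
# gamma ** steps == decay.
decay, steps, lr = 0.75, 5000, 2e-3
gamma = decay ** (1 / steps)
assert abs(gamma ** steps - decay) < 1e-9
final_lr = lr * gamma ** steps  # == lr * decay after `steps` steps
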
def __call__(self, args):
    super(Train, self).__call__(args)

    # reserve most of the free GPU memory up front: nvidia-smi reports MiB
    # (nounits), and a (256, 1024, block_mem) float32 tensor occupies exactly
    # block_mem MiB (256 * 1024 floats * 4 bytes = 1 MiB)
    rrr = os.popen(
        '"/usr/bin/nvidia-smi" --query-gpu=memory.total,memory.used --format=csv,nounits,noheader'
    )
    devices_info = rrr.read().strip().split("\n")
    total, used = devices_info[int(
        os.environ["CUDA_VISIBLE_DEVICES"])].split(',')
    total = int(total)
    used = int(used)
    max_mem = int(total * random.uniform(0.95, 0.97))
    block_mem = max_mem - used
    x = torch.cuda.FloatTensor(256, 1024, block_mem)
    del x
    rrr.close()

    logging.basicConfig(filename=args.output,
                        filemode='w',
                        format='%(asctime)s %(levelname)-8s %(message)s',
                        level=logging.INFO,
                        datefmt='%Y-%m-%d %H:%M:%S')

    train_corpus = Corpus.load(args.ftrain, self.fields, args.max_len)
    dev_corpus = Corpus.load(args.fdev, self.fields)
    dev40_corpus = Corpus.load(args.fdev, self.fields, args.max_len)
    test_corpus = Corpus.load(args.ftest, self.fields)
    test40_corpus = Corpus.load(args.ftest, self.fields, args.max_len)

    train = TextDataset(train_corpus, self.fields, args.buckets, crf=args.crf)
    dev = TextDataset(dev_corpus, self.fields, args.buckets, crf=args.crf)
    dev40 = TextDataset(dev40_corpus, self.fields, args.buckets, crf=args.crf)
    test = TextDataset(test_corpus, self.fields, args.buckets, crf=args.crf)
    test40 = TextDataset(test40_corpus, self.fields, args.buckets, crf=args.crf)

    # set the data loaders
    if args.self_train:
        train.loader = batchify(train, args.batch_size)
    else:
        train.loader = batchify(train, args.batch_size, True)
    dev.loader = batchify(dev, args.batch_size)
    dev40.loader = batchify(dev40, args.batch_size)
    test.loader = batchify(test, args.batch_size)
    test40.loader = batchify(test40, args.batch_size)

    logging.info(f"{'train:':6} {len(train):5} sentences, "
                 f"{len(train.loader):3} batches, "
                 f"{len(train.buckets)} buckets")
    logging.info(f"{'dev:':6} {len(dev):5} sentences, "
                 f"{len(dev.loader):3} batches, "
                 f"{len(dev.buckets)} buckets")
    logging.info(f"{'dev40:':6} {len(dev40):5} sentences, "
                 f"{len(dev40.loader):3} batches, "
                 f"{len(dev40.buckets)} buckets")
    logging.info(f"{'test:':6} {len(test):5} sentences, "
                 f"{len(test.loader):3} batches, "
                 f"{len(test.buckets)} buckets")
    logging.info(f"{'test40:':6} {len(test40):5} sentences, "
                 f"{len(test40.loader):3} batches, "
                 f"{len(test40.buckets)} buckets")

    logging.info("Create the model")
    self.model = Model(args)
    self.model = self.model.to(args.device)
    if args.E_Reg or args.T_Reg:
        source_model = Model(args)
        source_model = source_model.to(args.device)

    # load model
    if args.load != '':
        logging.info("Load source model")
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        state = torch.load(args.load, map_location=device)['state_dict']
        state_dict = self.model.state_dict()
        for k, v in state.items():
            if k in ['word_embed.weight']:
                continue
            state_dict.update({k: v})
        self.model.load_state_dict(state_dict)
        init_params = {}
        for name, param in self.model.named_parameters():
            init_params[name] = param.clone()
        self.model.init_params = init_params

        if args.E_Reg or args.T_Reg:
            state_dict = source_model.state_dict()
            for k, v in state.items():
                if k in ['word_embed.weight']:
                    continue
                state_dict.update({k: v})
            source_model.load_state_dict(state_dict)
            init_params = {}
            for name, param in source_model.named_parameters():
                init_params[name] = param.clone()
            source_model.init_params = init_params

    self.model = self.model.load_pretrained(self.WORD.embed)
    self.model = self.model.to(args.device)

    if args.self_train:
        train_arcs_preds = self.get_preds(train.loader)
        del self.model
        self.model = Model(args)
        self.model = self.model.load_pretrained(self.WORD.embed)
        self.model = self.model.to(args.device)

    if args.E_Reg or args.T_Reg:
        source_model = source_model.load_pretrained(self.WORD.embed)
        source_model = source_model.to(args.device)
        args.source_model = source_model

    self.optimizer = Adam(self.model.parameters(), args.lr,
                          (args.mu, args.nu), args.epsilon)
    self.scheduler = ExponentialLR(self.optimizer,
                                   args.decay ** (1 / args.decay_steps))

    # test before train
    if args.load != '':
        logging.info('\n')
        dev_loss, dev_metric = self.evaluate(dev40.loader)
        test_loss, test_metric = self.evaluate(test40.loader)
        logging.info(f"{'dev40:':4} Loss: {dev_loss:.4f} {dev_metric}")
        logging.info(f"{'test40:':4} Loss: {test_loss:.4f} {test_metric}")
        dev_loss, dev_metric = self.evaluate(dev.loader)
        test_loss, test_metric = self.evaluate(test.loader)
        logging.info(f"{'dev:':4} Loss: {dev_loss:.4f} {dev_metric}")
        logging.info(f"{'test:':4} Loss: {test_loss:.4f} {test_metric}")

    total_time = timedelta()
    best_e, best_metric = 1, Metric()
    logging.info("Begin training")

    if args.unsupervised:
        max_uas = 0.
        cnt = 0
        for epoch in range(1, args.epochs + 1):
            start = datetime.now()
            self.train(train.loader)
            logging.info(f"Epoch {epoch} / {args.epochs}:")
            dev_loss, dev_metric = self.evaluate(dev40.loader)
            test_loss, test_metric = self.evaluate(test40.loader)
            logging.info(f"{'dev40:':4} Loss: {dev_loss:.4f} {dev_metric}")
            logging.info(f"{'test40:':4} Loss: {test_loss:.4f} {test_metric}")
            dev_loss, dev_metric = self.evaluate(dev.loader)
            test_loss, test_metric = self.evaluate(test.loader)
            logging.info(f"{'dev:':4} Loss: {dev_loss:.4f} {dev_metric}")
            logging.info(f"{'test:':4} Loss: {test_loss:.4f} {test_metric}")
            t = datetime.now() - start
            logging.info(f"{t}s elapsed\n")
    else:
        for epoch in range(1, args.epochs + 1):
            start = datetime.now()
            if args.self_train:
                self.train(train.loader, train_arcs_preds)
            else:
                self.train(train.loader)
            logging.info(f"Epoch {epoch} / {args.epochs}:")
            if not args.self_train:
                dev_loss, dev_metric = self.evaluate(dev.loader)
                logging.info(f"{'dev:':4} Loss: {dev_loss:.4f} {dev_metric}")
            t = datetime.now() - start
            # save the model if it is the best so far
            if args.self_train:
                loss, test_metric = self.evaluate(test.loader)
                logging.info(f"{'test:':6} Loss: {loss:.4f} {test_metric}")
            else:
                if dev_metric > best_metric and epoch > args.patience:
                    loss, test_metric = self.evaluate(test.loader)
                    logging.info(f"{'test:':6} Loss: {loss:.4f} {test_metric}")
                    best_e, best_metric = epoch, dev_metric
                    if hasattr(self.model, 'module'):
                        self.model.module.save(args.model)
                    else:
                        self.model.save(args.model)
                    logging.info(f"{t}s elapsed, best epoch {best_e} {best_metric} (saved)\n")
                else:
                    logging.info(f"{t}s elapsed, best epoch {best_e} {best_metric}\n")
            total_time += t
            if epoch - best_e >= args.patience:
                break
        if not args.self_train:
            self.model = Model.load(args.model)
            logging.info(f"max score of dev is {best_metric.score:.2%} at epoch {best_e}")
            loss, metric = self.evaluate(test.loader)
            logging.info(f"the score of test at epoch {best_e} is {metric.score:.2%}")
        logging.info(f"average time of each epoch is {total_time / epoch}s, "
                     f"{total_time}s elapsed")

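# Standalone arithmetic check (illustrative numbers) for the GPU memory grab
# in the routine above: one float32 is 4 bytes, so a (256, 1024, block_mem)
# tensor holds 256 * 1024 * block_mem floats, i.e. exactly block_mem MiB,
# matching the MiB units nvidia-smi reports with --format=nounits.
total, used = 11019, 1205        # example MiB readings from nvidia-smi
max_mem = int(total * 0.96)      # target roughly 96% of the card, as above
block_mem = max_mem - used
n_bytes = 256 * 1024 * block_mem * 4
assert n_bytes == block_mem * 1024 ** 2  # exactly block_mem MiB
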
def __call__(self, config):
    print("Preprocess the data")
    train = Corpus.load(config.ftrain)
    dev = Corpus.load(config.fdev)
    test = Corpus.load(config.ftest)

    if not path.exists(config.model):
        os.mkdir(config.model)
    if not path.exists("model/"):
        os.mkdir("model/")
    if not path.exists(config.model + config.modelname):
        os.mkdir(config.model + config.modelname)

    if config.checkpoint:
        vocab = torch.load(config.main_path + config.vocab +
                           config.modelname + "/vocab.tag")
    else:
        vocab = Vocab.from_corpus(config=config,
                                  corpus=train,
                                  corpus_dev=dev,
                                  corpus_test=test,
                                  min_freq=0)
    train_seq = read_seq(config.ftrain_seq, vocab)
    total_act = 0
    for x in train_seq:
        total_act += len(x)
    print("number of transitions: {}".format(total_act))
    torch.save(vocab, config.vocab + config.modelname + "/vocab.tag")
    config.update({
        'n_words': vocab.n_train_words,
        'n_tags': vocab.n_tags,
        'n_rels': vocab.n_rels,
        'n_trans': vocab.n_trans,
        'pad_index': vocab.pad_index,
        'unk_index': vocab.unk_index
    })

    print("Load the dataset")
    trainset = TextDataset(vocab.numericalize(train, train_seq))
    devset = TextDataset(vocab.numericalize(dev))
    testset = TextDataset(vocab.numericalize(test))
    # set the data loaders
    train_loader, _ = batchify(dataset=trainset,
                               batch_size=config.batch_size,
                               n_buckets=config.buckets,
                               shuffle=True)
    dev_loader, _ = batchify(dataset=devset,
                             batch_size=config.batch_size,
                             n_buckets=config.buckets)
    test_loader, _ = batchify(dataset=testset,
                              batch_size=config.batch_size,
                              n_buckets=config.buckets)
    print(f"{'train:':6} {len(trainset):5} sentences in total, "
          f"{len(train_loader):3} batches provided")
    print(f"{'dev:':6} {len(devset):5} sentences in total, "
          f"{len(dev_loader):3} batches provided")
    print(f"{'test:':6} {len(testset):5} sentences in total, "
          f"{len(test_loader):3} batches provided")

    print("Create the model")
    if config.checkpoint:
        parser = Parser.load(config.main_path + config.model +
                             config.modelname + "/parser-checkpoint")
    else:
        parser = Parser(config, vocab.bertmodel)
    print("number of parameters: {}".format(
        sum(p.numel() for p in parser.parameters() if p.requires_grad)))
    if torch.cuda.is_available():
        print('Train/Evaluate on GPU')
        device = torch.device('cuda')
        parser = parser.to(device)
    model = Model(vocab, parser, config, vocab.n_rels)

    total_time = timedelta()
    best_e, best_metric = 1, Metric()

    # prepare optimizers
    num_train_optimization_steps = int(config.epochs * len(train_loader))
    warmup_steps = int(config.warmupproportion * num_train_optimization_steps)
    # one for parsing parameters, one for BERT parameters
    if config.use_two_opts:
        model_nonbert = []
        model_bert = []
        layernorm_params = [
            'layernorm_key_layer', 'layernorm_value_layer',
            'dp_relation_k', 'dp_relation_v'
        ]
        for name, param in parser.named_parameters():
            if 'bert' in name and not any(nd in name for nd in layernorm_params):
                model_bert.append((name, param))
            else:
                model_nonbert.append((name, param))

        # prepare optimizer and schedule (linear warmup and decay)
        # for non-BERT parameters
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters_nonbert = [{
            'params': [p for n, p in model_nonbert
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': config.weight_decay
        }, {
            'params': [p for n, p in model_nonbert
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]
        model.optimizer_nonbert = AdamW(optimizer_grouped_parameters_nonbert,
                                        lr=config.lr2)
        model.scheduler_nonbert = get_linear_schedule_with_warmup(
            model.optimizer_nonbert,
            num_warmup_steps=warmup_steps,
            num_training_steps=num_train_optimization_steps)

        # prepare optimizer and schedule (linear warmup and decay)
        # for BERT parameters
        optimizer_grouped_parameters_bert = [{
            'params': [p for n, p in model_bert
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': config.weight_decay
        }, {
            'params': [p for n, p in model_bert
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]
        model.optimizer_bert = AdamW(optimizer_grouped_parameters_bert,
                                     lr=config.lr)
        model.scheduler_bert = get_linear_schedule_with_warmup(
            model.optimizer_bert,
            num_warmup_steps=warmup_steps,
            num_training_steps=num_train_optimization_steps)
    else:
        # prepare optimizer and schedule (linear warmup and decay)
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [p for n, p in parser.named_parameters()
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': config.weight_decay
        }, {
            'params': [p for n, p in parser.named_parameters()
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }]
        model.optimizer = AdamW(optimizer_grouped_parameters, lr=config.lr)
        model.scheduler = get_linear_schedule_with_warmup(
            model.optimizer,
            num_warmup_steps=warmup_steps,
            num_training_steps=num_train_optimization_steps)

    start_epoch = 1
    # load model, optimizer, and other parameters from a checkpoint
    if config.checkpoint:
        check_load = torch.load(config.main_path + config.model +
                                config.modelname + "/checkpoint")
        if config.use_two_opts:
            model.optimizer_bert.load_state_dict(check_load['optimizer_bert'])
            model.optimizer_nonbert.load_state_dict(check_load['optimizer_nonbert'])
            model.scheduler_bert.load_state_dict(check_load['lr_schedule_bert'])
            model.scheduler_nonbert.load_state_dict(check_load['lr_schedule_nonbert'])
        else:
            model.optimizer.load_state_dict(check_load['optimizer'])
            model.scheduler.load_state_dict(check_load['lr_schedule'])
        start_epoch = check_load['epoch'] + 1
        best_e = check_load['best_e']
        best_metric = check_load['best_metric']

    f1 = open(config.model + config.modelname + "/baseline.txt", "a")
    f1.write("New Model:\n")
    f1.close()

    for epoch in range(start_epoch, config.epochs + 1):
        start = datetime.now()
        # train one epoch and update the parameters
        model.train(train_loader)

        print(f"Epoch {epoch} / {config.epochs}:")
        f1 = open(config.model + config.modelname + "/baseline.txt", "a")
        dev_metric = model.evaluate(dev_loader, config.punct)
        f1.write(str(epoch) + "\n")
        print(f"{'dev:':6} {dev_metric}")
        f1.write(f"{'dev:':6} {dev_metric}")
        f1.write("\n")
        f1.close()
        t = datetime.now() - start
        # save the model if it is the best so far
        if dev_metric > best_metric:
            best_e, best_metric = epoch, dev_metric
            print(config.model + config.modelname + "/model_weights")
            model.parser.save(config.model + config.modelname + "/model_weights")
            print(f"{t}s elapsed (saved)\n")
        else:
            print(f"{t}s elapsed\n")
        total_time += t
        if epoch - best_e >= config.patience:
            break
        # save checkpoint
        if config.use_two_opts:
            checkpoint = {
                "epoch": epoch,
                "optimizer_bert": model.optimizer_bert.state_dict(),
                "lr_schedule_bert": model.scheduler_bert.state_dict(),
                "lr_schedule_nonbert": model.scheduler_nonbert.state_dict(),
                "optimizer_nonbert": model.optimizer_nonbert.state_dict(),
                'best_metric': best_metric,
                'best_e': best_e
            }
        else:
            checkpoint = {
                "epoch": epoch,
                "optimizer": model.optimizer.state_dict(),
                "lr_schedule": model.scheduler.state_dict(),
                'best_metric': best_metric,
                'best_e': best_e
            }
        torch.save(checkpoint, config.main_path + config.model +
                   config.modelname + "/checkpoint")
        parser.save(config.main_path + config.model +
                    config.modelname + "/parser-checkpoint")

    model.parser = Parser.load(config.model + config.modelname + "/model_weights")
    metric = model.evaluate(test_loader, config.punct)
    print(metric)
    print(f"max score of dev is {best_metric.score:.2%} at epoch {best_e}")
    print(f"the score of test at epoch {best_e} is {metric.score:.2%}")
    print(f"average time of each epoch is {total_time / epoch}s")
    print(f"{total_time}s elapsed")

def __call__(self, args):
    # override config from CLI parameters
    args = Config(args.conf).update(vars(args))
    args.n_attentions = args.use_attentions  # back compatibility
    # loads train corpus into self.trainset
    super().__call__(args)
    logger.info(f"Configuration parameters:\n{args}")

    #train = Corpus.load(args.ftrain, self.fields, args.max_sent_length)
    train = self.trainset
    dev = Corpus.load(args.fdev, self.fields, args.max_sent_length)
    if args.ftest:
        test = Corpus.load(args.ftest, self.fields, args.max_sent_length)

    train = TextDataset(train, self.fields, args.buckets)
    dev = TextDataset(dev, self.fields, args.buckets)
    if args.ftest:
        test = TextDataset(test, self.fields, args.buckets)
    # set the data loaders
    train.loader = batchify(train, args.batch_size, True)
    dev.loader = batchify(dev, args.batch_size)
    if args.ftest:
        test.loader = batchify(test, args.batch_size)
    logger.info(f"{'train:':6} {len(train):5} sentences, "
                f"{len(train.loader):3} batches, "
                f"{len(train.buckets)} buckets")
    logger.info(f"{'dev:':6} {len(dev):5} sentences, "
                f"{len(dev.loader):3} batches, "
                f"{len(dev.buckets)} buckets")
    if args.ftest:
        logger.info(f"{'test:':6} {len(test):5} sentences, "
                    f"{len(test.loader):3} batches, "
                    f"{len(test.buckets)} buckets")

    logger.info("Create the model")
    self.model = Model(args, mask_token_id=self.FEAT.mask_token_id)
    if self.WORD:
        self.model.load_pretrained(self.WORD.embed)
    self.model = self.model.to(args.device)
    if torch.cuda.device_count() > 1:
        self.model = TransparentDataParallel(self.model)
    logger.info(f"{self.model}\n")

    if args.optimizer == 'adamw':
        self.optimizer = AdamW(self.model.parameters(), args.lr,
                               (args.mu, args.nu), args.epsilon, args.decay)
        training_steps = len(train.loader) // self.args.accumulation_steps \
            * self.args.epochs
        warmup_steps = math.ceil(training_steps * self.args.warmup_steps_ratio)
        self.scheduler = get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=warmup_steps,
            num_training_steps=training_steps)
    else:
        self.optimizer = Adam(self.model.parameters(), args.lr,
                              (args.mu, args.nu), args.epsilon)
        self.scheduler = ExponentialLR(self.optimizer,
                                       args.decay ** (1 / args.decay_steps))

    total_time = timedelta()
    best_e, best_metric = 1, Metric()

    for epoch in range(1, args.epochs + 1):
        start = datetime.now()
        logger.info(f"Epoch {epoch} / {args.epochs}:")
        loss, train_metric = self.train(train.loader)
        logger.info(f"{'train:':6} Loss: {loss:.4f} {train_metric}")
        loss, dev_metric = self.evaluate(dev.loader)
        logger.info(f"{'dev:':6} Loss: {loss:.4f} {dev_metric}")
        if args.ftest:
            loss, test_metric = self.evaluate(test.loader)
            logger.info(f"{'test:':6} Loss: {loss:.4f} {test_metric}")
        t = datetime.now() - start
        # save the model if it is the best so far
        if dev_metric > best_metric and epoch > args.patience // 10:
            best_e, best_metric = epoch, dev_metric
            if hasattr(self.model, 'module'):
                self.model.module.save(args.model)
            else:
                self.model.save(args.model)
            logger.info(f"{t}s elapsed (saved)\n")
        else:
            logger.info(f"{t}s elapsed\n")
        total_time += t
        if epoch - best_e >= args.patience:
            break
    self.model = Model.load(args.model)
    if args.ftest:
        loss, metric = self.evaluate(test.loader)

    logger.info(f"max score of dev is {best_metric.score:.2%} at epoch {best_e}")
    if args.ftest:
        logger.info(f"the score of test at epoch {best_e} is {metric.score:.2%}")
    logger.info(f"average time of each epoch is {total_time / epoch}s")
    logger.info(f"{total_time}s elapsed")

def __call__(self, args):
    super(Train, self).__call__(args)

    train = Corpus.load(args.ftrain, self.fields)
    dev = Corpus.load(args.fdev, self.fields)
    test = Corpus.load(args.ftest, self.fields)

    train = TextDataset(train, self.fields, args.buckets)
    dev = TextDataset(dev, self.fields, args.buckets)
    test = TextDataset(test, self.fields, args.buckets)
    # set the data loaders
    train.loader = batchify(train, args.batch_size, True)
    dev.loader = batchify(dev, args.batch_size)
    test.loader = batchify(test, args.batch_size)
    print(f"{'train:':6} {len(train):5} sentences, "
          f"{len(train.loader):3} batches, "
          f"{len(train.buckets)} buckets")
    print(f"{'dev:':6} {len(dev):5} sentences, "
          f"{len(dev.loader):3} batches, "
          f"{len(dev.buckets)} buckets")
    print(f"{'test:':6} {len(test):5} sentences, "
          f"{len(test.loader):3} batches, "
          f"{len(test.buckets)} buckets")

    print("Create the model")
    self.model = Model(args).load_pretrained(self.WORD.embed)
    print(f"{self.model}\n")
    self.model = self.model.to(args.device)
    if torch.cuda.device_count() > 1:
        self.model = nn.DataParallel(self.model)
    self.optimizer = Adam(self.model.parameters(), args.lr,
                          (args.mu, args.nu), args.epsilon)
    self.scheduler = ExponentialLR(self.optimizer,
                                   args.decay ** (1 / args.decay_steps))

    total_time = timedelta()
    best_e, best_metric = 1, Metric()

    for epoch in range(1, args.epochs + 1):
        start = datetime.now()
        # train one epoch and update the parameters
        self.train(train.loader)

        print(f"Epoch {epoch} / {args.epochs}:")
        loss, train_metric = self.evaluate(train.loader)
        print(f"{'train:':6} Loss: {loss:.4f} {train_metric}")
        loss, dev_metric = self.evaluate(dev.loader)
        print(f"{'dev:':6} Loss: {loss:.4f} {dev_metric}")
        loss, test_metric = self.evaluate(test.loader)
        print(f"{'test:':6} Loss: {loss:.4f} {test_metric}")
        t = datetime.now() - start
        # save the model if it is the best so far
        if dev_metric > best_metric and epoch > args.patience:
            best_e, best_metric = epoch, dev_metric
            if hasattr(self.model, 'module'):
                self.model.module.save(args.model)
            else:
                self.model.save(args.model)
            print(f"{t}s elapsed (saved)\n")
        else:
            print(f"{t}s elapsed\n")
        total_time += t
        if epoch - best_e >= args.patience:
            break
    # also save the model from the final epoch
    # (note: this overwrites the best checkpoint saved above)
    if hasattr(self.model, 'module'):
        self.model.module.save(args.model)
    else:
        self.model.save(args.model)
    print(f"{t}s elapsed (saved)\n")
    self.model = Model.load(args.model)
    loss, metric = self.evaluate(test.loader)

    print(f"max score of dev is {best_metric.score:.2%} at epoch {best_e}")
    print(f"the score of test at epoch {best_e} is {metric.score:.2%}")
    print(f"average time of each epoch is {total_time / epoch}s")
    print(f"{total_time}s elapsed")

def __call__(self, config):
    print("Preprocess the data")
    if config.input_type == "conllu":
        train = UniversalDependenciesDatasetReader()
        train.load(config.ftrain)
        dev = UniversalDependenciesDatasetReader()
        dev.load(config.fdev)
        test = UniversalDependenciesDatasetReader()
        test.load(config.ftest)
    else:
        train = Corpus.load(config.ftrain)
        dev = Corpus.load(config.fdev)
        test = Corpus.load(config.ftest)
    if config.use_predicted:
        if config.input_type == "conllu":
            train_predicted = UniversalDependenciesDatasetReader()
            train_predicted.load(config.fpredicted_train)
            dev_predicted = UniversalDependenciesDatasetReader()
            dev_predicted.load(config.fpredicted_dev)
            test_predicted = UniversalDependenciesDatasetReader()
            test_predicted.load(config.fpredicted_test)
        else:
            train_predicted = Corpus.load(config.fpredicted_train)
            dev_predicted = Corpus.load(config.fpredicted_dev)
            test_predicted = Corpus.load(config.fpredicted_test)

    if not path.exists(config.main_path + "/exp"):
        os.mkdir(config.main_path + "/exp")
    if not path.exists(config.main_path + "/model"):
        os.mkdir(config.main_path + "/model")
    if not path.exists(config.main_path + config.model + config.modelname):
        os.mkdir(config.main_path + config.model + config.modelname)

    vocab = Vocab.from_corpus(config=config, corpus=train, min_freq=2)
    torch.save(vocab, config.main_path + config.vocab +
               config.modelname + "/vocab.tag")
    config.update({
        'n_words': vocab.n_train_words,
        'n_tags': vocab.n_tags,
        'n_rels': vocab.n_rels,
        'pad_index': vocab.pad_index,
        'unk_index': vocab.unk_index
    })

    print("Load the dataset")
    if config.use_predicted:
        trainset = TextDataset(vocab.numericalize(train, train_predicted))
        devset = TextDataset(vocab.numericalize(dev, dev_predicted))
        testset = TextDataset(vocab.numericalize(test, test_predicted))
    else:
        trainset = TextDataset(vocab.numericalize(train))
        devset = TextDataset(vocab.numericalize(dev))
        testset = TextDataset(vocab.numericalize(test))
    # set the data loaders
    train_loader, _ = batchify(dataset=trainset,
                               batch_size=config.batch_size,
                               n_buckets=config.buckets,
                               shuffle=True)
    dev_loader, _ = batchify(dataset=devset,
                             batch_size=config.batch_size,
                             n_buckets=config.buckets)
    test_loader, _ = batchify(dataset=testset,
                              batch_size=config.batch_size,
                              n_buckets=config.buckets)
    print(f"{'train:':6} {len(trainset):5} sentences in total, "
          f"{len(train_loader):3} batches provided")
    print(f"{'dev:':6} {len(devset):5} sentences in total, "
          f"{len(dev_loader):3} batches provided")
    print(f"{'test:':6} {len(testset):5} sentences in total, "
          f"{len(test_loader):3} batches provided")

    print("Create the model")
    parser = BiaffineParser(config, vocab.n_rels, vocab.bertmodel)
    print("number of parameters: {}".format(
        sum(p.numel() for p in parser.parameters() if p.requires_grad)))
    if torch.cuda.is_available():
        print('device: cuda')
        device = torch.device('cuda')
        parser = parser.to(device)
    # print(f"{parser}\n")
    model = Model(vocab, parser, config, vocab.n_rels)

    total_time = timedelta()
    best_e, best_metric = 1, Metric()

    num_train_optimization_steps = int(config.num_iter_encoder *
                                       config.epochs * len(train_loader))
    warmup_steps = int(config.warmupproportion * num_train_optimization_steps)
    if config.use_two_opts:
        model_nonbert = []
        model_bert = []
        layernorm_params = ['layernorm_key_layer', 'layernorm_value_layer',
                            'dp_relation_k', 'dp_relation_v']
        for name, param in parser.named_parameters():
            if 'bert' in name and not any(nd in name for nd in layernorm_params):
                model_bert.append((name, param))
            else:
                model_nonbert.append((name, param))

        # prepare optimizer and schedule (linear warmup and decay)
        # for non-BERT parameters
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters_nonbert = [
            {'params': [p for n, p in model_nonbert
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': config.weight_decay},
            {'params': [p for n, p in model_nonbert
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
        model.optimizer_nonbert = AdamW(optimizer_grouped_parameters_nonbert,
                                        lr=config.lr2)
        model.scheduler_nonbert = get_linear_schedule_with_warmup(
            model.optimizer_nonbert,
            num_warmup_steps=warmup_steps,
            num_training_steps=num_train_optimization_steps)

        # prepare optimizer and schedule (linear warmup and decay)
        # for BERT parameters
        optimizer_grouped_parameters_bert = [
            {'params': [p for n, p in model_bert
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': config.weight_decay},
            {'params': [p for n, p in model_bert
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
        model.optimizer_bert = AdamW(optimizer_grouped_parameters_bert,
                                     lr=config.lr1)
        model.scheduler_bert = get_linear_schedule_with_warmup(
            model.optimizer_bert,
            num_warmup_steps=warmup_steps,
            num_training_steps=num_train_optimization_steps)
    else:
        # prepare optimizer and schedule (linear warmup and decay)
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in parser.named_parameters()
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': config.weight_decay},
            {'params': [p for n, p in parser.named_parameters()
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
        model.optimizer = AdamW(optimizer_grouped_parameters, lr=config.lr1)
        model.scheduler = get_linear_schedule_with_warmup(
            model.optimizer,
            num_warmup_steps=warmup_steps,
            num_training_steps=num_train_optimization_steps)

    for epoch in range(1, config.epochs + 1):
        start = datetime.now()
        # train one epoch and update the parameters
        if config.use_predicted:
            model.train_predicted(train_loader)
        else:
            model.train(train_loader)

        print(f"Epoch {epoch} / {config.epochs}:")
        if config.use_predicted:
            loss, dev_metric = model.evaluate_predicted(dev_loader, config.punct)
        else:
            loss, dev_metric = model.evaluate(dev_loader, config.punct)
        print(f"{'dev:':6} Loss: {loss:.4f} {dev_metric}")
        if config.use_predicted:
            loss, test_metric = model.evaluate_predicted(test_loader, config.punct)
        else:
            loss, test_metric = model.evaluate(test_loader, config.punct)
        print(f"{'test:':6} Loss: {loss:.4f} {test_metric}")
        t = datetime.now() - start
        # save the model if it is the best so far
        if dev_metric > best_metric:
            best_e, best_metric = epoch, dev_metric
            print(config.model + config.modelname + "/model_weights")
            model.parser.save(config.main_path + config.model +
                              config.modelname + "/model_weights")
            print(f"{t}s elapsed (saved)\n")
        else:
            print(f"{t}s elapsed\n")
        total_time += t
        if epoch - best_e >= config.patience:
            break
    model.parser = BiaffineParser.load(config.main_path + config.model +
                                       config.modelname + "/model_weights")
    if config.use_predicted:
        loss, metric = model.evaluate_predicted(test_loader, config.punct)
    else:
        loss, metric = model.evaluate(test_loader, config.punct)
    print(metric)
    print(f"max score of dev is {best_metric.score:.2%} at epoch {best_e}")
    print(f"the score of test at epoch {best_e} is {metric.score:.2%}")
    print(f"average time of each epoch is {total_time / epoch}s")
    print(f"{total_time}s elapsed")

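# Standalone sketch (illustrative numbers) of the warmup arithmetic shared by
# the two BERT training routines above: with warmupproportion = 0.1, the
# learning rate ramps up linearly over the first 10% of optimizer steps and
# then decays linearly to zero over the rest.
epochs, batches_per_epoch, warmupproportion = 10, 500, 0.1
num_train_optimization_steps = int(epochs * batches_per_epoch)
warmup_steps = int(warmupproportion * num_train_optimization_steps)
assert (num_train_optimization_steps, warmup_steps) == (5000, 500)
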
def __call__(self, config):
    if not os.path.exists(config.file):
        os.mkdir(config.file)
    if config.preprocess or not os.path.exists(config.vocab):
        print("Preprocess the corpus")
        pos_train = Corpus.load(config.fptrain, [1, 4], config.pos)
        dep_train = Corpus.load(config.ftrain)
        pos_dev = Corpus.load(config.fpdev, [1, 4])
        dep_dev = Corpus.load(config.fdev)
        pos_test = Corpus.load(config.fptest, [1, 4])
        dep_test = Corpus.load(config.ftest)

        print("Create the vocab")
        vocab = Vocab.from_corpora(pos_train, dep_train, 2)
        vocab.read_embeddings(Embedding.load(config.fembed))

        print("Load the dataset")
        pos_trainset = TextDataset(vocab.numericalize(pos_train, False),
                                   config.buckets)
        dep_trainset = TextDataset(vocab.numericalize(dep_train),
                                   config.buckets)
        pos_devset = TextDataset(vocab.numericalize(pos_dev, False),
                                 config.buckets)
        dep_devset = TextDataset(vocab.numericalize(dep_dev), config.buckets)
        pos_testset = TextDataset(vocab.numericalize(pos_test, False),
                                  config.buckets)
        dep_testset = TextDataset(vocab.numericalize(dep_test), config.buckets)

        torch.save(vocab, config.vocab)
        torch.save(pos_trainset, os.path.join(config.file, 'pos_trainset'))
        torch.save(dep_trainset, os.path.join(config.file, 'dep_trainset'))
        torch.save(pos_devset, os.path.join(config.file, 'pos_devset'))
        torch.save(dep_devset, os.path.join(config.file, 'dep_devset'))
        torch.save(pos_testset, os.path.join(config.file, 'pos_testset'))
        torch.save(dep_testset, os.path.join(config.file, 'dep_testset'))
    else:
        print("Load the vocab")
        vocab = torch.load(config.vocab)
        print("Load the datasets")
        pos_trainset = torch.load(os.path.join(config.file, 'pos_trainset'))
        dep_trainset = torch.load(os.path.join(config.file, 'dep_trainset'))
        pos_devset = torch.load(os.path.join(config.file, 'pos_devset'))
        dep_devset = torch.load(os.path.join(config.file, 'dep_devset'))
        pos_testset = torch.load(os.path.join(config.file, 'pos_testset'))
        dep_testset = torch.load(os.path.join(config.file, 'dep_testset'))
    config.update({
        'n_words': vocab.n_init,
        'n_chars': vocab.n_chars,
        'n_pos_tags': vocab.n_pos_tags,
        'n_dep_tags': vocab.n_dep_tags,
        'n_rels': vocab.n_rels,
        'pad_index': vocab.pad_index,
        'unk_index': vocab.unk_index
    })

    # set the data loaders
    pos_train_loader = batchify(pos_trainset,
                                config.pos_batch_size // config.update_steps,
                                True)
    dep_train_loader = batchify(dep_trainset,
                                config.batch_size // config.update_steps,
                                True)
    pos_dev_loader = batchify(pos_devset, config.pos_batch_size)
    dep_dev_loader = batchify(dep_devset, config.batch_size)
    pos_test_loader = batchify(pos_testset, config.pos_batch_size)
    dep_test_loader = batchify(dep_testset, config.batch_size)

    print(vocab)
    print(f"{'pos_train:':10} {len(pos_trainset):7} sentences in total, "
          f"{len(pos_train_loader):4} batches provided")
    print(f"{'dep_train:':10} {len(dep_trainset):7} sentences in total, "
          f"{len(dep_train_loader):4} batches provided")
    print(f"{'pos_dev:':10} {len(pos_devset):7} sentences in total, "
          f"{len(pos_dev_loader):4} batches provided")
    print(f"{'dep_dev:':10} {len(dep_devset):7} sentences in total, "
          f"{len(dep_dev_loader):4} batches provided")
    print(f"{'pos_test:':10} {len(pos_testset):7} sentences in total, "
          f"{len(pos_test_loader):4} batches provided")
    print(f"{'dep_test:':10} {len(dep_testset):7} sentences in total, "
          f"{len(dep_test_loader):4} batches provided")

    print("Create the model")
    parser = BiaffineParser(config, vocab.embed).to(config.device)
    print(f"{parser}\n")
    model = Model(config, vocab, parser)

    total_time = timedelta()
    best_e, best_metric = 1, AttachmentMethod()
    model.optimizer = Adam(model.parser.parameters(),
                           config.lr,
                           (config.mu, config.nu),
                           config.epsilon)
    model.scheduler = ExponentialLR(model.optimizer,
                                    config.decay ** (1 / config.decay_steps))

    for epoch in range(1, config.epochs + 1):
        start = datetime.now()
        # train one epoch and update the parameters
        model.train(pos_train_loader, dep_train_loader)

        print(f"Epoch {epoch} / {config.epochs}:")
        lp, ld, mp, mdt, mdp = model.evaluate(None, dep_train_loader)
        print(f"{'train:':6} LP: {lp:.4f} LD: {ld:.4f} {mp} {mdt} {mdp}")
        lp, ld, mp, mdt, dev_m = model.evaluate(pos_dev_loader, dep_dev_loader)
        print(f"{'dev:':6} LP: {lp:.4f} LD: {ld:.4f} {mp} {mdt} {dev_m}")
        lp, ld, mp, mdt, mdp = model.evaluate(pos_test_loader, dep_test_loader)
        print(f"{'test:':6} LP: {lp:.4f} LD: {ld:.4f} {mp} {mdt} {mdp}")
        t = datetime.now() - start
        # save the model if it is the best so far
        if dev_m > best_metric and epoch > config.patience:
            best_e, best_metric = epoch, dev_m
            model.parser.save(config.model)
            print(f"{t}s elapsed (saved)\n")
        else:
            print(f"{t}s elapsed\n")
        total_time += t
        if epoch - best_e >= config.patience:
            break
    model.parser = BiaffineParser.load(config.model)
    lp, ld, mp, mdt, mdp = model.evaluate(pos_test_loader, dep_test_loader)

    print(f"max score of dev is {best_metric.score:.2%} at epoch {best_e}")
    print(f"the score of test at epoch {best_e} is {mdp.score:.2%}")
    print(f"average time of each epoch is {total_time / epoch}s")
    print(f"{total_time}s elapsed")

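# Standalone note (illustrative numbers) on the `// config.update_steps`
# division in the train loaders above: under the assumption that gradients are
# accumulated over update_steps consecutive batches before each optimizer
# step, the effective batch size stays equal to the configured one.
batch_size, update_steps = 5000, 4
per_loader_batch = batch_size // update_steps
assert per_loader_batch * update_steps == batch_size  # effective batch size preserved
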