def main(args):
    """Evaluate a restored CharCNN checkpoint on the dataset split named by ``args.data``.

    Loads the dataset/model configs, restores the best checkpoint from the
    experiment directory, scores the selected split, merges the metrics into
    summary.json, and prints them.
    """
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)

    # experiment directory is keyed by model type plus the training hyperparameters
    exp_dir = Path("experiments") / model_config.type
    exp_dir = exp_dir.joinpath(
        f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
    )

    # model (restore best checkpoint)
    tokenizer = get_tokenizer(dataset_config, model_config)
    checkpoint_manager = CheckpointManager(exp_dir)
    checkpoint = checkpoint_manager.load_checkpoint("best.tar")
    model = CharCNN(
        num_classes=model_config.num_classes,
        embedding_dim=model_config.embedding_dim,
        vocab=tokenizer.vocab,
    )
    model.load_state_dict(checkpoint["model_state_dict"])

    # evaluation: args.data names the config attribute holding the split's filepath
    summary_manager = SummaryManager(exp_dir)
    filepath = getattr(dataset_config, args.data)
    ds = Corpus(filepath, tokenizer.split_and_transform)
    dl = DataLoader(ds, batch_size=args.batch_size, num_workers=4)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    summary = evaluate(model, dl, {"loss": nn.CrossEntropyLoss(), "acc": acc}, device)

    # merge this split's metrics into the persisted summary
    summary_manager.load("summary.json")
    summary_manager.update({args.data: summary})  # getattr above requires args.data to be a str
    summary_manager.save("summary.json")
    print(f"loss: {summary['loss']:.3f}, acc: {summary['acc']:.2%}")
def main(args):
    """Restore the best SAN checkpoint and score it on the split selected by ``args.data``."""
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)

    hparam_tag = f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
    exp_dir = Path("experiments") / model_config.type / hparam_tag

    preprocessor = get_preprocessor(
        dataset_config, coarse_split_fn=split_morphs, fine_split_fn=split_jamos
    )

    # model (restore): load the weights saved as the best checkpoint for this run
    checkpoint = CheckpointManager(exp_dir).load_checkpoint("best.tar")
    model = SAN(
        model_config.num_classes,
        preprocessor.coarse_vocab,
        preprocessor.fine_vocab,
        model_config.fine_embedding_dim,
        model_config.hidden_dim,
        model_config.multi_step,
        model_config.prediction_drop_ratio,
    )
    model.load_state_dict(checkpoint["model_state_dict"])

    # evaluation: build a loader over the requested split
    ds = Corpus(getattr(dataset_config, args.data), preprocessor.preprocess)
    dl = DataLoader(ds, batch_size=args.batch_size, num_workers=4, collate_fn=batchify)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # score the model and persist the metrics
    summary_manager = SummaryManager(exp_dir)
    summary = evaluate(model, dl, {"loss": log_loss, "acc": acc}, device)
    summary_manager.load("summary.json")
    summary_manager.update({f"{args.data}": summary})
    summary_manager.save("summary.json")
    print(f"loss: {summary['loss']:.3f}, acc: {summary['acc']:.2%}")
def main(args):
    """Evaluate a restored seq2seq model (BidiEncoder + AttnDecoder) and report perplexity.

    Restores the best checkpoint, runs evaluation on the split named by
    ``args.data``, writes perplexity into summary.json, and prints it.
    """
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)
    exp_dir = Path("experiments") / model_config.type
    exp_dir = exp_dir.joinpath(
        f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
        f"_teacher_forcing_ratio_{args.teacher_forcing_ratio}"
    )
    src_processor, tgt_processor = get_processor(dataset_config)

    # model (restore)
    encoder = BidiEncoder(src_processor.vocab, model_config.encoder_hidden_dim, model_config.drop_ratio)
    decoder = AttnDecoder(
        tgt_processor.vocab,
        model_config.method,
        model_config.encoder_hidden_dim * 2,  # bidirectional encoder doubles the feature dim
        model_config.decoder_hidden_dim,
        model_config.drop_ratio,
    )
    checkpoint_manager = CheckpointManager(exp_dir)
    checkpoint = checkpoint_manager.load_checkpoint("best.tar")
    encoder.load_state_dict(checkpoint["encoder_state_dict"])
    decoder.load_state_dict(checkpoint["decoder_state_dict"])
    encoder.eval()
    decoder.eval()

    # evaluation
    summary_manager = SummaryManager(exp_dir)
    filepath = getattr(dataset_config, args.data)
    ds = NMTCorpus(filepath, src_processor.process, tgt_processor.process)
    dl = DataLoader(
        ds,
        args.batch_size,
        shuffle=False,
        num_workers=4,
        collate_fn=batchify,
        drop_last=False,
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    encoder.to(device)
    decoder.to(device)
    loss = evaluate(encoder, decoder, tgt_processor.vocab, dl, device)

    perplexity = np.exp(loss)  # compute once; previously recomputed for printing
    summary = {"perplexity": perplexity}
    summary_manager.load("summary.json")
    summary_manager.update({args.data: summary})  # getattr above requires args.data to be a str
    summary_manager.save("summary.json")
    print(f"perplexity: {perplexity:.3f}")
def main(args):
    """Evaluate the best self-attentive network (SAN) checkpoint on a chosen dataset split."""
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)

    run_name = f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
    exp_dir = Path("experiments") / model_config.type / run_name
    tokenizer = get_tokenizer(dataset_config)

    # model (restore)
    checkpoint = CheckpointManager(exp_dir).load_checkpoint("best.tar")
    model = SAN(
        num_classes=model_config.num_classes,
        lstm_hidden_dim=model_config.lstm_hidden_dim,
        da=model_config.da,
        r=model_config.r,
        hidden_dim=model_config.hidden_dim,
        vocab=tokenizer.vocab,
    )
    model.load_state_dict(checkpoint["model_state_dict"])

    # evaluation
    ds = Corpus(getattr(dataset_config, args.data), tokenizer.split_and_transform)
    dl = DataLoader(ds, batch_size=args.batch_size, num_workers=4, collate_fn=batchify)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    summary_manager = SummaryManager(exp_dir)
    metrics = {"loss": nn.CrossEntropyLoss(), "acc": acc}
    summary = evaluate(model, dl, metrics, device)

    # persist the new metrics, then report them
    summary_manager.load("summary.json")
    summary_manager.update({f"{args.data}": summary})
    summary_manager.save("summary.json")
    print("loss: {:.3f}, acc: {:.2%}".format(summary["loss"], summary["acc"]))
def main(args):
    """Evaluate a fine-tuned BERT sentence classifier on the split named by ``args.data``.

    Restores the best checkpoint from the experiment directory, scores the
    selected split, merges the metrics into summary.json, and prints them.
    """
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)
    ptr_config_info = Config(f"conf/pretrained/{model_config.type}.json")

    exp_dir = Path("experiments") / model_config.type
    exp_dir = exp_dir.joinpath(
        f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
        f"_weight_decay_{args.weight_decay}"
    )
    preprocessor = get_preprocessor(ptr_config_info, model_config)

    # handle renamed from "io" -- that name shadows the stdlib io module
    with open(ptr_config_info.config, mode="r") as fp:
        ptr_config = json.load(fp)

    # model (restore)
    checkpoint_manager = CheckpointManager(exp_dir)
    checkpoint = checkpoint_manager.load_checkpoint('best.tar')
    config = BertConfig()
    config.update(ptr_config)  # overlay the pretrained hyperparameters
    model = SentenceClassifier(config, num_classes=model_config.num_classes, vocab=preprocessor.vocab)
    model.load_state_dict(checkpoint['model_state_dict'])

    # evaluation
    filepath = getattr(dataset_config, args.data)
    ds = Corpus(filepath, preprocessor.preprocess)
    dl = DataLoader(ds, batch_size=args.batch_size, num_workers=4)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    summary_manager = SummaryManager(exp_dir)
    summary = evaluate(model, dl, {'loss': nn.CrossEntropyLoss(), 'acc': acc}, device)
    summary_manager.load('summary.json')
    summary_manager.update({args.data: summary})  # getattr above requires args.data to be a str
    summary_manager.save('summary.json')
    print(f"loss: {summary['loss']:.3f}, acc: {summary['acc']:.2%}")
# model (restore): load the classifier weights saved for this run type
checkpoint_manager = CheckpointManager(model_dir)
checkpoint = checkpoint_manager.load_checkpoint(f"best_{args.type}.tar")
config = BertConfig(ptr_config.config)
model = PairwiseClassifier(config, num_classes=model_config.num_classes, vocab=preprocessor.vocab)
model.load_state_dict(checkpoint['model_state_dict'])

# evaluation: score the requested dataset split
filepath = getattr(data_config, args.dataset)
ds = Corpus(filepath, preprocessor.preprocess)
dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

summary_manager = SummaryManager(model_dir)
summary = evaluate(model, dl, {'loss': nn.CrossEntropyLoss(), 'acc': acc}, device)

# fold the metrics into the per-type summary file and report them
summary_filename = f"summary_{args.type}.json"
summary_manager.load(summary_filename)
summary_manager.update({f"{args.dataset}": summary})
summary_manager.save(summary_filename)
print(f"loss: {summary['loss']:.3f}, acc: {summary['acc']:.2%}")
pad_fn=pad_sequence) # model (restore) checkpoint_manager = CheckpointManager(model_dir) checkpoint = checkpoint_manager.load_checkpoint(args.restore_file + '.tar') model = CharCNN(num_classes=model_config.num_classes, embedding_dim=model_config.embedding_dim, vocab=tokenizer.vocab) model.load_state_dict(checkpoint['model_state_dict']) # evaluation summary_manager = SummaryManager(model_dir) filepath = getattr(data_config, args.data_name) ds = Corpus(filepath, tokenizer.split_and_transform) dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4) device = torch.device( 'cuda') if torch.cuda.is_available() else torch.device('cpu') model.to(device) summary = evaluate(model, dl, { 'loss': nn.CrossEntropyLoss(), 'acc': acc }, device) summary_manager.load('summary.json') summary_manager.update({'{}'.format(args.data_name): summary}) summary_manager.save('summary.json') print('loss: {:.3f}, acc: {:.2%}'.format(summary['loss'], summary['acc']))
model = MaLSTM(
    num_classes=model_config.num_classes,
    hidden_dim=model_config.hidden_dim,
    vocab=tokenizer.vocab,
)
model.load_state_dict(checkpoint["model_state_dict"])

# evaluation: build a loader over the requested split and score the model
filepath = getattr(data_config, args.dataset)
ds = Corpus(filepath, tokenizer.split_and_transform)
dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4, collate_fn=batchify)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

summary_manager = SummaryManager(model_dir)
summary = evaluate(model, dl, {"loss": nn.CrossEntropyLoss(), "acc": acc}, device)

# fold the new metrics into the persisted summary and report them
summary_manager.load("summary.json")
summary_manager.update({f"{args.dataset}": summary})
summary_manager.save("summary.json")
print(f"loss: {summary['loss']:.3f}, acc: {summary['acc']:.2%}")
help='Set GPU for training') if __name__ == '__main__': args = parser.parse_args() data_dir = Path(args.data_dir) / args.data restore_dir = Path(args.restore_dir) / args.data assert args.data in ['wikidatasets', 'fb15k'], "Invalid knowledge graph dataset" if args.data == 'wikidatasets': data_dir = data_dir / args.which restore_dir = restore_dir / args.which restore_dir = restore_dir / args.model summary_manager = SummaryManager(restore_dir) summary_manager.load(f'summary_{args.model}.json') previous_summary = summary_manager.summary ent_dim = previous_summary['Experiment Summary']['entity dimension'] rel_dim = previous_summary['Experiment Summary']['relation dimension'] limit = previous_summary['Experiment Summary']['limit'] margin = previous_summary['Experiment Summary']['margin'] with open(data_dir / 'kg_test.pkl', mode='rb') as io: kg_test = pickle.load(io) with open(data_dir / 'kg_valid.pkl', mode='rb') as io: kg_valid = pickle.load(io) # restore model assert args.model in ['TransE', 'TransR', 'DistMult', 'TransD'], "Invalid Knowledge Graph Embedding Model" if args.model == 'TransE':
# model (restore): fetch the best checkpoint for the chosen pretrained config
checkpoint_manager = CheckpointManager(model_dir)
checkpoint = checkpoint_manager.load_checkpoint(f"best_snu_{args.pretrained_config}.tar")
config = BertConfig(ptr_config.config)
model = SentenceClassifier(config, num_classes=model_config.num_classes, vocab=preprocessor.vocab)
model.load_state_dict(checkpoint['model_state_dict'])

# evaluation
filepath = getattr(data_config, args.dataset)
ds = Corpus(filepath, preprocessor.preprocess)
dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

summary_manager = SummaryManager(model_dir)
summary = evaluate(model, dl, {'loss': nn.CrossEntropyLoss(), 'acc': acc}, device)

# metrics are persisted per pretrained-config, then echoed to stdout
summary_filename = f"summary_snu_{args.pretrained_config}.json"
summary_manager.load(summary_filename)
summary_manager.update({f"{args.dataset}": summary})
summary_manager.save(summary_filename)
print(f"loss: {summary['loss']:.3f}, acc: {summary['acc']:.2%}")