def main(args):
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)
    exp_dir = Path("experiments") / model_config.type
    exp_dir = exp_dir.joinpath(
        f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
    )
    preprocessor = get_preprocessor(dataset_config, coarse_split_fn=split_morphs, fine_split_fn=split_jamos)

    # model (restore)
    checkpoint_manager = CheckpointManager(exp_dir)
    checkpoint = checkpoint_manager.load_checkpoint("best.tar")
    model = SAN(model_config.num_classes, preprocessor.coarse_vocab, preprocessor.fine_vocab,
                model_config.fine_embedding_dim, model_config.hidden_dim, model_config.multi_step,
                model_config.prediction_drop_ratio)
    model.load_state_dict(checkpoint["model_state_dict"])

    # evaluation
    filepath = getattr(dataset_config, args.data)
    ds = Corpus(filepath, preprocessor.preprocess)
    dl = DataLoader(ds, batch_size=args.batch_size, num_workers=4, collate_fn=batchify)

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    summary_manager = SummaryManager(exp_dir)
    summary = evaluate(model, dl, {"loss": log_loss, "acc": acc}, device)

    summary_manager.load("summary.json")
    summary_manager.update({f"{args.data}": summary})
    summary_manager.save("summary.json")

    print(f"loss: {summary['loss']:.3f}, acc: {summary['acc']:.2%}")
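# A minimal sketch of the command-line entry point that could drive main(args) above.
# The argument names mirror the attributes main() reads (dataset_config, model_config,
# data, epochs, batch_size, learning_rate); the default paths and values here are
# assumptions for illustration, not taken from the original script.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="evaluate a trained SAN model")
    parser.add_argument("--dataset_config", default="conf/dataset/config.json",
                        help="path to the dataset config (assumed default)")
    parser.add_argument("--model_config", default="conf/model/san.json",
                        help="path to the model config (assumed default)")
    parser.add_argument("--data", default="test",
                        help="attribute of the dataset config naming the split to evaluate")
    parser.add_argument("--epochs", type=int, default=5,
                        help="used only to locate the experiment directory")
    parser.add_argument("--batch_size", type=int, default=64)
    parser.add_argument("--learning_rate", type=float, default=1e-3)
    args = parser.parse_args()
    main(args)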
def main(json_path):
    cwd = Path.cwd()
    with open(cwd / json_path) as io:
        params = json.loads(io.read())

    # tokenizer
    vocab_path = params['filepath'].get('vocab')
    with open(cwd / vocab_path, mode='rb') as io:
        vocab = pickle.load(io)
    tokenizer = Tokenizer(vocab=vocab, split_fn=MeCab().morphs)

    # model (restore)
    save_path = cwd / params['filepath'].get('ckpt')
    ckpt = torch.load(save_path)
    num_classes = params['model'].get('num_classes')
    lstm_hidden_dim = params['model'].get('lstm_hidden_dim')
    hidden_dim = params['model'].get('hidden_dim')
    da = params['model'].get('da')
    r = params['model'].get('r')
    model = SAN(num_classes=num_classes, lstm_hidden_dim=lstm_hidden_dim, hidden_dim=hidden_dim,
                da=da, r=r, vocab=tokenizer.vocab)
    model.load_state_dict(ckpt['model_state_dict'])

    # evaluation
    batch_size = params['training'].get('batch_size')
    tr_path = cwd / params['filepath'].get('tr')
    val_path = cwd / params['filepath'].get('val')
    tr_ds = Corpus(tr_path, tokenizer.split_and_transform)
    tr_dl = DataLoader(tr_ds, batch_size=batch_size, num_workers=4, collate_fn=batchify)
    val_ds = Corpus(val_path, tokenizer.split_and_transform)
    val_dl = DataLoader(val_ds, batch_size=batch_size, num_workers=4, collate_fn=batchify)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    tr_acc = get_accuracy(model, tr_dl, device)
    val_acc = get_accuracy(model, val_dl, device)

    print('tr_acc: {:.2%}, val_acc: {:.2%}'.format(tr_acc, val_acc))
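# get_accuracy is called above but defined elsewhere in the repo; a minimal sketch
# under the assumption that each batch from the DataLoader is a (token_ids, labels)
# pair and that the model returns class logits. Illustration only.
import torch


def get_accuracy(model, data_loader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x_mb, y_mb in data_loader:
            x_mb, y_mb = x_mb.to(device), y_mb.to(device)
            logits = model(x_mb)
            # count top-1 hits over the whole loader
            correct += (logits.argmax(dim=-1) == y_mb).sum().item()
            total += y_mb.size(0)
    return correct / total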
def main(args):
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)
    exp_dir = Path("experiments") / model_config.type
    exp_dir = exp_dir.joinpath(
        f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
    )
    tokenizer = get_tokenizer(dataset_config)

    # model (restore)
    checkpoint_manager = CheckpointManager(exp_dir)
    checkpoint = checkpoint_manager.load_checkpoint("best.tar")
    model = SAN(num_classes=model_config.num_classes, lstm_hidden_dim=model_config.lstm_hidden_dim,
                da=model_config.da, r=model_config.r, hidden_dim=model_config.hidden_dim,
                vocab=tokenizer.vocab)
    model.load_state_dict(checkpoint["model_state_dict"])

    # evaluation
    filepath = getattr(dataset_config, args.data)
    ds = Corpus(filepath, tokenizer.split_and_transform)
    dl = DataLoader(ds, batch_size=args.batch_size, num_workers=4, collate_fn=batchify)

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    summary_manager = SummaryManager(exp_dir)
    summary = evaluate(model, dl, {"loss": nn.CrossEntropyLoss(), "acc": acc}, device)

    summary_manager.load("summary.json")
    summary_manager.update({f"{args.data}": summary})
    summary_manager.save("summary.json")

    print("loss: {:.3f}, acc: {:.2%}".format(summary["loss"], summary["acc"]))
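# evaluate() and acc() are used above but not shown; a rough sketch of their contract,
# assuming batches are (token_ids, labels) pairs and each metric maps (logits, labels)
# to a scalar tensor, averaged over the dataset. This is an illustration of the calling
# convention, not the repo's actual implementation.
import torch
import torch.nn as nn


def acc(logits, labels):
    # fraction of correct top-1 predictions in the batch
    return (logits.argmax(dim=-1) == labels).float().mean()


def evaluate(model, data_loader, metrics, device):
    model.eval()
    totals = {name: 0.0 for name in metrics}
    count = 0
    with torch.no_grad():
        for x_mb, y_mb in data_loader:
            x_mb, y_mb = x_mb.to(device), y_mb.to(device)
            logits = model(x_mb)
            for name, metric in metrics.items():
                # weight each batch by its size so the result is a dataset-level average
                totals[name] += metric(logits, y_mb).item() * y_mb.size(0)
            count += y_mb.size(0)
    return {name: total / count for name, total in totals.items()}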
with open(data_config.vocab, mode="rb") as io: vocab = pickle.load(io) tokenizer = Tokenizer(vocab, split_morphs) # model (restore) checkpoint_manager = CheckpointManager(model_dir) checkpoint = checkpoint_manager.load_checkpoint("best.tar") model = SAN( num_classes=model_config.num_classes, lstm_hidden_dim=model_config.lstm_hidden_dim, hidden_dim=model_config.hidden_dim, da=model_config.da, r=model_config.r, vocab=tokenizer.vocab, ) model.load_state_dict(checkpoint["model_state_dict"]) # evaluation filepath = getattr(data_config, args.dataset) ds = Corpus(filepath, tokenizer.split_and_transform) dl = DataLoader( ds, batch_size=model_config.batch_size, num_workers=4, collate_fn=batchify ) device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") model.to(device) summary_manager = SummaryManager(model_dir) summary = evaluate(model, dl, {"loss": nn.CrossEntropyLoss(), "acc": acc}, device) summary_manager.load("summary.json")
# tokenizer
with open(data_config.vocab, mode='rb') as io:
    vocab = pickle.load(io)
tokenizer = Tokenizer(vocab=vocab, split_fn=split_morphs)

# model (restore)
checkpoint_manager = CheckpointManager(model_dir)
checkpoint = checkpoint_manager.load_checkpoint('best.tar')
model = SAN(num_classes=model_config.num_classes, lstm_hidden_dim=model_config.lstm_hidden_dim,
            da=model_config.da, r=model_config.r, hidden_dim=model_config.hidden_dim,
            vocab=tokenizer.vocab)
model.load_state_dict(checkpoint['model_state_dict'])

# evaluation
summary_manager = SummaryManager(model_dir)
filepath = getattr(data_config, args.dataset)
ds = Corpus(filepath, tokenizer.split_and_transform)
dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4, collate_fn=batchify)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

summary = evaluate(model, dl, {'loss': nn.CrossEntropyLoss(), 'acc': acc}, device)
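# CheckpointManager and SummaryManager are small experiment-tracking helpers used in
# several of the scripts above; a minimal sketch of the interface those call sites
# imply (load_checkpoint for checkpoints, load/update/save for JSON summaries).
# The file layout and serialization details are assumptions, not the repo's code.
import json
import torch
from pathlib import Path


class CheckpointManager:
    def __init__(self, model_dir):
        self._model_dir = Path(model_dir)

    def load_checkpoint(self, filename):
        # checkpoints such as best.tar are assumed to be torch.save()'d dicts
        return torch.load(self._model_dir / filename, map_location="cpu")


class SummaryManager:
    def __init__(self, model_dir):
        self._model_dir = Path(model_dir)
        self._summary = {}

    def load(self, filename):
        with open(self._model_dir / filename) as io:
            self._summary = json.load(io)

    def update(self, entry):
        self._summary.update(entry)

    def save(self, filename):
        with open(self._model_dir / filename, mode="w") as io:
            json.dump(self._summary, io, indent=4)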