Example #1
def main(args):
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)
    exp_dir = Path("experiments") / model_config.type
    exp_dir = exp_dir.joinpath(
        f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
    )

    tokenizer = get_tokenizer(dataset_config, model_config)

    checkpoint_manager = CheckpointManager(exp_dir)
    checkpoint = checkpoint_manager.load_checkpoint("best.tar")
    model = CharCNN(num_classes=model_config.num_classes,
                    embedding_dim=model_config.embedding_dim,
                    vocab=tokenizer.vocab)
    model.load_state_dict(checkpoint["model_state_dict"])

    summary_manager = SummaryManager(exp_dir)
    filepath = getattr(dataset_config, args.data)
    ds = Corpus(filepath, tokenizer.split_and_transform)
    dl = DataLoader(ds, batch_size=args.batch_size, num_workers=4)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    summary = evaluate(model, dl, {
        "loss": nn.CrossEntropyLoss(),
        "acc": acc
    }, device)

    summary_manager.load("summary.json")
    summary_manager.update({f"{args.data}": summary})
    summary_manager.save("summary.json")
    print(f"loss: {summary['loss']:.3f}, acc: {summary['acc']:.2%}")
Example #2
def main(args):
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)

    exp_dir = Path("experiments") / model_config.type
    exp_dir = exp_dir.joinpath(
        f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
    )

    preprocessor = get_preprocessor(dataset_config, coarse_split_fn=split_morphs, fine_split_fn=split_jamos)

    # model (restore)
    checkpoint_manager = CheckpointManager(exp_dir)
    checkpoint = checkpoint_manager.load_checkpoint("best.tar")
    model = SAN(model_config.num_classes, preprocessor.coarse_vocab, preprocessor.fine_vocab,
                model_config.fine_embedding_dim, model_config.hidden_dim, model_config.multi_step,
                model_config.prediction_drop_ratio)
    model.load_state_dict(checkpoint["model_state_dict"])

    # evaluation
    filepath = getattr(dataset_config, args.data)
    ds = Corpus(filepath, preprocessor.preprocess)
    dl = DataLoader(ds, batch_size=args.batch_size, num_workers=4, collate_fn=batchify)

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    summary_manager = SummaryManager(exp_dir)
    summary = evaluate(model, dl, {"loss": log_loss, "acc": acc}, device)

    summary_manager.load("summary.json")
    summary_manager.update({f"{args.data}": summary})
    summary_manager.save("summary.json")

    print(f"loss: {summary['loss']:.3f}, acc: {summary['acc']:.2%}")
Example #3
def main(args):
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)

    exp_dir = Path("experiments") / model_config.type
    exp_dir = exp_dir.joinpath(
        f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
        f"_teacher_forcing_ratio_{args.teacher_forcing_ratio}")

    src_processor, tgt_processor = get_processor(dataset_config)

    # model (restore)
    encoder = BidiEncoder(src_processor.vocab, model_config.encoder_hidden_dim,
                          model_config.drop_ratio)
    decoder = AttnDecoder(
        tgt_processor.vocab,
        model_config.method,
        model_config.encoder_hidden_dim * 2,
        model_config.decoder_hidden_dim,
        model_config.drop_ratio,
    )

    checkpoint_manager = CheckpointManager(exp_dir)
    checkpoint = checkpoint_manager.load_checkpoint("best.tar")
    encoder.load_state_dict(checkpoint["encoder_state_dict"])
    decoder.load_state_dict(checkpoint["decoder_state_dict"])

    encoder.eval()
    decoder.eval()

    # evaluation
    summary_manager = SummaryManager(exp_dir)
    filepath = getattr(dataset_config, args.data)
    ds = NMTCorpus(filepath, src_processor.process, tgt_processor.process)
    dl = DataLoader(
        ds,
        args.batch_size,
        shuffle=False,
        num_workers=4,
        collate_fn=batchify,
        drop_last=False,
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    encoder.to(device)
    decoder.to(device)

    loss = evaluate(encoder, decoder, tgt_processor.vocab, dl, device)
    summary = {"perplexity": np.exp(loss)}
    summary_manager.load("summary.json")
    summary_manager.update({"{}".format(args.data): summary})
    summary_manager.save("summary.json")
    print("perplexity: {:.3f}".format(np.exp(loss)))
Example #4
def main(args):
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)

    exp_dir = Path("experiments") / model_config.type
    exp_dir = exp_dir.joinpath(
        f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
    )

    tokenizer = get_tokenizer(dataset_config)

    # model (restore)
    checkpoint_manager = CheckpointManager(exp_dir)
    checkpoint = checkpoint_manager.load_checkpoint("best.tar")
    model = SAN(num_classes=model_config.num_classes,
                lstm_hidden_dim=model_config.lstm_hidden_dim,
                da=model_config.da,
                r=model_config.r,
                hidden_dim=model_config.hidden_dim,
                vocab=tokenizer.vocab)
    model.load_state_dict(checkpoint["model_state_dict"])

    # evaluation
    filepath = getattr(dataset_config, args.data)
    ds = Corpus(filepath, tokenizer.split_and_transform)
    dl = DataLoader(ds,
                    batch_size=args.batch_size,
                    num_workers=4,
                    collate_fn=batchify)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    summary_manager = SummaryManager(exp_dir)
    summary = evaluate(model, dl, {
        "loss": nn.CrossEntropyLoss(),
        "acc": acc
    }, device)

    summary_manager.load("summary.json")
    summary_manager.update({f"{args.data}": summary})
    summary_manager.save("summary.json")

    print("loss: {:.3f}, acc: {:.2%}".format(summary["loss"], summary["acc"]))
Example #5
def main(args):
    dataset_config = Config(args.dataset_config)
    model_config = Config(args.model_config)
    ptr_config_info = Config(f"conf/pretrained/{model_config.type}.json")

    exp_dir = Path("experiments") / model_config.type
    exp_dir = exp_dir.joinpath(
        f"epochs_{args.epochs}_batch_size_{args.batch_size}_learning_rate_{args.learning_rate}"
        f"_weight_decay_{args.weight_decay}")

    preprocessor = get_preprocessor(ptr_config_info, model_config)

    with open(ptr_config_info.config, mode="r") as io:
        ptr_config = json.load(io)

    # model (restore)
    checkpoint_manager = CheckpointManager(exp_dir)
    checkpoint = checkpoint_manager.load_checkpoint('best.tar')
    config = BertConfig()
    config.update(ptr_config)
    model = SentenceClassifier(config,
                               num_classes=model_config.num_classes,
                               vocab=preprocessor.vocab)
    model.load_state_dict(checkpoint['model_state_dict'])

    # evaluation
    filepath = getattr(dataset_config, args.data)
    ds = Corpus(filepath, preprocessor.preprocess)
    dl = DataLoader(ds, batch_size=args.batch_size, num_workers=4)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    summary_manager = SummaryManager(exp_dir)
    summary = evaluate(model, dl, {
        'loss': nn.CrossEntropyLoss(),
        'acc': acc
    }, device)

    summary_manager.load('summary.json')
    summary_manager.update({args.data: summary})
    summary_manager.save('summary.json')

    print('loss: {:.3f}, acc: {:.2%}'.format(summary['loss'], summary['acc']))
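
Every example records results through the same SummaryManager load/update/save cycle, and Example 9 additionally reads its `summary` attribute. The class itself is not shown in these snippets; a minimal JSON-backed sketch matching that interface:

import json
from pathlib import Path

class SummaryManager:
    """Minimal JSON-backed store matching the calls in these examples."""

    def __init__(self, exp_dir):
        self._dir = Path(exp_dir)
        self.summary = {}

    def load(self, filename):
        path = self._dir / filename
        if path.exists():
            with open(path, mode='r') as io:
                self.summary = json.load(io)

    def update(self, entry):
        self.summary.update(entry)

    def save(self, filename):
        self._dir.mkdir(parents=True, exist_ok=True)
        with open(self._dir / filename, mode='w') as io:
            json.dump(self.summary, io, indent=4)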
Example #6
    # model (restore)
    checkpoint_manager = CheckpointManager(model_dir)
    checkpoint = checkpoint_manager.load_checkpoint('best_{}.tar'.format(
        args.type))
    config = BertConfig(ptr_config.config)
    model = PairwiseClassifier(config,
                               num_classes=model_config.num_classes,
                               vocab=preprocessor.vocab)
    model.load_state_dict(checkpoint['model_state_dict'])

    # evaluation
    filepath = getattr(data_config, args.dataset)
    ds = Corpus(filepath, preprocessor.preprocess)
    dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    summary_manager = SummaryManager(model_dir)
    summary = evaluate(model, dl, {
        'loss': nn.CrossEntropyLoss(),
        'acc': acc
    }, device)

    summary_manager.load('summary_{}.json'.format(args.type))
    summary_manager.update({args.dataset: summary})
    summary_manager.save('summary_{}.json'.format(args.type))

    print('loss: {:.3f}, acc: {:.2%}'.format(summary['loss'], summary['acc']))
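
The CheckpointManager used throughout is likewise external to these snippets. Given that the checkpoints are `.tar` files holding state dicts, a minimal sketch is a thin wrapper over `torch.load`:

import torch
from pathlib import Path

class CheckpointManager:
    """Minimal sketch: load checkpoints saved under a model directory."""

    def __init__(self, model_dir):
        self._dir = Path(model_dir)

    def load_checkpoint(self, filename):
        # map_location lets GPU-saved checkpoints load on CPU-only machines
        return torch.load(self._dir / filename, map_location=torch.device('cpu'))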
Example #7
                          pad_fn=pad_sequence)

    # model (restore)
    checkpoint_manager = CheckpointManager(model_dir)
    checkpoint = checkpoint_manager.load_checkpoint(args.restore_file + '.tar')
    model = CharCNN(num_classes=model_config.num_classes,
                    embedding_dim=model_config.embedding_dim,
                    vocab=tokenizer.vocab)
    model.load_state_dict(checkpoint['model_state_dict'])

    # evaluation
    summary_manager = SummaryManager(model_dir)
    filepath = getattr(data_config, args.data_name)
    ds = Corpus(filepath, tokenizer.split_and_transform)
    dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    summary = evaluate(model, dl, {
        'loss': nn.CrossEntropyLoss(),
        'acc': acc
    }, device)

    summary_manager.load('summary.json')
    summary_manager.update({args.data_name: summary})
    summary_manager.save('summary.json')

    print('loss: {:.3f}, acc: {:.2%}'.format(summary['loss'], summary['acc']))
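
The Corpus dataset takes a file path and a preprocessing callable (`tokenizer.split_and_transform` or `preprocessor.preprocess` in the examples above). Its file format is not shown; a minimal sketch assuming one tab-separated `(sentence, label)` row per line and a transform that returns token ids:

import torch
from torch.utils.data import Dataset

class Corpus(Dataset):
    """Minimal sketch: one (sentence, label) row per line, tab-separated."""

    def __init__(self, filepath, transform_fn):
        with open(filepath, mode='r', encoding='utf-8') as io:
            self._rows = [line.strip().split('\t') for line in io]
        self._transform = transform_fn

    def __len__(self):
        return len(self._rows)

    def __getitem__(self, idx):
        sentence, label = self._rows[idx]
        return torch.tensor(self._transform(sentence)), int(label)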
Example #8
    model = MaLSTM(
        num_classes=model_config.num_classes,
        hidden_dim=model_config.hidden_dim,
        vocab=tokenizer.vocab,
    )
    model.load_state_dict(checkpoint["model_state_dict"])

    # evaluation
    filepath = getattr(data_config, args.dataset)
    ds = Corpus(filepath, tokenizer.split_and_transform)
    dl = DataLoader(ds,
                    batch_size=model_config.batch_size,
                    num_workers=4,
                    collate_fn=batchify)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    summary_manager = SummaryManager(model_dir)
    summary = evaluate(model, dl, {
        "loss": nn.CrossEntropyLoss(),
        "acc": acc
    }, device)

    summary_manager.load("summary.json")
    summary_manager.update({"{}".format(args.dataset): summary})
    summary_manager.save("summary.json")

    print("loss: {:.3f}, acc: {:.2%}".format(summary["loss"], summary["acc"]))
Example #9
                                   help='Set GPU for training')

if __name__ == '__main__':
    args = parser.parse_args()
    data_dir = Path(args.data_dir) / args.data
    restore_dir = Path(args.restore_dir) / args.data

    assert args.data in ['wikidatasets',
                         'fb15k'], "Invalid knowledge graph dataset"
    if args.data == 'wikidatasets':
        data_dir = data_dir / args.which
        restore_dir = restore_dir / args.which
    restore_dir = restore_dir / args.model

    summary_manager = SummaryManager(restore_dir)
    summary_manager.load(f'summary_{args.model}.json')
    previous_summary = summary_manager.summary
    ent_dim = previous_summary['Experiment Summary']['entity dimension']
    rel_dim = previous_summary['Experiment Summary']['relation dimension']
    limit = previous_summary['Experiment Summary']['limit']
    margin = previous_summary['Experiment Summary']['margin']

    with open(data_dir / 'kg_test.pkl', mode='rb') as io:
        kg_test = pickle.load(io)
    with open(data_dir / 'kg_valid.pkl', mode='rb') as io:
        kg_valid = pickle.load(io)

    # restore model
    assert args.model in ['TransE', 'TransR', 'DistMult',
                          'TransD'], "Invalid Knowledge Graph Embedding Model"
    if args.model == 'TransE':
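
Example 9's excerpt begins inside an argparse definition. The parser it implies can be reconstructed from the attribute accesses in the snippet; argument names follow from the code, but the defaults below are placeholders:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', default='data', help='Directory of the knowledge graph datasets')
parser.add_argument('--data', default='wikidatasets', help="One of 'wikidatasets' or 'fb15k'")
parser.add_argument('--which', default='subset', help='Subset used when --data is wikidatasets')
parser.add_argument('--model', default='TransE', help='One of TransE, TransR, DistMult, TransD')
parser.add_argument('--restore_dir', default='experiments', help='Directory of saved checkpoints')
parser.add_argument('--gpu', default=0, type=int, help='Set GPU for training')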
Example #10
    checkpoint_manager = CheckpointManager(model_dir)
    checkpoint = checkpoint_manager.load_checkpoint('best_snu_{}.tar'.format(
        args.pretrained_config))

    config = BertConfig(ptr_config.config)
    model = SentenceClassifier(config,
                               num_classes=model_config.num_classes,
                               vocab=preprocessor.vocab)
    model.load_state_dict(checkpoint['model_state_dict'])

    # evaluation
    filepath = getattr(data_config, args.dataset)
    ds = Corpus(filepath, preprocessor.preprocess)
    dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    summary_manager = SummaryManager(model_dir)
    summary = evaluate(model, dl, {
        'loss': nn.CrossEntropyLoss(),
        'acc': acc
    }, device)

    summary_manager.load('summary_snu_{}.json'.format(args.pretrained_config))
    summary_manager.update({args.dataset: summary})
    summary_manager.save('summary_snu_{}.json'.format(args.pretrained_config))

    print('loss: {:.3f}, acc: {:.2%}'.format(summary['loss'], summary['acc']))