示例#1
0
def main(args):
    """Train every model configuration of the sweep and evaluate the best one.

    Seeds NumPy and PyTorch from ``args.seed``, builds the dataset and its
    vocabulary, then trains one model per entry of the mapping returned by
    ``get_model_args_list``. Each configuration checkpoints into its own
    sub-directory of ``args.output``. The configuration with the lowest value
    returned by ``engine.full_training`` (logged as the validation ppl) is
    reloaded from its best checkpoint and run through ``engine.valid`` on
    every loader of ``dataset.get_test_and_valid_data_loaders_map()``.

    Args:
        args: Parsed options. Attributes read directly here: ``seed``,
            ``output``, ``glove``, ``embedding_size``,
            ``profile_memory_attention``, ``init_profile_memory_weights``,
            ``optimizer``, ``num_epochs`` and ``gpu``. ``Dataset`` and
            ``get_model_args_list`` also receive ``args`` and may read more.

    Raises:
        FileExistsError: If ``args.output`` exists but is not a directory.
        KeyError: If a configuration lacks ``learning_rate``, ``momentum`` or
            ``profile_memory_estimation_weight``.
        RuntimeError: If no configuration produced a validation ppl below
            infinity (empty sweep, or only NaN/inf results), so there is no
            best checkpoint to evaluate.
    """
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    dataset = Dataset(args)
    vocab = dataset.get_vocab()

    # exist_ok=True is a no-op for an existing directory and still fails
    # (FileExistsError) when the path is a non-directory. Unlike the former
    # `assert`, the check survives `python -O` and has no check-then-create
    # race.
    os.makedirs(args.output, exist_ok=True)

    use_glove = args.glove is not None
    if use_glove:
        glove_embeddings, glove_index = embeddings.load_glove_embeddings(
            path=args.glove,
            vocab=vocab,
            embedding_size=args.embedding_size,
        )

    # Loop-invariant: whether the GloVe index should additionally be used to
    # initialise the model weights (only meaningful when GloVe is loaded).
    init_from_glove_index = (use_glove
                             and args.profile_memory_attention is not None
                             and args.init_profile_memory_weights)

    model_cls, training_args_map = get_model_args_list(args, vocab)
    logging.info('Sweeper discovered %d different model configurations',
                 len(training_args_map))

    best_valid_ppl = float('inf')
    best_training_args_name = None

    for training_args_name, training_args_value in tqdm.tqdm(
            training_args_map.items(),
            desc='Model Configurations',
            bar_format=TRAINING_TQDM_BAD_FORMAT,
    ):
        # These three entries configure the engine, not the model, so they
        # are removed from a copy before the rest is passed to the model
        # constructor. The shared configuration dict is left untouched.
        model_args_value = training_args_value.copy()
        learning_rate = model_args_value.pop('learning_rate')
        momentum = model_args_value.pop('momentum')
        profile_memory_estimation_weight = model_args_value.pop(
            'profile_memory_estimation_weight')
        model = model_cls(**model_args_value)

        if use_glove:
            model.set_embeddings(glove_embeddings)
            if init_from_glove_index:
                model.init_embeddings_weights_using_glove_index(glove_index)

        engine = Engine(
            model=model,
            vocab=vocab,
            log_interval=None,
            optimizer_params={
                'optim': args.optimizer,
                'learning_rate': learning_rate,
                'momentum': momentum,
            },
            verbose=False,
            profile_memory_estimation_weight=profile_memory_estimation_weight,
        )
        engine.set_checkpoint_dir(
            checkpoint_dir=os.path.join(args.output, training_args_name),
            verbose=False,
        )

        valid_ppl = engine.full_training(
            num_epochs=args.num_epochs,
            dataset=dataset,
            verbose=False,
        )
        tqdm.tqdm.write('%s: %.5f' % (training_args_name, valid_ppl))

        # Strict comparison: NaN and inf results never become "best".
        if best_valid_ppl > valid_ppl:
            best_valid_ppl = valid_ppl
            best_training_args_name = training_args_name

    if best_training_args_name is None:
        # Without this guard the code below would fail with an opaque
        # TypeError from os.path.join (or a NameError on `engine` when the
        # sweep was empty).
        raise RuntimeError(
            'Sweep produced no usable model: %d configuration(s) were '
            'trained and none reached a finite validation ppl'
            % len(training_args_map))

    logging.info('Sweeping has finished with the best validation ppl %.5f',
                 best_valid_ppl)

    best_checkpoint_path = Engine.get_best_chechpoint(
        os.path.join(args.output, best_training_args_name))
    logging.info(
        'The best checkpoint %s. Picking up the model from there',
        best_checkpoint_path,
    )
    model = model_cls.create_from_checkpoint(best_checkpoint_path, args.gpu)
    # Reuse the engine left over from the last sweep iteration; only its model
    # is swapped for the best checkpoint before evaluation.
    engine.model = model
    for corpus, dl in dataset.get_test_and_valid_data_loaders_map().items():
        engine.valid(dl, corpus, use_progress_bar=False)
示例#2
0
# Module-level setup: device selection, configuration, seeding and the data
# pipeline. Everything below runs at import time.

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# config.yml is resolved next to this source file, not the working directory.
config = get_config(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "config.yml")
)

# Seed the RNGs before any data is loaded, and set the cuDNN flags that the
# PyTorch reproducibility notes recommend for repeatable runs.
torch.manual_seed(config["seed"])
random.seed(config["seed"])
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Unlike the config file, the model file lives under the *current working
# directory*: <cwd>/models/<model_name>.pt
MODEL_PATH = os.path.join(
    os.getcwd(),
    "models",
    config["model_name"] + ".pt",
)

# Dataset takes its four settings positionally, in exactly this order.
dataloader = Dataset(
    *[
        config[key]
        for key in ("max_len", "max_size", "batch_size", "pad_token")
    ]
)
(train_iterator, test_iterator, valid_iterator) = dataloader.get_iterator()
print("Loaded iterator, generating vocab...")
vocab = dataloader.get_vocab()
tokenizer = dataloader.get_tokenizer()

# The padding index is looked up before the vocabulary size is taken, matching
# the original order in case the lookup itself can affect the vocabulary.
pad_idx = vocab[config["pad_token"]]
input_dim = len(vocab)


def train(model, iterator, optimizer, criterion):
    ep_loss, ep_acc = 0, 0

    model.train()

    for labels, text, lengths in iterator:
        labels, text = labels.to(device), text.to(device)

        optimizer.zero_grad()