Example #1
# Snippet-level imports inferred from usage; CharRNN, variable, char_tensor
# and word_loss are repo-local helpers defined elsewhere in the project.
import numpy as np
import tqdm
from pathlib import Path


def validate(args, model: CharRNN, criterion, char_to_id, pbar=False):
    model.eval()
    valid_corpus = Path(args.valid_corpus).read_text(encoding='utf8')
    batch_size = 1
    window_size = 4096
    hidden = model.init_hidden(batch_size)
    total_loss = n_chars = 0
    total_word_loss = n_words = 0
    r = tqdm.trange if pbar else range
    # Walk the corpus in non-overlapping windows, optionally capping the
    # number of validated characters at args.valid_chars.
    for idx in r(
            0, min(args.valid_chars or len(valid_corpus),
                   len(valid_corpus) - 1), window_size):
        chunk = valid_corpus[idx:idx + window_size + 1]
        # volatile=True marks inference-only Variables (legacy PyTorch < 0.4
        # API; the modern equivalent is running under torch.no_grad()).
        inputs = variable(char_tensor(chunk[:-1], char_to_id).unsqueeze(0),
                          volatile=True)
        targets = variable(char_tensor(chunk[1:], char_to_id).unsqueeze(0))
        losses = []
        # Step the RNN one character at a time, carrying the hidden state
        # across window boundaries.
        for c in range(inputs.size(1)):
            output, hidden = model(inputs[:, c], hidden)
            loss = criterion(output.view(batch_size, -1), targets[:, c])
            # loss.data[0] is the legacy scalar accessor (loss.item() in
            # PyTorch >= 0.4)
            losses.append(loss.data[0])
            n_chars += 1
        total_loss += np.sum(losses)
        word_losses = word_loss(chunk, losses)
        total_word_loss += np.sum(word_losses)
        n_words += len(word_losses)
    mean_loss = total_loss / n_chars
    mean_word_perplexity = np.exp(total_word_loss / n_words)
    print('Validation loss: {:.3}, word perplexity: {:.1f}'.format(
        mean_loss, mean_word_perplexity))
    return {
        'valid_loss': mean_loss,
        'valid_word_perplexity': mean_word_perplexity
    }
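
The loop above advances through the corpus in non-overlapping windows: each chunk of window_size + 1 characters yields inputs (chunk[:-1]) and targets (chunk[1:]), i.e. the targets are the inputs shifted right by one character. A self-contained sketch of that slicing, using a toy corpus and window size made up for demonstration:

# Toy illustration of the windowing scheme used by validate(); the corpus
# and window size here are illustrative only.
corpus = 'hello world'
window_size = 4
for idx in range(0, len(corpus) - 1, window_size):
    chunk = corpus[idx:idx + window_size + 1]
    inputs, targets = chunk[:-1], chunk[1:]
    print(repr(inputs), '->', repr(targets))
# 'hell' -> 'ello'
# 'o wo' -> ' wor'
# 'rl'   -> 'ld'    (the final window may be shorter)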
Example #2
def sample(model: CharRNN,
           char2int: dict,
           prime='The',
           num_chars=1000,
           top_k=5):
    """
    Given a network and a char2int map, predict the next 1000 characters
    """

    # Infer the device the model lives on ('cuda' or 'cpu')
    device = next(model.parameters()).device.type

    int2char = {ii: ch for ch, ii in char2int.items()}

    # Set the model to evaluation mode, since it uses dropout
    model.eval()

    # First, run through the prime characters to warm up the hidden state;
    # only the prediction made after the last prime character is kept.
    chars = [char2int[ch] for ch in prime]
    h = model.init_hidden(1, device)
    for ch in chars:
        char, h = predict(model, ch, h, top_k, device)

    chars.append(char)

    # Now pass in the previous character and get a new one
    for ii in range(num_chars):
        char, h = predict(model, chars[-1], h, top_k, device)
        chars.append(char)

    return ''.join(int2char[c] for c in chars)
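
The predict() helper called above is not part of this snippet. Below is a minimal sketch of a compatible implementation, assuming the model accepts a (1, 1) tensor of character indices and returns (logits, hidden); the actual repository may encode inputs differently (e.g. one-hot), so treat this as illustrative:

# Hypothetical predict() compatible with sample() above; a sketch, not the
# repository's implementation.
import torch
import torch.nn.functional as F

def predict(model, char, h, top_k, device):
    # One sampling step: feed a single character index and sample the next
    # character from the top_k most likely outputs.
    x = torch.tensor([[char]], device=device)
    # Detach the hidden state so history doesn't accumulate in the graph
    h = tuple(t.detach() for t in h) if isinstance(h, tuple) else h.detach()
    with torch.no_grad():
        out, h = model(x, h)
    probs = F.softmax(out.squeeze(), dim=0)
    top_probs, top_idx = probs.topk(top_k)
    choice = top_idx[torch.multinomial(top_probs, 1)].item()
    return choice, h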
Example #3
# Snippet-level imports inferred from usage; CharRNN, CHARACTERS, logger,
# train_full, generate_charseq and the DEFAULT_* constants are repo-local.
import logging
import os
import sys

import torch


def main(
    representation,
    train=None,
    generate=None,
    temperature=DEFAULT_TEMPERATURE,
    max_generate_len=DEFAULT_MAX_GEN_LEN,
    generator_prime_str=FILE_START,
    window_size=DEFAULT_WINDOW_SIZE,
    batch_size=DEFAULT_BATCH_SIZE,
    disable_cuda=DEFAULT_DISABLE_CUDA,
    learning_rate=DEFAULT_LEARNING_RATE,
    num_epochs=DEFAULT_NUM_EPOCHS,
    patience=DEFAULT_PATIENCE,
    recurrent_type=DEFAULT_RECURRENT_TYPE,
    hidden_size=DEFAULT_RECURRENT_HIDDEN_SIZE,
    recurrent_layers=DEFAULT_RECURRENT_LAYERS,
    recurrent_dropout=DEFAULT_RECURRENT_DROPOUT,
    print_every_iter=DEFAULT_PRINT_EVERY_ITER,
    log_level=DEFAULT_LOG_LEVEL,
):
    # https://github.com/pytorch/pytorch/issues/13775
    torch.multiprocessing.set_start_method("spawn")

    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(log_level)

    use_cuda = torch.cuda.is_available()
    if disable_cuda:
        use_cuda = False

    if representation == "char":
        # Create the neural network structure
        logger.info("Constructing the neural network architecture...")
        n_chars = len(CHARACTERS)
        nn = CharRNN(n_chars,
                     n_chars,
                     hidden_size=hidden_size,
                     recurrent_type=recurrent_type,
                     recurrent_layers=recurrent_layers,
                     recurrent_dropout=recurrent_dropout,
                     use_cuda=use_cuda)
        if use_cuda:
            nn.cuda()

        if train:
            # Warn when window_size is undefined but batch_size > 1, since
            # batches may then mix unequal sequence lengths
            if window_size is None and batch_size != 1:
                logger.warning("~" * 40)
                logger.warning(
                    "WARN: Undefined window_size with batch_size: {}".format(
                        batch_size))
                logger.warning(
                    "\tBatches may not have equal sequence lengths!")
                logger.warning(
                    "\tWindow size should be defined when batch_size > 1.")
                logger.warning("~" * 40)

            # Train our model
            train_full(nn,
                       max_window_size=window_size,
                       learning_rate=learning_rate,
                       n_epochs=num_epochs,
                       patience_threshold=patience,
                       batch_size=batch_size,
                       print_every=print_every_iter,
                       use_cuda=use_cuda)

        elif generate:
            # Load our model
            logger.info("Loading the model weights...")
            path = nn.get_state_dict_path()
            if not os.path.isfile(path):
                raise FileNotFoundError(
                    ("Model does not exist at {}. " +
                     "Manual model renaming required.").format(path))
            nn.load_state_dict(torch.load(path))
            nn = nn.eval()
            generate_charseq(nn,
                             prime_str=generator_prime_str,
                             max_window_size=window_size,
                             max_generate_len=max_generate_len,
                             temperature=temperature)
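
A sketch of how main() might be invoked for each of its two modes; the actual CLI wrapper that parses command-line flags is not shown in this snippet, so these calls and values are purely illustrative:

# Illustrative invocations; values are made up, and the real entry point
# that maps command-line flags onto main() is outside this snippet.
main("char", train=True, window_size=128, batch_size=32)
main("char", generate=True, generator_prime_str=FILE_START,
     temperature=0.8, max_generate_len=500)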