Example #1
import numpy as np  # BATCH_SIZE and BatchGenerator are defined elsewhere in the module


def validate_sentence(session, model, validation_batch, encoder_state, current_step):
    """Run a single validation sentence through the model and return its character-level perplexity."""
    encoder_inputs, single_decoder_inputs, decoder_weights = validation_batch.next()
    print(BatchGenerator.batches2string(encoder_inputs))
    print(BatchGenerator.batches2string(single_decoder_inputs))
    # replicate to the full batch size so we get multiple results against the whole state
    encoder_inputs = [np.repeat(x, BATCH_SIZE, axis=0) for x in encoder_inputs]
    decoder_inputs = [np.repeat(x, BATCH_SIZE, axis=0) for x in single_decoder_inputs]
    decoder_weights = [np.repeat(x, BATCH_SIZE, axis=0) for x in decoder_weights]
    # _, eval_loss, prediction = model.step(sess, current_step - 1, encoder_inputs, decoder_inputs,
    #                                      decoder_weights, enc_state[-1:], 1.0, True)
    _, eval_loss, prediction = model.step(session, current_step - 1, encoder_inputs, decoder_inputs,
                                          decoder_weights, encoder_state, 1.0, True)
    # group predictions per decoder step, then average across the replicated batch copies
    reshaped = np.reshape(prediction, (prediction.shape[0] // BATCH_SIZE, BATCH_SIZE, prediction.shape[1]))
    averaged = np.mean(reshaped, axis=1)
    # now roll back into the per-step list layout used for a single batch
    rolled = np.rollaxis(np.asarray([averaged]), 1, 0)
    splitted = np.vsplit(rolled, rolled.shape[0])
    squeezed = [np.squeeze(e, 0) for e in splitted]
    print(BatchGenerator.batches2string(squeezed))
    # compute character-level perplexity
    val_perp = float(np.exp(BatchGenerator.logprob(np.concatenate(squeezed),
                                                   np.concatenate(single_decoder_inputs[1:]))))
    print('--validation perp.: %.2f' % val_perp)
    return val_perp
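
A minimal self-contained sketch of the averaging and perplexity steps above, using plain NumPy on dummy data. The vocabulary size, sequence length, and the logprob helper below are assumptions for illustration; the repository's BatchGenerator.logprob may be implemented differently.

import numpy as np

BATCH_SIZE = 4
vocab_size = 27          # assumed: 'a'-'z' plus space
seq_len = 5              # assumed number of decoder steps

# fake model output: one softmax row per (step * batch) item
prediction = np.random.rand(seq_len * BATCH_SIZE, vocab_size)
prediction /= prediction.sum(axis=1, keepdims=True)

# collapse the replicated batch back into one distribution per step
reshaped = np.reshape(prediction, (seq_len, BATCH_SIZE, vocab_size))
averaged = np.mean(reshaped, axis=1)                  # shape (seq_len, vocab_size)

# fake one-hot labels for the target characters
labels = np.eye(vocab_size)[np.random.randint(vocab_size, size=seq_len)]

def logprob(predictions, labels):
    """Average negative log-probability of the true labels (assumed helper)."""
    predictions = np.maximum(predictions, 1e-10)
    return np.sum(labels * -np.log(predictions)) / labels.shape[0]

perplexity = float(np.exp(logprob(averaged, labels)))
print('dummy validation perplexity: %.2f' % perplexity)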
Example #2
def run_test():
    """Smoke-test the batch generators: text8 train/validation batches, random English words, and reverse strings."""
    # test batch generation
    print('download and read data')
    filename = maybe_download('text8.zip', 31344016)
    # Read data
    text = read_data(filename)
    # create datasets
    valid_size = 1000
    valid_text = text[:valid_size]
    train_text = text[valid_size:]
    # train_size = len(train_text)
    # create batch generators
    train_batches = BatchGenerator(train_text, BATCH_SIZE, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH, reverse_encoder_input=True)
    valid_batches = BatchGenerator(valid_text, 1, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH)

    # print(BatchGenerator.characters(train_batches.next()[0]))
    print('test main batch generator')
    e_bs, d_bs, dw_bs = train_batches.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)
    e_bs, d_bs, dw_bs = train_batches.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)
    e_bs, d_bs, dw_bs = valid_batches.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)
    e_bs, d_bs, dw_bs = valid_batches.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)

    print('test random english generator')
    random_batch = RandomWordsBatchGenerator(2, MIN_CHARS_IN_BATCH, MAX_CHARS_IN_BATCH,
                                             reverse_encoder_input=False)
    for _ in range(10):
        e_bs, d_bs, dw_bs = random_batch.next()
        print(BatchGenerator.batches2string(e_bs))
        print(BatchGenerator.batches2string(d_bs))
        BatchGenerator.verify_weights(d_bs, dw_bs)

    print('test random string gen with padding')
    random_str_batch = ReverseStringBatchGenerator(1, 8, 8,
                                                   reverse_encoder_input=False)
    e_bs, d_bs, dw_bs = random_str_batch.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)
    random_str_batch = ReverseStringBatchGenerator(2, 8, 16,
                                                   reverse_encoder_input=False)
    e_bs, d_bs, dw_bs = random_str_batch.next()
    print(BatchGenerator.batches2string(e_bs))
    print(BatchGenerator.batches2string(d_bs))
    BatchGenerator.verify_weights(d_bs, dw_bs)
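
As a rough illustration of the data layout these generators appear to produce (a list of per-step arrays with the batch along axis 0, as implied by the np.repeat calls in Example #1), here is a hand-rolled reverse-string batch builder. The character vocabulary, the use of space as a padding/GO symbol, and the helper names are assumptions, not the repository's actual encoding.

import random
import string
import numpy as np

VOCAB = ' ' + string.ascii_lowercase               # assumed: space (padding) + a-z
CHAR2ID = {c: i for i, c in enumerate(VOCAB)}

def one_hot(char, batch_size=1):
    """One-hot encode a single character, replicated across the batch axis."""
    row = np.zeros((1, len(VOCAB)), dtype=np.float32)
    row[0, CHAR2ID[char]] = 1.0
    return np.repeat(row, batch_size, axis=0)

def make_reverse_string_batch(length=8, batch_size=1):
    """Build (encoder_inputs, decoder_inputs) for one random lowercase string.

    Both are lists of (batch_size, vocab_size) arrays, one per time step,
    which is the per-step layout validate_sentence in Example #1 expects.
    """
    s = ''.join(random.choice(string.ascii_lowercase) for _ in range(length))
    encoder_inputs = [one_hot(c, batch_size) for c in s]
    # decoder input starts with a space acting as a GO marker, then the reversed string
    decoder_inputs = [one_hot(' ', batch_size)] + [one_hot(c, batch_size) for c in reversed(s)]
    return encoder_inputs, decoder_inputs

enc, dec = make_reverse_string_batch(length=8, batch_size=2)
print(len(enc), enc[0].shape)   # 8 steps, each of shape (2, 27)
print(len(dec), dec[0].shape)   # 9 steps, each of shape (2, 27)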