Example #1
def decode_lstm(load_from='lstm_model.npz'):
    npz_archive = numpy.load(load_from)
    model_options = npz_archive['model_options']
    ptb_data = npz_archive['ptb_data']

    lstm_lm = LSTM_LM(model_options['dim_proj'], model_options['y_dim'],
                      ptb_data.dictionary, SEED)

    print('Reloading params from %s' % load_from)
    load_params(load_from, lstm_lm.params)
    # Update the tparams with the new values
    zipp(lstm_lm.params, lstm_lm.tparams)

    print("model options", model_options)

    # Build the decoding graph
    lstm_lm.build_decode()
    test_sentences = ['with the', 'the cat', 'when the']
    test_sentences = [
        ptb_data.dictionary.read_sentence(s) for s in test_sentences
    ]
    test_sentences, test_mask, _ = pad_and_mask(test_sentences)

    start_time = time.time()
    output = lstm_lm.f_decode(test_sentences, test_mask,
                              model_options['maxlen'])
    end_time = time.time()

    print('Decoding took %.1fs' % (end_time - start_time))
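
Note: pad_and_mask is called throughout these examples but never shown. The sketch below is a hypothetical implementation inferred from the call sites (the maxlen filtering and the (x, mask, labels) return convention are assumptions), not the actual helper.

import numpy

def pad_and_mask(seqs, labels=None, maxlen=None):
    # Hedged sketch: pad a list of token-id lists into an int64 matrix of
    # shape (max sequence length, n samples), plus a float32 mask marking
    # the real (non-padded) positions, as the callers above expect.
    lengths = [len(s) for s in seqs]
    if maxlen is not None:
        # Assumed behaviour: drop sequences longer than maxlen.
        kept = [i for i, l in enumerate(lengths) if l <= maxlen]
        seqs = [seqs[i] for i in kept]
        lengths = [lengths[i] for i in kept]
        if labels is not None:
            labels = [labels[i] for i in kept]
        if len(seqs) < 1:
            return None, None, None
    n_samples = len(seqs)
    max_len = max(lengths)
    x = numpy.zeros((max_len, n_samples), dtype='int64')
    mask = numpy.zeros((max_len, n_samples), dtype='float32')
    for idx, s in enumerate(seqs):
        x[:lengths[idx], idx] = s
        mask[:lengths[idx], idx] = 1.
    return x, mask, labels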
Example #2
    def pred_cost(self, data, iterator, verbose=False):
        """
        Average cost over examples from a trained model

        data : The complete dataset. A list of lists; each nested list is one sample.
        iterator : A list of (batch_index, sample_indices) pairs; each
            sample_indices list holds the indices into data for one batch.
        """
        # Total samples
        n_samples = len(data)
        running_cost = []
        samples_seen = []

        n_done = 0

        # valid_index is a list containing the IDXs of samples for a batch
        for _, valid_index in iterator:
            x, mask, _ = pad_and_mask([data[t] for t in valid_index])
            # Accumulate running cost
            samples_seen.append(len(valid_index))
            running_cost.append(self.f_cost(x, mask))
            n_done += len(valid_index)
            if verbose:
                print("%d/%d samples classified" % (n_done, n_samples))

        weighted_cost = sum(n * c for n, c in zip(samples_seen, running_cost))
        return weighted_cost / sum(samples_seen)
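
The batch iterator consumed here (and by the other pred_* methods) is produced by get_minibatches_idx in the training code below. A minimal sketch of the assumed behaviour; the real implementation may differ:

import numpy

def get_minibatches_idx(n, minibatch_size, shuffle=False):
    # Hedged sketch: split range(n) into minibatches and return
    # (batch_number, sample_indices) pairs, which is the shape the
    # `for _, valid_index in iterator` loops above rely on.
    idx_list = numpy.arange(n, dtype='int64')
    if shuffle:
        numpy.random.shuffle(idx_list)
    minibatches = [idx_list[i:i + minibatch_size]
                   for i in range(0, n, minibatch_size)]
    return list(zip(range(len(minibatches)), minibatches))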
Example #3
def decode_lstm(load_from='lstm_model.npz'):
    npz_archive = numpy.load(load_from)
    model_options = npz_archive['model_options']
    ptb_data = npz_archive['ptb_data']

    lstm_lm = LSTM_LM(model_options['dim_proj'], model_options['y_dim'],
                      ptb_data.dictionary, SEED)

    print('Reloading params from %s' % load_from)
    load_params(load_from, lstm_lm.params)
    # Update the tparams with the new values
    zipp(lstm_lm.params, lstm_lm.tparams)

    print("model options", model_options)

    # Build the decoding graph
    lstm_lm.build_decode()
    test_sentences = ['with the', 'the cat', 'when the']
    test_sentences = [ptb_data.dictionary.read_sentence(s) for s in test_sentences]
    test_sentences, test_mask, _ = pad_and_mask(test_sentences)

    start_time = time.time()
    output = lstm_lm.f_decode(test_sentences, test_mask, model_options['maxlen'])
    end_time = time.time()

    print('Decoding took %.1fs' % (end_time - start_time))
Example #4
    def pred_cost(self, data, iterator, verbose=False):
        """
        Average cost over examples from a trained model

        data : The complete dataset. A list of lists; each nested list is one sample.
        iterator : A list of (batch_index, sample_indices) pairs; each
            sample_indices list holds the indices into data for one batch.
        """
        # Total samples
        n_samples = len(data)
        running_cost = []
        samples_seen = []

        n_done = 0

        # valid_index is a list containing the IDXs of samples for a batch
        for _, valid_index in iterator:
            x, mask, _ = pad_and_mask([data[t] for t in valid_index])
            # Accumulate running cost
            samples_seen.append(len(valid_index))
            running_cost.append(self.f_cost(x, mask))
            n_done += len(valid_index)
            if verbose:
                print("%d/%d samples classified" % (n_done, n_samples))

        weighted_cost = sum(n * c for n, c in zip(samples_seen, running_cost))
        return weighted_cost / sum(samples_seen)
Example #5
    def pred_error(self, data, iterator, verbose=False):
        """
        Error rate over samples from a trained model
        """
        valid_err = 0
        for _, valid_index in iterator:
            x, mask, y = pad_and_mask([data[0][t] for t in valid_index],
                                      numpy.array(data[1])[valid_index],
                                      maxlen=None)
            preds = self.f_pred(x, mask)
            targets = numpy.array(data[1])[valid_index]
            valid_err += (preds == targets).sum()
        valid_err = 1. - numpy_floatX(valid_err) / len(data[0])

        return valid_err
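
pred_error relies on a numpy_floatX helper to keep the division in Theano's configured float precision. A one-line sketch of the assumed cast:

import numpy
import theano

def numpy_floatX(data):
    # Hedged sketch: cast to theano.config.floatX so the error ratio
    # computed above stays in the configured float type.
    return numpy.asarray(data, dtype=theano.config.floatX)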
Example #6
    def pred_probs(self, data, iterator, verbose=False):
        """
        Probabilities for new examples from a trained model
        """
        n_samples = len(data[0])
        probs = numpy.zeros((n_samples, 2)).astype(theano.config.floatX)

        n_done = 0

        for _, valid_index in iterator:
            x, mask, y = pad_and_mask([data[0][t] for t in valid_index],
                                      numpy.array(data[1])[valid_index],
                                      maxlen=None)
            pred_probs = self.f_pred_prob(x, mask)
            probs[valid_index, :] = pred_probs

            n_done += len(valid_index)
            if verbose:
                print("%d/%d samples classified" % (n_done, n_samples))

        return probs
Example #7
def train_lstm(
    dim_proj=650,
    patience=10,
    max_epochs=5000,
    disp_freq=10,
    decay_c=0.,
    lrate=0.0001,
    n_words=10000,
    optimizer=adadelta,
    encoder='lstm',
    save_to='lstm_model.npz',
    load_from='lstm_model.96.npz',
    valid_freq=370,
    save_freq=1110,
    maxlen=35,
    batch_size=20,
    valid_batch_size=64,
    dataset='../../data/simple-examples/data',
    noise_std=0.,
    use_dropout=True,
    reload_model=False,
):
    model_options = locals().copy()
    print("model options", model_options)

    print("... Loading data")
    ptb_data = ptb.PTB(dataset, n_words=n_words,
                       emb_dim=model_options['dim_proj'])
    train, valid, test = ptb_data.load_data()
    print("... Done loading data")

    ydim = ptb_data.dictionary.n_words
    model_options['ydim'] = ydim

    print('Building model')
    # Create the initial parameters for the model
    lstm_lm = LSTM_LM(model_options['dim_proj'], ydim,
                      ptb_data.dictionary, SEED)

    if reload_model:
        print('Reloading params from %s' % load_from)
        load_params(load_from, lstm_lm.params)
        # Update the tparams with the new values
        zipp(lstm_lm.params, lstm_lm.tparams)

    # Create the shared variables for the model
    (use_noise, x, mask, cost) = lstm_lm.build_model()

    if decay_c > 0.:
        cost += weight_decay(cost, lstm_lm.tparams['U'], decay_c)

    f_cost = theano.function([x, mask], cost, name='f_cost')
    grads = theano.grad(cost, wrt=list(lstm_lm.tparams.values()))
    f_grad = theano.function([x, mask], grads, name='f_grad')

    lr = T.scalar('lr')
    f_grad_shared, f_update = optimizer(lr, lstm_lm.tparams, grads, cost, x, mask)

    # Keep a few sentences to decode, to see how training is performing
    decode_use_noise, _, _, _ = lstm_lm.build_decode()
    decode_use_noise.set_value(1.)
    decode_sentences = ['with the', 'the cat', 'when the']
    decode_sentences = [ptb_data.dictionary.read_sentence(s) for s in decode_sentences]
    decode_sentences, decode_mask, _ = pad_and_mask(decode_sentences)

    print('Optimization')

    kf_valid = get_minibatches_idx(len(valid), valid_batch_size)
    kf_test = get_minibatches_idx(len(test), valid_batch_size)

    print('%d train examples' % len(train))
    print('%d valid examples' % len(valid))
    print('%d test examples' % len(test))

    history_errs = []
    best_p = None
    bad_counter = 0

    if valid_freq == -1:
        valid_freq = len(train) // batch_size
    if save_freq == -1:
        save_freq = len(train) // batch_size

    uidx = 0  # The number of updates done
    estop = False  # Early stop
    start_time = time.time()
    try:
        for eidx in range(max_epochs):
            n_samples = 0
            # Get shuffled index for the training set
            kf = get_minibatches_idx(len(train), batch_size, shuffle=True)
            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples in this minibatch
                x = [train[t] for t in train_index]

                # Convert to shape (minibatch maxlen, n samples)
                # Truncated backprop
                x, mask, _ = pad_and_mask(x, maxlen=maxlen)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask)
                f_update(lrate)

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print('bad cost detected: ', cost)
                    return 1., 1., 1.

                if numpy.mod(uidx, disp_freq) == 0:
                    print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost)

                if save_to and numpy.mod(uidx, save_freq) == 0:
                    print('Saving...')
                    if best_p is not None:
                        lstm_lm.params = best_p
                    else:
                        lstm_lm.params = unzip(lstm_lm.tparams)
                    numpy.savez(save_to, history_errs=history_errs, **lstm_lm.params)
                    pickle.dump(model_options, open('%s.pkl' % save_to, 'wb'),
                                -1)
                    print('Done')

                if numpy.mod(uidx, valid_freq) == 0:
                    use_noise.set_value(0.)
                    valid_cost = lstm_lm.pred_cost(valid, kf_valid)
                    test_cost = lstm_lm.pred_cost(test, kf_test)
                    history_errs.append([valid_cost, test_cost])

                    if (best_p is None or valid_cost <=
                            numpy.array(history_errs)[:, 0].min()):
                        best_p = unzip(lstm_lm.tparams)
                        bad_counter = 0

                    print(('Valid ', valid_cost,
                           'Test ', test_cost))
                    print("Some sentences.. ")
                    print(ptb_data.dictionary.idx_to_words(
                        lstm_lm.f_decode(decode_sentences, decode_mask,
                                         model_options['maxlen'])))

                    if (len(history_errs) > patience and valid_cost
                            >= numpy.array(history_errs)[:-patience, 0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            print('Early Stop')
                            estop = True
                            break

            print('Seen %d samples' % n_samples)

            if estop:
                break

    except KeyboardInterrupt:
        print('Training Interrupted')

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, lstm_lm.tparams)
    else:
        best_p = unzip(lstm_lm.tparams)

    use_noise.set_value(0.)
    # Note that the training dataset is sorted by length.
    # This is for faster decoding, since padding will create smaller batch matrices
    kf_train_sorted = get_minibatches_idx(len(train), batch_size)
    train_cost = lstm_lm.pred_cost(train, kf_train_sorted)
    valid_cost = lstm_lm.pred_cost(valid, kf_valid)
    test_cost = lstm_lm.pred_cost(test, kf_test)

    print('Train ', train_cost, 'Valid ', valid_cost, 'Test ', test_cost)

    if save_to:
        numpy.savez(save_to, train_cost=train_cost,
                    valid_cost=valid_cost, test_cost=test_cost,
                    history_errs=history_errs, **best_p)
    print('The code ran for %d epochs, with %f secs/epoch' %
          ((eidx + 1), ((end_time - start_time) / (1. * (eidx + 1)))))
    print('Training took %.1fs' % (end_time - start_time))
    return train_cost, valid_cost, test_cost
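
The training loops move values between plain numpy parameters and the Theano shared variables in tparams through three small helpers (zipp, unzip, load_params) that are not shown here. Hedged sketches of what they are assumed to do:

from collections import OrderedDict

import numpy

def zipp(params, tparams):
    # Sketch: push numpy values into the matching Theano shared variables.
    for name, value in params.items():
        tparams[name].set_value(value)

def unzip(tparams):
    # Sketch: pull the current values out of the Theano shared variables.
    new_params = OrderedDict()
    for name, shared in tparams.items():
        new_params[name] = shared.get_value()
    return new_params

def load_params(path, params):
    # Sketch: overwrite params in place with the arrays stored in an
    # .npz archive written by numpy.savez, as in the saving code above.
    archive = numpy.load(path)
    for name in params:
        if name not in archive.files:
            raise KeyError('%s is not in the archive %s' % (name, path))
        params[name] = archive[name]
    return params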
Example #8
def train_lstm(
    dim_proj=128,
    patience=10,
    max_epochs=5000,
    disp_freq=10,
    decay_c=0.,
    lrate=0.0001,
    n_words=10000,
    optimizer=adadelta,
    encoder='lstm',
    save_to='lstm_model.npz',
    valid_freq=370,
    save_freq=1110,
    maxlen=100,
    batch_size=16,
    valid_batch_size=64,
    dataset='../../data/aclImdb',
    noise_std=0.,
    use_dropout=True,
    reload_model=None,
    test_size=-1
):
    model_options = locals().copy()
    print("model options", model_options)

    imdb_data = imdb.IMDB(dataset, n_words=n_words,
                          emb_dim=model_options['dim_proj'])
    train, valid, test = imdb_data.load_data(valid_portion=0.05, maxlen=maxlen)

    if test_size > 0:
        # Random shuffle of the test set
        idx = numpy.arange(len(test[0]))
        numpy.random.shuffle(idx)
        idx = idx[:test_size]
        test = ([test[0][n] for n in idx], [test[1][n] for n in idx])

    ydim = numpy.max(train[1]) + 1
    model_options['ydim'] = ydim

    print('Building model')
    # Create the initial parameters for the model
    lstm_lm = LSTM_LM(model_options['dim_proj'], ydim,
                      imdb_data.dictionary, SEED)

    if reload_model:
        load_params('lstm_model.npz', lstm_lm.params)
        # Update the tparams with the new values
        zipp(lstm_lm.params, lstm_lm.tparams)

    # Create the shared variables for the model
    (use_noise, x, mask, y, cost) = lstm_lm.build_model()

    if decay_c > 0.:
        cost += weight_decay(cost, lstm_lm.tparams['U'], decay_c)

    f_cost = theano.function([x, mask, y], cost, name='f_cost')
    grads = theano.grad(cost, wrt=list(lstm_lm.tparams.values()))
    f_grad = theano.function([x, mask, y], grads, name='f_grad')

    lr = T.scalar('lr')
    f_grad_shared, f_update = optimizer(lr, lstm_lm.tparams, grads, cost, x, mask, y)

    print('Optimization')

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    print('%d train examples' % len(train[0]))
    print('%d valid examples' % len(valid[0]))
    print('%d test examples' % len(test[0]))

    history_errs = []
    best_p = None
    bad_counter = 0

    if valid_freq == -1:
        valid_freq = len(train[0]) // batch_size
    if save_freq == -1:
        save_freq = len(train[0]) // batch_size

    uidx = 0  # The number of updates done
    estop = False  # Early stop
    start_time = time.time()
    try:
        for eidx in range(max_epochs):
            n_samples = 0
            # Get shuffled index for the training set
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)
            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples in this minibatch
                y = [train[1][t] for t in train_index]
                x = [train[0][t] for t in train_index]

                # Convert to shape (minibatch maxlen, n samples)
                x, mask, y = pad_and_mask(x, y)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, y)
                f_update(lrate)

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print('bad cost detected: ', cost)
                    return 1., 1., 1.

                if numpy.mod(uidx, disp_freq) == 0:
                    print('Epoch ', eidx, 'Update ', uidx, 'Cost ', cost)

                if save_to and numpy.mod(uidx, save_freq) == 0:
                    print('Saving...')
                    if best_p is not None:
                        lstm_lm.params = best_p
                    else:
                        lstm_lm.params = unzip(lstm_lm.tparams)
                    numpy.savez(save_to, history_errs=history_errs, **lstm_lm.params)
                    pickle.dump(model_options, open('%s.pkl' % save_to, 'wb'),
                                -1)
                    print('Done')

                if numpy.mod(uidx, valid_freq) == 0:
                    use_noise.set_value(0.)
                    train_err = lstm_lm.pred_error(train, kf)
                    valid_err = lstm_lm.pred_error(valid, kf_valid)
                    test_err = lstm_lm.pred_error(test, kf_test)
                    history_errs.append([valid_err, test_err])

                    if (best_p is None or valid_err <=
                            numpy.array(history_errs)[:, 0].min()):
                        best_p = unzip(lstm_lm.tparams)
                        bad_counter = 0

                    print(('Train ', train_err, 'Valid ', valid_err,
                           'Test ', test_err))

                    if (len(history_errs) > patience and valid_err
                            >= numpy.array(history_errs)[:-patience, 0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            print('Early Stop')
                            estop = True
                            break

            print('Seen %d samples' % n_samples)

            if estop:
                break

    except KeyboardInterrupt:
        print('Training Interrupted')

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, lstm_lm.tparams)
    else:
        best_p = unzip(lstm_lm.tparams)

    use_noise.set_value(0.)
    kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
    train_err = lstm_lm.pred_error(train, kf_train_sorted)
    valid_err = lstm_lm.pred_error(valid, kf_valid)
    test_err = lstm_lm.pred_error(test, kf_test)

    print('Train ', train_err, 'Valid ', valid_err, 'Test ', test_err)

    if save_to:
        numpy.savez(save_to, train_err=train_err,
                    valid_err=valid_err, test_err=test_err,
                    history_errs=history_errs, **best_p)
    print('The code ran for %d epochs, with %f secs/epoch' %
          ((eidx + 1), ((end_time - start_time) / (1. * (eidx + 1)))))
    print('Training took %.1fs' % (end_time - start_time))
    return train_err, valid_err, test_err
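
Both training functions add a regularization term through a weight_decay helper when decay_c > 0. A hedged sketch of the assumed term, returning the value that is added to the symbolic cost (the cost argument is accepted only to mirror the call sites above; a plain L2 penalty is an assumption):

def weight_decay(cost, U, decay_c):
    # Sketch: L2 penalty on the output weights U, scaled by decay_c.
    # With a Theano shared variable U, (U ** 2).sum() builds a symbolic
    # scalar that is added to the symbolic training cost.
    return decay_c * (U ** 2).sum()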