Example #1
def read_dataset(filename):
  with open(filename, "r") as f:
    for line in f:
      tag, words = line.lower().strip().split(" ||| ")
      yield ([w2i[x] for x in words.split(" ")], t2i[tag])

# Read in the data
train = list(read_dataset("data/classes/train.txt"))
w2i = defaultdict(lambda: UNK, w2i)
dev = list(read_dataset("data/classes/test.txt"))
nwords = len(w2i)
ntags = len(t2i)


# Define the model
W_sm = zero((nwords, ntags))  # Word weights
b_sm = zero((ntags))      # Softmax bias

# bag of words input
x = T.ivector('words')
# gold class
y = T.iscalar('class')

score = T.sum(W_sm[x], axis=0) + b_sm
# log likelihood
ll = T.log(T.nnet.softmax(score)).flatten()
# negative log likelihood loss
loss = - ll[y]

params = [W_sm, b_sm]
updates = Adam(lr=0.001).get_updates(params, loss)

train_func = theano.function([x, y], loss, updates=updates)
test_func = theano.function([x], score)

print ("startup time: %r" % (time.time() - start))
for ITER in range(100):
  # Perform training
  random.shuffle(train)
  train_loss = 0.0
  start = time.time()
  for i, (words, tag) in enumerate(train):
    my_loss = train_func(words, tag)
    train_loss += my_loss
    # print(b_sm.get_value())
    # if i > 5:
    #   sys.exit(0)

  print("iter %r: train loss/sent=%.4f, time=%.2fs" % (ITER, train_loss/len(train), time.time()-start))

  # Perform testing
  test_correct = 0.0
  for words, tag in dev:
    scores = test_func(words)
    predict = np.argmax(scores)
    if predict == tag:
      test_correct += 1

  print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))
Example #2
def build_graph():
    print('build graph..')
    # Lookup parameters for word embeddings
    embedding_table = Embedding(vocab_size, args.EMBED_SIZE)

    lstm = LSTM(args.EMBED_SIZE,
                args.HIDDEN_SIZE,
                inner_init="identity",
                return_sequences=True)

    # Softmax weights/biases on top of LSTM outputs
    W_sm = uniform((args.HIDDEN_SIZE, vocab_size), scale=.5, name='W_sm')
    b_sm = uniform(vocab_size, scale=.5, name='b_sm')

    # (batch_size, sentence_length)
    x = T.imatrix(name='sentence')

    # (batch_size, sentence_length, embedding_dim)
    sent_embed, sent_mask = embedding_table(x, mask_zero=True)

    # Shift the embedded sentence right by one step and place the embedding of S
    # (presumably the start-of-sentence symbol id) at position 0, so the LSTM
    # predicts each word from its left context.
    lstm_input = T.set_subtensor(
        T.zeros_like(sent_embed)[:, 1:, :], sent_embed[:, :-1, :])
    lstm_input = T.set_subtensor(lstm_input[:, 0, :],
                                 embedding_table(S)[None, :])

    # (batch_size, sentence_length, output_dim)
    lstm_output = lstm(lstm_input)

    # (batch_size, sentence_length, vocab_size)
    logits = T.dot(lstm_output, W_sm) + b_sm
    logits = T.nnet.softmax(
        logits.reshape((logits.shape[0] * logits.shape[1],
                        vocab_size))).reshape(logits.shape)

    loss = T.log(logits).reshape((-1, logits.shape[-1]))
    # (batch_size * sentence_length)
    loss = loss[T.arange(loss.shape[0]), x.flatten()]
    # (batch_size, sentence_length)
    loss = -loss.reshape((x.shape[0], x.shape[1])) * sent_mask
    # loss = loss.sum(axis=-1) / sent_mask.sum(axis=-1)
    # loss = -T.mean(loss)

    # loss is the sum of nll over all words over all examples in the mini-batch
    loss = loss.sum()

    params = embedding_table.params + lstm.params + [W_sm, b_sm]
    updates = Adam(lr=0.001).get_updates(params, loss)
    # updates = SGD(lr=0.01).get_updates(params, loss)
    train_loss_func = theano.function([x], loss, updates=updates)
    test_loss_func = theano.function([x], loss)

    return train_loss_func, test_loss_func
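
# Usage sketch (hypothetical ids): both returned functions take a single int32
# matrix of word indices, zero-padded per row (index 0 is treated as padding via
# mask_zero=True above); S is assumed to be the start-of-sentence symbol id.
import numpy as np
train_loss_func, test_loss_func = build_graph()
batch = np.array([[5, 8, 2, 0],    # two sentences, zero-padded to equal length
                  [7, 3, 9, 4]], dtype='int32')
print(train_loss_func(batch))      # summed NLL over non-padded words; applies one Adam update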
Example #3
def build_tag_graph():
    print('build graph..', file=sys.stderr)

    # (batch_size, sentence_length)
    x = T.imatrix(name='sentence')

    # (batch_size, sentence_length)
    y = T.imatrix(name='tag')

    # Lookup parameters for word embeddings
    embedding_table = Embedding(nwords, args.WEMBED_SIZE)

    # bi-lstm
    lstm = BiLSTM(args.WEMBED_SIZE, args.HIDDEN_SIZE, return_sequences=True)

    # MLP
    W_mlp_hidden = uniform((args.HIDDEN_SIZE * 2, args.MLP_SIZE),
                           name='W_mlp_hidden')
    W_mlp = uniform((args.MLP_SIZE, ntags), name='W_mlp')

    # (batch_size, sentence_length, embedding_dim)
    sent_embed, sent_mask = embedding_table(x, mask_zero=True)

    # (batch_size, sentence_length, lstm_hidden_dim)
    lstm_output = lstm(sent_embed, mask=sent_mask)

    # (batch_size, sentence_length, ntags)
    mlp_output = T.dot(T.tanh(T.dot(lstm_output, W_mlp_hidden)), W_mlp)

    # (batch_size * sentence_length, ntags)
    mlp_output = mlp_output.reshape(
        (mlp_output.shape[0] * mlp_output.shape[1], -1))

    tag_prob_f = T.log(T.nnet.softmax(mlp_output))

    y_f = y.flatten()
    mask_f = sent_mask.flatten()

    tag_nll = -tag_prob_f[T.arange(tag_prob_f.shape[0]), y_f] * mask_f

    loss = tag_nll.sum()

    params = embedding_table.params + lstm.params + [W_mlp_hidden, W_mlp]
    updates = Adam().get_updates(params, loss)
    train_loss_func = theano.function([x, y], loss, updates=updates)

    # build the decoding graph
    tag_prob = tag_prob_f.reshape((x.shape[0], x.shape[1], -1))
    decode_func = theano.function([x], tag_prob)

    return train_loss_func, decode_func
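
# Decoding sketch (hypothetical ids): decode_func returns per-position tag
# log-probabilities of shape (batch_size, sentence_length, ntags); predicted tags
# are the argmax over the last axis, ignoring zero-padded positions.
import numpy as np
train_loss_func, decode_func = build_tag_graph()
sents = np.array([[4, 11, 7, 0]], dtype='int32')   # one zero-padded sentence
pred_tags = decode_func(sents).argmax(axis=-1)     # (batch_size, sentence_length)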
Example #4
def get_optimizer(optimizer_config, model, loss):
    name = optimizer_config['name']
    lr = optimizer_config['lr']
    if name == 'SGD':
        return SGD(model, loss, lr=lr)
    elif name == 'momentum':
        return Momentum(model, loss, lr=lr, beta=optimizer_config['beta'])
    elif name == 'adam':
        return Adam(model,
                    loss,
                    lr=lr,
                    beta_1=optimizer_config['beta_1'],
                    beta_2=optimizer_config['beta_2'])
    else:
        raise ValueError(f'Invalid optimizer: {name}')
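
# Hypothetical usage (`model` and `loss` come from the surrounding training code):
# optimizer_config is assumed to be a plain dict whose keys mirror the branches
# above, with extra keys only for the optimizer that needs them.
config = {'name': 'adam', 'lr': 1e-3, 'beta_1': 0.9, 'beta_2': 0.999}
optimizer = get_optimizer(config, model, loss)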
Example #5
def optimizer(self, method: str = 'sgd'):
    unit = [
        self.n_input - 1, self.n_hidden, self.n_output, self.hyper_param
    ]
    from nn.optimizers import Scaled_CG, Adam, SGD, RMSprop, Adagrad, Momentum
    if method == 'scg':
        self.optim_routine = Scaled_CG(*unit)
    elif method == 'adam':
        self.optim_routine = Adam(*unit)
    elif method == 'sgd':
        self.optim_routine = SGD(*unit)
    elif method == 'rmsprop':
        self.optim_routine = RMSprop(*unit)
    elif method == 'adagrad':
        self.optim_routine = Adagrad(*unit)
    elif method == 'momentum':
        self.optim_routine = Momentum(*unit)
    else:
        self.optim_routine = SGD(*unit)
Example #6
def train_mnist():
    from nn.optimizers import RMSprop, Adam
    import matplotlib.pyplot as plt
    import numpy as np

    from models.MNISTNet import MNISTNet
    from nn.loss import SoftmaxCrossEntropy, L2
    from data.datasets import MNIST
    np.random.seed(5242)

    mnist = MNIST()

    model = MNISTNet()
    loss = SoftmaxCrossEntropy(num_class=10)

    # define your learning rate scheduler
    def func(lr, iteration):
        if iteration % 1000 == 0:
            return lr * 0.5
        else:
            return lr

    adam = Adam(lr=0.001, decay=0, sheduler_func=None, bias_correction=True)
    l2 = L2(w=0.001)  # L2 regularization with lambda=0.001
    model.compile(optimizer=adam, loss=loss, regularization=l2)

    import time
    start = time.time()
    train_results, val_results, test_results = model.train(mnist,
                                                           train_batch=50,
                                                           val_batch=1000,
                                                           test_batch=1000,
                                                           epochs=2,
                                                           val_intervals=-1,
                                                           test_intervals=900,
                                                           print_intervals=100)
    print('cost:', time.time() - start)
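
    # Note: `func` above is defined but not passed to Adam (sheduler_func=None), so
    # the learning rate stays constant here; presumably the schedule would be enabled
    # with Adam(lr=0.001, decay=0, sheduler_func=func, bias_correction=True).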
Example #7
from nn.optimizers import RMSprop, Adam

#model = Fashion_MNISTNet()
model = MyFashMNIST_CNN()
loss = SoftmaxCrossEntropy(num_class=10)


# define your learning rate scheduler
def func(lr, iteration):
    if iteration % 1000 == 0:
        return lr * 0.5
    else:
        return lr


adam = Adam(lr=0.001, decay=0, sheduler_func=None, bias_correction=True)
l2 = L2(w=0.001)  # L2 regularization with lambda=0.001
model.compile(optimizer=adam, loss=loss, regularization=l2)

import time
start = time.time()
train_results, val_results, test_results = model.train(Fashion_mnist,
                                                       train_batch=50,
                                                       val_batch=1000,
                                                       test_batch=1000,
                                                       epochs=2,
                                                       val_intervals=-1,
                                                       test_intervals=900,
                                                       print_intervals=100)
print('cost:', time.time() - start)
Example #8
#model = Fashion_MNISTNet()
model = MyFashionModel_CNN()
loss = SoftmaxCrossEntropy(num_class=10)


# define your learning rate scheduler
def func(lr, iteration):
    if iteration % 1000 == 0:
        return lr * 0.5
    else:
        return lr


adam = Adam(lr=0.001,
            beta_1=0.9,
            beta_2=0.999,
            sheduler_func=None,
            bias_correction=True)
l2 = L2(w=0.001)  # L2 regularization with lambda=0.001
model.compile(optimizer=adam, loss=loss, regularization=l2)

import time
start = time.time()
train_results, val_results, test_results = model.train(Fashion_mnist,
                                                       train_batch=128,
                                                       val_batch=1000,
                                                       test_batch=1000,
                                                       epochs=40,
                                                       val_intervals=-1,
                                                       test_intervals=900,
                                                       print_intervals=100)
Example #9
np.random.seed(2373)

random_indexes = np.random.choice(train_x.shape[0], size=train_x.shape[0], replace=False)
train_x = train_x[random_indexes]
train_y = train_y[random_indexes]

val_index = int(test_x.shape[0] * 0.1)
val_x = train_x[:val_index]
val_y = train_y[:val_index]
train_x = train_x[val_index:]
train_y = train_y[val_index:]

epochs = 18
lr = 0.0001
# optimizer = GradientDescentMomentum(learning_rate=lr, beta=0.9)
optimizer = Adam(learning_rate=lr, beta1=0.9, beta2=0.999)

net = NeuralNetwork([128, 64, train_y.shape[1]], epochs=epochs, activation_dict={-1: "sigmoid"},
                    batch_size=512, val_x=np.asarray(val_x.todense()), val_y=val_y,
                    optimizer=optimizer)

net.fit(train_x, train_y)
plot_losses(net.training_losses, net.validation_losses, savepath="model_losses.png")

with open("model_losses_adam_{}_{}_l2.txt".format(epochs, lr), "w") as f:
    for tl, vl in zip(net.training_losses, net.validation_losses):
        f.write("{}, {}\n".format(tl, vl))

preds = net.predict(test_x, batch_size=256)
print(accuracy(preds, test_y))
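
# Hypothetical sketch of the `accuracy` helper used above (not shown in this example),
# assuming `preds` holds per-class scores and `test_y` is one-hot encoded:
def accuracy(predictions, labels):
    return np.mean(np.argmax(predictions, axis=1) == np.argmax(labels, axis=1))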
Example #10
def build_tag_graph():
    print('build graph..', file=sys.stderr)

    # (sentence_length)
    # word indices for a sentence
    x = T.ivector(name='sentence')

    # (sentence_length, max_char_num_per_word)
    # character indices for each word in a sentence
    x_chars = T.imatrix(name='sent_word_chars')

    # (sentence_length)
    # target tag
    y = T.ivector(name='tag')

    # Lookup parameters for word embeddings
    word_embeddings = Embedding(nwords,
                                args.WEMBED_SIZE,
                                name='word_embeddings')

    # Lookup parameters for character embeddings
    char_embeddings = Embedding(nchars,
                                args.CEMBED_SIZE,
                                name='char_embeddings')

    # lstm for encoding word characters
    char_lstm = BiLSTM(args.CEMBED_SIZE,
                       int(args.WEMBED_SIZE / 2),
                       name='char_lstm')

    # bi-lstm
    lstm = BiLSTM(args.WEMBED_SIZE,
                  args.HIDDEN_SIZE,
                  return_sequences=True,
                  name='lstm')

    # MLP
    W_mlp_hidden = uniform((args.HIDDEN_SIZE * 2, args.MLP_SIZE),
                           name='W_mlp_hidden')
    W_mlp = uniform((args.MLP_SIZE, ntags), name='W_mlp')

    # def get_word_embed_from_chars(word_chars):
    #   # (max_char_num_per_word, char_embed_dim)
    #   # (max_char_num_per_word)
    #   word_char_embeds, word_char_masks = char_embeddings(word_chars, mask_zero=True)
    #   word_embed = char_lstm(T.unbroadcast(word_char_embeds[None, :, :], 0), mask=T.unbroadcast(word_char_masks[None, :], 0))[0]
    #
    #   return word_embed

    # def word_embed_look_up_step(word_id, word_chars):
    #   word_embed = ifelse(T.eq(word_id, UNK),
    #             get_word_embed_from_chars(word_chars),  # if it's a unk
    #             word_embeddings(word_id))
    #
    #   return word_embed

    word_embed_src = T.eq(x, UNK).astype('float32')[:, None]

    # (sentence_length, word_embedding_dim)
    word_embed = word_embeddings(x)

    # (sentence_length, max_char_num_per_word, char_embed_dim)
    # (sentence_length, max_char_num_per_word)
    word_char_embeds, word_char_masks = char_embeddings(x_chars,
                                                        mask_zero=True)

    # (sentence_length, word_embedding_dim)
    word_embed_from_char = char_lstm(word_char_embeds, mask=word_char_masks)

    # Use the char-LSTM composed embedding for UNK words and the lookup embedding
    # for all other words (word_embed_src is a 0/1 indicator of UNK positions).
    sent_embed = word_embed_src * word_embed_from_char + (
        1 - word_embed_src) * word_embed

    # # (sentence_length, embedding_dim)
    # sent_embed, _ = theano.scan(word_embed_look_up_step, sequences=[x, x_chars])

    # (sentence_length, lstm_hidden_dim)
    lstm_output = lstm(T.unbroadcast(sent_embed[None, :, :], 0))[0]

    # (sentence_length, ntags)
    mlp_output = T.dot(T.tanh(T.dot(lstm_output, W_mlp_hidden)), W_mlp)

    tag_prob = T.log(T.nnet.softmax(mlp_output))

    tag_nll = -tag_prob[T.arange(tag_prob.shape[0]), y]

    loss = tag_nll.sum()

    params = word_embeddings.params + char_embeddings.params + char_lstm.params + lstm.params + [
        W_mlp_hidden, W_mlp
    ]
    updates = Adam().get_updates(params, loss)
    train_loss_func = theano.function([x, x_chars, y], loss, updates=updates)

    # build the decoding graph
    decode_func = theano.function([x, x_chars], tag_prob)

    return train_loss_func, decode_func
Example #11
from data import datasets
from models.SentimentNet import SentimentNet
#from models.MyModel_SentimentNet import MyModel_SentimentNet
from nn.loss import SoftmaxCrossEntropy, L2
from nn.optimizers import Adam
import numpy as np
np.random.seed(5242)

dataset = datasets.Sentiment()
model = SentimentNet(dataset.dictionary)
#model = MyModel_SentimentNet(dataset.dictionary)
loss = SoftmaxCrossEntropy(num_class=2)

adam = Adam(lr=0.01,
            decay=0,
            sheduler_func=lambda lr, it: lr * 0.5 if it % 1000 == 0 else lr)
model.compile(optimizer=adam, loss=loss, regularization=L2(w=0.001))
train_results, val_results, test_results = model.train(dataset,
                                                       train_batch=20,
                                                       val_batch=100,
                                                       test_batch=100,
                                                       epochs=5,
                                                       val_intervals=-1,
                                                       test_intervals=25,
                                                       print_intervals=5)
Example #12
import numpy as np
from nn.layers import Conv2D, Dense, PReLU
from nn.optimizers import Adam
from nn.losses import softmax
from nn.model import BaseModel

batch_size = 32
nb_classes = 10
x = np.random.rand(batch_size, 3, 64, 64)
y = np.random.randint(nb_classes, size=batch_size)

class Model(BaseModel):
    def predictor(self, inp, outp):
        model = []
        model.append(Conv2D(inp, 32))
        model.append(PReLU(model[-1]))
        model.append(Dense(model[-1], 128))
        model.append(PReLU(model[-1]))
        model.append(Dense(model[-1], nb_classes))
        return model

model = Model(x, y, softmax, Adam(1e-3))
for _ in range(100):  # train 100 steps
    print(model.fit(x, y))  # loss should go down