def train_numpy():
    model = RNNNumpy(Config._VOCABULARY_SIZE, hidden_dim=Config._HIDDEN_DIM)
    # Time a single SGD step before running the full training loop
    t1 = time.time()
    model.sgd_step(X_train[10], y_train[10], Config._LEARNING_RATE)
    t2 = time.time()
    print("SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.))
    model.train_with_sgd(X_train, y_train, nepoch=Config._NEPOCH,
                         learning_rate=Config._LEARNING_RATE)
    # train_with_sgd(model, X_train, y_train, nepoch=_NEPOCH, learning_rate=_LEARNING_RATE)
    if Config._MODEL_FILE is not None:
        print("start saving model...")
        save_model_parameters_numpy(Config._MODEL_FILE, model)
        print("model saved!")
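save_model_parameters_numpy is not shown in this file. A minimal sketch of what such a helper could look like, assuming the model exposes the tutorial's U, V, W weight matrices; the np.savez layout is an illustrative assumption, not the repo's actual on-disk format:

import numpy as np

def save_model_parameters_numpy(outfile, model):
    # ASSUMPTION: model exposes U, V, W numpy arrays; the savez layout
    # here is illustrative, not the repo's actual implementation.
    np.savez(outfile, U=model.U, V=model.V, W=model.W)
    print("Saved model parameters to %s." % outfile)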
def load_trained_model():
    model = RNNNumpy(Config._VOCABULARY_SIZE, hidden_dim=Config._HIDDEN_DIM)
    print('start loading...')
    load_model_parameters_numpy(Config._MODEL_FILE, model)
    print('load over!')
    return model
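For a quick sanity check after loading, one could run the reloaded model on a training example. A minimal sketch, assuming X_train is in scope as in train_numpy() above (the example index is arbitrary):

model = load_trained_model()
# predict() returns the index of the highest-probability next word
# at each position of the input sequence.
predictions = model.predict(X_train[10])
print(predictions[:10])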
print("{}: Loss after num_examples_seen={} epoch={}: {}".format( time, num_examples_seen, epoch, loss)) if len(losses) > 1 and losses[-1][1] > losses[-2][1]: learning_rate *= 0.5 print("Setting learning rate to {}".format(learning_rate)) sys.stdout.flush() for i in range(len(y_train)): model.numpy_sgd_step(X_train[i], y_train[i], learning_rate) num_examples_seen += 1 if __name__ == '__main__': X_train = np.load(os.path.join(root_path, "data/X_train.npy")) y_train = np.load(os.path.join(root_path, "data/y_train.npy")) input = X_train[10] np.random.seed(0) vocabulary_size = 8000 model = RNNNumpy(vocabulary_size) # o, s = model.forward(input) # print(s[0].shape) # print(o[0].T.shape) # print("Expected Loss for random predictions: {}".format(np.log(vocabulary_size))) # print("Actual Loss: {}".format(model.calculate_loss(X_train[:1000], y_train[:1000]))) train_with_sgd(model, X_train[:100], y_train[:100], nepoch=10, evaluate_loss_after=1)
# Create the training data
# Note that the length of each sentence is different
# X_train - every word of each sentence except for the last one
X_train = np.asarray([[word_to_index[w] for w in sent[:-1]]
                      for sent in tokenized_sentences])
# y_train - every word of each sentence except for the first one
y_train = np.asarray([[word_to_index[w] for w in sent[1:]]
                      for sent in tokenized_sentences])

print()
print("############################")
print("# Test FORWARD PROPAGATION #")
print("############################")
model_test_forward_prop = RNNNumpy(vocabulary_size)

# Using the 10th training example
o, s = model_test_forward_prop.forward_propagation(X_train[10])
print(o.shape)  # (45, 8000): one row of word probabilities per input word
print(o)

# Try calculating a prediction by forward-propagating with the current
# weight values, even though they obviously would be very far from optimal
predictions = model_test_forward_prop.predict(X_train[10])
print(predictions.shape)
print(predictions)

# According to the tutorial: since we have vocabulary_size words, each word
# is predicted as a vector of vocabulary_size probabilities, one per
# candidate next word.
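As a concrete illustration of the shifted input/target construction at the top of this block, here is what a single sentence turns into; the toy sentence and word_to_index mapping are illustrative only:

# A minimal, self-contained illustration of the X/y construction above.
word_to_index = {"the": 0, "cat": 1, "sat": 2, "down": 3}
sent = ["the", "cat", "sat", "down"]
x = [word_to_index[w] for w in sent[:-1]]  # [0, 1, 2]  ("the cat sat")
y = [word_to_index[w] for w in sent[1:]]   # [1, 2, 3]  ("cat sat down")
# At each position t the model sees x[t] and is trained to predict y[t],
# i.e. y is simply x shifted left by one word.
assert x[1:] == y[:-1]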
def train_numpy(self, x_train, y_train, iterations):
    self.model = RNNNumpy(word_dim=self.vocabulary_size, hidden_dim=100,
                          bptt_truncate=4)
    self.model.sgd(x_train, y_train, 0.01, iterations)
y_train = np.asarray([[word_to_index[w] for w in sent[1:]]
                      for sent in tokenized_sentences])
print(X_train[0])
print(y_train[0])

# Print a training data example
x_example, y_example = X_train[10], y_train[10]
print("x:\n%s\n%s" % (" ".join([index_to_word[x] for x in x_example]), x_example))
print("\ny:\n%s\n%s" % (" ".join([index_to_word[x] for x in y_example]), y_example))

#### Build RNN numpy model ####
np.random.seed(10)
model = RNNNumpy(vocabulary_size)
out, s = model.forward_propagation(X_train[10])
print("Output shape of forward_propagation:")
print(out.shape)
print(out)  # gives the probability of each candidate next word

print("\n------------- predictions -------------")
# predict() gives the index of the highest-probability word at each position
predictions = model.predict(X_train[10])
print(predictions.shape)
print(predictions)

print("\n------------- loss -------------")
# Limit to 1000 examples to save time
print("Expected Loss for random predictions: %f" % np.log(vocabulary_size))
print("Actual loss: %f" % model.calculate_loss(X_train[:1000], y_train[:1000]))
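The "expected loss" check above follows directly from the cross-entropy definition: a randomly initialized model predicts each of the C = vocabulary_size words with probability about 1/C, so the per-word loss is -log(1/C) = log(C). A quick numeric check:

import numpy as np

C = 8000                   # vocabulary_size
expected_loss = np.log(C)  # -log(1/C): cross-entropy of a uniform prediction
print(expected_loss)       # ~8.987196820661973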
print "Expected Loss for random predictions: %f" % np.log(model.word_dim) print "Actual loss: %f" % model.calculate_loss(X_train[:100], y_train[:100]) def test_performance(model, learning_rate): print "\ntest performance: " + str(type(model)) t1 = time.time() model.sgd_step(X_train[10], y_train[10], learning_rate) t2 = time.time() print "SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.) model_gru = GRUTheano(word_dim=_VOCABULARY_SIZE, hidden_dim=_HIDDEN_DIM, bptt_truncate=-1) model_theano = RNNTheano(word_dim=_VOCABULARY_SIZE, hidden_dim=_HIDDEN_DIM, bptt_truncate=-1) model_rnn = RNNNumpy(word_dim=_VOCABULARY_SIZE, hidden_dim=_HIDDEN_DIM, bptt_truncate=-1) test_performance(model_gru, _LEARNING_RATE) test_performance(model_theano, _LEARNING_RATE) test_performance(model_rnn, _LEARNING_RATE) test_loss(model_gru) test_loss(model_theano) test_loss(model_rnn)