Example #1
import numpy as np
from rnn_numpy import RNNNumpy

# tokenized_sentences, word_to_index and vocabulary_size come from the
# preprocessing step (see Example #4 below)

# X_train - every word of each sentence except the last one
X_train = np.asarray([[word_to_index[w] for w in sent[:-1]]
                      for sent in tokenized_sentences])
# y_train - every word except the first one
y_train = np.asarray([[word_to_index[w] for w in sent[1:]]
                      for sent in tokenized_sentences])

print()

print("############################")
print("# Test FORWARD PROPAGATION #")
print("############################")
model_test_forward_prop = RNNNumpy(vocabulary_size)

# Run forward propagation on the 10th training example;
# returns the output probabilities o and the hidden states s
o, s = model_test_forward_prop.forward_propagation(X_train[10])

print(o.shape)  # (sentence length, vocabulary_size), e.g. (45, 8000)
print(o)
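
# For reference, a minimal sketch of what forward_propagation computes,
# assuming the standard tutorial-style RNNNumpy with parameters U, V, W
# (the names and the softmax helper are assumptions; the actual class
# definition is not shown here):
#
#     def forward_propagation(self, x):
#         T = len(x)
#         # s holds the hidden states; the extra row is the initial zero state
#         s = np.zeros((T + 1, self.hidden_dim))
#         # o holds one probability distribution over the vocabulary per step
#         o = np.zeros((T, self.word_dim))
#         for t in range(T):
#             s[t] = np.tanh(self.U[:, x[t]] + self.W.dot(s[t - 1]))
#             o[t] = softmax(self.V.dot(s[t]))
#         return [o, s]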

# Calculate predictions by forward-propagating with the current
# (randomly initialized) weights, even though they are obviously
# far from optimal
predictions = model_test_forward_prop.predict(X_train[10])
print(predictions.shape)
print(predictions)
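
# predict presumably just takes the argmax of each row of o, i.e. roughly
# (a sketch, not necessarily the exact implementation):
#
#     def predict(self, x):
#         o, s = self.forward_propagation(x)
#         return np.argmax(o, axis=1)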

# According to the tutorial: with C = vocabulary_size words, each word
# should be predicted with probability 1/C on average, which yields
# a loss of L = -1/N * N * log(1/C) = log(C)
print("Expected loss for random predictions: %f" % np.log(vocabulary_size))
Example #2
import numpy as np
from rnn_numpy import RNNNumpy

# tokenized_sentences, word_to_index, index_to_word and vocabulary_size
# come from the preprocessing step (see Example #4 below)

# Training data: inputs are all words but the last, targets all but the first
X_train = np.asarray([[word_to_index[w] for w in sent[:-1]]
                      for sent in tokenized_sentences])
y_train = np.asarray([[word_to_index[w] for w in sent[1:]]
                      for sent in tokenized_sentences])

print(X_train[0])
print(y_train[0])

# Print a training data example
x_example, y_example = X_train[10], y_train[10]
print("x:\n%s\n%s" % (" ".join([index_to_word[x]
                                for x in x_example]), x_example))
print("\ny:\n%s\n%s" % (" ".join([index_to_word[x]
                                  for x in y_example]), y_example))

#### Build RNN Numpy model ########
np.random.seed(10)
model = RNNNumpy(vocabulary_size)
out, s = model.forward_propagation(X_train[10])
print("Size of forward_propagation is:")
print(out.shape)
print(out)  #give the prob of the next words

print("\n -------------aaaaaaaaaa")
# gives the indices of the highest probability predictions for each word:
predictions = model.predict(X_train[10])
print(predictions.shape)
print(predictions)

print("\n -------------bbbbbbbbb")
# Limit to 1000 examples to save time
print("Expected Loss for random predictions: %f" % np.log(vocabulary_size))
print("Actual loss: %f" % model.calculate_loss(X_train[:1000], y_train[:1000]))
Example #3
import preprocess
from rnn_numpy import RNNNumpy
import numpy as np

X_train, y_train, vocabulary_size = preprocess.create_train_data()
np.random.seed(10)
model = RNNNumpy(vocabulary_size)
output, hidden_states = model.forward_propagation(X_train[10])
print(output.shape)
print(output)

predictions = model.predict(X_train[10])
print(predictions.shape)
print(predictions)

# Limit to 1000 examples to save time
print("Expected Loss for random predictions: %f" % np.log(vocabulary_size))
print("Actual loss: %f" % model.calculate_loss(X_train[:1000], y_train[:1000]))
Example #4
import numpy as np
from rnn_numpy import RNNNumpy

# vocab, tokenized_sentences, unknown_token and vocabulary_size come from
# the earlier tokenization and word-frequency steps (not shown)
index_to_word = [x[0] for x in vocab]
index_to_word.append(unknown_token)
word_to_index = dict((w, i) for i, w in enumerate(index_to_word))

print "Using vocabulary size %d." % vocabulary_size
print "The least frequent word in our vocabulary is '%s' and appeared %d times." % (vocab[-1][0], vocab[-1][1])

# Replace all words not in our vocabulary with the unknown token
for i, sent in enumerate(tokenized_sentences):
    tokenized_sentences[i] = [w if w in word_to_index else unknown_token for w in sent]
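
# Quick illustration (hypothetical sentence, not from the dataset): any word
# outside the vocabulary is now represented by unknown_token, e.g.
#
#     sent = ["the", "quick", "zyzzyva"]  # assume "zyzzyva" is out of vocabulary
#     [w if w in word_to_index else unknown_token for w in sent]
#     # -> ["the", "quick", unknown_token]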

# Create the training data
X_train = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences])
y_train = np.asarray([[word_to_index[w] for w in sent[1:]] for sent in tokenized_sentences])
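
# Each target sequence is the input shifted one step to the left, so
# X_train[i][1:] == y_train[i][:-1] for every sentence. A quick check
# (illustrative addition):
print(X_train[0][1:] == y_train[0][:-1])  # -> True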

np.random.seed(10)
model = RNNNumpy(vocabulary_size)
o, s = model.forward_propagation(X_train[10])
print(o.shape)
print(o)

# model = RNNNumpy(vocabulary_size, hidden_dim=_HIDDEN_DIM)
# t1 = time.time()
# model.sgd_step(X_train[10], y_train[10], _LEARNING_RATE)
# t2 = time.time()
# print "SGD Step time: %f milliseconds" % ((t2 - t1) * 1000.)

# if _MODEL_FILE != None:
#     load_model_parameters_numpy(_MODEL_FILE, model)

# train_with_sgd(model, X_train, y_train, nepoch=_NEPOCH, learning_rate=_LEARNING_RATE)
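
# For reference, sgd_step presumably computes the gradients with
# backpropagation through time and applies one SGD update, roughly
# (a sketch, assuming the tutorial-style implementation with U, V, W):
#
#     def sgd_step(self, x, y, learning_rate):
#         dLdU, dLdV, dLdW = self.bptt(x, y)
#         self.U -= learning_rate * dLdU
#         self.V -= learning_rate * dLdV
#         self.W -= learning_rate * dLdW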