from __future__ import print_function, division
import tensorflow as tf
from tensorflow.python.ops import rnn_cell
from tensorflow.python.ops import seq2seq
from common_utils import get_logger
import sys
import numpy
import pprint
MODEL_SCOPE = "char_rnn"
DEVICE_SCOPE = "/gpu:0"
logger = get_logger()
class Model(object):
    def __init__(self, config):
        """Init model from the provided configuration.

        Args:
            config (dict): Model configuration. Should have:
                rnn_size: size of the RNN hidden state
                num_layers: number of RNN layers
                rnn_type: lstm, rnn, or gru
                batch_size: batch size
                seq_length: sequence length
                grad_clip: clip gradients at this value
                vocab_size: size of the vocabulary
                infer: True/False; if True, feed the predicted output back
                    into the RNN instead of the gold target output
                is_train: True if training
        """
        logger.info("Create model with options: \n{}".format(pprint.pformat(config)))
        self.rnn_size = config["rnn_size"]
        self.num_layers = config["num_layers"]
        self.rnn_type = config["rnn_type"]
        self.batch_size = config["batch_size"]
        self.seq_length = config["seq_length"]
        self.grad_clip = config["grad_clip"]
        self.vocab_size = config["vocab_size"]
        self.infer = config["infer"]
        self.is_train = config["is_train"]
        self.reuse = config["reuse"]
        # At inference time the model is fed one character at a time
        if self.infer:
            self.batch_size = 1
            self.seq_length = 1
        if self.rnn_type == "rnn":
            cell_fn = rnn_cell.BasicRNNCell
        elif self.rnn_type == "gru":
            cell_fn = rnn_cell.GRUCell
        elif self.rnn_type == "lstm":
            cell_fn = rnn_cell.LSTMCell
        else:
            msg = "rnn_type must be one of rnn, gru, or lstm"
            logger.error(msg)
            sys.exit(msg)
        # Define the single-layer cell
        cell = cell_fn(self.rnn_size)
        # Stack it into a multi-layer RNN
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * self.num_layers)
        self.input_data = tf.placeholder(tf.int32, [self.batch_size, self.seq_length])
        self.targets = tf.placeholder(tf.int32, [self.batch_size, self.seq_length])
        self.initial_state = cell.zero_state(self.batch_size, tf.float32)
        with tf.variable_scope(MODEL_SCOPE, reuse=self.reuse):
            softmax_w = tf.get_variable("softmax_w", [self.rnn_size, self.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [self.vocab_size])
            # Model params are placed on DEVICE_SCOPE (here a GPU)
            with tf.device(DEVICE_SCOPE):
                embeddings = tf.get_variable("embeddings", [self.vocab_size, self.rnn_size])
                # Split into a list of per-step inputs, i.e. along dimension 1
                inputs = tf.split(1, self.seq_length, tf.nn.embedding_lookup(embeddings, self.input_data))
                '''
                tf.split works like numpy.split: inputs is now a list of step
                inputs (to the RNN), each of shape (batch_size, 1, rnn_size).
                We don't need that middle dimension, so remove it by squeezing.
                '''
                inputs = [tf.squeeze(_input, [1]) for _input in inputs]
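                # Shape walk-through, purely illustrative (the concrete numbers
                # are assumptions, not values from this repo): with batch_size=50,
                # seq_length=50 and rnn_size=128, embedding_lookup returns a
                # (50, 50, 128) tensor, tf.split yields 50 tensors of shape
                # (50, 1, 128), and squeezing leaves 50 step inputs of shape
                # (50, 128).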
        '''
        Instead of unrolling the network by hand, use seq2seq.rnn_decoder.
        At test time the predicted output is fed back into the RNN instead of
        the gold target output used at training time.
        '''
        def loop(prev, _):
            prev = tf.matmul(prev, softmax_w) + softmax_b
            # stop_gradient returns the argmax symbol while blocking any
            # gradient flow through this sampling step
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embeddings, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(
            inputs, self.initial_state, cell,
            loop_function=loop if self.infer else None, scope=MODEL_SCOPE)
        # Concatenate the per-step outputs back into one matrix
        output = tf.reshape(tf.concat(1, outputs), [-1, self.rnn_size])  # now (batch_size * seq_length) x rnn_size
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([self.batch_size * self.seq_length])])
        self.cost = tf.reduce_sum(loss) / (self.batch_size * self.seq_length)
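        # self.cost is the average cross-entropy per character; exp(cost) can
        # be read as the model's per-character perplexity.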
        self.final_state = last_state
        if not self.is_train:
            return
        # Training ops: globally clipped gradients with Adam; the learning rate
        # is a non-trainable variable assigned from outside
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), self.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
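        # A minimal training-loop sketch, assuming a hypothetical `data_loader`
        # that yields (x, y) int32 batches; none of these names come from this
        # file:
        #   sess.run(tf.assign(model.lr, 0.002))
        #   state = model.initial_state.eval(session=sess)
        #   for x, y in data_loader:
        #       feed = {model.input_data: x, model.targets: y, model.initial_state: state}
        #       cost, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)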
    def sample(self, sess, ivocab, vocab, num=500, prime=" ", sampling_type=1):
        state = self.cell.zero_state(1, tf.float32).eval(session=sess)
        # Warm up the RNN state on the priming text, except for its last character
        for char in prime[:-1]:
            x = numpy.zeros((1, 1))
            x[0, 0] = vocab[char]
            feed = {self.input_data: x, self.initial_state: state}
            [state] = sess.run([self.final_state], feed)

        def weighted_pick(weights):
            # Sample an index with probability proportional to its weight
            t = numpy.cumsum(weights)
            s = numpy.sum(weights)
            return int(numpy.searchsorted(t, numpy.random.rand(1) * s))
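        # sampling_type, as read from the branches below: 0 takes the argmax at
        # every step, 1 samples at every step, 2 samples only at spaces (word
        # boundaries) and takes the argmax elsewhere.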
        ret = prime
        char = prime[-1]
        for n in range(num):
            x = numpy.zeros((1, 1))
            x[0, 0] = vocab[char]
            feed = {self.input_data: x, self.initial_state: state}
            [probs, state] = sess.run([self.probs, self.final_state], feed)
            p = probs[0]
            if sampling_type == 0:
                sample = numpy.argmax(p)
            elif sampling_type == 2:
                if char == " ":
                    sample = weighted_pick(p)
                else:
                    sample = numpy.argmax(p)
            else:
                sample = weighted_pick(p)
            pred = ivocab[sample]
            ret += pred
            char = pred
        return ret
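
# A minimal sampling sketch, assuming the model was trained and saved elsewhere;
# `config`, the checkpoint path, and the vocab/ivocab mappings are placeholders,
# not part of this file:
#   config["infer"] = True
#   config["is_train"] = False
#   model = Model(config)
#   with tf.Session() as sess:
#       tf.train.Saver().restore(sess, "save/model.ckpt")
#       print(model.sample(sess, ivocab, vocab, num=500, prime="The "))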