import math

from tensorflow.models.rnn.rnn_cell import RNNCell
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variable_scope as vs


def glorot_initializer(in_size, out_size):
"""
Normalized initialization proposed for variance stabilization per layer
Links:
Understanding the difficulty of training deep feedforward neural networks
http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
"""
width = math.sqrt(6.0 / (in_size + out_size))
return init_ops.random_uniform_initializer(-width, width)
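
# For a concrete sense of scale: with in_size == out_size == 100 the bound is
# sqrt(6.0 / 200) ≈ 0.173, so weights start uniformly in (-0.173, +0.173).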


class SCRNNCell(RNNCell):
    """
    TensorFlow port of the Structurally Constrained Recurrent Neural Network
    (SCRN) model.

    Links:
        Learning Longer Memory in Recurrent Neural Networks
        http://arxiv.org/abs/1412.7753
        Original implementation in Torch
        https://github.com/facebook/SCRNNs
    """

    def __init__(self, batch_size, input_size,
                 hidden_size, context_size, alpha):
        # alpha in (0, 1) controls how slowly the context layer decays;
        # the paper fixes it at 0.95.
        self._input_size = input_size
        self._hidden_size = hidden_size
        self._context_size = context_size
        self._batch_size = batch_size
        self._alpha = alpha

    @property
    def state_size(self):
        # Packed state [h_t, s_t]: hidden and context concatenated.
        return self._hidden_size + self._context_size

    @property
    def output_size(self):
        # The cell returns its full packed state as the output too.
        return self._hidden_size + self._context_size

    def __call__(self, inputs, state, scope=None):
        # state = [h_{t-1}, s_{t-1}]: unpack the previous hidden and
        # context layers from the concatenated state tensor.
        hidden_state = array_ops.slice(
            state, begin=(0, 0),
            size=(self._batch_size, self._hidden_size)
        )
        context_state = array_ops.slice(
            state, begin=(0, self._hidden_size),
            size=(self._batch_size, self._context_size)
        )
        with vs.variable_scope(scope or type(self).__name__):
            # B projects the input into the context (slow) layer.
            B = vs.get_variable(
                'B_matrix', shape=[self._input_size, self._context_size],
                initializer=glorot_initializer(
                    in_size=self._input_size,
                    out_size=self._context_size
                )
            )
            # A projects the input into the hidden (fast) layer.
            A = vs.get_variable(
                'A_matrix', shape=[self._input_size, self._hidden_size],
                initializer=glorot_initializer(
                    in_size=self._input_size,
                    out_size=self._hidden_size
                )
            )
            # R is the hidden-to-hidden recurrent matrix.
            R = vs.get_variable(
                'R_matrix', shape=[self._hidden_size, self._hidden_size],
                initializer=glorot_initializer(
                    in_size=self._hidden_size,
                    out_size=self._hidden_size
                )
            )
            # P feeds the context layer into the hidden layer.
            P = vs.get_variable(
                'P_matrix', shape=[self._context_size, self._hidden_size],
                initializer=glorot_initializer(
                    in_size=self._context_size,
                    out_size=self._hidden_size
                )
            )
            bias_term = vs.get_variable(
                'Bias', shape=[self._hidden_size],
                initializer=init_ops.constant_initializer(
                    value=0.0
                )
            )
            # Context update: s_t = (1 - alpha) * B x_t + alpha * s_{t-1}
            new_context = ((1.0 - self._alpha) * math_ops.matmul(inputs, B) +
                           self._alpha * context_state)
            # Hidden update: h_t = f(P s_t + A x_t + R h_{t-1} + bias).
            # TODO: math_ops.batch_matmul?
            # Alternative nonlinearities: math_ops.tanh, nn_ops.softsign
            new_hidden = nn_ops.elu(
                math_ops.matmul(new_context, P) +
                math_ops.matmul(inputs, A) +
                math_ops.matmul(hidden_state, R) +
                bias_term
            )
            # Repack [h_t, s_t]; the same tensor serves as both the cell
            # output and the next state.
            new_state = array_ops.concat(
                values=[new_hidden, new_context], concat_dim=1
            )
        return new_state, new_state
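

if __name__ == '__main__':
    # Minimal smoke test: one SCRNN step on random data. This is a sketch
    # assuming the TF 0.x API the imports above target (tf.placeholder,
    # tf.initialize_all_variables); the sizes are illustrative and alpha=0.95
    # is the value fixed in the paper.
    import numpy as np
    import tensorflow as tf

    batch_size, input_size, hidden_size, context_size = 4, 8, 16, 5
    cell = SCRNNCell(batch_size, input_size, hidden_size, context_size,
                     alpha=0.95)
    inputs = tf.placeholder(tf.float32, [batch_size, input_size])
    initial_state = tf.zeros([batch_size, cell.state_size])
    output, new_state = cell(inputs, initial_state)

    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        out = sess.run(output, feed_dict={
            inputs: np.random.randn(batch_size, input_size)})
        print(out.shape)  # expected: (4, 21), i.e. (batch, hidden + context)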