def test_ops(self):
    x = K.variable(np.random.rand(8, 12))
    y = K.variable(np.random.rand(12, 25))
    z = K.placeholder((25, 18, 13))
    w = K.placeholder((18, 18))
    # ====== dot ====== #
    t = K.dot(x, y)
    self.assertEqual(K.get_shape(t), (8, 25))
    self.assertEqual(K.get_shape(t), K.eval(t).shape)
    t = K.dot(t, K.dimshuffle(z, (1, 0, 2)))
    self.assertEqual(K.get_shape(t), (8, 18, 13))
    # ====== transpose ====== #
    self.assertEqual(K.get_shape(K.transpose(z)), (13, 18, 25))
    self.assertEqual(K.get_shape(K.transpose(t, axes=(2, 0, 1))), (13, 8, 18))
    # ====== eye ====== #
    self.assertEqual(K.get_shape(K.eye(5)), K.eval(K.eye(5)).shape)
    # ====== diag ====== #
    self.assertEqual(K.get_shape(K.diag(w)), (18,))
    # self.assertEqual(K.get_shape(K.diag(x)),
    #                  K.eval(K.diag(y)).shape)
    self.assertEqual(K.get_shape(K.square(x)), K.eval(K.square(x)).shape)
    self.assertEqual(K.get_shape(K.abs(x)), K.eval(K.abs(x)).shape)
    self.assertEqual(K.get_shape(K.sqrt(x)), K.eval(K.sqrt(x)).shape)
    self.assertEqual(K.get_shape(K.exp(x)), K.eval(K.exp(x)).shape)
    self.assertEqual(K.get_shape(K.log(x)), K.eval(K.log(x)).shape)
    self.assertEqual(K.get_shape(K.round(x)), K.eval(K.round(x)).shape)
    self.assertEqual(K.get_shape(K.pow(x, 2)), K.eval(K.pow(x, 2)).shape)
    self.assertEqual(K.get_shape(K.clip(x, -1, 1)),
                     K.eval(K.clip(x, -1, 1)).shape)
    self.assertEqual(K.get_shape(K.inv(x)), K.eval(K.inv(x)).shape)
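# Minimal NumPy sketch (illustrative, outside the test): the (2-D x 3-D) shape
# rule asserted for K.dot above is the same one np.dot implements — contract
# the last axis of the first argument with the second-to-last of the second.
import numpy as np

a = np.random.rand(8, 25)                          # K.get_shape(t) == (8, 25)
b = np.random.rand(25, 18, 13).transpose(1, 0, 2)  # dimshuffle -> (18, 25, 13)
assert np.dot(a, b).shape == (8, 18, 13)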
def get_mean_logsigma(self, x):
    b_mean = 0. if not hasattr(self, 'b_mean') else self.b_mean
    b_logsigma = 0. if not hasattr(self, 'b_logsigma') else self.b_logsigma
    mean = self.activation(K.dot(x, self.W_mean) + b_mean)
    logsigma = self.activation(K.dot(x, self.W_logsigma) + b_logsigma)
    mean.name = 'variational_mean'
    logsigma.name = 'variational_logsigma'
    add_role(mean, VARIATIONAL_MEAN)
    add_role(logsigma, VARIATIONAL_LOGSIGMA)
    return mean, logsigma
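# Hedged companion sketch (not from the snippet above, which only builds the
# two heads): mean/log-sigma pairs like these are conventionally consumed via
# the reparameterization trick, sampling z = mean + exp(logsigma) * eps with
# eps ~ N(0, 1) so gradients can flow through both heads. `sample_numpy` is a
# hypothetical helper, not a library function.
import numpy as np

def sample_numpy(mean, logsigma, rng=np.random):
    eps = rng.standard_normal(mean.shape)
    return mean + np.exp(logsigma) * eps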
def _rnn(self, X, h0, mask=None):
    #####################################
    # X: sequence inputs (bias included)
    # h0: previous hidden states
    # W_hid: concatenated [W_update, W_reset, W_states]
    # mask: mask inputs (optional)
    prev_states = h0
    nb_units = self.num_units
    # hidden connection of all gates and states update
    hid_connection = K.dot(prev_states, self.W_hid)
    # hidden to hidden connection
    hid_gate = _slice_x(hid_connection, slice(None, nb_units * 2))
    X_gate = _slice_x(X, slice(None, nb_units * 2))
    b_gate = (0 if self.b_init is None else
              _slice_x(self.b, slice(None, nb_units * 2)))
    # states
    hid_states = _slice_x(hid_connection, slice(nb_units * 2, None))
    X_states = _slice_x(X, slice(nb_units * 2, None))
    b_states = (0 if self.b_init is None else
                _slice_x(self.b, slice(nb_units * 2, None)))
    # new gates
    _ = self.gate_activation(X_gate + hid_gate + b_gate)
    update_values = _slice_x(_, slice(None, nb_units))
    reset_values = _slice_x(_, slice(nb_units, nb_units * 2))
    # candidate states: the reset gate scales the hidden-to-hidden term
    new_states = self.activation(X_states + reset_values * hid_states + b_states)
    # final new states: interpolate via the update gate
    next_states = (new_states * update_values +
                   prev_states * (1 - update_values))
    # mask the next state
    if mask is not None:
        next_states = K.switch(mask, next_states, prev_states)
    return next_states
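# A minimal NumPy sketch (illustrative only, not the library's code) of the
# single GRU step implemented by `_rnn` above, with the update/reset gates and
# candidate states written out explicitly. The weight layout is assumed to be
# the same concatenation [update | reset | states] used by `_slice_x`, and `x`
# is the precomputed input of width 3 * num_units.
import numpy as np

def gru_step_numpy(x, h_prev, W_hid, b, num_units,
                   gate_act=lambda v: 1. / (1. + np.exp(-v)),
                   act=np.tanh):
    n = num_units
    hid = h_prev.dot(W_hid)                  # (batch, 3 * n)
    gates = gate_act(x[:, :2 * n] + hid[:, :2 * n] + b[:2 * n])
    u = gates[:, :n]                         # update gate
    r = gates[:, n:]                         # reset gate
    # candidate states: reset gate scales the hidden-to-hidden term
    h_new = act(x[:, 2 * n:] + r * hid[:, 2 * n:] + b[2 * n:])
    # interpolate between candidate and previous states
    return u * h_new + (1. - u) * h_prev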
def _apply(self, X, h0=None, mask=None, **kwargs):
    input_shape = K.get_shape(X)
    # ====== check mask ====== #
    if mask is not None and (K.ndim(mask) != K.ndim(X) - 1 or
                             K.get_shape(mask)[-1] != input_shape[1]):
        raise Exception('Mask must have "%d" dimensions and the time dimension '
                        '(i.e. the second dimension) must be equal to "%d"'
                        ', but the given mask has shape "%s".' %
                        (K.ndim(X) - 1, input_shape[1], K.get_shape(mask)))
    # ====== initialize states ====== #
    h0 = _check_rnn_hidden_states(h0, self, input_shape, 'h0')
    # turn off repeat_states if batch_size already included
    if K.get_shape(h0)[0] != 1:
        self.repeat_states = False
    # ====== precompute input ====== #
    X = K.dot(X, self.W_in) if self.input_mode != 'skip' else X
    if self.input_mode == 'norm':
        # normalize all axes except the time dimension
        bn = BatchNorm(axes=(0, 1), activation=K.linear,
                       gamma_init=self.gamma, beta_init=self.beta,
                       mean_init=self.mean, inv_std_init=self.inv_std)
        X = bn(X)
    out = self._rnn(X, h0=h0, mask=mask,
                    **self.get_recurrent_info(kwargs))
    for i in out:
        K.add_shape(i, shape=tuple(input_shape[:-1]) + (self.num_units,))
    # only care about the first state
    return out[0] if len(out) == 1 else out
def _time_step(o, ids):
    ctx = X[:, ids:ids + self.n_time_context, :]
    ctx.set_shape((ctx.shape[0], self.n_time_context, ctx.shape[2]))
    ctx = tf.reshape(ctx, shape=(-1, ctx.shape[2]))
    # applying deep dense network
    for l in range(self.n_layers):
        ctx = K.dot(ctx, self.get('W%d' % l))
        if self.b_init is not None:
            ctx = ctx + self.get('b%d' % l)
        ctx = self.activation[l](ctx)
    ctx = tf.reshape(ctx, shape=(-1, self.n_time_context, new_feat_dim))
    # applying pooling over the context window
    if self.time_pool in ('concat', 'none'):
        ctx = tf.reshape(ctx,
                         shape=(tf.shape(ctx)[0],
                                self.n_time_context * new_feat_dim))
    elif self.time_pool == 'max':
        ctx = tf.reduce_max(ctx, axis=1)
    elif self.time_pool == 'min':
        ctx = tf.reduce_min(ctx, axis=1)
    elif self.time_pool == 'sum':
        ctx = tf.reduce_sum(ctx, axis=1)
    elif self.time_pool == 'avg':
        ctx = tf.reduce_mean(ctx, axis=1)
    elif self.time_pool == 'stat':
        mean, var = tf.nn.moments(ctx, axes=1)
        ctx = tf.concat([mean, tf.sqrt(var)], -1)
    return ctx
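# Illustrative NumPy sketch of the 'stat' pooling branch above: each context
# window of shape (batch, n_time_context, feat) collapses to its per-feature
# mean concatenated with its standard deviation, doubling the feature
# dimension. np.std with ddof=0 matches the biased variance of tf.nn.moments.
import numpy as np

def stat_pool_numpy(ctx):
    mean = ctx.mean(axis=1)                      # (batch, feat)
    std = ctx.std(axis=1)                        # sqrt of the biased variance
    return np.concatenate([mean, std], axis=-1)  # (batch, 2 * feat)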
def _apply(self, x):
    input_shape = K.get_shape(x)
    # calculate projection
    activation = K.dot(x, self.W)
    if hasattr(self, 'b') and self.b is not None:
        activation = activation + self.b
    # set shape for output
    K.add_shape(activation, input_shape[:-1] + (self.num_units,))
    # nonlinearity might change the shape of activation
    activation = self.activation(activation)
    return activation
def _apply(self, X, h0=None, c0=None, mask=None):
    batch_size = K.get_shape(X, native=True)[0]
    is_bidirectional = self.direction_mode == 'bidirectional'
    input_mode = ('skip' if self.input_mode in ('skip', 'norm')
                  else 'linear')
    # ====== precompute input ====== #
    # linear or norm input mode
    if self.input_mode == 'norm':
        X = K.dot(X, self.W_in)
        # normalize all axes except the time dimension
        bn = BatchNorm(axes=(0, 1), activation=K.linear,
                       gamma_init=self.gamma, beta_init=self.beta,
                       mean_init=self.mean, inv_std_init=self.inv_std)
        X = bn(X)
        # cudnnRNN doesn't support multiple inputs
        shapeX = K.get_shape(X, native=True)
        ndims = K.ndim(X)
        if 'rnn' in self.rnn_mode:
            N = 1
        elif self.rnn_mode == 'gru':
            N = 3
        else:
            N = 4
        newshape = [shapeX[i] for i in range(ndims - 1)] + [self.num_units, N]
        X = K.mean(K.reshape(X, newshape), axis=-1)
    # ====== hidden state ====== #
    num_layers = self.num_layers * 2 if is_bidirectional else self.num_layers
    require_shape = (num_layers, batch_size, self.num_units)
    h0 = _check_cudnn_hidden_init(h0, require_shape, self, 'h0')
    c0 = _check_cudnn_hidden_init(c0, require_shape, self, 'c0')
    # ====== parameters ====== #
    if self.params_split:
        parameters = K.concatenate([K.flatten(i, outdim=1)
                                    for i in self.parameters
                                    if not has_roles(i, INITIAL_STATE)])
    else:
        parameters = self.params
    # ====== return CuDNN RNN ====== #
    results = K.rnn_dnn(X, hidden_size=self.num_units,
                        rnn_mode=self.rnn_mode,
                        num_layers=self.num_layers,
                        parameters=parameters,
                        h0=h0, c0=c0,
                        input_mode=input_mode,
                        direction_mode=self.direction_mode,
                        dropout=self.dropout, name=self.name)
    if not self.return_states:
        results = results[0]  # only get the output
    return results
def _apply(self, X, h0=None, c0=None, mask=None, **kwargs):
    # check input_shape
    input_shape = K.get_shape(X)
    # ====== check mask ====== #
    if mask is not None and (K.ndim(mask) != 2 or
                             K.get_shape(mask)[-1] != input_shape[1]):
        raise Exception('Mask must be a 2-D matrix and the time dimension '
                        '(i.e. the second dimension) must be equal to "%d"'
                        ', but the given mask has shape "%s".' %
                        (input_shape[1], K.get_shape(mask)))
    # add broadcastable dimension for mask
    if mask is not None:
        mask = K.expand_dims(mask, dim=-1)
    # ====== initialize states ====== #
    # hidden states
    h0 = _check_rnn_hidden_states(h0, self, input_shape, 'h0')
    c0 = _check_rnn_hidden_states(c0, self, input_shape, 'c0')
    # turn off repeat_states if batch_size already included
    if K.get_shape(h0)[0] != 1 and K.get_shape(c0)[0] != 1:
        self.repeat_states = False
    # ====== precompute input ====== #
    # linear or norm input mode
    if self.input_mode != 'skip':
        X = K.dot(X, self.W_in)
        if self.input_mode == 'norm':
            # normalize all axes except the time dimension
            bn = BatchNorm(axes=(0, 1), activation=K.linear,
                           gamma_init=self.gamma, beta_init=self.beta,
                           mean_init=self.mean, inv_std_init=self.inv_std)
            X = bn(X)
    # skip input mode: repeat X to cover all 4 gates
    elif input_shape[-1] == self.num_units:
        X = K.repeat(X, 4, axes=-1)
    # ====== compute recurrent output ====== #
    out = self._rnn(X, h0=h0, c0=c0, mask=mask,
                    **self.get_recurrent_info(kwargs))
    if not self.return_cell_memory:
        out = out[:-1]
    for i in out:
        K.add_shape(i, shape=input_shape[:-1] + (self.num_units,))
    # only care about the first state
    return out[0] if len(out) == 1 else out
def test_linear_algebra_value(self):
    np.random.seed(1208)
    x = K.variable(np.random.randn(2, 4, 3))
    y = K.variable(np.random.rand(1, 2, 3, 5))
    z = K.dot(x, y)
    self.assertEqual(K.get_shape(z), (2, 4, 1, 2, 5))
    self.assertEqual(repr(np.sum(K.eval(z)))[:8],
                     "-1.0198305134529524"[:8])

    np.random.seed(1208)
    x = K.variable(np.random.randn(100, 3, 4, 5))
    y = K.variable(np.random.rand(100, 12, 5, 6))
    z = K.batched_dot(x, y)
    self.assertEqual(K.get_shape(z), K.eval(z).shape)
    self.assertEqual(repr(K.eval(z).sum())[:7], "1655.44")
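# Hedged sketch of the batched case above (illustrative): K.batched_dot is
# assumed here to apply the generalized dot independently to each of the 100
# batch elements, i.e. contracting the last axis of x with the second-to-last
# axis of y per element; the einsum below expresses that assumption directly.
import numpy as np

x = np.random.randn(100, 3, 4, 5)
y = np.random.rand(100, 12, 5, 6)
z = np.einsum('nabk,nckd->nabcd', x, y)  # assumed per-element contraction
assert z.shape == (100, 3, 4, 12, 6)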
def _rnn(self, X, h0, c0, mask=None):
    #####################################
    # X: sequence inputs (bias included)
    # h0: previous hidden states
    # c0: previous cell memory
    # W_hid: concatenated [W_input, W_forget, W_hidden, W_output]
    # mask: mask inputs (optional)
    prev_states = h0
    prev_memory = c0
    nb_units = self.num_units
    # hidden to hidden connection
    bias = 0 if self.b_init is None else self.b
    _ = X + K.dot(prev_states, self.W_hid) + bias
    hid_input = _slice_x(_, slice(None, nb_units))
    hid_forget = _slice_x(_, slice(nb_units, nb_units * 2))
    hid_hidden = _slice_x(_, slice(nb_units * 2, nb_units * 3))
    hid_output = _slice_x(_, slice(nb_units * 3, None))
    # peepholes connection
    if hasattr(self, 'peepholes'):
        hid_input += prev_memory * _slice_x(self.peepholes,
                                            slice(None, nb_units))
        hid_forget += prev_memory * _slice_x(self.peepholes,
                                             slice(nb_units, nb_units * 2))
    # calculate new gates
    input_gate = self.gate_activation(hid_input)
    forget_gate = self.gate_activation(hid_forget)
    new_memory = self.activation(hid_hidden)
    # next cell memory
    next_memory = (forget_gate * prev_memory +
                   input_gate * new_memory)
    # output gate (its peephole looks at the new cell memory)
    if hasattr(self, 'peepholes'):
        hid_output += next_memory * _slice_x(self.peepholes,
                                             slice(nb_units * 2, None))
    output_gate = self.gate_activation(hid_output)
    # new hidden state
    next_states = output_gate * self.activation(next_memory)
    # mask the next state
    if mask is not None:
        next_states = K.switch(mask, next_states, prev_states)
        next_memory = K.switch(mask, next_memory, prev_memory)
    return next_states, next_memory
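# A minimal NumPy sketch (not the library's implementation) of the peephole
# LSTM step in `_rnn` above. The gate pre-activations share one concatenated
# projection in the order [input | forget | hidden | output], and the peephole
# weights are assumed concatenated as [input | forget | output].
import numpy as np

def lstm_step_numpy(x, h_prev, c_prev, W_hid, b, peep, num_units,
                    gate_act=lambda v: 1. / (1. + np.exp(-v)),
                    act=np.tanh):
    n = num_units
    z = x + h_prev.dot(W_hid) + b                          # (batch, 4 * n)
    i = gate_act(z[:, :n] + c_prev * peep[:n])             # input gate
    f = gate_act(z[:, n:2 * n] + c_prev * peep[n:2 * n])   # forget gate
    g = act(z[:, 2 * n:3 * n])                             # candidate memory
    c_next = f * c_prev + i * g
    # output-gate peephole looks at the *new* cell memory
    o = gate_act(z[:, 3 * n:] + c_next * peep[2 * n:])
    return o * act(c_next), c_next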
def test_computational_graph2(self):
    np.random.seed(1208)
    X = K.variable(np.zeros((8, 12)), name='X')
    Y = K.variable(np.random.rand(12, 8), name='Y')
    Z = K.placeholder(shape=(8, 8), name='Z')
    a = K.dot(X, Y)
    add_roles(a, Auxiliary)
    a = a + Z
    g1 = K.ComputationGraph(a)
    self.assertEqual(len(g1.trainable_variables), 2)
    self.assertEqual(len(g1.placeholders), 1)
    self.assertEqual(len(g1.updates), 1)
    self.assertEqual(len(g1.auxiliary_variables), 1)
    f = K.function(Z, [a] + g1.auxiliary_variables)
    output = f(np.random.rand(8, 8))
    self.assertEqual(repr(np.sum(output[0]))[:5], "32.20")
    self.assertEqual(np.sum(output[1]), 0)
    self.assertEqual(np.unique(K.eval(X)).tolist(), [12.])
def _rnn(self, X, h0, mask=None):
    bias = 0. if self.b_init is None else self.b
    next_states = self.activation(X + K.dot(h0, self.W_hid) + bias)
    if mask is not None:
        next_states = K.switch(mask, next_states, h0)
    return next_states
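# Illustrative NumPy sketch of the masked simple-RNN step above: where the
# mask is 0 (a padded timestep) the previous state is carried through
# unchanged, mirroring what K.switch does.
import numpy as np

def simple_rnn_step_numpy(x, h_prev, W_hid, b, mask=None, act=np.tanh):
    h_next = act(x + h_prev.dot(W_hid) + b)
    if mask is not None:
        # mask (batch, 1) broadcasts over the feature dimension
        h_next = np.where(mask, h_next, h_prev)
    return h_next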
import numpy as np

from odin.utils import UnitTimer, Progbar
from odin import backend as K, nnet as N

X1 = K.placeholder(shape=(10000, 1000), name='X1')
X2 = K.placeholder(shape=(10000, 1000), name='X2')
X3 = K.placeholder(shape=(10000, 2000), name='X3')

y1 = K.placeholder(shape=(1000, 2000), name='y1')
y2 = K.placeholder(shape=(2000, 3000), name='y2')
y3 = K.placeholder(shape=(3000, 4000), name='y3')
y4 = K.placeholder(shape=(4000, 5000), name='y4')

# first graph: two separate input placeholders
z = K.dot(X1, y1) + K.dot(X2, y1)
z = K.dot(z, y2)
z = K.dot(z, y3)
z = K.dot(z, y4)
print(z)
f = K.function([X1, X2, y1, y2, y3, y4], outputs=z)

# second graph: one input placeholder, sliced in half
X1 = X3[:, :1000]
X2 = X3[:, 1000:]
z1 = K.dot(X1, y1) + K.dot(X2, y1)
z1 = K.dot(z1, y2)
z1 = K.dot(z1, y3)
z1 = K.dot(z1, y4)
print(z1)
f1 = K.function([X3, y1, y2, y3, y4], outputs=z1)
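# Hedged usage sketch (hypothetical; the original script stops after building
# the two functions): feed the same underlying data into both graphs and time
# them with the standard library. The positional call convention mirrors
# `f(np.random.rand(8, 8))` in test_computational_graph2 above; float32 is an
# assumed default dtype for the placeholders.
import time

X = np.random.rand(10000, 2000).astype('float32')
ys = [np.random.rand(*shape).astype('float32')
      for shape in [(1000, 2000), (2000, 3000), (3000, 4000), (4000, 5000)]]

start = time.time()
out = f(X[:, :1000], X[:, 1000:], *ys)
print('two placeholders:', time.time() - start, out.shape)

start = time.time()
out1 = f1(X, *ys)
print('one placeholder: ', time.time() - start, out1.shape)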