def cal_hidden_state(self, test, layernum):
    # take the raw input for the first layer, otherwise the activations of
    # the preceding layer
    if layernum == 0:
        acx = test
    else:
        acx = get_activations_single_layer(self.model, np.array([test]),
                                           self.layerName(layernum - 1))
    # a Keras LSTM packs the four gate kernels side by side, so the number
    # of units is a quarter of the kernel's second dimension
    units = int(int(self.model.layers[layernum].trainable_weights[0].shape[1]) / 4)
    # kernel, recurrent kernel, and bias of the LSTM layer
    W = self.model.layers[layernum].get_weights()[0]
    U = self.model.layers[layernum].get_weights()[1]
    b = self.model.layers[layernum].get_weights()[2]
    # slice out the input (i), forget (f), cell (c), and output (o) blocks
    W_i = W[:, :units]
    W_f = W[:, units:units * 2]
    W_c = W[:, units * 2:units * 3]
    W_o = W[:, units * 3:]
    U_i = U[:, :units]
    U_f = U[:, units:units * 2]
    U_c = U[:, units * 2:units * 3]
    U_o = U[:, units * 3:]
    b_i = b[:units]
    b_f = b[units:units * 2]
    b_c = b[units * 2:units * 3]
    b_o = b[units * 3:]
    # unroll the LSTM recurrence over the sequence, recording the hidden
    # state, cell state, and forget-gate values at every timestep
    h_t = np.zeros((self.imagesize, units))
    c_t = np.zeros((self.imagesize, units))
    f_t = np.zeros((self.imagesize, units))
    h_t0 = np.zeros((1, units))
    c_t0 = np.zeros((1, units))
    for i in range(0, self.imagesize):
        f_gate = hard_sigmoid(np.dot(acx[i, :], W_f) + np.dot(h_t0, U_f) + b_f)
        i_gate = hard_sigmoid(np.dot(acx[i, :], W_i) + np.dot(h_t0, U_i) + b_i)
        o_gate = hard_sigmoid(np.dot(acx[i, :], W_o) + np.dot(h_t0, U_o) + b_o)
        new_C = np.tanh(np.dot(acx[i, :], W_c) + np.dot(h_t0, U_c) + b_c)
        c_t0 = f_gate * c_t0 + i_gate * new_C
        h_t0 = o_gate * np.tanh(c_t0)
        c_t[i, :] = c_t0
        h_t[i, :] = h_t0
        f_t[i, :] = f_gate
    return [h_t, c_t, f_t]
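# The gate computations above call a `hard_sigmoid` helper that is not defined
# in this section. A minimal sketch, assuming it mirrors Keras's default hard
# sigmoid (the piecewise-linear approximation used as the recurrent activation
# of LSTM gates); the exact formula is an assumption, not taken from this code:
import numpy as np

def hard_sigmoid(x):
    # clip the affine map 0.2 * x + 0.5 into [0, 1]
    return np.clip(0.2 * x + 0.5, 0.0, 1.0)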
def cal_hidden_state(self, test, layer):
    # `layer` is unused; this variant is hard-wired to the LSTM at index 1,
    # fed by the activations of the first (embedding) layer
    acx = get_activations_single_layer(self.model, np.array([test]),
                                       self.layerName(0))
    acx = np.squeeze(acx)
    # four gate kernels are packed side by side in the LSTM kernel
    units = int(int(self.model.layers[1].trainable_weights[0].shape[1]) / 4)
    # kernel, recurrent kernel, and bias of the LSTM layer
    W = self.model.layers[1].get_weights()[0]
    U = self.model.layers[1].get_weights()[1]
    b = self.model.layers[1].get_weights()[2]
    # slice out the input (i), forget (f), cell (c), and output (o) blocks
    W_i = W[:, :units]
    W_f = W[:, units:units * 2]
    W_c = W[:, units * 2:units * 3]
    W_o = W[:, units * 3:]
    U_i = U[:, :units]
    U_f = U[:, units:units * 2]
    U_c = U[:, units * 2:units * 3]
    U_o = U[:, units * 3:]
    b_i = b[:units]
    b_f = b[units:units * 2]
    b_c = b[units * 2:units * 3]
    b_o = b[units * 3:]
    # unroll the LSTM recurrence over the padded review sequence
    h_t = np.zeros((self.max_review_length, units))
    c_t = np.zeros((self.max_review_length, units))
    f_t = np.zeros((self.max_review_length, units))
    h_t0 = np.zeros((1, units))
    c_t0 = np.zeros((1, units))
    for i in range(0, self.max_review_length):
        f_gate = hard_sigmoid(np.dot(acx[i, :], W_f) + np.dot(h_t0, U_f) + b_f)
        i_gate = hard_sigmoid(np.dot(acx[i, :], W_i) + np.dot(h_t0, U_i) + b_i)
        o_gate = hard_sigmoid(np.dot(acx[i, :], W_o) + np.dot(h_t0, U_o) + b_o)
        new_C = np.tanh(np.dot(acx[i, :], W_c) + np.dot(h_t0, U_c) + b_c)
        c_t0 = f_gate * c_t0 + i_gate * new_C
        h_t0 = o_gate * np.tanh(c_t0)
        c_t[i, :] = c_t0
        h_t[i, :] = h_t0
        f_t[i, :] = f_gate
    return h_t, c_t, f_t
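# Both cal_hidden_state variants fetch intermediate activations through
# `get_activations_single_layer`, which is also not shown here. A minimal
# sketch, assuming a Keras model whose layer names match what `layerName`
# returns; the truncated-model construction is an assumption about how the
# helper behaves:
from keras.models import Model

def get_activations_single_layer(model, inputs, layer_name):
    # build a sub-model that stops at the named layer and run it forward
    sub_model = Model(inputs=model.input,
                      outputs=model.get_layer(layer_name).output)
    return sub_model.predict(inputs)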
def get_mlp_model(n_in, n_out, n_layers=2, n_hidden=50):
    assert n_layers >= 2, '`n_layers` should be greater than 1 (otherwise it is just a single-layer perceptron)'

    # initialize real-valued weights: input -> hidden, hidden -> hidden, hidden -> output
    weights = [utils.get_weights('w_1', n_in, n_hidden)]
    weights += [utils.get_weights('w_%d' % i, n_hidden, n_hidden)
                for i in range(2, n_layers)]
    weights += [utils.get_weights('w_%d' % n_layers, n_hidden, n_out)]

    # initialize biases (one per layer)
    biases = [utils.get_weights('b_%d' % i, n_hidden) for i in range(1, n_layers)]
    biases += [utils.get_weights('b_%d' % n_layers, n_out)]

    # binarized views of the same underlying real-valued weights
    deterministic_binary_weights = [utils.binarize(w, mode='deterministic') for w in weights]
    stochastic_binary_weights = [utils.binarize(w, mode='stochastic') for w in weights]

    # symbolic variables
    lr = T.scalar(name='learning_rate')
    X = T.matrix(name='X', dtype=theano.config.floatX)
    y = T.matrix(name='y', dtype=theano.config.floatX)

    # forward passes: deterministic binarization for testing,
    # stochastic binarization for training
    d_outs = [utils.hard_sigmoid(T.dot(X, deterministic_binary_weights[0]) + biases[0])]
    for w, b in zip(deterministic_binary_weights[1:], biases[1:]):
        d_outs.append(utils.hard_sigmoid(T.dot(d_outs[-1], w) + b))
    s_outs = [utils.hard_sigmoid(T.dot(X, stochastic_binary_weights[0]) + biases[0])]
    for w, b in zip(stochastic_binary_weights[1:], biases[1:]):
        s_outs.append(utils.hard_sigmoid(T.dot(s_outs[-1], w) + b))

    # cost function (see utils): rescale outputs and targets from [-1, 1] to [0, 1]
    cost = utils.get_cost((s_outs[-1] + 1.) / 2., (y + 1.) / 2., mode='mse')

    # gradients are taken through the stochastic binary weights, but the
    # updates are applied to the real-valued weights, clipped to [-1, 1]
    params = weights + biases
    grads = [T.grad(cost, p) for p in stochastic_binary_weights + biases]
    updates = [(p, T.clip(p - lr * g, -1, 1)) for p, g in zip(params, grads)]

    # compile training and testing functions
    train_func = theano.function([X, y, lr], [cost], updates=updates)
    test_func = theano.function([X], [d_outs[-1]])
    grads_func = theano.function([X, y], grads)
    int_output_func = theano.function([X], s_outs + d_outs)

    return train_func, test_func, grads_func, weights + biases, int_output_func
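# `get_mlp_model` leans on several helpers from `utils` that are outside this
# section. A sketch of the binarization step, assuming the BinaryConnect
# convention: deterministic mode takes the sign of the weight, and stochastic
# mode samples +/-1 with probability hard_sigmoid(w) = clip((w + 1) / 2, 0, 1).
# The RNG setup and the function body are assumptions about what
# `utils.binarize` does:
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

_srng = MRG_RandomStreams(seed=1234)  # hypothetical shared random stream

def binarize(w, mode='deterministic'):
    if mode == 'deterministic':
        # sign(w) in {-1, +1}, mapping w == 0 to +1
        return T.switch(T.ge(w, 0.), 1., -1.)
    # stochastic: each entry is +1 with probability hard_sigmoid(w), else -1
    p = T.clip((w + 1.) / 2., 0., 1.)
    return T.switch(_srng.binomial(size=w.shape, p=p), 1., -1.)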
def cal_hidden_state(self, test, layernum):
    # batched variant: raw inputs for the first layer, otherwise the
    # activations of the preceding layer
    if layernum == 0:
        acx = np.array(test)
    else:
        acx = get_activations_single_layer(self.model, np.array(test),
                                           self.layerName(layernum - 1))
    # four gate kernels packed side by side, as in the loop variants above
    units = int(int(self.model.layers[layernum].trainable_weights[0].shape[1]) / 4)
    # kernel, recurrent kernel, and bias of the LSTM layer
    W = self.model.layers[layernum].get_weights()[0]
    U = self.model.layers[layernum].get_weights()[1]
    b = self.model.layers[layernum].get_weights()[2]
    # zero previous states: every timestep is evaluated against h_{t-1} = 0
    # and c_{t-1} = 0, so this vectorized version computes single-step gate
    # responses for all timesteps at once rather than unrolling the
    # recurrence as the loop variants above do
    h_t0 = np.zeros((acx.shape[0], 1, units))
    c_t0 = np.zeros((acx.shape[0], 1, units))
    # gate pre-activations for every sample and timestep in one shot;
    # shape (batch, timesteps, 4 * units)
    s_t = (np.tensordot(acx, W, axes=([2], [0]))
           + np.tensordot(h_t0, U, axes=([2], [0])) + b)
    i = hard_sigmoid(s_t[:, :, :units])           # input gate
    f = hard_sigmoid(s_t[:, :, units:units * 2])  # forget gate
    _c = np.tanh(s_t[:, :, units * 2:units * 3])  # candidate cell state
    o = hard_sigmoid(s_t[:, :, units * 3:])       # output gate
    c_t = i * _c + f * c_t0
    h_t = o * np.tanh(c_t)
    # (an equivalent unbatched per-timestep formulation appears in the first
    # cal_hidden_state above)
    return h_t, c_t, f
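# A small self-contained check of the caveat noted in the vectorized variant:
# with zero previous states, its gates agree with the recurrent loop only at
# the first timestep. All shapes, weights, and names below are illustrative;
# `hard_sigmoid` is redefined so the snippet runs standalone.
import numpy as np

rng = np.random.default_rng(0)
timesteps, feats, units = 5, 3, 4
x = rng.standard_normal((timesteps, feats))
W = rng.standard_normal((feats, 4 * units))
U = rng.standard_normal((units, 4 * units))
b = rng.standard_normal(4 * units)

def hard_sigmoid(z):
    return np.clip(0.2 * z + 0.5, 0.0, 1.0)

# vectorized: zero previous hidden state for every timestep
s = x @ W + b
f_vec = hard_sigmoid(s[:, units:2 * units])

# recurrent loop, as in the first two cal_hidden_state variants
h, c = np.zeros(units), np.zeros(units)
f_loop = np.zeros((timesteps, units))
for t in range(timesteps):
    s_t = x[t] @ W + h @ U + b
    i_g = hard_sigmoid(s_t[:units])
    f_g = hard_sigmoid(s_t[units:2 * units])
    c_bar = np.tanh(s_t[2 * units:3 * units])
    o_g = hard_sigmoid(s_t[3 * units:])
    c = f_g * c + i_g * c_bar
    h = o_g * np.tanh(c)
    f_loop[t] = f_g

print(np.allclose(f_vec[0], f_loop[0]))    # True: the first step matches
print(np.allclose(f_vec[1:], f_loop[1:]))  # generally False: later steps diverge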