def unit(parent_x, child_h, child_c, child_exists):
    """Combine a parent input with its children's states into (h, c).

    Builds input (i), output (o), update (u) and per-child forget (f) gates
    from ``parent_x`` and the masked child hidden states, then returns the
    new hidden state ``h`` and cell state ``c``.

    ``self`` is a free variable here, so this is presumably a closure defined
    inside a method.

    NOTE(review): the source text had lost every ``theano.map(`` head and the
    first operand of each ``T.dot(``.  ``self.W_i`` / ``self.W_o`` /
    ``self.W_u`` / ``self.W_f`` are restored by analogy with the visible
    ``self.U_*`` / ``self.b_*`` names -- confirm against the upstream code.
    """
    # One masked projection per child for each of the i/o/u gates.
    (h_i, h_o, h_u), _ = theano.map(
        fn=lambda Ui, Uo, Uu, h, exists: (exists * T.dot(Ui, h),
                                          exists * T.dot(Uo, h),
                                          exists * T.dot(Uu, h)),
        sequences=[self.U_i, self.U_o, self.U_u, child_h, child_exists])

    i = T.nnet.sigmoid(T.dot(self.W_i, parent_x) + h_i.sum(axis=0) + self.b_i)
    o = T.nnet.sigmoid(T.dot(self.W_o, parent_x) + h_o.sum(axis=0) + self.b_o)
    u = T.tanh(T.dot(self.W_u, parent_x) + h_u.sum(axis=0) + self.b_u)

    def _sub_f(U):
        # Sum of masked child projections for one slice of U_f.
        sub_h_f, _ = theano.map(
            fn=lambda sub_U, h, exists: exists * T.dot(sub_U, h),
            sequences=[U, child_h, child_exists])
        return sub_h_f.sum(axis=0)

    h_f, _ = theano.map(fn=lambda U: _sub_f(U), sequences=[self.U_f])

    # One forget gate per child; gates of missing children are zeroed.
    f = (T.nnet.sigmoid(T.dot(self.W_f, parent_x).dimshuffle('x', 0)
                        + h_f
                        + self.b_f.dimshuffle('x', 0))
         * child_exists.dimshuffle(0, 'x'))

    c = i * u + T.sum(f * child_c, axis=0)
    h = o * T.tanh(c)
    return h, c
def unit(parent_x, child_h, child_c, child_exists):
    """Combine a parent input with its children's states into (h, c).

    Builds input (i), output (o), update (u) and per-child forget (f) gates
    from ``parent_x`` and the masked child hidden states, then returns the
    new hidden state ``h`` and cell state ``c``.

    ``self`` is a free variable here, so this is presumably a closure defined
    inside a method.

    NOTE(review): the source text had lost every ``theano.map(`` head and the
    first operand of each ``T.dot(``.  ``self.W_i`` / ``self.W_o`` /
    ``self.W_u`` / ``self.W_f`` are restored by analogy with the visible
    ``self.U_*`` / ``self.b_*`` names -- confirm against the upstream code.
    """
    # One masked projection per child for each of the i/o/u gates.
    (h_i, h_o, h_u), _ = theano.map(
        fn=lambda Ui, Uo, Uu, h, exists: (exists * T.dot(Ui, h),
                                          exists * T.dot(Uo, h),
                                          exists * T.dot(Uu, h)),
        sequences=[self.U_i, self.U_o, self.U_u, child_h, child_exists])

    i = T.nnet.sigmoid(T.dot(self.W_i, parent_x) + h_i.sum(axis=0) + self.b_i)
    o = T.nnet.sigmoid(T.dot(self.W_o, parent_x) + h_o.sum(axis=0) + self.b_o)
    u = T.tanh(T.dot(self.W_u, parent_x) + h_u.sum(axis=0) + self.b_u)

    def _sub_f(U):
        # Sum of masked child projections for one slice of U_f.
        sub_h_f, _ = theano.map(
            fn=lambda sub_U, h, exists: exists * T.dot(sub_U, h),
            sequences=[U, child_h, child_exists])
        return sub_h_f.sum(axis=0)

    h_f, _ = theano.map(fn=lambda U: _sub_f(U), sequences=[self.U_f])

    # One forget gate per child; gates of missing children are zeroed.
    f = (T.nnet.sigmoid(T.dot(self.W_f, parent_x).dimshuffle('x', 0)
                        + h_f
                        + self.b_f.dimshuffle('x', 0))
         * child_exists.dimshuffle(0, 'x'))

    c = i * u + T.sum(f * child_c, axis=0)
    h = o * T.tanh(c)
    return h, c
def compute_cost_log_in_parallel(original_rnn_outputs, labels, func, x_ends, y_ends):
    """Run a log-space forward recursion over a batch and return per-example costs.

    ``func`` is the reduction applied to the three stacked transition
    candidates (called as ``func(stacked, 0)``) and to the final slice of
    forward values (called with one argument).

    NOTE(review): the two ``theano.map(<fn>`` heads were missing from the
    source text.  ``select_probabilities`` and ``compute_cost`` are restored
    as the mapped functions because each is defined immediately before the
    truncated call and is otherwise unused.
    """
    # log(0) = -inf disables the skip-by-two transition where the label is 0
    # or equals the label two positions away (as computed by shift_matrix).
    mask = T.log(1 - T.or_(T.eq(labels, T.zeros_like(labels)),
                           T.eq(labels, shift_matrix(labels, 2))))

    # Start with all mass on label position 0 (log 1 = 0, elsewhere log 0).
    initial_state = T.log(T.zeros_like(labels))
    initial_state = T.set_subtensor(initial_state[:, 0], 0)

    def select_probabilities(rnn_outputs, label):
        return rnn_outputs[:, label]

    rnn_outputs, _ = theano.map(select_probabilities,
                                [original_rnn_outputs, labels])
    rnn_outputs = T.log(rnn_outputs.dimshuffle((1, 0, 2)))

    def forward_step(probabilities, last_probabilities):
        all_forward_probabilities = T.stack(
            last_probabilities + probabilities,
            log_shift_matrix(last_probabilities, 1) + probabilities,
            log_shift_matrix(last_probabilities, 2) + probabilities + mask,
        )
        return func(all_forward_probabilities, 0)

    forward_probabilities, _ = theano.scan(fn=forward_step,
                                           sequences=rnn_outputs,
                                           outputs_info=initial_state)
    forward_probabilities = forward_probabilities.dimshuffle((1, 0, 2))

    def compute_cost(forward_probabilities, x_end, y_end):
        return -func(forward_probabilities[x_end - 1, y_end - 2:y_end])

    return theano.map(compute_cost,
                      [forward_probabilities, x_ends, y_ends])[0]
def compute_cost_log_in_parallel(original_rnn_outputs, labels, func, x_ends, y_ends):
    """Run a log-space forward recursion over a batch and return per-example costs.

    ``func`` is the reduction applied to the three stacked transition
    candidates (called as ``func(stacked, 0)``) and to the final slice of
    forward values (called with one argument).

    NOTE(review): the two ``theano.map(<fn>`` heads were missing from the
    source text.  ``select_probabilities`` and ``compute_cost`` are restored
    as the mapped functions because each is defined immediately before the
    truncated call and is otherwise unused.
    """
    # log(0) = -inf disables the skip-by-two transition where the label is 0
    # or equals the label two positions away (as computed by shift_matrix).
    mask = T.log(1 - T.or_(T.eq(labels, T.zeros_like(labels)),
                           T.eq(labels, shift_matrix(labels, 2))))

    # Start with all mass on label position 0 (log 1 = 0, elsewhere log 0).
    initial_state = T.log(T.zeros_like(labels))
    initial_state = T.set_subtensor(initial_state[:, 0], 0)

    def select_probabilities(rnn_outputs, label):
        return rnn_outputs[:, label]

    rnn_outputs, _ = theano.map(select_probabilities,
                                [original_rnn_outputs, labels])
    rnn_outputs = T.log(rnn_outputs.dimshuffle((1, 0, 2)))

    def forward_step(probabilities, last_probabilities):
        all_forward_probabilities = T.stack(
            last_probabilities + probabilities,
            log_shift_matrix(last_probabilities, 1) + probabilities,
            log_shift_matrix(last_probabilities, 2) + probabilities + mask,
        )
        return func(all_forward_probabilities, 0)

    forward_probabilities, _ = theano.scan(fn=forward_step,
                                           sequences=rnn_outputs,
                                           outputs_info=initial_state)
    forward_probabilities = forward_probabilities.dimshuffle((1, 0, 2))

    def compute_cost(forward_probabilities, x_end, y_end):
        return -func(forward_probabilities[x_end - 1, y_end - 2:y_end])

    return theano.map(compute_cost,
                      [forward_probabilities, x_ends, y_ends])[0]
def compute_cost_with_cross_entropy_in_parallel(original_rnn_outputs, labels, x_ends, y_ends):
    """Find the best-scoring alignment per example and return its cross-entropy.

    A max-based forward pass records a back-pointer for every step; each
    example is then traced backwards from its end position to recover an
    alignment, and the summed categorical cross-entropy of the network
    outputs against that alignment is returned.

    NOTE(review): the two ``theano.map(<fn>`` heads were missing from the
    source text.  ``select_probabilities`` and ``compute_cost`` are restored
    because each is defined before its truncated call and is otherwise unused.
    """
    mask = T.log(1 - T.or_(T.eq(labels, T.zeros_like(labels)),
                           T.eq(labels, shift_matrix(labels, 2))))
    arange = T.arange(labels.shape[1])

    # Start with all mass on label position 0 (log 1 = 0, elsewhere log 0).
    initial_state = T.log(T.zeros_like(labels))
    initial_state = T.set_subtensor(initial_state[:, 0], 0)

    def select_probabilities(rnn_outputs, label):
        return rnn_outputs[:, label]

    rnn_outputs, _ = theano.map(select_probabilities,
                                [original_rnn_outputs, labels])
    rnn_outputs = T.log(rnn_outputs.dimshuffle((1, 0, 2)))

    def forward_step(probabilities, last_probabilities):
        all_forward_probabilities = T.stack(
            last_probabilities + probabilities,
            log_shift_matrix(last_probabilities, 1) + probabilities,
            log_shift_matrix(last_probabilities, 2) + probabilities + mask,
        )
        max_probability, backlink = T.max_and_argmax(
            all_forward_probabilities, 0)
        # argmax is the shift amount (0, 1 or 2); convert to a source index.
        backlink = arange - backlink
        return max_probability, backlink

    results, _ = theano.scan(fn=forward_step,
                             sequences=rnn_outputs,
                             outputs_info=[initial_state, None])
    forward_probabilities, backward_pointers = results

    def compute_cost(rnn_outputs, forward_probabilities, backward_pointers,
                     x_end, y_end, label):
        def backward_step(backlinks, position):
            new_position = backlinks[position]
            return new_position, position

        initial_state = T.argmax(
            forward_probabilities[x_end - 1, y_end - 2:y_end]) + y_end - 2

        results, _ = theano.scan(fn=backward_step,
                                 sequences=backward_pointers[0:x_end, :],
                                 outputs_info=[initial_state, None],
                                 go_backwards=True)
        # The scan ran backwards in time; flip to chronological order.
        alignment = label[results[1][::-1]]

        return aggregate(
            categorical_crossentropy(rnn_outputs[0:x_end], alignment),
            mode='sum')

    forward_probabilities = forward_probabilities.dimshuffle((1, 0, 2))
    backward_pointers = backward_pointers.dimshuffle((1, 0, 2))

    return theano.map(compute_cost, [
        original_rnn_outputs, forward_probabilities, backward_pointers,
        x_ends, y_ends, labels
    ])[0]
# Builds a multi-sample objective estimate (Lhat) from nSamp samples drawn via
# self.mrec.getSample, plus gradients w.r.t. self.mprior and self.mrec
# parameters; returns [Lhat.mean(), dpyh, dqhgy].
# NOTE(review): this definition is collapsed onto physical lines that do not
# parse as Python.  In particular `p_yh, _ =, sequences=hsamp)` and
# `q_hgy, _ =, sequences=hsamp)` have lost the call head and the mapped
# density function; the visible text does not say which functions were
# mapped.  Restore them from the upstream source before use.
def compute_objective_and_gradients(self, nSamp): hsamp = self.mrec.getSample(self.Y, nSamp) # evaluate the generative model density P_\theta(y_i , h_i) p_yh, _ =, sequences=hsamp) # evaluate the recognition model density Q_\phi(h_i | y_i) q_hgy, _ =, sequences=hsamp) ff = (p_yh - q_hgy) sortidx = ff.argsort(axis=0) fmax = ff[(sortidx[-1], T.arange(ff.shape[-1]))].dimshuffle('x', 0) f_hy = T.exp(ff - fmax) sum_across_samples = f_hy.sum(axis=0, keepdims=True) Lhat = T.log(sum_across_samples / nSamp) + fmax col_idx = T.arange(ff.shape[-1]) # This 1e-12 constant is for debugging nans # in other parts of code. We know we'll get # nans where we'll then overwrite. Use it with # compute cross-validated estimates of Lhat # nanguard mode. hold_out_except_last = T.log( (sum_across_samples - f_hy) / (nSamp - 1)) + fmax #+1e-12) + fmax f2max_vec = ff[(sortidx[-2], T.arange(ff.shape[-1]))] f2max = f2max_vec.dimshuffle('x', 0) # Do tricky things to keep the numerics in order (avoid a term being \approxeq 0) ff_nolast = T.set_subtensor(ff[(sortidx[-1], col_idx)], f2max_vec) f_hy_last = T.exp(ff_nolast - f2max) # compute held-out sum when we hold out the maximum element hold_out_last = T.log( (f_hy_last.sum(axis=0, keepdims=True) - f_hy_last) / (nSamp - 1)) + f2max # compute final held-out estimates hold_out = T.set_subtensor( hold_out_except_last[(sortidx[-1], col_idx)], hold_out_last[(sortidx[-1], col_idx)]) Lhat_cv = Lhat - hold_out the_ws = f_hy / sum_across_samples weighted_q = T.sum((Lhat_cv * q_hgy + the_ws * ff).mean(axis=1)) #weighted_q = T.sum((Lhat_cv*q_hgy + the_ws*(p_yh-q_hgy)).sum(axis=1)) # gradients for approximate posterior dqhgy = T.grad(cost=weighted_q, wrt=self.mrec.getParams(), consider_constant=([the_ws, Lhat_cv] + hsamp), disconnected_inputs='ignore') # gradients for prior dpyh = T.grad(cost=T.sum((the_ws * ff).mean(axis=1)), wrt=self.mprior.getParams(), consider_constant=hsamp + [the_ws], disconnected_inputs='ignore') #dpyh =
T.grad(cost=T.sum((the_ws*(p_yh-q_hgy)).sum(axis=1)), wrt = self.mprior.getParams(), consider_constant=hsamp + [the_ws], disconnected_inputs='ignore') return [Lhat.mean(), dpyh, dqhgy]
# Builds a symbolic model with shared parameters W, c, V, b and returns
# ((W, c, V, b), x, hs, p_x, nll, p_x_cond).  `log` and `g` are free names
# that must come from the enclosing module.
# NOTE(review): this definition is collapsed onto one physical line and does
# not parse as Python.  Several expressions are truncated: ` = "W_col_T"` and
# ` = "prod"` have lost their assignment targets, `g(, hi) + b_i)` has lost
# the start of its argument, two `... =, sequences=[...])` calls have lost
# their callee, and the right-hand side of the final `p_x = ` is missing
# entirely.  The last of these cannot be inferred from the visible text;
# restore the function from the upstream source before use.
def make_nade(D, z_dim): log("make_nade with D={},z_dim={},g={}".format(D, z_dim, g)) x = T.fmatrix('x') c_vals = np.random.normal(0, 1, size=(1, z_dim)).astype('float32') c = theano.shared(c_vals, name="c") p_x = 1 def a_adder(W_col_T, x_i, acc): = "W_col_T" prod = W_col_T * T.sum(x_i) = "prod" ret_T = acc.T + prod return ret_T.T """ for i in range(D): W_col_vals = np.random.normal(0,1,size=(z_dim,1)).astype('float32') W_col = theano.shared(W_col_vals,name="W_col_%d"%(i+1)) W_cols.append(W_col) """ W_vals = np.random.normal(0, 1, size=(z_dim, D)).astype('float32') W = theano.shared(W_vals, name="W") a_s_W, _u = theano.scan(fn=a_adder, outputs_info=c[0, :], sequences=[W.T, x]) a_s_excess = T.concatenate([c, a_s_W], axis=0) a_s = a_s_excess[:D, :] V_vals = np.random.normal(0, 1, size=(D, z_dim)).astype('float32') V = theano.shared(V_vals, name="V") hs = g(a_s) b_val = np.random.normal(0, 1, size=(D, 1)).astype('float32') b = theano.shared(b_val, name="b") def scan_p_x_cond(V_row, hi, b_i): p_x_cond = g(, hi) + b_i) return p_x_cond p_x_cond, _u =, sequences=[V, hs, b]) def scan_p_x_cond_obs(x_i, p): ret = x_i * p + (1 - x_i) * (1 - p) return ret p_x_cond_obs, _u =, sequences=[x, p_x_cond]) nll = -T.sum(T.log(p_x_cond_obs)) p_x = return (W, c, V, b), x, hs, p_x, nll, p_x_cond
def step(visible, filtered_hidden_mean_m1, filtered_hidden_cov_m1):
    """Advance a batched linear-Gaussian filter by one time step.

    Returns the filtered hidden mean ``f``, the filtered hidden covariance
    ``F`` and the per-sample log-likelihood term ``log_l`` for ``visible``.
    ``transition``, ``emission``, ``hnm``, ``hnc``, ``vnm`` and ``vnc`` are
    free variables taken from the enclosing scope.

    NOTE(review): the source text had lost the heads of both ``T.dot`` calls
    and both ``theano.map(lambda`` calls.  The dot operands (``f_m1`` and
    ``hidden_mean``) are restored from the shape comments; confirm against
    the upstream code.
    """
    A, B = transition, emission                 # (h, h), (h, v)

    # Shortcuts for the filtered mean and covariance from the previous
    # time step.
    f_m1 = filtered_hidden_mean_m1              # (n, h)
    F_m1 = filtered_hidden_cov_m1               # (n, h, h)

    # Calculate mean of joint.
    hidden_mean = T.dot(f_m1, A) + hnm          # (n, h)
    visible_mean = T.dot(hidden_mean, B) + vnm  # (n, v)

    # Calculate covariance of joint.
    hidden_cov = stacked_dot(A.T, stacked_dot(F_m1, A))         # (n, h, h)
    hidden_cov += hnc

    visible_cov = stacked_dot(B.T, stacked_dot(hidden_cov, B))  # (n, v, v)
    visible_cov += vnc

    visible_hidden_cov = stacked_dot(hidden_cov, B)             # (n, h, v)

    visible_error = visible - visible_mean                      # (n, v)

    # Invert each sample's visible covariance separately.
    inv_visible_cov, _ = theano.map(
        lambda x: matrix_inverse(x), visible_cov)               # (n, v, v)

    visible_hidden_cov_T = visible_hidden_cov.dimshuffle(0, 2, 1)  # (n, v, h)
    D = stacked_dot(inv_visible_cov, visible_hidden_cov_T)

    f = (D * visible_error.dimshuffle(0, 1, 'x')).sum(axis=1)   # (n, h)
    f += hidden_mean

    F = hidden_cov
    F -= stacked_dot(visible_hidden_cov, D)

    log_l = (inv_visible_cov
             * visible_error.dimshuffle(0, 1, 'x')
             * visible_error.dimshuffle(0, 'x', 1)).sum(axis=(1, 2))  # (n,)
    log_l *= -.5

    dets, _ = theano.map(lambda x: det(x), visible_cov)

    log_l -= 0.5 * T.log(dets)
    log_l -= np.log(2 * np.pi)

    return f, F, log_l
def step(visible, filtered_hidden_mean_m1, filtered_hidden_cov_m1):
    """Advance a batched linear-Gaussian filter by one time step.

    Returns the filtered hidden mean ``f``, the filtered hidden covariance
    ``F`` and the per-sample log-likelihood term ``log_l`` for ``visible``.
    ``transition``, ``emission``, ``hnm``, ``hnc``, ``vnm`` and ``vnc`` are
    free variables taken from the enclosing scope.

    NOTE(review): the source text had lost the heads of both ``T.dot`` calls
    and both ``theano.map(lambda`` calls.  The dot operands (``f_m1`` and
    ``hidden_mean``) are restored from the shape comments; confirm against
    the upstream code.
    """
    A, B = transition, emission                 # (h, h), (h, v)

    # Shortcuts for the filtered mean and covariance from the previous
    # time step.
    f_m1 = filtered_hidden_mean_m1              # (n, h)
    F_m1 = filtered_hidden_cov_m1               # (n, h, h)

    # Calculate mean of joint.
    hidden_mean = T.dot(f_m1, A) + hnm          # (n, h)
    visible_mean = T.dot(hidden_mean, B) + vnm  # (n, v)

    # Calculate covariance of joint.
    hidden_cov = stacked_dot(A.T, stacked_dot(F_m1, A))         # (n, h, h)
    hidden_cov += hnc

    visible_cov = stacked_dot(B.T, stacked_dot(hidden_cov, B))  # (n, v, v)
    visible_cov += vnc

    visible_hidden_cov = stacked_dot(hidden_cov, B)             # (n, h, v)

    visible_error = visible - visible_mean                      # (n, v)

    # Invert each sample's visible covariance separately.
    inv_visible_cov, _ = theano.map(
        lambda x: matrix_inverse(x), visible_cov)               # (n, v, v)

    visible_hidden_cov_T = visible_hidden_cov.dimshuffle(0, 2, 1)  # (n, v, h)
    D = stacked_dot(inv_visible_cov, visible_hidden_cov_T)

    f = (D * visible_error.dimshuffle(0, 1, 'x')).sum(axis=1)   # (n, h)
    f += hidden_mean

    F = hidden_cov
    F -= stacked_dot(visible_hidden_cov, D)

    log_l = (inv_visible_cov
             * visible_error.dimshuffle(0, 1, 'x')
             * visible_error.dimshuffle(0, 'x', 1)).sum(axis=(1, 2))  # (n,)
    log_l *= -.5

    dets, _ = theano.map(lambda x: det(x), visible_cov)

    log_l -= 0.5 * T.log(dets)
    log_l -= np.log(2 * np.pi)

    return f, F, log_l
# Maps some per-object encoder over X (and over masks when given) and returns
# the resulting histograms; the scan updates are discarded.
# NOTE(review): this definition is collapsed onto one physical line and does
# not parse as Python: both `histograms, updates =, sequences=(...))` calls
# have lost the call head and the mapped function.  The visible text does not
# say which function was mapped; restore it from the upstream source.
def sym_histograms(self, X, masks=None): """ Encodes a set of objects (X is a tensor3) :param X: tensor3 containing the feature vectors for each object :return: """ if masks is None: histograms, updates =, sequences=(X, )) else: histograms, updates =, sequences=(X, masks)) return histograms
# Builds the negative-ELBO objective from a KL term and a Gaussian
# log-likelihood term, then compiles self.f, self.lowerboundfunction and
# self.gradientfunction over the inputs [y, W, U, V].
# NOTE(review): this definition is collapsed onto one physical line and does
# not parse as Python: `results, updates =, sequences=[W, U],
# non_sequences=[V])` and `results2, updates2 =, sequences=[W, U])` have lost
# the call head and the mapped function.  The visible text does not say which
# functions were mapped; restore them from the upstream source.
# NOTE(review): the docstring tag `@escription` looks like a typo for
# `@description`.
def createObjectiveFunction(self): ''' @escription: initialize objective function and minimization function @X,y data matrix/vector @u random noise for simulator @v standard normal for reparametrization trick ''' y = T.dvector("y") W, U = T.dvectors("W", "U") V = T.dscalar("V") mu = self.params[0] #logSigma = self.params[1] logSigma = sharedX(0.6) logLambda = sharedX(0) #self.params[2] negKL = 0.5 * self.dimTheta + 0.5 * T.sum(2 * logSigma - mu**2 - T.exp(logSigma)**2) results, updates =, sequences=[W, U], non_sequences=[V]) f = results results2, updates2 =, sequences=[W, U]) f2 = results2 #SSE = T.sum((y-f)**2) logLike = -self.m * ( 0.5 * np.log(2 * np.pi) + logLambda) - 0.5 * T.sum( (T.flatten(y) - T.flatten(f))**2) / (T.exp(logLambda)**2) #logLike2 = -self.m*(0.5 * np.log(2 * np.pi) + logLambda)-0.5*T.sum((y-f2)**2)/(T.exp(logLambda)**2) elbo = (negKL + logLike) #elbo2 = (negKL + logLike2) obj = -elbo #obj = SSE self.f = th.function([y, W, U, V], f, updates=updates, on_unused_input='ignore') self.lowerboundfunction = th.function([y, W, U, V], obj, updates=updates, on_unused_input='ignore') derivatives = T.grad(obj, self.params) self.gradientfunction = th.function([y, W, U, V], derivatives, updates=updates, on_unused_input='ignore')
# Tiles the input voxel grid once per camera, renders it with self.op and
# stores the result in self._output after a reshape and dimshuffle.
# NOTE(review): this definition is collapsed onto one physical line and does
# not parse as Python: `camlocs =, cams)[0]`, `raydirs =, cams)[0]` and
# `rendered_sub =, [rendered, cams])[0]` have lost the call head and the
# mapped function.  The visible text does not say which functions were
# mapped; restore them from the upstream source.
def set_output(self): cshape = self.camera_params.shape # (c, batch, 11) voxel = self._input voxel_tiled = tensor.tile(voxel, (cshape[0], 1, 1, 1, 1)) cams = tensor.reshape(self.camera_params, (cshape[0] * cshape[1], cshape[2])) camlocs =, cams)[0].astype('float32') raydirs =, cams)[0].astype('float32') rendered = self.op(voxel_tiled, camlocs, raydirs) rendered_sub =, [rendered, cams])[0] output_shape = (cshape[0], cshape[1], self.img_h, self.img_w, self.feat_d) self._output = tensor.reshape(rendered_sub, output_shape) self._output = self._output.dimshuffle(0, 1, 4, 2, 3)
def pv_function(self, tensor_input):
    """Fill the index / p / v chart matrices row by row for ``tensor_input``.

    Row 0 is initialised from the rows of ``self.L`` selected by
    ``tensor_input``; each later row ``i`` is produced by mapping
    ``self._pv_function`` over the first ``max_length - i`` columns, given the
    rows built so far.

    Returns ``(indexf_matrix, pf_matrix, vf_matrix)``.

    NOTE(review): both call heads were missing from the source text
    (``results, updates = fn = ...``); they are restored as ``theano.map(``,
    which accepts the visible ``fn`` / ``sequences`` / ``non_sequences`` /
    ``name`` keywords.
    """
    indexf_matrix = theano.shared(
        np.zeros([self.max_length, self.max_length], dtype=np.int32),
        name='indexf_matrix',
        borrow=True)
    pf_matrix = theano.shared(
        np.zeros([self.max_length, self.max_length],
                 dtype=theano.config.floatX),
        name='pf_matrix',
        borrow=True)
    pf_matrix = T.set_subtensor(pf_matrix[0, 0:tensor_input.shape[0]], 1.0)
    vf_matrix = theano.shared(
        np.zeros((self.max_length, self.max_length, self.size),
                 dtype=theano.config.floatX),
        name='vf_matrix',
        borrow=True)

    # Row 0: look up one row of self.L per input index.
    results, updates = theano.map(
        fn=lambda i, L, t_tensor_input: L[t_tensor_input[i]],
        sequences=[T.arange(tensor_input.shape[0])],
        non_sequences=[self.L, tensor_input],
        name='vf_matrix prepare')
    vf_matrix = T.set_subtensor(
        vf_matrix[0, 0:tensor_input.shape[0]], results)

    for i in range(1, self.max_length):
        width = self.max_length - i
        results, updates = theano.map(
            fn=self._pv_function,
            sequences=[T.arange(width)],
            non_sequences=[i, pf_matrix, vf_matrix])
        indexf_matrix = T.set_subtensor(indexf_matrix[i, 0:width], results[0])
        pf_matrix = T.set_subtensor(pf_matrix[i, 0:width], results[1])
        vf_matrix = T.set_subtensor(vf_matrix[i, 0:width], results[2])

    return indexf_matrix, pf_matrix, vf_matrix
# Computes attention weights from masked activations: positive differences
# between successive activation weights are normalised along the last axis
# and returned.  `maxsents` is a free name from the enclosing module.
# NOTE(review): this definition is collapsed onto one physical line and does
# not parse as Python: `activation_ranks =, x_switched)[0]` and
# `activation_weights =, activation_energies)[0]` have lost the call head and
# the mapped function.  The visible text does not say which functions were
# mapped; restore them from the upstream source.
def call(self, x, mask): x_switched = K.switch(mask[:, :, None], x, 0.0) activation_ranks =, x_switched)[0] activation_energies = K.switch(mask[:, None, :maxsents], activation_ranks, -1e20) activation_weights =, activation_energies)[0] base_values = (mask * ((K.sum(mask[:, :maxsents] + 0.0, axis=-1))** -1)[:, None])[:, None, :maxsents] pad_weights = K.concatenate( (base_values, activation_weights[:, :-1, :]), axis=1) diff_weights = activation_weights - pad_weights posi_diffs = K.switch(diff_weights > 0, diff_weights, 0.0) norm_pds = (K.sum(posi_diffs, axis=-1) + K.epsilon())**-1 attentions = posi_diffs * norm_pds[:, :, None] return attentions
def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
    """Turn raw activations into probabilities over ``[low_bound, high_bound)``.

    The activations are flattened to rows of ``self.RAW_ENCODING_WIDTH``,
    soft-maxed, and each row is rolled by its relative position and tiled to
    span ``high_bound - low_bound`` entries.  Two extra leading entries are
    taken from the row itself when ``self.with_artic`` is set, and are ones
    otherwise.

    ``low_bound`` and ``high_bound`` must be plain Python numbers because
    they are passed to ``math.ceil``.

    NOTE(review): the ``theano.map(_scan_fn`` head was missing from the
    source text; ``_scan_fn`` is restored because it is defined just before
    the truncated call and is otherwise unused.
    """
    squashed = T.reshape(activations, (-1, self.RAW_ENCODING_WIDTH))
    probs = T.nnet.softmax(squashed)

    span = high_bound - low_bound
    # Force true division: under Python 2, int / int floors before ceil() and
    # yields too few tiles whenever span is not a multiple of WINDOW_SIZE.
    num_tile = int(math.ceil(span / float(self.WINDOW_SIZE)))

    def _scan_fn(cprobs, cpos):
        if self.with_artic:
            abs_probs = cprobs[:2]
            rel_probs = cprobs[2:]
        else:
            rel_probs = cprobs
            abs_probs = T.ones((2,))

        aligned = T.roll(rel_probs, (cpos - low_bound) % 12)
        tiled = T.tile(aligned, (num_tile,))[:span]
        return T.concatenate([abs_probs, tiled], 0)

    from_scan, _ = theano.map(
        _scan_fn, sequences=[probs, T.flatten(relative_position)])

    # Restore the leading axes of `activations`; only the last axis changes.
    newshape = T.concatenate([activations.shape[:-1], [2 + span]], 0)
    return T.reshape(from_scan, newshape, ndim=activations.ndim)
# Extracts a patch from `image` at `location`/`scale`, either for the whole
# batch at once (self.batched_window) or per example via the nested map_fn,
# and records an auxiliary variable named "savings".
# NOTE(review): this definition is collapsed onto one physical line and does
# not parse as Python.  `patch, _ =, sequences=[image, a, b, location,
# scale])` has lost its call head and mapped function, and the `savings = (1
# - T.cast(...) / ` expression is cut off before its divisor.  Also note that
# map_fn declares six parameters while only five sequences are listed.  The
# missing pieces cannot be inferred from the visible text; restore them from
# the upstream source.
def apply(self, image, image_shape, location, scale): a, b = self.compute_hard_windows(image_shape, location, scale) if self.batched_window: patch = self.apply_inner(image, location, scale, a[0], b[0]) else: def map_fn(image, image_shape, a, b, location, scale): # apply_inner expects a batch axis image = T.shape_padleft(image) location = T.shape_padleft(location) scale = T.shape_padleft(scale) patch = self.apply_inner(image, location, scale, a, b) # return without batch axis return patch[0] patch, _ =, sequences=[image, a, b, location, scale]) savings = (1 - T.cast( (b - a).prod(axis=1), floatX) / self.add_auxiliary_variable(savings, name="savings") return patch
def get_output_for(self, input, **kwargs):
    """Sample ``self.patches_per_example`` random patches from every example.

    When ``self.pad`` is set the input is first zero-padded by
    ``patch_size - 1`` on each spatial side.  The result is reshaped to
    ``(-1, channels, patch_h, patch_w)``.

    NOTE(review): both ``theano.map(`` heads were missing from the source
    text; the outer mapped function is restored as ``sample_one_image``,
    which is defined above and otherwise unused.
    """
    patch_h, patch_w = self.patch_size[0], self.patch_size[1]

    def sample_one_image(img, y, x):
        # One patch per (x, y) offset pair of this example.
        return theano.map(
            lambda x, y, image: image[:, y:(y + patch_h), x:(x + patch_w)],
            sequences=[x, y],
            non_sequences=img)[0]

    if self.pad:
        height, width = input.shape[2], input.shape[3]
        shp = (input.shape[0], input.shape[1],
               height + patch_h * 2 - 2,
               width + patch_w * 2 - 2)
        padded_input = T.zeros(shp)
        # Explicit end offsets: the former `(p - 1):(-p + 1)` slice collapses
        # to the empty `0:0` when the patch size is 1.
        padded_input = T.set_subtensor(
            padded_input[:, :,
                         (patch_h - 1):(patch_h - 1 + height),
                         (patch_w - 1):(patch_w - 1 + width)],
            input)
        input = padded_input

    y = self.rng.random_integers(
        size=(input.shape[0], self.patches_per_example),
        low=0,
        high=input.shape[2] - patch_h)
    x = self.rng.random_integers(
        size=(input.shape[0], self.patches_per_example),
        low=0,
        high=input.shape[3] - patch_w)

    patches = theano.map(sample_one_image, sequences=[input, y, x])[0]
    return patches.reshape((-1, input.shape[1], patch_h, patch_w))
def get_reward_sequences(self, env_state_sessions, agent_action_sessions):
    """Computes the rewards given to agent at each time step for each batch.

    :param env_state_sessions: Environment state [batch_i,seq_i,state_units] history for all sessions.
    :type env_state_sessions: theano tensor [batch_i,seq_i,state_units]
    :param agent_action_sessions: Actions chosen by agent at each tick for all sessions.
    :type agent_action_sessions: int[batch_i,seq_i]

    :return rewards: What reward was given to an agent for corresponding action from state in that batch.
    :rtype: float[batch_i,seq_i]

    NOTE(review): the ``theano.map(compute_reward`` head was missing from the
    source text; ``compute_reward`` is restored because it is defined just
    before the truncated call and is otherwise unused.
    """
    # Either argument may be a single tensor or a list of tensors.
    env_state_sessions = check_list(env_state_sessions)
    n_states = len(env_state_sessions)
    agent_action_sessions = check_list(agent_action_sessions)
    n_actions = len(agent_action_sessions)

    def compute_reward(batch_i, *args):
        session_states, session_actions = unpack_list(
            args, [n_states, n_actions])
        return self.get_reward(session_states, session_actions, batch_i)

    batch_indices = T.arange(agent_action_sessions[0].shape[0])
    sequences = [batch_indices] + env_state_sessions + agent_action_sessions

    rewards, updates = theano.map(compute_reward, sequences=sequences)

    assert len(updates) == 0
    # Reshape back to the original [batch_i, seq_i] layout.
    return rewards.reshape(agent_action_sessions[0].shape)
def cosine_similarity(x, y, eps=1e-6):
    r"""
    Cosine similarity between a vector and each row of a base matrix.

    The computation is mapped over the leading axis of both ``x`` and ``y``,
    so the shapes below presumably describe one element of that leading
    (batch) axis -- TODO confirm with callers.

    Parameters
    ----------
    x: a 1D Theano variable
        Vector to compare to each row of the matrix y.
    y: a 2D Theano variable
        Matrix to be compared to
    eps: float
        Precision of the operation (necessary for differentiability).

    Return
    ------
    z: a 1D Theano variable
        A vector whose components are the cosine similarities
        between x and each row of y.

    Notes
    -----
    NOTE(review): the ``T.dot(x`` and ``theano.map(step`` heads were missing
    from the source text and have been restored.
    """
    def _cosine_similarity(x, y, eps=1e-6):
        y = y.dimshuffle(1, 0)
        z = T.dot(x, y)
        # eps keeps the square root differentiable for zero vectors.
        z /= T.sqrt(T.sum(x * x) * T.sum(y * y, axis=0) + eps)
        return z

    def step(x_b, y_b):
        return _cosine_similarity(x_b, y_b, eps)

    z, _ = theano.map(step, sequences=[x, y])
    return z
def gen_full_alignment(self):
    """Filter, one-hot encode and (optionally) weight the alignment.

    Steps, all operating on instance state:

    1. Reduce every sequence to its upper-cased focus columns, with ``.``
       replaced by ``-``.
    2. Drop sequences containing a character that is neither in
       ``self.alphabet`` nor ``-``.
    3. One-hot encode the remaining sequences into ``self.x_train`` and
       record their names in ``self.x_train_name_list``.
    4. Set ``self.weights`` (computed with Theano when ``self.calc_weights``
       is true, otherwise all ones) and ``self.Neff`` (their sum).

    NOTE(review): the ``theano.map(lambda`` head and both ``T.dot(`` heads of
    the weight expression were missing from the source text.  The operands
    ``T.dot(X_flat, x)`` and ``T.dot(x, x)`` are a reconstruction -- confirm
    against the upstream code.
    """
    # Get only the focus columns
    for seq_name, sequence in self.seq_name_to_sequence.items():
        # Replace periods with dashes (the uppercase equivalent)
        sequence = sequence.replace(".", "-")
        self.seq_name_to_sequence[seq_name] = [
            sequence[ix].upper() for ix in self.focus_cols
        ]

    # Remove sequences that have bad characters.  any() stops at the first
    # offending letter instead of queueing the same name once per letter.
    alphabet_set = set(self.alphabet)
    seq_names_to_remove = [
        seq_name
        for seq_name, sequence in self.seq_name_to_sequence.items()
        if any(letter not in alphabet_set and letter != "-"
               for letter in sequence)
    ]
    for seq_name in seq_names_to_remove:
        del self.seq_name_to_sequence[seq_name]

    # Encode the sequences
    print("Encoding sequences")
    self.x_train = np.zeros((len(self.seq_name_to_sequence),
                             len(self.focus_cols),
                             len(self.alphabet)))
    self.x_train_name_list = []
    for i, (seq_name, sequence) in enumerate(
            self.seq_name_to_sequence.items()):
        self.x_train_name_list.append(seq_name)
        for j, letter in enumerate(sequence):
            # Letters missing from aa_dict (e.g. "-") stay all-zero.
            if letter in self.aa_dict:
                k = self.aa_dict[letter]
                self.x_train[i, j, k] = 1.0

    # Fast sequence weights with Theano
    if self.calc_weights:
        print("Computing sequence weights")
        X = T.tensor3("x")
        cutoff = T.scalar("theta")
        X_flat = X.reshape((X.shape[0], X.shape[1] * X.shape[2]))
        # Weight = 1 / (number of sequences whose normalised overlap with
        # this one exceeds 1 - cutoff).
        N_list, updates = theano.map(
            lambda x: 1.0 / T.sum(
                T.dot(X_flat, x) / T.dot(x, x) > 1 - cutoff),
            X_flat)
        weightfun = theano.function(inputs=[X, cutoff], outputs=[N_list],
                                    allow_input_downcast=True)
        self.weights = weightfun(self.x_train, self.theta)[0]
    else:
        # If not using weights, use an isotropic weight matrix
        self.weights = np.ones(self.x_train.shape[0])

    self.Neff = np.sum(self.weights)

    print("Neff =", str(self.Neff))
    print("Data Shape =", self.x_train.shape)
def get_reward_sequences(self, env_state_sessions, agent_action_sessions):
    """
    computes the rewards given to agent at each time step for each batch
    parameters:
        env_state_seq - environment state [batch_i,seq_i,state_units] history for all sessions
        agent_action_seq - int[batch_i,seq_i]
    returns:
        rewards float[batch_i,seq_i] - what reward was given to an agent for corresponding action from state in that batch

    NOTE(review): the ``theano.map(compute_reward`` head was missing from the
    source text; ``compute_reward`` is restored because it is defined just
    before the truncated call and is otherwise unused.
    """
    def compute_reward(batch_i, session_states, session_actions):
        return self.get_reward(session_states, session_actions, batch_i)

    sequences = [
        T.arange(env_state_sessions.shape[0]),
        env_state_sessions,
        agent_action_sessions,
    ]

    rewards, updates = theano.map(compute_reward, sequences=sequences)

    assert len(updates) == 0
    # Reshape back to the original [batch_i, seq_i] layout.
    return rewards.reshape(agent_action_sessions.shape)
def connect(self, S):
    """Wire the recurrent layer to the input sequence ``S`` and compile it.

    Sets ``self.H`` (hidden states), ``self.output`` (per-step outputs),
    ``self.prediction`` (argmax of each output), ``self.final_state``, the
    compiled ``self.outputter`` / ``self.predicter`` functions, and marks the
    object as ``CONNECTED``.

    NOTE(review): the ``T.dot(`` heads and the ``theano.map(`` head were
    missing from the source text.  The dot operands (``s_current``,
    ``h_prev``, ``h_t``) are restored from the weight names ``W_ih`` /
    ``W_hh`` / ``W_ho`` -- confirm against the upstream code.
    """
    self.S = S

    def step(s_current, h_prev):
        h_t = self.activation(T.dot(s_current, self.W_ih)
                              + T.dot(h_prev, self.W_hh))
        y_t = self.activation(T.dot(h_t, self.W_ho))
        return h_t, y_t

    # Only h is fed back into the recurrence; y has no initial state.
    [self.H, self.output], _ = theano.scan(
        step,
        sequences=self.S,
        outputs_info=[self.h_init, None])

    self.prediction, _ = theano.map(lambda x: T.argmax(x),
                                    sequences=self.output)

    self.final_state = self.H[self.H.shape[0] - 1]

    self.outputter = theano.function([self.S], self.output)
    self.predicter = theano.function([self.S], self.prediction)
    self.CONNECTED = True
def gaussian_filter_2d_variable_sigma(input, sigmas, window_radius=None,
                                      border_mode='zero'):
    """Blur each element along axis 0 of ``input`` with its own 1-D kernel.

    ``input`` must be 4-dimensional.  One kernel per entry of ``sigmas`` is
    built and applied separably -- first along the last axis, then along the
    third -- to the matching slice of the padded input.  The result is
    reshaped to ``input.shape``.

    NOTE(review): the ``theano.map(filter_sigma`` head was missing from the
    source text; ``filter_sigma`` is restored because it is the only nested
    function and is otherwise unused.
    """
    def filter_sigma(idx, kernel):
        dimpattern_w = ('x', 'x', 'x', 0)
        dimpattern_h = ('x', 'x', 0, 'x')
        filter_w = kernel.dimshuffle(dimpattern_w)
        blur_w = T.nnet.conv2d(
            padded_input[idx:idx + 1], filter_w,
            border_mode=_get_chained_w_h_conv_border(conv_border, 'w'),
            filter_shape=[1, 1, 1, None])
        filter_h = kernel.dimshuffle(dimpattern_h)
        return T.nnet.conv2d(
            blur_w, filter_h,
            border_mode=_get_chained_w_h_conv_border(conv_border, 'h'),
            filter_shape=[1, 1, None, 1])

    ndim = 4
    assert input.ndim == ndim, \
        "there must be {} dimensions, got {}".format(ndim, input.ndim)
    window_radius = gaussian_kernel_default_radius(sigmas, window_radius)
    padded_input, conv_border = add_border(input, window_radius, border_mode)
    kernel = gaussian_kernel_1d(sigmas, window_radius)
    blur, _ = theano.map(
        filter_sigma,
        sequences=[T.arange(sigmas.shape[0]), kernel])
    return blur.reshape(input.shape)
def getSample(self, Y, nSamp=1):
    """Evaluate every layer of ``self.sbn_nn`` on ``Y``, ``nSamp`` times.

    Each evaluation yields the outputs of all layers in reverse order (last
    layer first); the results of the ``nSamp`` evaluations are returned as
    produced by the map.

    NOTE(review): the ``theano.map(get_layers`` head was missing from the
    source text; ``get_layers`` is restored because it is the only nested
    function and is otherwise unused.
    """
    def get_layers(ii):
        # `ii` is only the sample counter; the network input is always Y.
        output = lasagne.layers.get_output(
            lasagne.layers.get_all_layers(self.sbn_nn), inputs=Y)
        return output[::-1]

    output, _ = theano.map(get_layers, T.arange(nSamp))
    return output
def compile_vime_reward(l_prediction, l_prev_states, l_actions, weights,
                        get_loss=lambda pred, real: T.mean((pred - real)**2),
                        n_samples=1,
                        delta=0.01, **kwargs):
    """compiles a function that predicts vime reward for each state in a batch

    The compiled function takes ``(prev_states, actions, next_states)`` and
    returns one reward per row, computed by ``get_r_vime_on_state`` from the
    loss between the network prediction and the actual next state.

    With ``n_samples == 1`` the prediction comes from a single
    ``lasagne.layers.get_output`` call; otherwise from ``sample_output``.
    Extra ``kwargs`` are forwarded to whichever of the two is used.

    NOTE(review): the ``theano.map(lambda`` head was missing from the source
    text and has been restored.
    """
    prev_states = T.matrix("previous states")
    actions = T.ivector("actions")
    next_states = T.matrix("next states")

    if n_samples == 1:
        def get_bnn(state, action):
            # [None] adds the batch axis the network expects.
            return lasagne.layers.get_output(
                l_prediction,
                inputs={l_prev_states: state[None, :],
                        l_actions: action[None]},
                **kwargs)
    else:
        def get_bnn(state, action):
            return sample_output(
                l_prediction,
                input_dict={l_prev_states: state[None, :],
                            l_actions: action[None]},
                n_samples=n_samples,
                **kwargs)

    vime_reward_per_state, auto_updates = theano.map(
        lambda s, a, s_next: get_r_vime_on_state(
            weights, get_loss(get_bnn(s, a), s_next), delta),
        sequences=[prev_states, actions, next_states])

    return theano.function([prev_states, actions, next_states],
                           vime_reward_per_state,
                           updates=auto_updates,
                           allow_input_downcast=True)
def test_function5(self):
    """Exploratory check: gradients of scan outputs w.r.t. a shared variable.

    Scans ``joke`` over ``[1.0, 2.0, 3.0]`` (each step also requests the
    update ``w <- w - 1``), maps ``T.grad(hs[h], w)`` over the outputs, then
    prints the evaluated gradients and the value of ``w``.  There are no
    assertions.

    NOTE(review): the ``theano.map(upd`` head was missing from the source
    text; ``upd`` is restored because it is defined just before the truncated
    call and is otherwise unused.  The Python 2 ``print`` statements were
    converted to calls so the function parses under Python 3; under Python 2
    without ``print_function`` the two-value prints show a tuple instead.
    """
    w = theano.shared(1.0, name="w")

    def joke(a, b):
        k = w * a
        return k, {w: w - 1.0}

    hs, _ = theano.scan(joke,
                        sequences=[np.array([1.0, 2.0, 3.0])],
                        outputs_info=[np.float64(1.0)])
    print(hs, _)

    def upd(h):
        return T.grad(hs[h], w)

    gs, up = theano.map(upd, sequences=[T.arange(hs.shape[0])])
    print(gs, up)

    # The scan updates are deliberately not applied (updates=[]).
    func = theano.function(inputs=[], outputs=gs, updates=[])
    print(func())
    print(w.get_value())
def build_validation(X, Y):
    """Sum the likelihood over mega-batches of ``(X, Y)``, rescale, subtract ``_KLD``.

    ``megabatch_size``, ``pool_ind``, ``param_pool_size``, ``dataset_size``,
    ``simple_build_likelihood`` and ``_KLD`` are free names from the
    enclosing scope.

    NOTE(review): the callee name before ``(myscanfunc, ...)`` was missing
    from the source text.  It is restored as ``theano.map``; ``theano.scan``
    takes the same arguments here -- confirm against the upstream code.
    """
    def myscanfunc(ind):
        start = ind * megabatch_size
        stop = (ind + 1) * megabatch_size
        X_ = X[start:stop]
        Y_ = Y[start:stop]
        return simple_build_likelihood(
            X_, Y_, ind=T.mod(pool_ind + ind, param_pool_size))

    # Always run at least one mega-batch, even for small inputs.
    n_batches = T.max([1, X.shape[0] / megabatch_size])
    result = theano.map(myscanfunc, sequences=[T.arange(n_batches)])[0]
    return T.sum(result) * dataset_size / X.shape[0] - _KLD
def compute_tree(self, emb_x, tree):
    """Compute hidden states for all leaves, then for all internal nodes.

    ``tree`` has one row per internal node holding child indices (``-1``
    marks a missing child).  Leaves are processed with ``self.leaf_unit``;
    internal nodes are processed in order by a scan that keeps a sliding
    window of node states.

    Returns the leaf states followed by the internal-node states.

    NOTE(review): the ``theano.map(fn=self.leaf_unit`` head was missing from
    the source text; ``self.leaf_unit`` is restored because it is assigned
    just above and is otherwise unused.
    """
    self.recursive_unit = self.create_recursive_unit()
    self.leaf_unit = self.create_leaf_unit()

    num_nodes = tree.shape[0]  # num internal nodes
    num_leaves = self.num_words - num_nodes

    # compute leaf hidden states
    leaf_h, _ = theano.map(
        fn=self.leaf_unit,
        sequences=[emb_x[:num_leaves]])

    if self.irregular_tree:
        init_node_h = T.concatenate([leaf_h, leaf_h], axis=0)
    else:
        init_node_h = leaf_h

    # use recurrence to compute internal node hidden states
    def _recurrence(cur_emb, node_info, t, node_h, last_h):
        child_exists = node_info > -1
        # The window drops one row per step, so indices of existing children
        # are shifted by t.
        offset = num_leaves * int(self.irregular_tree) - child_exists * t
        child_h = (node_h[node_info + offset]
                   * child_exists.dimshuffle(0, 'x'))
        parent_h = self.recursive_unit(cur_emb, child_h, child_exists)
        node_h = T.concatenate(
            [node_h, parent_h.reshape([1, self.hidden_dim])])
        return node_h[1:], parent_h

    dummy = theano.shared(self.init_vector([self.hidden_dim]))
    (_, parent_h), _ = theano.scan(
        fn=_recurrence,
        outputs_info=[init_node_h, dummy],
        sequences=[emb_x[num_leaves:], tree, T.arange(num_nodes)],
        n_steps=num_nodes)

    return T.concatenate([leaf_h, parent_h], axis=0)
def gaussian_filter_2d_variable_sigma(input, sigmas, window_radius=None,
                                      border_mode='zero'):
    """Blur each element along axis 0 of ``input`` with its own 1-D kernel.

    ``input`` must be 4-dimensional.  One kernel per entry of ``sigmas`` is
    built and applied separably -- first along the last axis, then along the
    third -- to the matching slice of the padded input.  The result is
    reshaped to ``input.shape``.

    NOTE(review): the call head before ``filter_sigma, sequences=...`` was
    missing from the source text and has been restored as ``theano.map(``.
    """
    def filter_sigma(idx, kernel):
        dimpattern_w = ('x', 'x', 'x', 0)
        dimpattern_h = ('x', 'x', 0, 'x')
        filter_w = kernel.dimshuffle(dimpattern_w)
        blur_w = T.nnet.conv2d(
            padded_input[idx:idx + 1], filter_w,
            border_mode=_get_chained_w_h_conv_border(conv_border, 'w'),
            filter_shape=[1, 1, 1, None])
        filter_h = kernel.dimshuffle(dimpattern_h)
        return T.nnet.conv2d(
            blur_w, filter_h,
            border_mode=_get_chained_w_h_conv_border(conv_border, 'h'),
            filter_shape=[1, 1, None, 1])

    ndim = 4
    assert input.ndim == ndim, \
        "there must be {} dimensions, got {}".format(ndim, input.ndim)
    window_radius = gaussian_kernel_default_radius(sigmas, window_radius)
    padded_input, conv_border = add_border(input, window_radius, border_mode)
    kernel = gaussian_kernel_1d(sigmas, window_radius)
    blur, _ = theano.map(
        filter_sigma,
        sequences=[T.arange(sigmas.shape[0]), kernel])
    return blur.reshape(input.shape)
def gradSort(met, X):
    """Sort each matrix in ``X`` by its metric vector in ``met``.

    A small graph (argsort of the metric, gather of the rows) is wrapped in
    an ``OpFromGraph`` whose gradient is overridden by ``grad_edit``, and the
    op is mapped over the pairs ``(met[i], X[i])``.

    NOTE(review): the ``theano.map(op`` head was missing from the source
    text; ``op`` is restored because it is built just before the truncated
    call and is otherwise unused.  A compiled-but-unused
    ``theano.function`` and an unused ``argsort`` inside ``grad_edit`` were
    dropped; neither affected the returned graph.
    """
    m = T.fvector()
    x = T.fmatrix()

    # Sort the input on the metric.  The `0 * T.sum(m)` term keeps `m`
    # connected to the output graph.
    z = T.argsort(m, axis=0)
    out = x[z] + 0 * T.sum(m)

    # Fix the gradient of the sort operation to be the sum of
    # the gradients with respect to the input features
    def grad_edit(inps, grads):
        g, = grads
        s = T.sum(g, axis=-1)
        am = T.max(abs(s), axis=-1)
        # Keep only the part of s beyond +/-90% of its largest magnitude.
        s = 10 * (s - T.clip(s, -.90 * am, .90 * am))
        return s, g

    op = theano.OpFromGraph([m, x], [out])
    op.grad = grad_edit

    results, updates = theano.map(op, sequences=[met, X], name='batch_sort')
    return results
def attend(self, y_p):
    """Coarse-to-fine attention over ``self.base``; returns (context, updates).

    For every glimpse the bases are visited from last to first.  Each level
    except the first-visited one restricts attention to a window of ``ext``
    steps chosen by the previous level's argmax; level 0 produces the
    weighted sum ``proto``, which is appended to every ``self.glimpses[i]``.

    NOTE(review): the ``theano.map(pick`` head and the ``T.dot(proto`` head of
    the return value were missing from the source text.  ``pick`` is the only
    nested function; ``proto`` as the dot operand is an inference.  The block
    nesting was also reconstructed from a single-line collapse -- confirm all
    of this against the upstream code.
    NOTE(review): ``ext``, ``pos`` and ``idx`` are only assigned on some
    paths; with a single base, ``idx`` is read before any assignment.
    """
    updates = self.default_updates()
    last = len(self.base) - 1
    for g in range(self.attrs['glimpse']):
        for i in range(last, -1, -1):
            factor = (T.constant(self.base[i].attrs['factor'][0], 'int32')
                      if i > 0 else 1)
            B, C, I, h_p, _ = self.get(y_p, i, g)
            if i == last:
                z_i = self.distance(C, h_p)
            else:
                length = T.cast(T.max(T.sum(I, axis=0)) + 1, 'int32')
                ext = T.cast(T.minimum(ext / factor, T.min(length)), 'int32')

                def pick(i_t, ext):
                    # 0/1 mask selecting `ext` consecutive steps.
                    pad = T.minimum(i_t + ext, B.shape[0]) - ext
                    return T.concatenate(
                        [T.zeros((pad,), 'int8'),
                         T.ones((ext,), 'int8'),
                         T.zeros((B.shape[0] - pad - ext + 1,), 'int8')],
                        axis=0)

                idx, _ = theano.map(pick,
                                    sequences=[pos / factor],
                                    non_sequences=[ext])
                idx = (idx.dimshuffle(1, 0)[:-1].flatten() > 0).nonzero()
                C = C.reshape((C.shape[0] * C.shape[1], C.shape[2]))[idx] \
                     .reshape((ext, C.shape[1], C.shape[2]))
                z_i = self.distance(C, h_p)
                I = I.reshape((I.shape[0] * I.shape[1],))[idx] \
                     .reshape((ext, I.shape[1]))
            if i > 0:
                pos = T.argmax(self.softmax(z_i, I), axis=0) * factor
                ext = factor
            else:
                w_i = self.softmax(z_i, I)
                B = B.reshape((B.shape[0] * B.shape[1], B.shape[2]))[idx] \
                     .reshape((ext, B.shape[1], B.shape[2]))
                proto = T.sum(
                    B * w_i.dimshuffle(0, 1, 'x').repeat(B.shape[2], axis=2),
                    axis=0)
                for j in range(len(self.base)):
                    self.glimpses[j].append(proto)
    return T.dot(proto, self.custom_vars['W_att_in_0']), updates
def cross_cpu(self, entities):
    """Single-point crossover between the two halves of ``entities``.

    ``entities`` is an ``(n, m)`` matrix.  A random cut point is drawn per
    pair, turned into a 0/1 selection vector, and used to build two children
    per pair.  Returns a new ``(n, m)`` population.

    NOTE(review): the ``theano.map(choice_vector`` head was missing from the
    source text; ``choice_vector`` is restored because it is the only nested
    function and is otherwise unused.  Whether the ``astype('int32')`` cast
    applied to both branches or only to the ``fast_rng`` branch could not be
    determined from the collapsed line; it is placed in the branch that
    needs it (the uniform draw).
    """
    n, m = entities.shape
    pop = T.reshape(entities, (2, n * m / 2))

    if self.fast_rng is None:
        xpoints = self.rng.random_integers(size=(n / 2, ), low=0, high=m - 1)
    else:
        xpoints = self.fast_rng.uniform(size=(n / 2, ), low=0, high=m - 1)
        xpoints = xpoints.astype('int32')

    def choice_vector(xpoint, nbits):
        # `xpoint` zeros followed by ones up to length `nbits`.
        return T.concatenate([
            T.zeros((xpoint, ), dtype='uint8'),
            T.ones((nbits - xpoint, ), dtype='uint8')
        ])

    values, updates = theano.map(choice_vector,
                                 sequences=[xpoints],
                                 non_sequences=[m],
                                 name='choice_vector_building')

    a = T.reshape(values, (n * m / 2, ))
    # One child follows the selector, the other its complement.
    pop = T.concatenate([T.choose(a, pop), T.choose(1 - a, pop)])
    pop = T.reshape(pop, (n, m))
    return pop
def theano_scan_color(writer, draw_fn):
    """Render frames forever by evaluating ``draw_fn`` over every pixel channel.

    ``draw_fn(step)`` must return a function of ``(vane, px, col)``.  It is
    mapped over three constant grids of shape (vane count, max magnitude, 3)
    and compiled once; each loop iteration writes one frame into the
    writer's buffer and decrements the step counter.  Never returns.

    NOTE(review): the ``theano.map(draw_fn_with_step`` head was missing from
    the source text; ``draw_fn_with_step`` is restored because it is built
    just before the truncated call and is otherwise unused.  The block
    nesting was reconstructed from a single-line collapse -- confirm against
    the upstream code.
    """
    n_vanes = Screen.screen_vane_count
    n_px = Screen.screen_max_magnitude

    with writer as writer_buf:
        writer_buf_reshaped = writer_buf.reshape((n_vanes, n_px, 3))

        # Coordinate grids: every cell holds its own vane index, pixel index
        # or channel index (0, 1, 2).
        vane_matrix = [[[float(vane), float(vane), float(vane)]
                        for px in range(n_px)]
                       for vane in range(n_vanes)]
        px_matrix = [[[float(px), float(px), float(px)]
                      for px in range(n_px)]
                     for vane in range(n_vanes)]
        col_matrix = [[[float(0), float(1), float(2)]
                       for px in range(n_px)]
                      for vane in range(n_vanes)]

        vane_vec = T.as_tensor(vane_matrix)
        px_vec = T.as_tensor(px_matrix)
        col_vec = T.as_tensor(col_matrix)

        step = T.fscalar('step')
        draw_fn_with_step = draw_fn(step)

        f, _ = theano.map(draw_fn_with_step, [vane_vec, px_vec, col_vec])
        fn_actual = theano.function([step], f,
                                    allow_input_downcast=True,
                                    on_unused_input='ignore')

        step_actual = 0
        while True:
            writer.frame_ready()
            start = time.time()
            writer_buf_reshaped[:] = fn_actual(step_actual)
            step_actual -= 1
            done = time.time()
            fps = 1.0 / (done - start)
            if fps < TARGET_FPS:
                logging.warning(
                    'Frame rate is %f, which is lower than target %d',
                    fps, TARGET_FPS)
def compute_tree(self, emb_x, tree):
    """Compute hidden states for every node of a tree, leaves first.

    The first ``self.num_words - tree.shape[0]`` rows of ``emb_x`` are treated
    as leaves and passed through ``self.leaf_unit``.  The remaining rows are
    internal nodes, processed in order by a scan that keeps a sliding window
    of previously computed states and combines each node's children with
    ``self.recursive_unit``.

    :param emb_x: per-node embeddings, leaves first, then internal nodes.
    :param tree: one row per internal node holding child indices; entries
        equal to ``-1`` (or lower) mark a missing child.
    :return: leaf states followed by internal-node states, concatenated on
        axis 0.
    """
    self.recursive_unit = self.create_recursive_unit()
    self.leaf_unit = self.create_leaf_unit()

    num_nodes = tree.shape[0]  # num internal nodes
    num_leaves = self.num_words - num_nodes

    # compute leaf hidden states
    # (the ``theano.map(`` head of this call was missing in the source)
    leaf_h, _ = theano.map(
        fn=self.leaf_unit,
        sequences=[emb_x[:num_leaves]])

    # use recurrence to compute internal node hidden states
    def _recurrence(cur_emb, node_info, t, node_h, last_h):
        child_exists = node_info > -1
        # ``node_h`` drops its first row every step, so existing child
        # indices are shifted back by the step counter ``t``; rows of
        # missing children are zeroed by the mask.
        child_h = (node_h[node_info - child_exists * t]
                   * child_exists.dimshuffle(0, 'x'))
        parent_h = self.recursive_unit(cur_emb, child_h, child_exists)
        node_h = T.concatenate(
            [node_h, parent_h.reshape([1, self.hidden_dim])])
        return node_h[1:], parent_h

    # Placeholder initial value for the second recurrent output.
    dummy = theano.shared(self.init_vector([self.hidden_dim]))
    (_, parent_h), _ = theano.scan(
        fn=_recurrence,
        outputs_info=[leaf_h, dummy],
        sequences=[emb_x[num_leaves:], tree, T.arange(num_nodes)],
        n_steps=num_nodes)

    return T.concatenate([leaf_h, parent_h], axis=0)
# attend(self, y_p): for every glimpse, walks the entries of self.base from the
# last index down to 0.  self.get(y_p, i, g) yields B, C, I, H plus W_att_in and
# b_att_in (the latter two are not used in the visible code).  On each level C
# is scored against H with self.distance; on levels other than the last, C and
# I are first restricted to a window of `ext` rows selected through `idx`.
# While i > 0 the argmax of self.softmax(z_i, I) (scaled by `factor`) becomes
# the next `pos`; at i == 0 the softmax weights w_i are used to form `proto` as
# a weighted sum over B, which is appended to every self.glimpses[i].
# Returns a (value, updates) pair.
#
# NOTE(review): this definition is collapsed onto one physical line and two
# call heads are missing, so it is not valid Python as it stands:
#   * `idx, _ =, sequences = [pos/factor], non_sequences = [ext])` -- presumably
#     a theano.map-style call applying `pick` to `pos/factor`; TODO confirm.
#   * `return, self.custom_vars['W_att_in_0']), updates` -- presumably a product
#     of `proto` with the 'W_att_in_0' variable; TODO confirm.
# NOTE(review): `ext` and `pos` are read in the else-branch before this function
# assigns them on the first pass; verify the intended nesting against upstream
# before restoring the layout.
def attend(self, y_p): updates = self.default_updates() for g in range(self.attrs['glimpse']): for i in range(len(self.base)-1,-1,-1): factor = T.constant(self.base[i].attrs['factor'][0], 'int32') if i > 0 else 1 B, C, I, H, W_att_in, b_att_in = self.get(y_p, i, g) if i == len(self.base) - 1: z_i = self.distance(C, H) else: length = T.cast(T.max(T.sum(I,axis=0))+1,'int32') ext = T.cast(T.minimum(ext/factor,T.min(length)),'int32') def pick(i_t, ext): pad = T.minimum(i_t+ext, B.shape[0]) - ext return T.concatenate([T.zeros((pad,), 'int8'), T.ones((ext,), 'int8'), T.zeros((B.shape[0]-pad-ext+1,), 'int8')], axis=0) idx, _ =, sequences = [pos/factor], non_sequences = [ext]) idx = (idx.dimshuffle(1,0)[:-1].flatten() > 0).nonzero() C = C.reshape((C.shape[0]*C.shape[1],C.shape[2]))[idx].reshape((ext,C.shape[1],C.shape[2])) z_i = self.distance(C, H) I = I.reshape((I.shape[0]*I.shape[1],))[idx].reshape((ext,I.shape[1])) if i > 0: pos = T.argmax(self.softmax(z_i,I),axis=0) * factor ext = factor else: w_i = self.softmax(z_i,I) B = B.reshape((B.shape[0]*B.shape[1],B.shape[2]))[idx].reshape((ext,B.shape[1],B.shape[2])) proto = T.sum(B * w_i.dimshuffle(0,1,'x').repeat(B.shape[2],axis=2),axis=0) for i in range(len(self.base)): self.glimpses[i].append(proto) return, self.custom_vars['W_att_in_0']), updates
def get_reward_sequences(self, env_state_sessions, agent_action_sessions):
    """
    computes the rewards given to agent at each time step for each batch
    parameters:
        env_state_sessions - environment state [batch_i,seq_i,state_units]
            history for all sessions (a single tensor or a list of them)
        agent_action_sessions - int[batch_i,seq_i] (a single tensor or a
            list of them)
    returns:
        rewards float[batch_i,seq_i] - what reward was given to an agent for
            corresponding action from state in that batch
    """
    # Function-scope import: nothing else in this block references ``theano``.
    import theano

    env_state_sessions = check_list(env_state_sessions)
    agent_action_sessions = check_list(agent_action_sessions)
    n_states = len(env_state_sessions)
    n_actions = len(agent_action_sessions)

    def compute_reward(batch_i, *args):
        # ``args`` holds one slice per state tensor followed by one slice per
        # action tensor; split them back into the two groups.
        session_states, session_actions = unpack_list(
            args, [n_states, n_actions])
        return self.get_reward(session_states, session_actions, batch_i)

    # First sequence is the batch index, then every state and action tensor.
    batch_indices = T.arange(agent_action_sessions[0].shape[0])
    sequences = [batch_indices] + env_state_sessions + agent_action_sessions

    # The head of this call was missing in the source (``rewards, updates =,
    # sequences=sequences)``); ``compute_reward`` is the only local callable.
    rewards, updates = theano.map(compute_reward, sequences=sequences)

    # The reward computation is expected to be free of shared-variable updates.
    assert len(updates) == 0
    return rewards.reshape(
        agent_action_sessions[0].shape)  # reshape back to original
def get_reward_sequences(self, env_state_sessions, agent_action_sessions):
    """Computes the rewards given to agent at each time step for each batch.

    :param env_state_sessions: Environment state [batch_i,seq_i,state_units]
        history for all sessions (a single tensor or a list of them).
    :type env_state_sessions: theano tensor [batch_i,seq_i,state_units]

    :param agent_action_sessions: Actions chosen by agent at each tick for
        all sessions (a single tensor or a list of them).
    :type agent_action_sessions: int[batch_i,seq_i]

    :return rewards: What reward was given to an agent for corresponding
        action from state in that batch.
    :rtype: float[batch_i,seq_i]
    """
    # Function-scope import: nothing else in this block references ``theano``.
    import theano

    env_state_sessions = check_list(env_state_sessions)
    agent_action_sessions = check_list(agent_action_sessions)
    n_states = len(env_state_sessions)
    n_actions = len(agent_action_sessions)

    def compute_reward(batch_i, *args):
        # Split the flat per-session slices back into states and actions.
        session_states, session_actions = unpack_list(
            args, [n_states, n_actions])
        return self.get_reward(session_states, session_actions, batch_i)

    # Iterate jointly over the batch index and every state/action tensor.
    sequences = ([T.arange(agent_action_sessions[0].shape[0])]
                 + env_state_sessions
                 + agent_action_sessions)

    # The head of this call was missing in the source (``rewards, updates =,
    # sequences=sequences)``); ``compute_reward`` is the only local callable.
    rewards, updates = theano.map(compute_reward, sequences=sequences)

    # The reward computation is expected to be free of shared-variable updates.
    assert len(updates) == 0
    return rewards.reshape(
        agent_action_sessions[0].shape)  # reshape back to original
# compute_objective_and_gradients(self, nSamp): draws `nSamp` samples from
# self.mrec, evaluates two per-sample log densities (p_yh and q_hgy), and works
# with their difference ff = p_yh - q_hgy:
#   * Lhat is log(mean over samples of exp(ff)), stabilised by subtracting the
#     per-column maximum fmax before exponentiating.
#   * hold_out is the same quantity with one sample left out; the column
#     maximum is handled separately using the second-largest value f2max.
#   * the_ws are the normalised weights f_hy / sum_across_samples.
# Gradients are taken w.r.t. self.mrec.getParams() and self.mprior.getParams(),
# with the weights (and Lhat_cv for the former) held constant.
# Returns [Lhat.mean(), dpyh, dqhgy].
#
# NOTE(review): the definition is collapsed onto two physical lines, so every
# `#` comment swallows the code that follows it, and two call heads are
# missing: `p_yh,_ =, sequences=hsamp)` and `q_hgy,_ =, sequences=hsamp)`.
# They were presumably theano.map-style calls over the samples, but the mapped
# density functions are not recoverable from this file -- TODO restore from
# upstream.
# NOTE(review): `consider_constant=... + hsamp` concatenates hsamp with a list,
# so hsamp is presumably a list of tensors -- confirm against getSample().
def compute_objective_and_gradients(self, nSamp): hsamp = self.mrec.getSample(self.Y, nSamp) # evaluate the generative model density P_\theta(y_i , h_i) p_yh,_ =, sequences=hsamp) # evaluate the recognition model density Q_\phi(h_i | y_i) q_hgy,_ =, sequences=hsamp) ff = (p_yh-q_hgy) sortidx = ff.argsort(axis=0) fmax = ff[(sortidx[-1],T.arange(ff.shape[-1]))].dimshuffle('x',0) f_hy = T.exp(ff - fmax) sum_across_samples = f_hy.sum(axis=0, keepdims = True) Lhat = T.log(sum_across_samples/nSamp) + fmax col_idx = T.arange(ff.shape[-1]) # This 1e-12 constant is for debugging nans # in other parts of code. We know we'll get # nans where we'll then overwrite. Use it with # compute cross-validated estimates of Lhat # nanguard mode. hold_out_except_last = T.log((sum_across_samples - f_hy)/(nSamp-1)) + fmax #+1e-12) + fmax f2max_vec = ff[(sortidx[-2],T.arange(ff.shape[-1]))] f2max = f2max_vec.dimshuffle('x',0) # Do tricky things to keep the numerics in order (avoid a term being \approxeq 0) ff_nolast = T.set_subtensor(ff[(sortidx[-1],col_idx)], f2max_vec) f_hy_last = T.exp(ff_nolast - f2max) # compute held-out sum when we hold out the maximum element hold_out_last = T.log((f_hy_last.sum(axis=0, keepdims=True) - f_hy_last)/(nSamp-1)) + f2max # compute final held-out estimates hold_out = T.set_subtensor(hold_out_except_last[(sortidx[-1],col_idx)], hold_out_last[(sortidx[-1],col_idx)]) Lhat_cv = Lhat - hold_out the_ws = f_hy / sum_across_samples weighted_q = T.sum((Lhat_cv*q_hgy + the_ws*ff).mean(axis=1)) #weighted_q = T.sum((Lhat_cv*q_hgy + the_ws*(p_yh-q_hgy)).sum(axis=1)) # gradients for approximate posterior dqhgy = T.grad(cost=weighted_q, wrt = self.mrec.getParams(), consider_constant=([the_ws,Lhat_cv]+hsamp), disconnected_inputs='ignore') # gradients for prior dpyh = T.grad(cost=T.sum((the_ws*ff).mean(axis=1)), wrt = self.mprior.getParams(), consider_constant=hsamp + [the_ws], disconnected_inputs='ignore') #dpyh = T.grad(cost=T.sum((the_ws*(p_yh-q_hgy)).sum(axis=1)), wrt 
= self.mprior.getParams(), consider_constant=hsamp + [the_ws], disconnected_inputs='ignore') return [Lhat.mean(), dpyh, dqhgy]
# _score_ao_tg(self, tag_ids, word_ids): iterates jointly over `tag_ids` and
# `word_ids`, passing the tag log-prob table and the tag/word embedding tables
# as non-sequence arguments, and returns the sum of the per-step outputs.
#
# NOTE(review): the head of the mapping call is missing (`output, _ =,
# sequences=...`), so this is not valid Python as it stands.  It was presumably
# `theano.map(<scoring function>, ...)`; the mapped function cannot be
# recovered from this file -- TODO restore from upstream.
def _score_ao_tg(self, tag_ids, word_ids): output, _ =, sequences=[tag_ids, word_ids], non_sequences=[self._tg_lp_tag_np_table, self._tg_tag_emb, self._tg_word_emb], name="_score_ao_tg") return T.sum(output)
def hessian_diag1(f, v):
    """Return the diagonal of the Hessian of ``f`` with respect to ``v``.

    The gradient ``g`` of ``f`` is computed once with ``gradient1``; the i-th
    diagonal entry is then element ``i`` of the gradient of ``g[i]``.

    :param f: symbolic expression to differentiate.
    :param v: variable to differentiate with respect to.
    :return: symbolic vector with one entry per element of ``g``.
    """
    # Function-scope import: nothing else in this block references ``theano``.
    import theano

    g = gradient1(f, v)
    idx = tt.arange(g.shape[0], dtype='int32')

    def hess_ii(i):
        return gradient1(g[i], v)[i]

    # The head of this call was missing in the source (``return, idx)[0]``);
    # ``hess_ii`` is the only callable it can have mapped.  Element 0 of the
    # returned pair is the stacked outputs.
    diagonal, _ = theano.map(hess_ii, idx)
    return diagonal
def call(self, inputs, mask=None):
    """Combine the two input tensors sample by sample.

    For every index ``i`` along the leading axis, ``inputs[0][i]`` and
    ``inputs[1][i]`` are combined with ``T.batched_tensordot(..., 1)``; the
    result is clipped to ``[FLOAT_MIN, FLOAT_MAX]`` and cast to ``FLOATX``.

    :param inputs: sequence whose first two items are the tensors to combine.
    :param mask: accepted for interface compatibility; not used here.
    :return: the per-sample results stacked along a new leading axis.
    """
    # Function-scope import: nothing else in this block references ``theano``.
    import theano

    l1, l2 = inputs[0], inputs[1]

    def f(i, left, right):
        product = T.batched_tensordot(left[i], right[i], 1)
        return T.clip(product, FLOAT_MIN, FLOAT_MAX).astype(FLOATX)

    # The head of this call was missing in the source (``return,
    # T.arange(...), non_sequences=[l1, l2])[0]``); ``f`` is the only local
    # callable and its signature matches (index, l1, l2).
    stacked, _ = theano.map(f, T.arange(l1.shape[0]), non_sequences=[l1, l2])
    return stacked
def __call__(self, X):
    """Look up ``self.W[x]`` for every element ``x`` of ``X``.

    :param X: symbolic sequence of indices into ``self.W``.
    :return: result of ``T.stacklists`` applied to the mapped lookups.
    """
    # Function-scope import: nothing else in this block references ``theano``.
    import theano

    # The ``theano.map(`` head was missing in the source (``stk = lambda x:
    # self.W[x],X)``).  The unused local helper ``step`` and the
    # commented-out alternatives were dropped.
    rows, _ = theano.map(lambda x: self.W[x], X)
    out = T.stacklists(rows)
    return out
# NOTE(review): the head of the mapping call is missing (`histograms, updates
# =, X)`), so this is not valid Python as it stands.  It was presumably
# `theano.map(<per-object encoder>, X)`; the mapped function cannot be
# recovered from this file -- TODO restore from upstream.  The `updates` value
# is discarded; only `histograms` is returned.
def sym_histograms(self, X): """ Encodes a set of objects (X is a tensor3) :param X: tensor3 containing the feature vectors for each object :return: """ histograms, updates =, X) return histograms
def __init__(
        self,
        rng,
        input,
        vocab_size,
        embed_dm,
        embeddings=None,
):
    """Set up the embedding table and the symbolic sentence-embedding output.

    input: theano.tensor.dmatrix, (number of instances, sentence word number)

    vocab_size: integer, the size of vocabulary,

    embed_dm: integer, the dimension of word vector representation

    embeddings: pretrained embeddings; must expose ``get_value()`` returning
        an array of shape ``(vocab_size, embed_dm)``.  When omitted, a table
        is drawn from ``rng.normal(0, 0.05)`` and its last row (the
        <PADDING> entry) is zeroed.

    Sets ``self.embeddings``, ``self.params``, ``self.param_shapes`` and
    ``self.output``.
    """
    if embeddings:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("Use pretrained embeddings: ON")
        assert embeddings.get_value().shape == (
            vocab_size, embed_dm), "%r != %r" % (embeddings.get_value().shape,
                                                 (vocab_size, embed_dm))
        self.embeddings = embeddings
    else:
        print("Use pretrained embeddings: OFF")
        embedding_val = np.asarray(
            rng.normal(0, 0.05, size=(vocab_size, embed_dm)),
            dtype=theano.config.floatX)
        # the <PADDING> character is initialized to 0
        embedding_val[vocab_size - 1, :] = 0
        self.embeddings = theano.shared(
            np.asarray(embedding_val, dtype=theano.config.floatX),
            borrow=True,
            name='embeddings')

    self.params = [self.embeddings]
    self.param_shapes = [(vocab_size, embed_dm)]

    # Look up the embedding rows of every sentence.  The ``theano.map(`` head
    # of this call was missing in the source.
    sent_embedding_list, updates = theano.map(
        lambda sent: self.embeddings[sent], input)

    # make it into a 3D tensor
    sent_embedding_tensor = T.stacklists(sent_embedding_list)

    # Made 4D to fit the convolution operation: a broadcastable axis is
    # inserted at position 1 and the last two axes are swapped.
    self.output = sent_embedding_tensor.dimshuffle(0, 'x', 2, 1)
def jacobian1(f, v):
    """jacobian of f wrt v

    ``f`` is flattened first; row ``i`` of the result is the gradient of
    ``f[i]`` with respect to ``v`` as computed by ``gradient1``.
    """
    # Function-scope import: nothing else in this block references ``theano``.
    import theano

    f = tt.flatten(f)
    idx = tt.arange(f.shape[0], dtype='int32')

    def grad_i(i):
        return gradient1(f[i], v)

    # The head of this call was missing in the source (``return, idx)[0]``);
    # ``grad_i`` is the only callable it can have mapped.  Element 0 of the
    # returned pair is the stacked outputs.
    rows, _ = theano.map(grad_i, idx)
    return rows
def _pv_function1(self, tensor_left, tensor_right, pf_matrix, vf_matrix):
    """Pick the best index in ``[tensor_left, tensor_right)``.

    ``self.get_new_p`` is evaluated for every index in the range; the
    largest value wins and ``self.get_new_v`` is evaluated at the winning
    index.

    :return: list ``[best_index, max_pf, new_v]`` where ``best_index`` is
        expressed in absolute coordinates (offset by ``tensor_left``).
    """
    # Function-scope import: nothing else in this block references ``theano``.
    import theano

    def score(i, left, right, pf, vf):
        # Note the argument order expected by get_new_p: the candidate
        # index comes third.
        return self.get_new_p(left, right, i, pf, vf)

    # The ``theano.map(`` head of this call was missing in the source.
    results, updates = theano.map(
        fn=score,
        sequences=[T.arange(tensor_left, tensor_right)],
        non_sequences=[tensor_left, tensor_right, pf_matrix, vf_matrix],
        name='pv function')

    max_pf, index = T.max_and_argmax(results, axis=0)
    best_index = index + tensor_left  # argmax is relative to the range start
    new_v = self.get_new_v(tensor_left, tensor_right, best_index, vf_matrix)
    return [best_index, max_pf, new_v]
# call(self, inputs, mask=None): for every index i along the leading axis of
# inputs[0], the inner function f builds a 0/1 mask from the non-zero entries
# of text_input[i], derives `vec` from that sample's embedding, divides it by
# max(L2 norm along axis 0, K.epsilon()), and returns an affine expression
# ending in `+ self.b`.  The outer `mask` parameter is shadowed inside f and is
# not otherwise used in the visible code.
#
# NOTE(review): three call heads are missing, so this is not valid Python as it
# stands:
#   * `vec =, embedding[i])`        -- presumably a product of `mask` with
#                                      `embedding[i]`; TODO confirm.
#   * `return, self.W) + self.b`    -- presumably a product of `vec` with
#                                      `self.W`; TODO confirm.
#   * `return, T.arange(...), non_sequences=inputs)[0]` -- presumably a
#     theano.map-style call applying `f`; TODO confirm.
# NOTE(review): f takes (i, embedding, text_input), so `inputs` is presumably a
# two-element list [embedding, text_input] -- verify against the caller.
def call(self, inputs, mask=None): def f(i, embedding, text_input): mask = T.neq(text_input[i], 0).astype(FLOATX) vec =, embedding[i]) vec /= T.maximum(vec.norm(2, 0), K.epsilon()) return, self.W) + self.b return, T.arange(inputs[0].shape[0]), non_sequences=inputs)[0]
# read(self, img, center_x, center_y): promotes a 2-D `img` through
# self.matrix2tensor4, pads it with self.padding_img, then iterates jointly over
# the padded images and the x/y centres to produce one result per image.  The
# result is reshaped to (batch_size, self.get_dim('glimpse')).
#
# NOTE(review): the head of the mapping call is missing (`retina, _ =,
# sequences=[img_paded, loc_x, loc_y])`), so this is not valid Python as it
# stands.  It was presumably `theano.map(<per-image glimpse function>, ...)`;
# the mapped function cannot be recovered from this file -- TODO restore from
# upstream.
def read(self, img, center_x, center_y): loc_x, loc_y = center_x, center_y if img.ndim == 2: img = self.matrix2tensor4(img) batch_size = img.shape[0] img_paded = self.padding_img(img) retina, _ =, sequences=[img_paded, loc_x, loc_y]) return retina.reshape((batch_size, self.get_dim('glimpse')))
def compute_cost_with_cross_entropy_in_parallel(original_rnn_outputs, labels, x_ends, y_ends):
    """Cross-entropy cost of each sample against its best label alignment.

    A forward pass in log space keeps, for every time step and label
    position, the best score reachable by staying, advancing one position or
    (where ``mask`` allows) advancing two, together with a back-pointer.  For
    each sample the pointers are then followed backwards from the end of the
    sequence to recover one label per time step, and the summed categorical
    cross-entropy of the network outputs against that alignment is returned.

    NOTE(review): this looks like best-path alignment for a CTC-style
    objective -- confirm against the calling code.

    :param original_rnn_outputs: network outputs, iterated per sample.
    :param labels: label sequences, one row per sample.
    :param x_ends: per-sample length of the output sequence.
    :param y_ends: per-sample length of the label sequence.
    :return: one summed cross-entropy value per sample.
    """
    # log(0) = -inf where the two-position skip is forbidden (label is zero or
    # equals the label two positions away), log(1) = 0 elsewhere.
    mask = T.log(1 - T.or_(T.eq(labels, T.zeros_like(labels)),
                           T.eq(labels, shift_matrix(labels, 2))))
    arange = T.arange(labels.shape[1])

    # Log-probability 0 for the first label position, -inf for all others.
    initial_state = T.log(T.zeros_like(labels))
    initial_state = T.set_subtensor(initial_state[:, 0], 0)

    def select_probabilities(rnn_outputs, label):
        return rnn_outputs[:, label]

    # The head of this call was missing in the source (``rnn_outputs, _ =,
    # [original_rnn_outputs, labels])``); ``select_probabilities`` is the only
    # two-argument callable defined at this point.
    rnn_outputs, _ = theano.map(select_probabilities,
                                [original_rnn_outputs, labels])
    rnn_outputs = T.log(rnn_outputs.dimshuffle((1, 0, 2)))

    def forward_step(probabilities, last_probabilities):
        all_forward_probabilities = T.stack(
            last_probabilities + probabilities,
            log_shift_matrix(last_probabilities, 1) + probabilities,
            log_shift_matrix(last_probabilities, 2) + probabilities + mask,
        )
        max_probability, backlink = T.max_and_argmax(all_forward_probabilities, 0)
        # Convert the winning shift (0, 1 or 2) into an absolute position.
        backlink = arange - backlink
        return max_probability, backlink

    results, _ = theano.scan(fn=forward_step,
                             sequences=rnn_outputs,
                             outputs_info=[initial_state, None])
    forward_probabilities, backward_pointers = results

    def compute_cost(rnn_outputs, forward_probabilities, backward_pointers, x_end, y_end, label):
        def backward_step(backlinks, position):
            new_position = backlinks[position]
            return new_position, position

        # Start from the better of the last two label positions.
        initial_state = T.argmax(forward_probabilities[x_end - 1, y_end - 2:y_end]) + y_end - 2
        results, _ = theano.scan(fn=backward_step,
                                 sequences=backward_pointers[0:x_end, :],
                                 outputs_info=[initial_state, None],
                                 go_backwards=True)
        # Positions were collected end-to-start; reverse them before lookup.
        alignment = label[results[1][::-1]]
        return aggregate(categorical_crossentropy(rnn_outputs[0:x_end], alignment), mode='sum')

    # Put the sample axis first so the per-sample map below can iterate it.
    forward_probabilities = forward_probabilities.dimshuffle((1, 0, 2))
    backward_pointers = backward_pointers.dimshuffle((1, 0, 2))

    # The head of this call was missing in the source (``return,
    # [original_rnn_outputs, ...])[0]``); ``compute_cost`` takes exactly these
    # six sequences.
    costs, _ = theano.map(
        compute_cost,
        [original_rnn_outputs, forward_probabilities, backward_pointers,
         x_ends, y_ends, labels])
    return costs
def __init__(self, rng, input, vocab_size, embed_dm, embeddings = None, ):
    """Set up the embedding table and the symbolic sentence-embedding output.

    input: theano.tensor.dmatrix, (number of instances, sentence word number)

    vocab_size: integer, the size of vocabulary,

    embed_dm: integer, the dimension of word vector representation

    embeddings: pretrained embeddings; must expose ``get_value()`` returning
        an array of shape ``(vocab_size, embed_dm)``.  When omitted, a table
        is drawn from ``rng.normal(0, 0.05)`` and its last row (the
        <PADDING> entry) is zeroed.

    Sets ``self.embeddings``, ``self.params``, ``self.param_shapes`` and
    ``self.output``.
    """
    expected_shape = (vocab_size, embed_dm)

    if embeddings:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print("Use pretrained embeddings: ON")
        assert embeddings.get_value().shape == expected_shape, "%r != %r" % (
            embeddings.get_value().shape,
            expected_shape
        )
        self.embeddings = embeddings
    else:
        print("Use pretrained embeddings: OFF")
        embedding_val = np.asarray(
            rng.normal(0, 0.05, size = expected_shape),
            dtype = theano.config.floatX
        )
        # the <PADDING> character is initialized to 0
        embedding_val[vocab_size-1, :] = 0
        self.embeddings = theano.shared(
            np.asarray(embedding_val, dtype = theano.config.floatX),
            borrow = True,
            name = 'embeddings'
        )

    self.params = [self.embeddings]
    self.param_shapes = [expected_shape]

    # Look up the embedding rows of every sentence.  The ``theano.map(lambda``
    # head of this call was missing in the source (``= sent:
    # self.embeddings[sent], input)``).
    sent_embedding_list, updates = theano.map(lambda sent: self.embeddings[sent], input)

    sent_embedding_tensor = T.stacklists(sent_embedding_list)  # make it into a 3D tensor

    # Made 4D to fit the convolution operation: a broadcastable axis is
    # inserted at position 1 and the last two axes are swapped.
    self.output = sent_embedding_tensor.dimshuffle(0, 'x', 2, 1)
def pv_function(self, tensor_input):
    """Fill the index / p / v chart matrices for ``tensor_input``.

    Three ``max_length``-sized shared matrices are created (``indexf_matrix``
    of ones, ``pf_matrix`` as identity, ``vf_matrix`` of zeros).  The diagonal
    of ``vf_matrix`` is first seeded with the rows of ``self.L`` selected by
    ``tensor_input``.  Then, for every span width ``i`` from 1 up to
    ``max_length - 1``, ``self._pv_function1`` is evaluated for all start
    positions ``j`` and its three results are written into cell
    ``[j, j + i]`` of the corresponding matrix.

    :param tensor_input: symbolic vector of indices into ``self.L``.
    :return: tuple ``(indexf_matrix, pf_matrix, vf_matrix)`` of symbolic
        matrices.
    """
    indexf_matrix = theano.shared(
        np.ones((self.max_length, self.max_length),
                dtype=theano.config.floatX),
        name = 'indexf_matrix',
        borrow=True
    )
    pf_matrix = theano.shared(
        np.eye(self.max_length, dtype=theano.config.floatX),
        name = 'pf_matrix',
        borrow=True
    )
    vf_matrix = theano.shared(
        np.zeros((self.max_length, self.max_length, self.size),
                 dtype=theano.config.floatX),
        name = 'vf_matrix',
        borrow=True
    )

    # Seed the diagonal: vf_matrix[i, i] = L[tensor_input[i]].
    results, updates = theano.reduce(
        fn = lambda i, t_vf_matrix, L, t_tensor_input:
            T.set_subtensor(t_vf_matrix[i, i], L[t_tensor_input[i]]),
        outputs_info = vf_matrix,
        sequences=[T.arange(tensor_input.shape[0])],
        non_sequences=[self.L, tensor_input],
        name = 'vf_matrix prepare'
    )
    vf_matrix = results

    for i in range(1, self.max_length):
        span_count = self.max_length - i

        # All spans of width ``i`` are evaluated against the matrices as they
        # stood before this width was processed.  The ``theano.map(`` head of
        # this call was missing in the source; the triple-quoted block of
        # commented-out code that preceded it was dropped.
        results, updates = theano.map(
            fn = lambda j, pf_matrix, vf_matrix, i:
                self._pv_function1(j, j+i, pf_matrix, vf_matrix),
            sequences=[T.arange(span_count)],
            non_sequences = [pf_matrix, vf_matrix, i],
        )

        for j in range(span_count):
            indexf_matrix = T.set_subtensor(indexf_matrix[j, j+i], results[0][j])
            pf_matrix = T.set_subtensor(pf_matrix[j, j+i], results[1][j])
            vf_matrix = T.set_subtensor(vf_matrix[j, j+i], results[2][j])

    return indexf_matrix, pf_matrix, vf_matrix
def make_gradlogps(mdp,agent):
    """Compile a per-sample log-likelihood gradient function and attach it to ``Glf``.

    Builds symbolic matrices ``o`` and ``b`` with the dtypes reported by
    ``mdp`` and ``agent``, evaluates the log-likelihood of ``b`` under the
    agent's output for ``o``, and for every row computes the flattened
    gradient of that row's log-likelihood with respect to
    ``agent.policy_vars()``.

    Side effects: sets ``Glf.ftheano_gradlogp`` (compiled function of
    ``[o, b]``) and ``Glf.f_gradlogp`` (static method gathering
    ``gradlogpmapper`` results through ``G.pool``).  Returns ``None``.
    """
    o = TT.matrix("o", mdp.output_dtype("o"))
    b = TT.matrix("b", agent.output_dtype("b"))
    newa = agent.ponder({"o": o})["a"]
    logp_n = agent.cpd().logliks(newa, b)

    def onegrad(i):
        # Re-express the batch log-likelihood for the single row i so the
        # gradient is taken for one sample at a time.
        logp1 = theano.clone(logp_n, replace = {b: b[i:i+1], o: o[i:i+1]})[0]
        return symbolic.flatten(TT.grad(logp1, agent.policy_vars()))

    # The head of this call was missing in the source (``gradlogps,_ =,
    # TT.arange(...))``); ``onegrad`` is the only local callable.
    gradlogps, _ = theano.map(onegrad, TT.arange(logp_n.shape[0]))

    Glf.ftheano_gradlogp = theano.function([o, b], gradlogps)
    Glf.f_gradlogp = staticmethod(lambda : G.pool.gather(gradlogpmapper, np.concatenate, None))
# order0_ll_score_given_word_only(...): evaluates one score per tag index in
# range(num_tag), passing the word id, the tag log-prob table and the tag/word
# embedding tables as non-sequence arguments, and reduces the resulting vector
# with log_sum_exp.
#
# NOTE(review): the definition is collapsed onto one physical line, so the
# `# Return the total` comment swallows the return statement, and the head of
# the mapping call is missing (`log_sum_exp(, sequences=...`).  It was
# presumably `theano.map(<per-tag scoring function>, ...)`; the mapped function
# cannot be recovered from this file -- TODO restore from upstream.
def order0_ll_score_given_word_only(tg_word_id, tg_lp_tag_np_table, tg_tag_emb, tg_word_emb,num_tag): # Return the total return log_sum_exp(, sequences=[T.arange(num_tag)], non_sequences=[tg_word_id, tg_lp_tag_np_table, tg_tag_emb, tg_word_emb], name="order0_ll_score_map")[0])
def _pv_function(self, tensor_left, length, pf_matrix, vf_matrix):
    """Pick the best index in ``[tensor_left, tensor_left + length)``.

    ``self.get_new_p`` is evaluated for every index in the range (receiving
    the index first, followed by the range bounds and both matrices); the
    largest value wins and ``self.get_new_v`` is evaluated at the winning
    index.

    :return: list ``[best_index, max_pf, max_vf]`` where ``best_index`` is
        expressed in absolute coordinates (offset by ``tensor_left``).
    """
    # Function-scope import: nothing else in this block references ``theano``.
    import theano

    tensor_right = tensor_left + length

    # The ``theano.map(`` head of this call was missing in the source.
    results, updates = theano.map(
        fn = self.get_new_p,
        sequences=[T.arange(tensor_left, tensor_right)],
        non_sequences = [tensor_left, tensor_right, pf_matrix, vf_matrix],
        name = 'pv function'
    )

    max_pf, index = T.max_and_argmax(results, axis=0)
    best_index = index + tensor_left  # argmax is relative to the range start
    max_vf = self.get_new_v(tensor_left, tensor_right, best_index, vf_matrix)
    return [best_index, max_pf, max_vf]