def _beam_search_one_step(_step_score, _state, output_score, number_of_samples, beam_size, state_dim,
                          output_score_list, prev_output_index_list, output_label_id_list, embedding,
                          _tensors_to_debug=None):
    '''Expands every beam by one time step and keeps the best `beam_size` candidates per sample.

    Shapes quoted below come from the original inline comments.

    # Parameters
    ----------
    _step_score : step scores, nb_samples*beam_size, output_dim (K.log is applied to it)
    _state : updated states, nb_samples*beam_size, state_dim
    output_score : accumulated scores so far, nb_samples*beam_size
    number_of_samples, beam_size, state_dim : sizes used to build the reshape targets
    output_score_list, prev_output_index_list, output_label_id_list : lists that are appended to in place
    embedding : callable mapping label ids to the next input
    _tensors_to_debug : optional list, extended in place with intermediate tensors

    # Returns
    ------
    (output_score, current_input, current_state) for the next time step
    '''
    label_count = K.shape(_step_score)[1]

    # Accumulate scores, then lay all (beam, label) candidates of a sample side by side.
    candidate_score = K.expand_dims(output_score) + K.log(_step_score)
    candidate_score = K.reshape(
        candidate_score, shape=K.pack([number_of_samples, beam_size * label_count]))
    best_score, best_index = top_k(candidate_score, beam_size)  # -1, beam_size

    output_score_list.append(best_score)
    next_score = K.reshape(best_score, shape=(-1,))  # nb_samples * beam_size

    # best_index = beam_id * output_dim + output_label_id
    parent_beam = best_index // label_count
    prev_output_index_list.append(parent_beam)
    label_id = best_index - parent_beam * label_count
    output_label_id_list.append(label_id)

    next_input = embedding(K.reshape(label_id, shape=(-1,)))

    # Regroup states per sample, pick the state of each surviving candidate's
    # parent beam, then flatten back to nb_samples*beam_size rows.
    state_per_sample = K.reshape(
        _state, shape=K.pack([number_of_samples, beam_size, state_dim]))
    parent_per_sample = K.reshape(
        parent_beam, shape=K.pack([number_of_samples, beam_size]))
    selected_state = gather_by_sample(state_per_sample, parent_per_sample)
    next_state = K.reshape(
        selected_state, shape=K.pack([number_of_samples * beam_size, state_dim]))

    if _tensors_to_debug is not None:
        _tensors_to_debug += [candidate_score, best_score, best_index]
    return next_score, next_input, next_state
def accumulate(attend_function, inputs, input_length, mask=None, return_probabilities=False):
    '''Computes a running attention over a sequence.

    For every time step i, `attend_function` is called on a copy of the
    (time-major) inputs in which all steps after i are replaced by zeros,
    so step i only attends over items 0..i.

    # Parameters
    ----------
    attend_function : callable taking (inputs) or (inputs, mask); must return a
        2-dimensional tensor, or a pair (output, p_vectors) when
        return_probabilities is True
    inputs : tensor with at least 3 dimensions; axes 0 and 1 are swapped before use
    input_length : python int, number of time steps to iterate over
    mask : optional tensor with the same number of dimensions as inputs, or one fewer
    return_probabilities : if True, also collect the second value returned by
        attend_function

    # Returns
    ------
    the stacked outputs with axes 0 and 1 swapped back, or
    [outputs, probabilities] when return_probabilities is True
    '''
    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'
    axes = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    if mask is not None:
        if mask.ndim == ndim - 1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)

    successive_outputs = []
    successive_pvecs = []

    # Step indices shaped so they broadcast against the time-major inputs.
    uncover_mask = K.zeros_like(inputs)
    uncover_indices = K.arange(input_length)
    for _ in range(ndim - 1):
        uncover_indices = K.expand_dims(uncover_indices)

    def make_subset(i, X):
        # keep steps <= i, zero out the rest
        return K.switch(uncover_indices <= i, X, uncover_mask)

    for i in range(input_length):
        inputs_i = make_subset(i, inputs)
        if mask is not None:
            # The mask subset is only built when a mask exists; the original
            # built it unconditionally and so handed None to K.switch.
            output = attend_function(inputs_i, make_subset(i, mask))
        else:
            output = attend_function(inputs_i)
        if return_probabilities:
            output, p_vectors = output
            successive_pvecs.append(p_vectors)
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)

    outputs = K.pack(successive_outputs)
    # NOTE(review): the result of this squeeze is discarded (as in the original),
    # so it does not change `outputs`; kept to avoid altering the returned shape.
    K.squeeze(outputs, -1)
    outputs = outputs.dimshuffle([1, 0] + list(range(2, outputs.ndim)))

    if return_probabilities:
        out_pvecs = K.pack(successive_pvecs)
        K.squeeze(out_pvecs, -1)  # result discarded, see note above
        # Use the probability tensor's own rank rather than that of `outputs`.
        out_pvecs = out_pvecs.dimshuffle([1, 0] + list(range(2, out_pvecs.ndim)))
        outputs = [outputs, out_pvecs]
    return outputs
def A_network_output(x):
    # Computes A = -0.5 * (a - mu)^T P (a - mu) for every batch row.
    #
    # The input of this layer is [L, mu, a] in concatenated form along axis 1:
    # first the (n*n + n) / 2 lower-triangular entries of L, then mu (n values),
    # then a (n values), with n = self.nb_actions. `self` comes from the
    # enclosing scope.
    nb_actions = self.nb_actions
    # Floor division keeps the slice bound an int under true division too.
    nb_tril = (nb_actions * nb_actions + nb_actions) // 2

    offset = 0
    L_flat = x[:, offset:offset + nb_tril]
    offset += nb_tril
    mu = x[:, offset:offset + nb_actions]
    offset += nb_actions
    a = x[:, offset:offset + nb_actions]

    # Index sets do not depend on the batch row, so build them once.
    tril_indices = np.tril_indices(nb_actions)
    diag_indices = np.diag_indices(nb_actions)

    # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
    Ls = []
    LTs = []
    for row in range(self.batch_size):
        L = K.zeros((nb_actions, nb_actions))
        L = T.set_subtensor(L[tril_indices], L_flat[row, :])
        # Replace the diagonal by its exponential.
        diag = K.exp(T.diag(L))
        L = T.set_subtensor(L[diag_indices], diag)
        Ls.append(L)
        LTs.append(K.transpose(L))
    # TODO (from original): "diagonal elements exp" - the loop above already
    # applies exp to the diagonal; confirm nothing else is pending.
    L = K.pack(Ls)
    LT = K.pack(LTs)
    # NOTE(review): axes=(1, 2) looks like it contracts to L^T L rather than
    # L L^T - confirm this is intended.
    P = K.batch_dot(L, LT, axes=(1, 2))
    assert K.ndim(P) == 3

    # Combine a, mu and P into a scalar (over the batches).
    A = -.5 * K.batch_dot(K.batch_dot(a - mu, P, axes=(1, 2)), a - mu, axes=1)
    assert K.ndim(A) == 2
    return A
def A_network_output(x):
    # Computes A = -0.5 * (a - mu)^T P (a - mu) for every batch row.
    #
    # The input of this layer is [L, mu, a] in concatenated form along axis 1:
    # first the (n*n + n) / 2 lower-triangular entries of L, then mu (n values),
    # then a (n values), with n = self.nb_actions. `self` comes from the
    # enclosing scope.
    n = self.nb_actions
    # Floor division keeps the slice bound an int under true division too.
    tril_size = (n * n + n) // 2

    start = 0
    L_flat = x[:, start:start + tril_size]
    start += tril_size
    mu = x[:, start:start + n]
    start += n
    a = x[:, start:start + n]

    # Index sets do not depend on the batch row, so build them once.
    lower = np.tril_indices(n)
    diagonal = np.diag_indices(n)

    # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
    Ls = []
    LTs = []
    for sample in range(self.batch_size):
        L = K.zeros((n, n))
        L = T.set_subtensor(L[lower], L_flat[sample, :])
        # Replace the diagonal by its exponential.
        diag = K.exp(T.diag(L))
        L = T.set_subtensor(L[diagonal], diag)
        Ls.append(L)
        LTs.append(K.transpose(L))
    # TODO (from original): "diagonal elements exp" - the loop above already
    # applies exp to the diagonal; confirm nothing else is pending.
    L = K.pack(Ls)
    LT = K.pack(LTs)
    # NOTE(review): axes=(1, 2) looks like it contracts to L^T L rather than
    # L L^T - confirm this is intended.
    P = K.batch_dot(L, LT, axes=(1, 2))
    assert K.ndim(P) == 3

    # Combine a, mu and P into a scalar (over the batches).
    delta = a - mu
    A = -.5 * K.batch_dot(K.batch_dot(delta, P, axes=(1, 2)), a - mu, axes=1)
    assert K.ndim(A) == 2
    return A
def accumulate(attend_function, inputs, input_length, mask=None, return_probabilities=False):
    '''Computes a running attention over a sequence.

    For every time step i, `attend_function` is called on a copy of the
    (time-major) inputs in which all steps after i are replaced by zeros,
    so step i only attends over items 0..i.

    # Parameters
    ----------
    attend_function : callable taking (inputs) or (inputs, mask); must return a
        2-dimensional tensor, or a pair (output, p_vectors) when
        return_probabilities is True
    inputs : tensor with at least 3 dimensions; axes 0 and 1 are swapped before use
    input_length : python int, number of time steps to iterate over
    mask : optional tensor with the same number of dimensions as inputs, or one fewer
    return_probabilities : if True, also collect the second value returned by
        attend_function

    # Returns
    ------
    the stacked outputs with axes 0 and 1 swapped back, or
    [outputs, probabilities] when return_probabilities is True
    '''
    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'
    time_major = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(time_major)

    has_mask = mask is not None
    if has_mask:
        if mask.ndim == ndim - 1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(time_major)

    # Step indices shaped so they broadcast against the time-major inputs.
    uncover_mask = K.zeros_like(inputs)
    uncover_indices = K.arange(input_length)
    for _ in range(ndim - 1):
        uncover_indices = K.expand_dims(uncover_indices)

    def make_subset(i, X):
        # keep steps <= i, zero out the rest
        return K.switch(uncover_indices <= i, X, uncover_mask)

    successive_outputs = []
    successive_pvecs = []
    for i in range(input_length):
        inputs_i = make_subset(i, inputs)
        if has_mask:
            # The mask subset is only built when a mask exists; the original
            # built it unconditionally and so handed None to K.switch.
            output = attend_function(inputs_i, make_subset(i, mask))
        else:
            output = attend_function(inputs_i)
        if return_probabilities:
            output, p_vectors = output
            successive_pvecs.append(p_vectors)
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)

    outputs = K.pack(successive_outputs)
    # NOTE(review): the result of this squeeze is discarded (as in the original),
    # so it does not change `outputs`; kept to avoid altering the returned shape.
    K.squeeze(outputs, -1)
    outputs = outputs.dimshuffle([1, 0] + list(range(2, outputs.ndim)))

    if not return_probabilities:
        return outputs

    out_pvecs = K.pack(successive_pvecs)
    K.squeeze(out_pvecs, -1)  # result discarded, see note above
    # Use the probability tensor's own rank rather than that of `outputs`.
    out_pvecs = out_pvecs.dimshuffle([1, 0] + list(range(2, out_pvecs.ndim)))
    return [outputs, out_pvecs]
def beam_search(initial_input, initial_state, constant_context, embedding, step_func, beam_size=1, max_length=20):
    '''Returns a lattice with time steps = max_length and beam size = beam_size;
    each node of the lattice at time step t has a parent node at time step t-1,
    an accumulated score, and a label as its output.

    # Parameters
    ----------
    initial_input : a tensor with a shape of nb_samples, representing the initial input used by the step function
    initial_state : a tensor with a shape of nb_samples, state_dim, representing the initial state used by the step function
    constant_context : a tensor with a shape of nb_samples, context_dim, representing the context tensor used by the step function
    embedding : an embedding layer that maps input/output labels to their embedding
    step_func : in a form like step_func(current_input, current_state, constant_context), which returns a score tensor and a tensor representing the updated state
    beam_size : beam size
    max_length : max time steps to expand

    # Returns
    ------
    A list of three tensors:
    output_label_id_tensor : a tensor with a shape of max_length, nb_samples, beam_size of type int32, representing labels of nodes
    prev_output_index_tensor : a tensor with a shape of max_length, nb_samples, beam_size of type int32, representing parent's indexes (in the range of 0..beam_size-1) of nodes
    output_score_tensor : a tensor with a shape of max_length, nb_samples, beam_size of type float32, representing accumulated scores of nodes
    '''
    number_of_samples = K.shape(initial_input)[0]
    state_dim = K.shape(initial_state)[K.ndim(initial_state) - 1]

    # Replicate every sample beam_size times so each beam has its own row.
    current_input = repeat(initial_input, beam_size)  # shape: nb_samples*beam_size, input_dim
    current_state = repeat(initial_state, beam_size)  # shape: nb_samples*beam_size, state_dim
    constant_context = repeat(constant_context, beam_size)  # shape: nb_samples*beam_size, context_input_dim
    # All beams start with an accumulated score of zero.
    output_score = K.sum(K.zeros_like(current_state), -1)  # shape: nb_samples*beam_size

    output_score_list = []  # nb_samples, beam_size
    output_label_id_list = []
    prev_output_index_list = []  # the index of candidate from which current label id is generated

    # `range` instead of the Python-2-only `xrange`; max_length is a small python int.
    for _ in range(max_length):
        _step_score, _state = step_func(current_input, current_state, constant_context)  # nb_samples*beam_size, output_dim
        output_score, current_input, current_state = _beam_search_one_step(
            _step_score, _state, output_score, number_of_samples, beam_size, state_dim,
            output_score_list, prev_output_index_list, output_label_id_list, embedding)

    # returning a list instead of a tuple of tensors so that keras will know multiple output tensors are generated
    return [K.pack(output_label_id_list), K.pack(prev_output_index_list), K.pack(output_score_list)]
def repeat(x, n):
    '''Repeats x n times along its first axis.

    The flattened tensor is tiled n times, reshaped to (n, ...), the first two
    axes are swapped, and the result is reshaped to (n * x.shape[0], ...).
    '''
    shape = K.shape(x)
    rank = K.ndim(x)
    dims = [shape[axis] for axis in range(rank)]

    # flatten, tile, then view the copies as a leading axis of size n
    flat = K.reshape(x, (-1,))
    copies = K.reshape(K.tile(flat, n), K.pack([n] + dims))

    # bring the sample axis in front of the copy axis
    swap_first_two = [1, 0] + list(range(2, rank + 1))
    copies = K.permute_dimensions(copies, swap_first_two)

    # merge (sample, copy) into a single leading axis
    merged_shape = K.pack([n * dims[0]] + dims[1:])
    return K.reshape(copies, merged_shape)
def get_initial_states(self, x):
    '''Builds the all-zero initial states [M, h, w].

    Every state is derived from zeros_like(x[:, 0, 0]) so that its first
    axis follows the number of samples in x. Shapes (from the original comments):
    M is (nb_samples, nb_slots, memory_size), h is (nb_samples, memory_size),
    w is (nb_samples, nb_slots).
    '''
    def stacked_zeros(copies):
        # (copies, nb_samples) tensor of zeros
        per_sample = K.zeros_like(x[:, 0, 0])  # (nb_samples,)
        return K.pack([per_sample] * copies)

    memory = stacked_zeros(self.nb_slots)  # (nb_slots, nb_samples)
    memory = K.pack([memory] * self.memory_size)  # (memory_size, nb_slots, nb_samples)
    memory = K.permute_dimensions(memory, (2, 1, 0))

    hidden = K.permute_dimensions(stacked_zeros(self.memory_size), (1, 0))
    weights = K.permute_dimensions(stacked_zeros(self.nb_slots), (1, 0))
    return [memory, hidden, weights]
def get_initial_states(self, x):
    '''Returns zero-filled initial states as the list [M, h, w].

    Each state starts from zeros_like(x[:, 0, 0]), is stacked with K.pack and
    then permuted so that the sample axis comes first. Shapes (from the
    original comments): M (nb_samples, nb_slots, memory_size),
    h (nb_samples, memory_size), w (nb_samples, nb_slots).
    '''
    def zeros_per_sample():
        return K.zeros_like(x[:, 0, 0])  # (nb_samples,)

    # M: stack over slots, then over memory cells, then put samples first.
    slots_first = K.pack([zeros_per_sample()] * self.nb_slots)
    memory_first = K.pack([slots_first] * self.memory_size)
    M = K.permute_dimensions(memory_first, (2, 1, 0))

    # h: stack over memory cells, then transpose.
    h = K.permute_dimensions(
        K.pack([zeros_per_sample()] * self.memory_size), (1, 0))

    # w: stack over slots, then transpose.
    w = K.permute_dimensions(
        K.pack([zeros_per_sample()] * self.nb_slots), (1, 0))

    return [M, h, w]
def step(self, x, states):
    '''Runs one time step through every layer of self.model.

    When self.decode is set, the step input is taken from states[0] and put
    back in front of the states afterwards. When self.readout is set, the last
    state is combined with the input of the first layer (according to the
    readout mode) and is overwritten with the step output at the end.

    Returns (output, list of updated states).
    '''
    states = list(states)
    cursor = 0  # start of the current layer's states when states are not shared
    if self.decode:
        x = states[0]
        decoder_input = x
        states = states[1:]

    for position, layer in enumerate(self.model.layers):
        if self.readout and position == 0:
            previous_output = states[-1]
            if self.readout in ['add', True]:
                x += previous_output
            elif self.readout == 'mul':
                x *= previous_output
            elif self.readout == 'pack':
                x = K.pack([x, previous_output])
            elif self.readout == 'readout_only':
                x = previous_output

        if not _isRNN(layer):
            x = layer.call(x)
            continue

        if self.state_sync:
            # every recurrent layer reads and writes the same leading states
            window = slice(None, len(layer.states))
        else:
            # every recurrent layer owns its own consecutive run of states
            window = slice(cursor, cursor + len(layer.states))
        x, new_states = layer._step(x, states[window])
        states[window] = new_states
        if not self.state_sync:
            cursor += len(layer.states)

    if self.decode:
        states = [decoder_input] + states
    if self.readout:
        states[-1] = x
    return x, states
def test_get_k_best_from_lattice(self):
    '''Checks get_k_best_from_lattice on a hand-written 2-step lattice
    (2 samples, beam size 3, k=2, eos=-1).'''
    nb_samples, beam_size, time_steps = 2, 3, 2
    step_shape = (nb_samples, beam_size)
    debug_tensors = []

    # One placeholder per time step for each of the three lattice components.
    label_inputs = [K.placeholder(shape=step_shape, dtype='int32') for _ in range(time_steps)]
    parent_inputs = [K.placeholder(shape=step_shape, dtype='int32') for _ in range(time_steps)]
    score_inputs = [K.placeholder(shape=step_shape) for _ in range(time_steps)]

    lattice = (K.pack(label_inputs), K.pack(parent_inputs), K.pack(score_inputs))
    output, output_score = get_k_best_from_lattice(
        lattice, k=2, eos=-1, _tensors_to_debug=debug_tensors)
    f = K.function(inputs=label_inputs + parent_inputs + score_inputs,
                   outputs=[output, output_score] + debug_tensors)

    # Input values, indexed as [time_step][sample][beam].
    label_values = [[[3, 2, 1], [1, 3, -1]],
                    [[2, 1, 3], [3, -1, -1]]]
    parent_values = [[[0, 0, 0], [0, 0, 0]],
                     [[0, 1, 2], [2, 2, 1]]]
    score_values = [[[-0.1, -0.2, -0.3], [-0.25, -0.36, -0.45]],
                    [[-0.6, -0.5, -0.7], [-0.9, -1.2, -0.75]]]

    # Expected results.
    expected_sequences = [[[2, 1], [3, 2]],
                          [[-1, 3], [3, -1]]]  # nb_samples, k, time_steps
    expected_scores = [[-0.5, -0.6], [-0.45, -0.75]]

    results = f(label_values + parent_values + score_values)
    actual_sequences, actual_scores = results[:2]

    total_score_error = np.sum(np.abs(actual_scores - expected_scores))
    self.assertTrue(total_score_error < 0.001, "output_score_val")
    self.assertTrue(np.array_equal(actual_sequences, expected_sequences), "output_val")
def dot(x, y):
    '''Multiplies 2 tensors.

    When attempting to multiply a ND tensor with a ND tensor, reproduces the
    Theano behavior (e.g. (2, 3).(4, 3, 5) = (2, 4, 5)): the last axis of x is
    contracted with the second-to-last axis of y.

    The N-D path is only taken when the rank of both tensors is known and at
    least one of them is greater than 2; otherwise tf.matmul is used directly.
    '''
    ndim_x = K.ndim(x)
    ndim_y = K.ndim(y)
    # The original condition `a is not None and a > 2 or b > 2` parsed as
    # `(a is not None and a > 2) or b > 2`, so an unknown rank of x could still
    # reach the arithmetic on ndim_x below. Require both ranks to be known.
    ranks_known = ndim_x is not None and ndim_y is not None
    if ranks_known and (ndim_x > 2 or ndim_y > 2):
        x_shape = tf.shape(x)
        y_shape = tf.shape(y)

        # Move y's contraction axis (second to last) to the front.
        y_permute_dim = list(range(ndim_y))
        y_permute_dim = [y_permute_dim.pop(-2)] + y_permute_dim

        # Collapse both operands to matrices, multiply, then restore the axes.
        xt = tf.reshape(x, K.pack([-1, x_shape[ndim_x - 1]]))
        yt = tf.reshape(tf.transpose(y, perm=y_permute_dim),
                        K.pack([y_shape[ndim_y - 2], -1]))
        target_shape = ([x_shape[i] for i in range(ndim_x - 1)] +
                        [y_shape[i] for i in range(ndim_y - 2)] +
                        [y_shape[ndim_y - 1]])
        return tf.reshape(tf.matmul(xt, yt), K.pack(target_shape))
    return tf.matmul(x, y)
def accumulate(attend_function, inputs, input_length, go_backwards=False, mask=None):
    '''Computes a running attention over a sequence.

    Axes 0 and 1 of inputs are swapped, then for every step i the first
    i + 1 slices are handed to attend_function (together with the matching
    slices of mask, if any). With go_backwards the steps are visited in
    reverse order.

    attend_function must return 2-dimensional tensors. The stacked results are
    returned with axes 0 and 1 swapped back.
    '''
    rank = inputs.ndim
    assert rank >= 3, 'inputs should be at least 3d'
    swap_first_two = [1, 0] + list(range(2, rank))
    inputs = inputs.dimshuffle(swap_first_two)

    steps = list(range(input_length))
    if go_backwards:
        steps = steps[::-1]

    if mask is not None:
        if mask.ndim == rank - 1:
            mask = expand_dims(mask)
        assert mask.ndim == rank
        mask = mask.dimshuffle(swap_first_two)

    attended = []
    for last in steps:
        visible = slice(None, last + 1)
        # attend_function should marginalize over the leading (time) axis
        if mask is None:
            result = attend_function(inputs[visible])
        else:
            result = attend_function(inputs[visible], mask[visible])
        assert result.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        attended.append(result)

    outputs = K.pack(attended)
    # NOTE(review): the squeezed tensor is not assigned, so `outputs` is
    # unaffected by this call - confirm whether an assignment was intended.
    K.squeeze(outputs, -1)  # current assumption. modify if that changes.
    restore_order = [1, 0] + list(range(2, outputs.ndim))
    return outputs.dimshuffle(restore_order)
def step(self, x, states):
    '''Runs one time step through every layer of self.model, with optional readout.

    When self.decode is set, the step input is taken from states[0] and put
    back in front of the states afterwards. With readout enabled, states[-1]
    holds the readout value; when self._truth_tensor is set, states[-2] is
    additionally used as a step counter and is incremented at the end.

    Returns (output, list of updated states).
    '''
    states = list(states)
    cursor = 0  # start of the current layer's states when states are not shared
    if self.decode:
        x = states[0]
        decoder_input = x
        states = states[1:]

    for position, layer in enumerate(self.model.layers):
        # Decide whether this layer receives the readout: in 'call' mode any
        # layer flagged with receive_readout, otherwise only the first layer.
        if not self.readout:
            feed_readout = False
        elif self.readout == 'call':
            feed_readout = hasattr(layer, 'receive_readout') and layer.receive_readout
        else:
            feed_readout = position == 0

        if feed_readout:
            readout = states[-1]
            if self._truth_tensor is not None:
                counter = states[-2][0]
                # index (counter - 1) on axis 1 once counter is non-zero, else 0
                truth_index = [slice(None), counter - K.switch(counter, 1, 0)]
                truth_index = truth_index + [slice(None)] * (K.ndim(self._truth_tensor) - 2)
                slices = truth_index
                # in the training phase, use the truth slice once the counter is non-zero
                readout = K.in_train_phase(
                    K.switch(states[-2][0], self._truth_tensor[slices], readout),
                    readout)
            if self.readout in ['add', True]:
                x += readout
            elif self.readout == 'mul':
                x *= readout
            elif self.readout == 'pack':
                x = K.pack([x, readout])
            elif self.readout == 'readout_only':
                x = readout
            elif self.readout == 'call':
                x = [x, readout]

        if not _isRNN(layer):
            x = layer.call(x)
            continue

        if self.state_sync:
            # every recurrent layer reads and writes the same leading states
            window = slice(None, len(layer.states))
        else:
            # every recurrent layer owns its own consecutive run of states
            window = slice(cursor, cursor + len(layer.states))
        x, new_states = layer._step(x, states[window])
        states[window] = new_states
        if not self.state_sync:
            cursor += len(layer.states)

    if self.decode:
        states = [decoder_input] + states
    if self.readout:
        if self._truth_tensor is not None:
            states[-2] += 1
        states[-1] = x
    return x, states
def call(self, x, mask=None):
    '''Computes the probabilities of the requested classes.

    x is a pair [input_vector, target_classes]. The probability of a class is
    the product, over the nodes on its path, of `h + p - 2 * p * h`, where p is
    the sigmoid output at that node and h the matching huffman code entry.

    self.mode == 0 gathers all required weights and uses one batched product;
    self.mode == 1 evaluates every node separately. Any other mode falls
    through and returns None, as in the original. `mask` is not used.
    '''
    input_vector = x[0]
    target_classes = x[1]
    nb_req_classes = self.input_spec[1].shape[1]
    if nb_req_classes is None:
        nb_req_classes = K.shape(target_classes)
    if K.dtype(target_classes) != 'int32':
        target_classes = K.cast(target_classes, 'int32')

    if self.mode == 0:
        # One giant matrix mul
        # (unused locals of the original - input_dim, path_lengths and a second
        # read of nb_req_classes - have been dropped)
        huffman_codes = K.variable(np.array(self.huffman_codes))
        req_nodes = K.gather(self.class_path_map, target_classes)
        req_W = K.gather(self.W, req_nodes)
        y = K.batch_dot(input_vector, req_W, axes=(1, 3))
        if self.bias:
            req_b = K.gather(self.b, req_nodes)
            y += req_b
        y = K.sigmoid(y[:, :, :, 0])
        req_huffman_codes = K.gather(huffman_codes, target_classes)
        # equals y where the code is 0 and 1 - y where the code is 1
        return K.prod(req_huffman_codes + y - 2 * req_huffman_codes * y, axis=-1)

    elif self.mode == 1:
        # Many tiny matrix muls
        probs = []
        for huffman_code, path in zip(self.huffman_codes, self.paths):
            prob = 1.
            for node, h in zip(path, huffman_code):
                node_index = self.node_indices[node]
                p = K.dot(input_vector, self.W[node_index, :, :])[:, 0]
                if self.bias:
                    p += self.b[node_index, :][0]
                p = K.sigmoid(p)
                prob *= h + p - 2 * p * h
            probs.append(prob)
        probs = K.pack(probs)

        req_probs = K.gather(probs, target_classes)
        req_probs = K.permute_dimensions(req_probs, (0, 2, 1))
        req_probs = K.reshape(req_probs, (-1, nb_req_classes))
        batch_size = K.shape(input_vector)[0]
        # NOTE(review): presumably selects rows 0, batch_size + 1, ... (the
        # diagonal entries) - depends on the signature of the external
        # `arange` helper; confirm.
        indices = arange(batch_size * batch_size, batch_size + 1)
        req_probs = K.gather(req_probs, indices)
        return req_probs
def get_k_best_from_lattice(lattice, k=1, eos=None, _tensors_to_debug=None):
    '''Selects top k best paths from a lattice in a descending order by their scores.

    The lattice is walked backwards from the last time step, following the
    parent indexes. When eos is given, the score of a path is replaced by the
    score at the step where eos was emitted, and the paths are re-ranked at the end.

    # Parameters
    ----------
    lattice : a triple consisting of output_label_id_tensor, prev_output_index_tensor and output_score_tensor. This lattice is generated by calling beam_search.
    k : the number of paths to select from that lattice
    eos : if not None, it is the id of the label that represents the end of sequence
    _tensors_to_debug : optional list that intermediate tensors are appended to

    # Returns
    ------
    sequence : a tensor of type int32 with a shape of nb_samples, k, time_steps, representing the top-k best sequences
    sequence_score : a tensor of type float32 with a shape of nb_samples, k, representing the scores of the top-k best sequences
    '''
    debugging = _tensors_to_debug is not None
    use_eos = eos is not None

    # Split every lattice tensor into per-step tensors, last time step first.
    per_step = []
    for tensor in lattice:
        steps = unpack(tensor)
        steps.reverse()
        per_step.append(steps)
    output_label_id_list, prev_output_index_list, output_score_list = per_step

    sequence_score, output_indice = top_k(output_score_list[0], k)  # shape: nb_samples,k
    if debugging:
        _tensors_to_debug.append(sequence_score)
        _tensors_to_debug.append(output_indice)
    nb_samples = K.shape(sequence_score)[0]

    # fill sequence and update sequence_score
    sequence = []
    lattice_steps = zip(output_score_list, output_label_id_list, prev_output_index_list)
    for cur_output_score, output_label_id, prev_output_index in lattice_steps:
        sequence_score_candidate = K.reshape(
            gather_by_sample(cur_output_score, output_indice),
            shape=K.pack([nb_samples, k]))
        # shape: -1, k, nb_samples could be -1
        sequence.append(K.reshape(
            gather_by_sample(output_label_id, output_indice),
            shape=K.pack([nb_samples, k])))

        if use_eos and len(sequence) > 1:
            cond = K.equal(sequence[-1], eos)
            sequence_score = choose_by_cond(cond, sequence_score_candidate, sequence_score)
            if debugging:
                _tensors_to_debug.append(cond)
                _tensors_to_debug.append(sequence_score_candidate)
                _tensors_to_debug.append(sequence_score)

        # step back to the parent nodes
        output_indice = gather_by_sample(prev_output_index, output_indice)
        if debugging:
            _tensors_to_debug.append(output_indice)

    if use_eos and len(sequence) > 1:
        # scores may have changed because of eos, so rank the paths again
        sequence_score, output_indice = top_k(sequence_score, k)
        sequence = [gather_by_sample(labels, output_indice) for labels in sequence]

    # reverse the sequence so we get sequence from time step 0, 1, ...,
    sequence.reverse()
    # time_steps, nb_samples, k -> nb_samples, k, time_steps
    sequence = K.permute_dimensions(K.pack(sequence), (1, 2, 0))
    return sequence, sequence_score
def gather_by_sample(x, indices):
    '''Performs gather operation along the first dimension, i.e.,
    ret[i] = gather(x[i], indices[i]).

    For example, when x is a matrix and indices is a vector, it selects one
    element for each row from x. Note that this is different from gather, which
    selects |indices| ndim-1 sub tensors (i.e., x[i], where i = indices[:::]) from x.

    Both arguments are unpacked along their first axis, so that axis must have
    a size unpack can enumerate.

    # Parameters
    ----------
    x : a tensor with a shape nb_samples, ...; its number of dimensions >= 2
    indices : a tensor of type int with a shape nb_samples, ...; its number of dimensions <= # of dimensions of x - 1

    # Returns
    ------
    a tensor with the shape of nb_samples, ..., where ret[i,:::,:::] = x[i,indices[i,:::],:::];
    its number of dimensions = # dimensions of x + # dimensions of indices - 2
    '''
    per_sample = zip(unpack(x), unpack(indices))
    return K.pack([K.gather(sample, picks) for sample, picks in per_sample])
def gather_by_sample(x, indices):
    '''Performs gather operation along the first dimension, i.e.,
    ret[i] = gather(x[i], indices[i]).

    For example, when x is a matrix and indices is a vector, it selects one
    element for each row from x. Note that this is different from gather, which
    selects |indices| ndim-1 sub tensors (i.e., x[i], where i = indices[:::]) from x.

    Implemented with tf.scan over the sample positions.

    # Parameters
    ----------
    x : a tensor with a shape nb_samples, ...; its number of dimensions >= 2
    indices : a tensor of type int with a shape nb_samples, ...; its number of dimensions <= # of dimensions of x - 1

    # Returns
    ------
    a tensor with the shape of nb_samples, ..., where ret[i,:::,:::] = x[i,indices[i,:::],:::];
    its number of dimensions = # dimensions of x + # dimensions of indices - 2
    '''
    shape_of_x = K.shape(x)
    sample_count = shape_of_x[0]

    # Build the sample positions as a running sum of ones starting from -1.
    def _count_up(total, one):
        return total + one

    unit_steps = tf.ones(shape=K.pack([sample_count]), dtype='int32')
    positions = tf.scan(_count_up, unit_steps,
                        initializer=tf.constant(-1, dtype='int32'))

    def _gather_one(_, position):
        # the previous result is not needed; each sample is gathered independently
        sample = K.gather(x, position)
        picks = K.gather(indices, position)
        return K.gather(sample, picks)

    # NOTE(review): the initializer takes the shape of one sample of x -
    # confirm this matches the per-sample result for all intended index shapes.
    first_result = tf.zeros(shape=shape_of_x[1:], dtype=x.dtype)
    return tf.scan(_gather_one, positions, initializer=first_result)
def reverse(x):
    '''Reverses x along its first axis by unpacking it, reversing the list of
    slices and packing them again.'''
    slices = tf.unpack(x)
    return K.pack(slices[::-1])
def unroll_scan(fn, sequences=None, initial_values=None, non_sequences=None, n_steps=None, batch=False):
    """Limited reimplementation of theano.scan() by unrolling.

    Based on unroll_scan() from Lasagne.

    At every step, `fn` is called with the current slice of each sequence,
    followed by the previous step's outputs (initially `initial_values`),
    followed by `non_sequences`.

    # Parameters
    ----------
    fn : step function; its return value is normalized with _to_list
    sequences : tensor(s) to iterate over; indexed as s[i], or s[:, i] when batch is True
    initial_values : value(s) fed as "previous outputs" to the first step
    non_sequences : value(s) passed unchanged to every step
    n_steps : python int, number of steps to unroll
    batch : if True, step along axis 1 of the sequences and return outputs
        with the step axis moved to position 1

    # Returns
    ------
    (output, None): output is None when there are no outputs (including
    n_steps == 0), a single tensor when fn has one output, otherwise a list
    with one tensor per output. The second element is a dummy that stands in
    for theano.scan()'s updates.
    """
    sequences = _to_list(sequences)
    initial_values = _to_list(initial_values)
    non_sequences = _to_list(non_sequences)

    sequential_outputs = []
    previous = initial_values
    for i in range(n_steps):
        if batch:
            args = [s[:, i] for s in sequences]
        else:
            args = [s[i] for s in sequences]
        args += previous + non_sequences
        outputs = _to_list(fn(*args))
        sequential_outputs.append(outputs)
        previous = outputs

    # sequential_outputs is a list with one entry per step, each entry holding
    # the outputs of that step:
    #
    #   [ [ step_1_out_1  step_1_out_2  ...  step_1_out_o ]
    #     ...
    #     [ step_n_out_1  step_n_out_2  ...  step_n_out_o ] ]
    #
    # theano.scan() returns the transposed arrangement, (n_outputs, n_steps),
    # with the step values of each output packed into one tensor.

    # Reorganize and pack. With zero steps there is nothing to index into, so
    # treat it as "no outputs" instead of failing on sequential_outputs[0].
    n_outputs = len(sequential_outputs[0]) if sequential_outputs else 0
    output_sequences = [
        K.pack([step_outputs[o] for step_outputs in sequential_outputs])
        for o in range(n_outputs)
    ]

    # Permute if batchwise: (n_steps, batch_size, ...) -> (batch_size, n_steps, ...)
    if batch:
        for o, s in enumerate(output_sequences):
            # build the pattern as a list; slicing a Python 3 range object does
            # not give something that can be added to a list
            pattern = [1, 0] + list(range(2, K.ndim(s)))
            output_sequences[o] = K.permute_dimensions(s, pattern)

    # Remove list wrapping, following theano.scan(): a single output is
    # returned bare, no outputs gives None.
    if not output_sequences:
        output = None
    elif len(output_sequences) == 1:
        output = output_sequences[0]
    else:
        output = output_sequences
    return output, None  # None for updates dummy