def interactive_generate(self, initial_text, generation_length, *args):
    """Continue ``initial_text`` character by character and log the result.

    The seed text is encoded with the vocabulary returned by
    ``get_character(self.dataset)``. ``self.generate`` is then called
    ``generation_length`` times; after each call one character code is drawn
    from the softmax of the last output step and appended to the input
    sequence. Finally the seed prefix and the full sentence are logged.

    Args:
        initial_text: Seed string; every character must be in the vocabulary.
        generation_length: Number of characters to generate.
        *args: Accepted and ignored.

    Returns:
        None. The generated text is only written to ``logger``.

    Raises:
        ValueError: If ``initial_text`` is empty or contains a character
            that is missing from the vocabulary.
    """
    if not initial_text:
        raise ValueError("initial_text must contain at least one character")

    vocab = get_character(self.dataset)

    # Encode every seed character as its index in the vocabulary. Each entry
    # is the index array returned by np.where, so the stacked result has one
    # row per character.
    initial_code = []
    for char in initial_text:
        matches = np.where(vocab == char)[0]
        if matches.size == 0:
            # Without this check a missing character only surfaces later as
            # an obscure shape error when the codes are stacked.
            raise ValueError(
                "Character %r of initial_text is not in the vocabulary"
                % (char,))
        initial_code.append(matches)
    initial_code = np.array(initial_code)

    # The sampling strategy does not change between steps, so decide it once.
    use_argmax = self.softmax_sampling == 'argmax'

    inputs_ = initial_code
    logger.info("\nGeneration:")
    for _ in range(generation_length):
        # time x batch x features (1 x 1 x vocab_size)
        last_output = self.generate(inputs_)[-1][-1:, :, :]
        # time x features (1 x vocab_size) '0' is for removing one dim
        last_output_probabilities = softmax(last_output[0])
        # 1 x 1
        last_output_sample = sample(last_output_probabilities, use_argmax)
        # Feed the sampled code back in as the next time step.
        inputs_ = np.vstack([inputs_, last_output_sample])

    # inputs_ is time x batch; column 0 holds the codes of the one sentence.
    whole_sentence = ''.join(vocab[inputs_[:, 0]])
    logger.info(whole_sentence[:initial_code.shape[0]] + ' ...')
    logger.info(whole_sentence)
def visualize_gradients_flow_pie(hidden_states, updates, args,
                                 text='[done]. Finally'):
    """Plot per-time-step gradient magnitudes as pie charts.

    For every position ``i`` of ``text`` the function builds one gradient
    expression for the first 'pre_rnn' variable and one for the state of
    every recurrent layer, compiles each into a Theano function, evaluates
    them on the encoded ``text`` and hands the normalised magnitudes to
    ``plot_pie_charts``, which writes ``args.save_path + '/pie.png'``.

    NOTE(review): another definition with this same name appears further
    down in this file; if both stay at module level the later one takes
    effect.

    Args:
        hidden_states: Variables from which the computation graph is built.
        updates: Iterable of ``(shared_variable, update_expression)`` pairs
            for the recurrent states.
        args: Object exposing ``rnn_type`` ('lstm' or 'simple'), ``layers``,
            ``dataset`` and ``save_path``.
        text: Text whose characters are fed to the network; its length is
            the number of time steps that are visualised.

    Returns:
        None.

    Raises:
        NotImplementedError: If ``args.rnn_type`` is neither 'lstm' nor
            'simple'.
    """
    unfolding_length = len(text)

    # Validate the configuration first so that an unsupported RNN type fails
    # before any graph is built or compiled.
    if args.rnn_type == 'lstm':
        rnn_type = 'lstm'
    elif args.rnn_type == 'simple':
        rnn_type = 'simplerecurrent'
    else:
        # NotImplemented is a comparison sentinel, not an exception class;
        # raising it produces a confusing TypeError.
        raise NotImplementedError(
            "Unsupported rnn_type: %r" % (args.rnn_type,))

    variables = ComputationGraph(hidden_states).variables

    states = []
    for d in range(args.layers):
        state_name = rnn_type + '_' + str(d) + '_apply_states'
        # [1] is because there are two '*_apply_states' in variables.
        states.append([variable for variable in variables
                       if variable.name == state_name][1])

    pre_rnns = [variable for variable in variables
                if ((variable.name is not None) and
                    ('pre_rnn' in variable.name))]

    # For each time step: one expression for the input projection followed
    # by one per layer, i.e. (args.layers + 1) expressions per step. The
    # flat ordering is relied upon when the results are indexed below.
    grads = []
    for i in range(unfolding_length):
        grads.append(tensor.sum(tensor.abs_(tensor.grad(
            tensor.mean(tensor.abs_(pre_rnns[0][i])),
            pre_rnns[0:1])), axis=0))
        for layer, state in enumerate(states):
            grads.append(tensor.sum(tensor.abs_(tensor.grad(
                tensor.mean(tensor.abs_(state[i])),
                pre_rnns[0:layer + 1])), axis=0))

    # Handle the theano shared variables for the state: each one is replaced
    # by a zero-initialised copy of its first row, and the update expressions
    # are redirected to those copies.
    state_vars = [theano.shared(v[0:1, :].zeros_like().eval(),
                                v.name + '-gen')
                  for v, _ in updates]
    givens = [(v, x) for (v, _), x in zip(updates, state_vars)]
    f_updates = [(x, upd) for x, (_, upd) in zip(state_vars, updates)]

    # Compile the function
    logger.info("The compilation of the function has started")
    compiled_functions = [theano.function(
        inputs=ComputationGraph(grad).inputs, outputs=grad,
        givens=givens, updates=f_updates,
        mode=Mode(optimizer=None)) for grad in grads]
    logger.info("The function has been compiled")

    # input text, encoded as vocabulary indices (one row per character)
    vocab = get_character(args.dataset)
    code = []
    for char in text:
        code += [np.where(vocab == char)[0]]
    code = np.array(code)

    res = [f(code) for f in compiled_functions]

    all_time_steps = []
    for i in range(unfolding_length):
        temp = []
        for d in range(args.layers + 1):
            # Collapse axes 1 and 2 so one magnitude remains per entry of
            # the leading axis.
            temp.append(np.sum(np.abs(res[i * (args.layers + 1) + d]),
                               axis=(1, 2)))
        # Normalise each row so that its values sum to one.
        all_values = np.vstack([layer / np.sum(layer, axis=0)
                                for layer in temp])
        all_time_steps += [all_values.T[:, ::-1]]

    # +1 is to show inputs as well
    plot_pie_charts(data=all_time_steps,
                    layers=args.layers + 1,
                    time_steps=unfolding_length,
                    path=args.save_path + '/pie.png',
                    text=text)
def visualize_gradients_flow_pie(hidden_states,
                                 updates,
                                 args,
                                 text='[done]. Finally'):
    """Draw pie charts of gradient magnitudes for each character of ``text``.

    One gradient expression is built per time step for the first 'pre_rnn'
    variable and one per recurrent layer state. Every expression is compiled
    into its own Theano function, evaluated on the encoded ``text``,
    normalised, and passed to ``plot_pie_charts``, which is asked to write
    ``args.save_path + '/pie.png'``.

    Args:
        hidden_states: Variables used to build the computation graph.
        updates: Iterable of ``(shared_variable, update_expression)`` pairs
            for the recurrent states.
        args: Object providing ``rnn_type`` ('lstm' or 'simple'),
            ``layers``, ``dataset`` and ``save_path``.
        text: Input text; ``len(text)`` is the number of time steps shown.

    Returns:
        None.

    Raises:
        NotImplementedError: If ``args.rnn_type`` is not 'lstm' or 'simple'.
    """
    unfolding_length = len(text)

    # Reject unsupported RNN types up front, before the (potentially slow)
    # graph construction and compilation.
    if args.rnn_type == 'lstm':
        rnn_type = 'lstm'
    elif args.rnn_type == 'simple':
        rnn_type = 'simplerecurrent'
    else:
        # ``raise NotImplemented`` would fail with a TypeError because
        # NotImplemented is not an exception; use the real exception class.
        raise NotImplementedError(
            "Unsupported rnn_type: %r" % (args.rnn_type,))

    variables = ComputationGraph(hidden_states).variables

    states = []
    for d in range(args.layers):
        wanted_name = rnn_type + '_' + str(d) + '_apply_states'
        # [1] is because there are two '*_apply_states' in variables.
        states.append([
            variable for variable in variables
            if variable.name == wanted_name
        ][1])

    pre_rnns = [
        variable for variable in variables
        if ((variable.name is not None) and ('pre_rnn' in variable.name))
    ]

    # Flat list with (args.layers + 1) entries per time step: first the
    # input projection, then one entry per layer. The result indexing
    # further down depends on exactly this order.
    grads = []
    for i in range(unfolding_length):
        grads.append(
            tensor.sum(tensor.abs_(
                tensor.grad(tensor.mean(tensor.abs_(pre_rnns[0][i])),
                            pre_rnns[0:1])),
                       axis=0))
        for layer, state in enumerate(states):
            grads.append(
                tensor.sum(tensor.abs_(
                    tensor.grad(tensor.mean(tensor.abs_(state[i])),
                                pre_rnns[0:layer + 1])),
                           axis=0))

    # Handle the theano shared variables for the state: substitute each with
    # a zero-filled copy of its first row and point the updates at the copy.
    state_vars = [
        theano.shared(v[0:1, :].zeros_like().eval(), v.name + '-gen')
        for v, _ in updates
    ]
    givens = [(v, x) for (v, _), x in zip(updates, state_vars)]
    f_updates = [(x, upd) for x, (_, upd) in zip(state_vars, updates)]

    # Compile the function
    logger.info("The compilation of the function has started")
    compiled_functions = [
        theano.function(inputs=ComputationGraph(grad).inputs,
                        outputs=grad,
                        givens=givens,
                        updates=f_updates,
                        mode=Mode(optimizer=None)) for grad in grads
    ]
    logger.info("The function has been compiled")

    # input text, turned into vocabulary indices (one row per character)
    vocab = get_character(args.dataset)
    code = []
    for char in text:
        code += [np.where(vocab == char)[0]]
    code = np.array(code)

    res = [f(code) for f in compiled_functions]

    all_time_steps = []
    for i in range(unfolding_length):
        temp = []
        for d in range(args.layers + 1):
            # Sum absolute values over axes 1 and 2, keeping the leading
            # axis.
            temp.append(
                np.sum(np.abs(res[i * (args.layers + 1) + d]), axis=(1, 2)))
        # Each row is divided by its own total so it sums to one.
        all_values = np.vstack(
            [layer / np.sum(layer, axis=0) for layer in temp])
        all_time_steps += [all_values.T[:, ::-1]]

    # +1 is to show inputs as well
    plot_pie_charts(data=all_time_steps,
                    layers=args.layers + 1,
                    time_steps=unfolding_length,
                    path=args.save_path + '/pie.png',
                    text=text)