def do_apply(self, **kwargs):
    """Run one step: look at the attended context, then update the states.

    Parameters
    ----------
    **kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context and previous step glimpses.

    Returns
    -------
    outputs : list of Theano variables
        The current step states followed by the current glimpses.

    """
    context = kwargs[self.attended_name]
    # The preprocessed context is removed from `kwargs`; the raw context
    # and its (optional) mask are only read and stay in `kwargs`.
    preprocessed_context = kwargs.pop(self.preprocessed_attended_name)
    context_mask = kwargs.get(self.attended_mask_name)

    # Whatever is left in `kwargs` after these extractions is forwarded
    # to `compute_states` as is.
    step_inputs = dict_subset(
        kwargs, self.sequence_names, pop=True, must_have=False)
    previous_states = dict_subset(kwargs, self.state_names, pop=True)
    previous_glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

    attended_pair = {
        self.attended_name: context,
        self.preprocessed_attended_name: preprocessed_context}
    look_arguments = dict_union(
        previous_states, previous_glimpses, attended_pair)
    current_glimpses = self.take_look(
        mask=context_mask, return_dict=True, **look_arguments)

    state_arguments = dict_union(
        step_inputs, previous_states, current_glimpses, kwargs)
    current_states = self.compute_states(
        return_list=True, **state_arguments)
    return current_states + list(current_glimpses.values())
def do_apply(self, **kwargs):
    r"""Compute the glimpses and the states of a single step.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list of Theano variables
        The current step states and glimpses, in that order.

    """
    # Read-only access: the attended context and its mask remain in
    # `kwargs`; only the preprocessed version is taken out.
    attended_value = kwargs[self.attended_name]
    preprocessed_value = kwargs.pop(self.preprocessed_attended_name)
    mask_value = kwargs.get(self.attended_mask_name)

    # Sequences are optional (must_have=False); states and glimpses are
    # extracted with the helper's default strictness.
    inputs = dict_subset(
        kwargs, self.sequence_names, pop=True, must_have=False)
    old_states = dict_subset(kwargs, self.state_names, pop=True)
    old_glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

    for_take_look = dict_union(
        old_states,
        old_glimpses,
        {self.attended_name: attended_value,
         self.preprocessed_attended_name: preprocessed_value})
    new_glimpses = self.take_look(
        mask=mask_value, return_dict=True, **for_take_look)

    # The remaining `kwargs` entries travel on to `compute_states`.
    for_compute_states = dict_union(
        inputs, old_states, new_glimpses, kwargs)
    new_states = self.compute_states(
        return_list=True, **for_compute_states)

    return new_states + list(new_glimpses.values())
def generate(self, outputs, **kwargs):
    """Perform a single step of sequence generation.

    Parameters
    ----------
    outputs : :class:`~tensor.TensorVariable`
        The outputs from the previous step.

    Returns
    -------
    list
        The next states, the next outputs, the next glimpses and the
        costs of the next outputs, concatenated in that order.

    Notes
    -----
    The contexts, previous states and glimpses are expected as keyword
    arguments.

    """
    def pick(names):
        # Every requested name is mandatory: a missing one is a KeyError.
        return {name: kwargs[name] for name in names}

    states = pick(self.state_names)
    contexts = pick(self.context_names)
    glimpses = pick(self.glimpse_names)

    glimpse_arguments = dict_union(states, glimpses, contexts)
    next_glimpses = self.transition.take_glimpses(
        return_dict=True, **glimpse_arguments)

    # The readout sees the feedback of the *previous* outputs.
    previous_feedback = self.readout.feedback(outputs)
    readout_arguments = dict_union(states, next_glimpses, contexts)
    next_readouts = self.readout.readout(
        feedback=previous_feedback, **readout_arguments)

    next_outputs = self.readout.emit(next_readouts)
    next_costs = self.readout.cost(next_readouts, next_outputs)
    next_feedback = self.readout.feedback(next_outputs)

    # Without a fork the feedback is fed to the transition directly.
    if self.fork:
        next_inputs = self.fork.apply(next_feedback, return_dict=True)
    else:
        next_inputs = {'feedback': next_feedback}

    state_arguments = dict_union(
        next_inputs, states, next_glimpses, contexts)
    next_states = self.transition.compute_states(
        return_list=True, **state_arguments)

    glimpse_values = list(next_glimpses.values())
    return next_states + [next_outputs] + glimpse_values + [next_costs]
def compute_states(self, **kwargs):
    r"""Compute current states when glimpses have already been computed.

    Combines an application of the `distribute` that alter the
    sequential inputs of the wrapped transition and an application of
    the wrapped transition. All unknown keyword arguments go to
    the wrapped transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain everything what `self.transition` needs
        and in addition the current glimpses.

    Returns
    -------
    current_states : list of :class:`~tensor.TensorVariable`
        Current states computed by `self.transition`.

    """
    # Masks are not mandatory, that's why 'must_have=False'
    sequences = dict_subset(kwargs, self._sequence_names,
                            pop=True, must_have=False)
    glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    if self.add_contexts:
        kwargs.pop(self.attended_name)
        # The attended mask is optional, so its absence must not raise
        # a KeyError here.
        kwargs.pop(self.attended_mask_name, None)

    # `distribute` only receives the inputs it declares.
    sequences.update(self.distribute.apply(
        as_dict=True,
        **dict_subset(dict_union(sequences, glimpses),
                      self.distribute.apply.inputs)))
    # Everything still left in `kwargs` is handed to the transition.
    current_states = self.transition.apply(
        iterate=False, as_list=True,
        **dict_union(sequences, kwargs))
    return current_states
def generate(self, outputs, **kwargs):
    """Produce the next outputs, states, glimpses and costs.

    Parameters
    ----------
    outputs : Theano variable
        The outputs from the previous step.

    Returns
    -------
    list
        Next states, then the next outputs, then the next glimpses,
        then the costs of the next outputs.

    Notes
    -----
    The contexts, previous states and glimpses are expected as keyword
    arguments.

    """
    # All of these names are required to be present in `kwargs`.
    states = dict((name, kwargs[name]) for name in self.state_names)
    contexts = dict((name, kwargs[name]) for name in self.context_names)
    glimpses = dict((name, kwargs[name]) for name in self.glimpse_names)

    look_arguments = dict_union(states, glimpses, contexts)
    next_glimpses = self.transition.take_look(
        return_dict=True, **look_arguments)

    # Feedback of the previous outputs drives the readout.
    old_feedback = self.readout.feedback(outputs)
    next_readouts = self.readout.readout(
        feedback=old_feedback,
        **dict_union(states, next_glimpses, contexts))
    next_outputs = self.readout.emit(next_readouts)
    next_costs = self.readout.cost(next_readouts, next_outputs)

    # Feedback of the new outputs drives the transition, optionally
    # going through the fork first.
    next_feedback = self.readout.feedback(next_outputs)
    if self.fork:
        next_inputs = self.fork.apply(next_feedback, return_dict=True)
    else:
        next_inputs = {'feedback': next_feedback}

    next_states = self.transition.compute_states(
        return_list=True,
        **dict_union(next_inputs, states, next_glimpses, contexts))

    tail = list(next_glimpses.values()) + [next_costs]
    return next_states + [next_outputs] + tail
def costs(self, application_call, prediction, prediction_mask=None,
          groundtruth=None, groundtruth_mask=None,
          **sequences_states_contexts):
    """Run the recurrent part on `prediction` and delegate to the readout.

    The prediction is turned into feedback by `self.feedback`, the
    recurrent brick is applied with the initial states included, and
    the resulting variables are registered on `application_call` as
    auxiliary variables before `self.readout.costs` is called.

    Parameters
    ----------
    application_call
        Receives the auxiliary variables.
    prediction
        Passed to `self.feedback.apply` and to `self.readout.costs`.
    prediction_mask : optional
        Passed as `mask` to the recurrent brick and to the readout.
    groundtruth, groundtruth_mask : optional
        Forwarded to `self.readout.costs` unchanged.
    **sequences_states_contexts
        Additional inputs of the recurrent brick.

    Returns
    -------
    Whatever `self.readout.costs` returns.

    """
    feedback = self.feedback.apply(prediction, as_dict=True)
    # Using dict_union gives us a free sanity check that the feedback
    # entries do not override the ones from sequences_states_contexts.
    recurrent_inputs = dict_union(feedback, sequences_states_contexts)
    states_outputs = self.recurrent.apply(
        mask=prediction_mask, return_initial_states=True, as_dict=True,
        **recurrent_inputs)

    # The last time step of every result can be used to initialize the
    # next batch from where the current batch stopped.
    for name in states_outputs:
        final_value = states_outputs[name][-1].copy()
        application_call.add_auxiliary_variable(
            final_value, name=name+"_final_value")

    # For the states, the final step is discarded from here on.
    for state_name in self.recurrent.apply.states:
        states_outputs[state_name] = states_outputs[state_name][:-1]

    # Expose all states and outputs as auxiliary variables.
    for name, variable in list(states_outputs.items()):
        application_call.add_auxiliary_variable(variable.copy(), name=name)

    # Those can potentially be used for computing the cost.
    # NOTE(review): `self.generate.sequences` is passed as the third
    # positional argument of `dict_subset`; elsewhere in this code that
    # helper is only called with `pop=`/`must_have=` keywords, so this
    # may not select the sequences at all -- confirm against the
    # helper's signature.
    sequences_contexts = dict_subset(
        sequences_states_contexts,
        self.generate.contexts, self.generate.sequences)

    # Only hand the readout the inputs it declares; missing ones are
    # tolerated.
    readout_inputs = dict_subset(
        dict_union(states_outputs, sequences_contexts),
        self.readout.costs.inputs, must_have=False)
    return self.readout.costs(
        prediction, prediction_mask,
        groundtruth, groundtruth_mask,
        **readout_inputs)
def do_apply(self, **kwargs):
    r"""Compute the current glimpses and then the current states.

    Parameters
    ----------
    \*\*kwargs
        Must contain the attended context, its preprocessed version,
        the previous states and the previous glimpses. The attended
        mask, the sequences and the entries named in
        `self.add_sequences` are optional. Everything else is passed
        on to `compute_states`.

    Returns
    -------
    list
        The current states followed by the current glimpses.

    """
    context = kwargs[self.attended_name]
    preprocessed_context = kwargs.pop(self.preprocessed_attended_name)
    context_mask = kwargs.get(self.attended_mask_name)

    step_inputs = dict_subset(
        kwargs, self._sequence_names, pop=True, must_have=False)
    previous_states = dict_subset(kwargs, self._state_names, pop=True)
    previous_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    extra_sequences = dict_subset(
        kwargs, self.add_sequences, pop=True, must_have=False)

    # The mask entry is always present here, possibly with value None.
    attended_arguments = {
        self.attended_name: context,
        self.attended_mask_name: context_mask,
        self.preprocessed_attended_name: preprocessed_context}
    glimpse_arguments = dict_union(
        previous_states, previous_glimpses,
        attended_arguments, extra_sequences)
    current_glimpses = self.take_glimpses(as_dict=True, **glimpse_arguments)

    state_arguments = dict_union(
        step_inputs, previous_states, current_glimpses, kwargs)
    current_states = self.compute_states(as_list=True, **state_arguments)
    return current_states + list(current_glimpses.values())
def compute_states(self, **kwargs):
    r"""Compute the current states from already computed glimpses.

    Parameters
    ----------
    \*\*kwargs
        Should contain everything what `self.transition` needs
        and in addition current glimpses.

    Returns
    -------
    current_states : list of :class:`~tensor.TensorVariable`
        Current states computed by `self.transition`.

    """
    step_inputs = dict_subset(
        kwargs, self.sequence_names, pop=True, must_have=False)
    previous_states = dict_subset(kwargs, self.state_names, pop=True)
    current_glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

    # The mixer is given only the inputs it declares; its results
    # overwrite/extend the sequential inputs of the transition.
    mixer_inputs = dict_subset(
        dict_union(step_inputs, current_glimpses),
        self.mixer.apply.inputs)
    mixed = self.mixer.apply(return_dict=True, **mixer_inputs)
    step_inputs.update(mixed)

    # Leftover `kwargs` entries are passed to the transition as well.
    transition_inputs = dict_union(step_inputs, previous_states, kwargs)
    return self.transition.apply(
        iterate=False, return_list=True, **transition_inputs)
def generate(self, outputs, **kwargs):
    """Perform one generation step.

    Parameters
    ----------
    outputs : :class:`~tensor.TensorVariable`
        The outputs from the previous step.

    Returns
    -------
    list
        The next states, the next outputs, the next glimpses and the
        costs of the next outputs, in that order.

    Notes
    -----
    The contexts, previous states and glimpses are expected as keyword
    arguments.

    """
    previous_states = dict_subset(kwargs, self._state_names)
    # masks in context are optional (e.g. `attended_mask`)
    step_contexts = dict_subset(
        kwargs, self._context_names, must_have=False)
    previous_glimpses = dict_subset(kwargs, self._glimpse_names)

    glimpse_arguments = dict_union(
        previous_states, previous_glimpses, step_contexts)
    next_glimpses = self.transition.take_glimpses(
        as_dict=True, **glimpse_arguments)

    # The readout is conditioned on the feedback of the previous outputs.
    previous_feedback = self.readout.feedback(outputs)
    readout_arguments = dict_union(
        previous_states, next_glimpses, step_contexts)
    next_readouts = self.readout.readout(
        feedback=previous_feedback, **readout_arguments)

    next_outputs = self.readout.emit(next_readouts)
    next_costs = self.readout.cost(next_readouts, next_outputs)
    next_feedback = self.readout.feedback(next_outputs)

    # When there is no fork the raw feedback is the transition input.
    if self.fork:
        next_inputs = self.fork.apply(next_feedback, as_dict=True)
    else:
        next_inputs = {'feedback': next_feedback}

    state_arguments = dict_union(
        next_inputs, previous_states, next_glimpses, step_contexts)
    next_states = self.transition.compute_states(
        as_list=True, **state_arguments)

    glimpse_values = list(next_glimpses.values())
    return next_states + [next_outputs] + glimpse_values + [next_costs]
def compute_states(self, **kwargs):
    """Apply the mixer and then one step of the wrapped transition.

    Parameters
    ----------
    **kwargs
        Should contain everything what `self.transition` needs
        and in addition current glimpses.

    Returns
    -------
    current_states : list of Theano variables
        Current states computed by `self.transition`.

    """
    inputs = dict_subset(
        kwargs, self.sequence_names, pop=True, must_have=False)
    old_states = dict_subset(kwargs, self.state_names, pop=True)
    new_glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

    # Restrict the candidates to what the mixer actually declares as
    # inputs, then merge its outputs back into the sequential inputs.
    candidates = dict_union(inputs, new_glimpses)
    for_mixer = dict_subset(candidates, self.mixer.apply.inputs)
    inputs.update(self.mixer.apply(return_dict=True, **for_mixer))

    # `kwargs` now holds only what was not extracted above.
    for_transition = dict_union(inputs, old_states, kwargs)
    new_states = self.transition.apply(
        iterate=False, return_list=True, **for_transition)
    return new_states
def do_apply(self, **kwargs):
    r"""Compute the states of one step and then the glimpses for them.

    In addition to the original sequence this method also requires
    its preprocessed version, the one computed by the `preprocess`
    method of the attention mechanism. Unknown keyword arguments
    are passed to the wrapped transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list of :class:`~tensor.TensorVariable`
        The current step states and glimpses.

    """
    context = kwargs[self.attended_name]
    preprocessed_context = kwargs.pop(self.preprocessed_attended_name)
    context_mask = kwargs.get(self.attended_mask_name)
    if self.add_contexts:
        # Keep the attended context and its (optional) mask away from
        # the wrapped transition.
        kwargs.pop(self.attended_name)
        kwargs.pop(self.attended_mask_name, None)

    step_inputs = dict_subset(
        kwargs, self._sequence_names, pop=True, must_have=False)
    previous_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    # By this time **kwargs will contain the states and the contexts
    # of the transition.

    # Compute next states. Entries with 'mask' in their name are not
    # offered to `distribute`.
    unmasked_inputs = {}
    for name, variable in step_inputs.items():
        if 'mask' not in name:
            unmasked_inputs[name] = variable
    distribute_inputs = dict_subset(
        dict_union(unmasked_inputs, previous_glimpses),
        self.distribute.apply.inputs)
    step_inputs.update(
        self.distribute.apply(as_dict=True, **distribute_inputs))

    transition_inputs = dict_union(step_inputs, kwargs)
    current_states = self.transition.apply(
        iterate=False, as_dict=True, **transition_inputs)

    # The attention uses the freshly computed states and only those
    # previous glimpses listed in `previous_glimpses_needed`.
    needed_glimpses = dict_subset(
        previous_glimpses, self.previous_glimpses_needed)
    attended_arguments = {
        self.attended_name: context,
        self.attended_mask_name: context_mask,
        self.preprocessed_attended_name: preprocessed_context}
    attention_inputs = dict_union(
        current_states, needed_glimpses, attended_arguments)
    current_glimpses = self.attention.take_glimpses(
        as_dict=True, **attention_inputs)

    state_values = list(current_states.values())
    return state_values + list(current_glimpses.values())
def cost_matrix(self, application_call, outputs, mask=None, **kwargs):
    """Return the generation costs for the output sequences.

    The states and glimpses used for the readout, as well as the final
    value of every state and glimpse, are registered on
    `application_call` as auxiliary variables.

    See Also
    --------
    :meth:`cost` : Scalar cost.

    """
    # We assume the data has axes (time, batch, features, ...)
    batch_size = outputs.shape[1]

    # Prepare input for the iterative part.
    initial_states = dict_subset(kwargs, self._state_names, must_have=False)
    # masks in context are optional (e.g. `attended_mask`)
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    # This context, unlike the others, is mandatory.
    contexts['initial_state_context'] = kwargs['initial_state_context']
    feedback = self.readout.feedback(outputs)
    inputs = self.fork.apply(feedback, as_dict=True)

    # Run the recurrent network.
    transition_inputs = dict_union(inputs, initial_states, contexts)
    results = self.transition.apply(
        mask=mask, return_initial_states=True, as_dict=True,
        **transition_inputs)

    # Separate the deliverables. The last states are discarded: they
    # are not used to predict any output symbol. The initial glimpses
    # are discarded because they are not used for prediction.
    # Remember, glimpses are computed _before_ output stage, states are
    # computed after.
    states = dict(
        (name, results[name][:-1]) for name in self._state_names)
    glimpses = dict(
        (name, results[name][1:]) for name in self._glimpse_names)

    # Compute the cost: shift the feedback by one step along axis 0 and
    # put the feedback of the initial outputs in the first position.
    feedback = tensor.roll(feedback, 1, 0)
    initial_feedback = self.readout.feedback(
        self.readout.initial_outputs(batch_size))
    feedback = tensor.set_subtensor(feedback[0], initial_feedback)
    readouts = self.readout.readout(
        feedback=feedback, **dict_union(states, glimpses, contexts))
    costs = self.readout.cost(readouts, outputs)
    if mask is not None:
        costs *= mask

    exposed = list(glimpses.items()) + list(states.items())
    for name, variable in exposed:
        application_call.add_auxiliary_variable(variable.copy(), name=name)

    # These variables can be used to initialize the initial states of
    # the next batch using the last states of the current batch.
    for name in self._state_names + self._glimpse_names:
        final_value = results[name][-1].copy()
        application_call.add_auxiliary_variable(
            final_value, name=name + "_final_value")

    return costs
def evaluate(self, application_call, outputs, mask=None, **kwargs):
    r"""Compute the costs, states and glimpses for given output sequences.

    The states and glimpses used for the readout, together with the
    final value of every state and glimpse, are additionally registered
    on `application_call` as auxiliary variables.

    Parameters
    ----------
    application_call
        Receives the auxiliary variables.
    outputs
        The output sequences.
    mask : optional
        Passed to the transition and, when given, multiplied into the
        costs.
    \*\*kwargs
        Optional initial states and contexts.

    Returns
    -------
    list
        The costs followed by the states and then the glimpses.

    """
    # We assume the data has axes (time, batch, features, ...)
    batch_size = outputs.shape[1]

    # Prepare input for the iterative part
    states = dict_subset(kwargs, self._state_names, must_have=False)
    # masks in context are optional (e.g. `attended_mask`)
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    feedback = self.readout.feedback(outputs)
    inputs = self.fork.apply(feedback, as_dict=True)

    # Run the recurrent network
    results = self.transition.apply(
        mask=mask, return_initial_states=True, as_dict=True,
        **dict_union(inputs, states, contexts))

    # Separate the deliverables. The last states are discarded: they
    # are not used to predict any output symbol. The initial glimpses
    # are discarded because they are not used for prediction.
    # Remember, glimpses are computed _before_ output stage, states are
    # computed after.
    states = OrderedDict((name, results[name][:-1])
                         for name in self._state_names)
    glimpses = OrderedDict((name, results[name][1:])
                           for name in self._glimpse_names)

    # Compute the cost: the feedback is shifted by one step and the
    # feedback of the initial outputs takes the first position.
    feedback = tensor.roll(feedback, 1, 0)
    feedback = tensor.set_subtensor(
        feedback[0],
        self.readout.feedback(self.readout.initial_outputs(batch_size)))

    # Run the language model; its states reach the readout under an
    # 'lm_' prefix.
    if self.language_model:
        lm_states = self.language_model.evaluate(
            outputs=outputs, mask=mask, as_dict=True)
        lm_states = {'lm_' + name: value for name, value
                     in lm_states.items()}
    else:
        lm_states = {}

    readouts = self.readout.readout(
        feedback=feedback,
        **dict_union(lm_states, states, glimpses, contexts))
    costs = self.readout.cost(readouts, outputs)
    if mask is not None:
        costs *= mask

    for name, variable in list(glimpses.items()) + list(states.items()):
        application_call.add_auxiliary_variable(
            variable.copy(), name=name)

    # These variables can be used to initialize the initial states of
    # the next batch using the last states of the current batch.
    for name in self._state_names + self._glimpse_names:
        application_call.add_auxiliary_variable(
            results[name][-1].copy(), name=name+"_final_value")

    # `.values()` is a view on Python 3 and cannot be concatenated to a
    # list directly, hence the explicit `list(...)`.
    return [costs] + list(states.values()) + list(glimpses.values())
def cost(self, application_call, outputs, mask=None, **kwargs):
    """Return the generation costs for the output sequences.

    The glimpses are also registered on `application_call` as auxiliary
    variables, in case the user needs them.

    Parameters
    ----------
    outputs : :class:`~tensor.TensorVariable`
        The 3(2) dimensional tensor containing output sequences.
        The dimension 0 must stand for time, the dimension 1 for the
        position on the batch.
    mask : :class:`~tensor.TensorVariable`
        The binary matrix identifying fake outputs.

    Notes
    -----
    The contexts are expected as keyword arguments.

    """
    batch_size = outputs.shape[-2]  # TODO Assumes only 1 features dim

    # Prepare input for the iterative part: states are optional,
    # contexts are mandatory.
    initial_states = {}
    for name in self.state_names:
        if name in kwargs:
            initial_states[name] = kwargs[name]
    contexts = dict((name, kwargs[name]) for name in self.context_names)
    feedback = self.readout.feedback(outputs)
    if self.fork:
        inputs = self.fork.apply(feedback, return_dict=True)
    else:
        inputs = {'feedback': feedback}

    # Run the recurrent network.
    transition_inputs = dict_union(inputs, initial_states, contexts)
    results = self.transition.apply(
        mask=mask, return_initial_states=True, return_dict=True,
        **transition_inputs)

    # Separate the deliverables: the last step of every state is
    # dropped, the glimpses are taken whole.
    states = dict((name, results[name][:-1]) for name in self.state_names)
    glimpses = dict((name, results[name]) for name in self.glimpse_names)

    # Compute the cost: shift the feedback by one step along axis 0 and
    # put the feedback of the initial outputs in the first position.
    feedback = tensor.roll(feedback, 1, 0)
    initial_feedback = self.readout.feedback(
        self.readout.initial_outputs(batch_size, **contexts))
    feedback = tensor.set_subtensor(feedback[0], initial_feedback)
    readouts = self.readout.readout(
        feedback=feedback, **dict_union(states, glimpses, contexts))
    costs = self.readout.cost(readouts, outputs)

    for glimpse_name, glimpse in glimpses.items():
        application_call.add_auxiliary_variable(
            glimpse.copy(), name=glimpse_name)

    return costs
def do_apply(self, **kwargs):
    r"""Compute the glimpses, transform them, then compute the states.

    In addition to the original sequence this method also requires
    its preprocessed version, the one computed by the `preprocess`
    method of the attention mechanism. Unknown keyword arguments
    are passed to the wrapped transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list of :class:`~tensor.TensorVariable`
        The current step states and glimpses.

    """
    # The raw entries are only read; the preprocessed ones are removed
    # from `kwargs`.
    contexts = kwargs[self.attended_name]
    preprocessed_contexts = kwargs.pop(self.preprocessed_attended_name)
    context_masks = kwargs.get(self.attended_mask_name)
    pos_tag = kwargs[self.posTag_name]
    preprocessed_pos_tag = kwargs.pop(self.preprocessed_posTag_name)

    step_inputs = dict_subset(
        kwargs, self._sequence_names, pop=True, must_have=False)
    previous_states = dict_subset(kwargs, self._state_names, pop=True)
    previous_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)

    attended_arguments = {
        self.attended_name: contexts,
        self.posTag_name: pos_tag,
        self.attended_mask_name: context_masks,
        self.preprocessed_attended_name: preprocessed_contexts,
        self.preprocessed_posTag_name: preprocessed_pos_tag}
    glimpse_arguments = dict_union(
        previous_states, previous_glimpses, attended_arguments)
    current_glimpses = self.take_glimpses(as_dict=True, **glimpse_arguments)

    # The weighted averages go through the context transition one by
    # one; only the last element of its result is kept.
    averages = current_glimpses['weighted_averages']
    # An all-ones tensor with the first two axes of the averages swapped
    # (presumably the mask of the context transition -- TODO confirm).
    all_ones = tensor.ones([averages.shape[1], averages.shape[0]])
    transformed = self.context_transition.apply(averages, all_ones)
    current_glimpses['weighted_averages'] = transformed[-1]

    state_arguments = dict_union(
        step_inputs, previous_states, current_glimpses, kwargs)
    current_states = self.compute_states(as_list=True, **state_arguments)
    return current_states + list(current_glimpses.values())
def generate(self, outputs, dont_generate_new_outputs=False, **kwargs):
    """Perform one generation step, advancing the language model too.

    Parameters
    ----------
    outputs : :class:`~tensor.TensorVariable`
        The outputs from the previous step.
    dont_generate_new_outputs : bool, optional
        If ``True``, the previous outputs are used instead of
        generated ones. It is a temporary hack for ASRU.

    Returns
    -------
    list
        The next states, the next outputs, the next glimpses, the next
        language model states (none without a language model) and the
        costs, in that order.

    Notes
    -----
    The contexts, previous states and glimpses are expected as keyword
    arguments.

    """
    previous_states = dict_subset(kwargs, self._state_names)
    # masks in context are optional (e.g. `attended_mask`)
    step_contexts = dict_subset(
        kwargs, self._context_names, must_have=False)
    previous_glimpses = dict_subset(kwargs, self._glimpse_names)
    lm_states = dict_subset(kwargs, self._lm_state_names)

    glimpse_arguments = dict_union(
        previous_states, previous_glimpses, step_contexts)
    next_glimpses = self.transition.take_glimpses(
        as_dict=True, **glimpse_arguments)

    # The readout also sees the language model states.
    previous_feedback = self.readout.feedback(outputs)
    readout_arguments = dict_union(
        previous_states, next_glimpses, step_contexts, lm_states)
    next_readouts = self.readout.readout(
        feedback=previous_feedback, **readout_arguments)

    if dont_generate_new_outputs:
        next_outputs = outputs
    else:
        next_outputs = self.readout.emit(next_readouts)
    next_costs = self.readout.cost(next_readouts, next_outputs)
    next_feedback = self.readout.feedback(next_outputs)

    if self.fork:
        next_inputs = self.fork.apply(next_feedback, as_dict=True)
    else:
        next_inputs = {'feedback': next_feedback}

    state_arguments = dict_union(
        next_inputs, previous_states, next_glimpses, step_contexts)
    next_states = self.transition.compute_states(
        as_list=True, **state_arguments)

    next_lm_states = {}
    if self.language_model:
        # Drop the first three characters of every key before calling
        # the language model (cf. the 'lm_' prefix).
        unmangled_lm_states = {
            name[3:]: value for name, value in lm_states.items()}
        # The language model is fed the outputs chosen above instead of
        # generating its own.
        lm_results = self.language_model.generate(
            next_outputs, dont_generate_new_outputs=True,
            iterate=False, **unmangled_lm_states)
        next_lm_states = OrderedDict(zip(self._lm_state_names, lm_results))

    glimpse_values = list(next_glimpses.values())
    lm_values = list(next_lm_states.values())
    return (next_states + [next_outputs] + glimpse_values
            + lm_values + [next_costs])
def apply(self, **kwargs):
    r"""Distribute the extra input and apply the wrapped recurrent brick.

    Parameters
    ----------
    \*\*kwargs
        Must contain the extra input (under `self.extra_input_name`)
        and the inputs named in `self._normal_inputs`; may contain
        `mask`. Everything else is forwarded to `self.recurrent.apply`.

    Returns
    -------
    Whatever `self.recurrent.apply` returns.

    """
    # Should handle both "iterate=True" and "iterate=False"
    extra = kwargs.pop(self.extra_input_name)
    mask = kwargs.pop('mask', None)
    plain_inputs = dict_subset(kwargs, self._normal_inputs, pop=True)

    # `distribute` produces the inputs that are actually fed to the
    # recurrent brick.
    distribute_arguments = dict_union(
        plain_inputs, {self.extra_input_name: extra})
    distributed = self.distribute.apply(as_dict=True, **distribute_arguments)

    recurrent_arguments = dict_union(distributed, kwargs)
    return self.recurrent.apply(mask=mask, **recurrent_arguments)
def mixed_generate(self, return_initial_states=True, **kwargs):
    r"""Sample one step from a mixture of the actor and the critic.

    The sampling distribution is the actor distribution weighted by
    ``1 - epsilon`` plus a sharpened critic distribution weighted by
    ``epsilon``. Both the actor and the critic recurrent bricks are
    then advanced by one step using the sampled symbols.

    Parameters
    ----------
    return_initial_states : bool, optional
        Not used in the body of this method.
    \*\*kwargs
        Must contain `groundtruth`, `groundtruth_mask` and `step`, the
        actor inputs, states and contexts, and the critic counterparts
        under names prefixed with ``critic_``.

    Returns
    -------
    list
        The samples, ``step + 1``, the actor recurrent outputs and the
        critic recurrent outputs, in that order.

    """
    critic = self.generator.readout.critic
    groundtruth = kwargs.pop('groundtruth')
    groundtruth_mask = kwargs.pop('groundtruth_mask')
    step = kwargs.pop('step')

    sampling_inputs = dict_subset(
        kwargs, self.generator.readout.sample.inputs)
    actor_scores = self.generator.readout.scores(**sampling_inputs)

    critic_inputs = {
        name: kwargs['critic_' + name]
        for name in critic.generator.readout.merge_names}
    critic_outputs = critic.generator.readout.outputs(
        groundtruth, groundtruth_mask, **critic_inputs)

    epsilon = numpy.array(self.generator.readout.epsilon,
                          dtype=theano.config.floatX)
    actor_probs = tensor.exp(actor_scores)
    # This is a poor man's 1-hot argmax
    critic_probs = self.softmax.apply(critic_outputs * 1000)
    probs = (actor_probs * (tensor.constant(1) - epsilon)
             + critic_probs * epsilon)

    # Draw one uniform number per row and count how many cumulative
    # probabilities it exceeds; that count is the sampled index.
    x = self.theano_rng.uniform(size=(probs.shape[0],))
    samples = (tensor.gt(x[:, None], tensor.cumsum(probs, axis=1))
               .astype(theano.config.floatX)
               .sum(axis=1)
               .astype('int64'))
    # Keep the index within range even if `x` exceeds the last
    # cumulative value.
    samples = tensor.minimum(samples, probs.shape[1] - 1)

    actor_feedback = self.generator.feedback.apply(samples, as_dict=True)
    actor_states_contexts = dict_subset(
        kwargs,
        self.generator.recurrent.apply.states
        + self.generator.recurrent.apply.contexts)
    actor_states_outputs = self.generator.recurrent.apply(
        as_dict=True, iterate=False,
        **dict_union(actor_feedback, actor_states_contexts))

    critic_feedback = critic.generator.feedback.apply(samples, as_dict=True)
    critic_states_contexts = {
        name: kwargs['critic_' + name]
        for name in
        critic.generator.recurrent.apply.states
        + critic.generator.recurrent.apply.contexts}
    critic_apply_kwargs = dict(
        as_dict=True, iterate=False,
        **dict_union(critic_feedback, critic_states_contexts))
    if self.generator.readout.critic_uses_actor_states:
        critic_apply_kwargs['extra_inputs'] = actor_states_outputs['states']
    critic_states_outputs = critic.generator.recurrent.apply(
        **critic_apply_kwargs)

    # `.values()` is a view on Python 3 and cannot be concatenated to a
    # list directly, hence the explicit `list(...)`.
    return ([samples, step + 1]
            + list(actor_states_outputs.values())
            + list(critic_states_outputs.values()))
def cost_matrix(self, application_call, outputs, mask=None, **kwargs):
    """Return the generation costs for the output sequences.

    The states and glimpses used for the readout and the final value
    of every state are registered on `application_call` as auxiliary
    variables.

    See Also
    --------
    :meth:`cost` : Scalar cost.

    """
    # We assume the data has axes (time, batch, features, ...)
    batch_size = outputs.shape[1]

    # Prepare input for the iterative part.
    initial_states = dict_subset(kwargs, self._state_names, must_have=False)
    # masks in context are optional (e.g. `attended_mask`)
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    feedback = self.readout.feedback(outputs)
    inputs = self.fork.apply(feedback, as_dict=True)

    # Run the recurrent network.
    transition_inputs = dict_union(inputs, initial_states, contexts)
    results = self.transition.apply(
        mask=mask, return_initial_states=True, as_dict=True,
        **transition_inputs)

    # Separate the deliverables. The last states are discarded: they
    # are not used to predict any output symbol. The initial glimpses
    # are discarded because they are not used for prediction.
    # Remember, glimpses are computed _before_ output stage, states are
    # computed after.
    states = dict(
        (name, results[name][:-1]) for name in self._state_names)
    glimpses = dict(
        (name, results[name][1:]) for name in self._glimpse_names)

    # Compute the cost: shift the feedback by one step along axis 0 and
    # put the feedback of the initial outputs in the first position.
    feedback = tensor.roll(feedback, 1, 0)
    initial_feedback = self.readout.feedback(
        self.readout.initial_outputs(batch_size))
    feedback = tensor.set_subtensor(feedback[0], initial_feedback)
    readouts = self.readout.readout(
        feedback=feedback, **dict_union(states, glimpses, contexts))
    costs = self.readout.cost(readouts, outputs)
    if mask is not None:
        costs *= mask

    exposed = list(glimpses.items()) + list(states.items())
    for name, variable in exposed:
        application_call.add_auxiliary_variable(variable.copy(), name=name)

    # These variables can be used to initialize the initial states of
    # the next batch using the last states of the current batch.
    for state_name in self._state_names:
        final_value = results[state_name][-1].copy()
        application_call.add_auxiliary_variable(
            final_value, name=state_name+"_final_value")

    return costs
def do_apply(self, **kwargs):
    r"""Advance the transition by one step, then attend with the new states.

    In addition to the original sequence this method also requires
    its preprocessed version, the one computed by the `preprocess`
    method of the attention mechanism. Unknown keyword arguments
    are passed to the wrapped transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list of :class:`~tensor.TensorVariable`
        The current step states and glimpses.

    """
    attended_value = kwargs[self.attended_name]
    preprocessed_value = kwargs.pop(self.preprocessed_attended_name)
    mask_value = kwargs.get(self.attended_mask_name)
    if self.add_contexts:
        # The attended context and its optional mask are not forwarded
        # to the wrapped transition in this case.
        kwargs.pop(self.attended_name)
        kwargs.pop(self.attended_mask_name, None)

    inputs = dict_subset(
        kwargs, self._sequence_names, pop=True, must_have=False)
    old_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    # By this time **kwargs will contain the states and the contexts
    # of the transition.

    # Compute next states; anything with 'mask' in its name is withheld
    # from `distribute`.
    inputs_without_mask = dict(
        (name, variable) for name, variable in inputs.items()
        if 'mask' not in name)
    for_distribute = dict_subset(
        dict_union(inputs_without_mask, old_glimpses),
        self.distribute.apply.inputs)
    inputs.update(self.distribute.apply(as_dict=True, **for_distribute))

    new_states = self.transition.apply(
        iterate=False, as_dict=True, **dict_union(inputs, kwargs))

    # Attend using the new states and the required previous glimpses.
    required_glimpses = dict_subset(
        old_glimpses, self.previous_glimpses_needed)
    for_attention = dict_union(
        new_states,
        required_glimpses,
        {self.attended_name: attended_value,
         self.attended_mask_name: mask_value,
         self.preprocessed_attended_name: preprocessed_value})
    new_glimpses = self.attention.take_glimpses(
        as_dict=True, **for_attention)

    return list(new_states.values()) + list(new_glimpses.values())
def cost(self, outputs, mask=None, **kwargs):
    """Return the generation costs for the output sequences.

    Parameters
    ----------
    outputs : Theano variable
        The 3(2) dimensional tensor containing output sequences.
        The dimension 0 must stand for time, the dimension 1 for the
        position on the batch.
    mask : The 0/1 matrix identifying fake outputs.

    Returns
    -------
    The costs, or a ``(costs, others)`` tuple when a non-empty
    `also_return` keyword argument is given; `others` maps every
    requested name to the matching result of the transition.

    Notes
    -----
    The contexts are expected as keyword arguments.

    """
    batch_size = outputs.shape[-2]  # TODO Assumes only 1 features dim

    # Prepare input for the iterative part: states are optional,
    # contexts are mandatory.
    initial_states = {}
    for name in self.state_names:
        if name in kwargs:
            initial_states[name] = kwargs[name]
    contexts = dict((name, kwargs[name]) for name in self.context_names)
    feedback = self.readout.feedback(outputs)
    if self.fork:
        inputs = self.fork.apply(feedback, return_dict=True)
    else:
        inputs = {'feedback': feedback}

    # Run the recurrent network.
    transition_inputs = dict_union(inputs, initial_states, contexts)
    results = self.transition.apply(
        mask=mask, return_initial_states=True, return_dict=True,
        **transition_inputs)

    # Separate the deliverables: the last step of every state is
    # dropped, the glimpses are taken whole.
    states = dict((name, results[name][:-1]) for name in self.state_names)
    glimpses = dict((name, results[name]) for name in self.glimpse_names)

    # Compute the cost: shift the feedback by one step along axis 0 and
    # put the feedback of the initial outputs in the first position.
    feedback = tensor.roll(feedback, 1, 0)
    initial_feedback = self.readout.feedback(
        self.readout.initial_outputs(batch_size, **contexts))
    feedback = tensor.set_subtensor(feedback[0], initial_feedback)
    readouts = self.readout.readout(
        feedback=feedback, **dict_union(states, glimpses, contexts))
    costs = self.readout.cost(readouts, outputs)

    # In case the user needs some glimpses or states or smth else
    also_return = kwargs.get("also_return")
    if not also_return:
        return costs
    others = dict((name, results[name]) for name in also_return)
    return (costs, others)
def compute_states(self, **kwargs):
    r"""Compute the current states from already computed glimpses.

    Combines an application of the `distribute` that alter the
    sequential inputs of the wrapped transition and an application of
    the wrapped transition. The same is done for the topical glimpses
    with `topical_distribute`. All unknown keyword arguments go to
    the wrapped transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain everything what `self.transition` needs
        and in addition the current glimpses.

    Returns
    -------
    current_states : list of :class:`~tensor.TensorVariable`
        Current states computed by `self.transition`.

    """
    # Make sure we are not popping the mask: names containing 'mask'
    # are left in `kwargs`.
    unmasked_names = []
    for name in self._sequence_names:
        if 'mask' not in name:
            unmasked_names.append(name)
    step_inputs = dict_subset(kwargs, unmasked_names, pop=True)
    content_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    topical_glimpses = dict_subset(
        kwargs, self._topical_glimpse_names, pop=True)

    if self.add_contexts:
        kwargs.pop(self.attended_name)
        # attended_mask_name can be optional
        kwargs.pop(self.attended_mask_name, None)
        kwargs.pop(self.topical_attended_name)
        kwargs.pop(self.topical_attended_mask_name, None)

    # First the regular glimpses ...
    distribute_inputs = dict_subset(
        dict_union(step_inputs, content_glimpses),
        self.distribute.apply.inputs)
    step_inputs.update(
        self.distribute.apply(as_dict=True, **distribute_inputs))

    # ... then the topical ones, on top of the already updated inputs.
    topical_inputs = dict_subset(
        dict_union(step_inputs, topical_glimpses),
        self.topical_distribute.apply.inputs)
    step_inputs.update(
        self.topical_distribute.apply(as_dict=True, **topical_inputs))

    transition_inputs = dict_union(step_inputs, kwargs)
    return self.transition.apply(
        iterate=False, as_list=True, **transition_inputs)
def do_apply(self, **kwargs):
    r"""Process one step, attending both the regular and topical context.

    Besides the attended sequences this method requires their
    preprocessed versions. Keyword arguments not consumed here are
    passed on to `compute_states`.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the attended and topical attended contexts together with their
        preprocessed versions, previous step glimpses, and the entries
        named by ``self.topical_name`` and ``self.content_name``.

    Returns
    -------
    outputs : list of :class:`~tensor.TensorVariable`
        The current step states followed by the current glimpses.

    """
    # The attended contexts and masks are only read; the preprocessed
    # versions are removed from kwargs.
    attended = kwargs[self.attended_name]
    preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
    attended_mask = kwargs.get(self.attended_mask_name)
    topical_attended = kwargs[self.topical_attended_name]
    preprocessed_topical_attended = kwargs.pop(
        self.preprocessed_topical_attended_name)
    topical_attended_mask = kwargs.get(self.topical_attended_mask_name)

    sequences = dict_subset(
        kwargs, self._sequence_names, pop=True, must_have=False)
    states = dict_subset(kwargs, self._state_names, pop=True)
    glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    # The previous topical glimpses are only removed from kwargs; their
    # values are not used below.
    dict_subset(kwargs, self._topical_glimpse_names, pop=True)
    topical_embeddings = dict_subset(kwargs, [self.topical_name], pop=True)
    content_embeddings = dict_subset(kwargs, [self.content_name], pop=True)

    attention_inputs = {
        self.attended_name: attended,
        self.attended_mask_name: attended_mask,
        self.preprocessed_attended_name: preprocessed_attended,
        self.topical_attended_name: topical_attended,
        self.topical_attended_mask_name: topical_attended_mask,
        self.preprocessed_topical_attended_name:
            preprocessed_topical_attended,
    }
    current_glimpses = self.take_glimpses(
        as_dict=True,
        **dict_union(states, glimpses, topical_embeddings,
                     content_embeddings, attention_inputs))
    current_states = self.compute_states(
        as_list=True,
        **dict_union(sequences, states, current_glimpses, kwargs))
    return current_states + list(current_glimpses.values())
def generate(self, outputs, dont_generate_new_outputs=False, **kwargs):
    """Perform one sequence generation step.

    Parameters
    ----------
    outputs : :class:`~tensor.TensorVariable`
        The outputs from the previous step.
    dont_generate_new_outputs : bool, optional
        If ``True``, the previous outputs are used instead of generated
        ones. It is a temporary hack for ASRU.

    Notes
    -----
    The contexts, previous states, glimpses and language model states
    are expected as keyword arguments.

    Returns
    -------
    list
        Next states, then the next outputs, the next glimpses, the next
        language model states (none without a language model) and
        finally the costs.

    """
    states = dict_subset(kwargs, self._state_names)
    # Masks in contexts are optional (e.g. `attended_mask`).
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    glimpses = dict_subset(kwargs, self._glimpse_names)
    lm_states = dict_subset(kwargs, self._lm_state_names)

    next_glimpses = self.transition.take_glimpses(
        as_dict=True, **dict_union(states, glimpses, contexts))
    next_readouts = self.readout.readout(
        feedback=self.readout.feedback(outputs),
        **dict_union(states, next_glimpses, contexts, lm_states))

    if dont_generate_new_outputs:
        next_outputs = outputs
    else:
        next_outputs = self.readout.emit(next_readouts)
    next_costs = self.readout.cost(next_readouts, next_outputs)
    next_feedback = self.readout.feedback(next_outputs)

    if self.fork:
        next_inputs = self.fork.apply(next_feedback, as_dict=True)
    else:
        next_inputs = {'feedback': next_feedback}
    next_states = self.transition.compute_states(
        as_list=True,
        **dict_union(next_inputs, states, next_glimpses, contexts))

    next_lm_states = {}
    if self.language_model:
        # Drop the first three characters of each name before passing the
        # states to the language model.
        unmangled_lm_states = {
            name[3:]: state for name, state in lm_states.items()}
        lm_outputs = self.language_model.generate(
            next_outputs, dont_generate_new_outputs=True,
            iterate=False, **unmangled_lm_states)
        next_lm_states = OrderedDict(zip(self._lm_state_names, lm_outputs))

    step_results = next_states + [next_outputs]
    step_results = step_results + list(next_glimpses.values())
    step_results = step_results + list(next_lm_states.values())
    return step_results + [next_costs]
def get_cost_graph(self, batch=True, prediction=None, prediction_mask=None):
    """Build the computation graph of the cost.

    Parameters
    ----------
    batch : bool, optional
        If true, the batch inputs, labels and masks stored on the
        instance are used. Otherwise the single-example inputs are
        converted with ``self.bottom.single_to_batch_inputs`` and the
        single labels get an extra axis; no label mask is used.
    prediction : optional
        Labels to compute the cost for. Defaults to the groundtruth
        labels when ``None``.
    prediction_mask : optional
        Mask for `prediction`. Defaults to the groundtruth mask when
        ``None``.

    Returns
    -------
    cost_cg : ComputationGraph
        The graph built from ``self.cost``.

    """
    if batch:
        inputs = self.inputs
        inputs_mask = self.inputs_mask
        groundtruth = self.labels
        groundtruth_mask = self.labels_mask
    else:
        inputs, inputs_mask = self.bottom.single_to_batch_inputs(
            self.single_inputs)
        groundtruth = self.single_labels[:, None]
        groundtruth_mask = None

    # Compare against None explicitly: asking a symbolic variable or an
    # array for its truth value is not a reliable "was it given?" test.
    if prediction is None:
        prediction = groundtruth
    if prediction_mask is None:
        prediction_mask = groundtruth_mask

    kwargs = dict(inputs_mask=inputs_mask,
                  labels=prediction,
                  labels_mask=prediction_mask,
                  additional_sources=dict(self.additional_sources))
    # `items()` works on both Python 2 and 3, unlike `iteritems()`.
    kwargs = {(k + self.names_postfix): v for k, v in kwargs.items()}
    kwargs = dict_union(kwargs, inputs)
    cost = self.cost(**kwargs)
    cost_cg = ComputationGraph(cost)
    return cost_cg
def initialize(self, **kwargs):
    """Check the tracked bricks and compile the accumulation function.

    Collects the bricks owning ``self.parameters``, verifies that the
    keys of their ``updates`` are exactly those parameters, and compiles
    a Theano function over ``self.inputs`` that applies all the updates
    and has no outputs.

    Raises
    ------
    ValueError
        If the updated variables and ``self.parameters`` differ.

    """
    logger.info("BatchNormAccumulate initializing")

    # Distinct bricks that own the parameters.
    bricks_seen = {get_brick(p) for p in self.parameters}

    # Every updated variable must be one of the parameters, and vice versa.
    update_parameters = set()
    for brick in bricks_seen:
        update_parameters.update(brick.updates.keys())
        assert brick.n.get_value() == 0

    if update_parameters != set(self.parameters):
        raise ValueError("The updates and the parameters passed in do "
                         "not match. This could be due to no applications "
                         "or multiple applications found %d updates, and "
                         "%d parameters" % (len(update_parameters),
                                            len(self.parameters)))

    updates = dict_union(*[brick.updates for brick in bricks_seen])

    logger.info("Compiling BatchNorm accumulate")
    self._func = theano.function(
        self.inputs, [], updates=updates, on_unused_input="warn")

    super(BatchNormAccumulate, self).initialize(**kwargs)
def get_params(self, param_name=None):
    """Return the parameters of the selected bricks and their children.

    Each brick in ``self.bricks`` is visited together with everything
    reachable through ``brick.children``.

    Parameters
    ----------
    param_name : :class:`Path.ParamName`
        If given, only parameters with the name `param_name` are
        returned.

    Returns
    -------
    params : OrderedDict
        A dictionary of (`path`, `param`) pairs, where `path` is the
        string representation of the path to the parameter and `param`
        is the parameter.

    """
    def _collect(brick):
        # TODO path logic should be separate
        collected = OrderedDict()
        for param in brick.params:
            if param_name and param.name != param_name:
                continue
            own_path = Path([Path.BrickName(brick.name),
                             Path.ParamName(param.name)])
            collected[own_path] = param
        # Paths found in the children get this brick's name prepended.
        for child in brick.children:
            for sub_path, param in _collect(child).items():
                prefixed = Path([Path.BrickName(brick.name)]) + sub_path
                collected[prefixed] = param
        return collected

    merged = dict_union(*[_collect(brick) for brick in self.bricks])
    return OrderedDict(
        (str(path), param) for path, param in merged.items())
def do_apply(self, **kwargs):
    r"""Process one step, attending a context summarised by a transition.

    The attended context, its preprocessed version and its mask are fed
    to ``self.context_transition`` together with ``states['states']``;
    the result is what the glimpses are taken from. Keyword arguments
    not consumed here are passed on to `compute_states`.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list of :class:`~tensor.TensorVariable`
        The current step states followed by the current glimpses.

    """
    attended_list = kwargs[self.attended_name]
    preprocessed_attended_list = kwargs.pop(self.preprocessed_attended_name)
    attended_mask_list = kwargs.get(self.attended_mask_name)

    # The previous glimpses are only removed from kwargs; they are not
    # used when taking the new ones.
    dict_subset(kwargs, self._glimpse_names, pop=True)
    sequences = dict_subset(
        kwargs, self._sequence_names, pop=True, must_have=False)
    states = dict_subset(kwargs, self._state_names, pop=True)

    # All-ones mask built from axes 2 and 0 of the attended context.
    ones_mask = tensor.ones(
        [attended_list.shape[2], attended_list.shape[0]])
    utterance_attended = self.context_transition.apply(
        attended_list, preprocessed_attended_list, attended_mask_list,
        states['states'], mask=ones_mask)

    current_glimpses = self.take_glimpses(utterance_attended)
    current_states = self.compute_states(
        as_list=True,
        **dict_union(sequences, states,
                     {'weighted_averages': current_glimpses}, kwargs))
    return current_states + [current_glimpses]
def _get_variables(self):
    """Collect variables, updates and auxiliary variables.

    Also collects all :class:`.Scan` ops and builds a computation graph
    for each of them. The results are stored in ``self.variables`` and
    ``self.updates``; when there are non-shared outputs,
    ``self.sorted_apply_nodes``, ``self.scans``,
    ``self.sorted_scan_nodes`` and ``self._scan_graphs`` are set too.

    """
    updates = OrderedDict()

    shared_outputs = []
    usual_outputs = []
    for output in self.outputs:
        if is_shared_variable(output):
            shared_outputs.append(output)
        else:
            usual_outputs.append(output)
    variables = shared_outputs

    if usual_outputs:
        # Sort the apply nodes topologically.
        inputs = graph.inputs(self.outputs)
        self.sorted_apply_nodes = graph.io_toposort(inputs, usual_outputs)

        scan_nodes = [node for node in self.sorted_apply_nodes
                      if isinstance(node.op, Scan)]
        self.scans = list(unique([node.op for node in scan_nodes]))
        self.sorted_scan_nodes = scan_nodes
        self._scan_graphs = [ComputationGraph(scan.outputs)
                             for scan in self.scans]

        # Inputs of all apply nodes in order, first occurrence only,
        # followed by the outputs that did not show up among them.
        seen = set()
        main_vars = []
        for apply_node in self.sorted_apply_nodes:
            for var in apply_node.inputs:
                if var not in seen:
                    seen.add(var)
                    main_vars.append(var)
        main_vars.extend([var for var in self.outputs if var not in seen])

        # While preserving order add auxiliary variables, and collect
        # updates. Every annotation is processed once.
        seen_annotations = set()
        # Intermediate variables could be auxiliary
        seen_avs = set(main_vars)
        variables = []
        for var in main_vars:
            variables.append(var)
            for annotation in getattr(var.tag, 'annotations', []):
                if annotation in seen_annotations:
                    continue
                seen_annotations.add(annotation)
                for av in annotation.auxiliary_variables:
                    if av not in seen_avs:
                        seen_avs.add(av)
                        variables.append(av)
                updates = dict_union(updates, annotation.updates)

    self.variables = variables
    self.updates = updates
def _push_allocation_config(self):
    """Configure the dimensions of the readout and the fork.

    The readout's ``source_dims`` is the union of the state, context
    and glimpse dimensions reported by the transition. The fork's input
    dimension is the dimension of the single feedback output of the
    readout; its output dimensions are taken from the transition.

    Raises
    ------
    ValueError
        If ``self.readout.feedback`` does not have exactly one output.

    """
    # Configure readout
    # TODO: optional states? contexts?
    state_dims = {name: self.transition.get_dim(name)
                  for name in self.state_names}
    context_dims = {name: self.transition.get_dim(name)
                    for name in self.context_names}
    self.glimpse_dims = {name: self.transition.get_dim(name)
                         for name in self.glimpse_names}
    self.readout.source_dims = dict_union(
        state_dims, context_dims, self.glimpse_dims)

    # Configure fork
    feedback_names = self.readout.feedback.outputs
    if len(feedback_names) != 1:
        # Say what was found so that a misconfigured readout is easy to spot.
        raise ValueError(
            "readout.feedback must have exactly one output, got {}: {}"
            .format(len(feedback_names), feedback_names))
    self.fork.input_dim = self.readout.get_dim(feedback_names[0])
    self.fork.fork_dims = {name: self.transition.get_dim(name)
                           for name in self.fork.apply.outputs}
def generate(self, chars):
    """Generate sequences attending to the encoded `chars`.

    `chars` goes through the lookup, the fork and the encoder; the
    result is the attended context of the generator. The number of
    steps is three times ``chars.shape[0]``, the batch size is
    ``chars.shape[1]`` and the attended mask is all ones.

    """
    n_steps = 3 * chars.shape[0]
    batch_size = chars.shape[1]
    forked = self.fork.apply(self.lookup.apply(chars), as_dict=True)
    attended = self.encoder.apply(**dict_union(forked))
    return self.generator.generate(
        n_steps=n_steps,
        batch_size=batch_size,
        attended=attended,
        attended_mask=tensor.ones(chars.shape))
def generate(self, chars):
    """Run the generator on the encoder output computed from `chars`.

    The encoder is fed the forked lookup of `chars`. Generation runs for
    ``3 * chars.shape[0]`` steps with a batch size of ``chars.shape[1]``
    and an all-ones attended mask shaped like `chars`.

    """
    steps = 3 * chars.shape[0]
    batch = chars.shape[1]
    embedded = self.lookup.apply(chars)
    encoder_inputs = dict_union(self.fork.apply(embedded, as_dict=True))
    encoded = self.encoder.apply(**encoder_inputs)
    full_mask = tensor.ones(chars.shape)
    return self.generator.generate(
        n_steps=steps, batch_size=batch,
        attended=encoded, attended_mask=full_mask)
def take_glimpses(self, **kwargs):
    r"""Compute glimpses with the attention mechanism.

    A thin wrapper over `self.attention.take_glimpses` that selects the
    arguments it needs from the keyword arguments.

    Parameters
    ----------
    \*\*kwargs
        Must contain the attended, previous step states and glimpses.
        Can optionally contain the attended mask and the preprocessed
        attended. Anything else is ignored.

    Returns
    -------
    glimpses
        Whatever `self.attention.take_glimpses` returns.

    """
    states = dict_subset(kwargs, self._state_names, pop=True)
    previous_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    # Only the glimpses the attention mechanism asks for are forwarded.
    needed_glimpses = dict_subset(
        previous_glimpses, self.previous_glimpses_needed)

    attended = kwargs.pop(self.attended_name)
    preprocessed_attended = kwargs.pop(self.preprocessed_attended_name, None)
    attended_mask = kwargs.pop(self.attended_mask_name, None)
    named_inputs = dict_union(states, needed_glimpses)

    # At this point kwargs may still contain additional items,
    # e.g. AttentionRecurrent.transition.apply.contexts
    return self.attention.take_glimpses(
        attended, preprocessed_attended, attended_mask, **named_inputs)
def take_glimpses(self, **kwargs):
    r"""Compute glimpses with the attention mechanism.

    A thin wrapper over `self.attention.take_glimpses` that selects the
    arguments it needs from the keyword arguments and rejects leftovers.

    Parameters
    ----------
    \*\*kwargs
        Must contain the attended, previous step states and glimpses.
        Can optionally contain the attended mask and the preprocessed
        attended.

    Returns
    -------
    glimpses
        Whatever `self.attention.take_glimpses` returns.

    Raises
    ------
    ValueError
        If keyword arguments remain after the attention mechanism has
        been called.

    """
    states = dict_subset(kwargs, self._state_names, pop=True)
    previous_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    # Only the glimpses the attention mechanism asks for are forwarded.
    needed_glimpses = dict_subset(
        previous_glimpses, self.previous_glimpses_needed)

    attended = kwargs.pop(self.attended_name)
    preprocessed_attended = kwargs.pop(self.preprocessed_attended_name, None)
    attended_mask = kwargs.pop(self.attended_mask_name, None)
    named_inputs = dict_union(states, needed_glimpses)

    result = self.attention.take_glimpses(
        attended, preprocessed_attended, attended_mask, **named_inputs)
    # The leftover check runs only after the glimpses have been computed.
    if not kwargs:
        return result
    raise ValueError("extra args to take_glimpses: {}".format(kwargs))
def get_params(self, param_name=None):
    """Gather parameters from the selected bricks and their children.

    Every brick in ``self.bricks`` is walked recursively through
    ``brick.children``.

    Parameters
    ----------
    param_name : :class:`Path.ParamName`
        If given, only parameters with the name `param_name` are
        returned.

    Returns
    -------
    params : OrderedDict
        A dictionary of (`path`, `param`) pairs, where `path` is the
        string representation of the path to the parameter and `param`
        is the parameter.

    """
    def _walk(brick):
        # TODO path logic should be separate
        found = OrderedDict()
        # Parameters owned by this brick, optionally filtered by name.
        for param in brick.params:
            wanted = not param_name or param.name == param_name
            if wanted:
                key = Path([Path.BrickName(brick.name),
                            Path.ParamName(param.name)])
                found[key] = param
        # Parameters of the children, with this brick's name prepended.
        for child in brick.children:
            child_params = _walk(child)
            for child_path in child_params:
                full_path = Path([Path.BrickName(brick.name)]) + child_path
                found[full_path] = child_params[child_path]
        return found

    per_brick = [_walk(brick) for brick in self.bricks]
    combined = dict_union(*per_brick)
    named = OrderedDict()
    for path, param in combined.items():
        named[str(path)] = param
    return named
def apply(self, input_, mask=None, **kwargs):
    """Fork `input_`, run the transition and return its states.

    Parameters
    ----------
    input_
        The input handed to ``self.fork``.
    mask : optional
        Forwarded to the transition as ``mask``.
    **kwargs
        Merged with the fork outputs and passed to the transition.

    Returns
    -------
    states
        The first element if the transition returned a list, otherwise
        the returned value itself.

    """
    forked = self.fork.apply(input_, as_dict=True)
    states = self.transition.apply(mask=mask, **dict_union(forked, kwargs))
    # The transition may return a list (the original author observed
    # [states, cells] for LSTM). Only the first entry is exposed.
    if isinstance(states, list):
        return states[0]
    return states
def generate(self, outputs, **kwargs):
    """Perform one sequence generation step.

    Parameters
    ----------
    outputs : :class:`~tensor.TensorVariable`
        The outputs from the previous step.

    Notes
    -----
    The contexts, topical contexts, previous states and glimpses, and
    the entries named by ``self.topical_name`` and ``self.content_name``
    are expected as keyword arguments.

    Returns
    -------
    list
        Next states, then the next outputs, all the next glimpses and
        finally the costs.

    """
    states = dict_subset(kwargs, self._state_names, must_have=False)
    # Masks in contexts are optional (e.g. `attended_mask`).
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    topical_word_contexts = dict_subset(kwargs, self._topical_context_names)
    topical_embeddings = dict_subset(kwargs, [self.topical_name])
    content_embeddings = dict_subset(kwargs, [self.content_name])
    glimpses = dict_subset(kwargs, self._glimpse_names)

    next_glimpses = self.transition.take_glimpses(
        as_dict=True,
        **dict_union(states, glimpses, topical_embeddings,
                     content_embeddings, contexts, topical_word_contexts))

    # The readout only gets the regular attention glimpses. The second
    # key used to be misspelled 'weigths', so the weights never reached
    # the readout under their real name.
    readout_glimpses = {
        'weighted_averages': next_glimpses['weighted_averages'],
        'weights': next_glimpses['weights'],
    }
    next_readouts = self.readout.readout(
        feedback=self.readout.feedback(outputs),
        **dict_union(states, readout_glimpses, contexts))
    next_outputs = self.readout.emit(next_readouts)
    next_costs = self.readout.cost(next_readouts, next_outputs)
    next_feedback = self.readout.feedback(next_outputs)
    next_inputs = (self.fork.apply(next_feedback, as_dict=True)
                   if self.fork else {'feedback': next_feedback})

    # NOTE(review): the original author asked why the contexts are passed
    # here, since the `do_apply` variant does not pass them -- confirm.
    next_states = self.transition.compute_states(
        as_list=True,
        **dict_union(next_inputs, states, next_glimpses, contexts,
                     topical_word_contexts))
    return (next_states + [next_outputs] +
            list(next_glimpses.values()) + [next_costs])
def _push_allocation_config(self):
    """Push dimensions to the attention mechanism and the mixer.

    The attention gets the transition's dimensions for the state names
    and for the attended. The mixer's ``channel_dims`` is the subset,
    restricted to the mixer's inputs, of the union of the transition's
    sequence dimensions and the attention's glimpse dimensions.

    """
    self.attention.state_dims = self.transition.get_dims(self.state_names)
    self.attention.sequence_dim = self.transition.get_dim(self.attended_name)

    sequence_dims = self.transition.get_dims(self.sequence_names)
    glimpse_dims = self.attention.get_dims(self.glimpse_names)
    all_dims = dict_union(sequence_dims, glimpse_dims)
    self.mixer.channel_dims = dict_subset(all_dims, self.mixer.apply.inputs)
def cost(self, chars, chars_mask, targets, targets_mask):
    """Return the generator's cost matrix for `targets` given `chars`.

    `chars` goes through the lookup and the fork; the encoder is applied
    to the fork outputs with `chars_mask` as ``mask``. The encoder output
    is the attended context and `chars_mask` is the attended mask.

    """
    embedded = self.lookup.apply(chars)
    encoder_inputs = dict_union(
        self.fork.apply(embedded, as_dict=True), mask=chars_mask)
    attended = self.encoder.apply(**encoder_inputs)
    return self.generator.cost_matrix(
        targets, targets_mask,
        attended=attended,
        attended_mask=chars_mask)
def cost(self, chars, chars_mask, targets, targets_mask):
    """Compute the generator's cost matrix for `targets`.

    The attended context is the encoder output for the forked lookup of
    `chars`, with `chars_mask` given to the encoder as ``mask`` and to
    the generator as the attended mask.

    """
    forked = self.fork.apply(self.lookup.apply(chars), as_dict=True)
    encoded = self.encoder.apply(**dict_union(forked, mask=chars_mask))
    generator_kwargs = {'attended': encoded, 'attended_mask': chars_mask}
    return self.generator.cost_matrix(
        targets, targets_mask, **generator_kwargs)
def cost(self, given_x, application_call):
    """Computes the loss function.

    Starting from a noisy copy of ``self.h``, runs
    ``self.n_inference_steps`` Langevin updates of the hidden state and
    accumulates, per step, a hidden-state term ``J_h`` and a visible
    term ``J_x``. The final hidden state is registered as an update for
    ``self.h`` on `application_call`.

    Parameters
    ----------
    given_x : tensor variable
        Batch of given visible states from dataset.

    Returns
    -------
    total_cost
        The sum of ``J_h + J_x`` over all inference steps, passed
        through ``self.pp``. The per-iteration average is only attached
        as an auxiliary variable (when ``self.debug > 0``); it is not
        what is returned.

    Notes
    -----
    The `application_call` argument is an effect of the `application`
    decorator and isn't visible to users. It's used internally to set
    an updates dictionary for `h` that's discoverable by
    `ComputationGraph`.

    ``self.pp`` is defined elsewhere; presumably it is a debugging
    helper that returns its first argument -- TODO confirm.

    With ``self.n_inference_steps == 0`` the loop never runs and
    ``total_cost`` is never assigned.

    """
    x = given_x
    # Initial hidden state: self.h plus normal noise scaled by initial_noise.
    h_prev = self.h + self.initial_noise * self.theano_rng.normal(size=self.h.shape, dtype=self.h.dtype)
    # All three names start out as the same noisy state.
    h = h_next = h_prev
    old_energy = self.pp(self.energy(x, h).sum(), "old_energy", 1)
    for iteration in range(self.n_inference_steps):
        # Shift the history: h_prev takes the old h, h takes the old h_next.
        # NOTE(review): h_prev is therefore two updates behind the h_next
        # computed below (from the second iteration on) -- confirm intended.
        h_prev = h
        h = h_next
        # New hidden state; no gradient flows through the update itself.
        h_next = self.pp(
            disconnected_grad(self.langevin_update(self.pp(x, "x", 3), self.pp(h_next, "h", 2))), "h_next", 2
        )
        new_energy = self.pp(self.energy(x, h_next).sum(), "new_energy", 1)
        # Not used afterwards except through whatever self.pp does with it.
        delta_energy = self.pp(old_energy - new_energy, "delta_energy", 1)
        old_energy = new_energy
        # Residual of h_next against h_prev plus epsilon times the energy
        # gradient at h_prev.
        h_prediction_residual = (
            h_next - self.pp(h_prev, "h_prev", 3) + self.epsilon * tensor.grad(self.energy(x, h_prev).sum(), h_prev)
        )
        # Squared residual, summed over axis 1 and averaged over axis 0.
        J_h = self.pp((h_prediction_residual * h_prediction_residual).sum(axis=1).mean(axis=0), "J_h", 1)
        # Gradient of the energy with respect to the visible states.
        x_prediction_residual = self.pp(tensor.grad(self.energy(given_x, h_prev).sum(), given_x), "x_residual", 2)
        J_x = self.pp((x_prediction_residual * x_prediction_residual).sum(axis=1).mean(axis=0), "J_x", 1)
        if self.debug > 1:
            # Expose the per-iteration terms under iteration-specific names.
            application_call.add_auxiliary_variable(J_x, name="J_x" + str(iteration))
            application_call.add_auxiliary_variable(J_h, name="J_h" + str(iteration))
        if iteration == 0:
            total_cost = J_h + J_x
        else:
            total_cost = total_cost + J_h + J_x
    per_iteration_cost = total_cost / self.n_inference_steps
    # Persist the final hidden state into self.h through the updates.
    updates = OrderedDict([(self.h, h_next)])
    application_call.updates = dict_union(application_call.updates, updates)
    if self.debug > 0:
        application_call.add_auxiliary_variable(per_iteration_cost, name="per_iteration_cost")
    if self.debug > 1:
        # Each attribute is multiplied by one before being attached;
        # presumably to get a new variable instead of the attribute
        # itself -- TODO confirm.
        application_call.add_auxiliary_variable(self.Wxh * 1.0, name="Wxh")
        application_call.add_auxiliary_variable(self.Whh * 1.0, name="Whh")
        application_call.add_auxiliary_variable(self.Wxx * 1.0, name="Wxx")
        application_call.add_auxiliary_variable(self.b * 1, name="b")
        application_call.add_auxiliary_variable(self.c * 1, name="c")
    return self.pp(total_cost, "total_cost")
def generate(self, **sequences_states_contexts):
    """Sample from the readout and advance the recurrent brick one step.

    Parameters
    ----------
    **sequences_states_contexts
        The inputs of ``self.readout.sample`` are selected from these by
        name; all of them are also passed, together with the feedback of
        the samples, to ``self.recurrent.apply``.

    Returns
    -------
    list
        The samples, their scores, and then the outputs of the
        recurrent brick.

    """
    sample_kwargs = dict_subset(
        sequences_states_contexts, self.readout.sample.inputs)
    samples, scores = self.readout.sample(**sample_kwargs)
    feedback = self.feedback.apply(samples, as_dict=True)
    recurrent_kwargs = dict_union(feedback, **sequences_states_contexts)
    next_states_outputs = self.recurrent.apply(
        as_list=True, iterate=False, **recurrent_kwargs)
    return [samples, scores] + next_states_outputs
def _push_allocation_config(self):
    """Propagate dimensions to the attention mechanism and the mixer.

    ``state_dims`` and ``sequence_dim`` of the attention come from the
    transition. The mixer's ``channel_dims`` is taken from the union of
    the transition's sequence dimensions and the attention's glimpse
    dimensions, restricted to the names in ``self.mixer.apply.inputs``.

    """
    transition = self.transition
    attention = self.attention
    attention.state_dims = transition.get_dims(self.state_names)
    attention.sequence_dim = transition.get_dim(self.attended_name)
    candidate_dims = dict_union(
        transition.get_dims(self.sequence_names),
        attention.get_dims(self.glimpse_names))
    self.mixer.channel_dims = dict_subset(
        candidate_dims, self.mixer.apply.inputs)
def do_apply(self, **kwargs):
    r"""Process one step attending the attended context.

    Besides the attended sequence this method requires its preprocessed
    version. Keyword arguments not consumed here are passed on to
    `compute_states`.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list of :class:`~tensor.TensorVariable`
        The current step states followed by the current glimpses.

    """
    # The attended and its mask stay in kwargs; the preprocessed version
    # is removed.
    attended = kwargs[self.attended_name]
    preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
    attended_mask = kwargs.get(self.attended_mask_name)

    sequences = dict_subset(
        kwargs, self.sequence_names, pop=True, must_have=False)
    states = dict_subset(kwargs, self.state_names, pop=True)
    glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

    attention_inputs = {
        self.attended_name: attended,
        self.attended_mask_name: attended_mask,
        self.preprocessed_attended_name: preprocessed_attended,
    }
    current_glimpses = self.take_glimpses(
        return_dict=True,
        **dict_union(states, glimpses, attention_inputs))
    current_states = self.compute_states(
        return_list=True,
        **dict_union(sequences, states, current_glimpses, kwargs))
    return current_states + list(current_glimpses.values())
def compute_states(self, **kwargs):
    r"""Compute current states from already computed glimpses.

    The sequential inputs are first updated by `distribute` using the
    glimpses, then by `topical_distribute` using the topical glimpses,
    and finally the wrapped transition is applied for one step. Keyword
    arguments not consumed here go to the wrapped transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain everything `self.transition` needs plus the
        current glimpses and topical glimpses.

    Returns
    -------
    current_states : list of :class:`~tensor.TensorVariable`
        Current states computed by `self.transition`.

    """
    # Names containing 'mask' are skipped so the mask is not popped.
    input_names = list(
        filter(lambda name: 'mask' not in name, self._sequence_names))
    sequences = dict_subset(kwargs, input_names, pop=True)
    glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    topical_glimpses = dict_subset(
        kwargs, self._topical_glimpse_names, pop=True)

    if self.add_contexts:
        # The attended contexts are mandatory, their masks optional.
        kwargs.pop(self.attended_name)
        kwargs.pop(self.attended_mask_name, None)
        kwargs.pop(self.topical_attended_name)
        kwargs.pop(self.topical_attended_mask_name, None)

    # Each pass merges its glimpses with the sequences as they are at
    # that point, so the topical pass sees the first pass's output.
    passes = (
        (self.distribute, glimpses),
        (self.topical_distribute, topical_glimpses),
    )
    for distributor, extra in passes:
        selected = dict_subset(
            dict_union(sequences, extra), distributor.apply.inputs)
        sequences.update(distributor.apply(as_dict=True, **selected))

    current_states = self.transition.apply(
        iterate=False, as_list=True, **dict_union(sequences, kwargs))
    return current_states
def generate(self, **sequences_states_contexts):
    """Draw samples and compute the next outputs of the recurrent brick.

    Parameters
    ----------
    **sequences_states_contexts
        Provides, by name, the inputs of ``self.readout.sample``. The
        whole set is also forwarded to ``self.recurrent.apply`` together
        with the feedback computed from the samples.

    Returns
    -------
    list
        ``[samples, scores]`` followed by the outputs of the recurrent
        brick.

    """
    everything = sequences_states_contexts
    samples, scores = self.readout.sample(
        **dict_subset(everything, self.readout.sample.inputs))
    feedback = self.feedback.apply(samples, as_dict=True)
    merged = dict_union(feedback, **everything)
    results = [samples, scores]
    results = results + self.recurrent.apply(
        as_list=True, iterate=False, **merged)
    return results
def apply(self, input_, mask=None, **kwargs):
    """Run the transition on the forked input and return the states.

    Parameters
    ----------
    input_
        The input handed to ``self.fork``.
    mask : optional
        Forwarded to the transition as ``mask``.
    **kwargs
        Merged with the fork outputs and passed to the transition.

    Returns
    -------
    states
        The first element if the transition returned a list, otherwise
        the returned value itself.

    """
    transition_inputs = dict_union(
        self.fork.apply(input_, as_dict=True), kwargs)
    result = self.transition.apply(mask=mask, **transition_inputs)
    # A list result is unwrapped to its first element (the original
    # author observed [states, cells] for LSTM); the rest is not exposed.
    if not isinstance(result, list):
        return result
    return result[0]
def apply(self, inner_inputs, states, outer_inputs):
    """Apply one step of the inner GRU to the summed forked inputs.

    Both inputs are forked separately; for every key of the inner fork
    the two forked values are added. The sums and `states` are then
    passed to ``self.inner_gru.apply`` with ``iterate=False``.

    Returns
    -------
    new_states
        The result of ``self.inner_gru.apply``.

    """
    from_inner = self.inner_input_fork.apply(inner_inputs, as_dict=True)
    from_outer = self.outer_input_fork.apply(outer_inputs, as_dict=True)

    gru_inputs = {}
    for key in from_inner.keys():
        gru_inputs[key] = from_inner[key] + from_outer[key]

    step_kwargs = dict_union(gru_inputs, {'states': states})
    return self.inner_gru.apply(iterate=False, **step_kwargs)
def cost(self, x, context, **kwargs):
    """Return the mean emitter cost of `x` given `context`.

    `context` goes through ``self.mlp_x`` and the fork, and the
    transition is applied to the result merged with `kwargs`. For every
    transition output, a named copy of its last element is stored in
    ``self.final_states``. The cost is computed by ``self.gmm_emitter``
    from the last transition output and `x`.

    """
    projected = self.mlp_x.apply(context)
    forked = self.fork.apply(projected, as_dict=True)
    hidden = self.transition.apply(**dict_union(forked, kwargs))

    # Keep the last element of every transition output under a new name.
    self.final_states = []
    self.final_states.extend(
        var[-1].copy(name=var.name + "_final_value") for var in hidden)

    return self.gmm_emitter.cost(hidden[-1], x).mean()
def lazy_init(*args, **kwargs):
    """Call ``init`` with placeholders for the omitted lazy arguments.

    The first positional argument is the instance. The names in
    ``allocation`` and ``initialization`` (taken from the enclosing
    scope) are appended to its ``allocation_args`` and
    ``initialization_args``. Positional arguments are converted to
    keyword arguments; any allocation or initialization argument that is
    still missing is set to ``NoneAllocation`` or
    ``NoneInitialization``.

    """
    instance = args[0]
    previous_allocation = getattr(instance, 'allocation_args', [])
    instance.allocation_args = previous_allocation + allocation
    previous_initialization = getattr(instance, 'initialization_args', [])
    instance.initialization_args = previous_initialization + initialization

    kwargs = dict_union(args_to_kwargs(args, init), kwargs)
    # Explicitly given values are never overwritten.
    for name in allocation:
        if name not in kwargs:
            kwargs[name] = NoneAllocation
    for name in initialization:
        if name not in kwargs:
            kwargs[name] = NoneInitialization
    return init(**kwargs)
def initial_states(self, batch_size, *args, **kwargs):
    """Return the initial states in the order of ``self.generate.states``.

    The initial states of the transition are combined with the initial
    outputs of the readout (under the name ``outputs``). If a language
    model is present, its initial states are added with ``lm_``
    prepended to their names.

    """
    transition_states = self.transition.initial_states(
        batch_size, as_dict=True, *args, **kwargs)
    state_dict = dict(
        transition_states,
        outputs=self.readout.initial_outputs(batch_size))

    if self.language_model:
        lm_initial_states = self.language_model.initial_states(
            batch_size, as_dict=True, *args, **kwargs)
        prefixed = {}
        for name, state in lm_initial_states.items():
            prefixed["lm_" + name] = state
        state_dict = dict_union(state_dict, prefixed)

    ordered = []
    for state_name in self.generate.states:
        ordered.append(state_dict[state_name])
    return ordered
def apply(self, **kwargs):
    """Preprocess the attended context and run :meth:`do_apply`.

    The attended context found in `kwargs` is preprocessed by the
    attention mechanism and the result is added to the keyword
    arguments under ``self.preprocessed_attended_name``. See
    :meth:`do_apply` documentation for further information.

    """
    attended = kwargs[self.attended_name]
    extra = {
        self.preprocessed_attended_name: self.attention.preprocess(attended),
    }
    return self.do_apply(**dict_union(kwargs, extra))
def _get_variables(self):
    """Collect variables, updates and auxiliary variables.

    Also collects all :class:`.Scan` ops and builds a computation graph
    for each of them. The results are stored in ``self.variables`` and
    ``self.updates``; when there are non-shared outputs,
    ``self.sorted_apply_nodes``, ``self.scans``,
    ``self.sorted_scan_nodes`` and ``self._scan_graphs`` are set too.

    """
    updates = OrderedDict()

    shared_outputs = []
    usual_outputs = []
    for output in self.outputs:
        bucket = shared_outputs if is_shared_variable(output) else usual_outputs
        bucket.append(output)
    variables = shared_outputs

    if usual_outputs:
        # Sort the apply nodes topologically.
        inputs = graph.inputs(self.outputs)
        self.sorted_apply_nodes = graph.io_toposort(inputs, usual_outputs)

        nodes_with_scan = [node for node in self.sorted_apply_nodes
                           if isinstance(node.op, Scan)]
        self.scans = list(unique([node.op for node in nodes_with_scan]))
        self.sorted_scan_nodes = nodes_with_scan
        self._scan_graphs = [ComputationGraph(scan.outputs)
                             for scan in self.scans]

        # Inputs of all apply nodes in order, first occurrence only.
        seen = set()
        main_vars = []
        for var in chain(*[apply_node.inputs
                           for apply_node in self.sorted_apply_nodes]):
            if var in seen:
                continue
            seen.add(var)
            main_vars.append(var)
        # Outputs that did not show up among the inputs come last.
        main_vars = main_vars + [var for var in self.outputs
                                 if var not in seen]

        # While preserving order add auxiliary variables, and collect
        # updates. Every annotation is handled once.
        handled_annotations = set()
        # Intermediate variables could be auxiliary
        seen_avs = set(main_vars)
        variables = []
        for var in main_vars:
            variables.append(var)
            for annotation in getattr(var.tag, 'annotations', []):
                if annotation in handled_annotations:
                    continue
                handled_annotations.add(annotation)
                new_avs = []
                for av in annotation.auxiliary_variables:
                    if av not in seen_avs:
                        seen_avs.add(av)
                        new_avs.append(av)
                variables.extend(new_avs)
                updates = dict_union(updates, annotation.updates)

    self.variables = variables
    self.updates = updates
def get_theano_function(self, additional_updates=None, **kwargs):
    r"""Create a Theano function from the graph contained.

    Parameters
    ----------
    additional_updates : optional
        Extra updates, in any form accepted by ``OrderedDict``. When
        given and non-empty they are merged with ``self.updates``.
    \*\*kwargs : dict
        Keyword arguments to theano.function. Useful for specifying
        compilation modes or profiling.

    Returns
    -------
    The function compiled by ``theano.function`` from ``self.inputs``
    to ``self.outputs`` with the collected updates.

    """
    if additional_updates:
        all_updates = dict_union(
            self.updates, OrderedDict(additional_updates))
    else:
        all_updates = self.updates
    return theano.function(
        self.inputs, self.outputs, updates=all_updates, **kwargs)