Example #1
class ShallowFusionReadout(Readout):
    def __init__(self, lm_costs_name, lm_weight,
                 normalize_am_weights=False,
                 normalize_lm_weights=False,
                 normalize_tot_weights=True,
                 am_beta=1.0,
                 **kwargs):
        super(ShallowFusionReadout, self).__init__(**kwargs)
        self.lm_costs_name = lm_costs_name
        self.lm_weight = lm_weight
        self.normalize_am_weights = normalize_am_weights
        self.normalize_lm_weights = normalize_lm_weights
        self.normalize_tot_weights = normalize_tot_weights
        self.am_beta = am_beta
        self.softmax = NDimensionalSoftmax()
        self.children += [self.softmax]

    @application
    def readout(self, **kwargs):
        lm_costs = -kwargs.pop(self.lm_costs_name)
        if self.normalize_lm_weights:
            lm_costs = self.softmax.log_probabilities(
                lm_costs, extra_ndim=lm_costs.ndim - 2)
        am_pre_softmax = self.am_beta * super(
            ShallowFusionReadout, self).readout(**kwargs)
        if self.normalize_am_weights:
            am_pre_softmax = self.softmax.log_probabilities(
                am_pre_softmax, extra_ndim=am_pre_softmax.ndim - 2)
        x = am_pre_softmax + self.lm_weight * lm_costs
        if self.normalize_tot_weights:
            x = self.softmax.log_probabilities(x, extra_ndim=x.ndim - 2)
        return x
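The readout above implements shallow fusion: acoustic-model (AM) and language-model (LM) scores are combined additively in the log domain, with optional log-softmax normalization of either term or of their sum. Below is a minimal NumPy sketch of the same combination; the names `am_logits` and `lm_costs` are illustrative and not taken from the class above.

import numpy as np

def log_softmax(x, axis=-1):
    # Numerically stable log-softmax over the given axis.
    x = x - x.max(axis=axis, keepdims=True)
    return x - np.log(np.exp(x).sum(axis=axis, keepdims=True))

def shallow_fusion_scores(am_logits, lm_costs, am_beta=1.0, lm_weight=0.5,
                          normalize_am=False, normalize_lm=False,
                          normalize_tot=True):
    # LM costs are negative log-probabilities, so negate them to get scores.
    lm_scores = -lm_costs
    if normalize_lm:
        lm_scores = log_softmax(lm_scores)
    am_scores = am_beta * am_logits
    if normalize_am:
        am_scores = log_softmax(am_scores)
    scores = am_scores + lm_weight * lm_scores
    if normalize_tot:
        scores = log_softmax(scores)
    return scores

# Toy example: batch of 2, vocabulary of 3.
am = np.array([[2.0, 0.5, -1.0], [0.1, 0.2, 0.3]])
lm = np.array([[0.7, 1.2, 2.5], [1.0, 1.0, 1.0]])
print(shallow_fusion_scores(am, lm))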
class SoftmaxEmitter(AbstractEmitter, Initializable, Random):
    """A softmax emitter for the case of integer outputs.

    Interprets readout elements as energies corresponding to their indices.

    Parameters
    ----------
    initial_output : int or a scalar :class:`~theano.Variable`
        The initial output.

    """
    def __init__(self, initial_output=0, **kwargs):
        super(SoftmaxEmitter, self).__init__(**kwargs)
        self.initial_output = initial_output
        self.softmax = NDimensionalSoftmax()
        self.children = [self.softmax]

    @application
    def probs(self, readouts):
        return self.softmax.apply(readouts, extra_ndim=readouts.ndim - 2)

    @application
    def emit(self, readouts):
        probs = self.probs(readouts)
        batch_size = probs.shape[0]
        pvals_flat = probs.reshape((batch_size, -1))
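        # `multinomial` draws one one-hot sample per row; the `argmax` below
        # converts the one-hot vectors back to integer indices.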
        generated = self.theano_rng.multinomial(pvals=pvals_flat)
        return generated.reshape(probs.shape).argmax(axis=-1)

    @application
    def cost(self, readouts, outputs):
        # WARNING: this application method silently "works" even when
        # `readouts` and `outputs` have mismatched dimensions. Be careful!
        return self.softmax.categorical_cross_entropy(
            outputs, readouts, extra_ndim=readouts.ndim - 2)

    @application
    def costs(self, readouts):
        return -self.softmax.log_probabilities(
            readouts, extra_ndim=readouts.ndim - 2)

    @application
    def initial_outputs(self, batch_size):
        return self.initial_output * tensor.ones((batch_size,), dtype='int64')

    def get_dim(self, name):
        if name == 'outputs':
            return 0
        return super(SoftmaxEmitter, self).get_dim(name)
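Both of the bricks above delegate to `NDimensionalSoftmax`, whose `extra_ndim` argument lets a softmax defined over 2-D (batch, vocabulary) input handle readouts with extra leading dimensions such as a time axis. A rough NumPy sketch of that behaviour, under the assumption that the brick simply flattens the leading axes before applying an ordinary softmax over the last axis:

import numpy as np

def ndimensional_softmax(readouts, extra_ndim=0):
    # Collapse the `extra_ndim` leading axes together with the batch axis,
    # apply a row-wise softmax over the last axis, then restore the shape.
    assert readouts.ndim == extra_ndim + 2
    shape = readouts.shape
    flat = readouts.reshape(-1, shape[-1])
    flat = flat - flat.max(axis=1, keepdims=True)   # numerical stability
    probs = np.exp(flat) / np.exp(flat).sum(axis=1, keepdims=True)
    return probs.reshape(shape)

# (time, batch, vocabulary) readouts, as in the emitter's `probs` method.
readouts = np.random.randn(5, 2, 4)
probs = ndimensional_softmax(readouts, extra_ndim=readouts.ndim - 2)
assert np.allclose(probs.sum(axis=-1), 1.0)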
    def softmax_layer(self, h, y):
        """
        Perform Softmax over the hidden state in order to
        predict the next word in the sequence and compute
        the loss.
        :param h: The hidden state sequence
        :param y: The target words
        """
        hidden_to_output = Linear(name='hidden_to_output', input_dim=self.hidden_size,
                                  output_dim=self.vocab_size)
        initialize(hidden_to_output, sqrt(6.0 / (self.hidden_size + self.vocab_size)))

        linear_output = hidden_to_output.apply(h)
        linear_output.name = 'linear_output'
        softmax = NDimensionalSoftmax(name="lm_softmax")
        y_hat = softmax.log_probabilities(linear_output, extra_ndim=1)
        y_hat.name = 'y_hat'

        cost = softmax.categorical_cross_entropy(y, linear_output, extra_ndim=1).mean()

        cost.name = 'cost'
        return y_hat, cost
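The cost returned above is the categorical cross-entropy between the target words and the softmax of the hidden-to-output projection, averaged over all time steps and batch entries. A small NumPy sketch of that quantity; the (time, batch, vocabulary) layout is an assumption suggested by `extra_ndim=1`:

import numpy as np

def sequence_cross_entropy(linear_output, targets):
    # linear_output: (time, batch, vocab) pre-softmax scores.
    # targets:       (time, batch) integer word indices.
    shifted = linear_output - linear_output.max(axis=-1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
    t, b = np.indices(targets.shape)
    # Negative log-probability of the target word at every position,
    # averaged over both time and batch, as `.mean()` does above.
    return -log_probs[t, b, targets].mean()

scores = np.random.randn(7, 3, 10)          # 7 steps, batch of 3, vocab of 10
targets = np.random.randint(0, 10, (7, 3))
print(sequence_cross_entropy(scores, targets))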
Example #6
class ActorCriticReadout(SoftmaxReadout):
    """Actor-critic

    Params
    ------
    bos_token : int
        The token used to pad critic input. Critic needs to do
        at least one extra step compared to the actor in order
        to get the first glimpse of the ground-truth sequence
        before predicting the actual values.

    """
    def __init__(self,
                 reward_brick,
                 compute_targets,
                 compute_policy,
                 solve_bellman,
                 freeze_actor,
                 freeze_critic,
                 critic_uses_actor_states,
                 critic_uses_groundtruth,
                 critic=None,
                 critic_burnin_steps=None,
                 critic_policy_t=None,
                 entropy_reward_coof=None,
                 cross_entropy_reward_coof=None,
                 discount=None,
                 value_penalty=None,
                 value_softmax=False,
                 same_value_for_wrong=False,
                 accumulate_outputs=False,
                 use_value_biases=None,
                 actor_grad_estimate=None,
                 bos_token=None,
                 **kwargs):
        super(ActorCriticReadout, self).__init__(**kwargs)
        self.reward_brick = reward_brick
        self.critic = critic
        self.freeze_actor = freeze_actor
        self.freeze_critic = freeze_critic
        self.critic_uses_actor_states = critic_uses_actor_states
        self.critic_uses_groundtruth = (critic_uses_groundtruth
                                        if critic_uses_groundtruth is not None
                                        else True)
        self.critic_burnin_steps = (critic_burnin_steps
                                    if critic_burnin_steps is not None else 0)
        self.value_summand = Linear(output_dim=1, name='summand')
        self.softmax_t = 1.
        self.critic_policy_t = (critic_policy_t
                                if critic_policy_t is not None else 1.0)
        self.epsilon = 0.
        self.discount = (discount if discount is not None else 1.)
        self.entropy_reward_coof = (entropy_reward_coof
                                    if entropy_reward_coof is not None else 0.)
        self.cross_entropy_reward_coof = (cross_entropy_reward_coof
                                          if cross_entropy_reward_coof
                                          is not None else 0.)
        self.value_penalty = value_penalty
        self.value_softmax = value_softmax
        self.same_value_for_wrong = same_value_for_wrong
        self.compute_targets = compute_targets
        self.compute_policy = compute_policy
        self.solve_bellman = solve_bellman
        self.accumulate_outputs = accumulate_outputs
        self.use_value_biases = (use_value_biases
                                 if use_value_biases is not None else True)
        self.actor_grad_estimate = (actor_grad_estimate
                                    if actor_grad_estimate else 'all_actions')
        self.bos_token = bos_token
        self.softmax = NDimensionalSoftmax()
        self.children += [reward_brick, self.value_summand, self.softmax]
        if self.critic:
            self.children.append(self.critic)
        self.costs.inputs += ['attended', 'attended_mask']

    def _push_allocation_config(self):
        super(ActorCriticReadout, self)._push_allocation_config()
        self.value_summand.input_dim = self.get_dim('attended')

    @application
    def scores(self, **inputs):
        merged = self.merge(**dict_subset(inputs, self.merge_names))
        return self.softmax.log_probabilities(merged * self.softmax_t,
                                              extra_ndim=merged.ndim - 2)

    @application
    def costs(self, application_call, prediction, prediction_mask, groundtruth,
              groundtruth_mask, **inputs):
        def _prediction_subtensor(data):
            if data.ndim != 3:
                raise ValueError
            flat_data = data.reshape(
                (data.shape[0] * data.shape[1], data.shape[2]))
            flat_data = flat_data[tensor.arange(flat_data.shape[0]),
                                  prediction.flatten()]
            return flat_data.reshape(
                (prediction.shape[0], prediction.shape[1]))

        attended = disconnected_grad(inputs.pop('attended'))
        attended_mask = disconnected_grad(inputs.pop('attended_mask'))

        # Compute the rewards
        rewards = self.reward_brick.apply(
            prediction, prediction_mask,
            groundtruth, groundtruth_mask)[:, :, 0]
        future_rewards = rewards[::-1].cumsum(axis=0)[::-1]
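        # Note: reversing, cumulatively summing, and reversing again yields the
        # total reward collected from each step until the end of the sequence
        # (the "reward to go"), used further below as a Monte-Carlo target.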

        # Compute the critic outputs
        if self.critic:
            padding = tensor.repeat(
                tensor.fill(prediction[0:1], self.bos_token), 1, axis=0)
            mask_padding = tensor.repeat(
                tensor.fill(prediction_mask[0:1], 1.), 1, axis=0)
            padded_prediction = tensor.concatenate([padding, prediction])
            padded_prediction_mask = tensor.concatenate(
                [mask_padding, prediction_mask])
            if self.critic_uses_groundtruth:
                critic_context = groundtruth
                critic_context_mask = groundtruth_mask
            else:
                critic_context = tensor.zeros_like(groundtruth[0:1])
                critic_context_mask = tensor.zeros_like(groundtruth_mask[0:1])
            critic_kwargs = dict(prediction=padded_prediction,
                                 prediction_mask=padded_prediction_mask,
                                 groundtruth=critic_context,
                                 groundtruth_mask=critic_context_mask,
                                 inputs=critic_context,
                                 inputs_mask=critic_context_mask)

            if self.critic_uses_actor_states:
                extra_inputs = disconnected_grad(inputs['states'])
                # The very last hidden state of the actor is not available in
                # extra_inputs, so we have to append something in its place for
                # the shapes to match. What exactly we append does not matter.
                critic_kwargs['extra_inputs'] = tensor.concatenate(
                    [extra_inputs,
                     tensor.zeros_like(extra_inputs[0:1])])
            critic_cg = ComputationGraph(self.critic.costs(**critic_kwargs))
            outputs, = VariableFilter(
                applications=[self.critic.generator.readout.all_outputs],
                roles=[OUTPUT])(critic_cg)
            # The first subtensor is discarded because it was produced for the
            # padding step. In addition, Q-values from the first
            # `critic_burnin_steps` steps are ignored; see further below.
            outputs = outputs[1:]
        else:
            outputs = self.merge(**dict_subset(inputs, self.merge_names))
        prediction_outputs = _prediction_subtensor(outputs)

        # Compute Q adjustments
        adjustments = outputs
        prediction_adjustments = prediction_outputs
        if self.accumulate_outputs:
            prediction_adjustments = prediction_outputs.cumsum(axis=0)
            adjustments = tensor.inc_subtensor(
                adjustments[1:], prediction_adjustments[:-1][:, :, None])

        # Compute shared additive biases for all Q values
        if self.use_value_biases:
            value_biases = (self.value_summand.apply(attended)[:, :, 0] *
                            attended_mask).sum(axis=0)
        else:
            value_biases = tensor.zeros_like(adjustments[0, :, 0])
        values = adjustments + value_biases[None, :, None]
        prediction_values = prediction_adjustments + value_biases[None, :]

        rolled_prediction_mask = tensor.roll(prediction_mask, -1, axis=0)
        rolled_prediction_mask = tensor.set_subtensor(
            rolled_prediction_mask[-1], 0)

        # Compute probabilities
        logs = self.scores(use_epsilon=False, **inputs)
        probs = tensor.exp(logs)
        if not self.compute_policy:
            raise NotImplementedError("Not supported any more")
        prediction_logs = _prediction_subtensor(logs)

        # Compute value targets
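        # TD(0)-style target: the immediate reward plus the discounted
        # expected value of the next step under the actor's current policy,
        # E_{a ~ probs}[Q(s_{t+1}, a)], masked out after the last real step.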
        value_targets = (disconnected_grad(probs) * values).sum(axis=-1)
        value_targets = tensor.roll(value_targets, -1, axis=0)
        value_targets = (
            self.discount * value_targets * rolled_prediction_mask + rewards)
        value_targets = value_targets.astype(theano.config.floatX)

        total_costs = 0

        # Compute critic cost
        if not self.compute_targets:
            logger.debug("Using given targets")
            value_targets = tensor.matrix('value_targets')
        if self.solve_bellman == 'no':
            logger.debug("Not solving Bellman, just predicting the rewards")
            value_targets = rewards.copy(name='value_targets')
        elif self.solve_bellman == 'without_dp':
            future_rewards = rewards[::-1].cumsum(axis=0)[::-1]
            logger.debug("Solving Bellman, but without DP")
            value_targets = future_rewards
        elif self.solve_bellman is not True:
            raise ValueError()
        critic_costs_per_char = (
            (prediction_values - value_targets)**2) * prediction_mask
        critic_costs = critic_costs_per_char[self.critic_burnin_steps:].sum(
            axis=0)
        if not self.freeze_critic:
            total_costs += critic_costs

        # Compute critic Monte-Carlo cost
        critic_monte_carlo_costs = (
            (((prediction_values - future_rewards)**2) *
             prediction_mask)[self.critic_burnin_steps:].sum(axis=0))

        # Value penalty
        if self.value_penalty:
            logger.debug("Use value penalty")
            value_deviations = (values -
                                values.mean(axis=-1, keepdims=True))**2
            if not self.freeze_critic:
                total_costs += (
                    self.value_penalty *
                    (value_deviations.sum(axis=-1) *
                     prediction_mask)[self.critic_burnin_steps:].sum(axis=0))

        # Compute actor cost
        if self.critic:
            # The actor cost will be minimized, which is why the values
            # must be negated.
            est_name = self.actor_grad_estimate
            if est_name == 'all_actions':
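                # Expected "disadvantage" (max_a Q - Q(a)) under the actor's
                # policy: minimizing it shifts probability mass towards the
                # actions the critic values most.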
                disadvantages = disconnected_grad(
                    values.max(axis=-1)[:, :, None] - values)
                actor_costs = ((probs * disadvantages).sum(axis=-1) *
                               prediction_mask)
                actor_costs = actor_costs[self.critic_burnin_steps:]
            elif est_name.startswith('1_action'):
                # We do not provide a target for the first step because we
                # lack an estimate of the value of the initial state; that is
                # how our critic works. Hopefully the network won't unlearn
                # to produce a BOS token first.
                future_reward_estimate = (future_rewards
                                          if est_name.endswith('unbiased') else
                                          prediction_values)
                weights = -disconnected_grad(future_reward_estimate[1:] +
                                             rewards[:-1] -
                                             prediction_values[:-1])
                actor_costs = ((prediction_logs[1:] * weights) *
                               prediction_mask[1:])
                actor_costs = actor_costs[self.critic_burnin_steps + 1:]
            else:
                raise ValueError
            actor_costs = actor_costs.sum(axis=0)

            actor_entropies = (probs * -logs).sum(axis=-1) * prediction_mask
            actor_entropies = actor_entropies[self.critic_burnin_steps:].sum(
                axis=0)
            critic_policy = disconnected_grad(
                self.softmax.apply(self.critic_policy_t * values,
                                   extra_ndim=1))
            critic_cross_entropies = ((critic_policy * -logs).sum(axis=-1) *
                                      prediction_mask)
            critic_cross_entropies = critic_cross_entropies[
                self.critic_burnin_steps:].sum(axis=0)
            actor_costs_with_penalties = (
                actor_costs - self.entropy_reward_coof * actor_entropies -
                self.cross_entropy_reward_coof * critic_cross_entropies)
            if not self.freeze_actor:
                total_costs += actor_costs_with_penalties
            else:
                total_costs += disconnected_grad(actor_costs_with_penalties)

        # Add auxiliary variables for intermediate steps of the computation
        application_call.add_auxiliary_variable(rewards, name='rewards')
        application_call.add_auxiliary_variable(value_biases,
                                                name='value_biases')
        application_call.add_auxiliary_variable(values.copy(), name='values')
        application_call.add_auxiliary_variable(outputs.copy(), name='outputs')
        application_call.add_auxiliary_variable(prediction_values,
                                                name='prediction_values')
        application_call.add_auxiliary_variable(prediction_outputs,
                                                name='prediction_outputs')
        application_call.add_auxiliary_variable(value_targets.copy(),
                                                name='value_targets')
        application_call.add_auxiliary_variable(probs.copy(), name='probs')
        application_call.add_auxiliary_variable(prediction_logs,
                                                name='prediction_log_probs')

        # Compute some statistics for debugging
        last_character_mask = prediction_mask - rolled_prediction_mask
        last_character_costs = (critic_costs_per_char *
                                last_character_mask).sum(axis=0)
        mean2_output = (((prediction_outputs**2) * prediction_mask).sum() /
                        prediction_mask.sum())**0.5
        max_output = abs(prediction_outputs * prediction_mask).max()
        expected_reward = (probs[0] * values[0]).sum(axis=-1)
        application_call.add_auxiliary_variable(last_character_costs,
                                                name='last_character_costs')
        application_call.add_auxiliary_variable(critic_costs.mean(),
                                                name='mean_critic_cost')
        application_call.add_auxiliary_variable(
            critic_monte_carlo_costs.mean(),
            name='mean_critic_monte_carlo_cost')
        if self.critic:
            application_call.add_auxiliary_variable(actor_costs.mean(),
                                                    name='mean_actor_cost')
            application_call.add_auxiliary_variable(actor_entropies.mean(),
                                                    name='mean_actor_entropy')
        application_call.add_auxiliary_variable(expected_reward.mean(),
                                                name='mean_expected_reward')
        application_call.add_auxiliary_variable(mean2_output,
                                                name='mean2_output')
        application_call.add_auxiliary_variable(max_output, name='max_output')

        return total_costs
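For reference, here is a compact NumPy sketch of the core of the critic cost computed above: gather the Q-values of the chosen actions, build the TD target from the immediate reward and the discounted expected next-step value under the actor's policy, and sum the masked squared error over time. All names and shapes are illustrative; the real method works on Theano variables and additionally handles burn-in steps, padding of the critic input, value biases, penalties, and the different `solve_bellman` modes.

import numpy as np

def critic_cost(values, probs, rewards, prediction, mask, discount=1.0):
    # values:     (time, batch, vocab) Q-value estimates from the critic.
    # probs:      (time, batch, vocab) actor policy probabilities.
    # rewards:    (time, batch) per-step rewards.
    # prediction: (time, batch) integer actions actually taken.
    # mask:       (time, batch) 1.0 for real steps, 0.0 for padding.
    t, b = np.indices(prediction.shape)
    prediction_values = values[t, b, prediction]        # Q(s_t, a_t)

    # Expected next-step value under the current policy, shifted back by one
    # step; the rolled mask zeroes the term after the last real step.
    expected_values = (probs * values).sum(axis=-1)
    next_values = np.roll(expected_values, -1, axis=0)
    next_mask = np.roll(mask, -1, axis=0)
    next_mask[-1] = 0.0
    targets = rewards + discount * next_values * next_mask

    # Squared TD error, masked and summed over the time axis.
    return (((prediction_values - targets) ** 2) * mask).sum(axis=0)

# Toy shapes: 6 steps, batch of 2, vocabulary of 5.
T, B, V = 6, 2, 5
values = np.random.randn(T, B, V)
probs = np.full((T, B, V), 1.0 / V)
rewards = np.random.randn(T, B)
prediction = np.random.randint(0, V, size=(T, B))
mask = np.ones((T, B))
print(critic_cost(values, probs, rewards, prediction, mask, discount=0.9))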