Example #1
    def _add_local_fusion(self, subnet: ReturnnNetwork, am_output_prob):
        prefix_name = self.local_fusion_opts.get('prefix', 'local_fusion')
        with_label_smoothing = self.local_fusion_opts.get(
            'with_label_smoothing', False)

        if self.local_fusion_opts['lm_type'] == 'n_gram':
            lm_output_prob = subnet.add_kenlm_layer(
                '{}_lm_output_prob'.format(prefix_name),
                **self.local_fusion_opts['kenlm_opts'])
        else:
            lm_subnet = self.local_fusion_opts['lm_subnet']
            lm_model = self.local_fusion_opts['lm_model']
            vocab_size = self.local_fusion_opts['vocab_size']

            # mark every (top-level) layer of the LM subnet as non-trainable,
            # so only the AM parameters are updated during local-fusion training
            def make_non_trainable(d):
                for layer_opts in d.values():  # each value is a layer dict
                    assert isinstance(layer_opts, dict)
                    layer_opts.update({'trainable': False})

            # Add the LM subnetwork (deep-copied so the caller's dict stays
            # untouched; requires `import copy` at module level).
            lm_subnet_copy = copy.deepcopy(lm_subnet)
            make_non_trainable(lm_subnet_copy)
            lm_subnet_name = '{}_lm_output'.format(prefix_name)
            subnet.add_subnetwork(lm_subnet_name, ['prev:output'],
                                  subnetwork_net=lm_subnet_copy,
                                  load_on_init=lm_model,
                                  trainable=False,
                                  n_out=vocab_size)
            lm_output_prob = subnet.add_activation_layer(
                '{}_lm_output_prob'.format(prefix_name),
                lm_subnet_name,
                activation='softmax',
                target=self.target)  # not in log-space

        # define the new loss criterion; the n-gram LM prob is already in
        # log-space, so only the AM prob goes through safe_log in that case
        if self.local_fusion_opts['lm_type'] == 'n_gram':
            eval_str = "self.network.get_config().typed_value('fusion_eval0_norm')(safe_log(source(0)), source(1))"
        else:
            eval_str = "self.network.get_config().typed_value('fusion_eval0_norm')(safe_log(source(0)), safe_log(source(1)))"
        combo_output_log_prob = subnet.add_eval_layer(
            'combo_output_log_prob', [am_output_prob, lm_output_prob],
            eval=eval_str)

        # local fusion criterion, Eq. (8) in the paper
        extra_opts = {}
        if with_label_smoothing:
            extra_opts['loss_opts'] = {'label_smoothing': self.label_smoothing}
        subnet.add_eval_layer('combo_output_prob',
                              combo_output_log_prob,
                              eval="tf.exp(source(0))",
                              target=self.target,
                              loss='ce',
                              **extra_opts)

        subnet.add_choice_layer('output',
                                combo_output_log_prob,
                                target=self.target,
                                beam_size=self.beam_size,
                                initial_output=0,
                                input_type='log_prob')
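
The eval string above resolves a function named fusion_eval0_norm from the
RETURNN config via typed_value; its definition is not part of this example.
A minimal sketch of what such a function could look like, assuming it
renormalizes the scaled sum of AM and LM log-probabilities over the
vocabulary (the lm_scale argument is a hypothetical parameter, not taken
from the original code):

import tensorflow as tf

def fusion_eval0_norm(am_log_prob, lm_log_prob, lm_scale=0.1):
    # Hypothetical sketch: combine AM and LM log-probs, then renormalize so
    # the result is a proper log-distribution over the vocabulary.
    combined = am_log_prob + lm_scale * lm_log_prob
    return combined - tf.reduce_logsumexp(combined, axis=-1, keepdims=True)

In the actual setup such a function would be registered in the config so that
typed_value('fusion_eval0_norm') can resolve it.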
Example #2
    def _add_external_LM(self,
                         subnet_unit: ReturnnNetwork,
                         am_output_prob,
                         prior_output_prob=None):
        # use the learned scalar layer when scales are trained, else the fixed value
        ext_lm_scale = ('lm_scale' if self.trained_scales
                        else self.ext_lm_opts['lm_scale'])

        is_recurrent = self.ext_lm_opts.get('is_recurrent', False)

        log_lm_prob = False  # whether the LM prob is already in log-space

        if 'gram_lm' in self.ext_lm_opts['name']:
            log_lm_prob = True  # already in log-space
            lm_output_prob = subnet_unit.add_kenlm_layer(
                'lm_output_prob', **self.ext_lm_opts['kenlm_opts'])
        elif is_recurrent:
            ext_lm_subnet = self.ext_lm_opts['lm_subnet']
            assert isinstance(ext_lm_subnet, dict)

            lm_output_prob = self.ext_lm_opts['lm_output_prob_name']
            ext_lm_subnet[lm_output_prob]['target'] = self.target
            ext_lm_subnet[lm_output_prob]['loss'] = None  # TODO: is this needed?
            subnet_unit.update(ext_lm_subnet)  # merge the LM layers into the recurrent unit
        else:
            ext_lm_subnet = self.ext_lm_opts['lm_subnet']
            assert isinstance(ext_lm_subnet, dict)

            ext_lm_model = self.ext_lm_opts['lm_model']
            subnet_unit.add_subnetwork('lm_output',
                                       'prev:output',
                                       subnetwork_net=ext_lm_subnet,
                                       load_on_init=ext_lm_model)
            lm_output_prob = subnet_unit.add_activation_layer(
                'lm_output_prob',
                'lm_output',
                activation='softmax',
                target=self.target)

        fusion_str = 'safe_log(source(0)) + {} * '.format(ext_lm_scale)
        if log_lm_prob:
            fusion_str += 'source(1)'
        else:
            fusion_str += 'safe_log(source(1))'

        fusion_source = [am_output_prob, lm_output_prob]
        if prior_output_prob:
            fusion_source += [prior_output_prob]
            prior_scale = ('prior_scale' if self.trained_scales
                           else self.prior_lm_opts['scale'])
            fusion_str += ' - {} * safe_log(source(2))'.format(prior_scale)

        if self.coverage_term_scale:
            # len(fusion_source) is the index 'accum_coverage' will get below
            fusion_str += ' + {} * source({})'.format(self.coverage_term_scale,
                                                      len(fusion_source))
            fusion_source += ['accum_coverage']

        if self.trained_scales:
            # learned scalar layers replace the fixed scales; this rebuilds the
            # fusion from scratch (any coverage term added above is dropped)
            fusion_str = 'source(0) * safe_log(source(1)) + source(2) * safe_log(source(3))'
            fusion_source = [
                'am_scale', am_output_prob, 'lm_scale', lm_output_prob
            ]
            if prior_output_prob:
                fusion_str += ' - source(4) * safe_log(source(5))'
                fusion_source += ['prior_scale', prior_output_prob]

        subnet_unit.add_eval_layer('combo_output_prob',
                                   source=fusion_source,
                                   eval=fusion_str)
        subnet_unit.add_choice_layer('output',
                                     'combo_output_prob',
                                     target=self.target,
                                     beam_size=self.beam_size,
                                     initial_output=0,
                                     input_type='log_prob')
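
For illustration, the fusion-string assembly above can be reproduced in
isolation. build_fusion_str below is a hypothetical helper, not part of the
original class; it mirrors only the fixed-scale shallow-fusion case (trained
scales and the coverage term are omitted):

def build_fusion_str(lm_scale, log_lm_prob=False, prior_scale=None):
    # Mirror of the eval-string construction in _add_external_LM:
    # AM log-prob plus scaled LM log-prob, minus an optional scaled prior.
    s = 'safe_log(source(0)) + {} * '.format(lm_scale)
    s += 'source(1)' if log_lm_prob else 'safe_log(source(1))'
    if prior_scale is not None:
        s += ' - {} * safe_log(source(2))'.format(prior_scale)
    return s

# e.g. an LM scale of 0.3 with a prior scale of 0.2 yields:
assert build_fusion_str(0.3, prior_scale=0.2) == \
    'safe_log(source(0)) + 0.3 * safe_log(source(1)) - 0.2 * safe_log(source(2))'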