Example #1
0
    def _build_model(
        self,
        init_params,
    ):
        """Construct the training and forward (inference) models.

        Builds shared components and embeddings for both models, then
        wires up the loss/gradient/update operators for the training
        model — either on CPU or data-parallel across GPUs — and a
        forward-only pass for the inference model. Stores the training
        model on ``self.model`` and the inference net on
        ``self.forward_net``.
        """
        train_model = Seq2SeqModelHelper(init_params=init_params)
        eval_model = Seq2SeqModelHelper(init_params=init_params)
        # Both models share the same base structure and embeddings.
        self._build_shared(train_model)
        self._build_embeddings(train_model)
        self._build_shared(eval_model)
        self._build_embeddings(eval_model)

        if self.num_gpus == 0:
            # Single-device (CPU) path: build forward pass, gradients,
            # and clipped dense updates directly on the models.
            losses = self.model_build_fun(train_model)
            train_model.AddGradientOperators(losses)
            self.norm_clipped_grad_update(
                train_model,
                scope='norm_clipped_grad_update'
            )
            self.forward_model_build_fun(eval_model)
        else:
            # Data-parallel path: batch must split evenly across devices.
            assert (self.batch_size % self.num_gpus) == 0

            device_ids = list(range(self.num_gpus))

            data_parallel_model.Parallelize_GPU(
                eval_model,
                input_builder_fun=lambda m: None,
                forward_pass_builder_fun=self.forward_model_build_fun,
                param_update_builder_fun=None,
                devices=device_ids,
            )

            def _clipped_update(m):
                # Per-device dense parameter update with gradient-norm
                # clipping.
                self.norm_clipped_grad_update(
                    m,
                    scope='norm_clipped_grad_update',
                )

            data_parallel_model.Parallelize_GPU(
                train_model,
                input_builder_fun=lambda m: None,
                forward_pass_builder_fun=self.model_build_fun,
                param_update_builder_fun=_clipped_update,
                devices=device_ids,
            )
        # Sparse-gradient updates are applied outside the parallelized
        # section, on the training model as a whole.
        self.norm_clipped_sparse_grad_update(
            train_model,
            scope='norm_clipped_sparse_grad_update',
        )
        self.model = train_model
        self.forward_net = eval_model.net
Example #2
0
    def __init__(self, beam_size, model, go_token_id, eos_token_id):
        """Set up the per-timestep beam-search step model.

        Creates a ``step_model`` that shares parameters with *model*,
        declares the external inputs carried between decoding steps
        (timestep, previous scores/tokens/hypotheses/attention), and
        prepares an int32, flattened view of the previous tokens for
        use inside the step net.
        """
        self.beam_size = beam_size
        self.model = model
        self.go_token_id = go_token_id
        self.eos_token_id = eos_token_id
        self.step_model = Seq2SeqModelHelper(
            name='step_model',
            param_model=self.model,
        )

        # State blobs fed into the step net at every decoding iteration.
        step_input_names = [
            'timestep',
            'scores_t_prev',
            'tokens_t_prev',
            'hypo_t_prev',
            'attention_t_prev',
        ]
        (
            self.timestep,
            self.scores_t_prev,
            self.tokens_t_prev,
            self.hypo_t_prev,
            self.attention_t_prev,
        ) = self.step_model.net.AddExternalInputs(*step_input_names)

        tokens_as_int32 = self.step_model.net.Cast(
            self.tokens_t_prev,
            'tokens_t_prev_int32',
            to=core.DataType.INT32,
        )
        # Reshape in place to a single row; the original shape is kept
        # in 'input_t_int32_old_shape'.
        self.tokens_t_prev_int32_flattened, _ = self.step_model.net.Reshape(
            [tokens_as_int32],
            [tokens_as_int32, 'input_t_int32_old_shape'],
            shape=[1, -1],
        )
Example #3
0
    def __init__(
        self,
        translate_params,
    ):
        """Build an ensemble beam-search translation network.

        Constructs one decoder per model in
        ``translate_params['ensemble_models']``, averages their output
        log-probabilities (and, for decoders that have it, attention
        weights), feeds the averages into a beam-search decoder, runs
        parameter initialization, and creates the final net in the
        workspace.

        Args:
            translate_params: dict with keys ``'ensemble_models'`` (a
                list of model specs, each carrying ``'source_vocab'``,
                ``'target_vocab'`` and ``'model_params'``) and
                ``'decoding_params'`` (with ``'beam_size'``,
                ``'word_reward'`` and ``'unk_reward'``).
        """
        self.models = translate_params['ensemble_models']
        decoding_params = translate_params['decoding_params']
        self.beam_size = decoding_params['beam_size']

        # All ensemble members must share vocabularies, otherwise
        # averaging their per-token log-probabilities is meaningless.
        assert len(self.models) > 0
        source_vocab = self.models[0]['source_vocab']
        target_vocab = self.models[0]['target_vocab']
        for model in self.models:
            assert model['source_vocab'] == source_vocab
            assert model['target_vocab'] == target_vocab

        self.source_vocab_size = len(source_vocab)
        self.target_vocab_size = len(target_vocab)

        # One blob-name scope per ensemble member keeps their decoder
        # blobs from colliding in the shared net.
        self.decoder_scope_names = [
            'model{}'.format(i) for i in range(len(self.models))
        ]

        self.model = Seq2SeqModelHelper(init_params=True)

        self.encoder_inputs = self.model.net.AddExternalInput('encoder_inputs')
        self.encoder_lengths = self.model.net.AddExternalInput(
            'encoder_lengths')
        self.max_output_seq_len = self.model.net.AddExternalInput(
            'max_output_seq_len')

        # Very large per-hypothesis lengths — presumably to effectively
        # disable length-based masking during decoding; TODO confirm
        # against the decoder implementation.
        fake_seq_lengths = self.model.param_init_net.ConstantFill(
            [],
            'fake_seq_lengths',
            shape=[self.beam_size],
            value=100000,
            dtype=core.DataType.INT32,
        )

        beam_decoder = BeamSearchForwardOnly(
            beam_size=self.beam_size,
            model=self.model,
            go_token_id=seq2seq_util.GO_ID,
            eos_token_id=seq2seq_util.EOS_ID,
        )
        step_model = beam_decoder.get_step_model()

        # Build one decoder per ensemble member, collecting its
        # recurrent-state configs, output log-probs and (optional)
        # attention weights.
        state_configs = []
        output_log_probs = []
        attention_weights = []
        for model, scope_name in zip(
                self.models,
                self.decoder_scope_names,
        ):
            (
                state_configs_per_decoder,
                output_log_probs_per_decoder,
                attention_weights_per_decoder,
            ) = self._build_decoder(
                model=self.model,
                step_model=step_model,
                model_params=model['model_params'],
                scope=scope_name,
                previous_tokens=beam_decoder.get_previous_tokens(),
                timestep=beam_decoder.get_timestep(),
                fake_seq_lengths=fake_seq_lengths,
            )
            state_configs.extend(state_configs_per_decoder)
            output_log_probs.append(output_log_probs_per_decoder)
            # Not every decoder uses attention; only average over those
            # that do.
            if attention_weights_per_decoder is not None:
                attention_weights.append(attention_weights_per_decoder)

        assert len(attention_weights) > 0
        # Uniform averaging weight: 1 / (number of attention decoders).
        num_decoders_with_attention_blob = (
            self.model.param_init_net.ConstantFill(
                [],
                'num_decoders_with_attention_blob',
                value=1 / float(len(attention_weights)),
                shape=[1],
            ))
        # [beam_size, encoder_length, 1]
        attention_weights_average = _weighted_sum(
            model=step_model,
            values=attention_weights,
            weight=num_decoders_with_attention_blob,
            output_name='attention_weights_average',
        )

        # Uniform averaging weight: 1 / (number of decoders).
        num_decoders_blob = self.model.param_init_net.ConstantFill(
            [],
            'num_decoders_blob',
            value=1 / float(len(output_log_probs)),
            shape=[1],
        )
        # [beam_size, target_vocab_size]
        output_log_probs_average = _weighted_sum(
            model=step_model,
            values=output_log_probs,
            weight=num_decoders_blob,
            output_name='output_log_probs_average',
        )
        # Placeholder; the real reward values are fed below after
        # param_init_net has run.
        word_rewards = self.model.param_init_net.ConstantFill(
            [],
            'word_rewards',
            shape=[self.target_vocab_size],
            value=0,
        )
        (
            self.output_token_beam_list,
            self.output_prev_index_beam_list,
            self.output_score_beam_list,
            self.output_attention_weights_beam_list,
        ) = beam_decoder.apply(
            inputs=self.encoder_inputs,
            length=self.max_output_seq_len,
            log_probs=output_log_probs_average,
            attentions=attention_weights_average,
            state_configs=state_configs,
            data_dependencies=[],
            word_rewards=word_rewards,
        )

        # Initialize all parameters, then overwrite the zero-filled
        # word_rewards blob with the configured per-word rewards.
        workspace.RunNetOnce(self.model.param_init_net)
        workspace.FeedBlob(
            'word_rewards',
            self.build_word_rewards(
                vocab_size=self.target_vocab_size,
                word_reward=translate_params['decoding_params']['word_reward'],
                unk_reward=translate_params['decoding_params']['unk_reward'],
            ))

        workspace.CreateNet(
            self.model.net,
            input_blobs=[
                str(self.encoder_inputs),
                str(self.encoder_lengths),
                str(self.max_output_seq_len),
            ],
        )

        logger.info('Params created: ')
        for param in self.model.params:
            logger.info(param)