    def __init__(self, opt, shared=None):
        super().__init__(opt, shared)
        self.max_output_length = None

        self.model_backwards = self.build_model_backwards()
        if self.model_backwards is None:
            raise AttributeError(
                'build_model_backwards() must return the backwards model'
            )
        if self.use_cuda:
            self.model_backwards.cuda()

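        # ensure all distributed workers start from identical parameters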
        sync_parameters(self.model_backwards)
        print("Total backwards parameters: {}".format(
            self._total_parameters()))
        print("Trainable backwards parameters:  {}".format(
            self._trainable_parameters()))

        if self.fp16:
            self.model_backwards = self.model_backwards.half()

        if shared is None and is_distributed():
            self.model_backwards = torch.nn.parallel.DistributedDataParallel(
                self.model_backwards,
                device_ids=[self.opt['gpu']],
                broadcast_buffers=False)
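
The sync_parameters call above guards against distributed replicas starting from different weights. A minimal sketch of that pattern with plain torch.distributed (the function name and body here are illustrative, not ParlAI's exact implementation):

import torch
import torch.distributed as dist

def sync_parameters_sketch(model: torch.nn.Module) -> None:
    # no-op outside of distributed training
    if not (dist.is_available() and dist.is_initialized()):
        return
    # broadcast every parameter from rank 0 so all replicas match
    with torch.no_grad():
        for p in model.parameters():
            dist.broadcast(p.data, src=0)
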
Example #2
    def build_regret_model(self) -> RagModel:
        """
        Build and return regret RagModel.

        Assume dictionary is the same.
        """
        model_file = self.opt['regret_model_file']
        if model_file:
            assert os.path.exists(
                model_file), 'specify correct path for --regret-model-file'
            regret_opt = Opt.load(f'{model_file}.opt')
            regret_opt['n_docs'] = self.opt[
                'n_docs']  # Urgent that this is the same
            # add keys that were not in this model when originally trained
            regret_opt.update(
                {k: v
                 for k, v in self.opt.items() if k not in regret_opt})
            retriever_shared = None
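            # share the retriever only when both models use the same index and passages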
            if all([
                    regret_opt[k] == self.opt[k] for k in [
                        'rag_retriever_type',
                        'path_to_index',
                        'path_to_dpr_passages',
                    ]
            ]):
                logging.warning(
                    'Sharing retrievers between model and regret model!')
                retriever_shared = self.model.encoder.retriever.share()

            model = RagModel(regret_opt,
                             self.dict,
                             retriever_shared=retriever_shared)
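            # load the saved regret model weights, mapping tensors to CPU first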
            with PathManager.open(self.opt['regret_model_file'], 'rb') as f:
                states = torch.load(
                    f,
                    map_location=lambda cpu, _: cpu,
                    pickle_module=parlai.utils.pickle,
                )
            assert 'model' in states
            model.load_state_dict(states['model'])
            if self.model_parallel:
                ph = PipelineHelper()
                ph.check_compatibility(self.opt)
                model = ph.make_parallel(model)
            else:
                model.cuda()
            if self.fp16:
                model = model.half()

            sync_parameters(model)
            train_params = trainable_parameters(model)
            total_params = total_parameters(model)
            logging.info(
                f"Total regret parameters: {total_params:,d} ({train_params:,d} trainable)"
            )
        else:
            model = self.model

        return model
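
The option-merging step above is the subtle part: the regret model keeps its saved options, but n_docs must match the live model, and any options added after the regret model was trained are backfilled from the current run. A standalone sketch with plain dicts standing in for Opt (all values hypothetical):

saved_opt = {'rag_retriever_type': 'dpr', 'n_docs': 5}
current_opt = {'rag_retriever_type': 'dpr', 'n_docs': 10, 'fp16': True}

regret_opt = dict(saved_opt)
regret_opt['n_docs'] = current_opt['n_docs']  # must match the live model
# backfill keys that did not exist when the regret model was trained
regret_opt.update({k: v for k, v in current_opt.items() if k not in regret_opt})
assert regret_opt == {'rag_retriever_type': 'dpr', 'n_docs': 10, 'fp16': True}
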
Example #3
    def __init__(self, opt: Opt, shared=None):
        init_model, is_finetune = self._get_init_model(opt, shared)
        super().__init__(opt, shared)

        self.beam_size = opt.get('beam_size', 1)
        self.beam_min_length = opt.get('beam_min_length', 1)
        self.beam_block_ngram = opt.get('beam_block_ngram', -1)
        self.beam_context_block_ngram = opt.get('beam_context_block_ngram', -1)
        self.output_token_losses = opt.get('verbose', False)
        self.compute_tokenized_bleu = opt.get('compute_tokenized_bleu', False)

        if shared:
            # set up shared properties
            states = shared.get('states', {})
        else:
            # Note: we cannot change the type of metrics ahead of time, so you
            # should correctly initialize to floats or ints here

            # this is not a shared instance of this class, so do full init
            self.criterion = self.build_criterion()
            self.model = self.build_model()

            if self.model is None or self.criterion is None:
                raise AttributeError(
                    'build_model() and build_criterion() need to return the model or criterion'
                )
            if self.use_cuda:
                self.model.cuda()
                self.criterion.cuda()

            # ensure all distributed copies will always be in sync
            sync_parameters(self.model)
            print("Total parameters: {}".format(self._total_parameters()))
            print("Trainable parameters:  {}".format(self._trainable_parameters()))

            if self.fp16:
                self.model = self.model.half()

            if init_model is not None:
                # load model parameters if available
                print('[ Loading existing model params from {} ]'.format(init_model))
                states = self.load(init_model)
            else:
                states = {}

        if (
            # only build an optimizer if we're training
            'train' in opt.get('datatype', '')
            # and this is the main model, or on every fork if doing hogwild
            and (shared is None or self.opt.get('numthreads', 1) > 1)
        ):
            # build the optimizer, restoring any saved optimizer state
            self.init_optim(
                [p for p in self.model.parameters() if p.requires_grad],
                optim_states=states.get('optimizer'),
                saved_optim_type=states.get('optimizer_type'),
            )
            self.build_lr_scheduler(states, hard_reset=is_finetune)

        if shared is None and is_distributed():
            self.model = torch.nn.parallel.DistributedDataParallel(
                self.model, device_ids=[self.opt['gpu']], broadcast_buffers=False
            )

        self.reset()
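
Note the ordering this constructor enforces: build on CPU, move to GPU, convert to fp16, load checkpoint weights, build the optimizer, and only then wrap in DistributedDataParallel so the wrapper hooks the final parameter set. A condensed, self-contained sketch of that order (the toy model and optimizer are stand-ins, not ParlAI code):

import torch
import torch.nn as nn

model = nn.Linear(16, 4)  # stand-in for build_model()
if torch.cuda.is_available():
    model.cuda()          # move to GPU before fp16 conversion
    model = model.half()  # mirrors the self.fp16 branch above
# checkpoint loading goes here, before the optimizer captures parameter refs
optimizer = torch.optim.SGD(
    [p for p in model.parameters() if p.requires_grad], lr=0.1
)
# DDP wrapping comes last so its hooks see the final parameters:
# model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu_id])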