Example #1
    def parameter_setup(self, args):
        # Set trainability of this module.
        for param in self.model.parameters():
            param.requires_grad = bool(args.transfer_paradigm == "finetune")

        self.num_layers = self.model.config.num_hidden_layers
        if args.pytorch_transformers_max_layer >= 0:
            self.max_layer = args.pytorch_transformers_max_layer
            assert self.max_layer <= self.num_layers
        else:
            self.max_layer = self.num_layers

        # Configure scalar mixing, ELMo-style.
        if self.embeddings_mode == "mix":
            if args.transfer_paradigm == "frozen":
                log.warning(
                    "NOTE: pytorch_transformers_output_mode='mix', so scalar "
                    "mixing weights will be fine-tuned even if BERT "
                    "model is frozen.")
            # TODO: if doing multiple target tasks, allow for multiple sets of
            # scalars. See the ELMo implementation here:
            # https://github.com/allenai/allennlp/blob/master/allennlp/modules/elmo.py#L115
            assert len(parse_task_list_arg(args.target_tasks)) <= 1, (
                "pytorch_transformers_output_mode='mix' only supports a single set of "
                "scalars (but if you need this feature, see the TODO in "
                "the code!)")
            # Always have one more mixing weight, for lexical layer.
            self.scalar_mix = scalar_mix.ScalarMix(self.max_layer + 1,
                                                   do_layer_norm=False)
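
For context, here is a minimal sketch of how a ScalarMix built this way is typically applied at forward time, assuming AllenNLP's allennlp.modules.scalar_mix.ScalarMix. The tensor shapes and the layer_outputs list are illustrative and not part of the example above; index 0 stands for the lexical (embedding) layer, which is why the example constructs the mix with max_layer + 1 weights.

import torch
from allennlp.modules.scalar_mix import ScalarMix

# Illustrative shapes only.
batch, seq_len, hidden, max_layer = 2, 8, 768, 12

# One tensor per layer: index 0 is the lexical (embedding) output, followed by
# the activations of transformer layers 1..max_layer.
layer_outputs = [torch.randn(batch, seq_len, hidden) for _ in range(max_layer + 1)]

mix = ScalarMix(max_layer + 1, do_layer_norm=False)
mixed = mix(layer_outputs)   # learned weighted sum, shape (batch, seq_len, hidden)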
Example #2
    def __init__(self, args, cache_dir=None):
        super(BertEmbedderModule, self).__init__()

        self.model = \
            pytorch_pretrained_bert.BertModel.from_pretrained(
                args.bert_model_name,
                cache_dir=cache_dir)
        self.embeddings_mode = args.bert_embeddings_mode

        # Set trainability of this module.
        for param in self.model.parameters():
            param.requires_grad = bool(args.bert_fine_tune)

        # Configure scalar mixing, ELMo-style.
        if self.embeddings_mode == "mix":
            if not args.bert_fine_tune:
                log.warning("NOTE: bert_embeddings_mode='mix', so scalar "
                            "mixing weights will be fine-tuned even if BERT "
                            "model is frozen.")
            # TODO: if doing multiple target tasks, allow for multiple sets of
            # scalars. See the ELMo implementation here:
            # https://github.com/allenai/allennlp/blob/master/allennlp/modules/elmo.py#L115
            assert len(parse_task_list_arg(args.target_tasks)) <= 1, \
                    ("bert_embeddings_mode='mix' only supports a single set of "
                     "scalars (but if you need this feature, see the TODO in "
                     "the code!)")
            num_layers = self.model.config.num_hidden_layers
            self.scalar_mix = scalar_mix.ScalarMix(num_layers + 1,
                                                   do_layer_norm=False)
Example #3
    def __init__(self, args, cache_dir=None):
        super(BertEmbedderModule, self).__init__()

        if "bert_model_file" in args:

            if "bert_classification" in args and args.bert_classification == 1:

                log.info(
                    "Loading fine-tuned BERT Classfication model from file.")
                self.model = PretrainedBertForSequenceClassification.from_pretrained(
                    args.bert_model_name, num_labels=192)
            else:

                log.info("Loading fine-tuned BERT QA model from file.")
                self.model = PretrainedBertForQuestionAnswering.from_pretrained(
                    args.bert_model_name)

            self.model.load_state_dict(torch.load(args.bert_model_file))

        else:

            log.info("Loading pretrained BERT model without fine-tuning.")

            self.model = pytorch_pretrained_bert.BertModel.from_pretrained(
                args.bert_model_name, cache_dir=cache_dir)

        self.embeddings_mode = args.bert_embeddings_mode
        self.embedding_layer = args.bert_embedding_layer

        tokenizer = \
            pytorch_pretrained_bert.BertTokenizer.from_pretrained(
                args.bert_model_name,
                cache_dir=cache_dir)
        self._sep_id = tokenizer.vocab["[SEP]"]
        self._pad_id = tokenizer.vocab["[PAD]"]

        # Set trainability of this module.
        for param in self.model.parameters():
            param.requires_grad = bool(args.transfer_paradigm == 'finetune')

        # Configure scalar mixing, ELMo-style.
        if self.embeddings_mode == "mix":
            if args.transfer_paradigm == 'frozen':
                log.warning("NOTE: bert_embeddings_mode='mix', so scalar "
                            "mixing weights will be fine-tuned even if BERT "
                            "model is frozen.")
            # TODO: if doing multiple target tasks, allow for multiple sets of
            # scalars. See the ELMo implementation here:
            # https://github.com/allenai/allennlp/blob/master/allennlp/modules/elmo.py#L115
            assert len(parse_task_list_arg(args.target_tasks)) <= 1, \
                ("bert_embeddings_mode='mix' only supports a single set of "
                 "scalars (but if you need this feature, see the TODO in "
                 "the code!)")
            num_layers = self.model.config.num_hidden_layers
            self.scalar_mix = scalar_mix.ScalarMix(num_layers + 1,
                                                   do_layer_norm=False)
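
Since this example stores the [SEP] and [PAD] ids, a hedged sketch of the usual downstream use of _pad_id follows: building an attention mask and requesting per-layer outputs from pytorch_pretrained_bert's BertModel. The names embedder and token_ids are hypothetical; the ids shown (101, 102, 0) are from the standard bert-base vocabulary. Note that the returned list covers the transformer layers only, so a mix over num_layers + 1 inputs still needs the embedding output handled separately.

import torch

# Hypothetical usage sketch; `embedder` is an instance of the module above and
# `token_ids` is a padded LongTensor of wordpiece ids from its tokenizer.
token_ids = torch.tensor([[101, 2023, 2003, 102, 0, 0]])   # [CLS] ... [SEP] [PAD] [PAD]

mask = (token_ids != embedder._pad_id).long()               # 1 for real tokens, 0 for padding
encoded_layers, _ = embedder.model(
    token_ids,
    attention_mask=mask,
    output_all_encoded_layers=True)                         # one tensor per transformer layer
assert len(encoded_layers) == embedder.model.config.num_hidden_layers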
Example #4
    def __init__(self, cfg, vocab=40990, n_ctx=512, embeddings_mode='none'):
        super(TransformerModel, self).__init__()
        self.embeddings_mode = embeddings_mode
        self.n_embd = cfg.n_embd

        # Token embedding table, embedding dropout, and a stack of
        # cfg.n_layer identical transformer blocks.
        self.vocab = vocab
        self.embed = nn.Embedding(vocab, cfg.n_embd)
        self.drop = nn.Dropout(cfg.embd_pdrop)
        block = model_pytorch.Block(n_ctx, cfg, scale=True)
        self.h = nn.ModuleList([copy.deepcopy(block) for _ in range(cfg.n_layer)])

        nn.init.normal_(self.embed.weight, std=0.02)

        if self.embeddings_mode == "mix":
            self.scalar_mix = scalar_mix.ScalarMix(cfg.n_layer + 1,
                                                   do_layer_norm=False)
Example #5
    def __init__(self, args, cache_dir=None):
        super(BertEmbedderModule, self).__init__()

        if args.bert_use_pretrain:
            self.model = BertModel.from_pretrained(
                args.input_module, cache_dir=cache_dir
            )
        else:
            self.config = BertConfig(args.bert_config_file)
            self.model = BertModel(self.config)
        self.embeddings_mode = args.bert_embeddings_mode

        tokenizer = BertTokenizer.from_pretrained(
            args.input_module, cache_dir=cache_dir
        )

        self._cls_id = tokenizer.vocab["[CLS]"]
        self._sep_id = tokenizer.vocab["[SEP]"]
        self._pad_id = tokenizer.vocab["[PAD]"]

        # Set trainability of this module.
        for param in self.model.parameters():
            param.requires_grad = bool(args.transfer_paradigm == "finetune")

        # Configure scalar mixing, ELMo-style.
        if self.embeddings_mode == "mix":
            if args.transfer_paradigm == "frozen":
                log.warning(
                    "NOTE: bert_embeddings_mode='mix', so scalar "
                    "mixing weights will be fine-tuned even if BERT "
                    "model is frozen."
                )
            # TODO: if doing multiple target tasks, allow for multiple sets of
            # scalars. See the ELMo implementation here:
            # https://github.com/allenai/allennlp/blob/master/allennlp/modules/elmo.py#L115
            assert len(parse_task_list_arg(args.target_tasks)) <= 1, (
                "bert_embeddings_mode='mix' only supports a single set of "
                "scalars (but if you need this feature, see the TODO in "
                "the code!)"
            )
            num_layers = self.model.config.num_hidden_layers
            self.scalar_mix = scalar_mix.ScalarMix(num_layers + 1, do_layer_norm=False)
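
As a usage note, a hedged sketch of instantiating this module follows. The attribute names mirror the args fields read in the example above; the concrete values (model name, task name, cache path) are purely illustrative, and constructing the module will download the pretrained weights.

from types import SimpleNamespace

# Illustrative argument values only; the field names follow the example above.
args = SimpleNamespace(
    bert_use_pretrain=True,
    input_module="bert-base-uncased",
    bert_config_file=None,               # only read when bert_use_pretrain is False
    bert_embeddings_mode="mix",
    transfer_paradigm="frozen",          # encoder frozen, mixing weights still trainable
    target_tasks="sts-b",
)
embedder = BertEmbedderModule(args, cache_dir="/tmp/bert_cache")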
Example #6
    def parameter_setup(self, args):
        # Set trainability of this module.
        for param in self.model.parameters():
            param.requires_grad = bool(args.transfer_paradigm == "finetune")

        self.num_layers = FLAGS.nb_encoder_layers
        if args.transformers_max_layer >= 0:
            self.max_layer = args.transformers_max_layer
            assert self.max_layer <= self.num_layers
        else:
            self.max_layer = self.num_layers

        if args.transfer_paradigm == "frozen":
            if isinstance(self, (OpenAIGPTEmbedderModule, GPT2EmbedderModule,
                                 TransfoXLEmbedderModule)):
                log.warning(
                    "NOTE: OpenAI GPT, GPT-2 and Transformer-XL add new tokens for classification"
                    "tasks, under 'frozen' transfer_paradigm, their embeddings will not be trained"
                )

        # Configure scalar mixing, ELMo-style.
        if self.output_mode == "mix":
            if args.transfer_paradigm == "frozen":
                log.warning("NOTE: transformers_output_mode='mix', so scalar "
                            "mixing weights will be fine-tuned even if BERT "
                            "model is frozen.")
            # TODO: if doing multiple target tasks, allow for multiple sets of
            # scalars. See the ELMo implementation here:
            # https://github.com/allenai/allennlp/blob/master/allennlp/modules/elmo.py#L115
            assert len(parse_task_list_arg(args.target_tasks)) <= 1, (
                "transformers_output_mode='mix' only supports a single set of "
                "scalars (but if you need this feature, see the TODO in "
                "the code!)")
            # Always have one more mixing weight, for lexical layer.
            self.scalar_mix = scalar_mix.ScalarMix(self.max_layer + 1,
                                                   do_layer_norm=False)
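
Finally, a small self-contained illustration of the warning repeated throughout these examples: freezing the encoder parameters does not freeze the scalar-mixing weights, which remain ordinary trainable parameters. This assumes AllenNLP's ScalarMix; the Linear layer is only a stand-in for the frozen transformer.

import torch
from allennlp.modules.scalar_mix import ScalarMix

encoder = torch.nn.Linear(768, 768)          # stand-in for the frozen transformer
for param in encoder.parameters():
    param.requires_grad = False              # transfer_paradigm == "frozen"

mix = ScalarMix(13, do_layer_norm=False)     # e.g. 12 layers + 1 lexical layer
print([name for name, p in mix.named_parameters() if p.requires_grad])
# e.g. ['gamma', 'scalar_parameters.0', ..., 'scalar_parameters.12']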