def __init__(
        self,
        pretrained_model_name=None,
        config_filename=None,
        vocab_size=None,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        max_position_embeddings=512,
    ):
        super().__init__()

        # Check that only one of pretrained_model_name, config_filename, and
        # vocab_size was passed in
        total = 0
        if pretrained_model_name is not None:
            total += 1
        if config_filename is not None:
            total += 1
        if vocab_size is not None:
            total += 1

        if total != 1:
            raise ValueError(
                "Only one of pretrained_model_name, vocab_size, "
                + "or config_filename should be passed into the "
                + "ALBERT constructor."
            )

        # Build the model from whichever single argument was provided (the final
        # else branch below re-checks the condition already validated above).
        if vocab_size is not None:
            config = AlbertConfig(
                vocab_size_or_config_json_file=vocab_size,
                vocab_size=vocab_size,
                hidden_size=hidden_size,
                num_hidden_layers=num_hidden_layers,
                num_attention_heads=num_attention_heads,
                intermediate_size=intermediate_size,
                hidden_act=hidden_act,
                max_position_embeddings=max_position_embeddings,
            )
            model = AlbertModel(config)
        elif pretrained_model_name is not None:
            model = AlbertModel.from_pretrained(pretrained_model_name)
        elif config_filename is not None:
            config = AlbertConfig.from_json_file(config_filename)
            model = AlbertModel(config)
        else:
            raise ValueError(
                "Either pretrained_model_name, config_filename, or vocab_size "
                "must be passed into the ALBERT constructor."
            )

        model.to(self._device)

        self.add_module("albert", model)
        self.config = model.config
        self._hidden_size = model.config.hidden_size
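For reference, the three construction paths above map onto the current Hugging Face transformers API roughly as follows; the hyperparameter values and the JSON path are illustrative, and the vocab_size_or_config_json_file argument used above appears to come from an older pytorch-transformers API that plain vocab_size replaces:

from transformers import AlbertConfig, AlbertModel

# 1) Build from explicit hyperparameters (values are illustrative)
config = AlbertConfig(vocab_size=30000, hidden_size=768, num_hidden_layers=12,
                      num_attention_heads=12, intermediate_size=3072,
                      max_position_embeddings=512)
model = AlbertModel(config)

# 2) Load a pretrained checkpoint by name
model = AlbertModel.from_pretrained("albert-base-v2")

# 3) Build from a config JSON file (path is illustrative)
config = AlbertConfig.from_json_file("albert_config.json")
model = AlbertModel(config)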
Example #2
def get_model(args):
    if args.model_size == 'debug':
        num_hidden_layers = 1
        embedding_size = 8
        hidden_size = 16
        num_hidden_groups = 1
        intermediate_size = 32
        num_attention_heads = 2
        args.gen_ratio = 2

    elif args.model_size == 'small':
        num_hidden_layers = 12
        embedding_size = 128
        hidden_size = 256
        num_hidden_groups = 1
        intermediate_size = 1024
        num_attention_heads = 4
    elif args.model_size == 'base':
        num_hidden_layers = 12
        embedding_size = 128
        hidden_size = 768
        num_hidden_groups = 1
        intermediate_size = 3072
        num_attention_heads = 12

    else:
        raise Exception('model_size must be one of: debug, small, base')

    generator_config = AlbertConfig(
        max_position_embeddings=args.seq_length,
        vocab_size=args.vocab_size,
        num_hidden_layers=num_hidden_layers,
        embedding_size=embedding_size,
        num_hidden_groups=num_hidden_groups,
        hidden_size=hidden_size // args.gen_ratio,
        intermediate_size=intermediate_size // args.gen_ratio,
        num_attention_heads=num_attention_heads // args.gen_ratio,
    )

    discriminator_config = AlbertConfig(
        max_position_embeddings=args.seq_length,
        vocab_size=args.vocab_size,
        num_hidden_layers=num_hidden_layers,
        embedding_size=embedding_size,
        num_hidden_groups=num_hidden_groups,
        hidden_size=hidden_size,
        intermediate_size=intermediate_size,
        num_attention_heads=num_attention_heads,
    )

    model = Electra(args,
                    gen_config=generator_config,
                    dis_config=discriminator_config)
    return model
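A minimal sketch of how this factory might be invoked; the argument values are illustrative, gen_ratio must be supplied by the caller for the non-debug sizes, and the Electra class comes from elsewhere in the original project:

from argparse import Namespace

args = Namespace(model_size='small', seq_length=128,
                 vocab_size=30000, gen_ratio=4)
model = get_model(args)  # generator is gen_ratio times narrower than the discriminator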
Example #3
def main(args):
    with open(args.config) as fp:
        data = json.loads(fp.read())
    config = AlbertConfig(**data)
    model = AlbertForMaskedLM(config)
    model: AlbertForMaskedLM = load_tf_weights_in_albert(model, config, args.checkpoint)
    model.save_pretrained(args.output)
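main expects an args object with config, checkpoint, and output attributes; a plausible argparse wiring (the flag names are assumptions, and the transformers imports are as in the snippet above) would be:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", required=True, help="ALBERT config JSON file")
    parser.add_argument("--checkpoint", required=True, help="TensorFlow checkpoint path")
    parser.add_argument("--output", required=True, help="output directory for save_pretrained")
    main(parser.parse_args())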
Example #4
    def __init__(self, my_config, args):
        super(NqModel, self).__init__()
        #albert_base_configuration = AlbertConfig(vocab_size=30000,hidden_size=768,num_attention_heads=12,intermediate_size=3072,
        #                                        attention_probs_dropout_prob=0)
        self.my_mask = None
        self.args = args
        #mfeb/albert-xxlarge-v2-squad2
        self.bert_config = AlbertConfig.from_pretrained("albert-xxlarge-v2")
        # self.bert_config.gradient_checkpointing = True
        # self.bert_config.Extgradient_checkpointing = True
        self.bert = AlbertModel.from_pretrained("albert-xxlarge-v2",
                                                config=self.bert_config)
        #        self.bert = AlbertModel.from_pretrained("albert-base-v2")
        my_config.hidden_size = self.bert.config.hidden_size

        self.right = 0
        self.all = 0
        #self.bert =  AlbertModel(albert_base_configuration)

        #self.bert2 = BertModel(bert_config)

        #self.bert = BertModel(BertConfig())

        #self.bert =  RobertaModel(RobertaConfig(max_position_embeddings=514,vocab_size=50265))

        #print(my_config,bert_config)
        #        self.tok_dense = nn.Linear(my_config.hidden_size, my_config.hidden_size)
        self.tok_dense = nn.Linear(my_config.hidden_size * 2,
                                   my_config.hidden_size * 2)

        #        self.tok_dense2 = nn.Linear(my_config.hidden_size, my_config.hidden_size)
        #        self.para_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)
        #        self.doc_dense = nn.Linear(self.config.hidden_size, self.config.hidden_size)

        self.dropout = nn.Dropout(my_config.hidden_dropout_prob)

        self.tok_outputs = nn.Linear(my_config.hidden_size * 2,
                                     1)  # tuned to avoid getting stuck in bad regions

        #        self.tok_outputs2 = nn.Linear(my_config.hidden_size, 1)
        #        config.max_token_len, config.max_token_relative
        #        self.para_outputs = nn.Linear(self.config.hidden_size, 1)
        #        self.answer_type_outputs = nn.Linear(self.config.hidden_size, 2)

        #        self.tok_to_label = nn.Linear(my_config.max_token_len,2)
        #        self.par_to_label = nn.Linear(my_config.max_paragraph_len,2)

        #self.encoder = Encoder(my_config)
        self.encoder = Encoder(my_config)
        #        self.encoder2 = Encoder(my_config)

        self.my_config = my_config

        self.model_choice = None
        self.ground_answer = None

        self.ACC = 0
        self.ALL = 0

        self.ErrId = []
Example #5
 def __init__(self, albert_name="ALBERT-base", device="cuda"):
     super().__init__()
     if albert_name == "ALBERT-base":
         albert_configuration = AlbertConfig(hidden_size=768,
                                             num_attention_heads=12,
                                             intermediate_size=3072)
     elif albert_name == "ALBERT-xxlarge":
         albert_configuration = AlbertConfig()
     else:
         raise ValueError(f"Unsupported albert_name: {albert_name}")
     self.device = device
     self.tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
     self.model = AlbertModel.from_pretrained('albert-base-v2').to(
         self.device)
     self.linear = nn.Linear(self.model.config.embedding_size, 2).to(self.device)
     self.dropout = nn.Dropout(0.1).to(self.device)
Example #6
    def __init__(self, path: str, device: str = 'cpu'):
        """ Init the NER Albert """
        if not os.path.exists(path):
            raise NotADirectoryError(
                f"{os.path.abspath(path)} must be a directory containing the model files: config, tokenizer, weights.")

        files = os.listdir(path)
        if CONFIG_JSON_FILE not in files:
            raise FileNotFoundError(f"{CONFIG_JSON_FILE} must be in {path}.")
        if WEIGHTS_FILE not in files:
            raise FileNotFoundError(f"{WEIGHTS_FILE} must be in {path}.")

        with open(os.path.join(path, CONFIG_JSON_FILE), "r") as f:
            config = json.load(f)
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        weights = torch.load(os.path.join(path, WEIGHTS_FILE),
                                  map_location=lambda storage, loc: storage)
        # Load pretrained model/tokenizer
        config = AlbertConfig.from_dict(config)
        self.model = AlbertForTokenClassification(config)
        self.model.load_state_dict(weights)
        self.model = self.model.eval()
        self.args = albert_args_ner
        if device == "cuda":
            logger.debug("Setting model with CUDA")
            self.args['device'] = 'cuda'
            self.model.to('cuda')
Example #7
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = AlbertConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
            return_dict=True,
        )

        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
Example #8
def get_albert_for_comparison():
    model_name = 'albert-base-v2'
    config = AlbertConfig.from_pretrained(model_name)
    config.output_hidden_states = False

    input_ids = tf.keras.Input(shape=(128, ), name='input_ids', dtype=tf.int32)
    attention_mask = tf.keras.Input(shape=(128, ),
                                    name='attention_mask',
                                    dtype=tf.int32)

    transformer_model = TFAlbertModel.from_pretrained(model_name,
                                                      config=config)
    embedding_layer = transformer_model([input_ids, attention_mask])[0]

    X = tf.keras.layers.Dense(
        config.hidden_size,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=config.initializer_range),
        activation="relu",
        name="pre_classifier",
    )(embedding_layer[:, 0])
    X = tf.keras.layers.Dropout(config.classifier_dropout_prob)(X)
    output_ = tf.keras.layers.Dense(
        1,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=config.initializer_range),
        name="classifier")(X)

    return tf.keras.Model([input_ids, attention_mask], output_)
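The returned Keras model can then be compiled and trained as usual; the optimizer and loss below are illustrative, with from_logits=True because the single-unit classifier head emits a raw logit:

model = get_albert_for_comparison()
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True))
model.summary()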
Example #9
def main():

    bert_base_config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2)
    bert_base_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', config=bert_base_config)
    count = 0
    for name, param in bert_base_model.named_parameters():
        if param.requires_grad:
            size = 1
            for s in param.data.size():
                size = s * size
            count += size
    print('The total number of parameters in bert_base_uncased: ', count)

    roberta_config = RobertaConfig.from_pretrained('roberta-base', num_labels=2)
    roberta_model = RobertaForSequenceClassification.from_pretrained('roberta-base',config=roberta_config)
    count = 0
    for name, param in roberta_model.named_parameters():
        if param.requires_grad:
            size = 1
            for s in param.data.size():
                size = s * size
            count += size
    print('The total number of parameters in roberta: ', count)

    albert_config = AlbertConfig.from_pretrained('albert-base-v2', num_labels=2)
    albert_model = AlbertForSequenceClassification.from_pretrained('albert-base-v2', config=albert_config)
    count = 0
    for name, param in albert_model.named_parameters():
        if param.requires_grad:
            size = 1
            for s in param.data.size():
                size = s * size
            count += size
    print('The total number of parameters in albert: ', count)
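The three counting loops can be collapsed into a small helper based on Tensor.numel, which yields the same totals:

def count_trainable_parameters(model):
    # Sum the element counts of all trainable tensors
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print('The total number of parameters in albert: ', count_trainable_parameters(albert_model))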
Example #10
    def load_model(self):
        parser = argparse.ArgumentParser()
        args = parser.parse_args()
        args.output_encoded_layers = True
        args.output_attention_layers = True
        args.output_att_score = True
        args.output_att_sum = True
        self.args = args
        # Parse the config files; the teacher and student models share the same vocab
        self.vocab_file = "albert_model/vocab.txt"
        # Here we use the teacher's config and the fine-tuned teacher model; they can be
        # swapped for the student's config and the distilled student model:
        # student config:  config/chinese_bert_config_L4t.json
        # distil student model:  distil_model/gs8316.pkl
        self.bert_config_file_S = "albert_model/config.json"
        self.tuned_checkpoint_S = "trained_teacher_model/test_components.pkl"
        self.max_seq_length = 70
        # Batch size used for prediction
        self.predict_batch_size = 64
        # Load the student config file and check that the max sequence length is smaller than the sequence length in our config
        bert_config_S = AlbertConfig.from_json_file(self.bert_config_file_S)
        bert_config_S.num_labels = self.num_labels

        # Load the tokenizer
        tokenizer = BertTokenizer(vocab_file=self.vocab_file)

        # Load the model
        model_S = AlbertSPC(bert_config_S)
        state_dict_S = torch.load(self.tuned_checkpoint_S, map_location=self.device)
        model_S.load_state_dict(state_dict_S)
        if self.verbose:
            print("模型已加载")

        return tokenizer, model_S
Example #11
    def __init__(self, args, token_vocab_size, output_dim=100):
        super(LMCDecoderBERT, self).__init__()
        self.pool_layers = args.pool_bert

        if args.debug_model:
            bert_dim = 100
            num_hidden_layers = 1
            embedding_size = 100
            intermediate_size = 100
            output_dim = 100
        else:
            bert_dim = 256
            num_hidden_layers = 2
            embedding_size = 128
            intermediate_size = 256
        num_attention_heads = max(1, bert_dim // 64)
        print('Using {} attention heads in decoder'.format(num_attention_heads))

        config = AlbertConfig(
            vocab_size=token_vocab_size,
            embedding_size=embedding_size,
            hidden_size=bert_dim,
            num_hidden_layers=num_hidden_layers,
            intermediate_size=intermediate_size,  # 3072 is default
            num_attention_heads=num_attention_heads,
            output_hidden_states=self.pool_layers
        )

        self.bert = AlbertModel(config)

        self.u = nn.Linear(bert_dim, output_dim, bias=True)
        self.v = nn.Linear(bert_dim, 1, bias=True)
        self.att_linear = nn.Linear(bert_dim, 1, bias=True)
        self.dropout = nn.Dropout(0.2)
Example #12
def prediction():
    # data = input('Please enter test data: ')
    data = "Don't give me your attitude!"
    print(data)
    tokenized_data = tokenizer.tokenize(data)
    tokenized_data.insert(0, "[CLS]")
    tokenized_data.append("[SEP]")
    data_indexed = tokenizer.convert_tokens_to_ids(tokenized_data)
    data = torch.from_numpy(np.array(data_indexed)).to(device)
    data = data.unsqueeze(0)  # [1, seq_length]

    config = AlbertConfig(hidden_size=768)
    model = ALBertClassifyModel(config, num_class=2, fc_dropout=DROPOUT)
    model.load_state_dict(torch.load(SAVE_MODEL_PATH))
    model.to(device)
    model.eval()

    softmax = nn.Softmax(dim=1)

    with torch.no_grad():

        predict = model(data)
        predict_softmax = softmax(predict)
        print(predict_softmax)
        predict = torch.argmax(predict_softmax, dim=1)
        print(predict)
Example #13
    def from_hocon(cls: Type[QueryCodeSiamese],
                   config: ConfigTree) -> QueryCodeSiamese:
        """Load Query1Code1_CodeSearchModel from a config tree"""

        if "training.model.encoder.type" in config:
            if config["training.model.encoder.type"] == "albert":
                logger.info("Creating QueryCodeSiamese with Albert encoder")
                albert_config = AlbertConfig(
                    **config["training.model.encoder"])
                encoder = PreTrainedModelRecordable(AlbertModel(albert_config))
            elif config["training.model.encoder.type"] == "bert":
                logger.info("Creating QueryCodeSiamese with Bert encoder")
                bert_config = BertConfig(**config["training.model.encoder"])
                encoder = PreTrainedModelRecordable(BertModel(bert_config))
        else:
            # default is BERT now
            logger.info("Creating QueryCodeSiamese with Bert encoder")
            bert_config = BertConfig(**config["training.model.encoder"])
            encoder = PreTrainedModelRecordable(BertModel(bert_config))

        model = QueryCodeSiamese(
            encoder=encoder,
            pooler=MeanWeightedPooler(
                input_size=config["training.model.encoder.hidden_size"]))

        return model
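A sketch of the config tree from_hocon expects, assuming the ConfigTree comes from pyhocon and that QueryCodeSiamese and its helper classes are importable from the original project; the encoder keys mirror the lookups above and the values are illustrative:

from pyhocon import ConfigFactory

conf = ConfigFactory.parse_string("""
training.model.encoder {
    type = albert
    vocab_size = 30000
    hidden_size = 256
    num_hidden_layers = 4
    num_attention_heads = 4
    intermediate_size = 1024
}
""")
model = QueryCodeSiamese.from_hocon(conf)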
Example #14
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length],
                               self.vocab_size)

        attention_mask = None
        if self.use_attention_mask:
            attention_mask = random_attention_mask(
                [self.batch_size, self.seq_length])

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                        self.type_vocab_size)

        config = AlbertConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            is_decoder=False,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, token_type_ids, attention_mask
Example #15
    def load_macbert_model(self):
        parser = argparse.ArgumentParser()
        args = parser.parse_args()
        args.output_encoded_layers = True
        args.output_attention_layers = True
        args.output_att_score = True
        args.output_att_sum = True
        self.args = args
        # Parse the config files; the teacher and student models share the same vocab
        self.vocab_file = "mac_bert_model/vocab.txt"
        # Here we use the teacher's config and the fine-tuned teacher model; they can be
        # swapped for the student's config and the distilled student model:
        # student config:  config/chinese_bert_config_L4t.json
        # distil student model:  distil_model/gs8316.pkl
        self.bert_config_file_S = "mac_bert_model/config.json"
        self.tuned_checkpoint_S = "trained_teacher_model/macbert_teacher_max75len_5000.pkl"
        # Load the student config file and check that the max sequence length is smaller than the sequence length in our config
        bert_config_S = AlbertConfig.from_json_file(self.bert_config_file_S)

        # Load the tokenizer
        tokenizer = BertTokenizer(vocab_file=self.vocab_file)

        # Load the model
        model_S = AlbertSPC(bert_config_S)
        state_dict_S = torch.load(self.tuned_checkpoint_S,
                                  map_location=self.device)
        model_S.load_state_dict(state_dict_S)
        if self.verbose:
            print("模型已加载")
        self.predict_tokenizer = tokenizer
        self.predict_model = model_S
        logger.info(f"macbert预测模型加载完成")
Example #16
    def __init__(self, config):
        super(AlBert, self).__init__()
        model_config = AlbertConfig.from_pretrained(
            config.config_file,
            num_labels=config.num_labels,
            finetuning_task=config.task,
        )
        self.albert = AlbertModel.from_pretrained(
            config.model_name_or_path,
            config=model_config,
        )
        if config.requires_grad:
            for param in self.albert.parameters():
                param.requires_grad = True
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        #add the weighted layer
        self.hidden_weight = config.weighted_layer_tag         #must modify the config.json
        self.pooling_tag = config.pooling_tag

        if self.hidden_weight:
            self.weight_layer = config.weighted_layer_num
            #self.weight = torch.zeros(self.weight_layer).to(config.device)
            self.weight = torch.nn.Parameter(torch.FloatTensor(self.weight_layer), requires_grad=True)
            self.softmax = nn.Softmax()
            self.pooler = nn.Sequential(nn.Linear(768, 768), nn.Tanh())

        elif self.pooling_tag:
            self.maxPooling = nn.MaxPool1d(64)
            self.avgPooling = nn.AvgPool1d(64)
            self.pooler = nn.Sequential(nn.Linear(768*3, 768), nn.Tanh())
Example #17
    def __init__(self, model_name, model_type):
        """
        Hyper-parameters found with validation set:
        xlnet-large-casd : epoch = 4,  learning_rate = 1E-5, batch_size = 16, epsilon = 1e-6
        bert-large-uncased : epoch = 4,  learning_rate = 3E-5, batch_size = 16, epsilon = 1e-8
        ALBERT xxlarge-v2 large : epoch = 3,  learning_rate = 5E-5, batch_size = 8, epsilon = 1e-6 to be improved...
        """
        self.model_name = model_name
        self.model_type = model_type

        # Per the transformers library, a batch size of 16 or 32 is advised for training; to limit memory use we take 16.
        # Gradient accumulation has not led to a great improvement and therefore won't be used here.
        if model_type == 'albert':
            self.batch_size = 8
        else:
            self.batch_size = 16

        available_model_name = ["xlnet-large-cased", "bert-large-uncased", "albert-xlarge-v2"]
        available_model_type = ["bert", "xlnet", "albert"]

        if self.model_name not in available_model_name:
            raise Exception("Error : model_name should be in", available_model_name)
        if self.model_type not in available_model_type:
            raise Exception("Error : model_name should be in", available_model_type)

        # Load BertForSequenceClassification, the pretrained BERT model with a single linear regression layer on top of the pooled output
        # Load our fine-tuned model, e.g.: BertForSequenceClassification.from_pretrained('./my_saved_model_directory/')
        if self.model_type == 'bert':
            self.config = BertConfig.from_pretrained(self.model_name, num_labels=1)  # num_labels=1 for regression task
            self.model = BertForSequenceClassification.from_pretrained(self.model_name, config=self.config)
        elif self.model_type == 'xlnet':
            self.config = XLNetConfig.from_pretrained(self.model_name, num_labels=1)
            self.model = XLNetForSequenceClassification.from_pretrained(self.model_name, config=self.config)
        elif self.model_type == 'albert':
            self.config = AlbertConfig.from_pretrained(self.model_name, num_labels=1)
            self.model = AlbertForSequenceClassification.from_pretrained(self.model_name, config=self.config)
        self.model.cuda()

        if self.model_name == 'xlnet-large-cased':
            self.epochs = 4
            self.lr = 1e-5
            self.eps = 1e-6

        elif self.model_name == 'bert-large-uncased':
            self.epochs = 4
            self.lr = 3e-5
            self.eps = 1e-8

        elif self.model_name == 'albert-xxlarge-v2':
            self.epochs = 3
            self.lr = 5e-5
            self.eps = 1e-6

        self.max_grad_norm = 1.0  # Gradient threshold, gradients norms that exceed this threshold are scaled down to match the norm.

        self.optimizer = AdamW(self.model.parameters(), lr=self.lr, eps=self.eps)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.n_gpu = torch.cuda.device_count()
        torch.cuda.get_device_name(0)
Example #18
 def load_model(self, model_path: str, do_lower_case=True):
     config = AlbertConfig.from_pretrained(model_path + "/config.json")
     tokenizer = AlbertTokenizer.from_pretrained(model_path)
     #tokenizer = AlbertTokenizer.from_pretrained('albert-large-v2', do_lower_case=do_lower_case)
     model = AlbertForQuestionAnswering.from_pretrained(model_path,
                                                        from_tf=False,
                                                        config=config)
     return model, tokenizer
Example #19
def load_model_and_tokenizer(manifest):
    zipped_model_path = download_zipped_model(manifest, assert_hash=True)
    unzipped_model_dir = get_unzipped_dir_path(zipped_model_path)
    config = AlbertConfig.from_pretrained(unzipped_model_dir)
    model = TFAlbertForSequenceClassification.from_pretrained(
        unzipped_model_dir, config=config)
    tokenizer = AlbertTokenizer.from_pretrained(unzipped_model_dir)
    return model, tokenizer
Example #20
 def __init__(self, transformer_model, is_train):
     super(LMNER, self).__init__()
     config = AlbertConfig.from_pretrained(transformer_model)
     self.transformer_model = AlbertForMaskedLM.from_pretrained(
         transformer_model, config=config)
     # Whether the BERT weights should be trained
     for name, param in self.transformer_model.named_parameters():
         param.requires_grad = is_train
Example #21
def launch(training_flag, test_flag):
    tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
    if training_flag:
        model = AlbertForTokenClassification.from_pretrained(
            'albert-base-v2', num_labels=len(tags_vals))
        ## --------- 12. Optimizer -> weight regularization is one way to reduce overfitting in a deep learning model
        """ 
        Last keras optimization 2020 (rates from 0.01 seem to be best hyperparamater )for weight regularization for weights layers
            from keras.layers import LSTM
            from keras.regularizers import l2
        model.add(LSTM(32, kernel_regularizer=l2(0.01), recurrent_regularizer=l2(0.01), bias_regularizer=l2(0.01))) 
        Note :  BERT not include beta an gamma parametres for optimization
        """
        FULL_FINETUNING = True
        if FULL_FINETUNING:
            param_optimizer = list(model.named_parameters())
            no_decay = ['bias', 'gamma', 'beta']
            optimizer_grouped_parameters = [{
                'params': [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay_rate':
                0.01
            }, {
                'params': [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay_rate':
                0.0
            }]
        else:
            param_optimizer = list(model.classifier.named_parameters())
            optimizer_grouped_parameters = [{
                "params": [p for n, p in param_optimizer]
            }]
        optimizer = Adam(optimizer_grouped_parameters, lr=args.lr)
        launch_training(training_path=args.training_data,
                        training_epochs=args.epochs,
                        valid_path=args.validate_data,
                        training_batch_size=1,
                        model=model,
                        model_path=model_path,
                        tokenizer=tokenizer,
                        optimizer=optimizer)
    if test_flag:
        if args.save:
            model_path = args.save + 'pytorch_model.bin'
            config = AlbertConfig.from_json_file(args.save + '/config.json')
            model = AlbertForTokenClassification.from_pretrained(args.save,
                                                                 config=config)
        else:
            model = AlbertForTokenClassification.from_pretrained(
                'albert-base-v2', num_labels=len(tags_vals))
        launch_test_directory(test_path=test_flag,
                              model=model,
                              tokenizer=tokenizer)
Example #22
 def __init__(self, bert_model='bert-base-cased'):
     super(BERTRepresenter, self).__init__()
     if 'albert' in bert_model.lower():
         config = AlbertConfig()
         self.bert = AlbertModel(config).from_pretrained(bert_model)
     else:
         config = BertConfig()
         # config = BertConfig(vocab_size=24000, hidden_size=264)
         self.bert = BertModel(config).from_pretrained(bert_model)
Example #23
    def __init__(self, config):
        super(Model, self).__init__()
        self.config = AlbertConfig.from_pretrained(config.albert_config_path)
        self.albert = AlbertModel.from_pretrained(config.albert_model_path,
                                                  config=self.config)
        for param in self.albert.parameters():
            param.requires_grad = True

        self.fc = nn.Linear(config.hidden_size, config.num_classes)
Example #24
def load_pretrained_encoder(mpath,
                            config="albert_config.json",
                            model="albert_model.bin"):

    b_config = BC.from_pretrained(opt.join(mpath, config))
    encoder = AlbertModel.from_pretrained(opt.join(mpath, model),
                                          config=b_config)

    return encoder
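BC and opt are aliases defined elsewhere in the original module; presumably something along these lines (an assumption, not shown in the snippet):

import os.path as opt
from transformers import AlbertConfig as BC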
Example #25
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, albert_config_file,
                                     pytorch_dump_path):
    # Initialise PyTorch model
    config = AlbertConfig.from_json_file(albert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = AlbertForMaskedLM(config)
    load_tf_weights_in_albert(model, config, tf_checkpoint_path)
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
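A usage sketch with illustrative paths; note that only the state dict is written, so the ALBERT config JSON has to be kept alongside it to rebuild the model later:

convert_tf_checkpoint_to_pytorch(
    tf_checkpoint_path="albert_base/model.ckpt-best",      # illustrative TF checkpoint
    albert_config_file="albert_base/albert_config.json",   # illustrative config path
    pytorch_dump_path="albert_base/pytorch_model.bin",     # output weights file
)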
Example #26
    def __init__(self, args, train_dataloader, test_dataloader=None):
        self.args = args

        cuda_condition = torch.cuda.is_available() and args.with_cuda

        self.device = torch.device("cuda" if cuda_condition else "cpu")
        print('Current cuda device ', torch.cuda.current_device())  # check

        if args.weight_load:
            config = AutoConfig.from_pretrained(args.pre_trained_model_path)
            model_state_dict = torch.load(
                os.path.join(args.pre_trained_model_path, 'pytorch_model.bin'))
            self.model = CXRBERT.from_pretrained(args.pre_trained_model_path,
                                                 state_dict=model_state_dict,
                                                 config=config,
                                                 args=args).to(self.device)
            print('training restart with mid epoch')
            print(config)
        else:
            if args.bert_model == "albert-base-v2":
                config = AlbertConfig.from_pretrained(args.bert_model)
            elif args.bert_model == "emilyalsentzer/Bio_ClinicalBERT":
                config = AutoConfig.from_pretrained(args.bert_model)
            elif args.bert_model == "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12":
                config = AutoConfig.from_pretrained(args.bert_model)
            elif args.bert_model == "bert-small-scratch":
                config = BertConfig.from_pretrained(
                    "google/bert_uncased_L-4_H-512_A-8")
            elif args.bert_model == "bert-base-scratch":
                config = BertConfig.from_pretrained("bert-base-uncased")
            else:
                config = BertConfig.from_pretrained(
                    args.bert_model)  # bert-base, small, tiny

            self.model = CXRBERT(config, args).to(self.device)

        wandb.watch(self.model)

        if args.with_cuda and torch.cuda.device_count() > 1:
            print("Using %d GPUS for BERT" % torch.cuda.device_count())
            self.model = nn.DataParallel(self.model,
                                         device_ids=args.cuda_devices)

        self.train_data = train_dataloader
        self.test_data = test_dataloader

        self.optimizer = AdamW(self.model.parameters(), lr=args.lr)

        self.mlm_criterion = nn.CrossEntropyLoss(ignore_index=-100)
        self.itm_criterion = nn.CrossEntropyLoss()

        self.log_freq = args.log_freq
        self.step_cnt = 0

        print("Total Parameters:",
              sum([p.nelement() for p in self.model.parameters()]))
Example #27
def load_pretrained(mpath,
                    config="albert_config.json",
                    model="albert_model.bin"):

    b_config = BC.from_pretrained(opt.join(mpath, config))
    encoder = AlbertModel.from_pretrained(opt.join(mpath, model),
                                          config=b_config)
    tokenizer = BertTokenizer.from_pretrained(mpath)

    return encoder, tokenizer
Example #28
def load_albert(path):
    """
    Load the model
    """
    vocab_file = os.path.join(path, 'vocab.txt')
    tokenizer = BertTokenizer.from_pretrained(vocab_file)
    # print(tokenizer)
    config = AlbertConfig.from_pretrained(path)
    model = AlbertModel.from_pretrained(path, config=config)
    return model, tokenizer
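A minimal usage sketch, assuming path points at a local ALBERT directory containing vocab.txt and the model files (the directory name is hypothetical):

import torch

model, tokenizer = load_albert("./albert_chinese_base")
inputs = tokenizer("An example sentence", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
print(outputs.last_hidden_state.shape)  # (1, sequence_length, hidden_size)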
Example #29
    def load_train_model(self):
        """
        Initialize the model for training
        :return:
        """
        parser = argparse.ArgumentParser()
        args = parser.parse_args()
        args.output_encoded_layers = True
        args.output_attention_layers = True
        args.output_att_score = True
        args.output_att_sum = True
        self.learning_rate = 2e-05
        # Proportion of steps used for learning-rate warmup
        self.warmup_proportion = 0.1
        self.num_train_epochs = 1
        # Learning-rate scheduler to use
        self.schedule = 'slanted_triangular'
        self.s_opt1 = 30.0
        self.s_opt2 = 0.0
        self.s_opt3 = 1.0
        self.weight_decay_rate = 0.01
        # Save the model once every this many training epochs
        self.ckpt_frequency = 1
        # Where the model and logs are saved
        self.output_dir = "output_root_dir/train_api"
        # Number of gradient accumulation steps
        self.gradient_accumulation_steps = 1
        self.args = args
        # Parse the config files; the teacher and student models share the same vocab
        self.vocab_file = "albert_model/vocab.txt"
        self.bert_config_file_S = "albert_model/config.json"
        self.tuned_checkpoint_S = "albert_model/pytorch_model.bin"
        # Load the student config file and check that the max sequence length is smaller than the sequence length in our config
        bert_config_S = AlbertConfig.from_json_file(self.bert_config_file_S)

        # Load the tokenizer
        tokenizer = BertTokenizer(vocab_file=self.vocab_file)

        # Load the model
        model_S = AlbertSPC(bert_config_S,
                            num_labels=self.num_labels,
                            args=self.args)
        state_dict_S = torch.load(self.tuned_checkpoint_S,
                                  map_location=self.device)
        state_weight = {
            k[5:]: v
            for k, v in state_dict_S.items() if k.startswith('bert.')
        }
        missing_keys, _ = model_S.bert.load_state_dict(state_weight,
                                                       strict=False)
        # Verify that no parameters were dropped
        assert len(missing_keys) == 0
        self.train_tokenizer = tokenizer
        self.train_model = model_S
        logger.info(f"训练模型{self.tuned_checkpoint_S}加载完成")
Example #30
    def __init__(self, coordinator_args: CoordinatorArguments,
                 collab_optimizer_args: CollaborativeOptimizerArguments,
                 averager_args: AveragerArguments, dht: hivemind.DHT):
        self.save_checkpoint_step_interval = coordinator_args.save_checkpoint_step_interval
        self.repo_path = coordinator_args.repo_path
        self.upload_interval = coordinator_args.upload_interval
        self.previous_step = -1

        config = AlbertConfig.from_pretrained(
            coordinator_args.model_config_path)
        self.model = AlbertForPreTraining(config)

        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in self.model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                0.01,
            },
            {
                "params": [
                    p for n, p in self.model.named_parameters()
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay":
                0.0,
            },
        ]

        opt = Lamb(
            optimizer_grouped_parameters,
            lr=0.00176,
            weight_decay=0.01,
            clamp_value=10000.0,
            debias=True,
        )

        adjusted_target_batch_size = collab_optimizer_args.target_batch_size - collab_optimizer_args.batch_size_lead

        self.collaborative_optimizer = hivemind.CollaborativeOptimizer(
            opt=opt,
            dht=dht,
            prefix=experiment_prefix,
            compression_type=hivemind.utils.CompressionType.Value(
                collab_optimizer_args.compression),
            throughput=collab_optimizer_args.bandwidth,
            target_batch_size=adjusted_target_batch_size,
            client_mode=collab_optimizer_args.client_mode,
            verbose=True,
            start=True,
            **asdict(averager_args))
        self.previous_timestamp = time.time()