Example #1
    def __init__(self,  num_classes, model_path='./albert_base'):
        super(AlbertCrf, self).__init__()

        self.albert = AlbertModel.from_pretrained(model_path)
        self.dropout = nn.Dropout(0.1)
        self.fc1 = nn.Linear(self.albert.config.hidden_size, 256)
        self.fc2 = nn.Linear(256, num_classes)
        self.crf = CRF(num_classes, batch_first=True)
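
A forward pass to pair with this constructor might look like the following sketch, assuming the pytorch-crf package (whose CRF returns a log-likelihood from forward() and best tag paths from decode()); the signature and the ReLU between the two linear layers are assumptions, not from the source:

    # Hypothetical forward for AlbertCrf (a sketch, not from the source).
    def forward(self, input_ids, attention_mask, labels=None):
        sequence_output = self.dropout(
            self.albert(input_ids, attention_mask=attention_mask)[0])
        emissions = self.fc2(torch.relu(self.fc1(sequence_output)))
        mask = attention_mask.bool()
        if labels is not None:
            # pytorch-crf returns a log-likelihood; negate it to get a loss.
            return -self.crf(emissions, labels, mask=mask)
        return self.crf.decode(emissions, mask=mask)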
Example #2
 def __init__(self, config):
     super().__init__(config)
     self.num_labels = 2
     self.albert = AlbertModel(config)
     self.qa_outputs = nn.Linear(int(config.hidden_size / 4),
                                 config.num_labels)
     self.hypernet = HyberRnnNet(2048, 512, 1, 'lstm')
     self.infernet = InferenceRNN(64, 2048, 512, 512, 'lstm')
Example #3
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.albert = AlbertModel(config)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

        self.init_weights()
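
This is the standard span-extraction head: qa_outputs maps each token's hidden state to a start and an end logit. A minimal sketch of the matching forward, assuming config.num_labels == 2 (the signature is illustrative, not from the source):

    # Hypothetical forward for the QA model above (a sketch, not from the source).
    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        sequence_output = self.albert(input_ids,
                                      attention_mask=attention_mask,
                                      token_type_ids=token_type_ids)[0]
        logits = self.qa_outputs(sequence_output)        # (batch, seq_len, 2)
        start_logits, end_logits = logits.split(1, dim=-1)
        return start_logits.squeeze(-1), end_logits.squeeze(-1)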
Example #4
 def __init__(self):
     super().__init__()
     self.tokenizer = BertTokenizer.from_pretrained('./alberttiny')
     self.model = AlbertModel.from_pretrained('./alberttiny').to(
         Config.device)
     self.is_src = None
     #self.w2v= gensim.models.KeyedVectors.load_word2vec_format('../news_comment/baike_26g_news_13g_novel_229g.bin', binary=True)
Example #5
    def __init__(self):
        super(AlbertClassifier, self).__init__()
        D_in, H, D_out = 768, 50, 2
        self.albert = nn.DataParallel(
            AlbertModel.from_pretrained('albert-base-v1'))

        self.classifier = nn.Sequential(nn.Linear(D_in, H), nn.ReLU(),
                                        nn.Dropout(0.4), nn.Linear(H, D_out))
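
Here D_in matches albert-base-v1's hidden size (768) and D_out is the number of classes. A sketch of the corresponding forward, taking the [CLS] token's final hidden state as the sentence representation (an assumption, not from the source):

    # Hypothetical forward for AlbertClassifier (a sketch, not from the source).
    def forward(self, input_ids, attention_mask):
        outputs = self.albert(input_ids=input_ids, attention_mask=attention_mask)
        cls_state = outputs[0][:, 0, :]      # hidden state of the [CLS] token
        return self.classifier(cls_state)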
Example #6
 def __init__(self, config, num_rel_labels):
     super(AlbertForRelation, self).__init__(config)
     self.num_labels = num_rel_labels
     self.albert = AlbertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.layer_norm = BertLayerNorm(config.hidden_size * 2)
     self.classifier = nn.Linear(config.hidden_size * 2, self.num_labels)
     self.init_weights()
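
The classifier's input width of hidden_size * 2 suggests the hidden states of two entity mentions are concatenated. A hedged sketch of such a forward (the index arguments and gather logic are assumptions, not from the source):

    # Hypothetical forward for AlbertForRelation (a sketch, not from the source).
    def forward(self, input_ids, attention_mask, e1_index, e2_index, labels=None):
        sequence_output = self.albert(input_ids, attention_mask=attention_mask)[0]
        batch_idx = torch.arange(sequence_output.size(0))
        e1_state = sequence_output[batch_idx, e1_index]   # (batch, hidden)
        e2_state = sequence_output[batch_idx, e2_index]   # (batch, hidden)
        rep = self.layer_norm(torch.cat([e1_state, e2_state], dim=-1))
        logits = self.classifier(self.dropout(rep))
        if labels is not None:
            return nn.CrossEntropyLoss()(logits, labels)
        return logits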
Example #7
 def __init__(self, config):
     super(AlbertCrfForNer, self).__init__(config)
     self.bert = AlbertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, config.num_labels)
     self.crf = CRF(tagset_size=config.num_labels,
                    tag_dictionary=config.label2id,
                    is_bert=True)
     self.init_weights()
Example #8
    def __init__(self, config, num_label):
        super(AlbertQA, self).__init__()
        self.albert = AlbertModel.from_pretrained('albert-xxlarge-v1')
        self.fc = nn.Linear(config.hidden_size, num_label)
        self.drop = nn.Dropout(config.hidden_dropout_prob)
        self.loss = nn.CrossEntropyLoss(reduction='sum')

        torch.nn.init.xavier_uniform_(self.fc.weight)
        torch.nn.init.constant_(self.fc.bias, 0.)
Example #9
 def __init__(self, config):
     super().__init__(config)
     self.output_attentions = True
     self.output_hidden_states = True
     self.num_labels = config.num_labels
     self.albert = AlbertModel(config)
     self.dropout = nn.Dropout(config.classifier_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)
     self.init_weights()
Example #10
    def __init__(self, config):
        super(Model, self).__init__()
        self.config = AlbertConfig.from_pretrained(config.albert_config_path)
        self.albert = AlbertModel.from_pretrained(config.albert_model_path,
                                                  config=self.config)
        for param in self.albert.parameters():
            param.requires_grad = True

        self.fc = nn.Linear(config.hidden_size, config.num_classes)
Example #11
    def __init__(self,
                 args,
                 device,
                 d_model=256,
                 nhead=4,
                 d_ff=1024,
                 nlayers=2,
                 dropout=0.5):
        super(Autoencoder, self).__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model

        self.src_mask = None
        self.pos_encoder = PositionalEncoding(d_model,
                                              dropout)  # encoder's position
        self.pos_decoder = PositionalEncoding(d_model,
                                              dropout)  # decoder's position

        decoder_layers = TransformerDecoderLayer(d_model, nhead, d_ff, dropout)
        decoder_norm = nn.LayerNorm(d_model)
        self.transformer_decoder = TransformerDecoder(decoder_layers, nlayers,
                                                      decoder_norm)

        # self.bert_encoder = BertModel.from_pretrained(args.PRETRAINED_MODEL_NAME, output_hidden_states=args.distill_2)
        if args.use_albert:
            self.bert_encoder = AlbertModel.from_pretrained(
                "clue/albert_chinese_tiny")
            self.bert_embed = self.bert_encoder.embeddings
            # self.tgt_embed = self.bert_embed
            d_vocab = self.bert_encoder.config.vocab_size + 1
            self.tgt_embed = nn.Sequential(
                Embeddings(d_model, d_vocab),
                PositionalEncoding(d_model, dropout))
        elif args.use_tiny_bert:
            self.bert_encoder = AutoModel.from_pretrained(
                "google/bert_uncased_L-2_H-256_A-4")
            self.bert_embed = self.bert_encoder.embeddings
            self.tgt_embed = self.bert_embed
        elif args.use_distil_bert:
            configuration = DistilBertConfig()
            self.bert_encoder = DistilBertModel(configuration)
            self.bert_embed = self.bert_encoder.embeddings
            self.tgt_embed = self.bert_embed
        # self.tgt_embed = self.bert.embeddings
        else:
            self.bert_encoder = BertModel.from_pretrained(
                args.PRETRAINED_MODEL_NAME,
                output_hidden_states=args.distill_2)
            self.bert_embed = self.bert_encoder.embeddings
            self.tgt_embed = self.bert_embed

        self.distill_2 = args.distill_2
        self.gru = nn.GRU(d_model, d_model, 1)
        self.lr = nn.Linear(d_model, self.bert_encoder.config.vocab_size + 1)
        self.sigmoid = nn.Sigmoid()
        self.device = device
        self.init_weights()
Example #12
 def __init__(self, drop_prob=0):
     super(BertQA, self).__init__()
     self.bert = AlbertModel.from_pretrained("albert-large-v2")
     # for QA span prediction
     self.ans_se = nn.Linear(1024, 2)
     # for Beer reviews: ten-level sentiment
     self.sentiment = nn.Linear(1024, 1)
     self.sentiment_movie = nn.Linear(1024, 1)
     self.sigmoid = nn.Sigmoid()
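
The three heads share one encoder: ans_se produces start/end span logits, while the sentiment heads score each token or a pooled representation (1024 is albert-large-v2's hidden size). A sketch of the QA branch (the signature is an assumption, not from the source):

    # Hypothetical QA branch for BertQA (a sketch, not from the source).
    def forward(self, input_ids, attention_mask):
        sequence_output = self.bert(input_ids, attention_mask=attention_mask)[0]
        start_logits, end_logits = self.ans_se(sequence_output).split(1, dim=-1)
        return start_logits.squeeze(-1), end_logits.squeeze(-1)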
Example #13
 def __init__(self, bert_model='bert-base-cased'):
     super(BERTRepresenter, self).__init__()
     if 'albert' in bert_model.lower():
         # from_pretrained is a classmethod; constructing AlbertModel(config)
         # first would just be discarded, so load directly from the checkpoint.
         self.bert = AlbertModel.from_pretrained(bert_model)
     else:
         # config = BertConfig(vocab_size=24000, hidden_size=264)
         self.bert = BertModel.from_pretrained(bert_model)
Example #14
 def __init__(self,hidden_dim=768,num_tags=20):
     super(TagValueModel,self).__init__()
     self.albert = AlbertModel.from_pretrained("ALINEAR/albert-japanese-v2")
     self.dropout = nn.Dropout(0.1)
     self.tags = nn.Linear(768,num_tags)
     #self.tags_insentence = nn.Linear(768,20)
     self.starts = nn.Linear(768,num_tags)
     self.ends = nn.Linear(768,num_tags)
     self.num_tags = num_tags
Example #15
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.albert = AlbertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        self.init_weights()
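
This constructor mirrors transformers' AlbertForSequenceClassification. A minimal usage sketch (the checkpoint name and input text are illustrative):

    # Hypothetical usage; indexing outputs[0] works for both tuple and
    # ModelOutput return types.
    from transformers import AlbertTokenizer, AlbertForSequenceClassification

    tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")
    model = AlbertForSequenceClassification.from_pretrained("albert-base-v2",
                                                            num_labels=2)
    inputs = tokenizer("ALBERT shares weights across its layers.",
                       return_tensors="pt")
    logits = model(**inputs)[0]              # shape: (1, num_labels)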
Example #16
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.model = AlbertModel(config, add_pooling_layer=False)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.loss_func = CrossEntropyLoss()
        self.init_weights()
Example #17
    def __init__(self, config):
        super().__init__(config)
        self.albert = AlbertModel(config)
        self.relu = nn.ReLU()
        self.layer1 = nn.Linear(config.hidden_size, config.mlp_size)
        self.layer2 = nn.Linear(config.mlp_size, config.mlp_size)
        self.layer3 = nn.Linear(config.mlp_size, config.hidden_size)

        self.init_weights()
Example #18
def load_pretrained_encoder(mpath,
                            config="albert_config.json",
                            model="albert_model.bin"):

    b_config = BC.from_pretrained(opt.join(mpath, config))
    encoder = AlbertModel.from_pretrained(opt.join(mpath, model),
                                          config=b_config)

    return encoder
Example #19
    def __init__(self, config):
        super().__init__(config)

        self.albert = AlbertModel(config, add_pooling_layer=False)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.output_layer = nn.Linear(config.hidden_size, 1)
        self.pooling_type = config.pooling_type

        self.init_weights()
Example #20
    def __init__(self, config, label_size):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.model = AlbertModel(config, add_pooling_layer=False)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.crf = CRF(num_tags=label_size, batch_first=True)
        self.init_weights()
Example #21
 def __prepare_model_instance(self, config):
     self.config_class, self.pretrained_model_archive_map, self.base_model_prefix = self.CONF_REF[
         self.model_type]
     if self.model_type == "bert":
         self.model = BertModel(config)
     elif self.model_type == 'roberta':
         self.model = RobertaModel(config)
     elif self.model_type == 'albert':
         self.model = AlbertModel(config)
Example #22
    def __init__(
        self,
        pretrained_model_name=None,
        config_filename=None,
        vocab_size=None,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        max_position_embeddings=512,
    ):
        super().__init__()

        # Check that only one of pretrained_model_name, config_filename, and
        # vocab_size was passed in
        total = 0
        if pretrained_model_name is not None:
            total += 1
        if config_filename is not None:
            total += 1
        if vocab_size is not None:
            total += 1

        if total != 1:
            raise ValueError(
                "Only one of pretrained_model_name, vocab_size, "
                + "or config_filename should be passed into the "
                + "ALBERT constructor."
            )

        # TK: The following code checks the same once again.
        if vocab_size is not None:
            config = AlbertConfig(
                vocab_size_or_config_json_file=vocab_size,
                vocab_size=vocab_size,
                hidden_size=hidden_size,
                num_hidden_layers=num_hidden_layers,
                num_attention_heads=num_attention_heads,
                intermediate_size=intermediate_size,
                hidden_act=hidden_act,
                max_position_embeddings=max_position_embeddings,
            )
            model = AlbertModel(config)
        elif pretrained_model_name is not None:
            model = AlbertModel.from_pretrained(pretrained_model_name)
        elif config_filename is not None:
            config = AlbertConfig.from_json_file(config_filename)
            model = AlbertModel(config)
        else:
            raise ValueError(
                "Either pretrained_model_name or vocab_size must" + " be passed into the ALBERT constructor"
            )

        model.to(self._device)

        self.add_module("albert", model)
        self.config = model.config
        self._hidden_size = model.config.hidden_size
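
The constructor above accepts exactly one of three mutually exclusive sources for the model. Hypothetical instantiations (the wrapper's class name Albert is an assumption, not from the source):

    encoder = Albert(pretrained_model_name="albert-base-v2")  # hub checkpoint
    encoder = Albert(config_filename="albert_config.json")    # local config file
    encoder = Albert(vocab_size=30000)                        # randomly initialized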
Example #23
 def __init__(self, config, num_rel_labels):
     super(AlbertForRelation, self).__init__(config)
     self.num_labels = num_rel_labels
     self.albert = AlbertModel(self.config)
     # self.albert.resize_token_embeddings(vocab_size)
     self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
     self.layer_norm = torch.nn.LayerNorm(self.config.hidden_size * 2)
     self.classifier = nn.Linear(self.config.hidden_size * 2,
                                 self.num_labels)
     self.init_weights()
Example #24
 def __init__(self, model_name, char_vectors, hidden_size, drop_prob):
     super(Embedding, self).__init__()
     size_map = {'albert-base-v2': 768, 'albert-large-v2': 1024,
                 'albert-xlarge-v2': 2048, 'albert-xxlarge-v2': 4096}
     self.drop_prob = drop_prob
     self.albert = AlbertModel.from_pretrained(model_name)
     self.char_embed = nn.Embedding.from_pretrained(char_vectors)
     self.proj = nn.Linear(size_map[model_name], hidden_size, bias=False)
     self.hwy = HighwayEncoder(2, hidden_size*2+2)
     self.avgatt = AverageAttn(hidden_size)
     self.cnn = CNN(hidden_size=hidden_size,embed_size=char_vectors.size(1))
Example #25
def load_pretrained(mpath,
                    config="albert_config.json",
                    model="albert_model.bin"):

    b_config = BC.from_pretrained(opt.join(mpath, config))
    encoder = AlbertModel.from_pretrained(opt.join(mpath, model),
                                          config=b_config)
    tokenizer = BertTokenizer.from_pretrained(mpath)

    return encoder, tokenizer
Example #26
def load_albert(path):
    """
    Load the model.
    """
    vocab_file = os.path.join(path, 'vocab.txt')
    tokenizer = BertTokenizer.from_pretrained(vocab_file)
    # print(tokenizer)
    config = AlbertConfig.from_pretrained(path)
    model = AlbertModel.from_pretrained(path, config=config)
    return model, tokenizer
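
A usage sketch for the loader above (the directory and text are illustrative, not from the source):

    # Hypothetical usage of load_albert.
    import torch

    model, tokenizer = load_albert("./albert_chinese_base")
    batch = tokenizer("这是一个例子。", return_tensors="pt")  # "This is an example."
    with torch.no_grad():
        last_hidden = model(**batch)[0]      # (1, seq_len, hidden_size)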
Example #27
 def __init__(self, config, crf=None):
     super().__init__(config)
     self.num_labels = config.num_labels
     self.albert = AlbertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, config.num_labels)
     self.loss_fct = nn.CrossEntropyLoss()
     self.use_crf = False
     self.crf_layer = crf
     self.init_weights()
Example #28
	def getAlBertEmbeddings(self):
		model = AlbertModel.from_pretrained('albert-base-v2')
		tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
		model.eval()
		tokens_tensor, segments_tensors = self.getIndexs(tokenizer)
		with torch.no_grad():
			last_hidden_states = model(tokens_tensor, attention_mask=segments_tensors)
		# take the [CLS] embedding; the reshape assumes a batch of size 1
		features = last_hidden_states[0][:, 0, :].numpy()
		features = np.reshape(features, features.shape[1])
		return(features.tolist())
Example #29
 def __init__(self, bert_name, num_class, bert_type='bert', drop_out=0.1):
     super(Bert, self).__init__()
     if bert_type == 'bert':
         self.bert = BertModel.from_pretrained(bert_name)
     elif bert_type == 'albert':
         self.bert = AlbertModel.from_pretrained(bert_name)
     else:
         raise Exception('Please enter the correct bert type.')
     self.drop_out = nn.Dropout(p=drop_out)
     self.classifier = nn.Linear(self.bert.config.hidden_size, num_class)
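
A sketch of the matching forward, assuming the pooled output (index 1 of the encoder outputs) feeds the classifier (an assumption, not from the source):

    # Hypothetical forward for the Bert wrapper (a sketch, not from the source).
    def forward(self, input_ids, attention_mask):
        pooled_output = self.bert(input_ids, attention_mask=attention_mask)[1]
        return self.classifier(self.drop_out(pooled_output))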
Example #30
 def __init__(self, albert_model_name,
                    bio_vocab_size, bio_embed_size,
                    feat_vocab_size, feat_embed_size,
                    albert_cache_dir=None):
     super(AlbertFeatureRichEmbedding, self).__init__()
     self.bio_embed_size = bio_embed_size
     self.feat_embed_size = feat_embed_size
     self.albert_embeddings = AlbertModel.from_pretrained(albert_model_name, cache_dir=albert_cache_dir)
     self.bio_embeddings = nn.Embedding(bio_vocab_size, bio_embed_size)
     self.feat_embeddings = nn.Embedding(feat_vocab_size, feat_embed_size)
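
A hedged sketch of how the three embeddings might be combined into one feature-rich representation (the signature and the concatenation are assumptions, not from the source):

    # Hypothetical forward for AlbertFeatureRichEmbedding (not from the source).
    def forward(self, input_ids, attention_mask, bio_ids, feat_ids):
        word_states = self.albert_embeddings(input_ids,
                                             attention_mask=attention_mask)[0]
        bio_states = self.bio_embeddings(bio_ids)
        feat_states = self.feat_embeddings(feat_ids)
        # (batch, seq_len, hidden_size + bio_embed_size + feat_embed_size)
        return torch.cat([word_states, bio_states, feat_states], dim=-1)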