Example #1
 def __init__(self,
              args,
              gpu=-1,
              check_for_lowercase=True,
              embeddings_dim=0,
              verbose=True,
              unique_words_list=None):
     SeqIndexerBaseEmbeddings.__init__(
         self,
         gpu=gpu,
         check_for_lowercase=check_for_lowercase,
         zero_digits=True,
         pad='<pad>',
         unk='<unk>',
         load_embeddings=True,
         embeddings_dim=embeddings_dim,
         verbose=verbose)
     # Statistics on how input words were matched against the pretrained embeddings
     self.original_words_num = 0
     self.lowercase_words_num = 0
     self.zero_digits_replaced_num = 0
     self.zero_digits_replaced_lowercase_num = 0
     self.capitalize_word_num = 0
     self.uppercase_word_num = 0
     self.unique_words_list = unique_words_list
     self.args = args
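A minimal usage sketch for this word-embedding indexer. The enclosing class name (SeqIndexerWord here) and the fields of args are assumptions, since the snippet shows only the __init__ body:

# Hypothetical usage; SeqIndexerWord and the args fields are assumed names.
from argparse import Namespace

args = Namespace(emb_fn='embeddings/glove.6B.100d.txt')
indexer = SeqIndexerWord(args=args,
                         gpu=-1,
                         embeddings_dim=100,
                         unique_words_list=['the', 'cat', 'sat'])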
Example #2
 def __init__(self,
              gpu=-1,
              check_for_lowercase=True,
              embeddings_dim=0,
              verbose=True,
              options_file='',
              weights_file='',
              num_layers_=2,
              dropout_=0.1):
     SeqIndexerBaseEmbeddings.__init__(
         self,
         gpu=gpu,
         check_for_lowercase=check_for_lowercase,
         zero_digits=True,
         pad='<pad>',
         unk='<unk>',
         load_embeddings=True,
         embeddings_dim=embeddings_dim,
         verbose=verbose,
         isElmo=True)
     print("create seq indexer elmo")
     self.no_context_base = True
     self.elmo = True
     self.options_fn = options_file
     self.weights_fn = weights_file
     self.emb = Elmo(options_file,
                     weights_file,
                     num_layers_,
                     dropout=dropout_)
     self.embeddings_dim = self.emb.get_output_dim()
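A hedged instantiation sketch; the class name SeqIndexerElmo and the file paths are assumptions. num_layers_ is forwarded to allennlp's Elmo as the number of output representations:

# Hypothetical usage; the class name and file paths are assumed.
options_file = 'elmo/elmo_2x4096_512_2048cnn_2xhighway_options.json'
weights_file = 'elmo/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5'
indexer = SeqIndexerElmo(gpu=0,
                         options_file=options_file,
                         weights_file=weights_file,
                         num_layers_=2,
                         dropout_=0.1)
print(indexer.embeddings_dim)  # 1024 for the standard ELMo model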
Example #3
    def __init__(
        self,
        gpu=-1,
        check_for_lowercase=True,
        embeddings_dim=0,
        verbose=True,
        path_to_pretrained="xlnet-base-cased",
        model_frozen=True,
        bos_token="<s>",
        eos_token="</s>",
        unk_token="<unk>",
        sep_token="<sep>",
        pad_token="<pad>",
        cls_token="<cls>",
        mask_token="<mask>",
    ):
        SeqIndexerBaseEmbeddings.__init__(
            self,
            gpu=gpu,
            check_for_lowercase=check_for_lowercase,
            zero_digits=True,
            bos_token=bos_token,
            eos_token=eos_token,
            pad=pad_token,
            unk=unk_token,
            sep_token=sep_token,
            cls_token=cls_token,
            mask_token=mask_token,
            load_embeddings=True,
            embeddings_dim=embeddings_dim,
            verbose=verbose,
            isBert=False,
            isXlNet=True)

        print("create seq indexer Transformers from Model {}".format(
            path_to_pretrained))

        self.xlnet = True

        self.path_to_pretrained = path_to_pretrained
        self.tokenizer = XLNetTokenizer.from_pretrained(path_to_pretrained)
        self.config = XLNetConfig.from_pretrained(path_to_pretrained)
        self.emb = XLNetModel.from_pretrained(path_to_pretrained)
        self.frozen = model_frozen
        # Freeze all XLNet parameters; the loop below re-freezes key sub-modules explicitly
        for param in self.emb.parameters():
            param.requires_grad = False
        for elem in [
                self.emb.word_embedding, self.emb.layer, self.emb.dropout
        ]:
            for param in elem.parameters():
                param.requires_grad = False

        # When not frozen, unfreeze only the pooling parameters
        if not self.frozen:
            for param in self.emb.pooler.parameters():
                param.requires_grad = True
        self.emb.eval()
        print("XLNET model loaded succesifully")
Example #4
 def __init__(self, gpu):
     # Vocabulary-only indexer: no lowercasing, no digit normalization,
     # and no pretrained embeddings are loaded
     SeqIndexerBaseEmbeddings.__init__(self,
                                       gpu=gpu,
                                       check_for_lowercase=False,
                                       zero_digits=False,
                                       pad='<pad>',
                                       unk='<unk>',
                                       load_embeddings=False,
                                       embeddings_dim=0,
                                       verbose=True)
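This constructor builds a bare vocabulary indexer: load_embeddings=False and embeddings_dim=0 mean no embedding matrix is loaded, so it only maps items to indices with <pad> and <unk> entries. A usage sketch, with the class name SeqIndexerTag assumed:

# Hypothetical usage; the class name SeqIndexerTag is assumed.
tag_indexer = SeqIndexerTag(gpu=-1)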
Example #5
    def __init__(
            self,
            gpu=-1,
            check_for_lowercase=True,
            embeddings_dim=0,
            verbose=True,
            path_to_pretrained="/home/vika/targer/pretrained/uncased_L-12_H-768_A-12/",
            bert_type='bert-base-uncased',
            model_frozen=True):
        SeqIndexerBaseEmbeddings.__init__(
            self,
            gpu=gpu,
            check_for_lowercase=check_for_lowercase,
            zero_digits=True,
            pad='<pad>',
            unk='<unk>',
            load_embeddings=True,
            embeddings_dim=embeddings_dim,
            verbose=verbose,
            isBert=True)

        print("create seq indexer BERT")

        self.bert = True
        self.path_to_pretrained = path_to_pretrained
        #self.tokenizer = tokenizer_custom_bert.FullTokenizer(path_to_pretrained + 'vocab.txt')
        self.tokenizer = tokenizer_custom_bert.BertTokenizer.from_pretrained(
            "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt"
        )
        self.emb = BertModel.from_pretrained(path_to_pretrained)
        self.frozen = model_frozen
        # Freeze all BERT parameters; the loop below re-freezes the embedding sub-modules explicitly
        for param in self.emb.parameters():
            param.requires_grad = False
        for elem in [
                self.emb.embeddings.word_embeddings,
                self.emb.embeddings.position_embeddings,
                self.emb.embeddings.token_type_embeddings,
                self.emb.embeddings.LayerNorm
        ]:
            for param in elem.parameters():
                param.requires_grad = False

        # Freeze/unfreeze layers of the loaded pre-trained BERT model. Currently
        # only the pooler layer is unfrozen; encoder layers can be unfrozen the
        # same way, e.g.:
        #     for param in self.emb.encoder.layer[i].parameters():
        #         param.requires_grad = True
        if not self.frozen:
            for param in self.emb.pooler.parameters():
                param.requires_grad = True
        self.emb.eval()
        print("Bert model loaded succesifully")