# Indexer for pre-trained word embeddings.
def __init__(self, args, gpu=-1, check_for_lowercase=True, embeddings_dim=0,
             verbose=True, unique_words_list=None):
    SeqIndexerBaseEmbeddings.__init__(self, gpu=gpu,
                                      check_for_lowercase=check_for_lowercase,
                                      zero_digits=True, pad='<pad>',
                                      unk='<unk>', load_embeddings=True,
                                      embeddings_dim=embeddings_dim,
                                      verbose=verbose)
    # Counters for how corpus words were matched against the embedding
    # vocabulary (exact, lowercased, digits zeroed, etc.).
    self.original_words_num = 0
    self.lowercase_words_num = 0
    self.zero_digits_replaced_num = 0
    self.zero_digits_replaced_lowercase_num = 0
    self.capitalize_word_num = 0
    self.uppercase_word_num = 0
    self.unique_words_list = unique_words_list
    self.args = args
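# The counters above track how each corpus word is matched against the
# pre-trained embedding vocabulary. A minimal sketch of that fallback lookup
# (exact match, then lowercase, then digits replaced by zeros, then both);
# `lookup_embedding` and `embedding_vocab` are hypothetical names for
# illustration, not part of the original class:
import re

def lookup_embedding(word, embedding_vocab):
    for candidate in (word,
                      word.lower(),
                      re.sub(r'\d', '0', word),
                      re.sub(r'\d', '0', word.lower())):
        if candidate in embedding_vocab:
            return candidate  # key under which the vector is stored
    return None  # no match: fall back to the <unk> vector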
# Indexer for contextual ELMo embeddings.
def __init__(self, gpu=-1, check_for_lowercase=True, embeddings_dim=0,
             verbose=True, options_file='', weights_file='', num_layers_=2,
             dropout_=0.1):
    SeqIndexerBaseEmbeddings.__init__(self, gpu=gpu,
                                      check_for_lowercase=check_for_lowercase,
                                      zero_digits=True, pad='<pad>',
                                      unk='<unk>', load_embeddings=True,
                                      embeddings_dim=embeddings_dim,
                                      verbose=verbose, isElmo=True)
    print("create seq indexer elmo")
    self.no_context_base = True
    self.elmo = True
    self.options_fn = options_file
    self.weights_fn = weights_file
    self.emb = Elmo(options_file, weights_file, num_layers_, dropout=dropout_)
    # get_output_dim is a method: it must be called, otherwise
    # embeddings_dim would hold a bound method instead of an int.
    self.embeddings_dim = self.emb.get_output_dim()
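# A minimal usage sketch for the Elmo module constructed above, assuming the
# standard allennlp API (batch_to_ids converts tokenized sentences into
# character ids); the two file paths are placeholders:
from allennlp.modules.elmo import Elmo, batch_to_ids

options_file = "elmo_options.json"   # placeholder path
weights_file = "elmo_weights.hdf5"   # placeholder path
elmo = Elmo(options_file, weights_file, num_output_representations=2,
            dropout=0.1)

sentences = [["The", "cat", "sat"], ["Dogs", "bark"]]
character_ids = batch_to_ids(sentences)   # shape: (batch, max_len, 50)
output = elmo(character_ids)
# output['elmo_representations'] is a list of num_output_representations
# tensors, each of shape (batch, max_len, elmo.get_output_dim()).
embeddings = output['elmo_representations'][0]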
# Indexer for contextual XLNet embeddings.
def __init__(self, gpu=-1, check_for_lowercase=True, embeddings_dim=0,
             verbose=True, path_to_pretrained="xlnet-base-cased",
             model_frozen=True, bos_token="<s>", eos_token="</s>",
             unk_token="<unk>", sep_token="<sep>", pad_token="<pad>",
             cls_token="<cls>", mask_token="<mask>"):
    SeqIndexerBaseEmbeddings.__init__(self, gpu=gpu,
                                      check_for_lowercase=check_for_lowercase,
                                      zero_digits=True, bos_token=bos_token,
                                      eos_token=eos_token, pad=pad_token,
                                      unk=unk_token, sep_token=sep_token,
                                      cls_token=cls_token,
                                      mask_token=mask_token,
                                      load_embeddings=True,
                                      embeddings_dim=embeddings_dim,
                                      verbose=verbose, isBert=False,
                                      isXlNet=True)
    print("create seq indexer Transformers from Model {}".format(
        path_to_pretrained))
    self.xlnet = True
    self.path_to_pretrained = path_to_pretrained
    self.tokenizer = XLNetTokenizer.from_pretrained(path_to_pretrained)
    self.config = XLNetConfig.from_pretrained(path_to_pretrained)
    self.emb = XLNetModel.from_pretrained(path_to_pretrained)
    self.frozen = model_frozen
    # Freeze all parameters of the pre-trained model; the second loop over
    # the embedding/layer/dropout submodules is redundant (the first loop
    # already covers them) but kept explicit.
    for param in self.emb.parameters():
        param.requires_grad = False
    for elem in [self.emb.word_embedding, self.emb.layer, self.emb.dropout]:
        for param in elem.parameters():
            param.requires_grad = False
    # If the model is not frozen, fine-tune only the pooler layer.
    if not self.frozen:
        for param in self.emb.pooler.parameters():
            param.requires_grad = True
    self.emb.eval()
    print("XLNet model loaded successfully")
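# A minimal sketch of using the frozen XLNet model above as a feature
# extractor, assuming the pytorch-transformers-era API in which the model
# returns a tuple whose first element is the last hidden state (the import
# path may be `transformers` instead, depending on the installed version):
import torch
from pytorch_transformers import XLNetTokenizer, XLNetModel

tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
model = XLNetModel.from_pretrained("xlnet-base-cased")
model.eval()

input_ids = torch.tensor([tokenizer.encode("Hello world")])
with torch.no_grad():                        # no gradients: model is frozen
    last_hidden_state = model(input_ids)[0]  # (batch, seq_len, hidden_size)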
# Indexer for tag sequences: no embeddings, no text normalization.
def __init__(self, gpu):
    SeqIndexerBaseEmbeddings.__init__(self, gpu=gpu,
                                      check_for_lowercase=False,
                                      zero_digits=False, pad='<pad>',
                                      unk='<unk>', load_embeddings=False,
                                      embeddings_dim=0, verbose=True)
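# Since this indexer loads no embeddings and does no normalization, it only
# maps label strings to integer ids. A minimal sketch of that behavior with
# hypothetical names (item2idx is illustrative, not necessarily the real
# attribute name):
tags = ['<pad>', '<unk>', 'B-PER', 'I-PER', 'O']
item2idx = {tag: idx for idx, tag in enumerate(tags)}
indexed = [item2idx.get(t, item2idx['<unk>']) for t in ['O', 'B-PER', 'B-LOC']]
# -> [4, 2, 1]  ('B-LOC' is unseen, so it maps to <unk>)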
# Indexer for contextual BERT embeddings.
def __init__(self, gpu=-1, check_for_lowercase=True, embeddings_dim=0,
             verbose=True,
             path_to_pretrained="/home/vika/targer/pretrained/uncased_L-12_H-768_A-12/",
             bert_type='bert-base-uncased', model_frozen=True):
    SeqIndexerBaseEmbeddings.__init__(self, gpu=gpu,
                                      check_for_lowercase=check_for_lowercase,
                                      zero_digits=True, pad='<pad>',
                                      unk='<unk>', load_embeddings=True,
                                      embeddings_dim=embeddings_dim,
                                      verbose=verbose, isBert=True)
    print("create seq indexer BERT")
    self.bert = True
    self.path_to_pretrained = path_to_pretrained
    # self.tokenizer = tokenizer_custom_bert.FullTokenizer(path_to_pretrained + 'vocab.txt')
    self.tokenizer = tokenizer_custom_bert.BertTokenizer.from_pretrained(
        "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt")
    self.emb = BertModel.from_pretrained(path_to_pretrained)
    self.frozen = model_frozen
    # Freeze all parameters of the pre-trained model; the second loop over
    # the embedding submodules is redundant (the first loop already covers
    # them) but kept explicit.
    for param in self.emb.parameters():
        param.requires_grad = False
    for elem in [self.emb.embeddings.word_embeddings,
                 self.emb.embeddings.position_embeddings,
                 self.emb.embeddings.token_type_embeddings,
                 self.emb.embeddings.LayerNorm]:
        for param in elem.parameters():
            param.requires_grad = False
    # Freeze/unfreeze layers of the loaded pre-trained BERT model. Currently
    # only the pooler layer is unfrozen; encoder layers could be unfrozen
    # the same way (see the commented-out loop below).
    if not self.frozen:
        # for i in [0]:
        #     for param in self.emb.encoder.layer[i].parameters():
        #         param.requires_grad = True
        for param in self.emb.pooler.parameters():
            param.requires_grad = True
    self.emb.eval()
    print("BERT model loaded successfully")
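# A minimal sketch of running the frozen BERT model above over one sentence,
# assuming the pytorch-pretrained-bert-era API in which BertModel returns
# (encoded_layers, pooled_output); model name and version details may differ
# from the checkpoint the class actually loads:
import torch
from pytorch_pretrained_bert import BertTokenizer, BertModel

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')
model.eval()

tokens = ['[CLS]'] + tokenizer.tokenize("hello world") + ['[SEP]']
input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])
with torch.no_grad():                        # no gradients: model is frozen
    encoded_layers, pooled_output = model(input_ids)
# encoded_layers is a list with one tensor per encoder layer,
# each of shape (batch, seq_len, hidden_size).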