def __init__(self, bert_model, vocab_size, max_len, batch_size, lr,
             with_code=True):
    """Initialize the BERT pretraining trainer.

    Builds the model from the module-level ``config``, prepares train/test
    dataloaders and the positional encoding, and creates the optimizer.

    Args:
        bert_model: model *class* (not instance); instantiated here with a
            ``BertConfig`` built from ``config["vocab_size"]``.
        vocab_size: vocabulary size (stored on the instance; note the model
            config itself reads ``config["vocab_size"]``).
        max_len: maximum sequence length used for both datasets and the
            positional encoding.
        batch_size: batch size for both dataloaders.
        lr: Adam learning rate.
        with_code: whether CUDA may be used when available (the name looks
            like a typo for ``with_cuda``; kept for backward compatibility).
    """
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.lr = lr
    # BUGFIX: the original ignored `with_code` entirely, so callers could
    # never force CPU execution. Honor the flag here.
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() and with_code else "cpu")
    self.max_len = max_len
    # Initialize the BERT model from hyper-parameters and move to device.
    bertconfig = BertConfig(vocab_size=config["vocab_size"])
    self.bert_model = bert_model(config=bertconfig)
    self.bert_model.to(self.device)
    # Training dataset/dataloader. collate_fn=lambda x: x returns the raw
    # list of samples; batching/padding is handled downstream.
    train_dataset = BERTDataset(corpus_path=config["train_corpus_path"],
                                word2idx_path=config["word2idx_path"],
                                seq_len=self.max_len,
                                hidden_dim=bertconfig.hidden_size)
    self.train_dataloader = DataLoader(train_dataset,
                                       batch_size=self.batch_size,
                                       num_workers=0,
                                       collate_fn=lambda x: x)
    # Test dataset/dataloader, same layout as training.
    test_dataset = BERTDataset(corpus_path=config["test_corpus_path"],
                               word2idx_path=config["word2idx_path"],
                               seq_len=self.max_len,
                               hidden_dim=bertconfig.hidden_size)
    self.test_dataloader = DataLoader(test_dataset,
                                      batch_size=self.batch_size,
                                      num_workers=0,
                                      collate_fn=lambda x: x)
    # Positional encoding, unsqueezed to [1, max_len, hidden_size] so it
    # broadcasts over the batch dimension.
    self.positional_enc = self.init_positional_encoding(
        hidden_dim=bertconfig.hidden_size, max_seq_len=self.max_len)
    self.positional_enc = torch.unsqueeze(self.positional_enc, dim=0)
    # All model parameters are optimized with Adam.
    self.optimizer = torch.optim.Adam(
        list(self.bert_model.parameters()), lr=self.lr)
def __init__(self, bert_model, vocab_size, max_seq_len, batch_size, lr,
             with_cuda=True):
    """Set up the model, data pipelines, positional encoding and optimizer.

    Note: the effective word vocabulary is ``vocab_size - 20``, because the
    first 20 token ids are reserved for special functions (padding etc.).

    Args:
        bert_model: model *class*, instantiated here with a ``BertConfig``.
        vocab_size: total vocabulary size including the 20 reserved ids.
        max_seq_len: hard cap on a single sequence's length.
        batch_size: batch size for both dataloaders.
        lr: Adam learning rate.
        with_cuda: allow GPU execution when one is available.
    """
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.lr = lr
    # Use the GPU only when one exists AND the caller allows it.
    use_gpu = with_cuda and torch.cuda.is_available()
    self.device = torch.device("cuda:0" if use_gpu else "cpu")
    self.max_seq_len = max_seq_len
    # Hyper-parameter container, then model construction and placement.
    bertconfig = BertConfig(
        vocab_size_or_config_json_file=config["vocab_size"])
    self.bert_model = bert_model(config=bertconfig)
    self.bert_model.to(self.device)
    # Training corpus is streamed from disk (on_memory=False); the identity
    # collate_fn hands the raw sample list to the consumer untouched.
    train_dataset = BERTDataset(corpus_path=config["train_corpus_path"],
                                word2idx_path=config["word2idx_path"],
                                seq_len=self.max_seq_len,
                                hidden_dim=bertconfig.hidden_size,
                                on_memory=False)
    self.train_dataloader = DataLoader(train_dataset,
                                       batch_size=self.batch_size,
                                       num_workers=config["num_workers"],
                                       collate_fn=lambda batch: batch)
    # Test corpus is small enough to hold in memory (on_memory=True).
    test_dataset = BERTDataset(corpus_path=config["test_corpus_path"],
                               word2idx_path=config["word2idx_path"],
                               seq_len=self.max_seq_len,
                               hidden_dim=bertconfig.hidden_size,
                               on_memory=True)
    self.test_dataloader = DataLoader(test_dataset,
                                      batch_size=self.batch_size,
                                      num_workers=config["num_workers"],
                                      collate_fn=lambda batch: batch)
    # Precompute the positional encoding and expand its shape to
    # [1, max_seq_len, hidden_size] so it broadcasts across the batch.
    pos_enc = self.init_positional_encoding(
        hidden_dim=bertconfig.hidden_size, max_seq_len=self.max_seq_len)
    self.positional_enc = torch.unsqueeze(pos_enc, dim=0)
    # Hand every model parameter to Adam.
    trainable = list(self.bert_model.parameters())
    self.optimizer = torch.optim.Adam(trainable, lr=self.lr)
    print("Total Parameters:",
          sum([p.nelement() for p in self.bert_model.parameters()]))
def __init__(self, bert_model, vocab_size, max_seq_len, batch_size, lr,
             with_cuda=True):
    """Initialize the trainer: model, dataloaders, positional encoding, optimizer.

    Args:
        bert_model: model *class*, instantiated here with a ``BertConfig``.
        vocab_size: vocabulary size (stored; the model config reads
            ``config['vocab_size']``).
        max_seq_len: maximum single-sequence length.
        batch_size: batch size for both dataloaders.
        lr: Adam learning rate.
        with_cuda: allow GPU execution when one is available.
    """
    self.vocab_size = vocab_size
    self.max_seq_len = max_seq_len
    self.batch_size = batch_size
    self.lr = lr
    # Select the compute device.
    cuda_condition = torch.cuda.is_available() and with_cuda
    self.device = torch.device('cuda:0' if cuda_condition else 'cpu')
    # Build the model configuration, instantiate and place the model.
    bertconfig = BertConfig(vocab_size=config['vocab_size'])
    self.bert_model = bert_model(config=bertconfig)
    self.bert_model.to(self.device)
    # NOTE(review): the training corpus paths are hard-coded here while the
    # test set reads from `config` — consider moving these into config too.
    # on_memory=False streams the (large) training corpus from disk.
    train_dataset = BERTDataset(
        corpus_path='./corpus/test_wiki.txt',
        word2idx_path='./corpus/bert_word2idx_extend.json',
        seq_len=self.max_seq_len,
        hidden_dim=bertconfig.hidden_size,
        on_memory=False)
    # collate_fn=lambda x: x returns the raw sample list; batching/padding
    # is handled downstream.
    self.train_dataloader = DataLoader(train_dataset,
                                       batch_size=self.batch_size,
                                       num_workers=config['num_workers'],
                                       collate_fn=lambda x: x)
    test_dataset = BERTDataset(corpus_path=config['test_corpus_path'],
                               word2idx_path=config['word2idx_path'],
                               seq_len=self.max_seq_len,
                               hidden_dim=bertconfig.hidden_size,
                               on_memory=False)
    self.test_dataloader = DataLoader(test_dataset,
                                      batch_size=self.batch_size,
                                      num_workers=config['num_workers'],
                                      collate_fn=lambda x: x)
    # Positional encoding, unsqueezed to [1, max_seq_len, hidden_size] so it
    # broadcasts over the batch dimension.
    self.positional_enc = self.init_positional_encoding(
        hidden_dim=bertconfig.hidden_size, max_seq_len=self.max_seq_len)
    self.positional_enc = torch.unsqueeze(self.positional_enc, dim=0)
    optim_parameters = list(self.bert_model.parameters())
    self.optimizer = torch.optim.Adam(optim_parameters, lr=self.lr)
    # p.nelement() is the number of scalars in a tensor; the sum is the total
    # parameter count. (Removed the commented-out hand-rolled counter and
    # dataloader debug loop that duplicated this.)
    print('Total Parameters:',
          sum([p.nelement() for p in self.bert_model.parameters()]))
    # Debug aid: show the parameter dtype (e.g. torch.float32).
    print(next(self.bert_model.parameters()).data.dtype)