def __init__(self, vocab_size, max_seq_len, batch_size, lr, with_cuda=True):
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.lr = lr
    self.max_seq_len = max_seq_len

    # Use the GPU only if it is available and requested
    cuda_condition = torch.cuda.is_available() and with_cuda
    self.device = torch.device("cuda:0" if cuda_condition else "cpu")

    # Build the BERT model from the configured vocabulary size
    bertconfig = BertConfig(vocab_size=config["vocab_size"])
    self.bert_model = BertForPreTraining(config=bertconfig)
    self.bert_model.to(self.device)

    # Training dataset and dataloader (streamed from disk)
    train_dataset = BERTDataset(
        config["train_corpus_path"],
        config["word2idx_path"],
        seq_len=self.max_seq_len,
        hidden_dim=bertconfig.hidden_size,
        on_memory=False,
    )
    self.train_dataloader = DataLoader(train_dataset,
                                       batch_size=self.batch_size,
                                       num_workers=config["num_workers"],
                                       collate_fn=lambda x: x)

    # Test dataset and dataloader (kept in memory)
    test_dataset = BERTDataset(
        config["test_corpus_path"],
        config["word2idx_path"],
        seq_len=self.max_seq_len,
        hidden_dim=bertconfig.hidden_size,
        on_memory=True,
    )
    self.test_dataloader = DataLoader(test_dataset,
                                      batch_size=self.batch_size,
                                      num_workers=config["num_workers"],
                                      collate_fn=lambda x: x)

    # Positional encoding, expanded to [1, max_seq_len, hidden_size]
    self.hidden_dim = bertconfig.hidden_size
    self.positional_enc = self.init_positional_encoding()
    self.positional_enc = torch.unsqueeze(self.positional_enc, dim=0)

    # Optimizer over all model parameters
    optim_parameters = list(self.bert_model.parameters())
    self.optimizer = torch.optim.Adam(optim_parameters, lr=self.lr)
    print("Total Parameters:",
          sum(p.nelement() for p in self.bert_model.parameters()))
def __init__(self, BertForPreTraining, vocab_size, max_seq_len, lr, batch_size):
    self.vocab_size = vocab_size
    self.max_seq_len = max_seq_len
    self.lr = lr
    self.batch_size = batch_size

    # Use the GPU if it is available
    use_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda:0" if use_cuda else "cpu")

    # Build the BERT model from the vocabulary size
    bertconfig = BertConfig(vocab_size=vocab_size)
    self.bert_model = BertForPreTraining(config=bertconfig)
    self.bert_model.to(self.device)

    # Training dataset and dataloader
    train_dataset = BERTDataset(corpus_path=config["train_data"],
                                word2idx_path=config["word2idx_path"],
                                seq_len=self.max_seq_len,
                                hidden_dim=bertconfig.hidden_size,
                                on_memory=config["on_memory"])
    self.train_dataloader = DataLoader(train_dataset,
                                       batch_size=self.batch_size,
                                       num_workers=config["num_workers"],
                                       collate_fn=lambda x: x)

    # Test dataset and dataloader
    test_dataset = BERTDataset(corpus_path=config["test_data"],
                               word2idx_path=config["word2idx_path"],
                               seq_len=self.max_seq_len,
                               hidden_dim=bertconfig.hidden_size,
                               on_memory=config["on_memory"])
    self.test_dataloader = DataLoader(test_dataset,
                                      batch_size=self.batch_size,
                                      num_workers=config["num_workers"],
                                      collate_fn=lambda x: x)

    # Positional encoding: [max_seq_len, hidden_size]
    self.positional_enc = self.init_positional_encoding(
        hidden_dim=bertconfig.hidden_size, max_seq_len=self.max_seq_len)
    # Expand to [1, max_seq_len, hidden_size]
    self.positional_enc = torch.unsqueeze(self.positional_enc, dim=0)

    # Optimizer over all model parameters
    optim_parameters = list(self.bert_model.parameters())
    self.optimizer = torch.optim.Adam(optim_parameters, lr=self.lr)
    # p.nelement() returns the number of scalar elements in a tensor
    print("Total Parameters:",
          sum(p.nelement() for p in self.bert_model.parameters()))
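# Hedged illustration (not part of the original trainer): how the
# "Total Parameters" count above works. p.nelement() gives the number of
# scalar entries in a single tensor, so summing it over parameters() counts
# every trainable weight. The Linear sizes below are only an example.
def _count_parameters_example():
    import torch
    layer = torch.nn.Linear(768, 768)  # 768*768 weight entries + 768 biases
    total = sum(p.nelement() for p in layer.parameters())
    return total  # 590592 == 768 * 768 + 768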
def __init__(self, bert_model, vocab_size, max_seq_len, batch_size, lr, with_cuda=True):
    # Vocabulary size. The effective number of characters/words is
    # vocab_size - 20, because the first 20 tokens are reserved for
    # special purposes such as padding.
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    # Learning rate
    self.lr = lr
    # Whether to use the GPU
    cuda_condition = torch.cuda.is_available() and with_cuda
    self.device = torch.device("cuda:0" if cuda_condition else "cpu")
    # Maximum length of a single sentence
    self.max_seq_len = max_seq_len
    # Initialize the hyperparameter configuration
    bertconfig = BertConfig(
        vocab_size_or_config_json_file=config["vocab_size"])
    # Initialize the BERT model
    self.bert_model = bert_model(config=bertconfig)
    self.bert_model.to(self.device)
    # Initialize the training dataset
    train_dataset = BERTDataset(
        corpus_path=config["train_corpus_path"],
        word2idx_path=config["word2idx_path"],
        seq_len=self.max_seq_len,
        hidden_dim=bertconfig.hidden_size,
        on_memory=False,
    )
    # Initialize the training dataloader
    self.train_dataloader = DataLoader(train_dataset,
                                       batch_size=self.batch_size,
                                       num_workers=config["num_workers"],
                                       collate_fn=lambda x: x)
    # Initialize the test dataset
    test_dataset = BERTDataset(
        corpus_path=config["test_corpus_path"],
        word2idx_path=config["word2idx_path"],
        seq_len=self.max_seq_len,
        hidden_dim=bertconfig.hidden_size,
        on_memory=True,
    )
    # Initialize the test dataloader
    self.test_dataloader = DataLoader(test_dataset,
                                      batch_size=self.batch_size,
                                      num_workers=config["num_workers"],
                                      collate_fn=lambda x: x)
    # Initialize the positional encoding
    self.positional_enc = self.init_positional_encoding(
        hidden_dim=bertconfig.hidden_size, max_seq_len=self.max_seq_len)
    # Expand the positional encoding to shape [1, max_seq_len, hidden_size]
    self.positional_enc = torch.unsqueeze(self.positional_enc, dim=0)
    # Collect the parameters to optimize and pass them to the optimizer
    optim_parameters = list(self.bert_model.parameters())
    self.optimizer = torch.optim.Adam(optim_parameters, lr=self.lr)
    print("Total Parameters:",
          sum(p.nelement() for p in self.bert_model.parameters()))
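# The init_positional_encoding() helper called above is not shown in this
# section. Below is a minimal sketch of what such a helper typically
# computes -- the standard sinusoidal encoding from "Attention Is All You
# Need". The project's actual implementation may differ (e.g. normalization
# or dtype), so treat this as an assumption, not the project's code.
def init_positional_encoding_sketch(hidden_dim, max_seq_len):
    import numpy as np
    import torch
    # position_enc[pos, i] = pos / 10000^(2 * (i // 2) / hidden_dim)
    position_enc = np.array([
        [pos / np.power(10000, 2 * (i // 2) / hidden_dim) for i in range(hidden_dim)]
        if pos != 0 else np.zeros(hidden_dim)
        for pos in range(max_seq_len)])
    position_enc[1:, 0::2] = np.sin(position_enc[1:, 0::2])  # even dims: sine
    position_enc[1:, 1::2] = np.cos(position_enc[1:, 1::2])  # odd dims: cosine
    return torch.from_numpy(position_enc).float()  # [max_seq_len, hidden_dim]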
def __init__(self, bert_model, vocab_size, max_seq_len, batch_size, lr, with_cuda=True):
    self.vocab_size = vocab_size
    self.max_seq_len = max_seq_len
    self.batch_size = batch_size
    self.lr = lr
    # Determine the compute device
    cuda_condition = torch.cuda.is_available() and with_cuda
    self.device = torch.device('cuda:0' if cuda_condition else 'cpu')
    # Initialize the model configuration
    bertconfig = BertConfig(vocab_size=config['vocab_size'])
    # Initialize the BERT model and move it to the compute device
    self.bert_model = bert_model(config=bertconfig)
    self.bert_model.to(self.device)
    # Prepare the training and test datasets
    train_dataset = BERTDataset(
        corpus_path='./corpus/test_wiki.txt',
        word2idx_path='./corpus/bert_word2idx_extend.json',
        seq_len=self.max_seq_len,
        hidden_dim=bertconfig.hidden_size,
        on_memory=False)
    self.train_dataloader = DataLoader(train_dataset,
                                       batch_size=self.batch_size,
                                       num_workers=config['num_workers'],
                                       collate_fn=lambda x: x)
    test_dataset = BERTDataset(corpus_path=config['test_corpus_path'],
                               word2idx_path=config['word2idx_path'],
                               seq_len=self.max_seq_len,
                               hidden_dim=bertconfig.hidden_size,
                               on_memory=False)
    self.test_dataloader = DataLoader(test_dataset,
                                      batch_size=self.batch_size,
                                      num_workers=config['num_workers'],
                                      collate_fn=lambda x: x)
    # Initialize the positional encoding
    self.positional_enc = self.init_positional_encoding(
        hidden_dim=bertconfig.hidden_size, max_seq_len=self.max_seq_len)
    # Add a batch dimension: [1, max_seq_len, hidden_size]
    self.positional_enc = torch.unsqueeze(self.positional_enc, dim=0)
    # Collect the parameters to optimize and pass them to the optimizer
    optim_parameters = list(self.bert_model.parameters())
    self.optimizer = torch.optim.Adam(optim_parameters, lr=self.lr)
    print('Total Parameters:',
          sum(p.nelement() for p in self.bert_model.parameters()))
    print(next(self.bert_model.parameters()).data.dtype)
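# Hedged usage sketch (not from the original source): every variant above
# reads a module-level `config` dict that is not shown in this section. The
# dict below only illustrates the keys the code looks up; all values are
# placeholders, and `Trainer` is a hypothetical name for the class that owns
# one of these __init__ methods. The second variant uses the alternative keys
# "train_data", "test_data" and "on_memory" instead.
example_config = {
    "vocab_size": 30000,                                    # placeholder size
    "train_corpus_path": "./corpus/train_wiki.txt",         # placeholder path
    "test_corpus_path": "./corpus/test_wiki.txt",           # placeholder path
    "word2idx_path": "./corpus/bert_word2idx_extend.json",  # placeholder path
    "num_workers": 0,
}
# trainer = Trainer(bert_model=BertForPreTraining,
#                   vocab_size=example_config["vocab_size"],
#                   max_seq_len=256, batch_size=16, lr=1e-4, with_cuda=True)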