Example #1
0
    def __init__(
        self,
        vocab_size,
        max_seq_len,
        batch_size,
        lr,
        with_cuda=True,
    ):
        """Set up the BERT pre-training pipeline: model, dataloaders, optimizer.

        Args:
            vocab_size: vocabulary size used to build the BERT config.
            max_seq_len: maximum sequence length passed to both datasets.
            batch_size: batch size for the train and test dataloaders.
            lr: learning rate for the Adam optimizer.
            with_cuda: use the GPU when one is available.
        """
        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.lr = lr
        self.max_seq_len = max_seq_len
        # Fall back to CPU when CUDA is unavailable or explicitly disabled.
        cuda_condition = torch.cuda.is_available() and with_cuda
        self.device = torch.device("cuda:0" if cuda_condition else "cpu")
        # FIX: build the config from the vocab_size argument instead of
        # silently ignoring it in favor of the module-level config dict
        # (the two could disagree).
        bertconfig = BertConfig(vocab_size=self.vocab_size)
        self.bert_model = BertForPreTraining(config=bertconfig)
        self.bert_model.to(self.device)
        # Training corpus is streamed from disk (on_memory=False); the test
        # corpus is small enough to keep in memory.
        train_dataset = BERTDataset(
            config["train_corpus_path"],
            config["word2idx_path"],
            seq_len=self.max_seq_len,
            hidden_dim=bertconfig.hidden_size,
            on_memory=False,
        )
        # collate_fn=lambda x: x keeps the raw sample list; batching/padding
        # is handled downstream.
        self.train_dataloader = DataLoader(train_dataset,
                                           batch_size=self.batch_size,
                                           num_workers=config["num_workers"],
                                           collate_fn=lambda x: x)
        test_dataset = BERTDataset(
            config["test_corpus_path"],
            config["word2idx_path"],
            seq_len=self.max_seq_len,
            hidden_dim=bertconfig.hidden_size,
            on_memory=True,
        )
        self.test_dataloader = DataLoader(test_dataset,
                                          batch_size=self.batch_size,
                                          num_workers=config["num_workers"],
                                          collate_fn=lambda x: x)
        self.hidden_dim = bertconfig.hidden_size
        # FIX: pass the dimensions explicitly, matching the
        # init_positional_encoding(hidden_dim=..., max_seq_len=...) signature
        # used by every sibling implementation in this file; the original
        # no-argument call does not match that signature.
        self.positional_enc = self.init_positional_encoding(
            hidden_dim=bertconfig.hidden_size, max_seq_len=self.max_seq_len)
        # [max_seq_len, hidden_size] -> [1, max_seq_len, hidden_size]
        self.positional_enc = torch.unsqueeze(self.positional_enc, dim=0)

        optim_parameters = list(self.bert_model.parameters())

        self.optimizer = torch.optim.Adam(optim_parameters, lr=self.lr)

        print("Total Parameters:",
              sum(p.nelement() for p in self.bert_model.parameters()))
 def __init__(self, BertForPreTraining, vocab_size, max_seq_len, lr,
              batch_size):
     """Initialize the BERT pre-training setup.

     Builds the model from ``vocab_size``, moves it to the available
     device, wires up the train/test dataloaders from the module-level
     ``config``, precomputes positional encodings, and creates the Adam
     optimizer.
     """
     self.vocab_size = vocab_size
     self.max_seq_len = max_seq_len
     self.lr = lr
     self.batch_size = batch_size
     # Prefer the GPU whenever one is present.
     self.device = torch.device(
         "cuda:0" if torch.cuda.is_available() else "cpu")
     bertconfig = BertConfig(vocab_size=vocab_size)
     self.bert_model = BertForPreTraining(config=bertconfig)
     self.bert_model.to(self.device)
     # Both datasets share everything except the corpus path.
     dataset_kwargs = dict(word2idx_path=config["word2idx_path"],
                           seq_len=self.max_seq_len,
                           hidden_dim=bertconfig.hidden_size,
                           on_memory=config["on_memory"])
     train_dataset = BERTDataset(corpus_path=config["train_data"],
                                 **dataset_kwargs)
     test_dataset = BERTDataset(corpus_path=config["test_data"],
                                **dataset_kwargs)
     # collate_fn=lambda x: x passes the raw sample list through unchanged.
     loader_kwargs = dict(batch_size=self.batch_size,
                          num_workers=config["num_workers"],
                          collate_fn=lambda x: x)
     self.train_dataloader = DataLoader(train_dataset, **loader_kwargs)
     self.test_dataloader = DataLoader(test_dataset, **loader_kwargs)
     # [max_seq_len, hidden_size] -> [1, max_seq_len, hidden_size]
     self.positional_enc = torch.unsqueeze(
         self.init_positional_encoding(hidden_dim=bertconfig.hidden_size,
                                       max_seq_len=self.max_seq_len),
         dim=0)
     params = list(self.bert_model.parameters())
     self.optimizer = torch.optim.Adam(params, lr=self.lr)
     # p.nelement() counts the number of scalar elements in a tensor.
     print("Total Parameters:", sum(p.nelement() for p in params))
Example #3
0
    def __init__(
        self,
        bert_model,
        vocab_size,
        max_seq_len,
        batch_size,
        lr,
        with_cuda=True,
    ):
        """Set up BERT pre-training: model, datasets, positional encoding, optimizer.

        Args:
            bert_model: model class instantiated with a ``BertConfig``.
            vocab_size: vocabulary size (the first 20 tokens are reserved
                for special purposes such as padding).
            max_seq_len: maximum length of a single sequence.
            batch_size: batch size for both dataloaders.
            lr: learning rate for the Adam optimizer.
            with_cuda: run on the GPU when one is available.
        """
        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.lr = lr
        self.max_seq_len = max_seq_len
        # Use the GPU only when it exists and the caller allows it.
        use_gpu = with_cuda and torch.cuda.is_available()
        self.device = torch.device("cuda:0" if use_gpu else "cpu")
        # Hyper-parameter configuration for the model.
        bertconfig = BertConfig(
            vocab_size_or_config_json_file=config["vocab_size"])
        # Instantiate the BERT model and move it to the chosen device.
        self.bert_model = bert_model(config=bertconfig)
        self.bert_model.to(self.device)
        # Train corpus is streamed (on_memory=False); test corpus is held
        # in memory (on_memory=True).
        train_dataset = BERTDataset(
            corpus_path=config["train_corpus_path"],
            word2idx_path=config["word2idx_path"],
            seq_len=self.max_seq_len,
            hidden_dim=bertconfig.hidden_size,
            on_memory=False,
        )
        test_dataset = BERTDataset(
            corpus_path=config["test_corpus_path"],
            word2idx_path=config["word2idx_path"],
            seq_len=self.max_seq_len,
            hidden_dim=bertconfig.hidden_size,
            on_memory=True,
        )
        # collate_fn=lambda x: x hands the raw sample list straight through.
        loader_kwargs = dict(batch_size=self.batch_size,
                             num_workers=config["num_workers"],
                             collate_fn=lambda x: x)
        self.train_dataloader = DataLoader(train_dataset, **loader_kwargs)
        self.test_dataloader = DataLoader(test_dataset, **loader_kwargs)
        # Positional encoding, expanded to [1, max_seq_len, hidden_size].
        self.positional_enc = torch.unsqueeze(
            self.init_positional_encoding(hidden_dim=bertconfig.hidden_size,
                                          max_seq_len=self.max_seq_len),
            dim=0)

        # All model parameters are handed to the optimizer.
        trainable = list(self.bert_model.parameters())
        self.optimizer = torch.optim.Adam(trainable, lr=self.lr)

        print("Total Parameters:", sum(p.nelement() for p in trainable))
    def __init__(self,
                 bert_model,
                 vocab_size,
                 max_seq_len,
                 batch_size,
                 lr,
                 with_cuda=True):
        """Set up BERT pre-training: model, dataloaders, positional encoding, optimizer.

        Args:
            bert_model: model class instantiated with a ``BertConfig``.
            vocab_size: vocabulary size (stored; the config is built from
                the module-level ``config['vocab_size']``).
            max_seq_len: maximum length of a single sequence.
            batch_size: batch size for both dataloaders.
            lr: learning rate for the Adam optimizer.
            with_cuda: run on the GPU when one is available.
        """
        self.vocab_size = vocab_size
        self.max_seq_len = max_seq_len
        self.batch_size = batch_size
        self.lr = lr
        # Pick the computation device.
        cuda_condition = torch.cuda.is_available() and with_cuda
        self.device = torch.device('cuda:0' if cuda_condition else 'cpu')

        # Initialize the model configuration.
        bertconfig = BertConfig(vocab_size=config['vocab_size'])
        # Build the BERT model and move it to the computation device.
        self.bert_model = bert_model(config=bertconfig)
        self.bert_model.to(self.device)
        # NOTE(review): the training corpus paths are hard-coded here while
        # the test dataset reads them from `config` — presumably this should
        # use config['train_corpus_path'] / config['word2idx_path'] like the
        # sibling implementations; confirm the intended keys before changing.
        train_dataset = BERTDataset(
            corpus_path='./corpus/test_wiki.txt',
            word2idx_path='./corpus/bert_word2idx_extend.json',
            seq_len=self.max_seq_len,
            hidden_dim=bertconfig.hidden_size,
            on_memory=False)

        # collate_fn=lambda x: x passes the raw sample list through unchanged.
        self.train_dataloader = DataLoader(train_dataset,
                                           batch_size=self.batch_size,
                                           num_workers=config['num_workers'],
                                           collate_fn=lambda x: x)

        test_dataset = BERTDataset(corpus_path=config['test_corpus_path'],
                                   word2idx_path=config['word2idx_path'],
                                   seq_len=self.max_seq_len,
                                   hidden_dim=bertconfig.hidden_size,
                                   on_memory=False)

        self.test_dataloader = DataLoader(test_dataset,
                                          batch_size=self.batch_size,
                                          num_workers=config['num_workers'],
                                          collate_fn=lambda x: x)

        # Positional encoding of shape [max_seq_len, hidden_size],
        # expanded to [1, max_seq_len, hidden_size].
        self.positional_enc = self.init_positional_encoding(
            hidden_dim=bertconfig.hidden_size, max_seq_len=self.max_seq_len)
        self.positional_enc = torch.unsqueeze(self.positional_enc, dim=0)

        # Hand all model parameters to the optimizer.
        optim_parameters = list(self.bert_model.parameters())
        self.optimizer = torch.optim.Adam(optim_parameters, lr=self.lr)

        # p.nelement() counts the scalar elements of each parameter tensor.
        print('Total Parameters:',
              sum(p.nelement() for p in self.bert_model.parameters()))

        # Debug aid kept from the original: shows the parameter dtype.
        print(next(self.bert_model.parameters()).data.dtype)