import numpy as np start_debugger_on_exception() train_dataset = DataSetBert(data_file='./data/data_train/train.csv') val_dataset = DataSetBert(data_file='./data/data_train/val.csv') test_dataset = DataSetBert(data_file='./data/data_train/test.csv') from torch.utils.data import DataLoader device = torch.device('cuda:6') train_dataloader = DataLoader(train_dataset, batch_size=11, shuffle=True) val_dataloader = DataLoader(val_dataset, batch_size=11, shuffle=True) test_dataloader = DataLoader(test_dataset, batch_size=11, shuffle=True) model_config = BertConfig.from_pretrained('bert-base-chinese') model_config.num_hidden_layers = 3 model = BertForSequenceClassification(model_config) from transformers import BertTokenizer tokenizer = BertTokenizer.from_pretrained('bert-base-chinese') model.resize_token_embeddings(len(tokenizer)) model.config.pad_token_id = model.config.eos_token_id model.config.max_position_embeddings = 1024 model.to(device) model.train() model.to(device) import pdb pdb.set_trace() from transformers import AdamW optimizer = AdamW(model.parameters(), lr=1e-5) no_decay = ['bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [{ 'params': [ p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay) ],