# Fine-tuning setup: bert-base-chinese sequence classifier.
# Reformatted from a collapsed one-liner. Fixes applied:
#   * removed duplicate `from torch.utils.data import DataLoader`
#   * removed duplicate `model.to(device)`
#   * AdamW is now constructed AFTER the parameter groups are built, so the
#     no-weight-decay grouping actually takes effect (the original created the
#     optimizer over raw `model.parameters()` and left the groups unused and
#     the list unterminated).
from transformers import BertForSequenceClassification
from torch.utils.data import DataLoader
import torch
from tools import start_debugger_on_exception
from dataset import DataSetBert
import numpy as np

# Drop into a debugger on any uncaught exception (project helper).
start_debugger_on_exception()

train_dataset = DataSetBert(data_file='./data/data_train/train.csv')
val_dataset = DataSetBert(data_file='./data/data_train/val.csv')

device = torch.device('cuda:6')

train_dataloader = DataLoader(train_dataset, batch_size=11, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=11, shuffle=True)

model = BertForSequenceClassification.from_pretrained('bert-base-chinese')
model.to(device)
model.train()

from transformers import AdamW

# Parameters whose names contain these substrings get NO weight decay —
# standard BERT fine-tuning practice (biases and LayerNorm weights).
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {
        'params': [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay': 0.01,
    },
    # NOTE(review): the original source was truncated mid-list; this second
    # group is completed with the canonical HuggingFace pattern — confirm.
    {
        'params': [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay': 0.0,
    },
]
optimizer = AdamW(optimizer_grouped_parameters, lr=1e-5)
# Fine-tuning setup: bert-base-uncased on an IMDB csv, with torchtext-legacy
# Fields for tokenized, fixed-length batches.
# Reformatted from a collapsed one-liner. Fixes applied:
#   * removed duplicate `from torch.utils.data import DataLoader`
#   * removed duplicate `model.to(device)`
from transformers import BertForSequenceClassification
from torch.utils.data import DataLoader
import torch
from tools import start_debugger_on_exception
from dataset import DataSetBert
import numpy as np
import torchtext
from torchtext.legacy.data import Field, TabularDataset, BucketIterator, Iterator

# Drop into a debugger on any uncaught exception (project helper).
start_debugger_on_exception()

# NOTE(review): train and validation datasets load the SAME file — confirm
# this is intentional (otherwise validation only measures training fit).
train_dataset = DataSetBert(data_file='./data/IMDBs.csv')
val_dataset = DataSetBert(data_file='./data/IMDBs.csv')

device = torch.device('cuda:6')

train_dataloader = DataLoader(train_dataset, batch_size=11, shuffle=True)
test_dataloader = DataLoader(val_dataset, batch_size=11, shuffle=True)

model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
model.to(device)
model.train()

# AdamW is imported here but not used in this excerpt — presumably the
# optimizer is built further down in the full file.
from transformers import AdamW

MAX_SEQ_LEN = 128

from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Integer ids of the tokenizer's pad/unk tokens, used as Field sentinels below.
PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)

# Fields
label_field = Field(sequential=False, use_vocab=False, batch_first=True,
                    dtype=torch.float)
# Text is tokenized directly to BERT ids (tokenizer.encode), padded/truncated
# to MAX_SEQ_LEN; use_vocab=False because ids are already numeric.
text_field = Field(use_vocab=False, tokenize=tokenizer.encode, lower=False,
                   include_lengths=False, batch_first=True,
                   fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX,
                   unk_token=UNK_INDEX)