Example #1
from transformers import BertForSequenceClassification
from torch.utils.data import DataLoader
import torch
from tools import start_debugger_on_exception
from dataset import DataSetBert
import numpy as np

start_debugger_on_exception()
train_dataset = DataSetBert(data_file='./data/data_train/train.csv')
val_dataset = DataSetBert(data_file='./data/data_train/val.csv')

device = torch.device('cuda:6')
train_dataloader = DataLoader(train_dataset, batch_size=11, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=11, shuffle=True)
model = BertForSequenceClassification.from_pretrained('bert-base-chinese')
model.to(device)
model.train()
from transformers import AdamW

no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {
        # Weight decay for everything except biases and LayerNorm weights.
        'params': [p for n, p in model.named_parameters()
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01,
    },
    {
        # No weight decay for biases and LayerNorm weights.
        'params': [p for n, p in model.named_parameters()
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0,
    },
]
optimizer = AdamW(optimizer_grouped_parameters, lr=1e-5)
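For context, a minimal fine-tuning loop built on the objects above might look like the sketch below. It assumes, since DataSetBert is not shown here, that each batch is a dict with input_ids, attention_mask and labels tensors already padded to a fixed length; that layout is an assumption, not something the example confirms.

# Sketch of a fine-tuning loop (assumes DataSetBert yields dicts of tensors).
for epoch in range(3):
    model.train()
    for batch in train_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(input_ids=batch['input_ids'],
                        attention_mask=batch['attention_mask'],
                        labels=batch['labels'])
        optimizer.zero_grad()
        outputs.loss.backward()
        optimizer.step()

    # Validation pass: report accuracy on the held-out split.
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for batch in val_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(input_ids=batch['input_ids'],
                           attention_mask=batch['attention_mask']).logits
            preds = logits.argmax(dim=-1)
            correct += (preds == batch['labels']).sum().item()
            total += batch['labels'].size(0)
    print(f'epoch {epoch}: val accuracy = {correct / total:.4f}')
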
Example #2
from transformers import BertForSequenceClassification
from torch.utils.data import DataLoader
import torch
from tools import start_debugger_on_exception
from dataset import DataSetBert
import numpy as np
import torchtext
from torchtext.legacy.data import Field, TabularDataset, BucketIterator, Iterator
start_debugger_on_exception()
train_dataset = DataSetBert(data_file='./data/IMDBs.csv')
val_dataset = DataSetBert(data_file='./data/IMDBs.csv')
device = torch.device('cuda:6') 
train_dataloader = DataLoader(train_dataset, batch_size=11, shuffle=True)
test_dataloader = DataLoader(val_dataset, batch_size=11, shuffle=True)
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
model.to(device)
model.train()
from transformers import AdamW
MAX_SEQ_LEN = 128
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)

# Fields

label_field = Field(sequential=False, use_vocab=False, batch_first=True, dtype=torch.float)
text_field = Field(use_vocab=False, tokenize=tokenizer.encode, lower=False,
                   include_lengths=False, batch_first=True,
                   fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX, unk_token=UNK_INDEX)
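
To use these fields, the legacy torchtext pipeline would build a TabularDataset and a BucketIterator from them, roughly as sketched below. The column names ('label', 'text') and the CSV layout are assumptions, since the schema of IMDBs.csv is not shown above.

# Sketch: wiring the fields into torchtext's legacy dataset/iterator classes.
# The column order ('label', 'text') is an assumption about IMDBs.csv.
fields = [('label', label_field), ('text', text_field)]
train_data = TabularDataset(path='./data/IMDBs.csv', format='csv',
                            skip_header=True, fields=fields)
train_iter = BucketIterator(train_data, batch_size=11, device=device,
                            sort_key=lambda x: len(x.text), train=True)

for batch in train_iter:
    # batch.text holds token ids from tokenizer.encode, padded to MAX_SEQ_LEN;
    # batch.label is float (see label_field), so cast it to long for the loss.
    outputs = model(input_ids=batch.text, labels=batch.label.long())
    print(outputs.loss)
    break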