# Example #1
# Training-setup script fragment: parse CLI args, build tokenizer, dataset,
# dataloader and classification model for SemEval Task 7.1a.
# NOTE(review): DEVICE is computed but not used in this visible fragment —
# presumably the (truncated) continuation moves the model/batches to it.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(DEVICE)

# get args from cmdline (project-local parser; at least `batch_size` and
# `modelckpt` options are read by this script)
parser = get_train_parser()
options = parser.parse_args()

# build the tokenizer (T5, despite the original "bert" wording)
tokenizer = T5Tokenizer.from_pretrained('t5-base')
# CLS token will work as BOS token
# tokenizer.bos_token = tokenizer.cls_token
# SEP token will work as EOS token
# tokenizer.eos_token = tokenizer.sep_token

# load the training split of the project dataset
dataset = Task71Dataset("train", tokenizer=tokenizer)

# collator builds batches on CPU; shuffling is appropriate for training
collator_fn = Task71aCollatorFeatures(device='cpu')
loader = DataLoader(dataset, batch_size=options.batch_size,
                    drop_last=False, shuffle=True,
                    collate_fn=collator_fn)


# create model: reuse only the encoder stack of a pretrained T5 and put a
# 2-class classification head (dropout 0.2) on top of it
encoder = T5Model.from_pretrained('t5-base')

# change config if you want
# encoder.config.output_hidden_states = True
model = T5ClassificationHead(encoder.encoder, encoder.config.hidden_size,
                               num_classes=2, drop=0.2)
if options.modelckpt is not None:
# Example #2
        for id, out in zip(ids_list, outs_list):
            csv_writer.writerow([id, int(out)])


# Inference/test-setup script fragment: parse CLI args, build tokenizer,
# dev dataset, dataloader and the same T5-encoder classification model.
# NOTE(review): DEVICE is printed but not used in this visible fragment —
# presumably consumed by the truncated continuation (e.g. model.to(DEVICE)).
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(DEVICE)

# get args from cmdline (project-local parser; `batch_size` is read below)
parser = get_test_parser()
options = parser.parse_args()

# build the tokenizer (T5, despite the original "bert" wording)
tokenizer = T5Tokenizer.from_pretrained('t5-base')

# load the dev split for evaluation
test_dataset = Task71Dataset("dev", tokenizer=tokenizer)

# test-time collator; batches are assembled on CPU
collator_fn = Task71aCollatorTest(device='cpu')
# NOTE(review): shuffle=True on a dev/test loader is unusual — it only stays
# correct because ids are carried through and zipped with outputs when the
# CSV is written; consider shuffle=False for reproducible output order.
test_loader = DataLoader(test_dataset,
                         batch_size=options.batch_size,
                         drop_last=False,
                         shuffle=True,
                         collate_fn=collator_fn)

# create model: pretrained T5 encoder + 2-class head (dropout 0.2, no final
# activation — 'none' presumably means raw logits; confirm in project code)
model = T5Model.from_pretrained('t5-base')
model = T5ClassificationHead(model.encoder,
                             model.config.hidden_size,
                             num_classes=2,
                             drop=0.2,
                             act='none')