import os
from datetime import datetime

import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard

# BertTokenizer, BertForSequenceClassification, and TASK_NAMES come from the
# project's own modules.


def train(opts):
    tokenizer = BertTokenizer.from_pretrained(opts.pretrained_path)

    # get dataset
    dataset = TASK_NAMES[opts.task_name](opts.data_dir, tokenizer, opts.max_seq_len)
    X_train, y_train = dataset.get_train_datasets()
    X_dev, y_dev = dataset.get_dev_datasets()
    opts.num_labels = len(dataset.get_labels())

    # build model ('lr' is deprecated in tf.keras; use 'learning_rate')
    optimizer = tf.keras.optimizers.Adam(learning_rate=opts.lr, epsilon=1e-08)
    model = BertForSequenceClassification().build(opts)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    # callbacks: save model (TF2 names the metric 'val_accuracy', not 'val_acc')
    filepath = os.path.join(opts.save_dir, "{epoch:02d}-{val_accuracy:.4f}.hdf5")
    checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1,
                                 save_best_only=False, mode='max')

    # callbacks: tensorboard
    tensorboard_dir = os.path.join(opts.log_dir, datetime.now().strftime("%Y%m%d-%H%M"))
    tensorboard = TensorBoard(log_dir=tensorboard_dir)

    model.fit(X_train, y_train,
              batch_size=opts.batch_size,
              epochs=opts.epochs,
              validation_data=(X_dev, y_dev),
              shuffle=True,
              callbacks=[checkpoint, tensorboard])

    X_test, y_test = dataset.get_test_datasets()
    score, acc = model.evaluate(X_test, y_test, batch_size=opts.batch_size)
    print('test score:', score)
    print('test accuracy:', acc)
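# A minimal sketch of how train() might be invoked from the command line. The
# option names mirror the attributes read inside train(); the default values
# (and the 'sst2' task key) are assumptions for illustration, not taken from
# the original script.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--pretrained_path', default='bert-base-uncased')
    parser.add_argument('--task_name', default='sst2')  # hypothetical key in TASK_NAMES
    parser.add_argument('--data_dir', default='./data')
    parser.add_argument('--save_dir', default='./checkpoints')
    parser.add_argument('--log_dir', default='./logs')
    parser.add_argument('--max_seq_len', type=int, default=128)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=3)
    parser.add_argument('--lr', type=float, default=2e-5)
    opts = parser.parse_args()
    train(opts)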
    'SEP': 59,
    'PAD': 60,
    'Other': 61,
}

idx_to_event = {value: key for key, value in event_type_dict.items()}
idx_to_role = {value: key for key, value in role_type_dict.items()}
idx_to_entity = {value: key for key, value in entity_type_dict.items()}
print(event_type_dict)
print(entity_type_dict)
print(role_type_dict)

"""set up tokenizer"""
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

"""set up model"""
model = BertForSequenceClassification.from_pretrained(
    './model_save_ACE05_doc_time_order_pretrain_batch16')
model.cuda(2)


def trim_batch(input_ids, pad_token_id, role_type_ids, entity_type_ids,
               labels, attention_mask=None):
    """Remove columns that are populated exclusively by pad_token_id."""
    keep_column_mask = input_ids.ne(pad_token_id).any(dim=0)
    if attention_mask is None:
        return (input_ids[:, keep_column_mask], None,
                role_type_ids[:, keep_column_mask],
                entity_type_ids[:, keep_column_mask], labels)
    # The else-branch was truncated in the source; completing it by trimming
    # the attention mask with the same column mask.
    return (input_ids[:, keep_column_mask],
            attention_mask[:, keep_column_mask],
            role_type_ids[:, keep_column_mask],
            entity_type_ids[:, keep_column_mask], labels)
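# Illustrative check of trim_batch on toy tensors (pad id 0 assumed): the two
# trailing columns are pure padding in every row, so they are dropped from all
# per-token tensors at once.
import torch

ids = torch.tensor([[5, 6, 0, 0], [7, 0, 0, 0]])
roles = torch.tensor([[1, 2, 0, 0], [3, 0, 0, 0]])
ents = torch.tensor([[4, 4, 0, 0], [9, 0, 0, 0]])
mask = (ids != 0).long()
labels = torch.tensor([0, 1])

ids_t, mask_t, roles_t, ents_t, labels_t = trim_batch(
    ids, 0, roles, ents, labels, attention_mask=mask)
assert ids_t.shape[1] == 2  # columns 2 and 3 were exclusively padding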
# We'll take training samples in random order.
# train_dataloader = DataLoader(
#     train_dataset,                          # The training samples.
#     sampler=RandomSampler(train_dataset),   # Select batches randomly.
#     batch_size=batch_size                   # Trains with this batch size.
# )

# For validation the order doesn't matter, so we'll just read them sequentially.
validation_dataloader = DataLoader(
    val_dataset,                              # The validation samples.
    sampler=SequentialSampler(val_dataset),   # Pull out batches sequentially.
    batch_size=batch_size                     # Evaluate with this batch size.
)

"""setup model, optimizer"""
model = BertForSequenceClassification.from_pretrained(
    './model_save_pretrain_time_classifier_temp')
# optimizer = AdamW(model.parameters(), lr=1e-5)
# model.config = model.config.from_dict(pretrain_config)

"""test"""
# outputs = model(b_input_ids, attention_mask=b_input_mask,
#                 role_type_ids=b_role_type_ids, entity_type_ids=b_entity_type_ids,
#                 labels=b_labels)
# input_sent = "The death toll climbed to 99 on Sunday after a suicide car bomb exploded Friday in the middle of a group of men playing volleyball in northwest Pakistan, police said."
# input_tokens = [
#     "The",
#     "death",
#     "toll",
#     "climbed",
#     "to",
#     "99",
#     "on",
#     "Sunday",
#     "after",
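# A sketch of an evaluation pass over validation_dataloader. It assumes each
# batch unpacks in the same order as the commented-out test call above
# (input ids, attention mask, role/entity type ids, labels), and that the
# model returns a dict-style output with .logits (return_dict enabled, as in
# the other fragments of these scripts); both are assumptions.
import torch

model.eval()
correct, total = 0, 0
with torch.no_grad():
    for batch in validation_dataloader:
        b_input_ids, b_input_mask, b_role_type_ids, b_entity_type_ids, b_labels = batch
        outputs = model(b_input_ids, attention_mask=b_input_mask,
                        role_type_ids=b_role_type_ids,
                        entity_type_ids=b_entity_type_ids,
                        labels=b_labels)
        preds = outputs.logits.argmax(dim=-1)
        correct += (preds == b_labels).sum().item()
        total += b_labels.size(0)
print('validation accuracy:', correct / total)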
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm

from configuration_bert import BertConfig
from modeling_bert import BertForSequenceClassification

# Load Data (patents() is the project-local dataset loader)
batch_size = 32
train_dataset, val_dataset = patents('./')
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)
valid_dataloader = DataLoader(val_dataset, batch_size=batch_size)

# Load model
config = BertConfig()
config.num_labels = 4
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', config=config)

# Training
optimizer = optim.Adam(model.parameters(), lr=2e-5, eps=1e-8)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
epoches = 8
model.to(device)


def accuracy(logits, labels):
    pred_labels = F.log_softmax(logits, dim=-1).argmax(-1)
    return torch.eq(pred_labels, labels).sum().item() / len(labels)


def train():
    train_loss = 0
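# The body of train() is truncated above. A minimal sketch of how such an
# epoch loop could continue, using the optimizer, dataloaders, and device
# defined above; the (input_ids, attention_mask, labels) batch layout is an
# assumption about what patents() returns, and outputs[0] being the loss
# follows the HF-style convention when labels are passed.
def train_sketch():
    model.train()
    train_loss = 0
    for input_ids, attention_mask, labels in tqdm(train_dataloader):
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs[0]  # (loss, logits, ...) when labels are supplied
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    return train_loss / len(train_dataloader)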
# We'll take training samples in random order.
train_dataloader = DataLoader(
    train_dataset,                            # The training samples.
    sampler=RandomSampler(train_dataset),     # Select batches randomly.
    batch_size=batch_size                     # Trains with this batch size.
)

# For validation the order doesn't matter, so we'll just read them sequentially.
validation_dataloader = DataLoader(
    val_dataset,                              # The validation samples.
    sampler=SequentialSampler(val_dataset),   # Pull out batches sequentially.
    batch_size=batch_size                     # Evaluate with this batch size.
)

"""setup model, optimizer"""
model = BertForSequenceClassification.from_pretrained(
    pretrained_model_name, return_dict=True, output_hidden_states=True, num_labels=4)
optimizer = AdamW(model.parameters(), lr=1e-5)
# model.config = model.config.from_dict(pretrain_config)

"""test"""
# outputs = model(b_input_ids, attention_mask=b_input_mask,
#                 role_type_ids=b_role_type_ids, entity_type_ids=b_entity_type_ids,
#                 labels=b_labels)
# input_sent = "The death toll climbed to 99 on Sunday after a suicide car bomb exploded Friday in the middle of a group of men playing volleyball in northwest Pakistan, police said."
# input_tokens = [
#     "The",
#     "death",
#     "toll",
#     "climbed",
#     "to",
#     "99",
#     "on",
#     "Sunday",
#     "after",
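# BERT fine-tuning scripts commonly pair AdamW with a linear warmup/decay
# schedule; a sketch using transformers' get_linear_schedule_with_warmup.
# The epoch count here is an assumption (the original fragment does not show one).
from transformers import get_linear_schedule_with_warmup

epochs = 4  # assumed for illustration
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps)
# Inside the training loop, call scheduler.step() right after optimizer.step().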
pretrain_config['use_return_dict'] = True
pretrain_config['output_hidden_states'] = True
pretrain_config['num_labels'] = len(event_type_dict)
print('event_type_dict length:', len(event_type_dict))
configuration = BertConfig.from_dict(pretrain_config)
configuration.update(pretrain_config)
# print(configuration)
# quit()

# pretrained_model_from_ACE = './model_save_ACE05_Haoyang_4tuple_finetune_on_doctimeorder_spanBert_base'
# pretrained_model_from_ACE = './model_save_ACE05_doc_time_order_pretrain_batch16'
# model = BertForSequenceClassification.from_pretrained('bert-base-cased', config=configuration)
# model = BertForSequenceClassification.from_pretrained(pretrained_model_from_ACE, config=configuration)
model = BertForSequenceClassification.from_pretrained(pretrained_model_name, config=configuration)
# model = BertForSequenceClassification.from_pretrained('bert-large-cased-whole-word-masking', config=configuration)
optimizer = AdamW(model.parameters(), lr=1e-5)
# model.config = model.config.from_dict(pretrain_config)

# use cuda; GPU visibility must be set through the environment (before CUDA
# is initialized) -- the original bare tuple assignment had no effect.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0,1,2")
if torch.cuda.is_available():
    dev = "cuda:2"
else:
    dev = "cpu"
device = torch.device(dev)
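# With three GPUs made visible above, the single-device placement could be
# swapped for torch.nn.DataParallel. This is a sketch of an alternative, not
# what the original does (the original pins everything to cuda:2).
model.to(device)
if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)  # splits each batch across visible GPUs
    # Under DataParallel each replica returns its own loss, so reduce it
    # before backward(), e.g. loss = outputs.loss.mean().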
pretrain_config['add_cross_attention'] = False
pretrain_config['use_return_dict'] = True
pretrain_config['output_hidden_states'] = True
pretrain_config['num_labels'] = len(event_type_dict)
print('event_type_dict length:', len(event_type_dict))
configuration = BertConfig.from_dict(pretrain_config)
configuration.update(pretrain_config)
# print(configuration)
# quit()

pretrained_model_from_ACE = './model_save_ACE05_Haoyang_4tuple_pretrain_with_trigger_role_embed_bert_base_9_8'
# pretrained_model_from_ACE = './model_save_ACE05_doc_time_order_pretrain_batch16'
# model = BertForSequenceClassification.from_pretrained('bert-base-cased', config=configuration)
# model = BertForSequenceClassification.from_pretrained(pretrained_model_from_ACE, config=configuration)
# Note: this variant loads the checkpoint with its own saved config; the
# `configuration` built above is only used by the commented alternatives.
model = BertForSequenceClassification.from_pretrained(pretrained_model_from_ACE)
# model = BertForSequenceClassification.from_pretrained('bert-large-cased-whole-word-masking', config=configuration)
optimizer = AdamW(model.parameters(), lr=1e-5)
# model.config = model.config.from_dict(pretrain_config)

# use cuda; GPU visibility must be set through the environment (before CUDA
# is initialized) -- the original bare tuple assignment had no effect.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0,1,2")
if torch.cuda.is_available():
    dev = "cuda:2"
else:
    dev = "cpu"
device = torch.device(dev)
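# A sketch of one training step on `device`, using the extended forward
# signature shown in the commented-out test calls earlier (role_type_ids /
# entity_type_ids). The batch tuple layout is an assumption, and .loss
# assumes the checkpoint was saved with return_dict enabled.
model.to(device)


def training_step(batch):
    """One optimization step; assumes batch = (input_ids, attention_mask,
    role_type_ids, entity_type_ids, labels)."""
    b_input_ids, b_input_mask, b_role_type_ids, b_entity_type_ids, b_labels = (
        t.to(device) for t in batch)
    model.train()
    optimizer.zero_grad()
    outputs = model(b_input_ids, attention_mask=b_input_mask,
                    role_type_ids=b_role_type_ids,
                    entity_type_ids=b_entity_type_ids,
                    labels=b_labels)
    outputs.loss.backward()
    optimizer.step()
    return outputs.loss.item()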