# Build the BERT entity-marker encoder for wiki80 (inputs truncated to 80
# word-piece tokens).
sentence_encoder = opennre.encoder.BERTEntityEncoder(
    max_length=80,
    pretrain_path=os.path.join(root_path, 'pretrain/bert-base-uncased'))

# Softmax classification head over the wiki80 relation inventory.
model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)

# Sentence-level RE training framework; note the validation file doubles as
# the test file here.
framework = opennre.framework.SentenceRE(
    train_path=os.path.join(root_path, 'benchmark/wiki80/wiki80_train.txt'),
    val_path=os.path.join(root_path, 'benchmark/wiki80/wiki80_val.txt'),
    test_path=os.path.join(root_path, 'benchmark/wiki80/wiki80_val.txt'),
    model=model,
    ckpt=ckpt,
    batch_size=64,  # Modify the batch size w.r.t. your device
    max_epoch=10,
    lr=2e-5,
    opt='adamw')

# Train, then reload the best checkpoint and evaluate it on the test loader.
framework.train_model()
framework.load_state_dict(torch.load(ckpt)['state_dict'])
result = framework.eval_model(framework.test_loader)
print('Accuracy on test set: {}'.format(result['acc']))
import torch
import numpy as np
import json
import opennre
from opennre import encoder, model, framework

# Checkpoint destination and relation->id mapping for SemEval.
ckpt = 'ckpt/semeval_bertentity_softmax.pth.tar'
rel2id = json.load(open('benchmark/semeval/semeval_rel2id.json'))

# BERT entity-marker encoder (80 word-piece tokens) plus a softmax head.
sentence_encoder = opennre.encoder.BERTEntityEncoder(
    max_length=80,
    pretrain_path='pretrain/bert-base-uncased')
model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)

# Sentence-level RE training framework over the SemEval splits.
framework = opennre.framework.SentenceRE(
    train_path='benchmark/semeval/semeval_train.txt',
    val_path='benchmark/semeval/semeval_val.txt',
    test_path='benchmark/semeval/semeval_test.txt',
    model=model,
    ckpt=ckpt,
    batch_size=64,
    max_epoch=10,
    lr=3e-5,
    opt='bert_adam')

# Train (model selection on micro F1), then reload the best checkpoint and
# evaluate on the held-out test split.
framework.train_model(metric='micro_f1')
framework.load_state_dict(torch.load(ckpt)['state_dict'])
result = framework.eval_model(framework.test_loader)
print('Accuracy on test set: {}'.format(result['acc']))
print('Micro Precision: {}'.format(result['micro_p']))
print('Micro Recall: {}'.format(result['micro_r']))
print('Micro F1: {}'.format(result['micro_f1']))
# Softmax classifier on top of the sentence encoder.
model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)

# Sentence-level RE training framework driven by CLI arguments; this variant
# optimizes with plain SGD and explicit weight decay.
framework = opennre.framework.SentenceRE(train_path=args.train_file,
                                         val_path=args.val_file,
                                         test_path=args.test_file,
                                         model=model,
                                         ckpt=ckpt,
                                         batch_size=args.batch_size,
                                         max_epoch=args.max_epoch,
                                         lr=args.lr,
                                         weight_decay=args.weight_decay,
                                         opt='sgd')

# Train unless we are only evaluating an existing checkpoint.
if not args.only_test:
    framework.train_model(args.metric)

# Reload the best checkpoint and evaluate on the test split.
framework.load_state_dict(torch.load(ckpt)['state_dict'])
result = framework.eval_model(framework.test_loader)

# Report the metric family matching the model-selection criterion.
logging.info('Test set results:')
if args.metric == 'acc':
    logging.info('Accuracy: {}'.format(result['acc']))
else:
    logging.info('Micro precision: {}'.format(result['micro_p']))
    logging.info('Micro recall: {}'.format(result['micro_r']))
    logging.info('Micro F1: {}'.format(result['micro_f1']))
# NOTE(review): this bare `raise` presumably terminates an unsupported-pooler
# `if/elif` branch whose header lies outside this chunk — confirm upstream; as
# written here it makes the statements below unreachable.
raise NotImplementedError

# Softmax classifier on top of the sentence encoder.
model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)

# Sentence-level RE training framework driven by CLI arguments (AdamW).
framework = opennre.framework.SentenceRE(train_path=args.train_file,
                                         val_path=args.val_file,
                                         test_path=args.test_file,
                                         model=model,
                                         ckpt=ckpt,
                                         batch_size=args.batch_size,
                                         max_epoch=args.max_epoch,
                                         lr=args.lr,
                                         opt='adamw')

# Train (model selection on micro F1) unless we are only evaluating.
if not args.only_test:
    framework.train_model('micro_f1')

# Reload the best checkpoint and evaluate on the test split.
framework.load_state_dict(torch.load(ckpt)['state_dict'])
result = framework.eval_model(framework.test_loader)

# Report all metrics.
logging.info('Test set results:')
logging.info('Accuracy: {}'.format(result['acc']))
logging.info('Micro precision: {}'.format(result['micro_p']))
logging.info('Micro recall: {}'.format(result['micro_r']))
logging.info('Micro F1: {}'.format(result['micro_f1']))
print('args.batch_size,', args.batch_size)

# Sentence-level RE training framework
# (SentenceRE -> SentenceRELoader -> SentenceREDataset;
# see opennre/framework/data_loader.py:106, num_workers=0).
framework = opennre.framework.SentenceRE(
    train_path=args.train_file,
    val_path=args.val_file,
    test_path=args.test_file,
    model=model,
    ckpt=ckpt,
    batch_size=args.batch_size,  # TODO: batch size / num_workers=0
    max_epoch=args.max_epoch,
    lr=args.lr,
    opt='adamw')

# Train (model selection on micro F1) unless we are only evaluating.
# TODO: revisit train/val handling.
if not args.only_test:
    framework.train_model('micro_f1')
    print('train-model saved')

# Reload the best checkpoint and evaluate on the test split.
framework.load_state_dict(torch.load(ckpt)['state_dict'])
result = framework.eval_model(framework.test_loader)

# Report all metrics.
logging.info('Test set results:')
logging.info('Accuracy: {}'.format(result['acc']))
logging.info('Micro precision: {}'.format(result['micro_p']))
logging.info('Micro recall: {}'.format(result['micro_r']))
logging.info('Micro F1: {}'.format(result['micro_f1']))
def train(args):
    """Train and evaluate a BERT-based sentence-level relation extractor.

    Resolves dataset file paths (downloading a known benchmark when
    ``args.dataset`` is not ``'none'``), builds the encoder, classifier and
    training framework, optionally trains, then logs test-set metrics.

    Raises:
        Exception: when ``args.dataset == 'none'`` and the explicit
            train/val/test/rel2id files are missing.
        NotImplementedError: for an unrecognized ``args.pooler``.
    """
    # Basic settings: benchmarks live under data_dir, checkpoints under
    # model_dir/ckpt.
    root_path = args.data_dir
    sys.path.append(root_path)
    if not os.path.exists('ckpt'):
        os.mkdir('ckpt')
    if len(args.ckpt) == 0:
        args.ckpt = '{}_{}_{}'.format(args.dataset,
                                      args.pretrain_path.split('/')[-1],
                                      args.pooler)
    ckpt = os.path.join(args.model_dir, 'ckpt/{}.pth.tar'.format(args.ckpt))

    if args.dataset != 'none':
        # Best-effort download of a known benchmark; failures are ignored so
        # a pre-downloaded local copy still works offline.  (Narrowed from a
        # bare `except:` which also swallowed KeyboardInterrupt/SystemExit.)
        try:
            opennre.download(args.dataset, root_path=root_path)
        except Exception:
            pass
        args.train_file = os.path.join(root_path, 'benchmark', args.dataset,
                                       '{}_train.txt'.format(args.dataset))
        args.val_file = os.path.join(root_path, 'benchmark', args.dataset,
                                     '{}_val.txt'.format(args.dataset))
        args.test_file = os.path.join(root_path, 'benchmark', args.dataset,
                                      '{}_test.txt'.format(args.dataset))
        if not os.path.exists(args.test_file):
            # logging.warn is deprecated; logging.warning is the supported
            # spelling.
            logging.warning(
                "Test file {} does not exist! Use val file instead".format(
                    args.test_file))
            args.test_file = args.val_file
        args.rel2id_file = os.path.join(root_path, 'benchmark', args.dataset,
                                        '{}_rel2id.json'.format(args.dataset))
        # wiki80 is evaluated by accuracy; other benchmarks by micro F1.
        if args.dataset == 'wiki80':
            args.metric = 'acc'
        else:
            args.metric = 'micro_f1'
    else:
        if not (os.path.exists(args.train_file) and os.path.exists(
                args.val_file) and os.path.exists(args.test_file)
                and os.path.exists(args.rel2id_file)):
            # Fixed: this message previously contained a raw newline that
            # split the string literal across source lines (a syntax error).
            raise Exception(
                '--train_file, --val_file, --test_file and --rel2id_file are '
                'not specified or files do not exist. Or specify --dataset')

    logging.info('Arguments:')
    for arg in vars(args):
        logging.info(' {}: {}'.format(arg, getattr(args, arg)))

    # NOTE(review): args.rel2id_file is deliberately bypassed in favour of a
    # hard-coded 'finre_rel2id.json' — confirm this override is intended.
    # rel2id = json.load(open(args.rel2id_file))
    rel2id = json.load(
        open(
            os.path.join(root_path, 'benchmark', args.dataset,
                         'finre_rel2id.json')))

    # Sentence encoder: entity-marker pooling or [CLS] pooling.
    if args.pooler == 'entity':
        sentence_encoder = opennre.encoder.BERTEntityEncoder(
            max_length=args.max_length,
            pretrain_path=args.pretrain_path,
            mask_entity=args.mask_entity)
    elif args.pooler == 'cls':
        sentence_encoder = opennre.encoder.BERTEncoder(
            max_length=args.max_length,
            pretrain_path=args.pretrain_path,
            mask_entity=args.mask_entity)
    else:
        raise NotImplementedError

    # Softmax classifier and the full training framework.
    model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)
    framework = opennre.framework.SentenceRE(train_path=args.train_file,
                                             val_path=args.val_file,
                                             test_path=args.test_file,
                                             model=model,
                                             ckpt=ckpt,
                                             batch_size=args.batch_size,
                                             max_epoch=args.max_epoch,
                                             lr=args.lr,
                                             opt='adamw')

    # Train (model selection on micro F1) unless we are only evaluating.
    if not args.only_test:
        framework.train_model('micro_f1')

    # Reload the best checkpoint and evaluate on the test split.
    framework.load_state_dict(torch.load(ckpt)['state_dict'])
    result = framework.eval_model(framework.test_loader)

    # Print the result
    logging.info('Test set results:')
    logging.info('Accuracy: {}'.format(result['acc']))
    logging.info('Micro precision: {}'.format(result['micro_p']))
    logging.info('Micro recall: {}'.format(result['micro_r']))
    logging.info('Micro F1: {}'.format(result['micro_f1']))