Example #1
import logging

import opennre


def download_dataset(args):
    if args.dataset is not None:
        if args.dataset in ['wiki80', 'semeval']:
            opennre.download(args.dataset, root_path=args.root_path)
        elif args.dataset == 'tacred':
            logging.warning(
                'TACRED is released via the Linguistic Data Consortium (LDC). Please download it from https://catalog.ldc.upenn.edu/LDC2018T24'
            )
        else:
            raise Exception(
                'For sentence-level RE, Dataset must be one of [`wiki80`, `tacred`, `semeval`].'
            )
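# Minimal usage sketch (hypothetical namespace; the attribute names simply
# mirror what the helper above reads):
from argparse import Namespace

download_dataset(Namespace(dataset='wiki80', root_path='.'))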
Example #2

import sys, json
import torch
import os
import numpy as np
import opennre
from opennre import encoder, model, framework

# Some basic settings
root_path = '.'
sys.path.append(root_path)
if not os.path.exists('ckpt'):
    os.mkdir('ckpt')
ckpt = 'ckpt/wiki80_bertentity_softmax.pth.tar'

# Check data
opennre.download('wiki80', root_path=root_path)
opennre.download('bert_base_uncased', root_path=root_path)
rel2id = json.load(
    open(os.path.join(root_path, 'benchmark/wiki80/wiki80_rel2id.json')))

# Define the sentence encoder
sentence_encoder = opennre.encoder.BERTEntityEncoder(
    max_length=80,
    pretrain_path=os.path.join(root_path, 'pretrain/bert-base-uncased'))

# Define the model
model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)

# Define the whole training framework
framework = opennre.framework.SentenceRE(
    train_path=os.path.join(root_path, 'benchmark/wiki80/wiki80_train.txt'),
    # The call was cut off here; the remaining arguments follow the pattern in
    # Example #7. Paths and hyperparameters are assumptions (OpenNRE's wiki80
    # examples use the val split for testing).
    val_path=os.path.join(root_path, 'benchmark/wiki80/wiki80_val.txt'),
    test_path=os.path.join(root_path, 'benchmark/wiki80/wiki80_val.txt'),
    model=model,
    ckpt=ckpt,
    batch_size=64,
    max_epoch=10,
    lr=2e-5,
    opt='adamw')
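# Hedged continuation (not in the original snippet): training and evaluation
# would follow Example #7's flow.
framework.train_model('acc')  # wiki80 is evaluated by accuracy
framework.load_state_dict(torch.load(ckpt)['state_dict'])
result = framework.eval_model(framework.test_loader)
print('Accuracy: {}'.format(result['acc']))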
Example #3
# Reconstructed preamble: the snippet began mid-argument. The imports, parser
# setup, and the default value below are assumptions based on the other examples.
import json
import os
import argparse

import numpy as np
import opennre

parser = argparse.ArgumentParser()
parser.add_argument('--weight_decay',
                    default=1e-5,
                    type=float,
                    help='Weight decay')
parser.add_argument('--max_epoch',
                    default=100,
                    type=int,
                    help='Max number of training epochs')

args = parser.parse_args()

# Some basic settings
root_path = '.'
if not os.path.exists('ckpt'):
    os.mkdir('ckpt')
ckpt = 'ckpt/{}.pth.tar'.format(args.ckpt)

# Check data
opennre.download('nyt10', root_path=root_path)
opennre.download('glove', root_path=root_path)
rel2id = json.load(
    open(os.path.join(root_path, 'benchmark/nyt10/nyt10_rel2id.json')))
wordi2d = json.load(
    open(os.path.join(root_path, 'pretrain/glove/glove.6B.50d_word2id.json')))
word2vec = np.load(
    os.path.join(root_path, 'pretrain/glove/glove.6B.50d_mat.npy'))

# Define the sentence encoder
sentence_encoder = opennre.encoder.PCNNEncoder(token2id=wordi2d,
                                               max_length=120,
                                               word_size=50,
                                               position_size=5,
                                               hidden_size=230,
                                               blank_padding=True,
                                               kernel_size=3,
                                               padding_size=1,
                                               word2vec=word2vec,
                                               dropout=0.5)
# (The call was cut off after blank_padding; kernel_size onward follows the
# encoder setup in Example #6, with dropout assumed.)
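# Hedged sketch (not in the original snippet): in OpenNRE's distantly
# supervised nyt10 examples, a PCNN encoder like the one above is typically
# paired with a bag-attention model and the BagRE framework. The
# hyperparameters here are illustrative assumptions.
model = opennre.model.BagAttention(sentence_encoder, len(rel2id), rel2id)
framework = opennre.framework.BagRE(
    train_path=os.path.join(root_path, 'benchmark/nyt10/nyt10_train.txt'),
    val_path=os.path.join(root_path, 'benchmark/nyt10/nyt10_val.txt'),
    test_path=os.path.join(root_path, 'benchmark/nyt10/nyt10_test.txt'),
    model=model,
    ckpt=ckpt,
    batch_size=160,
    max_epoch=args.max_epoch,
    lr=0.1,
    opt='sgd')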
Example #4

# Reconstructed opening: the snippet began mid-argument. Imports and parser
# setup are assumed to match Example #3; '--max_epoch' mirrors it exactly.
import sys
import os
import argparse

import opennre

parser = argparse.ArgumentParser()
parser.add_argument('--max_epoch',
                    default=100,
                    type=int,
                    help='Max number of training epochs')

args = parser.parse_args()

# Some basic settings
root_path = '.'
sys.path.append(root_path)
if not os.path.exists('./ckpt'):
    os.mkdir('./ckpt')
if len(args.ckpt) == 0:
    args.ckpt = '{}_{}'.format(args.dataset, 'cnn')
ckpt = 'ckpt/{}.pth.tar'.format(args.ckpt)

if args.dataset != 'none':
    opennre.download(args.dataset, root_path=root_path)
    args.train_file = os.path.join(root_path, 'benchmark', args.dataset,
                                   '{}_train.txt'.format(args.dataset))
    args.val_file = os.path.join(root_path, 'benchmark', args.dataset,
                                 '{}_val.txt'.format(args.dataset))
    args.test_file = os.path.join(root_path, 'benchmark', args.dataset,
                                  '{}_test.txt'.format(args.dataset))
    args.rel2id_file = os.path.join(root_path, 'benchmark', args.dataset,
                                    '{}_rel2id.json'.format(args.dataset))
    if args.dataset == 'wiki80':
        args.metric = 'acc'
    else:
        args.metric = 'micro_f1'
else:
    if not (os.path.exists(args.train_file) and os.path.exists(args.val_file)
            and os.path.exists(args.test_file)
            and os.path.exists(args.rel2id_file)):
        raise Exception(
            '--train_file, --val_file, --test_file and --rel2id_file are not specified or files do not exist. Or specify --dataset'
        )
Example #5
import opennre


def download_pretrain(args):
    if 'bert' in args.encoder:
        opennre.download('bert_base_uncased', root_path=args.root_path)
    elif 'cnn' in args.encoder:
        opennre.download('glove', root_path=args.root_path)
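# Minimal usage sketch (hypothetical namespace; the attribute names mirror
# what the helper above reads):
from argparse import Namespace

download_pretrain(Namespace(encoder='bert-entity', root_path='.'))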
Example #6
import sys, json
import torch
import os
import numpy as np
import opennre
from opennre import encoder, model, framework

# Some basic settings
root_path = '.'
if not os.path.exists('ckpt'):
    os.mkdir('ckpt')
ckpt = 'ckpt/wiki80_cnn_softmax.pth.tar'

# Check data
opennre.download('wiki80', root_path=root_path)
opennre.download('glove', root_path=root_path)
rel2id = json.load(open(os.path.join(root_path, 'benchmark/wiki80/wiki80_rel2id.json')))
wordi2d = json.load(open(os.path.join(root_path, 'pretrain/glove/glove.6B.50d_word2id.json')))
word2vec = np.load(os.path.join(root_path, 'pretrain/glove/glove.6B.50d_mat.npy'))

# Define the sentence encoder
sentence_encoder = opennre.encoder.CNNEncoder(
    token2id=wordi2d,
    max_length=40,
    word_size=50,
    position_size=5,
    hidden_size=230,
    blank_padding=True,
    kernel_size=3,
    padding_size=1,
    word2vec=word2vec,
    dropout=0.5)
# (The call was cut off after word2vec; dropout assumed, matching Example #3.)
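# Hedged continuation (not in the original snippet): the encoder would then be
# wrapped in a softmax classifier, as in Example #2.
model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)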
Example #7
import sys, json
import os
import logging

import torch
import opennre


def train(args):
    # Some basic settings
    # root_path = '.'
    root_path = args.data_dir
    sys.path.append(root_path)
    if not os.path.exists('ckpt'):
        os.mkdir('ckpt')
    if len(args.ckpt) == 0:
        args.ckpt = '{}_{}_{}'.format(args.dataset,
                                      args.pretrain_path.split('/')[-1],
                                      args.pooler)
    ckpt = os.path.join(args.model_dir, 'ckpt/{}.pth.tar'.format(args.ckpt))

    if args.dataset != 'none':
        try:
            opennre.download(args.dataset, root_path=root_path)
        except Exception:
            # Download may fail (e.g. the dataset is already present locally
            # or is not hosted for download); fall back to local files.
            pass
        args.train_file = os.path.join(root_path, 'benchmark', args.dataset,
                                       '{}_train.txt'.format(args.dataset))
        args.val_file = os.path.join(root_path, 'benchmark', args.dataset,
                                     '{}_val.txt'.format(args.dataset))
        args.test_file = os.path.join(root_path, 'benchmark', args.dataset,
                                      '{}_test.txt'.format(args.dataset))
        if not os.path.exists(args.test_file):
            logging.warning(
                "Test file {} does not exist! Use val file instead".format(
                    args.test_file))
            args.test_file = args.val_file
        args.rel2id_file = os.path.join(root_path, 'benchmark', args.dataset,
                                        '{}_rel2id.json'.format(args.dataset))
        if args.dataset == 'wiki80':
            args.metric = 'acc'
        else:
            args.metric = 'micro_f1'
    else:
        if not (os.path.exists(args.train_file) and os.path.exists(
                args.val_file) and os.path.exists(args.test_file)
                and os.path.exists(args.rel2id_file)):
            raise Exception(
                '--train_file, --val_file, --test_file and --rel2id_file are not specified or files do not exist. Or specify --dataset'
            )

    logging.info('Arguments:')
    for arg in vars(args):
        logging.info('    {}: {}'.format(arg, getattr(args, arg)))

    # Load the relation-to-id mapping resolved above.
    rel2id = json.load(open(args.rel2id_file))

    # Define the sentence encoder
    if args.pooler == 'entity':
        sentence_encoder = opennre.encoder.BERTEntityEncoder(
            max_length=args.max_length,
            pretrain_path=args.pretrain_path,
            mask_entity=args.mask_entity)
    elif args.pooler == 'cls':
        sentence_encoder = opennre.encoder.BERTEncoder(
            max_length=args.max_length,
            pretrain_path=args.pretrain_path,
            mask_entity=args.mask_entity)
    else:
        raise NotImplementedError

    # Define the model
    model = opennre.model.SoftmaxNN(sentence_encoder, len(rel2id), rel2id)

    # Define the whole training framework
    framework = opennre.framework.SentenceRE(train_path=args.train_file,
                                             val_path=args.val_file,
                                             test_path=args.test_file,
                                             model=model,
                                             ckpt=ckpt,
                                             batch_size=args.batch_size,
                                             max_epoch=args.max_epoch,
                                             lr=args.lr,
                                             opt='adamw')

    # Train the model
    if not args.only_test:
        framework.train_model(args.metric)

    # Test
    framework.load_state_dict(torch.load(ckpt)['state_dict'])
    result = framework.eval_model(framework.test_loader)

    # Print the result
    logging.info('Test set results:')
    logging.info('Accuracy: {}'.format(result['acc']))
    logging.info('Micro precision: {}'.format(result['micro_p']))
    logging.info('Micro recall: {}'.format(result['micro_r']))
    logging.info('Micro F1: {}'.format(result['micro_f1']))
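# Hedged sketch (not part of the original script): a trained checkpoint can be
# used for single-sentence prediction via the model's infer() call, as shown
# in OpenNRE's README. The sentence and entity offsets below are illustrative.
def predict_example(model, ckpt):
    model.load_state_dict(torch.load(ckpt)['state_dict'])
    relation, score = model.infer({
        'text': 'He was the son of Máel Dúin mac Máele Fithrich, and grandson '
                'of the high king Áed Uaridnach (died 612).',
        'h': {'pos': (18, 46)},
        't': {'pos': (78, 91)}
    })
    logging.info('Predicted relation: {} (score: {:.4f})'.format(relation, score))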