'''
Example 1: preprocessing.

In the meantime, we do preprocessing such as capitalizing the first
character of a sentence or normalizing digits.
'''
import os

from collections import Counter
from nltk.parse import CoreNLPDependencyParser
import numpy as np
import argparse
from tqdm import tqdm

from io_utils import read_yaml, save_pickle, read_pickle
from str_utils import normalize_tok
from vocab import Vocab
from sklearn.model_selection import train_test_split

# Load the experiment configuration; the command line may override the seed.
config = read_yaml('config.yaml')

# Placeholder description replaced with a meaningful one for `--help` output.
parser = argparse.ArgumentParser(
    description='preprocess the dataset; seeds RNGs from config.yaml with '
                'an optional --seed override')
parser.add_argument('--seed',
                    '-s',
                    required=False,
                    type=int,
                    default=config['random_seed'])
args = parser.parse_args()

# Write the effective seed back into the config so downstream code reads a
# single source of truth.
config['random_seed'] = args.seed
print('seed:', config['random_seed'])
print('the current file path:', os.getcwd())

# Seed numpy so any randomized preprocessing below is reproducible.
np.random.seed(config['random_seed'])

data_dir = config['data_dir']
# --- Example 2: joint training entry point ---
from io_utils import read_yaml
# Two separate YAML configs: model/training settings vs. data locations.
joint_config = read_yaml('joint_config.yaml')
data_config = read_yaml('data_config.yaml')

import numpy as np
import random
import dynet_config

print('seed:', joint_config['random_seed'])

# Seed every RNG source from the single configured seed for reproducibility.
random.seed(joint_config['random_seed'])
np.random.seed(joint_config['random_seed'])
# NOTE: dynet_config.set(...) is called BEFORE `import dynet` below —
# presumably DyNet reads these settings (autobatching, memory pool, seed)
# at import time, so keep this ordering.
dynet_config.set(autobatch=1,
                 mem='4096',
                 random_seed=joint_config['random_seed'])

# Deferred import: must come after dynet_config.set (see note above).
import dynet as dy
import nn
import ops
from dy_utils import ParamManager as pm
from dy_utils import AdamTrainer
from event_eval import EventEval
from io_utils import to_set, get_logger
from shift_reduce import ShiftReduce

logger = get_logger('transition', log_dir='log', log_name='trains.log')

# Dimension of optional precomputed sentence vectors; 0 means disabled
# unless the config switch below turns them on.
sent_vec_dim = 0
if joint_config['use_sentence_vec']:
    train_sent_file = data_config['train_sent_file']
    test_sent_file = data_config['test_sent_file']
# --- Example 3: precompute BERT embeddings for ACE05 ---
from flair.data import Sentence
from flair.models import SequenceTagger

from flair.embeddings import CharLMEmbeddings, StackedEmbeddings, BertEmbeddings
import os
import pickle

import numpy as np
from io_utils import read_yaml, read_lines, read_json_lines

# All data locations come from the shared YAML data configuration.
data_config = read_yaml('data_config.yaml')

data_dir = data_config['data_dir']
ace05_event_dir = data_config['ace05_event_dir']

# ACE05 event train/dev/test splits, stored as JSON-lines files
# (one instance per line; loaded via read_json_lines).
train_list = read_json_lines(
    os.path.join(ace05_event_dir, 'train_nlp_ner.json'))
dev_list = read_json_lines(os.path.join(ace05_event_dir, 'dev_nlp_ner.json'))
test_list = read_json_lines(os.path.join(ace05_event_dir, 'test_nlp_ner.json'))

train_sent_file = data_config['train_sent_file']

# Last-layer (-1) bert-base-uncased embeddings, moved to GPU 0.
# NOTE(review): hard-coded 'cuda:0' — this script requires a GPU as written.
bert = BertEmbeddings(layers='-1',
                      bert_model_or_path='bert-base-uncased').to('cuda:0')


def save_bert(inst_list, filter_tri=True, name='train'):
    sents = []
    sent_lens = []
    for inst in inst_list:
        words, trigger_list, ent_list, arg_list = inst['nlp_words'], inst[