Пример #1
0
# Directory passed to the checkpoint loader below.
frameBERT_dir = '/disk/data/models/frameBERT/frameBERT_en'

# Everything the loader needs besides the path is read from the bert_io
# helper: two label-set sizes and two lookup maps.
_frameBERT_kwargs = dict(
    num_senses=len(bert_io.sense2idx),
    num_args=len(bert_io.bio_arg2idx),
    lufrmap=bert_io.lufrmap,
    frargmap=bert_io.bio_frargmap,
)
frameBERT = FrameBERT.from_pretrained(frameBERT_dir, **_frameBERT_kwargs)

# Move the model to `device` and call eval() on it.
# NOTE(review): presumably FrameBERT is a torch module, in which case eval()
# switches it to inference behaviour — confirm against the class definition.
frameBERT.to(device)
frameBERT.eval()

# In[6]:

# Load the three FrameNet splits for English; `exem=True` is forwarded to the
# loader unchanged.
print('... loading FN data')
tic()  # NOTE(review): presumably starts a timer; no matching stop is visible here
trn, dev, tst = dataio.load_data(srl='framenet', language='en', exem=True)

# Disabled subsampling, kept for quick experiments:
# trn = random.sample(trn, k=500)
# dev = random.sample(trn, k=100)  # NOTE(review): samples from trn, not dev — confirm intent
# tst = random.sample(tst, k=100)

# Run the same converter over each split, in train/dev/test order.
print('... converting FN data to BERT')
trn_data, dev_data, tst_data = (
    bert_io.convert_to_bert_input_JointShallowSemanticParsing(split)
    for split in (trn, dev, tst)
)

# Load the two FrameNet 1.7 JSON resources shipped under koreanframenet.
# The files are opened explicitly as UTF-8: without `encoding`, open() falls
# back to the platform locale, which can mis-decode or fail on non-ASCII text.
# NOTE(review): judging by the names, frame2idx maps frame -> index and
# frame2definition maps frame -> definition text — confirm against the files.
with open('./koreanframenet/resource/info/fn1.7_frame2idx.json',
          'r', encoding='utf-8') as f:
    frame2idx = json.load(f)
with open('./koreanframenet/resource/info/fn1.7_frame_definitions.json',
          'r', encoding='utf-8') as f:
    frame2definition = json.load(f)

def_data, def_y = bert_io.convert_to_bert_input_label_definition(
Пример #2
0
# print('\t(ko):', len(trn))
# print('BATCH_SIZE:', batch_size)
# print('MAX_LEN:', MAX_LEN)
# print('')

# bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL)
# train()

# # (3) fine-tuning by Korean FrameNet

# In[9]:

# Run labelled "100%" (matching the "mulModel-100" directory name).
# NOTE(review): presumably this means the full Korean training split is
# used — confirm.

model_dir = '/disk/data/models/dict_framenet/mulModel-100/'
trn, dev, tst = dataio.load_data(srl=srl, language='ko')
# trn = random.sample(trn, k=20)
epochs = 50

# Run banner: each tuple is one print() call, with its items passed as
# separate positional arguments so the output matches line for line.
_banner_rows = (
    ('\nFineTuning Multilingual',),
    ('### TRAINING',),
    ('MODEL:', srl),
    ('LANGUAGE:', language),
    ('PRETRAINED BERT:', PRETRAINED_MODEL),
    ('training data:',),
    ('\t(ko):', len(trn)),
    ('BATCH_SIZE:', batch_size),
    ('MAX_LEN:', MAX_LEN),
    ('',),
)
for _row in _banner_rows:
    print(*_row)

bert_io = utils.for_BERT(mode='train',