# --- Load the pretrained English frameBERT model -------------------------
# NOTE(review): `FrameBERT`, `bert_io`, `dataio`, `device`, and `tic` are
# defined elsewhere in this file/project (not visible in this chunk).
frameBERT_dir = '/disk/data/models/frameBERT/frameBERT_en'
# Label-space sizes and LU->frame / frame->argument maps come from the
# project's `bert_io` helper; presumably built from FrameNet 1.7 — verify.
frameBERT = FrameBERT.from_pretrained(frameBERT_dir, num_senses=len(bert_io.sense2idx), num_args=len(bert_io.bio_arg2idx), lufrmap=bert_io.lufrmap, frargmap=bert_io.bio_frargmap)
frameBERT.to(device)
# Inference mode: disables dropout etc. for the loaded model.
frameBERT.eval()


# In[6]:


# --- Load English FrameNet data and convert it to BERT inputs ------------
print('... loading FN data')
tic()  # assumes tic() starts a wall-clock timer defined elsewhere — TODO confirm
trn, dev, tst = dataio.load_data(srl='framenet', language='en', exem=True)
# Debug-time subsampling, kept disabled:
# trn = random.sample(trn, k=500)
# dev = random.sample(trn, k=100)
# tst = random.sample(tst, k=100)
print('... converting FN data to BERT')
trn_data = bert_io.convert_to_bert_input_JointShallowSemanticParsing(trn)
dev_data = bert_io.convert_to_bert_input_JointShallowSemanticParsing(dev)
tst_data = bert_io.convert_to_bert_input_JointShallowSemanticParsing(tst)

# Frame-name -> index and frame-name -> textual-definition lookup tables
# (FrameNet 1.7 resources shipped with the koreanframenet package).
with open('./koreanframenet/resource/info/fn1.7_frame2idx.json', 'r') as f:
    frame2idx = json.load(f)
with open('./koreanframenet/resource/info/fn1.7_frame_definitions.json', 'r') as f:
    frame2definition = json.load(f)

# NOTE(review): the call below is truncated at this chunk boundary; its
# arguments continue outside the visible source.
def_data, def_y = bert_io.convert_to_bert_input_label_definition(
# print('\t(ko):', len(trn)) # print('BATCH_SIZE:', batch_size) # print('MAX_LEN:', MAX_LEN) # print('') # bert_io = utils.for_BERT(mode='train', srl=srl, language=language, masking=masking, fnversion=fnversion, pretrained=PRETRAINED_MODEL) # train() # # (3) fine-tuning by Korean FrameNet # In[9]: # by 100% model_dir = '/disk/data/models/dict_framenet/mulModel-100/' trn, dev, tst = dataio.load_data(srl=srl, language='ko') # trn = random.sample(trn, k=20) epochs = 50 print('\nFineTuning Multilingual') print('### TRAINING') print('MODEL:', srl) print('LANGUAGE:', language) print('PRETRAINED BERT:', PRETRAINED_MODEL) print('training data:') print('\t(ko):', len(trn)) print('BATCH_SIZE:', batch_size) print('MAX_LEN:', MAX_LEN) print('') bert_io = utils.for_BERT(mode='train',