Example #1
0
 def feature_to_torch(self, all_data):
     """Map raw token features to vocabulary ids and build a batch iterator.

     Each dict in ``all_data`` is mutated in place: ``snt_id``, ``lemma_id``,
     ``pos_id`` and ``ner_id`` are filled from the corresponding vocabularies
     in ``self.dicts``, and a random ``example_id`` is assigned when missing.

     Returns the single batch produced by the iterator plus the iterator
     itself: ``(order, idsBatch, srcBatch, src_charBatch, sourceBatch,
     srcBertBatch, data_iterator)``.  ``srcBertBatch`` is ``None`` when no
     BERT model is configured.
     """
     for data in all_data:
         if "example_id" not in data:
             # Assign a random 8-character alphanumeric id when none exists.
             data["example_id"] = ''.join(
                 random.choice(string.ascii_letters + string.digits)
                 for _ in range(8))
         data["snt_id"] = seq_to_id(self.dicts["word_dict"], data["tok"])[0]
         data["lemma_id"] = seq_to_id(self.dicts["lemma_dict"],
                                      data["lem"])[0]
         data["pos_id"] = seq_to_id(self.dicts["pos_dict"], data["pos"])[0]
         data["ner_id"] = seq_to_id(self.dicts["ner_dict"], data["ner"])[0]
     # Passing all_data directly makes the iterator expose everything as one
     # batch, so index 0 is the whole input.
     if self.opt.bert_model:
         data_iterator = BertDataIterator([],
                                          self.opt,
                                          self.dicts["ucca_rel_dict"],
                                          all_data=all_data)
         (order, idsBatch, srcBatch, src_charBatch,
          sourceBatch, srcBertBatch) = data_iterator[0]
     else:
         data_iterator = DataIterator([],
                                      self.opt,
                                      self.dicts["ucca_rel_dict"],
                                      all_data=all_data)
         (order, idsBatch, srcBatch, src_charBatch,
          sourceBatch) = data_iterator[0]
         srcBertBatch = None
     return (order, idsBatch, srcBatch, src_charBatch, sourceBatch,
             srcBertBatch, data_iterator)
Example #2
0
def main(opt):
    """Decode the test, dev and training splits with a pre-trained AMR model.

    Requires ``opt.train_from`` (checkpoint path) to be set.  For every
    pickled data file under ``opt.folder``'s ``test``/``dev``/``training``
    subfolders, runs graph generation and writes the model description to a
    ``model`` file next to the data folders.
    """
    dicts = read_dicts()
    # NOTE(review): kept as assert to preserve the original failure mode
    # (AssertionError); stripped under ``python -O``.
    assert opt.train_from, "opt.train_from (checkpoint path) must be set"
    with_jamr = "_with_jamr" if opt.jamr else "_without_jamr"
    suffix = ".pickle" + with_jamr + "_processed"

    trainingFilesPath = folder_to_files_path(opt.folder + "/training/", suffix)
    devFilesPath = folder_to_files_path(opt.folder + "/dev/", suffix)
    testFilesPath = folder_to_files_path(opt.folder + "/test/", suffix)

    AmrDecoder = parser.AMRProcessors.AMRDecoder(opt, dicts)
    AmrDecoder.eval()
    AmrModel, parameters, optt = load_old_model(dicts, opt, True)
    opt.start_epoch = 1

    # Record which checkpoint/configuration produced the outputs, two levels
    # above the first test file (the split folders' common parent).
    out = "/".join(testFilesPath[0].split("/")[:-2]) + "/model"
    with open(out, 'w') as outfile:
        outfile.write(opt.train_from + "\n")
        outfile.write(str(AmrModel) + "\n")
        outfile.write(str(optt) + "\n")
        outfile.write(str(opt))

    def _decode_files(label, files_path):
        # One decoding pass: build a one-file iterator and generate graphs.
        print('processing ' + label)
        for file in files_path:
            dev_data = DataIterator([file], opt, dicts["rel_dict"],
                                    volatile=True)
            generate_graph(AmrModel, AmrDecoder, dev_data, dicts, file)

    _decode_files('testing', testFilesPath)
    _decode_files('validation', devFilesPath)
    _decode_files('training', trainingFilesPath)
 def feature_to_torch(self, all_data):
     """Map raw token features to vocabulary ids and build a batch iterator.

     Each dict in ``all_data`` is mutated in place: ``snt_id``, ``lemma_id``,
     ``pos_id`` and ``ner_id`` are filled from the corresponding vocabularies
     in ``self.dicts``.

     Returns ``(order, srcBatch, src_sourceBatch, data_iterator)`` — the
     single batch produced by the iterator plus the iterator itself.
     """
     for data in all_data:
         data["snt_id"] = seq_to_id(self.dicts["word_dict"], data["tok"])[0]
         data["lemma_id"] = seq_to_id(self.dicts["lemma_dict"],
                                      data["lem"])[0]
         data["pos_id"] = seq_to_id(self.dicts["pos_dict"], data["pos"])[0]
         data["ner_id"] = seq_to_id(self.dicts["ner_dict"], data["ner"])[0]
     # Passing all_data directly makes the iterator expose everything as one
     # batch, so index 0 is the whole input.
     data_iterator = DataIterator([],
                                  self.opt,
                                  self.dicts["rel_dict"],
                                  volatile=True,
                                  all_data=all_data)
     order, srcBatch, src_sourceBatch = data_iterator[0]
     return order, srcBatch, src_sourceBatch, data_iterator