def add_JAMR_align(instances, aligned_amr_file):
    """Attach a parsed AMR and a JAMR-aligned gold graph to every instance.

    ``aligned_amr_file`` is read with ``readAMR``; entry ``i`` of the returned
    comments and AMR strings is paired with ``instances[i]``.  Each comment
    entry must provide an ``'alignments'`` value.  Instances are updated in
    place through ``addAMR`` and ``addGoldGraph``; nothing is returned.
    """
    comments, amr_strings = readAMR(aligned_amr_file)
    for idx, inst in enumerate(instances):
        parsed_amr = AMR.parse_string(amr_strings[idx])
        jamr_alignment = Aligner.readJAMRAlignment(
            parsed_amr, comments[idx]['alignments'])
        gold_graph = SpanGraph.init_ref_graph(
            parsed_amr, jamr_alignment, inst.tokens)
        # Let the graph run its pre_merge_netag step against this instance
        # before both results are stored on it.
        gold_graph.pre_merge_netag(inst)
        inst.addAMR(parsed_amr)
        inst.addGoldGraph(gold_graph)
Exemplo n.º 2
0
def add_JAMR_align(instances, aligned_amr_file):
    """Populate each instance with its AMR and its reference span graph.

    The comments and AMR strings come from ``readAMR(aligned_amr_file)`` and
    are matched to ``instances`` by position.  The alignment is taken from the
    ``'alignments'`` entry of the matching comment, and the reference graph is
    built over the instance's ``tokens``.  Works in place; returns ``None``.
    """
    comments, amr_strings = readAMR(aligned_amr_file)
    for position, instance in enumerate(instances):
        amr_graph = AMR.parse_string(amr_strings[position])
        raw_alignment = comments[position]['alignments']
        aligned = Aligner.readJAMRAlignment(amr_graph, raw_alignment)
        reference_graph = SpanGraph.init_ref_graph(
            amr_graph, aligned, instance.tokens)
        reference_graph.pre_merge_netag(instance)
        # Store the AMR first, then the gold graph, as the two final steps.
        instance.addAMR(amr_graph)
        instance.addGoldGraph(reference_graph)
Exemplo n.º 3
0
def _init_instances(sent_file, amr_strings, comments):
    """Parse ``sent_file`` and attach AMRs and gold graphs to the instances.

    The sentences are parsed with a fresh ``StanfordCoreNLP`` object.  The
    number of resulting instances must equal ``len(amr_strings)`` (checked
    with an assert).  For position ``i`` the AMR is parsed from
    ``amr_strings[i]``, aligned using ``comments[i]['alignments']``, and the
    reference graph is built from ``comments[i]['snt']``.

    Returns the list of instances, each updated in place.
    """
    print >> log, "Preprocess 1:pos, ner and dependency using stanford parser..."
    instances = StanfordCoreNLP().parse(sent_file)

    print >> log, "Preprocess 2:adding amr and generating gold graph"
    assert len(instances) == len(amr_strings)
    for idx, inst in enumerate(instances):
        parsed_amr = AMR.parse_string(amr_strings[idx])
        # The AMR is stored on the instance before the alignment is read.
        inst.addAMR(parsed_amr)
        comment = comments[idx]
        jamr_alignment = Aligner.readJAMRAlignment(
            parsed_amr, comment['alignments'])
        gold_graph = SpanGraph.init_ref_graph(
            parsed_amr, jamr_alignment, comment['snt'])
        gold_graph.pre_merge_netag(inst)
        inst.addGoldGraph(gold_graph)

    return instances
def _init_instances(sent_file, amr_strings, comments):
    """Build data instances from ``sent_file`` and pair them with their AMRs.

    Step 1 runs ``StanfordCoreNLP().parse`` over ``sent_file``.  Step 2
    asserts that there is one AMR string per instance, then for each position
    parses the AMR, reads the JAMR alignment from the ``'alignments'`` entry
    of the matching comment, and builds the reference graph from that
    comment's ``'snt'`` entry.

    Returns the instances after ``addAMR`` and ``addGoldGraph`` have been
    called on each of them.
    """
    print >> log, "Preprocess 1:pos, ner and dependency using stanford parser..."
    parser = StanfordCoreNLP()
    instances = parser.parse(sent_file)

    print >> log, "Preprocess 2:adding amr and generating gold graph"
    assert len(instances) == len(amr_strings)
    for position, instance in enumerate(instances):
        amr_graph = AMR.parse_string(amr_strings[position])
        instance.addAMR(amr_graph)
        aligned = Aligner.readJAMRAlignment(
            amr_graph, comments[position]['alignments'])
        reference_graph = SpanGraph.init_ref_graph(
            amr_graph, aligned, comments[position]['snt'])
        # pre_merge_netag runs before the graph is stored on the instance.
        reference_graph.pre_merge_netag(instance)
        instance.addGoldGraph(reference_graph)

    return instances
def preprocess(amr_file, START_SNLP=True):
    """Turn an AMR file into parsed instances carrying AMRs and gold graphs.

    Steps, in order:

    1. Read comments and AMR strings from ``amr_file + '.aligned'`` when that
       file exists, otherwise from ``amr_file`` itself.
    2. Write the ``'snt'`` entry of every comment to ``amr_file + '.sent'``
       unless that file already exists.
    3. Parse the sentence file with ``StanfordCoreNLP``; ``setup()`` is only
       called when ``START_SNLP`` is true.
    4. Write ``amr_file + '.tok'`` via ``_write_tok_amr`` unless it exists.
    5. Reset ``SpanGraph.graphID`` to 0 and, for each instance, parse its AMR,
       read the JAMR alignment from the comment's ``'alignments'`` entry and
       build the reference graph over the instance's tokens.

    Returns the list of instances.
    """
    aligned_amr_file = amr_file + '.aligned'
    amr_source = aligned_amr_file if os.path.exists(aligned_amr_file) else amr_file
    comments, amr_strings = readAMR(amr_source)

    sentences = [comment['snt'] for comment in comments]
    tmp_sentence_file = amr_file + '.sent'
    if not os.path.exists(tmp_sentence_file):
        _write_sentences(tmp_sentence_file, sentences)

    print >> log, "pos, ner and dependency..."
    proc = StanfordCoreNLP()
    if START_SNLP:
        proc.setup()
    instances = proc.parse(tmp_sentence_file)

    tok_amr_filename = amr_file + '.tok'
    if not os.path.exists(tok_amr_filename):
        _write_tok_amr(tok_amr_filename, amr_file, instances)

    # Graph numbering restarts from zero for every preprocessing run.
    SpanGraph.graphID = 0
    for idx, inst in enumerate(instances):
        parsed_amr = AMR.parse_string(amr_strings[idx])
        jamr_alignment = Aligner.readJAMRAlignment(
            parsed_amr, comments[idx]['alignments'])
        gold_graph = SpanGraph.init_ref_graph(
            parsed_amr, jamr_alignment, inst.tokens)
        # NOTE: unlike add_JAMR_align, pre_merge_netag is not applied here.
        inst.addAMR(parsed_amr)
        inst.addGoldGraph(gold_graph)

    return instances
Exemplo n.º 6
0
def preprocess(amr_file, START_SNLP=True):
    """Run the full preprocessing pipeline for ``amr_file``.

    The AMR data is read from ``amr_file + '.aligned'`` if present, else from
    ``amr_file``.  Two side files are created only when missing: the sentence
    file ``amr_file + '.sent'`` (from each comment's ``'snt'`` entry) and the
    tokenized AMR file ``amr_file + '.tok'``.  The sentence file is parsed
    with ``StanfordCoreNLP`` (``setup()`` is called first when ``START_SNLP``
    is true), and every resulting instance receives its parsed AMR and a
    reference graph built from the JAMR alignment in the comment's
    ``'alignments'`` entry.

    ``SpanGraph.graphID`` is reset to 0 before the graphs are built.

    Returns the list of instances.
    """
    aligned_amr_file = amr_file + '.aligned'
    use_aligned = os.path.exists(aligned_amr_file)
    comments, amr_strings = readAMR(aligned_amr_file if use_aligned else amr_file)

    sentences = [entry['snt'] for entry in comments]
    tmp_sentence_file = amr_file + '.sent'
    sentence_file_missing = not os.path.exists(tmp_sentence_file)
    if sentence_file_missing:
        _write_sentences(tmp_sentence_file, sentences)

    print >> log, "pos, ner and dependency..."
    parser = StanfordCoreNLP()
    if START_SNLP:
        parser.setup()
    instances = parser.parse(tmp_sentence_file)

    tok_amr_filename = amr_file + '.tok'
    tok_file_missing = not os.path.exists(tok_amr_filename)
    if tok_file_missing:
        _write_tok_amr(tok_amr_filename, amr_file, instances)

    SpanGraph.graphID = 0
    for position, instance in enumerate(instances):
        amr_graph = AMR.parse_string(amr_strings[position])
        raw_alignment = comments[position]['alignments']
        aligned = Aligner.readJAMRAlignment(amr_graph, raw_alignment)
        reference_graph = SpanGraph.init_ref_graph(
            amr_graph, aligned, instance.tokens)
        # The graph is stored as built; no pre_merge_netag call is made here.
        instance.addAMR(amr_graph)
        instance.addGoldGraph(reference_graph)

    return instances