Пример #1
0
    if direction == 'left':
        neighbor.append('*ST*')
    else:
        neighbor.append('*EN*')
    return neighbor


# Module-level EditDistance instance built with None as its only argument.
# NOTE(review): EditDistance is defined outside this excerpt -- confirm what
# passing None configures.
ed = EditDistance(None)

if __name__ == '__main__':
    # Script entry point: read the annotation file, split it into
    # '==='-separated sentence chunks and, for every line with an empty action
    # field, rebuild the sentence's graphs from the bracketed phrases on it.
    # annotation_file = sys.argv[1]
    # NOTE(review): the path is hard-coded (the argv variant above is disabled)
    # and the handle returned by codecs.open() is never closed explicitly.
    annotations = codecs.open('../web/annotation.txt', 'r', 'utf-8').read().strip()
    sentence_obj_list = []
    # s_idx counts every chunk, including blank ones that are skipped below,
    # so the ids given to Sentence objects may have gaps.
    for s_idx, sentence in enumerate(annotations.split('===')):
        if sentence.strip() != '':
            # Sentence is created with three empty-string arguments; its
            # graphs are filled in further down.
            S = Sentence(s_idx, '', '', '')
            prev_matches = None
            for sent in sentence.split('\n'):
                if sent.strip() != '':
                    # The action is the text between the first and second ':'
                    # (or up to end of line). A line without any ':' raises
                    # IndexError here.
                    action = sent.split(':')[1].strip()
                    # Non-greedy match of every '[...]' span on the line, then
                    # drop the surrounding brackets.
                    matches = re.findall(r'\[.*?\]', sent.strip())
                    matches = [m[1:-1] for m in matches]
                    # An empty action (re)initialises the graphs of S;
                    # `action` is already stripped, so the extra strip() is
                    # a no-op.
                    if action.strip() == '':
                        S.graphs = []
                        nodes_in_visible_order = []
                        # One Graph per bracketed phrase, identified by its
                        # position on the line.
                        for m_idx, m in enumerate(matches):
                            g = Graph(m_idx)
                            S.graphs.append(g)
                            # One Node per whitespace-separated word: id is the
                            # current size of g.nodes, en_id is the word's
                            # position inside the phrase.
                            for w_idx, w in enumerate(m.split()):
                                n = Node(id=len(g.nodes), s=w, en_id=w_idx, de_id=None, lang='en', visible=True)
                                # NOTE(review): presumably redundant with the
                                # visible=True keyword above -- confirm against
                                # Node.__init__.
                                n.visible = True
Пример #2
0
        neighbor.append('*ST*')
    else:
        neighbor.append('*EN*')
    return neighbor


# Module-level EditDistance instance built with None as its only argument.
# NOTE(review): EditDistance is defined outside this excerpt -- confirm what
# passing None configures.
ed = EditDistance(None)

if __name__ == '__main__':
    # Script entry point: read the annotation file, split it into
    # '==='-separated sentence chunks and, for every line with an empty action
    # field, rebuild the sentence's graphs from the bracketed phrases on it.
    # annotation_file = sys.argv[1]
    # NOTE(review): the path is hard-coded (the argv variant above is disabled)
    # and the handle returned by codecs.open() is never closed explicitly.
    annotations = codecs.open('../web/annotation.txt', 'r',
                              'utf-8').read().strip()
    sentence_obj_list = []
    # s_idx counts every chunk, including blank ones that are skipped below,
    # so the ids given to Sentence objects may have gaps.
    for s_idx, sentence in enumerate(annotations.split('===')):
        if sentence.strip() != '':
            # Sentence is created with three empty-string arguments; its
            # graphs are filled in further down.
            S = Sentence(s_idx, '', '', '')
            prev_matches = None
            for sent in sentence.split('\n'):
                if sent.strip() != '':
                    # The action is the text between the first and second ':'
                    # (or up to end of line). A line without any ':' raises
                    # IndexError here.
                    action = sent.split(':')[1].strip()
                    # Non-greedy match of every '[...]' span on the line, then
                    # drop the surrounding brackets.
                    matches = re.findall(r'\[.*?\]', sent.strip())
                    matches = [m[1:-1] for m in matches]
                    # An empty action (re)initialises the graphs of S;
                    # `action` is already stripped, so the extra strip() is
                    # a no-op.
                    if action.strip() == '':
                        S.graphs = []
                        nodes_in_visible_order = []
                        # One Graph per bracketed phrase, identified by its
                        # position on the line.
                        for m_idx, m in enumerate(matches):
                            g = Graph(m_idx)
                            S.graphs.append(g)
                            for w_idx, w in enumerate(m.split()):
                                n = Node(id=len(g.nodes),
                                         s=w,
Пример #3
0
        # Process one (input_line, output_line) pair. sent_idx, input_line and
        # output_line are bound outside this excerpt.
        sys.stderr.write('SENT' + str(sent_idx) + '\n')
        input_sent = input_line.strip().split()
        # The output line is '|'-delimited: even-indexed fields are phrase
        # text, odd-indexed fields are per-phrase metadata.
        output_items = output_line.strip().split('|')
        # Keep only non-empty phrase fields, then flatten them into tokens.
        output_phrases = [oi.strip() for idx, oi in enumerate(output_items) if idx % 2 == 0 and oi.strip() != '']
        output_sent = ' '.join(output_phrases).split()
        # NOTE(review): get_output_phrase_as_spans is defined elsewhere; the
        # slicing below treats each span as an inclusive (start, end) pair of
        # indexes into output_sent -- confirm.
        output_spans = get_output_phrase_as_spans(output_phrases)
        # Each metadata field is split on ',wa=': element 0 is parsed below as
        # a 'start-end' input span, element 1 as whitespace-separated 'i-j'
        # integer pairs.
        output_meta = [tuple(om.split(',wa=')) for idx, om in enumerate(output_items) if idx % 2 != 0]
        input_spans = [tuple([int(i) for i in om[0].split('-')]) for om in output_meta]
        wa_per_span = [[tuple([int(i) for i in a.split('-')]) for a in om[1].split()] for om in output_meta]
        # Per-token group ids, initialised to -1; they are not updated within
        # the visible part of this loop body.
        input_tok_group = [-1] * len(input_sent)
        output_tok_group = [-1] * len(output_sent)

        sys.stderr.write('input sent:' + ' '.join(input_sent) + '\n')
        sys.stderr.write('output sent:' + ' '.join(output_sent) + '\n')

        # Sentence receives the index, both token sequences re-joined with
        # single spaces, and None as its fourth argument.
        coe_sentence = Sentence(sent_idx, ' '.join(input_sent), ' '.join(output_sent), None)
        coe_sentence.initial_order_by = VIS_LANG
        sent_idx += 1
        # Phrases and metadata must pair up one-to-one. Empty phrase fields
        # are filtered out above while metadata fields are not, so malformed
        # input trips this check (only when assertions are enabled).
        assert len(wa_per_span) == len(input_spans) == len(output_spans)
        phrase_dict = {}
        input_coverage = [0] * len(input_sent)
        group_idx = 0
        # Walk the phrase pairs in lockstep with their word alignments.
        for idx, (out_span, inp_span, wa) in enumerate(zip(output_spans, input_spans, wa_per_span)):
            # Spans are used as inclusive bounds, hence the + 1 on the end.
            out_phrase = output_sent[out_span[0]:out_span[1] + 1]
            inp_phrase = input_sent[inp_span[0]:inp_span[1] + 1]
            # Leftover debug output (Python 2 print statements), disabled:
            # print '\t phrases:', input_sent[inp_span[0]:inp_span[1] + 1], '-', output_sent[out_span[0]:out_span[1] + 1]
            # print '\t phrase spans:', inp_span, '-', out_span
            # print '\twa:', wa
            # The two slices passed here are the same expressions as
            # inp_phrase and out_phrase above.
            # NOTE(review): insert_epsilon_edge and make_symmetric are defined
            # elsewhere -- their exact contracts are not visible here.
            wa_no_null = insert_epsilon_edge(wa, input_sent[inp_span[0]:inp_span[1] + 1],
                                             output_sent[out_span[0]:out_span[1] + 1])
            sym_coverage, sym_wa = make_symmetric(wa_no_null)
Пример #4
0
            tuple(om.split(',wa=')) for idx, om in enumerate(output_items)
            if idx % 2 != 0
        ]
        # output_meta, input_sent, output_sent, output_spans and sent_idx are
        # bound before this point. Element 0 of each output_meta entry is
        # parsed as a 'start-end' integer span, element 1 as
        # whitespace-separated 'i-j' integer pairs.
        input_spans = [
            tuple([int(i) for i in om[0].split('-')]) for om in output_meta
        ]
        wa_per_span = [[
            tuple([int(i) for i in a.split('-')]) for a in om[1].split()
        ] for om in output_meta]
        # Per-token group ids, initialised to -1; they are not updated within
        # the visible part of this loop body.
        input_tok_group = [-1] * len(input_sent)
        output_tok_group = [-1] * len(output_sent)

        sys.stderr.write('input sent:' + ' '.join(input_sent) + '\n')
        sys.stderr.write('output sent:' + ' '.join(output_sent) + '\n')

        # Sentence receives the index, both token sequences re-joined with
        # single spaces, and None as its fourth argument.
        coe_sentence = Sentence(sent_idx, ' '.join(input_sent),
                                ' '.join(output_sent), None)
        coe_sentence.initial_order_by = VIS_LANG
        sent_idx += 1
        # The three per-phrase lists must have equal length; this check only
        # runs when assertions are enabled.
        assert len(wa_per_span) == len(input_spans) == len(output_spans)
        phrase_dict = {}
        input_coverage = [0] * len(input_sent)
        group_idx = 0
        # Walk the phrase pairs in lockstep with their word alignments.
        for idx, (out_span, inp_span,
                  wa) in enumerate(zip(output_spans, input_spans,
                                       wa_per_span)):
            # Spans are used as inclusive bounds, hence the + 1 on the end.
            out_phrase = output_sent[out_span[0]:out_span[1] + 1]
            inp_phrase = input_sent[inp_span[0]:inp_span[1] + 1]
            # Leftover debug output (Python 2 print statements), disabled:
            # print '\t phrases:', input_sent[inp_span[0]:inp_span[1] + 1], '-', output_sent[out_span[0]:out_span[1] + 1]
            # print '\t phrase spans:', inp_span, '-', out_span
            # print '\twa:', wa
            wa_no_null = insert_epsilon_edge(