Example #1
    def run(self, config, from_file=False):
        # Align each parsed target sentence with its parsed reference sentence and
        # write the word alignments to
        # <working_dir>/tgt.parse.ref.parse.cobalt-align-stanford.out
        working_dir = os.path.expanduser(config.get('Data', 'working_dir'))
        tgt_path = os.path.join(working_dir, 'tgt.parse')
        ref_path = os.path.join(working_dir, 'ref.parse')
        out_path = os.path.join(working_dir, os.path.basename(tgt_path) + '.' +
                                os.path.basename(ref_path) + '.cobalt-align-stanford.out')

        if os.path.exists(out_path):
            print("Alignments already exist.\nAligner will not run.")
            return

        targets = StanfordParseLoader.parsed_sentences(tgt_path)
        references = StanfordParseLoader.parsed_sentences(ref_path)

        aligner = AlignerStanford('english')
        alignments = [aligner.align(sentence, references[i]) for i, sentence in enumerate(targets)]

        with codecs.open(out_path, 'w', 'utf-8') as output:
            for i, alignment in enumerate(alignments):
                print('Sentence #' + str(i + 1), file=output)

                # alignment[0] holds (target_index, reference_index) pairs (1-based);
                # alignment[1] maps each pair to the label describing that link.
                for a in sorted(alignment[0], key=lambda x: x[0]):
                    output.write('[' + str(targets[i][a[0] - 1].index) + ', ' + str(references[i][a[1] - 1].index) + ']' + ' : ' +
                                 '[' + targets[i][a[0] - 1].form + ', ' + references[i][a[1] - 1].form + ']' + ' : ' +
                                 alignment[1][(a[0], a[1])] + '\n')

                output.write('\n')
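
For orientation, each sentence block in the resulting .cobalt-align-stanford.out file follows the pattern written by the loop above: a 'Sentence #N' header, one line per aligned word pair in the form '[target_index, reference_index] : [target_form, reference_form] : label', and a trailing blank line. A hypothetical excerpt (the word forms and labels below are invented for illustration):

Sentence #1
[1, 1] : [the, the] : <label>
[2, 2] : [cat, cats] : <label>
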
Example #2
    def get(self, config, from_file=False):
        # Load the parsed target and reference sentences from the working directory
        # and store them on the processor via the AbstractProcessor setters.
        working_dir = os.path.expanduser(config.get('Data', 'working_dir'))

        result_tgt = StanfordParseLoader.parsed_sentences(os.path.join(working_dir, 'tgt.parse'))
        result_ref = StanfordParseLoader.parsed_sentences(os.path.join(working_dir, 'ref.parse'))

        AbstractProcessor.set_result_tgt(self, result_tgt)
        AbstractProcessor.set_result_ref(self, result_ref)
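
These processors read only one setting, the working_dir key of the [Data] section, from the config object. A minimal sketch of a compatible setup, assuming Python 3's configparser and a hypothetical concrete class name for the processor that defines get() above:

import configparser

# Hypothetical setup: only a [Data] section with a working_dir key is required,
# and that directory must contain tgt.parse and ref.parse.
config = configparser.ConfigParser()
config['Data'] = {'working_dir': '~/cobalt-work'}

processor = ParseLoaderProcessor()  # hypothetical class name
processor.get(config)
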
Example #3
    def run(self, config, from_file=False):
        # Read the previously computed word alignments, compile the syntactic context
        # differences for every aligned word pair, and write them to
        # <working_dir>/tgt.parse.ref.parse.cobalt-align-stanford-context-diff.out
        working_dir = os.path.expanduser(config.get('Data', 'working_dir'))
        tgt_path = os.path.join(working_dir, 'tgt.parse')
        ref_path = os.path.join(working_dir, 'ref.parse')
        align_path = os.path.join(working_dir, os.path.basename(tgt_path) + '.' +
                                  os.path.basename(ref_path) + '.cobalt-align-stanford.out')
        out_path = os.path.join(working_dir, os.path.basename(tgt_path) + '.' +
                                os.path.basename(ref_path) + '.cobalt-align-stanford-context-diff.out')

        if os.path.exists(out_path):
            print("Context difference already compiled.\nContext difference compiler will not run.")
            return

        reader = CobaltAlignReader()

        alignment_result = reader.read(align_path)
        targets = StanfordParseLoader.parsed_sentences(tgt_path)
        references = StanfordParseLoader.parsed_sentences(ref_path)

        compiler = ContextInfoCompiler('english')
        info = [compiler.compile_context_info(sentence, references[i], alignment_result[i][0])
                for i, sentence in enumerate(targets)]

        with codecs.open(out_path, 'w', 'utf-8') as output:
            for i, context_info in enumerate(info):
                print('Sentence #' + str(i + 1), file=output)

                # alignment_result[i][0]: (target_index, reference_index) pairs (1-based);
                # alignment_result[i][2]: the alignment label for each pair.
                for j, a in enumerate(alignment_result[i][0]):
                    output.write('[' + str(targets[i][a[0] - 1].index) + ', ' + str(references[i][a[1] - 1].index) + ']' + ' : ')
                    output.write('[' + targets[i][a[0] - 1].form + ', ' + references[i][a[1] - 1].form + ']' + ' : ')
                    output.write(alignment_result[i][2][j] + ' : ')
                    output.write('srcDiff=' + ','.join(context_info[j]['srcDiff']) + ';')
                    output.write('srcCon=' + ','.join(context_info[j]['srcCon']) + ';')
                    output.write('tgtDiff=' + ','.join(context_info[j]['tgtDiff']) + ';')
                    output.write('tgtCon=' + ','.join(context_info[j]['tgtCon']) + '\n')

                output.write('\n')
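
The context-difference file extends each alignment line with the four fields written above. A hypothetical excerpt (forms, labels and field contents are invented for illustration):

Sentence #1
[2, 2] : [cat, cats] : <label> : srcDiff=<items>;srcCon=<items>;tgtDiff=<items>;tgtCon=<items>
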
Example #4
import codecs
import sys

from utils.parsed_sentences_loader import ParsedSentencesLoader
from utils.stanford_format import StanfordParseLoader
from utils.conll_format import CONNL
from alignment.context_evidence import ContextEvidence

with codecs.open('data_test/test.parse', 'r', 'utf8') as f:
    text = f.read()

# Split the raw parse output into sentences and convert each one into the
# internal token representation.
loader = ParsedSentencesLoader()
sentences = loader.load(text)
parsed = []

for sentence in sentences['sentences']:
    parsed.append(StanfordParseLoader.process_parse_result(sentence))


# NOTE: sys.exit() stops the script here; the CoNLL-format loading and the dump
# below only run if this call is removed.
sys.exit()
parsed = CONNL.load('/home/marina/workspace/data/TRJuly/txtfile.output.tok.parse')
print(len(parsed))

# Dump one line per token: index, surface form, dependency label and head index
# (-1 for the root). The with-statement closes the file, so no explicit close() is needed.
with codecs.open('data_test/test.parse.out', 'w', 'utf8') as o:
    for i, sentence in enumerate(parsed):
        o.write('Sentence: {}\n'.format(i + 1))
        for word in sentence:
            o.write('{}\t{}\t{}\t{}\n'.format(word.index, word.form, word.dep, -1 if word.head is None else word.head.index))
        o.write('\n')
Example #5
import codecs

from utils.meteor_align_reader import MeteorAlignReader
from utils.stanford_format import StanfordParseLoader
from alignment.context_evidence import ContextEvidence


# Load the parsed target and reference sentences and the Meteor word alignments.
parsed_target = StanfordParseLoader.parsed_sentences('data_test/tgt.parse')
parsed_ref = StanfordParseLoader.parsed_sentences('data_test/ref.parse')

meteor_alignments = MeteorAlignReader.read('data_test/tgt.meteor-align.out')
alignments = MeteorAlignReader.alignments(meteor_alignments)

context = ContextEvidence()

# For every aligned word pair, record how its syntactic context differs
# between the target and the reference sentence.
for i, alignment in enumerate(alignments):
    for word_pair in alignment:
        word_pair.context_difference = context.context_differences(word_pair.left_word,
                                                                   word_pair.right_word,
                                                                   parsed_target[i],
                                                                   parsed_ref[i],
                                                                   meteor_alignments[i][0])
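
A small follow-up sketch (not part of the original script) that dumps the computed differences for a quick sanity check; it relies only on the attribute names already used above:

# Hedged sketch: print each aligned pair together with its computed context difference.
for i, alignment in enumerate(alignments):
    print('Sentence #{}'.format(i + 1))
    for word_pair in alignment:
        print(word_pair.left_word, word_pair.right_word, word_pair.context_difference)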