Exemplo n.º 1
0
def align(srcfile, tgtfile, approx_src_tgt_file):
    """Align ``srcfile`` with ``tgtfile`` using one source-to-target translation.

    Builds an ``Aligner`` option dict, runs its main loop and returns the
    aligned text as two lists of lines.

    Args:
        srcfile: Source text, passed to ``Aligner`` as ``'srcfile'``.
        tgtfile: Target text, passed to ``Aligner`` as ``'targetfile'``.
        approx_src_tgt_file: Translation of the source, passed as the single
            ``'srctotarget'`` entry.

    Returns:
        A ``(src, tgt)`` tuple with the lines of the two output buffers
        returned by ``Aligner.results()``.
    """
    import os  # local import so the block does not rely on file-level imports

    # Log output is discarded.  The sink is opened in a ``with`` block so the
    # handle is always closed (it used to be leaked), and ``os.devnull`` is
    # used instead of the POSIX-only "/dev/null" path.
    with open(os.devnull, "w+") as log_sink:
        options = {
            # source and target files needed by Aligner
            # they can be filenames, arrays of strings or io objects.
            'srcfile': srcfile,
            'targetfile': tgtfile,
            # translations of srcfile and targetfile, not influenced by 'factored'
            # they can be filenames, arrays of strings or io objects, too.
            'srctotarget': [approx_src_tgt_file],
            'targettosrc': [],
            # Pass filenames or io objects for these, respectively.
            # If nothing is passed or they are None, results are saved to StringIO.
            'output-src': None,
            'output-target': None,
            # other options ...
            'log_to': log_sink,
        }
        a = Aligner(options)
        a.mainloop()
        output_src, output_target = a.results()
        # Both outputs are StringIO objects because 'output-src' and
        # 'output-target' are None; read them before leaving the block.
        src = output_src.getvalue().splitlines()
        tgt = output_target.getvalue().splitlines()
    return (src, tgt)
 def _sameResultForEval(self, de, fr, s2t, t2s):
     """Assert that aligning de->fr and fr->de yields mirrored results.

     The aligner is run twice: once with ``de`` as source and ``fr`` as
     target, once with the roles (and the translation lists) swapped.
     The source output of one run must equal the target output of the
     other, and vice versa.
     """
     def run_alignment(src, trg, src_to_trg, trg_to_src):
         # One complete Aligner run; returns (source lines, target lines).
         aligner = Aligner({
             'srcfile': src,
             'targetfile': trg,
             'srctotarget': src_to_trg,
             'targettosrc': trg_to_src,
             'verbosity': 0,
         })
         aligner.mainloop()
         out_src, out_trg = aligner.results()
         src_lines = out_src.getvalue().splitlines()
         trg_lines = out_trg.getvalue().splitlines()
         return src_lines, trg_lines

     forward_src, forward_trg = run_alignment(de, fr, s2t, t2s)
     backward_src, backward_trg = run_alignment(fr, de, t2s, s2t)
     self.assertEqual(forward_src, backward_trg)
     self.assertEqual(forward_trg, backward_src)
	def main_test(self, option_function,
			close_file_object = None, remove_file = None):
		"""Align several file combinations of the eval sets and compare with references.

		For each of the ``eval1957`` / ``eval1989`` sets, the non-``clean``
		``.fr`` and ``.de`` files found in the ``eval`` directory are
		collected, several slices of them are aligned, and every result is
		handed to ``self.cmp_files`` together with its reference path.

		Args:
			option_function: Name of the method on ``self`` that builds the
				``Aligner`` options. It is called with the test argument, the
				two translation file lists and the output path.
			close_file_object: Optional name of a method on ``self`` that is
				called with lists of objects after each aligner run.
			remove_file: Optional name of a method on ``self`` that is called
				with each result path after it has been compared.
		"""
		test_dir = os.path.dirname(os.path.abspath(__file__))
		eval_dir = os.path.join(test_dir, '..', 'eval')
		result_dir = os.path.join(test_dir, 'result')
		refer_dir = os.path.join(test_dir, 'refer')
		# silence the module-level logger while the tests run
		bleualign.log = lambda a, b:None
		compare_files = []
		for test_set, test_argument in [('eval1957', '-d'), ('eval1989', '-e')]:
			fr_text = []
			de_text = []
			for filename in os.listdir(eval_dir):
				if not filename.startswith(test_set):
					continue
				attr = filename.split('.')
				# only three-part names are used, and 'clean' files are skipped
				if len(attr) != 3 or attr[1] == 'clean':
					continue
				filepath = os.path.join(eval_dir, filename)
				if attr[2] == 'fr':
					fr_text.append(filepath)
				elif attr[2] == 'de':
					de_text.append(filepath)
			fr_text.sort()
			de_text.sort()
			# TODO: the cases with an empty srctotarget list -- ([], []),
			# ([], de_text) and ([], de_text[-1:]) -- are to be added in
			# another test.
			test_files = [
				(fr_text[0:1], de_text[-3:-2]),
				(fr_text, []),
				(fr_text[1::3], de_text[-2:-1]),
				(fr_text[2:3], de_text[3:4]),
				(fr_text[0:1], []),
				(fr_text[2:], de_text[:3]),
				(fr_text, de_text),
			]
			for srctotarget_file, targettosrc_file in test_files:
				output_file = self.output_file_path(srctotarget_file, targettosrc_file)
				output_path = os.path.join(result_dir, output_file)
				options = getattr(self, option_function)(test_argument,
					srctotarget_file, targettosrc_file, output_path)
				a = Aligner(options)
				a.mainloop()
				output_src, output_target = a.results()
				if close_file_object is not None:
					close = getattr(self, close_file_object)
					close([output_src, output_target])
					close([options['targetfile']])
					close(options['targettosrc'])
					# source-side objects are only closed for the
					# 'fileObjectOptions' option builder
					if option_function == 'fileObjectOptions':
						close([options['srcfile']])
						close(options['srctotarget'])
				refer_path = os.path.join(refer_dir, output_file)
				compare_files.append((output_path + '-s', refer_path + '-s', output_src))
				compare_files.append((output_path + '-t', refer_path + '-t', output_target))
		# compare result with data in refer
		for result_path, refer_path, output_object in compare_files:
			self.cmp_files(result_path, refer_path, output_object)
			if remove_file is not None:
				getattr(self, remove_file)(result_path)
	def 對齊(self, 原來, 目標, 原來翻目標, 目標翻原來, 原來對齊=None, 目標對齊=None):
		"""Run the aligner on one source/target pair and return its results.

		Starts from a copy of the shared options in ``self.公家參數`` and
		fills in the per-call entries before running ``Aligner``.

		Args:
			原來: value stored under ``'srcfile'``.
			目標: value stored under ``'targetfile'``.
			原來翻目標: value stored under ``'srctotarget'``.
			目標翻原來: value stored under ``'targettosrc'``.
			原來對齊: value stored under ``'output-src'`` (default ``None``).
			目標對齊: value stored under ``'output-target'`` (default ``None``).

		Returns:
			Whatever ``Aligner.results()`` returns after the main loop.
		"""
		options = self.公家參數.copy()
		options.update({
			'srcfile': 原來,
			'targetfile': 目標,
			'srctotarget': 原來翻目標,
			'targettosrc': 目標翻原來,
			'output-src': 原來對齊,
			'output-target': 目標對齊,
		})

		aligner = Aligner(options)
		aligner.mainloop()
		return aligner.results()
Exemplo n.º 5
0
	def main_test(self, option_function):
		"""Align the eval sets with filtering enabled and compare good/bad outputs.

		``option_function`` names the method on ``self`` that builds the
		``Aligner`` options. It receives the test argument, the filter type,
		the two translation file lists and the output path. Both the regular
		results and the ``results_bad()`` outputs are passed to
		``self.cmp_files`` together with their reference paths.
		"""
		here = os.path.dirname(os.path.abspath(__file__))
		eval_dir = os.path.join(here, '..', 'eval')
		result_dir = os.path.join(here, 'result')
		refer_dir = os.path.join(here, 'refer')
		# silence the module-level logger while the tests run
		bleualign.log = lambda a, b:None
		pending = []
		for test_set, test_argument in [('eval1957', '-d'), ('eval1989', '-e')]:
			fr_text = []
			de_text = []
			for filename in os.listdir(eval_dir):
				if not filename.startswith(test_set):
					continue
				parts = filename.split('.')
				# only three-part names are used, and 'clean' files are skipped
				if len(parts) != 3 or parts[1] == 'clean':
					continue
				filepath = os.path.join(eval_dir, filename)
				if parts[2] == 'fr':
					fr_text.append(filepath)
				elif parts[2] == 'de':
					de_text.append(filepath)
			fr_text.sort()
			de_text.sort()
			test_files = [
				(fr_text[0:1], de_text[-3:-2], 'articles'),
				(fr_text, [], 'sentences'),
				(fr_text, de_text, 'sentences'),
			]
			for srctotarget_file, targettosrc_file, filter_type in test_files:
				output_file = self.output_file_path(srctotarget_file, targettosrc_file)
				output_path = os.path.join(result_dir, output_file)
				build_options = getattr(self, option_function)
				options = build_options(test_argument, filter_type,
					srctotarget_file, targettosrc_file, output_path)
				aligner = Aligner(options)
				aligner.mainloop()
				output_src, output_target = aligner.results()
				output_src_bad, output_target_bad = aligner.results_bad()
				if option_function == 'fileObjectOptions':
					for stream in (output_src, output_target,
							output_src_bad, output_target_bad):
						stream.close()
				refer_path = os.path.join(refer_dir, output_file)
				# one comparison entry per output, keyed by file suffix
				outputs = (
					('-good-s', output_src),
					('-good-t', output_target),
					('-bad-s', output_src_bad),
					('-bad-t', output_target_bad),
				)
				for suffix, stream in outputs:
					pending.append((output_path + suffix, refer_path + suffix, stream))
		for result_path, refer_path, output_object in pending:
			self.cmp_files(result_path, refer_path, output_object)
			if option_function.startswith('file'):
				os.remove(result_path)
Exemplo n.º 6
0
	def _sameResultForEval(self,de,fr,s2t,t2s):
		"""Assert that aligning de->fr and fr->de yields mirrored results.

		The aligner is run once with ``de`` as source and ``fr`` as target,
		then again with the texts and translation lists swapped. The source
		output of each run must equal the target output of the other.
		"""
		def aligned_lines(src, trg, src_to_trg, trg_to_src):
			# One complete Aligner run; returns (source lines, target lines).
			aligner = Aligner({
				'srcfile':src,
				'targetfile':trg,
				'srctotarget':src_to_trg,
				'targettosrc':trg_to_src,
				'verbosity':0,
				})
			aligner.mainloop()
			out_src, out_trg = aligner.results()
			src_lines = out_src.getvalue().splitlines()
			trg_lines = out_trg.getvalue().splitlines()
			return src_lines, trg_lines

		s2t_src, s2t_trg = aligned_lines(de, fr, s2t, t2s)
		t2s_src, t2s_trg = aligned_lines(fr, de, t2s, s2t)
		self.assertEqual(s2t_src, t2s_trg)
		self.assertEqual(s2t_trg, t2s_src)
Exemplo n.º 7
0
	def test_no_translation(self):
		"""Check which option sets ``Aligner`` accepts when translations are missing.

		With only source and target files, construction must raise
		``ValueError``. Adding ``'no_translation_override'``, a
		``'srctotarget'`` list or a ``'targettosrc'`` list must each allow
		construction, after which the file streams are closed again.
		"""
		base = {'srcfile':self.srcfile, 'targetfile':self.targetfile}
		self.assertRaises(ValueError, Aligner, dict(base))
		accepted_extras = (
			{'no_translation_override':True},
			{'srctotarget':[self.targetfile]},
			{'targettosrc':[self.srcfile]},
		)
		for extra in accepted_extras:
			# fresh dict per run so no Aligner sees another run's options
			options = dict(base)
			options.update(extra)
			aligner = Aligner(options)
			aligner.close_file_streams()
Exemplo n.º 8
0
    def bleu_align(self, srcfile, tgtfile, hyp_src_tgt_file):
        """Align ``srcfile`` and ``tgtfile`` guided by a translation of the source.

        Args:
            srcfile: Source text, passed to ``Aligner`` as ``'srcfile'``.
            tgtfile: Target text, passed to ``Aligner`` as ``'targetfile'``.
            hyp_src_tgt_file: Translation of the source, passed as the single
                ``'srctotarget'`` entry.

        Returns:
            ``(srcs, tgts)``: the lines of the source and target output
            buffers returned by ``Aligner.results()``.
        """
        # No output destinations are configured here; the results are read
        # back from the buffers returned by ``results()``.
        options = {
            'verbosity': 0,
            'srcfile': srcfile,
            'targetfile': tgtfile,
            'srctotarget': [hyp_src_tgt_file],
            'targettosrc': [],
        }

        a = Aligner(options)
        a.mainloop()
        src_out, tgt_out = a.results()
        srcs = src_out.getvalue().splitlines()
        tgts = tgt_out.getvalue().splitlines()
        return srcs, tgts
Exemplo n.º 9
0
	def test_gale_church(self):
		"""Run the aligner with ``--srctotarget -`` on both eval sets and compare.

		Options come from ``load_arguments``. The source and target results
		of each run are passed to ``self.cmp_files`` together with the
		matching ``-galechurch`` result and reference paths.
		"""
		base_dir = os.path.dirname(os.path.abspath(__file__))
		result_dir = os.path.join(base_dir, 'result')
		refer_dir = os.path.join(base_dir, 'refer')
		# silence the module-level logger while the tests run
		bleualign.log = lambda a, b:None
		comparisons = []
		for test_set, test_argument in [('eval1957', '-d'), ('eval1989', '-e')]:
			options = load_arguments(['', test_argument, '--srctotarget', '-'])
			output_file = test_set + '-galechurch'
			# NOTE: 'output' is not set in options; output_path is only used
			# to build the paths handed to cmp_files.
			output_path = os.path.join(result_dir, output_file)
			refer_path = os.path.join(refer_dir, output_file)
			aligner = Aligner(options)
			aligner.mainloop()
			output_src, output_target = aligner.results()
			comparisons.extend([
				(output_path + '-s', refer_path + '-s', output_src),
				(output_path + '-t', refer_path + '-t', output_target),
			])
		# compare result with data in refer
		for result_path, refer_path, output_object in comparisons:
			self.cmp_files(result_path, refer_path, output_object)
Exemplo n.º 10
0
    def bleu_align(self, srcfile, tgtfile, hyp_src_tgt_file=None):
        """Align ``srcfile`` and ``tgtfile``, optionally guided by a translation.

        Args:
            srcfile: Source text, passed to ``Aligner`` as ``'srcfile'``.
            tgtfile: Target text, passed to ``Aligner`` as ``'targetfile'``.
            hyp_src_tgt_file: Optional translation of the source. When it is
                ``None``, the ``'galechurch'`` and
                ``'no_translation_override'`` options are both set to
                ``True`` and no translation is passed.

        Returns:
            ``(srcs, tgts)``: the lines of the source and target output
            buffers returned by ``Aligner.results()``.
        """
        # Both flags depend on the same condition, so compute it once.
        no_translation = hyp_src_tgt_file is None
        options = {
            'srcfile': srcfile,
            'targetfile': tgtfile,
            'galechurch': no_translation,
            'no_translation_override': no_translation,
            'srctotarget': [hyp_src_tgt_file] if hyp_src_tgt_file else [],
            'targettosrc': [],
            'verbosity': 0,
        }

        a = Aligner(options)
        a.mainloop()
        src_out, tgt_out = a.results()

        srcs = src_out.getvalue().splitlines()
        tgts = tgt_out.getvalue().splitlines()

        return srcs, tgts
Exemplo n.º 11
0
 def test_gale_church(self):
     """Run the aligner with ``--srctotarget -`` on both eval sets and compare.

     Options come from ``load_arguments``. The source and target results
     of each run are passed to ``self.cmp_files`` together with the
     matching ``-galechurch`` result and reference paths.
     """
     base_dir = os.path.dirname(os.path.abspath(__file__))
     result_dir = os.path.join(base_dir, 'result')
     refer_dir = os.path.join(base_dir, 'refer')
     # silence the module-level logger while the tests run
     bleualign.log = lambda a, b: None
     comparisons = []
     eval_sets = [('eval1957', '-d'), ('eval1989', '-e')]
     for test_set, test_argument in eval_sets:
         options = load_arguments(['', test_argument, '--srctotarget', '-'])
         output_file = test_set + '-galechurch'
         # NOTE: 'output' is not set in options; output_path is only used
         # to build the paths handed to cmp_files.
         output_path = os.path.join(result_dir, output_file)
         refer_path = os.path.join(refer_dir, output_file)
         aligner = Aligner(options)
         aligner.mainloop()
         output_src, output_target = aligner.results()
         comparisons.extend([
             (output_path + '-s', refer_path + '-s', output_src),
             (output_path + '-t', refer_path + '-t', output_target),
         ])
     # compare result with data in refer
     for result_path, refer_path, output_object in comparisons:
         self.cmp_files(result_path, refer_path, output_object)
Exemplo n.º 12
0
options['verbosity'] = 1
options['printempty'] = False
options['output'] = None

# Collect every (source, target, translation) triple first, so that a missing
# file aborts the run before any alignment has started.
jobs = []

for source_document in [d for d in os.listdir(directory) if d.endswith('.' + source_suffix)]:

    source_document = os.path.join(directory, source_document)
    # Swap the source suffix for the target / translation suffix.
    target_document = source_document[:-len(source_suffix)] + target_suffix
    translation_document = source_document[:-len(source_suffix)] + translation_suffix

    # Sanity checks
    for f in source_document, target_document, translation_document:
        if not os.path.isfile(f):
            sys.stderr.write('ERROR: File {0} expected, but not found\n'.format(f))
            # Exit with a failure status.  The bare ``exit()`` helper is
            # provided by the ``site`` module and exits with status 0, which
            # reported this error as success.
            sys.exit(1)

    jobs.append((source_document, target_document, translation_document))

# Align each triple; the results are written next to the inputs with an
# '.aligned' suffix.
for (source_document, target_document, translation_document) in jobs:

    options['srcfile'] = source_document
    options['targetfile'] = target_document
    options['srctotarget'] = [translation_document]
    options['output-src'] = source_document + '.aligned'
    options['output-target'] = target_document + '.aligned'

    a = Aligner(options)
    a.mainloop()
Exemplo n.º 13
0
# Collect every (source, target, translation) triple first, so that a missing
# file aborts the run before any alignment has started.
jobs = []

for source_document in [
        d for d in os.listdir(directory) if d.endswith('.' + source_suffix)
]:

    source_document = os.path.join(directory, source_document)
    # Swap the source suffix for the target / translation suffix.
    stem = source_document[:-len(source_suffix)]
    target_document = stem + target_suffix
    translation_document = stem + translation_suffix

    # Sanity checks
    for f in source_document, target_document, translation_document:
        if not os.path.isfile(f):
            sys.stderr.write(
                'ERROR: File {0} expected, but not found\n'.format(f))
            # Exit with a failure status.  The bare ``exit()`` helper is
            # provided by the ``site`` module and exits with status 0, which
            # reported this error as success.
            sys.exit(1)

    jobs.append((source_document, target_document, translation_document))

# Align each triple; the results are written next to the inputs with an
# '.aligned' suffix.
for (source_document, target_document, translation_document) in jobs:

    options['srcfile'] = source_document
    options['targetfile'] = target_document
    options['srctotarget'] = [translation_document]
    options['output-src'] = source_document + '.aligned'
    options['output-target'] = target_document + '.aligned'

    a = Aligner(options)
    a.mainloop()
Exemplo n.º 14
0
        # source and target files needed by Aligner
        # they can be filenames, arrays of strings or io objects.
        'srcfile':
        os.path.join(current_path, '..', 'eval', 'eval1989.de'),
        'targetfile':
        os.path.join(current_path, '..', 'eval', 'eval1989.fr'),
        # translations of srcfile and targetfile, not influenced by 'factored'
        # they can be filenames, arrays of strings or io objects, too.
        'srctotarget':
        [os.path.join(current_path, '..', 'eval', 'eval1957.europarlfull.fr')],
        'targettosrc': [],
        # Pass filenames or io objects for these, respectively.
        # If nothing is passed or they are None, the results are saved to StringIO objects.
        'output-src':
        None,
        'output-target':
        None,
        # other options ...
    }
    # Run the alignment with the options assembled above.
    a = Aligner(options)
    a.mainloop()
    output_src, output_target = a.results()
    # output_src and output_target are StringIO objects because
    # options['output-src'] and options['output-target'] are None.
    src = output_src.getvalue()  # the whole source output as one string
    trg = output_target.getvalue().splitlines()  # the target output as a list of lines
    # Print a short preview: the first 30 characters of the source output
    # and the first 3 lines of the target output.
    print('output_src.getvalue()')
    print(src[:30])
    print()
    print('output_target.getvalue().splitlines()')
    print(trg[:3])
Exemplo n.º 15
0
import os
from bleualign.align import Aligner

if __name__ == '__main__':
	# Demo: align two of the bundled evaluation texts and print a preview
	# of the in-memory results.
	current_path = os.path.dirname(os.path.abspath(__file__))
	eval_dir = os.path.join(current_path, '..', 'eval')
	options = {
		# Source and target texts for the Aligner.
		# Filenames, arrays of strings or io objects are all accepted.
		'srcfile': os.path.join(eval_dir, 'eval1989.de'),
		'targetfile': os.path.join(eval_dir, 'eval1989.fr'),
		# Translations of srcfile and targetfile, not influenced by 'factored'.
		# Filenames, arrays of strings or io objects are accepted here, too.
		# NOTE(review): the translation comes from the eval1957 set while the
		# texts are eval1989 -- confirm this pairing is intended.
		'srctotarget': [os.path.join(eval_dir, 'eval1957.europarlfull.fr')],
		'targettosrc': [],
		# Output destinations: filenames or io objects.
		# When omitted or None, the results are saved to StringIO objects.
		'output-src': None,
		'output-target': None,
		# other options ...
		}
	aligner = Aligner(options)
	aligner.mainloop()
	output_src, output_target = aligner.results()
	# Both outputs are StringIO objects because the output options are None.
	src = output_src.getvalue()  # whole source output as one string
	trg = output_target.getvalue().splitlines()  # target output as a list of lines
	print('output_src.getvalue()')
	print(src[:30])
	print()
	print('output_target.getvalue().splitlines()')
	print(trg[:3])
 def main_test(self, option_function):
     """Align the eval sets with filtering enabled and compare good/bad outputs.

     ``option_function`` names the method on ``self`` that builds the
     ``Aligner`` options. It receives the test argument, the filter type,
     the two translation file lists and the output path. Both the regular
     results and the ``results_bad()`` outputs are passed to
     ``self.cmp_files`` together with their reference paths.
     """
     here = os.path.dirname(os.path.abspath(__file__))
     eval_dir = os.path.join(here, '..', 'eval')
     result_dir = os.path.join(here, 'result')
     refer_dir = os.path.join(here, 'refer')
     # silence the module-level logger while the tests run
     bleualign.log = lambda a, b: None
     pending = []
     eval_sets = [('eval1957', '-d'), ('eval1989', '-e')]
     for test_set, test_argument in eval_sets:
         fr_text = []
         de_text = []
         for filename in os.listdir(eval_dir):
             if not filename.startswith(test_set):
                 continue
             parts = filename.split('.')
             # only three-part names are used, and 'clean' files are skipped
             if len(parts) != 3 or parts[1] == 'clean':
                 continue
             filepath = os.path.join(eval_dir, filename)
             if parts[2] == 'fr':
                 fr_text.append(filepath)
             elif parts[2] == 'de':
                 de_text.append(filepath)
         fr_text.sort()
         de_text.sort()
         test_files = [
             (fr_text[0:1], de_text[-3:-2], 'articles'),
             (fr_text, [], 'sentences'),
             (fr_text, de_text, 'sentences'),
         ]
         for srctotarget_file, targettosrc_file, filter_type in test_files:
             output_file = self.output_file_path(srctotarget_file,
                                                 targettosrc_file)
             output_path = os.path.join(result_dir, output_file)
             build_options = getattr(self, option_function)
             options = build_options(test_argument, filter_type,
                                     srctotarget_file, targettosrc_file,
                                     output_path)
             aligner = Aligner(options)
             aligner.mainloop()
             output_src, output_target = aligner.results()
             output_src_bad, output_target_bad = aligner.results_bad()
             if option_function == 'fileObjectOptions':
                 for stream in (output_src, output_target,
                                output_src_bad, output_target_bad):
                     stream.close()
             refer_path = os.path.join(refer_dir, output_file)
             # one comparison entry per output, keyed by file suffix
             outputs = (
                 ('-good-s', output_src),
                 ('-good-t', output_target),
                 ('-bad-s', output_src_bad),
                 ('-bad-t', output_target_bad),
             )
             for suffix, stream in outputs:
                 pending.append(
                     (output_path + suffix, refer_path + suffix, stream))
     for result_path, refer_path, output_object in pending:
         self.cmp_files(result_path, refer_path, output_object)
         if option_function.startswith('file'):
             os.remove(result_path)
Exemplo n.º 17
0
 def test_no_translation(self):
     """Check which option sets ``Aligner`` accepts when translations are missing.

     With only source and target files, construction must raise
     ``ValueError``. Adding ``'no_translation_override'``, a
     ``'srctotarget'`` list or a ``'targettosrc'`` list must each allow
     construction, after which the file streams are closed again.
     """
     base = {'srcfile': self.srcfile, 'targetfile': self.targetfile}
     self.assertRaises(ValueError, Aligner, dict(base))
     accepted_extras = (
         {'no_translation_override': True},
         {'srctotarget': [self.targetfile]},
         {'targettosrc': [self.srcfile]},
     )
     for extra in accepted_extras:
         # fresh dict per run so no Aligner sees another run's options
         options = dict(base)
         options.update(extra)
         aligner = Aligner(options)
         aligner.close_file_streams()
Exemplo n.º 18
0
     'targettosrc': [],
     # Pass filenames or io objects for these, respectively.
     # If nothing is passed or they are None, the results are saved to StringIO objects.
     'output-src':
     None,
     'output-target':
     None,
     'output-src-bad':
     None,
     'output-target-bad':
     None,
     # other options ...
 }
 # Enable filtering on top of the options assembled above.
 options['filter'] = 'sentences'
 options['filterthreshold'] = 66
 a = Aligner(options)
 a.mainloop()
 output_src, output_target = a.results()
 output_src_bad, output_target_bad = a.results_bad(
 )  # meant for use when options['filter'] is set
 # output_src is a StringIO object because options['output-src'] is None
 src = output_src.getvalue()  # the whole source output as one string
 trg = output_target.getvalue().splitlines()  # the target output as a list of lines
 # Print a short preview: the first 30 characters of the source output
 # and the first 3 lines of the target output.
 print('output_src.getvalue()')
 print(src[:30])
 print()
 print('output_target.getvalue().splitlines()')
 print(trg[:3])
 print()
 # Report the threshold value that was configured above.
 print('filterthreshold for choice good part of alignment: {0}%'.format(
     options['filterthreshold']))
Exemplo n.º 19
0
		# they can be filenames, arrays of strings or io objects.
		'srcfile':os.path.join(current_path, '..', 'eval', 'eval1989.de'),
		'targetfile': os.path.join(current_path, '..', 'eval', 'eval1989.fr'),
		# translations of srcfile and targetfile, not influenced by 'factored'
		# they can be filenames, arrays of strings or io objects, too.
		'srctotarget': [os.path.join(current_path, '..', 'eval', 'eval1957.europarlfull.fr')],
		'targettosrc': [],
		# Pass filenames or io objects for these, respectively.
		# If nothing is passed or they are None, the results are saved to StringIO objects.
		'output-src': None, 'output-target': None,
		'output-src-bad': None, 'output-target-bad': None,
		# other options ...
		}
	# Enable filtering on top of the options assembled above.
	options['filter'] = 'sentences'
	options['filterthreshold'] = 66
	a = Aligner(options)
	a.mainloop()
	output_src, output_target = a.results()
	output_src_bad, output_target_bad = a.results_bad()  # meant for use when options['filter'] is set
	# output_src is a StringIO object because options['output-src'] is None
	src = output_src.getvalue()  # the whole source output as one string
	trg = output_target.getvalue().splitlines()  # the target output as a list of lines
	# Print a short preview: the first 30 characters of the source output
	# and the first 3 lines of the target output.
	print('output_src.getvalue()')
	print(src[:30])
	print()
	print('output_target.getvalue().splitlines()')
	print(trg[:3])
	print()
	print('filterthreshold for choice good part of alignment: {0}%'.format(options['filterthreshold']))
	# The format string used to contain only '{0}', so the bad-alignment count
	# passed as the second argument was never printed.
	good_count = len(output_src.getvalue().splitlines())
	bad_count = len(output_src_bad.getvalue().splitlines())
	print('number of good/bad alignmemts: {0}/{1}'.format(good_count, bad_count))