Exemplo n.º 1
0
	def main_test(self, option_function):
		"""Align the eval1957 and eval1989 sets and hand every output to cmp_files.

		For each test set, the three-part file names in ``../eval`` (skipping
		those whose middle part is ``clean``) are grouped into ``.fr`` and
		``.de`` translation lists. Three translation combinations are aligned
		per set; afterwards each of the four outputs is passed to
		``self.cmp_files`` together with its result path and reference path.

		Args:
			option_function: Name of the method on ``self`` that builds the
				Aligner options. It is called with the test argument, the
				filter type, both translation lists and the output path.
				When the name starts with ``'file'`` each result file is
				deleted right after it has been passed to ``cmp_files``.
		"""
		here = os.path.dirname(os.path.abspath(__file__))
		eval_dir = os.path.join(here, '..', 'eval')
		result_dir = os.path.join(here, 'result')
		refer_dir = os.path.join(here, 'refer')
		# Replace bleualign's log function with a no-op.
		bleualign.log = lambda a, b: None
		pending = []
		for test_set, test_argument in (('eval1957', '-d'), ('eval1989', '-e')):
			fr_text, de_text = [], []
			by_extension = {'fr': fr_text, 'de': de_text}
			for name in os.listdir(eval_dir):
				if not name.startswith(test_set):
					continue
				parts = name.split('.')
				# Only "<set>.<kind>.<lang>" names count, and never the 'clean' kind.
				if len(parts) != 3 or parts[1] == 'clean':
					continue
				bucket = by_extension.get(parts[2])
				if bucket is not None:
					bucket.append(os.path.join(eval_dir, name))
			# os.listdir gives no ordering guarantee; sort so the slices below are stable.
			fr_text.sort()
			de_text.sort()
			test_files = [
				(fr_text[:1], de_text[-3:-2], 'articles'),
				(fr_text, [], 'sentences'),
				(fr_text, de_text, 'sentences'),
			]
			for srctotarget_file, targettosrc_file, filter_type in test_files:
				output_file = self.output_file_path(srctotarget_file, targettosrc_file)
				output_path = os.path.join(result_dir, output_file)
				build_options = getattr(self, option_function)
				options = build_options(test_argument, filter_type,
					srctotarget_file, targettosrc_file, output_path)
				aligner = Aligner(options)
				aligner.mainloop()
				good_src, good_target = aligner.results()
				bad_src, bad_target = aligner.results_bad()
				# Path suffix paired with the output object it belongs to.
				outputs = (
					('-good-s', good_src),
					('-good-t', good_target),
					('-bad-s', bad_src),
					('-bad-t', bad_target),
				)
				if option_function == 'fileObjectOptions':
					for _, stream in outputs:
						stream.close()
				refer_path = os.path.join(refer_dir, output_file)
				for suffix, stream in outputs:
					pending.append((output_path + suffix, refer_path + suffix, stream))
		remove_results = option_function.startswith('file')
		for result_path, refer_path, output_object in pending:
			self.cmp_files(result_path, refer_path, output_object)
			if remove_results:
				os.remove(result_path)
 def main_test(self, option_function):
     """Align the eval1957 and eval1989 sets and hand every output to cmp_files.

     For each test set, the three-part file names in ``../eval`` (skipping
     those whose middle part is ``clean``) are grouped into ``.fr`` and
     ``.de`` translation lists. Three translation combinations are aligned
     per set; afterwards each of the four outputs is passed to
     ``self.cmp_files`` together with its result path and reference path.

     Args:
         option_function: Name of the method on ``self`` that builds the
             Aligner options. It is called with the test argument, the
             filter type, both translation lists and the output path.
             When the name starts with ``'file'`` each result file is
             deleted right after it has been passed to ``cmp_files``.
     """
     here = os.path.dirname(os.path.abspath(__file__))
     eval_dir = os.path.join(here, '..', 'eval')
     result_dir = os.path.join(here, 'result')
     refer_dir = os.path.join(here, 'refer')
     # Replace bleualign's log function with a no-op.
     bleualign.log = lambda a, b: None
     pending = []
     for test_set, test_argument in (('eval1957', '-d'),
                                     ('eval1989', '-e')):
         fr_text, de_text = [], []
         by_extension = {'fr': fr_text, 'de': de_text}
         for name in os.listdir(eval_dir):
             if not name.startswith(test_set):
                 continue
             parts = name.split('.')
             # Only "<set>.<kind>.<lang>" names count, never the 'clean' kind.
             if len(parts) != 3 or parts[1] == 'clean':
                 continue
             bucket = by_extension.get(parts[2])
             if bucket is not None:
                 bucket.append(os.path.join(eval_dir, name))
         # os.listdir gives no ordering guarantee; sort so the slices
         # below are stable.
         fr_text.sort()
         de_text.sort()
         test_files = [
             (fr_text[:1], de_text[-3:-2], 'articles'),
             (fr_text, [], 'sentences'),
             (fr_text, de_text, 'sentences'),
         ]
         for srctotarget_file, targettosrc_file, filter_type in test_files:
             output_file = self.output_file_path(srctotarget_file,
                                                 targettosrc_file)
             output_path = os.path.join(result_dir, output_file)
             build_options = getattr(self, option_function)
             options = build_options(test_argument, filter_type,
                                     srctotarget_file, targettosrc_file,
                                     output_path)
             aligner = Aligner(options)
             aligner.mainloop()
             good_src, good_target = aligner.results()
             bad_src, bad_target = aligner.results_bad()
             # Path suffix paired with the output object it belongs to.
             outputs = (
                 ('-good-s', good_src),
                 ('-good-t', good_target),
                 ('-bad-s', bad_src),
                 ('-bad-t', bad_target),
             )
             if option_function == 'fileObjectOptions':
                 for _, stream in outputs:
                     stream.close()
             refer_path = os.path.join(refer_dir, output_file)
             for suffix, stream in outputs:
                 pending.append((output_path + suffix,
                                 refer_path + suffix, stream))
     remove_results = option_function.startswith('file')
     for result_path, refer_path, output_object in pending:
         self.cmp_files(result_path, refer_path, output_object)
         if remove_results:
             os.remove(result_path)
Exemplo n.º 3
0
        'output-src':
        None,
        'output-target':
        None,
        'output-src-bad':
        None,
        'output-target-bad':
        None,
        # other options ...
    }
    options['filter'] = 'sentences'
    options['filterthreshold'] = 66
    a = Aligner(options)
    a.mainloop()
    output_src, output_target = a.results()
    output_src_bad, output_target_bad = a.results_bad(
    )  # if you set options['filter']
    # output_src is StringIO because options['output-src'] is None
    src = output_src.getvalue()  # StringIO member function
    trg = output_target.getvalue().splitlines()  # array of string
    print('output_src.getvalue()')
    print(src[:30])
    print()
    print('output_target.getvalue().splitlines()')
    print(trg[:3])
    print()
    print('filterthreshold for choice good part of alignment: {0}%'.format(
        options['filterthreshold']))
    print('number of good/bad alignmemts: {0}'.format(
        len(output_src.getvalue().splitlines()),
        len(output_src_bad.getvalue().splitlines())))
Exemplo n.º 4
0
		# they can be filenames, arrays of strings or io objects.
		'srcfile':os.path.join(current_path, '..', 'eval', 'eval1989.de'),
		'targetfile': os.path.join(current_path, '..', 'eval', 'eval1989.fr'),
		# translations of srcfile and targetfile, not influenced by 'factored'
		# they can be filenames, arrays of strings or io objects, too.
		'srctotarget': [os.path.join(current_path, '..', 'eval', 'eval1957.europarlfull.fr')],
		'targettosrc': [],
		# pass filenames or io objects for them, respectively.
		# if nothing is passed or None is assigned, StringIO is used to save the results.
		'output-src': None, 'output-target': None,
		'output-src-bad': None, 'output-target-bad': None,
		# other options ...
		}
	options['filter'] = 'sentences'
	options['filterthreshold'] = 66
	a = Aligner(options)
	a.mainloop()
	output_src, output_target = a.results()
	output_src_bad, output_target_bad = a.results_bad()  # if you set options['filter']
	# output_src is StringIO because options['output-src'] is None
	src = output_src.getvalue()  # StringIO member function
	trg = output_target.getvalue().splitlines()  # array of string
	print('output_src.getvalue()')
	print(src[:30])
	print()
	print('output_target.getvalue().splitlines()')
	print(trg[:3])
	print()
	print('filterthreshold for choice good part of alignment: {0}%'.format(options['filterthreshold']))
	print('number of good/bad alignmemts: {0}'.format(len(output_src.getvalue().splitlines()), len(output_src_bad.getvalue().splitlines())))