def _run_test(configuration, box_eval):
    """Evaluate the configured box and diff every produced data-set file.

    The box is configured from ``configuration``, initialised, and run
    through ``eval_pipeline`` with ``box_eval`` as its input. For each data
    set (``devel``, ``eval``, ``train``) and each language side (``src``,
    ``trg``), the ``<set>_<lang>_filename`` entry of the pipeline output is
    handed to ``thelp.diff`` together with the ``<set>_<lang>_expected``
    entry of ``box_eval``.
    """
    settings = configure(configuration)
    pipeline_box = initialise(settings)
    result = eval_pipeline(pipeline_box, box_eval, settings)

    # Same visiting order as a nested loop: data set outermost, language
    # innermost.
    prefixes = ['%s_%s' % (data_set, lang)
                for data_set in ('devel', 'eval', 'train')
                for lang in ('src', 'trg')]

    for prefix in prefixes:
        produced = result[prefix + '_filename']
        expected = box_eval[prefix + '_expected']
        thelp.diff(expected, produced)
def _run_test(configuration, box_eval):
    """Evaluate the configured box, diff the cleaned files, then delete them.

    The box is configured from ``configuration``, initialised, and run
    through ``eval_pipeline`` with ``box_eval`` as its input. The cleaned
    source and target files named in the pipeline output are passed to
    ``thelp.diff`` together with the expected files named in ``box_eval``
    (presumably ``thelp.diff`` raises on a mismatch -- confirm against the
    helper).

    Both cleaned output files are removed afterwards, whether or not the
    comparisons succeeded.
    """
    box_config = configure(configuration)
    box = initialise(box_config)
    output = eval_pipeline(box, box_eval, box_config)

    try:
        thelp.diff(box_eval['cleaned_src_file_expected'],
                   output['cleaned_src_filename'])
        thelp.diff(box_eval['cleaned_trg_file_expected'],
                   output['cleaned_trg_filename'])
    finally:
        # Nested so that a failure to remove the source file does not
        # prevent the target file from being removed as well.
        try:
            os.unlink(output['cleaned_src_filename'])
        finally:
            os.unlink(output['cleaned_trg_filename'])
# Manual smoke test: build a language model from a fixed tokenised corpus and
# check that the component reports the expected output file names.
if __name__ == '__main__':
    from pypeline.helpers.helpers import eval_pipeline

    # Output location and IRSTLM installation both come from the environment.
    lm_dir = os.environ["PWD"]
    configuration = {}
    configuration['irstlm_root'] = os.environ["IRSTLM"]
    configuration['irstlm_smoothing_method'] = 'improved-kneser-ney'
    configuration['language_model_directory'] = lm_dir

    component_config = configure(configuration)
    component = initialise(component_config)

    # NOTE: absolute, machine-specific path to the tokenised input corpus.
    pipeline_input = {
        'input_filename': '/Users/ianjohnson/Dropbox/Documents/MTM2012/tokenised_files/news-commentary-v7.fr-en.tok.en'
    }
    value = eval_pipeline(component, pipeline_input, component_config)

    # Expected result: one path inside lm_dir per output key, inserted in the
    # same order as the keys are listed here.
    expected_names = (
        ('add_start_end_filename', 'news-commentary-v7.fr-en.sb.en'),
        ('lm_filename', 'news-commentary-v7.fr-en.lm.en.gz'),
        ('compiled_lm_filename', 'news-commentary-v7.fr-en.arpa.en'),
    )
    target = {}
    for key, name in expected_names:
        target[key] = os.path.join(lm_dir, name)

    # Single parenthesised argument: prints the same text as the statement
    # form.
    print("Target: %s" % target)
    if value != target:
        raise Exception("Massive fail!")
output = {'add_start_end_filename': start_end_output_filename, 'lm_filename': lm_filename, 'compiled_lm_filename': compiled_lm_filename} print "IRSTLM Build: Output = %s" % output return output return process
# Manual smoke test, run only when this module is executed as a script.
# It configures and initialises the component, evaluates it on a fixed input
# file, and raises if the returned dictionary differs from the expected one.
if __name__ == '__main__':
    from pypeline.helpers.helpers import eval_pipeline
    # The language model directory is taken from the PWD environment variable.
    lm_dir = os.environ["PWD"]
    # The IRSTLM installation root is taken from the IRSTLM environment
    # variable; a missing variable raises KeyError.
    configuration = {'irstlm_root': os.environ["IRSTLM"],
                     'irstlm_smoothing_method': 'improved-kneser-ney',
                     'language_model_directory': lm_dir}
    component_config = configure(configuration)
    component = initialise(component_config)
    # NOTE(review): the input path is absolute and machine-specific.
    value = eval_pipeline(component,
                          {'input_filename': '/Users/ianjohnson/Dropbox/Documents/MTM2012/tokenised_files/news-commentary-v7.fr-en.tok.en'},
                          component_config)
    # Expected result: the three output file names, all located in lm_dir.
    target = {'add_start_end_filename': os.path.join(lm_dir, 'news-commentary-v7.fr-en.sb.en'),
              'lm_filename': os.path.join(lm_dir, 'news-commentary-v7.fr-en.lm.en.gz'),
              'compiled_lm_filename': os.path.join(lm_dir, 'news-commentary-v7.fr-en.arpa.en')}
    print "Target: %s" % target
    # Any difference between the actual and the expected dictionary is fatal.
    if value != target:
        raise Exception("Massive fail!")