def eval_rouge(dec_pattern, dec_dir, ref_pattern, ref_dir,
               cmd='-c 95 -r 1000 -n 2 -m', system_id=1):
    """Evaluate summaries with the original Perl ROUGE-1.5.5 script.

    Both input directories are converted to ROUGE format inside a temporary
    directory, a settings file describing them is written, and the Perl
    script is run on that settings file.

    Args:
        dec_pattern: filename pattern for the decoded files, forwarded to
            ``Rouge155.write_config_static``.
        dec_dir: directory holding the decoded summaries.
        ref_pattern: filename pattern for the reference files, forwarded to
            ``Rouge155.write_config_static``.
        ref_dir: directory holding the reference summaries.
        cmd: extra ROUGE command-line options, whitespace separated.
        system_id: forwarded to ``Rouge155.write_config_static``.

    Returns:
        The text the ROUGE script wrote to stdout.

    Raises:
        Whatever ``sp.check_output`` raises, e.g. on a non-zero exit status.
    """
    # NOTE(review): the installation path is hard-coded for one machine. The
    # original wrapped this literal in `try/except KeyError`, which could
    # never fire -- presumably it once read an environment variable; confirm.
    rouge_home = '/home/yhj/ROUGE/RELEASE-1.5.5'
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dec = os.path.join(tmp_dir, 'dec')
        tmp_ref = os.path.join(tmp_dir, 'ref')
        settings_path = os.path.join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, tmp_dec)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, tmp_ref)
        Rouge155.write_config_static(
            tmp_dec, dec_pattern, tmp_ref, ref_pattern,
            settings_path, system_id
        )
        # Build argv as a list so paths containing spaces stay one argument.
        argv = [os.path.join(rouge_home, 'ROUGE-1.5.5.pl'),
                '-e', os.path.join(rouge_home, 'data')]
        argv += cmd.split()
        argv += ['-a', settings_path]
        output = sp.check_output(argv, universal_newlines=True)
    return output
def eval_rouge(dec_dir, ref_dir):
    """Evaluate summaries with the original Perl ROUGE-1.5.5 script.

    Decoded files are matched with the pattern ``(\\d+).dec`` and references
    with ``#ID#.ref``; ROUGE runs with the options ``-c 95 -r 1000 -n 2 -m``.
    The ROUGE path and the final command line are printed to stdout.

    Args:
        dec_dir: directory holding the decoded summaries.
        ref_dir: directory holding the reference summaries.

    Returns:
        The text the ROUGE script wrote to stdout.

    Raises:
        AssertionError: if ``_ROUGE_PATH`` is None or does not exist.
    """
    assert _ROUGE_PATH is not None
    assert os.path.exists(_ROUGE_PATH)
    print(_ROUGE_PATH)
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    dec_pattern = r'(\d+).dec'
    ref_pattern = '#ID#.ref'
    rouge_options = ['-c', '95', '-r', '1000', '-n', '2', '-m']
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dec = join(tmp_dir, 'dec')
        tmp_ref = join(tmp_dir, 'ref')
        settings_path = join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, tmp_dec)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, tmp_ref)
        Rouge155.write_config_static(
            tmp_dec, dec_pattern, tmp_ref, ref_pattern,
            settings_path, system_id=1
        )
        # Build argv as a list so paths containing spaces stay one argument.
        argv = ([join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                 '-e', join(_ROUGE_PATH, 'data')]
                + rouge_options
                + ['-a', settings_path])
        print("cmd:{}".format(' '.join(argv)))
        output = sp.check_output(argv, universal_newlines=True)
    return output
def eval_rouge(dec_pattern, dec_dir, ref_pattern, ref_dir,
               cmd='-c 95 -r 1000 -n 2 -m', system_id=1):
    """Evaluate summaries with the original Perl ROUGE-1.5.5 script.

    The three preparation steps (convert decoded files, convert reference
    files, write the settings file) each print a ``partN:`` marker when they
    fail. The error is then re-raised rather than swallowed, because every
    later step depends on the files the earlier ones create.

    Args:
        dec_pattern: filename pattern for the decoded files, forwarded to
            ``Rouge155.write_config_static``.
        dec_dir: directory holding the decoded summaries.
        ref_pattern: filename pattern for the reference files, forwarded to
            ``Rouge155.write_config_static``.
        ref_dir: directory holding the reference summaries.
        cmd: extra ROUGE command-line options, whitespace separated.
        system_id: forwarded to ``Rouge155.write_config_static``.

    Returns:
        The text the ROUGE script wrote to stdout.

    Raises:
        AssertionError: if ``_ROUGE_PATH`` is None.
        Exception: whatever a failing preparation step raised.
    """
    assert _ROUGE_PATH is not None
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dec = join(tmp_dir, 'dec')
        tmp_ref = join(tmp_dir, 'ref')
        settings_path = join(tmp_dir, 'settings.xml')
        try:
            Rouge155.convert_summaries_to_rouge_format(dec_dir, tmp_dec)
        except Exception:
            print("part1:")
            raise
        try:
            Rouge155.convert_summaries_to_rouge_format(ref_dir, tmp_ref)
        except Exception:
            print("part2:")
            raise
        try:
            Rouge155.write_config_static(
                tmp_dec, dec_pattern, tmp_ref, ref_pattern,
                settings_path, system_id
            )
        except Exception:
            print("part3:")
            raise
        # Build argv as a list so paths containing spaces stay one argument.
        argv = [join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                '-e', join(_ROUGE_PATH, 'data')]
        argv += cmd.split()
        argv += ['-a', settings_path]
        output = sp.check_output(argv, universal_newlines=True)
    return output
def get_rouge(path, dec):
    """Score one decoded summary with ROUGE and return the mean of three values.

    The sentences in ``dec`` are written, one per line, to
    ``<path>/decode/0.dec``; references are read from ``<path>/reference``.

    Args:
        path: directory containing the ``decode`` and ``reference`` subdirs.
        dec: iterable of sentences making up the decoded summary.

    Returns:
        The arithmetic mean of the three floats parsed from the ROUGE output.
    """
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    dec_pattern = r'(\d+).dec'
    ref_pattern = '#ID#.ref'
    dec_dir = join(path, 'decode')
    ref_dir = join(path, 'reference')

    with open(join(dec_dir, '0.dec'), 'w') as f:
        for sentence in dec:
            print(sentence, file=f)

    rouge_options = ['-c', '95', '-r', '1000', '-n', '2', '-m']
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dec = join(tmp_dir, 'dec')
        tmp_ref = join(tmp_dir, 'ref')
        settings_path = join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, tmp_dec)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, tmp_ref)
        Rouge155.write_config_static(tmp_dec, dec_pattern,
                                     tmp_ref, ref_pattern,
                                     settings_path, system_id=1)
        # Build argv as a list so paths containing spaces stay one argument.
        argv = ([join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                 '-e', join(_ROUGE_PATH, 'data')]
                + rouge_options
                + ['-a', settings_path])
        output = sp.check_output(argv, universal_newlines=True)

    # Fixed rows of the report; presumably the ROUGE-1/2/L F-score lines --
    # TODO confirm against the ROUGE-1.5.5 output layout.
    line = output.split('\n')
    rouge1 = float(line[3].split(' ')[3])
    rouge2 = float(line[7].split(' ')[3])
    rougel = float(line[11].split(' ')[3])
    return (rouge1 + rouge2 + rougel) / 3
def eval_rouge(dec_pattern, dec_dir, ref_pattern, ref_dir, dir_name,
               cmd='-c 95 -r 1000 -n 2 -m', system_id=1):
    """Evaluate summaries with the original Perl ROUGE-1.5.5 script via sudo.

    Unlike a temp-directory variant, the intermediate ROUGE files are written
    to a fixed ``.../output/<dir_name>/temp/`` directory, which is wiped and
    recreated on every call and left in place afterwards.

    Args:
        dec_pattern: filename pattern for the decoded files, forwarded to
            ``Rouge155.write_config_static``.
        dec_dir: directory holding the decoded summaries.
        ref_pattern: filename pattern for the reference files, forwarded to
            ``Rouge155.write_config_static``.
        ref_dir: directory holding the reference summaries.
        dir_name: name of the per-run output directory under the fixed root.
        cmd: extra ROUGE command-line options (shell-style quoting allowed).
        system_id: forwarded to ``Rouge155.write_config_static``.

    Returns:
        The text the ROUGE script wrote to stdout.

    Raises:
        AssertionError: if ``_ROUGE_PATH`` is None.
    """
    import shlex  # local import: only needed to tokenize `cmd`

    assert _ROUGE_PATH is not None
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    # NOTE(review): machine-specific output root kept as in the original.
    tmp_dir = '/mnt/e/Work/Ahmed/Summarization/SummRuNNer/output/{}/temp/'.format(
        dir_name)
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    # makedirs (not mkdir) so a missing output/<dir_name> parent is created.
    os.makedirs(tmp_dir)
    tmp_dec = join(tmp_dir, 'dec')
    tmp_ref = join(tmp_dir, 'ref')
    settings_path = join(tmp_dir, 'settings.xml')
    Rouge155.convert_summaries_to_rouge_format(dec_dir, tmp_dec)
    Rouge155.convert_summaries_to_rouge_format(ref_dir, tmp_ref)
    Rouge155.write_config_static(tmp_dec, dec_pattern,
                                 tmp_ref, ref_pattern,
                                 settings_path, system_id)
    # Explicit argv without a shell: no quoting or injection problems from
    # interpolated paths, and spaces in paths stay inside one argument.
    argv = ['sudo', 'perl', join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
            '-e', join(_ROUGE_PATH, 'data')]
    argv += shlex.split(cmd)
    argv += ['-a', settings_path]
    output = sp.check_output(argv, universal_newlines=True)
    return output
def eval_rouge(dec_pattern, dec_dir, ref_pattern, ref_dir,
               cmd='-c 95 -r 1000 -n 2 -m', system_id=1, force=False):
    """Evaluate summaries with Perl ROUGE-1.5.5, reusing converted files.

    The converted directories and the settings file are stored next to the
    input directories (``../rouge_dec``, ``../rouge_<ref basename>_ref`` and
    ``../rouge_settings.xml``) and are only regenerated when missing or when
    ``force`` is true.

    Args:
        dec_pattern: filename pattern for the decoded files, forwarded to
            ``Rouge155.write_config_static``.
        dec_dir: directory holding the decoded summaries.
        ref_pattern: filename pattern for the reference files, forwarded to
            ``Rouge155.write_config_static``.
        ref_dir: directory holding the reference summaries.
        cmd: extra ROUGE command-line options, whitespace separated.
        system_id: forwarded to ``Rouge155.write_config_static``.
        force: regenerate the cached files even if they already exist.

    Returns:
        The text the ROUGE script wrote to stdout.

    Raises:
        AssertionError: if ``_ROUGE_PATH`` is None.
    """
    assert _ROUGE_PATH is not None
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)

    rouge_dec = join(dec_dir, '../rouge_dec')
    if not os.path.exists(rouge_dec) or force:
        Rouge155.convert_summaries_to_rouge_format(dec_dir, rouge_dec)

    # The reference cache name embeds the reference directory's basename.
    rouge_ref = join(ref_dir,
                     '../rouge_{}_ref'.format(basename(normpath(ref_dir))))
    if not os.path.exists(rouge_ref) or force:
        Rouge155.convert_summaries_to_rouge_format(ref_dir, rouge_ref)

    rouge_settings = join(dec_dir, '../rouge_settings.xml')
    if not os.path.exists(rouge_settings) or force:
        Rouge155.write_config_static(rouge_dec, dec_pattern,
                                     rouge_ref, ref_pattern,
                                     rouge_settings, system_id)

    # Build argv as a list so paths containing spaces stay one argument.
    argv = [join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
            '-e', join(_ROUGE_PATH, 'data')]
    argv += cmd.split()
    argv += ['-a', rouge_settings]
    output = sp.check_output(argv, universal_newlines=True)
    return output
def eval_rouge(dec_dir, ref_dir):
    """Run Perl ROUGE-1.5.5 and return three scores parsed from its report.

    Decoded files are matched with ``(\\d+).dec`` and references with
    ``#ID#.ref``. The full ROUGE report is printed to stdout.

    Args:
        dec_dir: directory holding the decoded summaries.
        ref_dir: directory holding the reference summaries.

    Returns:
        Tuple ``(R_1, R_2, R_L)`` of floats parsed from the report.

    Raises:
        AssertionError: if ``_ROUGE_PATH`` is None.
    """
    assert _ROUGE_PATH is not None
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    dec_pattern = r'(\d+).dec'
    ref_pattern = '#ID#.ref'
    rouge_options = ['-c', '95', '-r', '1000', '-n', '2', '-m']
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dec = join(tmp_dir, 'dec')
        tmp_ref = join(tmp_dir, 'ref')
        settings_path = join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, tmp_dec)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, tmp_ref)
        Rouge155.write_config_static(tmp_dec, dec_pattern,
                                     tmp_ref, ref_pattern,
                                     settings_path, system_id=1)
        # Build argv as a list so paths containing spaces stay one argument.
        argv = ([join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                 '-e', join(_ROUGE_PATH, 'data')]
                + rouge_options
                + ['-a', settings_path])
        output = sp.check_output(argv, universal_newlines=True)

    # Split once and reuse. The fixed rows are presumably the ROUGE-1/2/L
    # F-score lines -- TODO confirm against the ROUGE-1.5.5 output layout.
    rows = output.split('\n')
    R_1 = float(rows[3].split(' ')[3])
    R_2 = float(rows[7].split(' ')[3])
    R_L = float(rows[11].split(' ')[3])
    print(output)
    return R_1, R_2, R_L
def setup_and_eval(system_summaries, model_summaries, with_meteor=False):
    '''
    temporarily setup rouge structure and evaluate given summaries

    Each summary is split on "." and only pieces with more than one
    space-separated token are kept. A pair is written to disk (one sentence
    per line, no trailing newline) only if both sides keep at least one
    sentence. The global console logger is disabled while this runs and is
    restored to its previous state afterwards, even if evaluation raises.

    :param system_summaries: list of abstract-summaries in text
        ["John Stein went ...", ...]
    :param model_summaries: list of corresponding model-summaries
        ["John walked to ...", ...]
    :param with_meteor: also compute ``meteor_score`` on the written files
    :return: count of evaluated pairs, rouge result, meteor result (None
        unless ``with_meteor`` is true)
    '''
    system_dir_name = "system_summaries"
    model_dir_name = "model_summaries"

    def _sentences(summary):
        # Keep only "."-separated pieces that contain more than one token.
        return [sent for sent in summary.split(".")
                if len(sent.split(" ")) > 1]

    def _write_sentences(path, sentences):
        # One left-stripped sentence per line, without a trailing newline.
        with open(path, "w") as handle:
            handle.write("\n".join(sent.lstrip() for sent in sentences))

    console_logger = log.get_global_console_logger()
    was_disabled = console_logger.disabled
    console_logger.disabled = True
    try:
        with tempfile.TemporaryDirectory() as tmp_dir:
            system_dir = os.path.join(tmp_dir, system_dir_name)
            model_dir = os.path.join(tmp_dir, model_dir_name)
            os.mkdir(system_dir)
            os.mkdir(model_dir)

            count = 0
            for system_text, model_text in zip(system_summaries,
                                               model_summaries):
                system_sents = _sentences(system_text)
                model_sents = _sentences(model_text)
                if system_sents and model_sents:
                    system_file = f"system.{count}.txt"
                    model_file = f"model.{count}.txt"
                    _write_sentences(os.path.join(system_dir, system_file),
                                     system_sents)
                    _write_sentences(os.path.join(model_dir, model_file),
                                     model_sents)
                    count += 1

            # Scoring must happen while the temporary directory still exists.
            output = rouge_scores(system_dir, model_dir)
            meteor = None
            if with_meteor:
                meteor = meteor_score(system_dir, model_dir)
    finally:
        # Previously the logger stayed disabled if anything above raised.
        console_logger.disabled = was_disabled

    return count, output, meteor
def __init__(self, rouge_dir=None, rouge_args=None, log_level=None):
    """
    Set up a Rouge155 object.

        rouge_dir:  Directory containing Rouge-1.5.5.pl
        rouge_args: Arguments handed through to ROUGE in place of the
                    default pyrouge arguments.
        log_level:  When not None, forwarded to the global console
                    logger getter.

    """
    # Only forward a level when the caller actually supplied one.
    logger_args = () if log_level is None else (log_level,)
    self.log = log.get_global_console_logger(*logger_args)

    self.__set_dir_properties()
    self._config_file = None
    self._settings_file = self.__get_config_path()
    self.__set_rouge_dir(rouge_dir)
    self.args = self.__clean_rouge_args(rouge_args)
    # Filename patterns start unset.
    self._system_filename_pattern = self._model_filename_pattern = None
def __init__(self, rouge_dir=None, rouge_args=None, log_level=None):
    """
    Initialise a Rouge155 object.

        rouge_dir:  Directory containing Rouge-1.5.5.pl
        rouge_args: ROUGE arguments to use instead of the default
                    pyrouge arguments.
        log_level:  Optional level passed to the global console logger
                    getter; omitted from the call when None.

    """
    if log_level is not None:
        console_logger = log.get_global_console_logger(log_level)
    else:
        console_logger = log.get_global_console_logger()
    self.log = console_logger

    self.__set_dir_properties()
    self._config_file = None
    settings_path = self.__get_config_path()
    self._settings_file = settings_path
    self.__set_rouge_dir(rouge_dir)
    cleaned_args = self.__clean_rouge_args(rouge_args)
    self.args = cleaned_args
    self._system_filename_pattern = None
    self._model_filename_pattern = None
def eval_rouge(dec_pattern, dec_dir, ref_pattern, ref_dir,
               cmd='-c 95 -r 1000 -n 2 -m -d', system_id=1):
    """Run Perl ROUGE-1.5.5 and collect values from its per-evaluation lines.

    For every report line containing ``ROUGE-1 Eval`` (likewise
    ``ROUGE-2 Eval`` and ``ROUGE-L Eval``), the last whitespace-separated
    token is taken with its first two characters removed -- presumably
    stripping an ``F:`` prefix from a per-document F-score; confirm against
    the ROUGE ``-d`` output format.

    Args:
        dec_pattern: filename pattern for the decoded files, forwarded to
            ``Rouge155.write_config_static``.
        dec_dir: directory holding the decoded summaries.
        ref_pattern: filename pattern for the reference files, forwarded to
            ``Rouge155.write_config_static``.
        ref_dir: directory holding the reference summaries.
        cmd: extra ROUGE command-line options, whitespace separated.
        system_id: forwarded to ``Rouge155.write_config_static``.

    Returns:
        Tuple ``(rouge_1, rouge_2, rouge_l)`` of newline-joined strings, one
        collected value per line.

    Raises:
        AssertionError: if ``_ROUGE_PATH`` is None.
    """
    print('evaluate')
    assert _ROUGE_PATH is not None
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dec = join(tmp_dir, 'dec')
        tmp_ref = join(tmp_dir, 'ref')
        settings_path = join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, tmp_dec)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, tmp_ref)
        Rouge155.write_config_static(
            tmp_dec, dec_pattern, tmp_ref, ref_pattern,
            settings_path, system_id
        )
        # Build argv as a list so paths containing spaces stay one argument.
        argv = [join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                '-e', join(_ROUGE_PATH, 'data')]
        argv += cmd.split()
        argv += ['-a', settings_path]
        output = sp.check_output(argv, universal_newlines=True)

    rouge_1 = []
    rouge_2 = []
    rouge_l = []
    for line in output.split('\n'):
        if 'ROUGE-1 Eval' in line:
            rouge_1.append(line.split()[-1][2:])
        if 'ROUGE-2 Eval' in line:
            rouge_2.append(line.split()[-1][2:])
        if 'ROUGE-L Eval' in line:
            rouge_l.append(line.split()[-1][2:])
    rouge_1 = '\n'.join(rouge_1)
    rouge_2 = '\n'.join(rouge_2)
    rouge_l = '\n'.join(rouge_l)
    return rouge_1, rouge_2, rouge_l
def eval_rouge(dec_pattern, dec_dir, ref_pattern, ref_dir,
               cmd='-c 95 -r 1000 -n 2 -m', system_id=1):
    """Evaluate summaries with the original Perl ROUGE-1.5.5 script.

    Both input directories are converted to ROUGE format inside a temporary
    directory, a settings file describing them is written, and the Perl
    script is run on that settings file.

    Args:
        dec_pattern: filename pattern for the decoded files, forwarded to
            ``Rouge155.write_config_static``.
        dec_dir: directory holding the decoded summaries.
        ref_pattern: filename pattern for the reference files, forwarded to
            ``Rouge155.write_config_static``.
        ref_dir: directory holding the reference summaries.
        cmd: extra ROUGE command-line options, whitespace separated.
        system_id: forwarded to ``Rouge155.write_config_static``.

    Returns:
        The text the ROUGE script wrote to stdout.

    Raises:
        AssertionError: if ``_ROUGE_PATH`` is None.
    """
    assert _ROUGE_PATH is not None
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dec = join(tmp_dir, 'dec')
        tmp_ref = join(tmp_dir, 'ref')
        settings_path = join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, tmp_dec)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, tmp_ref)
        Rouge155.write_config_static(
            tmp_dec, dec_pattern, tmp_ref, ref_pattern,
            settings_path, system_id
        )
        # Build argv as a list so paths containing spaces stay one argument.
        argv = [join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                '-e', join(_ROUGE_PATH, 'data')]
        argv += cmd.split()
        argv += ['-a', settings_path]
        output = sp.check_output(argv, universal_newlines=True)
    return output
def rouge_eval(ref_dir, dec_dir,
               dec_pattern=r'(\d+)_decoded.txt',
               ref_pattern='#ID#_reference.txt',
               cmd="-c 95 -r 1000 -n 2 -m", system_id=1):
    """Evaluate summaries with the original Perl ROUGE-1.5.5 script.

    Note the argument order: the reference directory comes first.

    Args:
        ref_dir: directory holding the reference summaries.
        dec_dir: directory holding the decoded summaries.
        dec_pattern: filename pattern for the decoded files, forwarded to
            ``Rouge155.write_config_static``.
        ref_pattern: filename pattern for the reference files, forwarded to
            ``Rouge155.write_config_static``.
        cmd: extra ROUGE command-line options, whitespace separated.
        system_id: forwarded to ``Rouge155.write_config_static``.

    Returns:
        The text the ROUGE script wrote to stdout.

    Raises:
        AssertionError: if ``_ROUGE_PATH`` is None.
    """
    # only print rouge 1 2 L
    assert _ROUGE_PATH is not None
    # silence pyrouge logging
    log.get_global_console_logger().setLevel(logging.WARNING)
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_dec_dir = os.path.join(tmp_dir, 'dec')
        tmp_ref_dir = os.path.join(tmp_dir, 'ref')
        settings_path = os.path.join(tmp_dir, 'settings.xml')
        Rouge155.convert_summaries_to_rouge_format(dec_dir, tmp_dec_dir)
        Rouge155.convert_summaries_to_rouge_format(ref_dir, tmp_ref_dir)
        Rouge155.write_config_static(tmp_dec_dir, dec_pattern,
                                     tmp_ref_dir, ref_pattern,
                                     settings_path, system_id)
        # Build argv as a list so paths containing spaces stay one argument.
        argv = [os.path.join(_ROUGE_PATH, 'ROUGE-1.5.5.pl'),
                '-e', os.path.join(_ROUGE_PATH, 'data')]
        argv += cmd.split()
        argv += ['-a', settings_path]
        output = sp.check_output(argv, universal_newlines=True)
    return output
def evaluate_static(home_dir, config_file_path, rouge_args=None):
    """
    This is the static version of the evaluate method.
    Run ROUGE to evaluate the configuration file using the
    arguments provided.

        home_dir:          Directory containing ROUGE-1.5.5.pl
        config_file_path:  ROUGE configuration file, passed after '-m'.
        rouge_args:        List of ROUGE arguments placed before '-m';
                           None means no extra arguments.

    Returns: Rouge output as string.

    """
    # The default used to crash with TypeError (None + list).
    extra_args = [] if rouge_args is None else rouge_args
    options = extra_args + ['-m'] + [config_file_path]
    command = [os.path.join(home_dir, 'ROUGE-1.5.5.pl')] + options
    logger = log.get_global_console_logger()
    logger.info(
        "Running ROUGE with command {}".format(" ".join(command)))
    return check_output(command).decode("UTF-8")
def __init__(self, language="en", punkt_data_path=None):
    """
    Load an NLTK punkt sentence detector into ``self.sent_detector``.

        language:         Key into the built-in language-to-data-path
                          table; only used when punkt_data_path is empty.
        punkt_data_path:  Explicit resource path handed to
                          nltk.data.load; overrides the language lookup.

    Failures are logged rather than raised; ``self.sent_detector`` is then
    left unset.

    """
    self.lang2datapath = {"en": "tokenizers/punkt/english.pickle"}
    self.log = log.get_global_console_logger()
    try:
        import nltk.data
    except ImportError:
        # Execution continues; the failed lookup of `nltk` below is then
        # reported by the generic handler.
        self.log.error(
            "Cannot import NLTK data for the sentence splitter. Please "
            "check if the 'punkt' NLTK-package is installed correctly.")
    try:
        if not punkt_data_path:
            punkt_data_path = self.lang2datapath[language]
        self.sent_detector = nltk.data.load(punkt_data_path)
    except KeyError:
        self.log.error(
            "No sentence splitter data for language {}.".format(language))
    except Exception:
        # Report the path actually attempted. Indexing lang2datapath here
        # could raise KeyError inside the handler when an explicit path was
        # given for an unknown language.
        self.log.error(
            "Could not load sentence splitter data: {}".format(
                punkt_data_path))
def __init__(self, rouge_dir=None, rouge_args=None):
    """
    Set up a Rouge155 object.

        rouge_dir:  Directory containing Rouge-1.5.5.pl
        rouge_args: Arguments handed through to ROUGE in place of the
                    default pyrouge arguments.

    """
    self.log = log.get_global_console_logger()
    self.__set_dir_properties()
    self._config_file = None
    # settings.ini sits in the same directory as this module.
    module_dir = os.path.dirname(__file__)
    self._settings_file = os.path.join(module_dir, 'settings.ini')
    self.__set_rouge_dir(rouge_dir)
    self.args = self.__clean_rouge_args(rouge_args)
    # Filename patterns start unset.
    self._system_filename_pattern = self._model_filename_pattern = None
def process(input_dir, output_dir, function):
    """
    Run every file of input_dir through function and store the result,
    lower-cased and passed through clean(), under the same file name in
    output_dir. Files are read and written as UTF-8; output_dir is
    created when it does not exist yet.

    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    logger = log.get_global_console_logger()
    logger.info("Processing files in {}.".format(input_dir))
    for name in os.listdir(input_dir):
        source_path = os.path.join(input_dir, name)
        with codecs.open(source_path, "r", encoding="UTF-8") as reader:
            raw_text = reader.read()
        transformed = function(raw_text)
        target_path = os.path.join(output_dir, name)
        with codecs.open(target_path, "w", encoding="UTF-8") as writer:
            lowered = transformed.lower()
            writer.write(clean(lowered))
    logger.info("Saved processed files to {}.".format(output_dir))
def __init__(self, rouge_dir=None, rouge_args=None):
    """
    Initialise a Rouge155 object.

        rouge_dir:  Directory containing Rouge-1.5.5.pl
        rouge_args: ROUGE arguments to use instead of the default
                    pyrouge arguments.

    """
    self.log = log.get_global_console_logger()
    self.__set_dir_properties()
    self._config_file = None
    # The settings file is looked up next to this module.
    settings_path = os.path.join(os.path.dirname(__file__), 'settings.ini')
    self._settings_file = settings_path
    self.__set_rouge_dir(rouge_dir)
    cleaned_args = self.__clean_rouge_args(rouge_args)
    self.args = cleaned_args
    self._system_filename_pattern = None
    self._model_filename_pattern = None
def __init__(self, language="en", punkt_data_path=None):
    """
    Load an NLTK punkt sentence detector into ``self.sent_detector``.

        language:         Key into the built-in language-to-data-path
                          table; only used when punkt_data_path is empty.
        punkt_data_path:  Explicit resource path handed to
                          nltk.data.load; overrides the language lookup.

    Failures are logged rather than raised; ``self.sent_detector`` is then
    left unset.

    """
    self.lang2datapath = {"en": "tokenizers/punkt/english.pickle"}
    self.log = log.get_global_console_logger()
    try:
        import nltk.data
    except ImportError:
        # Execution continues; the failed lookup of `nltk` below is then
        # reported by the generic handler.
        self.log.error(
            "Cannot import NLTK data for the sentence splitter. Please "
            "check if the 'punkt' NLTK-package is installed correctly.")
    try:
        if not punkt_data_path:
            punkt_data_path = self.lang2datapath[language]
        self.sent_detector = nltk.data.load(punkt_data_path)
    except KeyError:
        self.log.error(
            "No sentence splitter data for language {}.".format(language))
    except Exception:
        # Report the path actually attempted. Indexing lang2datapath here
        # could raise KeyError inside the handler when an explicit path was
        # given for an unknown language.
        self.log.error(
            "Could not load sentence splitter data: {}".format(
                punkt_data_path))
def process(input_dir, output_dir, function):
    """
    Run every file of input_dir through function and store the result
    under the same file name in output_dir. Files are read and written as
    UTF-8; output_dir is created when it does not exist yet. Each file
    name is logged before it is processed.

    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    logger = log.get_global_console_logger()
    logger.info("Processing files in {}.".format(input_dir))
    for name in os.listdir(input_dir):
        logger.info("Processing {}.".format(name))
        source_path = os.path.join(input_dir, name)
        with codecs.open(source_path, "r", encoding="UTF-8") as reader:
            raw_text = reader.read()
        transformed = function(raw_text)
        target_path = os.path.join(output_dir, name)
        with codecs.open(target_path, "w", encoding="UTF-8") as writer:
            writer.write(transformed)
    logger.info("Saved processed files to {}.".format(output_dir))
def __init__(self, rouge_dir=None, verbose=True, rouge_args=None):
    """
    Set up a Rouge155 object.

        rouge_dir:  Directory containing Rouge-1.5.5.pl
        verbose:    Prints a detailed log; when falsy the logger level
                    is raised to 30.
        rouge_args: Arguments handed through to ROUGE in place of the
                    default pyrouge arguments.

    """
    console_logger = log.get_global_console_logger()
    self.log = console_logger
    if not verbose:
        # Level 30 silences all "info" logs.
        console_logger.setLevel(30)
    self.__set_dir_properties()
    self._config_file = None
    self._settings_file = self.__get_config_path()
    self.__set_rouge_dir(rouge_dir)
    self.args = self.__clean_rouge_args(rouge_args)
    # Filename patterns start unset.
    self._system_filename_pattern = self._model_filename_pattern = None
def process(input_dir, output_dir, function):
    """
    Run every file of input_dir through function and store the result
    under the same file name in output_dir. output_dir is created when it
    does not exist yet. Backslashes in the input path are replaced with
    forward slashes before opening, and bytes that are not valid UTF-8
    are ignored while reading.

    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    logger = log.get_global_console_logger()
    logger.info("Processing files in {}.".format(input_dir))
    for name in os.listdir(input_dir):
        logger.info("Processing {}.".format(name))
        # Normalise Windows-style separators in the joined path.
        source_path = re.sub(r'\\', '/', os.path.join(input_dir, name))
        # errors='ignore' drops undecodable bytes instead of raising.
        reader = codecs.open(
            source_path, "r", encoding="UTF-8", errors='ignore')
        with reader:
            raw_text = reader.read()
        transformed = function(raw_text)
        target_path = os.path.join(output_dir, name)
        with codecs.open(target_path, "w", encoding="UTF-8") as writer:
            writer.write(transformed)
    logger.info("Saved processed files to {}.".format(output_dir))