Example No. 1
from os.path import dirname, join


def run_baselines(lang='English'):
    # run_random_baseline, run_ngram_baseline, check_format and evaluate are
    # assumed to be defined elsewhere in this module.
    ROOT_DIR = dirname(dirname(__file__))

    gold_data_folder = join(ROOT_DIR, 'data/task1/{}'.format(lang))

    train_debates = [join(gold_data_folder, 'Task1-{}-1st-Presidential.txt'.format(lang)),
                     join(gold_data_folder, 'Task1-{}-Vice-Presidential.txt'.format(lang))]
    test_debate = join(gold_data_folder, 'Task1-{}-2nd-Presidential.txt'.format(lang))

    # Random baseline: produce predictions, verify the file format, then score against the gold file.
    random_baseline_fpath = join(ROOT_DIR, 'baselines/data/task1_random_baseline_{}.txt'.format(lang))
    run_random_baseline(test_debate, random_baseline_fpath)
    if check_format(random_baseline_fpath):
        evaluate(test_debate, random_baseline_fpath)

    # N-gram baseline: train on the first two debates, predict on the held-out one, then score.
    ngram_baseline_fpath = join(ROOT_DIR, 'baselines/data/task1_ngram_baseline_{}.txt'.format(lang))
    run_ngram_baseline(train_debates, test_debate, ngram_baseline_fpath)
    if check_format(ngram_baseline_fpath):
        evaluate(test_debate, ngram_baseline_fpath)
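run_random_baseline and check_format are not shown in this example; a minimal sketch of the random baseline, assuming the debate file is tab-separated with the line_number in its first column and that predictions are written as line_number<TAB>score pairs (the exact format is whatever check_format enforces), could be:

import random


def run_random_baseline(test_debate_fpath, results_fpath):
    # Hypothetical sketch: assign every line of the debate a random score.
    with open(test_debate_fpath) as debate_file, open(results_fpath, 'w') as results_file:
        for line in debate_file:
            line_number = line.strip().split('\t')[0]  # assumed first column
            results_file.write('{}\t{}\n'.format(line_number, random.random()))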
Example No. 2
def validate_files(pred_files, gold_files):
    if len(pred_files) != len(gold_files):
        logging.error(
            'Different number of gold files ({}) and pred files ({}) provided. Cannot score.'.format(
                len(gold_files), len(pred_files)
            )
        )
        return False

    if len(pred_files) != len(set(pred_files)):
        logging.error('Same pred file provided multiple times. The pred files should be for different debates.')
        return False

    for pred_file in pred_files:
        if not check_format(pred_file):
            logging.error('Bad format for pred file {}. Cannot score.'.format(pred_file))
            return False

    return True
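A hypothetical call site (the paths below are invented, and pairing pred and gold files by position is an assumption about how the scorer is driven) might look like:

pred_files = ['preds/debate1.txt', 'preds/debate2.txt']  # hypothetical paths
gold_files = ['gold/debate1.txt', 'gold/debate2.txt']

if validate_files(pred_files, gold_files):
    for pred_file, gold_file in zip(pred_files, gold_files):
        evaluate(gold_file, pred_file)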
Example No. 3
    # Tail of the results-reporting code: avg_precisions, reciprocal_ranks, precisions,
    # thresholds, threshold_line_format and lines_separator are computed earlier in the function.
    logging.info('{:<25}'.format("") + "".join(['{0:<10.4f}'.format(r) for r in avg_precisions]))
    logging.info(lines_separator)

    logging.info(threshold_line_format.format('RECIPROCAL RANK@N:'))
    logging.info('{:<25}'.format("") + "".join(['{0:<10.4f}'.format(r) for r in reciprocal_ranks]))
    logging.info(lines_separator)

    logging.info(threshold_line_format.format('PRECISION@N:'))
    logging.info('{:<25}'.format("") + "".join(['{0:<10.4f}'.format(precisions[r-1]) for r in thresholds]))
    logging.info(lines_separator)

    logging.info('Description of the evaluation metrics: ')
    logging.info('R-Precision is Precision at R, where R is the number of relevant line_numbers for the evaluated set.')
    logging.info('Average Precision@N is precision, estimated at each relevant line_number, averaged for all relevant line_numbers up to the N-th (or by the threshold, if it is smaller).')
    logging.info('Reciprocal Rank@N is the sum of the reciprocal ranks of the relevant line_numbers (up to the N-th), according to the ranked list.')
    logging.info('Precision@N is precision estimated for the first N line_numbers in the provided ranked list.')
    logging.info(lines_separator)
    logging.info(lines_separator)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--gold_file_path", help="The absolute path to the file with gold annotations.", type=str)
    parser.add_argument("--pred_file_path", help="The absolute path to the file with ranked line_numbers.", type=str)
    args = parser.parse_args()

    logging.info("Started evaluating results for Task 1 ...")
    if check_format(args.pred_file_path):
        evaluate(args.gold_file_path, args.pred_file_path)
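The metrics described in the log messages above can be computed directly from a ranked list of line_numbers and the set of relevant ones; the sketch below only illustrates those definitions and is not the scorer's actual implementation (the function names and argument types are assumptions):

def precision_at_n(ranked_line_numbers, relevant, n):
    # Precision@N: share of the first N ranked line_numbers that are relevant.
    return sum(1 for ln in ranked_line_numbers[:n] if ln in relevant) / n


def average_precision_at_n(ranked_line_numbers, relevant, n):
    # Average Precision@N: precision at each relevant hit, averaged over
    # min(number of relevant line_numbers, N), as described above.
    hits, precision_sum = 0, 0.0
    for rank, ln in enumerate(ranked_line_numbers[:n], start=1):
        if ln in relevant:
            hits += 1
            precision_sum += hits / rank
    return precision_sum / min(len(relevant), n) if relevant else 0.0


def reciprocal_rank_at_n(ranked_line_numbers, relevant, n):
    # Reciprocal Rank@N: sum of 1/rank over the relevant line_numbers in the top N.
    return sum(1.0 / rank
               for rank, ln in enumerate(ranked_line_numbers[:n], start=1)
               if ln in relevant)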

Example No. 4
    def test_not_ok(self):
        # Every malformed sample file must be rejected by the format check.
        for _file in self._NOT_OK_FILES:
            self.assertFalse(task1.check_format(join(_TEST_DATA_FOLDER, _file)))