Example #1
# os.path helpers are needed for the path handling below; the baseline and scorer
# helpers (run_random_baseline, run_ngram_baseline, check_format, evaluate) are
# assumed to be provided by the surrounding project.
from os.path import dirname, join


def run_baselines(lang='English'):
    ROOT_DIR = dirname(dirname(__file__))
    gold_data_folder = join(ROOT_DIR, 'data/task2/{}'.format(lang))

    # Train on the 1st Presidential and the Vice-Presidential debates,
    # evaluate on the 2nd Presidential debate.
    train_debates = [join(gold_data_folder, 'Task2-{}-1st-Presidential.txt'.format(lang)),
                     join(gold_data_folder, 'Task2-{}-Vice-Presidential.txt'.format(lang))]
    test_debate = join(gold_data_folder, 'Task2-{}-2nd-Presidential.txt'.format(lang))

    # Random baseline: predict, validate the output format, then score.
    random_baseline_fpath = join(ROOT_DIR, 'baselines/data/task2_random_baseline_{}.txt'.format(lang))
    run_random_baseline(test_debate, random_baseline_fpath)
    if check_format(random_baseline_fpath):
        evaluate(test_debate, random_baseline_fpath)

    # N-gram baseline: same predict / validate / score cycle.
    ngram_baseline_fpath = join(ROOT_DIR, 'baselines/data/task2_ngram_baseline_{}.txt'.format(lang))
    run_ngram_baseline(train_debates, test_debate, ngram_baseline_fpath)
    if check_format(ngram_baseline_fpath):
        evaluate(test_debate, ngram_baseline_fpath)
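
A minimal driver sketch for the function above; the list of languages is an assumption made for illustration, and run_baselines is assumed to be importable from the module that defines it.

# Hypothetical driver: the language list is an assumption, not taken from the project.
for lang in ('English', 'Arabic'):
    run_baselines(lang=lang)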
Example #2
# logging is assumed to be imported at module level; check_format comes from
# the scorer module, as in the other examples.
def validate_files(pred_files, gold_files):
    """Check that the prediction files can be scored against the gold files."""
    if len(pred_files) != len(gold_files):
        logging.error(
            'Different number of gold files ({}) and pred files ({}) provided. Cannot score.'
            .format(len(gold_files), len(pred_files)))
        return False

    # Each prediction file should correspond to a different debate.
    if len(pred_files) != len(set(pred_files)):
        logging.error(
            'Same pred file provided multiple times. The pred files should be for different debates.'
        )
        return False

    # Every prediction file must pass the format check before scoring.
    for pred_file in pred_files:
        if not check_format(pred_file):
            logging.error(
                'Bad format for pred file {}. Cannot score.'.format(pred_file))
            return False

    return True
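
A short usage sketch, assuming check_format and evaluate from the other examples are in scope; the file paths are placeholders, not files that ship with the project.

# Placeholder paths for illustration only.
gold_files = ['data/task2/English/Task2-English-2nd-Presidential.txt']
pred_files = ['predictions/task2_english_2nd.txt']

# Score each debate only if the whole submission passes validation.
if validate_files(pred_files, gold_files):
    for pred_file, gold_file in zip(pred_files, gold_files):
        evaluate(gold_file, pred_file)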
Example #3
    logging.info('{:<25}'.format('MACRO F1:') + '{0:.4f}'.format(macro_f1))
    logging.info(lines_separator)

    logging.info('{:<25}'.format('MACRO RECALL:') + '{0:.4f}'.format(macro_recall))
    logging.info(lines_separator)

    # Confusion matrix: rows are true labels, columns are predicted labels.
    logging.info('{:<25}'.format('CONFUSION MATRIX:'))
    logging.info(' '*10 + ''.join(['{:>15}'.format(l) for l in _LABELS]))
    for true_label in _LABELS:
        predicted_labels = conf_matrix[true_label]
        logging.info('{:<10}'.format(true_label) + ''.join(['{:>15}'.format(predicted_labels[l]) for l in _LABELS]))
    logging.info(lines_separator)

    logging.info('Description of the evaluation metrics: ')
    logging.info('Accuracy computes the percentage of correctly predicted examples.')
    logging.info('Macro F1 computes the F1 score for each of the classes and takes their average.')
    logging.info('Macro Recall computes the Recall for each of the classes and takes their average.')
    logging.info('The Confusion Matrix shows the distribution of predicted classes, where rows are true labels and columns are predicted ones.')
    logging.info(lines_separator)
    logging.info(lines_separator)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--gold_file_path", help="The absolute path to the file with gold annotations.", type=str)
    parser.add_argument("--pred_file_path", help="The absolute path to the file with ranked line_numbers.", type=str)
    args = parser.parse_args()

    logging.info("Started evaluating results for Task 2 ...")
    if check_format(args.pred_file_path):
        evaluate(args.gold_file_path, args.pred_file_path)
Example #4
def test_not_ok(self):
    # Every file in _NOT_OK_FILES deliberately violates the expected submission
    # format, so check_format should reject each of them.
    for _file in self._NOT_OK_FILES:
        self.assertFalse(task2.check_format(join(_TEST_DATA_FOLDER, _file)))
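
For context, a minimal sketch of the test case this method would belong to; the import path, the test-data folder, and the fixture file names are assumptions made for illustration.

import unittest
from os.path import dirname, join

from scorer import task2  # assumed import path for the module that defines check_format

# Assumed location of the malformed fixture files.
_TEST_DATA_FOLDER = join(dirname(__file__), 'data')


class CheckFormatTestCase(unittest.TestCase):
    # Hypothetical fixtures: each file deliberately breaks the expected format.
    _NOT_OK_FILES = ['task2_missing_column.txt', 'task2_bad_label.txt']

    def test_not_ok(self):
        for _file in self._NOT_OK_FILES:
            self.assertFalse(task2.check_format(join(_TEST_DATA_FOLDER, _file)))


if __name__ == '__main__':
    unittest.main()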