Example #1
File: vw_audit.py Project: xxlatgh/qb
# Imports assumed by this snippet; ReportGenerator, safe_path, and
# VW_AUDIT_REGRESSOR_REPORT come from elsewhere in the qb project.
import matplotlib.pyplot as plt
import pandas as pd


def audit_report(df, output):
    df.to_csv(output)

    df.head(25).plot.bar('feature', 'value')
    plt.title('Feature Magnitudes')
    plt.ylabel('Magnitude')  # magnitudes are on the y axis of the bar plot
    plt.savefig('/tmp/feature_importance.png', dpi=200, format='png')

    pd.set_option('display.width', 1000)
    pd.set_option('display.max_rows', 100)
    pd.set_option('display.max_colwidth', 30)

    top_features = str(df.head(100))

    report = ReportGenerator(
        {
            'feature_importance_plot': '/tmp/feature_importance.png',
            'top_features': top_features
        }, 'audit_regressor.md')

    # Path for the rendered report (distinct from the CSV `output` parameter)
    report_path = safe_path(VW_AUDIT_REGRESSOR_REPORT)
    report.create(report_path)
    # Clear the figure and axes and close the window so figures do not
    # accumulate across repeated calls
    plt.clf()
    plt.cla()
    plt.close()
Example #2
def report(variables, save_dir, folds):
    # Pre-populate these keys so the Jinja template silently skips features
    # that were not generated
    jinja_keys = ['his_lines', 'his_stacked', 'rush_late_plot', 'choice_plot',
            'hype_configs', 'protobowl_plot', 'protobowl_stats']
    _variables = {k: dict() for k in jinja_keys}
    _variables.update(variables)
    if len(folds) == 1:
        output = os.path.join(save_dir, 'report_{}.pdf'.format(folds[0]))
    else:
        output = os.path.join(save_dir, 'report_all.pdf')
    report_generator = ReportGenerator('new_performance.md')
    report_generator.create(_variables, output)
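
Two ReportGenerator call shapes appear across these examples: an older one in which the variables dict is passed to the constructor and create() takes only the output path (Examples #1 and #5), and the shape above, where the constructor takes the template name and create() takes (variables, output). Below is a minimal sketch consistent with the latter, assuming a Jinja2 markdown template rendered and converted with pandoc; the template directory and the pypandoc dependency are guesses, not the project's actual implementation:

import jinja2
import pypandoc


class ReportGenerator:
    def __init__(self, template_name, template_dir='reporting/templates'):
        # template_dir is an assumed layout, not the project's real path
        env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
        self.template = env.get_template(template_name)

    def create(self, variables, output_path):
        markdown = self.template.render(**variables)
        # pandoc renders the markdown and writes the PDF to output_path
        pypandoc.convert_text(markdown, 'pdf', format='md',
                              outputfile=output_path)

Under this sketch, report_generator.create(_variables, output) above renders new_performance.md with the given variables and writes the PDF to output.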
Example #3
def n_guesser_report(report_path, fold, n_samples=10):
    qdb = QuestionDatabase()
    question_lookup = qdb.all_questions()
    questions = [q for q in question_lookup.values() if q.fold == fold]
    guess_dataframes = []
    folds = [fold]
    for g_spec in AbstractGuesser.list_enabled_guessers():
        path = AbstractGuesser.output_path(g_spec.guesser_module,
                                           g_spec.guesser_class, '')
        guess_dataframes.append(AbstractGuesser.load_guesses(path,
                                                             folds=folds))
    df = pd.concat(guess_dataframes)  # type: pd.DataFrame
    guessers = set(df['guesser'].unique())
    n_guessers = len(guessers)
    guesses = []
    for name, group in df.groupby(['guesser', 'qnum', 'sentence', 'token']):
        top_guess = group.sort_values('score', ascending=False).iloc[0]
        guesses.append(top_guess)

    top_df = pd.DataFrame.from_records(guesses)

    guess_lookup = {}
    for name, group in top_df.groupby(['qnum', 'sentence', 'token']):
        guess_lookup[name] = group

    performance = {}
    question_positions = {}
    n_correct_samples = defaultdict(list)
    for q in questions:
        page = q.page
        positions = [(sent, token) for sent, token, _ in q.partials()]
        # partials() is called with word_skip=-1, so each entry is guaranteed
        # to be a sentence boundary
        n_sentences = len(positions)
        q_positions = {
            'start': 1,
            'p_25': max(1, round(n_sentences * .25)),
            'p_50': max(1, round(n_sentences * .5)),
            'p_75': max(1, round(n_sentences * .75)),
            'end': n_sentences
        }
        question_positions[q.qnum] = q_positions
        for sent, token in positions:
            key = (q.qnum, sent, token)
            if key in guess_lookup:
                position_guesses = guess_lookup[key]
                n_correct = (position_guesses.guess == page).sum()
                n_correct_samples[n_correct].append(key)
                if n_correct == 0:
                    correct_guessers = 'None'
                elif n_correct == n_guessers:
                    correct_guessers = 'All'
                else:
                    correct_guessers = '/'.join(sorted(
                        position_guesses[position_guesses.guess == page]
                        .guesser.values))
            else:
                n_correct = 0
                correct_guessers = 'None'
            performance[key] = (n_correct, correct_guessers)

    start_accuracies = []
    p_25_accuracies = []
    p_50_accuracies = []
    p_75_accuracies = []
    end_accuracies = []

    for q in questions:
        qnum = q.qnum
        start_pos = question_positions[qnum]['start']
        p_25_pos = question_positions[qnum]['p_25']
        p_50_pos = question_positions[qnum]['p_50']
        p_75_pos = question_positions[qnum]['p_75']
        end_pos = question_positions[qnum]['end']

        start_accuracies.append((*performance[(qnum, start_pos, 0)], 'start'))
        p_25_accuracies.append((*performance[(qnum, p_25_pos, 0)], 'p_25'))
        p_50_accuracies.append((*performance[(qnum, p_50_pos, 0)], 'p_50'))
        p_75_accuracies.append((*performance[(qnum, p_75_pos, 0)], 'p_75'))
        end_accuracies.append((*performance[(qnum, end_pos, 0)], 'end'))

    all_accuracies = (start_accuracies + p_25_accuracies + p_50_accuracies +
                      p_75_accuracies + end_accuracies)

    perf_df = pd.DataFrame.from_records(
        all_accuracies,
        columns=['n_guessers_correct', 'correct_guessers', 'position'])
    perf_df['count'] = 1
    n_questions = len(questions)

    aggregate_df = (perf_df.groupby(
        ['position', 'n_guessers_correct', 'correct_guessers']).count() /
                    n_questions).reset_index()

    fig, ax = plt.subplots(figsize=(12, 8),
                           nrows=2,
                           ncols=3,
                           sharey=True,
                           sharex=True)

    positions = {
        'start': (0, 0),
        'p_25': (0, 1),
        'p_50': (1, 0),
        'p_75': (1, 1),
        'end': (1, 2)
    }

    position_labels = {
        'start': 'Start',
        'p_25': '25%',
        'p_50': '50%',
        'p_75': '75%',
        'end': '100%'
    }
    ax[(0, 2)].axis('off')

    for p, key in positions.items():
        data = aggregate_df[aggregate_df.position == p].pivot(
            index='n_guessers_correct',
            columns='correct_guessers').fillna(0)['count']
        plot_ax = ax[key]
        data.plot.bar(stacked=True,
                      ax=plot_ax,
                      title='Question Position: {}'.format(position_labels[p]))
        # Hide each per-axes legend; the handles and labels captured here are
        # reused after the loop for a single shared figure legend
        handles, labels = plot_ax.get_legend_handles_labels()
        ax_legend = plot_ax.legend()
        ax_legend.set_visible(False)
        plot_ax.set(xlabel='Number of Correct Guessers', ylabel='Accuracy')

    for plot_ax in list(ax.flatten()):
        for tk in plot_ax.get_yticklabels():
            tk.set_visible(True)
        for tk in plot_ax.get_xticklabels():
            tk.set_rotation('horizontal')
    fig.legend(handles, labels, bbox_to_anchor=(.8, .75))
    fig.suptitle('Accuracy Breakdown by Guesser')
    accuracy_by_n_correct_plot_path = '/tmp/accuracy_by_n_correct_{}.png'.format(
        fold)
    fig.savefig(accuracy_by_n_correct_plot_path, dpi=200)

    sampled_questions_by_correct = sample_n_guesser_correct_questions(
        question_lookup, guess_lookup, n_correct_samples, n_samples=n_samples)

    report = ReportGenerator('compare_guessers.md')
    report.create(
        {
            'dev_accuracy_by_n_correct_plot': accuracy_by_n_correct_plot_path,
            'sampled_questions_by_correct': sampled_questions_by_correct
        }, safe_path(report_path))
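
The q_positions block above converts a question's length into 1-based sentence checkpoints at the start, 25%, 50%, 75%, and end marks. Here is a standalone sketch of that arithmetic (checkpoint_positions is a hypothetical helper, not part of the project; note that Python 3's round() uses banker's rounding, so 2.5 rounds to 2):

def checkpoint_positions(n_sentences):
    return {
        'start': 1,
        'p_25': max(1, round(n_sentences * .25)),
        'p_50': max(1, round(n_sentences * .5)),
        'p_75': max(1, round(n_sentences * .75)),
        'end': n_sentences,
    }


# A five-sentence question is checked at sentences 1, 1, 2, 4, 5
assert checkpoint_positions(5) == {
    'start': 1, 'p_25': 1, 'p_50': 2, 'p_75': 4, 'end': 5
}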
Example #4
    def create_report(self, directory: str):
        with open(os.path.join(directory, 'guesser_params.pickle'), 'rb') as f:
            params = pickle.load(f)
        dev_guesses = AbstractGuesser.load_guesses(directory,
                                                   folds=[c.GUESSER_DEV_FOLD])

        qdb = QuestionDatabase()
        questions = qdb.all_questions()

        # Compute recall and accuracy
        dev_recall = compute_fold_recall(dev_guesses, questions)
        dev_questions = {
            qnum: q
            for qnum, q in questions.items() if q.fold == c.GUESSER_DEV_FOLD
        }
        dev_recall_stats = compute_recall_at_positions(dev_recall)
        dev_summary_accuracy = compute_summary_accuracy(
            dev_questions, dev_recall_stats)
        dev_summary_recall = compute_summary_recall(dev_questions,
                                                    dev_recall_stats)

        accuracy_plot('/tmp/dev_accuracy.png', dev_summary_accuracy,
                      'Guesser Dev')
        recall_plot('/tmp/dev_recall.png', dev_questions, dev_summary_recall,
                    'Guesser Dev')

        # Obtain metrics on number of answerable questions based on the dataset requested
        all_answers = set(qdb.all_answers().values())
        all_questions = list(qdb.all_questions().values())
        answer_lookup = dict(qdb.all_answers())
        dataset = self.qb_dataset()
        training_data = dataset.training_data()

        min_n_answers = set(training_data[1])

        train_questions = [
            q for q in all_questions if q.fold == c.GUESSER_TRAIN_FOLD
        ]
        train_answers = {q.page for q in train_questions}

        # dev_questions is rebound here as a list (above it was a
        # qnum -> question dict)
        dev_questions = [
            q for q in all_questions if q.fold == c.GUESSER_DEV_FOLD
        ]
        dev_answers = {q.page for q in dev_questions}

        min_n_train_questions = [
            q for q in train_questions if q.page in min_n_answers
        ]

        all_common_train_dev = train_answers.intersection(dev_answers)
        min_common_train_dev = min_n_answers.intersection(dev_answers)

        all_train_answerable_questions = [
            q for q in train_questions if q.page in train_answers
        ]
        all_dev_answerable_questions = [
            q for q in dev_questions if q.page in train_answers
        ]

        min_train_answerable_questions = [
            q for q in train_questions if q.page in min_n_answers
        ]
        min_dev_answerable_questions = [
            q for q in dev_questions if q.page in min_n_answers
        ]

        # The next section of code generates the percent of questions correct by the number
        # of training examples.
        Row = namedtuple('Row', [
            'fold', 'guess', 'guesser', 'qnum', 'score', 'sentence', 'token',
            'correct', 'answerable_1', 'answerable_2', 'n_examples'
        ])

        train_example_count_lookup = seq(train_questions) \
            .group_by(lambda q: q.page) \
            .smap(lambda page, group: (page, len(group))) \
            .dict()

        def guess_to_row(*args):
            guess = args[1]
            qnum = args[3]
            answer = answer_lookup[qnum]

            return Row(
                *args,
                answer == guess,
                answer in train_answers,
                answer in min_n_answers,
                train_example_count_lookup.get(answer, 0))

        # Keep one row per (qnum, sentence); max_by over the group's constant
        # sentence key simply picks a single representative row
        dev_data = seq(dev_guesses) \
            .smap(guess_to_row) \
            .group_by(lambda r: (r.qnum, r.sentence)) \
            .smap(lambda key, group: seq(group).max_by(lambda q: q.sentence)) \
            .to_pandas(columns=Row._fields)
        dev_data['correct_int'] = dev_data['correct'].astype(int)
        dev_data['ones'] = 1
        dev_counts = dev_data\
            .groupby('n_examples')\
            .agg({'correct_int': np.mean, 'ones': np.sum})\
            .reset_index()
        correct_by_n_count_plot('/tmp/dev_correct_by_count.png', dev_counts,
                                'Guesser Dev')
        n_train_vs_fold_plot('/tmp/n_train_vs_dev.png', dev_counts,
                             'Guesser Dev')

        with open(os.path.join(directory, 'guesser_report.pickle'), 'wb') as f:
            pickle.dump(
                {
                    'dev_accuracy': dev_summary_accuracy,
                    'guesser_name': self.display_name(),
                    'guesser_params': params
                }, f)

        output = safe_path(os.path.join(directory, 'guesser_report.pdf'))
        report = ReportGenerator('guesser.md')
        report.create(
            {
                'dev_recall_plot': '/tmp/dev_recall.png',
                'dev_accuracy_plot': '/tmp/dev_accuracy.png',
                'dev_accuracy': dev_summary_accuracy,
                'guesser_name': self.display_name(),
                'guesser_params': params,
                'n_answers_all_folds': len(all_answers),
                'n_total_train_questions': len(train_questions),
                'n_train_questions': len(min_n_train_questions),
                'n_dev_questions': len(dev_questions),
                'n_total_train_answers': len(train_answers),
                'n_train_answers': len(min_n_answers),
                'n_dev_answers': len(dev_answers),
                'all_n_common_train_dev': len(all_common_train_dev),
                'all_p_common_train_dev':
                    len(all_common_train_dev) / max(1, len(dev_answers)),
                'min_n_common_train_dev': len(min_common_train_dev),
                'min_p_common_train_dev':
                    len(min_common_train_dev) / max(1, len(dev_answers)),
                'all_n_answerable_train':
                    len(all_train_answerable_questions),
                'all_p_answerable_train':
                    len(all_train_answerable_questions) / len(train_questions),
                'all_n_answerable_dev': len(all_dev_answerable_questions),
                'all_p_answerable_dev':
                    len(all_dev_answerable_questions) / len(dev_questions),
                'min_n_answerable_train':
                    len(min_train_answerable_questions),
                'min_p_answerable_train':
                    len(min_train_answerable_questions) / len(train_questions),
                'min_n_answerable_dev': len(min_dev_answerable_questions),
                'min_p_answerable_dev':
                    len(min_dev_answerable_questions) / len(dev_questions),
                'dev_correct_by_count_plot': '/tmp/dev_correct_by_count.png',
                'n_train_vs_dev_plot': '/tmp/n_train_vs_dev.png',
            }, output)
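
The seq(...) chains above use the pyfunctional library: group_by yields (key, [items]) pairs, smap star-applies a function to each pair, and dict()/to_pandas() materialize the result. A toy illustration of the same pattern on plain data:

from functional import seq

counts = (seq(['a', 'b', 'a', 'c', 'a'])
          .group_by(lambda x: x)
          .smap(lambda key, group: (key, len(group)))
          .dict())
assert counts == {'a': 3, 'b': 1, 'c': 2}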
Example #5
def create_report(classifier, class_type, question_db=None):
    if question_db is None:
        question_db = QuestionDatabase(QB_QUESTION_DB)

    all_questions = question_db.questions_with_pages()
    train = compute_features(all_questions, 'train', class_type)
    train_x = train['text']
    train_y = train['label']
    dev = compute_features(all_questions, 'dev', class_type)
    dev_x = dev['text']
    dev_y = dev['label']
    train_score = classifier.score(train_x, train_y)
    dev_score = classifier.score(dev_x, dev_y)

    true_labels = dev['label'].values
    predicted_labels = classifier.predict(dev_x)

    cf_norm = '/tmp/norm_confusion.png'
    plot_confusion(
        'Row Normalized Confusion Matrix of {} Classification'.format(
            class_type),
        true_labels,
        predicted_labels,
        normalized=True)
    plt.savefig(cf_norm, format='png', dpi=200)
    plt.clf()
    plt.cla()
    plt.close()

    cf_unnorm = '/tmp/unnorm_confusion.png'
    plot_confusion(
        'Unnormalized Confusion Matrix of {} Classification'.format(
            class_type),
        true_labels,
        predicted_labels,
        normalized=False)
    plt.savefig(cf_unnorm, format='png', dpi=200)

    correct_by_position = '/tmp/correct_by_position.png'

    # Align on dev's index; a bare pd.Series could misalign if dev's index is
    # not the default RangeIndex
    dev['prediction'] = pd.Series(predicted_labels, index=dev.index)
    dev['correct'] = dev['prediction'] == dev['label']
    pd.pivot_table(dev,
                   values=['text'],
                   index=['sentence', 'correct'],
                   aggfunc=len).unstack(fill_value=0).plot.bar(
                       title='Number of Questions Correct vs Sentence Number')
    plt.xlabel('Sentence Number')
    plt.ylabel('Number Correct')
    handles, labels = plt.gca().get_legend_handles_labels()
    # The unstacked columns sort as (False, True), hence this label order
    plt.gca().legend(handles, ['Number Incorrect', 'Number Correct'])
    plt.savefig(correct_by_position, format='png', dpi=200)

    report = ReportGenerator(
        {
            'unnormalized_confusion_plot': cf_unnorm,
            'normalized_confusion_plot': cf_norm,
            'correct_by_position_plot': correct_by_position,
            'train_score': train_score,
            'dev_score': dev_score,
            'class_type': class_type
        }, 'classifier.md')
    output = safe_path(CLASSIFIER_REPORT_PATH.format(class_type))
    report.create(output)
    plt.clf()
    plt.cla()
    plt.close()
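
The pivot_table call above counts questions per (sentence, correct) pair and unstacks correct into False/True columns, which is what produces the stacked bars. A self-contained toy version of that reshaping, on synthetic data rather than the project's:

import pandas as pd

dev = pd.DataFrame({
    'sentence': [0, 0, 1, 1, 1],
    'correct': [True, False, True, True, False],
    'text': ['a', 'b', 'c', 'd', 'e'],
})
# Rows: sentence; columns: correct (False, True); values: row counts
table = pd.pivot_table(dev, values=['text'], index=['sentence', 'correct'],
                       aggfunc=len).unstack(fill_value=0)
print(table)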
Example #6
if __name__ == '__main__':
    args = parse_args()
    if args.fold is not None:
        folds = [args.fold]
    else:
        folds = c.BUZZ_FOLDS

    all_questions = QuestionDatabase().all_questions()
    answers = {k: v.page for k, v in all_questions.items()}

    variables = dict()
    for fold in folds:
        guesses_df = AbstractGuesser.load_guesses(bc.GUESSES_DIR, folds=[fold])

        # bc.BUZZES_DIR is a pickle file path template, despite its name
        buzzes_path = bc.BUZZES_DIR.format(fold)
        with open(buzzes_path, 'rb') as infile:
            buzzes = pickle.load(infile)
        log.info('Buzzes loaded from {}.'.format(buzzes_path))

        checkpoint_path = 'output/summary/performance_{}.pkl'.format(fold)
        plot_path = 'output/summary/performance_{}_his.png'.format(fold)
        eop_output, his_output = generate(buzzes, answers, guesses_df, fold,
                                          checkpoint_path, plot_path)
        variables['eop_{}_output'.format(fold)] = eop_output
        variables['his_{}_output'.format(fold)] = his_output
        variables['his_{}_plot'.format(fold)] = plot_path

    output = 'output/summary/new_performance.pdf'
    report_generator = ReportGenerator('new_performance.md')
    report_generator.create(variables, output)
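
parse_args is defined elsewhere in the project; a minimal sketch consistent with the usage above (the real CLI may well define more options) would be:

import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--fold', default=None,
                        help='report on a single fold; default is all BUZZ_FOLDS')
    return parser.parse_args()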