def plot_first_session_vs_total(figure, answers, verbose=False):
    """Scatter first-session answer counts against later answers, per user.

    Args:
        figure: object providing ``add_subplot``; one ``111`` subplot is added
            and drawn on (presumably a matplotlib figure).
        answers: table indexed by column name; it is passed through
            ``decorator.session_number`` and must then expose ``'user'`` and
            ``'session_number'`` columns.
        verbose: accepted but not used by this function.

    Raises:
        ValueError: if ``answers`` contains no users, because the final
            ``zip(*pairs)`` then has nothing to unpack.
    """
    answers = decorator.session_number(answers)
    ax = figure.add_subplot(111)
    total = user.answers_per_user(answers)
    total_first = user.answers_per_user(answers[answers['session_number'] == 0])
    users = answers['user'].unique()
    # Users missing from a mapping are counted as having zero answers.
    first_counts = [total_first.get(u, 0) for u in users]
    total_counts = [total.get(u, 0) for u in users]
    # Unpack the pair in a comprehension: the former ``lambda (x, y): ...`` is
    # a syntax error on Python 3 (PEP 3113), while this form runs on 2 and 3.
    pairs = [(first, overall - first)
             for first, overall in sorted(zip(first_counts, total_counts))]
    total_first, total = zip(*pairs)
    ax.plot(total_first, total, 'o', alpha=0.3, linewidth=0, color='black')
    ax.set_xlabel('number of answers in the first session')
    # NOTE(review): the plotted y value is (total - first session), although
    # the label below reads as the overall count -- confirm which is intended.
    ax.set_ylabel('number of answer at all')
    ax.set_xscale('log')
    ax.set_yscale('log')
示例#2
0
def hist_answers_per_user(figure,
                          answers,
                          group_column,
                          group_name_mapping=None,
                          verbose=False):
    """Draw one histogram per group of log10(answers per user).

    Args:
        figure: object providing ``add_subplot`` and ``tight_layout``
            (presumably a matplotlib figure).
        answers: table supporting ``groupby(group_column)``.
        group_column: column whose distinct values define the groups.
        group_name_mapping: optional mapping from group value to display
            name; when falsy, ``str(group value)`` is used instead.
        verbose: accepted but not used by this function.

    Raises:
        KeyError: if ``group_name_mapping`` is given but lacks a group value.
        ValueError: if ``answers`` yields no groups (nothing to unpack).
    """
    ax = figure.add_subplot(111)
    to_plots = []
    group_names = []
    for group_name, group_data in answers.groupby(group_column):
        # Materialise the counts first: on Python 3 ``.values()`` is a view
        # object, which numpy cannot interpret as a numeric sequence.
        counts = list(user.answers_per_user(group_data).values())
        to_plots.append(numpy.log10(counts))
        group_names.append(group_name)
    if group_name_mapping:
        group_names = [group_name_mapping[name] for name in group_names]
    else:
        group_names = [str(name) for name in group_names]
    # Order the series by display name so the legend is stable.
    group_names, to_plots = zip(
        *sorted(zip(group_names, to_plots), key=lambda pair: pair[0]))
    ax.hist(
        to_plots,
        # %-formatting also copes with mapped names that are not strings,
        # where ``name + ' ('`` would raise TypeError.
        label=[
            '%s (%d)' % (name, len(values))
            for name, values in zip(group_names, to_plots)
        ],
        # NOTE(review): ``normed`` was removed in matplotlib 3.1 in favour of
        # ``density``; kept as-is for the matplotlib this file targets.
        normed=True,
    )
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_xlabel("Number of Answers (log)")
    ax.set_ylabel("Number of Users (normed)")
    figure.tight_layout()
示例#3
0
def boxplot_feedback_vs_number_of_answers(figure,
                                          feedback,
                                          answers,
                                          verbose=False):
    """Box-plot answers per user, grouped by each user's first feedback value.

    Args:
        figure: object providing ``add_subplot`` and ``tight_layout``
            (presumably a matplotlib figure).
        feedback: table with ``'id'``, ``'user'`` and ``'value'`` columns.
        answers: table with a ``'user'`` column; only rows of users that
            appear in ``feedback`` are plotted.
        verbose: forwarded to ``_boxplot``.

    Raises:
        KeyError: if a feedback value has no entry in ``FEEDBACK_MAPPING``.
    """
    ax = figure.add_subplot(111)
    # ``DataFrame.sort`` was removed in pandas 0.20; use ``sort_values`` when
    # the installed pandas has it and fall back to ``sort`` otherwise.
    sort_rows = getattr(feedback, 'sort_values', None) or feedback.sort
    # Keep the first row per user in 'id' order; the mean is over that row.
    first_feedback = (sort_rows('id').drop_duplicates('user').groupby(
        'user').apply(lambda x: x['value'].mean()).to_dict())
    labels = []
    to_plot = []
    # Take an explicit copy before adding the helper column: assigning into
    # the bare filtered frame triggers pandas' SettingWithCopyWarning. The
    # copy is local, so the helper column needs no clean-up afterwards.
    answers = answers[answers['user'].isin(list(first_feedback))].copy()
    answers['temp_group'] = answers['user'].apply(lambda u: first_feedback[u])
    for group_name, group_data in answers.groupby('temp_group'):
        number = user.answers_per_user(group_data)
        # list() keeps this a real sequence on Python 3 as well.
        to_plot.append(list(number.values()))
        labels.append('%s (%s)' % (FEEDBACK_MAPPING[group_name], len(number)))
    ax.set_yscale('log')
    ax.set_ylabel('Number of Answers')
    ax.set_xlabel('First Feedback')
    _boxplot(ax,
             to_plot,
             labels,
             name='Feedback vs Number of Answers',
             verbose=verbose)
    figure.tight_layout()
示例#4
0
def plot_first_session_vs_total(figure, answers, verbose=False):
    """Plot, per user, first-session answers against answers given afterwards.

    Args:
        figure: object providing ``add_subplot`` (presumably a matplotlib
            figure); a single ``111`` subplot is created on it.
        answers: table indexed by column name; after
            ``decorator.session_number`` it must expose ``'user'`` and
            ``'session_number'`` columns.
        verbose: accepted but not used by this function.

    Raises:
        ValueError: if there are no users, since ``zip(*pairs)`` then yields
            nothing to unpack into the two coordinate tuples.
    """
    answers = decorator.session_number(answers)
    ax = figure.add_subplot(111)
    total = user.answers_per_user(answers)
    total_first = user.answers_per_user(
        answers[answers['session_number'] == 0])
    users = answers['user'].unique()

    def counts_for(mapping):
        # A user absent from the mapping contributed no answers.
        return [mapping.get(u, 0) for u in users]

    # ``lambda (x, y): ...`` is invalid syntax on Python 3 (PEP 3113); the
    # comprehension below unpacks the pair and runs on Python 2 and 3 alike.
    pairs = [(first, overall - first)
             for first, overall in sorted(
                 zip(counts_for(total_first), counts_for(total)))]
    total_first, total = zip(*pairs)
    ax.plot(total_first, total, 'o', alpha=0.3, linewidth=0, color='black')
    ax.set_xlabel('number of answers in the first session')
    # NOTE(review): y holds (total - first session) although the label reads
    # as the overall count -- confirm which one is intended.
    ax.set_ylabel('number of answer at all')
    ax.set_xscale('log')
    ax.set_yscale('log')
示例#5
0
def plot_answers_vs_prior_skill(figure, answers, prior_skill, verbose=False):
    """Scatter each user's answer count against their prior skill.

    Args:
        figure: object providing ``add_subplot``; one ``111`` subplot is added.
        answers: table with a ``'user'`` column, first passed through
            ``decorator.session_number``.
        prior_skill: lookup indexed by user; every user in ``answers`` must
            be present.
        verbose: accepted but not used by this function.
    """
    answers = decorator.session_number(answers)
    ax = figure.add_subplot(111)
    total = user.answers_per_user(answers)
    users = answers['user'].unique()
    answer_counts = [total[u] for u in users]
    skills = [prior_skill[u] for u in users]
    # Sort the (count, skill) pairs, then split them back into two series.
    ordered = sorted(zip(answer_counts, skills))
    xs, ys = zip(*ordered)
    ax.plot(xs, ys, 'o', alpha=0.3, linewidth=0, color='black')
    ax.set_xlabel('number of answer at all')
    ax.set_ylabel('prior skill')
    ax.set_xscale('log')
def plot_answers_vs_prior_skill(figure, answers, prior_skill, verbose=False):
    """Draw prior skill as a function of the number of answers per user.

    Args:
        figure: object providing ``add_subplot``; one ``111`` subplot is added.
        answers: table with a ``'user'`` column, first passed through
            ``decorator.session_number``.
        prior_skill: lookup indexed by user; a missing user raises on lookup.
        verbose: accepted but not used by this function.
    """
    answers = decorator.session_number(answers)
    ax = figure.add_subplot(111)
    total = user.answers_per_user(answers)
    users = answers['user'].unique()

    def per_user(lookup):
        # Strict indexing: every user must exist in the lookup.
        return [lookup[u] for u in users]

    points = sorted(zip(per_user(total), per_user(prior_skill)))
    n_answers, skill = zip(*points)
    ax.plot(n_answers, skill, 'o', alpha=0.3, linewidth=0, color='black')
    ax.set_xlabel('number of answer at all')
    ax.set_ylabel('prior skill')
    ax.set_xscale('log')
def plot_first_session_vs_session_number(figure, answers, verbose=False):
    """Scatter first-session answer counts against sessions per user.

    Args:
        figure: object providing ``add_subplot``; one ``111`` subplot is added.
        answers: table that, after ``decorator.session_number``, exposes
            ``'user'`` and ``'session_number'`` columns.
        verbose: accepted but not used by this function.
    """
    answers = decorator.session_number(answers)
    ax = figure.add_subplot(111)
    ses = user.session_per_user(answers)
    first_only = answers[answers['session_number'] == 0]
    total_first = user.answers_per_user(first_only)
    users = answers['user'].unique()
    # Users missing from either mapping fall back to zero.
    first_counts = [total_first.get(u, 0) for u in users]
    session_counts = [ses.get(u, 0) for u in users]
    ordered = sorted(zip(first_counts, session_counts))
    xs, ys = zip(*ordered)
    ax.plot(xs, ys, 'o', alpha=0.3, linewidth=0, color='black')
    ax.set_xlabel('number of answers in the first session')
    ax.set_ylabel('maximal session number')
    ax.set_xscale('log')
示例#8
0
def plot_first_session_vs_session_number(figure, answers, verbose=False):
    """Plot, per user, answers in the first session versus session count.

    Args:
        figure: object providing ``add_subplot``; one ``111`` subplot is added.
        answers: table that, after ``decorator.session_number``, exposes
            ``'user'`` and ``'session_number'`` columns.
        verbose: accepted but not used by this function.
    """
    answers = decorator.session_number(answers)
    ax = figure.add_subplot(111)
    ses = user.session_per_user(answers)
    total_first = user.answers_per_user(
        answers[answers['session_number'] == 0])
    users = answers['user'].unique()

    def lookup_all(mapping):
        # Default of zero for users the mapping does not know.
        return [mapping.get(u, 0) for u in users]

    points = sorted(zip(lookup_all(total_first), lookup_all(ses)))
    first_session, sessions = zip(*points)
    ax.plot(first_session, sessions, 'o',
            alpha=0.3, linewidth=0, color='black')
    ax.set_xlabel('number of answers in the first session')
    ax.set_ylabel('maximal session number')
    ax.set_xscale('log')
def boxplot_answers_per_user(figure, answers, group_column, group_name_mapping=None, verbose=False):
    """Box-plot answers per user for every group found in ``group_column``.

    Args:
        figure: object providing ``add_subplot`` and ``tight_layout``.
        answers: table supporting ``groupby(group_column)``.
        group_column: column whose distinct values define the boxes.
        group_name_mapping: optional mapping used for box labels (strict
            lookup) and for the x-axis label (lookup with fallback to
            ``group_column``).
        verbose: forwarded to ``_boxplot``.
    """
    ax = figure.add_subplot(111)
    tick_labels = []
    samples = []
    for key, rows in answers.groupby(group_column):
        per_user = user.answers_per_user(rows)
        samples.append(per_user.values())
        if group_name_mapping:
            shown = group_name_mapping[key]
        else:
            shown = key
        # Second label line carries the number of users in the group.
        tick_labels.append(str(shown) + '\n(' + str(len(per_user)) + ')')
    ax.set_yscale('log')
    if group_name_mapping:
        axis_title = group_name_mapping.get(group_column, group_column)
    else:
        axis_title = group_column
    ax.set_xlabel(axis_title)
    ax.set_ylabel('Number of Answers')
    ax.set_title('Implicit Feedback')
    _boxplot(ax, samples, tick_labels, name='Answers per User', verbose=verbose)
    figure.tight_layout()
def boxplot_feedback_vs_number_of_answers(figure, feedback, answers, verbose=False):
    """Box-plot answers per user, one box per first-feedback value.

    Args:
        figure: object providing ``add_subplot`` and ``tight_layout``
            (presumably a matplotlib figure).
        feedback: table with ``'id'``, ``'user'`` and ``'value'`` columns.
        answers: table with a ``'user'`` column; rows of users without any
            feedback are ignored.
        verbose: forwarded to ``_boxplot``.

    Raises:
        KeyError: if a feedback value has no entry in ``FEEDBACK_MAPPING``.
    """
    ax = figure.add_subplot(111)
    # ``DataFrame.sort`` no longer exists since pandas 0.20; prefer
    # ``sort_values`` and only fall back to ``sort`` on older installations.
    sort_rows = getattr(feedback, 'sort_values', None) or feedback.sort
    # One row per user survives (the first in 'id' order); the mean is over it.
    first_feedback = (sort_rows('id').
        drop_duplicates('user').
        groupby('user').
        apply(lambda x: x['value'].mean()).to_dict())
    labels = []
    to_plot = []
    # Copy the filtered rows before attaching the grouping column: writing to
    # the bare filtered frame raises pandas' SettingWithCopyWarning. Because
    # the copy is local, the column does not have to be deleted afterwards.
    answers = answers[answers['user'].isin(list(first_feedback))].copy()
    answers['temp_group'] = answers['user'].apply(lambda u: first_feedback[u])
    for group_name, group_data in answers.groupby('temp_group'):
        number = user.answers_per_user(group_data)
        # list() yields a real sequence on Python 3 too, not a dict view.
        to_plot.append(list(number.values()))
        labels.append('%s (%s)' % (FEEDBACK_MAPPING[group_name], len(number)))
    ax.set_yscale('log')
    ax.set_ylabel('Number of Answers')
    ax.set_xlabel('First Feedback')
    _boxplot(ax, to_plot, labels, name='Feedback vs Number of Answers', verbose=verbose)
    figure.tight_layout()
def hist_answers_per_user(figure, answers, group_column, group_name_mapping=None, verbose=False):
    """Plot per-group histograms of log10(number of answers per user).

    Args:
        figure: object providing ``add_subplot`` and ``tight_layout``
            (presumably a matplotlib figure).
        answers: table supporting ``groupby(group_column)``.
        group_column: column whose distinct values define the groups.
        group_name_mapping: optional mapping from group value to legend name;
            when falsy the group value is converted with ``str``.
        verbose: accepted but not used by this function.

    Raises:
        KeyError: if ``group_name_mapping`` is given but lacks a group value.
        ValueError: if ``answers`` yields no groups (nothing to unpack).
    """
    ax = figure.add_subplot(111)
    to_plots = []
    group_names = []
    for group_name, group_data in answers.groupby(group_column):
        # Convert to a list before handing over to numpy: a Python 3
        # ``dict.values()`` view is not accepted as a numeric sequence.
        per_user = list(user.answers_per_user(group_data).values())
        to_plots.append(numpy.log10(per_user))
        group_names.append(group_name)
    if group_name_mapping:
        group_names = [group_name_mapping[name] for name in group_names]
    else:
        group_names = [str(name) for name in group_names]
    # Sort series by legend name so the output order is deterministic.
    group_names, to_plots = zip(*sorted(zip(group_names, to_plots), key=lambda pair: pair[0]))
    # %-formatting tolerates mapped names that are not plain strings, where
    # string concatenation would raise TypeError.
    legend_labels = ['%s (%d)' % (name, len(values)) for name, values in zip(group_names, to_plots)]
    ax.hist(
        to_plots,
        label=legend_labels,
        # NOTE(review): matplotlib 3.1 removed ``normed`` (use ``density``);
        # left unchanged for the matplotlib version this file targets.
        normed=True,
        )
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_xlabel("Number of Answers (log)")
    ax.set_ylabel("Number of Users (normed)")
    figure.tight_layout()
示例#12
0
def boxplot_answers_per_user(figure,
                             answers,
                             group_column,
                             group_name_mapping=None,
                             verbose=False):
    """Draw one box of per-user answer counts for each ``group_column`` value.

    Args:
        figure: object providing ``add_subplot`` and ``tight_layout``.
        answers: table supporting ``groupby(group_column)``.
        group_column: column whose distinct values define the boxes.
        group_name_mapping: optional mapping; box labels use a strict lookup,
            the x-axis label uses ``.get`` with ``group_column`` as fallback.
        verbose: forwarded to ``_boxplot``.
    """
    ax = figure.add_subplot(111)
    box_labels = []
    box_data = []
    for group_key, group_rows in answers.groupby(group_column):
        counts = user.answers_per_user(group_rows)
        box_data.append(counts.values())
        caption = group_name_mapping[group_key] if group_name_mapping else group_key
        head = str(caption)
        # The count of users goes on its own line, in parentheses.
        tail = '\n(' + str(len(counts)) + ')'
        box_labels.append(head + tail)
    ax.set_yscale('log')
    x_title = group_column
    if group_name_mapping:
        x_title = group_name_mapping.get(group_column, group_column)
    ax.set_xlabel(x_title)
    ax.set_ylabel('Number of Answers')
    ax.set_title('Implicit Feedback')
    _boxplot(ax, box_data, box_labels, name='Answers per User', verbose=verbose)
    figure.tight_layout()