def plot_first_session_vs_total(figure, answers, verbose=False):
    """Scatter first-session answer counts against the remaining answers.

    One point is drawn per distinct value of ``answers['user']``.  The x
    value is the user's answer count in session 0; the y value is the user's
    overall answer count minus that first-session count.  Both axes are set
    to logarithmic scale.

    Args:
        figure: object providing ``add_subplot`` (presumably a matplotlib
            figure); a single ``111`` subplot is added to it.
        answers: answers table; it is passed through
            ``decorator.session_number`` and then read through its ``user``
            and ``session_number`` columns.
        verbose: unused by this function.
    """
    answers = decorator.session_number(answers)
    ax = figure.add_subplot(111)
    total = user.answers_per_user(answers)
    first = user.answers_per_user(answers[answers['session_number'] == 0])
    users = answers['user'].unique()
    # Users missing from either mapping are counted as zero answers.
    pairs = sorted((first.get(u, 0), total.get(u, 0)) for u in users)
    # The pairs are unpacked in comprehensions rather than with a
    # tuple-parameter lambda (``lambda (x, y): ...``), which is a syntax
    # error on Python 3.  Building two plain lists also keeps the call
    # working when there are no users at all.
    first_counts = [in_first for in_first, _ in pairs]
    later_counts = [overall - in_first for in_first, overall in pairs]
    ax.plot(first_counts, later_counts, 'o',
            alpha=0.3, linewidth=0, color='black')
    ax.set_xlabel('number of answers in the first session')
    # NOTE(review): the plotted y value is total minus first-session answers,
    # while the label reads as the overall total -- confirm the intent.
    ax.set_ylabel('number of answer at all')
    ax.set_xscale('log')
    ax.set_yscale('log')
def hist_answers_per_user(figure, answers, group_column,
                          group_name_mapping=None, verbose=False):
    """Draw a histogram of log10 per-user answer counts for each group.

    ``answers`` is grouped by ``group_column``; for every group the values
    returned by ``user.answers_per_user`` are log10-transformed and drawn as
    one histogram series.  Series are ordered by their display name and each
    legend entry reads ``"<name> (<number of values>)"``.

    Args:
        figure: object providing ``add_subplot`` and ``tight_layout``
            (presumably a matplotlib figure).
        answers: table supporting ``groupby(group_column)``.
        group_column: column whose values define the groups.
        group_name_mapping: optional mapping from group value to display
            name; when falsy, ``str(group value)`` is used instead.
        verbose: unused by this function.
    """
    ax = figure.add_subplot(111)
    series = []
    for group_name, group_data in answers.groupby(group_column):
        per_user = user.answers_per_user(group_data)
        # list(): on Python 3 ``values()`` is a view object that numpy.log10
        # cannot convert into a numeric array.
        log_counts = numpy.log10(list(per_user.values()))
        if group_name_mapping:
            display_name = group_name_mapping[group_name]
        else:
            display_name = str(group_name)
        series.append((display_name, log_counts))
    # Order by display name only; the sort is stable so ties keep group order.
    series.sort(key=lambda item: item[0])
    to_plots = [log_counts for _, log_counts in series]
    # %-formatting also accepts mapped names that are not plain strings.
    legend_labels = [
        '%s (%s)' % (display_name, len(log_counts))
        for display_name, log_counts in series
    ]
    # NOTE(review): newer matplotlib releases spell this option ``density``
    # and no longer accept ``normed`` -- confirm against the installed version.
    ax.hist(
        to_plots,
        label=legend_labels,
        normed=True,
    )
    # Anchor the legend just outside the right edge of the axes.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_xlabel("Number of Answers (log)")
    ax.set_ylabel("Number of Users (normed)")
    figure.tight_layout()
def boxplot_feedback_vs_number_of_answers(figure, feedback, answers,
                                          verbose=False):
    """Box-plot per-user answer counts grouped by each user's first feedback.

    The first feedback row of every user (lowest ``id``) supplies the group
    value.  Only answers of users that gave feedback are considered.  For
    each group, the values returned by ``user.answers_per_user`` form one
    box, labelled ``"<FEEDBACK_MAPPING[value]> (<number of users>)"``.

    Args:
        figure: object providing ``add_subplot`` and ``tight_layout``
            (presumably a matplotlib figure).
        feedback: table with ``id``, ``user`` and ``value`` columns.
        answers: table with a ``user`` column; the caller's object is not
            modified.
        verbose: forwarded to ``_boxplot``.
    """
    ax = figure.add_subplot(111)
    # NOTE(review): ``DataFrame.sort`` is named ``sort_values`` in newer
    # pandas releases -- confirm against the installed version.
    first_feedback = (
        feedback.sort('id')
        .drop_duplicates('user')
        .groupby('user')
        .apply(lambda rows: rows['value'].mean())
        .to_dict()
    )
    # Work on an explicit copy so that the helper column below is not written
    # onto a slice of the caller's frame (SettingWithCopyWarning).
    answers = answers[answers['user'].isin(list(first_feedback))].copy()
    answers['temp_group'] = answers['user'].map(first_feedback)
    labels = []
    to_plot = []
    for group_name, group_data in answers.groupby('temp_group'):
        number = user.answers_per_user(group_data)
        # list(): hand the plotting helper a real sequence, not a dict view.
        to_plot.append(list(number.values()))
        labels.append('%s (%s)' % (FEEDBACK_MAPPING[group_name], len(number)))
    ax.set_yscale('log')
    ax.set_ylabel('Number of Answers')
    ax.set_xlabel('First Feedback')
    _boxplot(ax, to_plot, labels,
             name='Feedback vs Number of Answers', verbose=verbose)
    figure.tight_layout()
def plot_first_session_vs_total(figure, answers, verbose=False):
    """Scatter first-session answer counts against the remaining answers.

    One point is drawn per distinct value of ``answers['user']``.  The x
    value is the user's answer count in session 0; the y value is the user's
    overall answer count minus that first-session count.  Both axes are set
    to logarithmic scale.

    Args:
        figure: object providing ``add_subplot`` (presumably a matplotlib
            figure); a single ``111`` subplot is added to it.
        answers: answers table; it is passed through
            ``decorator.session_number`` and then read through its ``user``
            and ``session_number`` columns.
        verbose: unused by this function.
    """
    answers = decorator.session_number(answers)
    ax = figure.add_subplot(111)
    total = user.answers_per_user(answers)
    first = user.answers_per_user(answers[answers['session_number'] == 0])
    users = answers['user'].unique()
    # Users missing from either mapping are counted as zero answers.
    pairs = sorted((first.get(u, 0), total.get(u, 0)) for u in users)
    # The pairs are unpacked in comprehensions rather than with a
    # tuple-parameter lambda (``lambda (x, y): ...``), which is a syntax
    # error on Python 3.  Building two plain lists also keeps the call
    # working when there are no users at all.
    first_counts = [in_first for in_first, _ in pairs]
    later_counts = [overall - in_first for in_first, overall in pairs]
    ax.plot(first_counts, later_counts, 'o',
            alpha=0.3, linewidth=0, color='black')
    ax.set_xlabel('number of answers in the first session')
    # NOTE(review): the plotted y value is total minus first-session answers,
    # while the label reads as the overall total -- confirm the intent.
    ax.set_ylabel('number of answer at all')
    ax.set_xscale('log')
    ax.set_yscale('log')
def plot_answers_vs_prior_skill(figure, answers, prior_skill, verbose=False):
    """Scatter each user's answer count against the user's prior skill.

    One point is drawn per distinct value of ``answers['user']``; x is the
    value from ``user.answers_per_user`` and y is ``prior_skill[user]``.
    Only the x axis is logarithmic.

    Args:
        figure: object providing ``add_subplot`` (presumably a matplotlib
            figure).
        answers: answers table; passed through ``decorator.session_number``
            and read through its ``user`` column.
        prior_skill: mapping indexed by user; every plotted user must be
            present in it.
        verbose: unused by this function.
    """
    answers = decorator.session_number(answers)
    ax = figure.add_subplot(111)
    answer_counts = user.answers_per_user(answers)
    user_ids = answers['user'].unique()

    def per_user(mapping):
        # Values of ``mapping`` in the order of ``user_ids``.
        return [mapping[uid] for uid in user_ids]

    points = sorted(zip(per_user(answer_counts), per_user(prior_skill)))
    counts, skills = zip(*points)
    ax.plot(counts, skills, 'o', alpha=0.3, linewidth=0, color='black')
    ax.set_xlabel('number of answer at all')
    ax.set_ylabel('prior skill')
    ax.set_xscale('log')
def plot_first_session_vs_session_number(figure, answers, verbose=False):
    """Scatter first-session answer counts against the per-user session value.

    One point is drawn per distinct value of ``answers['user']``; x is the
    user's answer count in session 0 and y is the user's entry in
    ``user.session_per_user`` (the axis is labelled "maximal session
    number").  Users missing from either mapping count as 0.  Only the
    x axis is logarithmic.

    Args:
        figure: object providing ``add_subplot`` (presumably a matplotlib
            figure).
        answers: answers table; passed through ``decorator.session_number``
            and read through its ``user`` and ``session_number`` columns.
        verbose: unused by this function.
    """
    answers = decorator.session_number(answers)
    ax = figure.add_subplot(111)
    sessions = user.session_per_user(answers)
    in_first_session = answers[answers['session_number'] == 0]
    first_counts = user.answers_per_user(in_first_session)
    user_ids = answers['user'].unique()

    def per_user(mapping):
        # Values of ``mapping`` in the order of ``user_ids``, defaulting to 0.
        return [mapping.get(uid, 0) for uid in user_ids]

    points = sorted(zip(per_user(first_counts), per_user(sessions)))
    xs, ys = zip(*points)
    ax.plot(xs, ys, 'o', alpha=0.3, linewidth=0, color='black')
    ax.set_xlabel('number of answers in the first session')
    ax.set_ylabel('maximal session number')
    ax.set_xscale('log')
def plot_first_session_vs_session_number(figure, answers, verbose=False):
    """Scatter first-session answer counts against the per-user session value.

    One point is drawn per distinct value of ``answers['user']``; x is the
    user's answer count in session 0 and y is the user's entry in
    ``user.session_per_user`` (the axis is labelled "maximal session
    number").  Users missing from either mapping count as 0.  Only the
    x axis is logarithmic.

    Args:
        figure: object providing ``add_subplot`` (presumably a matplotlib
            figure).
        answers: answers table; passed through ``decorator.session_number``
            and read through its ``user`` and ``session_number`` columns.
        verbose: unused by this function.
    """
    answers = decorator.session_number(answers)
    ax = figure.add_subplot(111)
    sessions = user.session_per_user(answers)
    in_first_session = answers[answers['session_number'] == 0]
    first_counts = user.answers_per_user(in_first_session)
    user_ids = answers['user'].unique()

    def per_user(mapping):
        # Values of ``mapping`` in the order of ``user_ids``, defaulting to 0.
        return [mapping.get(uid, 0) for uid in user_ids]

    points = sorted(zip(per_user(first_counts), per_user(sessions)))
    xs, ys = zip(*points)
    ax.plot(xs, ys, 'o', alpha=0.3, linewidth=0, color='black')
    ax.set_xlabel('number of answers in the first session')
    ax.set_ylabel('maximal session number')
    ax.set_xscale('log')
def boxplot_answers_per_user(figure, answers, group_column,
                             group_name_mapping=None, verbose=False):
    """Box-plot per-user answer counts for every group of ``group_column``.

    ``answers`` is grouped by ``group_column``; the values returned by
    ``user.answers_per_user`` for a group form one box, labelled with the
    group's display name followed, on a new line, by the number of users in
    parentheses.  The y axis is logarithmic.

    Args:
        figure: object providing ``add_subplot`` and ``tight_layout``
            (presumably a matplotlib figure).
        answers: table supporting ``groupby(group_column)``.
        group_column: column whose values define the groups.
        group_name_mapping: optional mapping used both for group display
            names (indexed by group value) and, via ``get``, for the x-axis
            label (looked up by ``group_column``).
        verbose: forwarded to ``_boxplot``.
    """
    ax = figure.add_subplot(111)
    labels = []
    to_plot = []
    for group_name, group_data in answers.groupby(group_column):
        number = user.answers_per_user(group_data)
        # list(): on Python 3 ``values()`` is a view, not a sequence, so
        # materialize it before handing it to the plotting helper.
        to_plot.append(list(number.values()))
        if group_name_mapping:
            shown = group_name_mapping[group_name]
        else:
            shown = group_name
        labels.append(str(shown) + '\n(' + str(len(number)) + ')')
    ax.set_yscale('log')
    if group_name_mapping:
        # Fall back to the raw column name when the mapping has no entry.
        x_label = group_name_mapping.get(group_column, group_column)
    else:
        x_label = group_column
    ax.set_xlabel(x_label)
    ax.set_ylabel('Number of Answers')
    ax.set_title('Implicit Feedback')
    _boxplot(ax, to_plot, labels, name='Answers per User', verbose=verbose)
    figure.tight_layout()
def boxplot_feedback_vs_number_of_answers(figure, feedback, answers,
                                          verbose=False):
    """Box-plot per-user answer counts grouped by each user's first feedback.

    The first feedback row of every user (lowest ``id``) supplies the group
    value.  Only answers of users that gave feedback are considered.  For
    each group, the values returned by ``user.answers_per_user`` form one
    box, labelled ``"<FEEDBACK_MAPPING[value]> (<number of users>)"``.

    Args:
        figure: object providing ``add_subplot`` and ``tight_layout``
            (presumably a matplotlib figure).
        feedback: table with ``id``, ``user`` and ``value`` columns.
        answers: table with a ``user`` column; the caller's object is not
            modified.
        verbose: forwarded to ``_boxplot``.
    """
    ax = figure.add_subplot(111)
    # NOTE(review): ``DataFrame.sort`` is named ``sort_values`` in newer
    # pandas releases -- confirm against the installed version.
    first_feedback = (
        feedback.sort('id')
        .drop_duplicates('user')
        .groupby('user')
        .apply(lambda rows: rows['value'].mean())
        .to_dict()
    )
    # Work on an explicit copy so that the helper column below is not written
    # onto a slice of the caller's frame (SettingWithCopyWarning).
    answers = answers[answers['user'].isin(list(first_feedback))].copy()
    answers['temp_group'] = answers['user'].map(first_feedback)
    labels = []
    to_plot = []
    for group_name, group_data in answers.groupby('temp_group'):
        number = user.answers_per_user(group_data)
        # list(): hand the plotting helper a real sequence, not a dict view.
        to_plot.append(list(number.values()))
        labels.append('%s (%s)' % (FEEDBACK_MAPPING[group_name], len(number)))
    ax.set_yscale('log')
    ax.set_ylabel('Number of Answers')
    ax.set_xlabel('First Feedback')
    _boxplot(ax, to_plot, labels,
             name='Feedback vs Number of Answers', verbose=verbose)
    figure.tight_layout()
def hist_answers_per_user(figure, answers, group_column,
                          group_name_mapping=None, verbose=False):
    """Draw a histogram of log10 per-user answer counts for each group.

    ``answers`` is grouped by ``group_column``; for every group the values
    returned by ``user.answers_per_user`` are log10-transformed and drawn as
    one histogram series.  Series are ordered by their display name and each
    legend entry reads ``"<name> (<number of values>)"``.

    Args:
        figure: object providing ``add_subplot`` and ``tight_layout``
            (presumably a matplotlib figure).
        answers: table supporting ``groupby(group_column)``.
        group_column: column whose values define the groups.
        group_name_mapping: optional mapping from group value to display
            name; when falsy, ``str(group value)`` is used instead.
        verbose: unused by this function.
    """
    ax = figure.add_subplot(111)
    series = []
    for group_name, group_data in answers.groupby(group_column):
        per_user = user.answers_per_user(group_data)
        # list(): on Python 3 ``values()`` is a view object that numpy.log10
        # cannot convert into a numeric array.
        log_counts = numpy.log10(list(per_user.values()))
        if group_name_mapping:
            display_name = group_name_mapping[group_name]
        else:
            display_name = str(group_name)
        series.append((display_name, log_counts))
    # Order by display name only; the sort is stable so ties keep group order.
    series.sort(key=lambda item: item[0])
    to_plots = [log_counts for _, log_counts in series]
    # %-formatting also accepts mapped names that are not plain strings.
    legend_labels = [
        '%s (%s)' % (display_name, len(log_counts))
        for display_name, log_counts in series
    ]
    # NOTE(review): newer matplotlib releases spell this option ``density``
    # and no longer accept ``normed`` -- confirm against the installed version.
    ax.hist(
        to_plots,
        label=legend_labels,
        normed=True,
    )
    # Anchor the legend just outside the right edge of the axes.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_xlabel("Number of Answers (log)")
    ax.set_ylabel("Number of Users (normed)")
    figure.tight_layout()
def boxplot_answers_per_user(figure, answers, group_column,
                             group_name_mapping=None, verbose=False):
    """Box-plot per-user answer counts for every group of ``group_column``.

    ``answers`` is grouped by ``group_column``; the values returned by
    ``user.answers_per_user`` for a group form one box, labelled with the
    group's display name followed, on a new line, by the number of users in
    parentheses.  The y axis is logarithmic.

    Args:
        figure: object providing ``add_subplot`` and ``tight_layout``
            (presumably a matplotlib figure).
        answers: table supporting ``groupby(group_column)``.
        group_column: column whose values define the groups.
        group_name_mapping: optional mapping used both for group display
            names (indexed by group value) and, via ``get``, for the x-axis
            label (looked up by ``group_column``).
        verbose: forwarded to ``_boxplot``.
    """
    ax = figure.add_subplot(111)
    labels = []
    to_plot = []
    for group_name, group_data in answers.groupby(group_column):
        number = user.answers_per_user(group_data)
        # list(): on Python 3 ``values()`` is a view, not a sequence, so
        # materialize it before handing it to the plotting helper.
        to_plot.append(list(number.values()))
        if group_name_mapping:
            shown = group_name_mapping[group_name]
        else:
            shown = group_name
        labels.append(str(shown) + '\n(' + str(len(number)) + ')')
    ax.set_yscale('log')
    if group_name_mapping:
        # Fall back to the raw column name when the mapping has no entry.
        x_label = group_name_mapping.get(group_column, group_column)
    else:
        x_label = group_column
    ax.set_xlabel(x_label)
    ax.set_ylabel('Number of Answers')
    ax.set_title('Implicit Feedback')
    _boxplot(ax, to_plot, labels, name='Answers per User', verbose=verbose)
    figure.tight_layout()