def plot_correlations(results, data, pdf):
    """Save one density-coloured scatter plot per correlation result to ``pdf``.

    For every entry in ``results`` the responses to the two questions are
    fetched through the ``tools`` helpers, entries flagged as invalid (not
    ``int``) are dropped from both series, and the remaining points are drawn
    as a scatter plot coloured and sized by their Gaussian kernel density
    estimate. The best-fit line described by the entry's ``'slope'`` and
    ``'intercept'`` is drawn on top.

    Args:
        results: Sized iterable of mappings, each providing ``'questions'``
            (a pair of question identifiers), ``'slope'``, ``'intercept'``
            and ``'r_squared'``.
        data: Survey data, passed through unchanged to the ``tools`` helpers.
        pdf: Object exposing ``savefig(fig)``; each figure is saved to it.
            NOTE(review): presumably a matplotlib ``PdfPages`` -- confirm
            against the caller.

    Raises:
        Exception: Whatever ``stats.gaussian_kde`` raises for a pair of
            series; the offending matrix is printed before re-raising.
    """
    print("Saving {} result plots to pdf.".format(len(results)))
    for result in results:
        # Progress indicator: one dot per plot, all on the same line.
        # sys.stdout.write behaves the same on Python 2 and 3, unlike the
        # Python-2-only ``print('.'),`` trailing-comma idiom.
        sys.stdout.write('.')
        sys.stdout.flush()

        q1, q2 = result['questions']
        title_1 = tools.get_question_title(q1, data)
        title_2 = tools.get_question_title(q2, data)

        x_raw = tools.get_responses_to_number(q1, data)
        y_raw = tools.get_responses_to_number(q2, data)
        x, y = tools.extract_vals_from_responses(x_raw, y_raw)

        # Drop every position where either series holds a non-int response,
        # so x and y stay aligned.
        invalid_x, invalid_y = tools.get_indexes_of_invalid_repsonse_types(
            [int], x, y
        )
        invalid_all = tools.merge_invalid_indexes(invalid_x, invalid_y)
        x, y = tools.remove_entries_at_indexes(invalid_all, x, y)

        # Calculate the point density.
        xy = np.vstack([x, y])
        try:
            z = stats.gaussian_kde(xy)(xy)
        except Exception:
            # Show the data that broke the estimator, then re-raise with the
            # original traceback intact.
            print(xy)
            raise

        # Marker area grows with density, but never drops below 60 so that
        # sparse points stay visible.
        marker_sizes = np.maximum(5000 * z, 60)

        # The best-fit line spans the x data padded by one unit on each side.
        slope = result['slope']
        intercept = result['intercept']
        x_fit_points = (min(x) - 1, max(x) + 1)
        y_fit_points = (
            x_fit_points[0] * slope + intercept,
            x_fit_points[1] * slope + intercept,
        )

        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.set_title("{} vs {}\nr_squared = {:.4f}".format(
            title_1, title_2, result['r_squared']
        ))
        ax.set_xlabel("{} (Q{})".format(title_1, q1))
        ax.set_ylabel("{} (Q{})".format(title_2, q2))
        # 'none' is the documented value for drawing markers without an
        # outline.
        ax.scatter(x, y, c=z, s=marker_sizes, edgecolor='none')
        ax.plot(x_fit_points, y_fit_points, '-')
        pdf.savefig(fig)
        # Close the figure so open figures do not accumulate across results.
        plt.close(fig)
    print("\nDone saving plots to pdf.\n")
def print_interesting_correlations(interesting_correlations, data):
    """Print a short text report for each notable correlation.

    Each report names the two questions involved (title and identifier) and
    the correlation's ``r_squared`` rounded to three decimals.

    Args:
        interesting_correlations: Iterable of mappings, each providing
            ``"questions"`` (a pair of question identifiers) and
            ``"r_squared"``.
        data: Survey data, passed unchanged to ``tools.get_question_title``.
    """
    report_template = (
        "Notable correlation between:\n"
        "\t'{}'({})\n"
        "\t'{}'({})\n"
        "\tr_squared = {:.3f}"
    )
    for entry in interesting_correlations:
        first_q, second_q = entry["questions"]
        first_title = tools.get_question_title(first_q, data)
        second_title = tools.get_question_title(second_q, data)
        report = report_template.format(
            first_title, first_q, second_title, second_q, entry["r_squared"]
        )
        print(report)
        # Blank lines separating this report from the next one.
        # NOTE(review): the source's indentation was lost; this separator is
        # assumed to belong inside the loop -- confirm.
        print("\n")