# third-party
from matplotlib import pyplot as plt
from matplotlib import pylab
import pandas  # required by the DataFrame construction at the end of this section
import seaborn as sns

# this code
from passing_vs_notpassing import print_columns
from passing_vs_notpassing import Statistics
from utilities import list_to_account_dict
from utilities import read_pickle_3
from utilities import total_engagement_attribute

#pylab.rcParams['figure.figsize'] = (10.0, 8.0)
# plt.style.use('ggplot')

# Load the previously pickled data sets; each call passes the pickle's name
# to the project helper ``read_pickle_3``.

# these are total_minutes_visited
passed = read_pickle_3('passed')
distinctive = read_pickle_3('distinctive')
non_passing = read_pickle_3('non_passing')

paid_engagements = read_pickle_3('paid_engagements')
paid_submissions = read_pickle_3('paid_submissions')
passing_submissions = read_pickle_3('passing_submissions')

passing_engagement = read_pickle_3('passing_engagement')
passed_engagement = read_pickle_3('passed_engagement')
distinctive_engagement = read_pickle_3('distinctive_engagement')
non_passing_engagement = read_pickle_3('non_passing_engagement')

subway_submissions = read_pickle_3('subway_submissions')

# Rebind ``passed`` to a DataFrame whose only column, 'passed', holds the
# values that were just loaded.
passed = pandas.DataFrame({'passed': passed})
# python standard library
from collections import namedtuple
import csv
import pickle

# third-party
import numpy

# this code
from utilities import read_pickle_3, write_pickle

# True when this module's __name__ is 'builtins' or '__builtin__'
# (presumably the case when the file is executed by Pweave -- confirm).
PWEAVE = __name__ in ('builtins', '__builtin__')

# Load the four pickled engagement collections.
passed_engagement = read_pickle_3('passed_engagement')
passing_engagement = read_pickle_3('passing_engagement')
distinctive_engagement = read_pickle_3('distinctive_engagement')
non_passing_engagement = read_pickle_3('non_passing_engagement')

# Sanity check: the first record of every collection must contain the
# bytes key b'total_minutes_visited'.
for collection in (
        passed_engagement,
        passing_engagement,
        distinctive_engagement,
        non_passing_engagement,
):
    assert b'total_minutes_visited' in collection[0], (
        "'total_minutes_visited' not in {0}".format(collection[0])
    )


def get_columns(engagements, column):
    """Collect one field from every record into a numpy array.

    :param engagements: iterable of records that can be indexed by ``column``
    :param column: key to look up in each record
    :return: ``numpy.array`` of the looked-up values, in iteration order
    """
    values = [record[column] for record in engagements]
    return numpy.array(values)


# Container for quartile statistics: q1, median, q3 and iqr.
Quartiles = namedtuple("Quartiles", 'q1 median q3 iqr')


def quartiles(array):
    q1 = numpy.percentile(array, 25)
    median = numpy.median(array)
    q3 = numpy.percentile(array, 75)