def plot_start_time_day():
    """Draw, for every place label, a bar plot of visits per weekday.

    For each place label in 1..10 the start times of the matching rows of
    the joined ``visits_10min`` table are fetched and bucketed with
    ``datetime.weekday()`` (0 = Monday ... 6 = Sunday).  The seven counts
    are passed to ``draw_barplot`` together with a ``save_as`` path of the
    form ``<place>_day.png`` inside the ``/local/thesis/plots`` directory.

    NOTE(review): the start times are presumably Unix timestamps; they are
    converted with ``datetime.fromtimestamp`` without a tz argument, i.e.
    in the machine's local time zone -- confirm that this is intended.
    NOTE(review): a function with this name is defined more than once in
    this file; Python keeps whichever definition comes last.
    """
    weekday_names = [
        "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
        "Sunday"
    ]
    metadata, connection = setup_database()

    for table in ["visits_10min"]:
        consolidated = return_joined_table(table, metadata)
        start_column = consolidated.c["visits_joined_" + table + "_time_start"]
        label_column = consolidated.c["visits_joined_places_place_label"]

        for place_label in xrange(1, 11):
            rows = connection.execute(
                select([start_column], label_column == place_label)).fetchall()

            # One counter per weekday, indexed by datetime.weekday().
            visits_per_day = [0] * 7
            for row in rows:
                visits_per_day[datetime.fromtimestamp(row[0]).weekday()] += 1

            place_name = LABEL_PLACE_MAPPING[place_label]
            # Place names may contain ';' and spaces; neither is wanted in
            # the output file name.
            plot_file = (place_name.replace(';', '_').replace(" ", "_") +
                         "_day.png")
            draw_barplot(
                visits_per_day,
                x_ticks=weekday_names,
                xlabel="Day of week",
                ylabel="Number of Checkins",
                title="%s Visits by Days" % (place_name),
                save_as=os.path.join("/local", "thesis", "plots", plot_file))
def plot_working_groups():
    """Draw, for every place label, a bar plot of visits per working group.

    For each place label in 1..10 and each working group in 1..8 one
    ``count(*)`` query is run against the joined ``visits_10min`` table,
    restricted to that place label and that ``demographics_working`` value.
    The eight counts are passed to ``draw_barplot`` with the group names
    from ``WORKING_MAPPING`` (wrapped to 10 characters) as tick labels and
    a ``save_as`` path of the form ``<place>_workgroup.png`` inside the
    ``/local/thesis/plots/working`` directory.

    NOTE(review): a function with this name is defined more than once in
    this file; Python keeps whichever definition comes last.
    """
    metadata, connection = setup_database()

    for table in ["visits_10min"]:
        consolidated = return_joined_table(table, metadata)
        label_column = consolidated.c["visits_joined_places_place_label"]
        working_column = consolidated.c["demographics_working"]

        for place_label in xrange(1, 11):
            # One count query per working group; the single result row
            # holds the count in its first column.
            working_checkins = [
                connection.execute(
                    select([func.count()],
                           and_(label_column == place_label,
                                working_column == working_group))
                ).fetchall()[0][0]
                for working_group in xrange(1, 9)
            ]

            group_names = [WORKING_MAPPING[i] for i in xrange(1, 9)]
            # Wrap long group names so neighbouring tick labels do not
            # run into each other.
            wrapped_names = [textwrap.fill(text, 10) for text in group_names]

            place_name = LABEL_PLACE_MAPPING[place_label]
            plot_file = (place_name.replace(";", "_").replace(" ", "_") +
                         "_" + "workgroup.png")
            draw_barplot(
                working_checkins,
                x_ticks=wrapped_names,
                xlabel="Working Status",
                ylabel="Visits",
                title=place_name + " Visits across Employment Status",
                save_as=os.path.join("/local", "thesis", "plots", "working",
                                     plot_file),
                width=0.35)
def _count_rows_per_value(connection, column):
    """Return the ``(value, count)`` rows of a GROUP BY query on *column*."""
    query = select([column, func.count(column)]).group_by(column)
    return connection.execute(query).fetchall()


def _plot_value_counts(rows, mapping, filename, xlabel, title,
                       wrap_width=None):
    """Draw a bar plot from ``(value, count)`` rows.

    Rows whose value is ``None`` are dropped, the remaining rows are sorted
    by value, and every value is translated into its tick label through
    *mapping*.  If *wrap_width* is given, the labels are wrapped to that
    many characters with ``textwrap.fill``.  The plot is saved as
    *filename* inside the ``/local/thesis/plots`` directory.
    """
    rows = sorted((row for row in rows if row[0] is not None),
                  key=lambda row: row[0])
    labels = [mapping[row[0]] for row in rows]
    if wrap_width is not None:
        labels = [textwrap.fill(label, wrap_width) for label in labels]
    draw_barplot(
        [row[1] for row in rows],
        x_ticks=labels,
        xlabel=xlabel,
        ylabel="Count",
        title=title,
        save_as=os.path.join("/local", "thesis", "plots", filename),
        width=0.35)


def plot_demographics():
    """Draw the distribution plots for the ``demographics`` table.

    Five bar plots are produced, in this order: gender, age group,
    employment status, phone bill and family size.  The first four count
    the rows per non-NULL column value and label the bars through
    ``GENDER_MAPPING``, ``AGE_MAPPING``, ``WORKING_MAPPING`` and
    ``BILL_MAPPING``.  The family-size plot sums the seven ``nb_*`` columns
    of every row (NULLs count as zero) and shows how many rows have each
    total, in ascending order of the total.

    NOTE(review): the ``nb_*`` columns presumably hold the number of
    household members per age bracket -- confirm against the schema.
    NOTE(review): a function with this name is defined more than once in
    this file; Python keeps whichever definition comes last.
    """
    from collections import Counter

    # Totals above this value are left out of the family-size plot.
    # NOTE(review): presumably outliers or data-entry errors -- confirm.
    max_family_size = 13

    metadata, connection = setup_database()
    columns = get_table("demographics", metadata).c

    _plot_value_counts(
        _count_rows_per_value(connection, columns.gender), GENDER_MAPPING,
        "gender.png", "Gender", "Gender Distribution")

    _plot_value_counts(
        _count_rows_per_value(connection, columns.age_group), AGE_MAPPING,
        "age.png", "Age Group", "Age Distribution")

    working_rows = _count_rows_per_value(connection, columns.working)
    # Debug output kept from the original implementation.
    print(working_rows)
    _plot_value_counts(
        working_rows, WORKING_MAPPING, "working.png", "Employment Status",
        "Employment Status Distribution", wrap_width=10)

    _plot_value_counts(
        _count_rows_per_value(connection, columns.phone_bill), BILL_MAPPING,
        "bill.png", "Bill", "Bill Distribution")

    family_query = select([
        columns.nb_12, columns.nb_12_18, columns.nb_18_30, columns.nb_30_40,
        columns.nb_40_50, columns.nb_50_65, columns.nb_65
    ])
    family_sizes = [
        sum(members for members in row if members is not None)
        for row in connection.execute(family_query).fetchall()
    ]
    distinct_sizes = set(family_sizes)
    # Debug output kept from the original implementation.
    print(distinct_sizes)

    # Count every total in one pass and plot the bars in ascending order;
    # iterating the set directly gives no guaranteed order.
    rows_per_size = Counter(family_sizes)
    plotted_sizes = [
        size for size in sorted(distinct_sizes) if size <= max_family_size
    ]
    draw_barplot(
        [rows_per_size[size] for size in plotted_sizes],
        x_ticks=plotted_sizes,
        xlabel="Number of members in family",
        ylabel="Count",
        title="Number of Family Members Distribution",
        save_as=os.path.join("/local", "thesis", "plots", "family.png"),
        width=0.35)
def plot_start_time_hour():
    """Draw, for every place label, a bar plot of visits per hour of day.

    For each place label in 1..10 the start times of the matching rows of
    the joined ``visits_10min`` table are fetched and bucketed by the
    ``hour`` attribute (0..23) of the converted timestamp.  The 24 counts
    are passed to ``draw_barplot`` together with a ``save_as`` path of the
    form ``<place>_hours.png`` inside the ``/local/thesis/plots``
    directory.

    NOTE(review): the start times are presumably Unix timestamps; they are
    converted with ``datetime.fromtimestamp`` without a tz argument, i.e.
    in the machine's local time zone -- confirm that this is intended.
    NOTE(review): a function with this name is defined more than once in
    this file; Python keeps whichever definition comes last.
    """
    metadata, connection = setup_database()

    for table in ["visits_10min"]:
        consolidated = return_joined_table(table, metadata)
        start_column = consolidated.c["visits_joined_" + table + "_time_start"]
        label_column = consolidated.c["visits_joined_places_place_label"]

        for place_label in xrange(1, 11):
            rows = connection.execute(
                select([start_column], label_column == place_label)).fetchall()

            # One counter per hour of the day.
            visits_per_hour = [0] * 24
            for row in rows:
                visits_per_hour[datetime.fromtimestamp(row[0]).hour] += 1

            place_name = LABEL_PLACE_MAPPING[place_label]
            # Place names may contain ';' and spaces; neither is wanted in
            # the output file name.
            plot_file = (place_name.replace(';', '_').replace(" ", "_") +
                         "_hours.png")
            draw_barplot(
                visits_per_hour,
                x_ticks=xrange(24),
                xlabel="Hour of Day",
                ylabel="Number of Checkins",
                title="%s Visits by Hours" % (place_name),
                save_as=os.path.join("/local", "thesis", "plots", plot_file))
def plot_start_time_day():
    """Draw, for every place label, a bar plot of visits per weekday.

    For each place label in 1..10 the start times of the matching rows of
    the joined ``visits_10min`` table are fetched and bucketed with
    ``datetime.weekday()`` (0 = Monday ... 6 = Sunday).  The seven counts
    are passed to ``draw_barplot`` together with a ``save_as`` path of the
    form ``<place>_day.png`` inside the ``/local/thesis/plots`` directory.

    NOTE(review): the start times are presumably Unix timestamps; they are
    converted with ``datetime.fromtimestamp`` without a tz argument, i.e.
    in the machine's local time zone -- confirm that this is intended.
    NOTE(review): a function with this name is defined more than once in
    this file; Python keeps whichever definition comes last.
    """
    weekday_names = [
        "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
        "Sunday"
    ]
    metadata, connection = setup_database()

    for table in ["visits_10min"]:
        consolidated = return_joined_table(table, metadata)
        start_column = consolidated.c["visits_joined_" + table + "_time_start"]
        label_column = consolidated.c["visits_joined_places_place_label"]

        for place_label in xrange(1, 11):
            rows = connection.execute(
                select([start_column], label_column == place_label)).fetchall()

            # One counter per weekday, indexed by datetime.weekday().
            visits_per_day = [0] * 7
            for row in rows:
                visits_per_day[datetime.fromtimestamp(row[0]).weekday()] += 1

            place_name = LABEL_PLACE_MAPPING[place_label]
            # Place names may contain ';' and spaces; neither is wanted in
            # the output file name.
            plot_file = (place_name.replace(';', '_').replace(" ", "_") +
                         "_day.png")
            draw_barplot(
                visits_per_day,
                x_ticks=weekday_names,
                xlabel="Day of week",
                ylabel="Number of Checkins",
                title="%s Visits by Days" % (place_name),
                save_as=os.path.join("/local", "thesis", "plots", plot_file))
def plot_working_groups():
    """Draw, for every place label, a bar plot of visits per working group.

    For each place label in 1..10 and each working group in 1..8 one
    ``count(*)`` query is run against the joined ``visits_10min`` table,
    restricted to that place label and that ``demographics_working`` value.
    The eight counts are passed to ``draw_barplot`` with the group names
    from ``WORKING_MAPPING`` (wrapped to 10 characters) as tick labels and
    a ``save_as`` path of the form ``<place>_workgroup.png`` inside the
    ``/local/thesis/plots/working`` directory.

    NOTE(review): a function with this name is defined more than once in
    this file; Python keeps whichever definition comes last.
    """
    metadata, connection = setup_database()

    for table in ["visits_10min"]:
        consolidated = return_joined_table(table, metadata)
        label_column = consolidated.c["visits_joined_places_place_label"]
        working_column = consolidated.c["demographics_working"]

        for place_label in xrange(1, 11):
            # One count query per working group; the single result row
            # holds the count in its first column.
            working_checkins = [
                connection.execute(
                    select([func.count()],
                           and_(label_column == place_label,
                                working_column == working_group))
                ).fetchall()[0][0]
                for working_group in xrange(1, 9)
            ]

            group_names = [WORKING_MAPPING[i] for i in xrange(1, 9)]
            # Wrap long group names so neighbouring tick labels do not
            # run into each other.
            wrapped_names = [textwrap.fill(text, 10) for text in group_names]

            place_name = LABEL_PLACE_MAPPING[place_label]
            plot_file = (place_name.replace(";", "_").replace(" ", "_") +
                         "_" + "workgroup.png")
            draw_barplot(
                working_checkins,
                x_ticks=wrapped_names,
                xlabel="Working Status",
                ylabel="Visits",
                title=place_name + " Visits across Employment Status",
                save_as=os.path.join("/local", "thesis", "plots", "working",
                                     plot_file),
                width=0.35)
def plot_start_time_hour():
    """Draw, for every place label, a bar plot of visits per hour of day.

    For each place label in 1..10 the start times of the matching rows of
    the joined ``visits_10min`` table are fetched and bucketed by the
    ``hour`` attribute (0..23) of the converted timestamp.  The 24 counts
    are passed to ``draw_barplot`` together with a ``save_as`` path of the
    form ``<place>_hours.png`` inside the ``/local/thesis/plots``
    directory.

    NOTE(review): the start times are presumably Unix timestamps; they are
    converted with ``datetime.fromtimestamp`` without a tz argument, i.e.
    in the machine's local time zone -- confirm that this is intended.
    NOTE(review): a function with this name is defined more than once in
    this file; Python keeps whichever definition comes last.
    """
    metadata, connection = setup_database()

    for table in ["visits_10min"]:
        consolidated = return_joined_table(table, metadata)
        start_column = consolidated.c["visits_joined_" + table + "_time_start"]
        label_column = consolidated.c["visits_joined_places_place_label"]

        for place_label in xrange(1, 11):
            rows = connection.execute(
                select([start_column], label_column == place_label)).fetchall()

            # One counter per hour of the day.
            visits_per_hour = [0] * 24
            for row in rows:
                visits_per_hour[datetime.fromtimestamp(row[0]).hour] += 1

            place_name = LABEL_PLACE_MAPPING[place_label]
            # Place names may contain ';' and spaces; neither is wanted in
            # the output file name.
            plot_file = (place_name.replace(';', '_').replace(" ", "_") +
                         "_hours.png")
            draw_barplot(
                visits_per_hour,
                x_ticks=xrange(24),
                xlabel="Hour of Day",
                ylabel="Number of Checkins",
                title="%s Visits by Hours" % (place_name),
                save_as=os.path.join("/local", "thesis", "plots", plot_file))
def _count_rows_per_value(connection, column):
    """Return the ``(value, count)`` rows of a GROUP BY query on *column*."""
    query = select([column, func.count(column)]).group_by(column)
    return connection.execute(query).fetchall()


def _plot_value_counts(rows, mapping, filename, xlabel, title,
                       wrap_width=None):
    """Draw a bar plot from ``(value, count)`` rows.

    Rows whose value is ``None`` are dropped, the remaining rows are sorted
    by value, and every value is translated into its tick label through
    *mapping*.  If *wrap_width* is given, the labels are wrapped to that
    many characters with ``textwrap.fill``.  The plot is saved as
    *filename* inside the ``/local/thesis/plots`` directory.
    """
    rows = sorted((row for row in rows if row[0] is not None),
                  key=lambda row: row[0])
    labels = [mapping[row[0]] for row in rows]
    if wrap_width is not None:
        labels = [textwrap.fill(label, wrap_width) for label in labels]
    draw_barplot(
        [row[1] for row in rows],
        x_ticks=labels,
        xlabel=xlabel,
        ylabel="Count",
        title=title,
        save_as=os.path.join("/local", "thesis", "plots", filename),
        width=0.35)


def plot_demographics():
    """Draw the distribution plots for the ``demographics`` table.

    Five bar plots are produced, in this order: gender, age group,
    employment status, phone bill and family size.  The first four count
    the rows per non-NULL column value and label the bars through
    ``GENDER_MAPPING``, ``AGE_MAPPING``, ``WORKING_MAPPING`` and
    ``BILL_MAPPING``.  The family-size plot sums the seven ``nb_*`` columns
    of every row (NULLs count as zero) and shows how many rows have each
    total, in ascending order of the total.

    NOTE(review): the ``nb_*`` columns presumably hold the number of
    household members per age bracket -- confirm against the schema.
    NOTE(review): a function with this name is defined more than once in
    this file; Python keeps whichever definition comes last.
    """
    from collections import Counter

    # Totals above this value are left out of the family-size plot.
    # NOTE(review): presumably outliers or data-entry errors -- confirm.
    max_family_size = 13

    metadata, connection = setup_database()
    columns = get_table("demographics", metadata).c

    _plot_value_counts(
        _count_rows_per_value(connection, columns.gender), GENDER_MAPPING,
        "gender.png", "Gender", "Gender Distribution")

    _plot_value_counts(
        _count_rows_per_value(connection, columns.age_group), AGE_MAPPING,
        "age.png", "Age Group", "Age Distribution")

    working_rows = _count_rows_per_value(connection, columns.working)
    # Debug output kept from the original implementation.
    print(working_rows)
    _plot_value_counts(
        working_rows, WORKING_MAPPING, "working.png", "Employment Status",
        "Employment Status Distribution", wrap_width=10)

    _plot_value_counts(
        _count_rows_per_value(connection, columns.phone_bill), BILL_MAPPING,
        "bill.png", "Bill", "Bill Distribution")

    family_query = select([
        columns.nb_12, columns.nb_12_18, columns.nb_18_30, columns.nb_30_40,
        columns.nb_40_50, columns.nb_50_65, columns.nb_65
    ])
    family_sizes = [
        sum(members for members in row if members is not None)
        for row in connection.execute(family_query).fetchall()
    ]
    distinct_sizes = set(family_sizes)
    # Debug output kept from the original implementation.
    print(distinct_sizes)

    # Count every total in one pass and plot the bars in ascending order;
    # iterating the set directly gives no guaranteed order.
    rows_per_size = Counter(family_sizes)
    plotted_sizes = [
        size for size in sorted(distinct_sizes) if size <= max_family_size
    ]
    draw_barplot(
        [rows_per_size[size] for size in plotted_sizes],
        x_ticks=plotted_sizes,
        xlabel="Number of members in family",
        ylabel="Count",
        title="Number of Family Members Distribution",
        save_as=os.path.join("/local", "thesis", "plots", "family.png"),
        width=0.35)