Exemplo n.º 1
0
def plot_age_groups():
    """Save a per-place bar chart of visit counts broken down by age group.

    For each place label in 1..10, the joined ``visits_10min`` table is
    queried once per age group (1..8) for the number of matching rows.  The
    eight counts are drawn as a bar chart whose tick labels come from
    ``AGE_MAPPING`` and whose title uses ``LABEL_PLACE_MAPPING``.  Each
    figure is written to ``<place>_age.png`` in the current working
    directory, where ``<place>`` is the place name with ``;`` and spaces
    replaced by underscores.
    """
    metadata, connection = setup_database()
    age_groups = xrange(1, 9)
    tables = ["visits_10min"]
    for table in tables:
        consolidated = return_joined_table(table, metadata)
        place_column = consolidated.c["visits_joined_places_place_label"]
        age_column = consolidated.c["demographics_age_group"]

        for place_label in xrange(1, 11):
            place_name = LABEL_PLACE_MAPPING[place_label]

            age_checkins = []
            for age_group in age_groups:
                query = select([func.count()],
                               and_(place_column == place_label,
                                    age_column == age_group))
                result = connection.execute(query).fetchall()
                age_checkins.append(result[0][0])

            fig, ax = plt.subplots()
            # The tick offsets below assume bars whose left edge sits on the
            # x value.  That was the implicit default before Matplotlib 2.0;
            # later releases centre bars on x, so request it explicitly.
            ax.bar(age_groups, age_checkins, align='edge')
            ax.set_ylabel("Count", fontsize=30, fontweight='bold')
            ax.set_xlabel("Age groups", fontsize=30, fontweight='bold')
            ax.set_title(place_name + " Visits across Age Groups",
                         fontsize=36, fontweight='bold')
            ax.set_xticks([i + 0.35 for i in age_groups])
            ax.set_xticklabels([AGE_MAPPING[i] for i in age_groups])

            filename = (place_name.replace(";", "_").replace(" ", "_") +
                        "_" + "age.png")
            fig.set_size_inches((15, 12))
            fig.savefig(filename, dpi=100)
            # Close explicitly so figures do not accumulate across places.
            plt.close(fig)
Exemplo n.º 2
0
def plot_start_time_day():
    """Draw, for each place category, the number of visits per weekday.

    For place labels 1..10 the start timestamps of all matching rows of the
    joined ``visits_10min`` table are fetched, converted with
    ``datetime.fromtimestamp`` and tallied by ``weekday()`` (0 = Monday).
    The seven totals are handed to ``draw_barplot`` with ``save_as`` set to
    ``/local/thesis/plots/<place>_day.png``.
    """
    weekday_names = [
        "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
        "Sunday"
    ]
    metadata, connection = setup_database()
    for table in ["visits_10min"]:
        consolidated = return_joined_table(table, metadata)
        start_column = consolidated.c["visits_joined_" + table + "_time_start"]
        label_column = consolidated.c["visits_joined_places_place_label"]
        for place_label in xrange(1, 11):
            rows = connection.execute(
                select([start_column], label_column == place_label)).fetchall()

            # One counter per weekday, indexed the way weekday() numbers them.
            per_weekday = [0] * 7
            for row in rows:
                per_weekday[datetime.fromtimestamp(row[0]).weekday()] += 1

            place_name = LABEL_PLACE_MAPPING[place_label]
            filename = place_name.replace(';', '_').replace(" ", "_") + "_day.png"
            target = os.path.join("/local", "thesis", "plots", filename)
            draw_barplot(per_weekday,
                         x_ticks=weekday_names,
                         xlabel="Day of week",
                         ylabel="Number of Checkins",
                         title="%s Visits by Days" % place_name,
                         save_as=target)
Exemplo n.º 3
0
def plot_age_groups():
    """Write one bar chart per place category with visits per age group.

    Place labels 1..10 and age groups 1..8 are iterated; for every pair a
    ``COUNT`` query is run against the joined ``visits_10min`` table.  The
    counts for one place are plotted as bars labelled via ``AGE_MAPPING``
    and the figure is saved as ``<place>_age.png`` in the current working
    directory (``;`` and spaces in the place name become underscores).
    """
    metadata, connection = setup_database()
    age_groups = xrange(1, 9)
    tables = ["visits_10min"]
    for table in tables:
        consolidated = return_joined_table(table, metadata)
        place_column = consolidated.c["visits_joined_places_place_label"]
        age_column = consolidated.c["demographics_age_group"]

        for place_label in xrange(1, 11):
            place_name = LABEL_PLACE_MAPPING[place_label]

            age_checkins = []
            for age_group in age_groups:
                query = select(
                    [func.count()],
                    and_(place_column == place_label,
                         age_column == age_group))
                result = connection.execute(query).fetchall()
                age_checkins.append(result[0][0])

            fig, ax = plt.subplots()
            # Tick positions (i + 0.35) are computed for bars that start at
            # x rather than being centred on it.  Matplotlib changed the
            # default alignment to 'center' in 2.0, so state it explicitly.
            ax.bar(age_groups, age_checkins, align='edge')
            ax.set_ylabel("Count", fontsize=30, fontweight='bold')
            ax.set_xlabel("Age groups", fontsize=30, fontweight='bold')
            ax.set_title(place_name + " Visits across Age Groups",
                         fontsize=36,
                         fontweight='bold')
            ax.set_xticks([i + 0.35 for i in age_groups])
            ax.set_xticklabels([AGE_MAPPING[i] for i in age_groups])

            filename = place_name.replace(";", "_").replace(
                " ", "_") + "_" + "age.png"
            fig.set_size_inches((15, 12))
            fig.savefig(filename, dpi=100)
            # Release the figure before moving on to the next place.
            plt.close(fig)
Exemplo n.º 4
0
def plot_gender():
    """Show a grouped bar chart of visits per place category, split by gender.

    For gender codes 1 and 2 and place labels 1..10, the number of matching
    rows in the joined ``visits_10min`` table is counted (one query per
    pair).  Code 1 is drawn in red and labelled 'Men', code 2 in green and
    labelled 'Women'.  The chart is displayed with ``plt.show()``; nothing
    is written to disk.
    """
    metadata, connection = setup_database()
    width = 0.35
    place_labels = xrange(1, 11)
    tables = ["visits_10min"]
    for table in tables:
        consolidated = return_joined_table(table, metadata)
        place_column = consolidated.c["visits_joined_places_place_label"]
        gender_column = consolidated.c["demographics_gender"]

        # Diagnostic output: total number of rows with gender code 2.
        # Written as a call so the module also parses on Python 3; with a
        # single argument it prints the same text on Python 2.
        print(connection.execute(
            select([func.count()], gender_column == 2)).fetchall())

        gender_checkins = []
        for gender_code in (1, 2):
            counts = []
            for place_label in place_labels:
                query = select([func.count()],
                               and_(place_column == place_label,
                                    gender_column == gender_code))
                result = connection.execute(query).fetchall()
                counts.append(result[0][0])
            gender_checkins.append(counts)

        fig, ax = plt.subplots()
        # Both series are laid out side by side starting at x and x + width,
        # with the tick on the boundary between them.  That layout needs
        # edge-aligned bars, which stopped being the default in
        # Matplotlib 2.0, so ask for it explicitly.
        rects1 = ax.bar(place_labels, gender_checkins[0], width,
                        color='r', align='edge')
        rects2 = ax.bar([i + width for i in place_labels],
                        gender_checkins[1], width,
                        color='g', align='edge')
        ax.legend((rects1[0], rects2[0]), ('Men', 'Women'))
        ax.set_ylabel("Count", fontsize=24, fontweight='bold')
        ax.set_xlabel("Place Category", fontsize=24, fontweight='bold')
        ax.set_title("Visits Across Gender", fontsize=32, fontweight='bold')

        # Wrap long category names so neighbouring tick labels do not overlap.
        xticks_values = [textwrap.fill(LABEL_PLACE_MAPPING[i], 10)
                         for i in place_labels]
        ax.set_xticks([i + width for i in place_labels])
        ax.set_xticklabels(xticks_values)
        plt.show()
Exemplo n.º 5
0
def plot_working_groups():
    """Draw, for each place category, the visit counts per working group.

    Place labels 1..10 and working-group codes 1..8 are iterated; each pair
    triggers one ``COUNT`` query on the joined ``visits_10min`` table.  The
    eight counts are passed to ``draw_barplot`` together with tick labels
    taken from ``WORKING_MAPPING`` (wrapped at 10 characters) and a
    ``save_as`` path of ``/local/thesis/plots/working/<place>_workgroup.png``.
    """
    metadata, connection = setup_database()
    for table in ["visits_10min"]:
        consolidated = return_joined_table(table, metadata)
        for place_label in xrange(1, 11):
            working_checkins = []
            for working_group in xrange(1, 9):
                condition = and_(
                    consolidated.c["visits_joined_places_place_label"] == place_label,
                    consolidated.c["demographics_working"] == working_group)
                rows = connection.execute(
                    select([func.count()], condition)).fetchall()
                working_checkins.append(rows[0][0])

            tick_labels = [textwrap.fill(WORKING_MAPPING[i], 10)
                           for i in xrange(1, 9)]
            place_name = LABEL_PLACE_MAPPING[place_label]
            filename = (place_name.replace(";", "_").replace(" ", "_") +
                        "_" + "workgroup.png")
            target = os.path.join("/local", "thesis", "plots", "working",
                                  filename)
            draw_barplot(working_checkins,
                         x_ticks=tick_labels,
                         xlabel="Working Status",
                         ylabel="Visits",
                         title=place_name + " Visits across Employment Status",
                         save_as=target,
                         width=0.35)
Exemplo n.º 6
0
def plot_demographics():
    """Plot the distributions of the columns of the ``demographics`` table.

    Five bar plots are requested from ``draw_barplot``, each with a
    ``save_as`` path under ``/local/thesis/plots``:

    * ``gender.png``  - rows per ``gender`` value, labelled via
      ``GENDER_MAPPING``
    * ``age.png``     - rows per ``age_group``, labelled via ``AGE_MAPPING``
    * ``working.png`` - rows per ``working`` value, labelled via
      ``WORKING_MAPPING`` (labels wrapped at 10 characters)
    * ``bill.png``    - rows per ``phone_bill``, labelled via ``BILL_MAPPING``
    * ``family.png``  - rows per family size, where the size of a row is the
      sum of its non-NULL ``nb_*`` columns

    NULL values are left out of the first four plots and family sizes
    above 13 are left out of the last one.
    """
    from collections import Counter

    metadata, connection = setup_database()
    demographics = get_table("demographics", metadata)
    plot_dir = os.path.join("/local", "thesis", "plots")

    def plot_distribution(column, mapping, filename, xlabel, title,
                          wrap=False, debug=False):
        """Bar-plot the row count for every non-NULL value of ``column``."""
        query = select([column, func.count(column)]).group_by(column)
        rows = connection.execute(query).fetchall()
        if debug:
            print(rows)
        rows = sorted((r for r in rows if r[0] is not None),
                      key=lambda r: r[0])
        x_ticks = [mapping[r[0]] for r in rows]
        if wrap:
            x_ticks = [textwrap.fill(text, 10) for text in x_ticks]
        draw_barplot([r[1] for r in rows],
                     x_ticks=x_ticks,
                     xlabel=xlabel,
                     ylabel="Count",
                     title=title,
                     save_as=os.path.join(plot_dir, filename),
                     width=0.35)

    plot_distribution(demographics.c.gender, GENDER_MAPPING, "gender.png",
                      "Gender", "Gender Distribution")
    plot_distribution(demographics.c.age_group, AGE_MAPPING, "age.png",
                      "Age Group", "Age Distribution")
    # The raw working-status rows are printed before filtering, as before.
    plot_distribution(demographics.c.working, WORKING_MAPPING, "working.png",
                      "Employment Status", "Employment Status Distribution",
                      wrap=True, debug=True)
    plot_distribution(demographics.c.phone_bill, BILL_MAPPING, "bill.png",
                      "Bill", "Bill Distribution")

    family_query = select([
        demographics.c.nb_12, demographics.c.nb_12_18,
        demographics.c.nb_18_30, demographics.c.nb_30_40,
        demographics.c.nb_40_50, demographics.c.nb_50_65,
        demographics.c.nb_65
    ])
    rows = connection.execute(family_query).fetchall()
    # Family size of a row = sum of its nb_* columns, NULLs counted as absent.
    family_sizes = [sum(a for a in r if a is not None)
                    for r in rows if r is not None]
    size_counts = Counter(family_sizes)
    print(set(family_sizes))

    # Sort explicitly: iterating a set gives no ordering guarantee, and the
    # bars should run from the smallest family size to the largest.
    # NOTE(review): sizes above 13 are dropped, presumably as outliers -
    # confirm the intended cutoff.
    x_ticks = sorted(size for size in size_counts if not size > 13)
    vals = [size_counts[size] for size in x_ticks]
    draw_barplot(vals,
                 x_ticks=x_ticks,
                 xlabel="Number of members in family",
                 ylabel="Count",
                 title="Number of Family Members Distribution",
                 save_as=os.path.join(plot_dir, "family.png"),
                 width=0.35)
Exemplo n.º 7
0
def plot_start_time_hour():
    """Draw, for each place category, the number of visits per hour of day.

    For place labels 1..10 the start timestamps of all matching rows of the
    joined ``visits_10min`` table are fetched, converted with
    ``datetime.fromtimestamp`` and tallied by their ``hour``.  The 24 totals
    are handed to ``draw_barplot`` with ``save_as`` set to
    ``/local/thesis/plots/<place>_hours.png``.
    """
    metadata, connection = setup_database()
    for table in ["visits_10min"]:
        consolidated = return_joined_table(table, metadata)
        start_column = consolidated.c["visits_joined_" + table + "_time_start"]
        label_column = consolidated.c["visits_joined_places_place_label"]
        for place_label in xrange(1, 11):
            rows = connection.execute(
                select([start_column], label_column == place_label)).fetchall()

            # One counter per hour 0..23.
            per_hour = [0] * 24
            for row in rows:
                per_hour[datetime.fromtimestamp(row[0]).hour] += 1

            place_name = LABEL_PLACE_MAPPING[place_label]
            filename = place_name.replace(';', '_').replace(" ", "_") + "_hours.png"
            target = os.path.join("/local", "thesis", "plots", filename)
            draw_barplot(per_hour,
                         x_ticks=xrange(24),
                         xlabel="Hour of Day",
                         ylabel="Number of Checkins",
                         title="%s Visits by Hours" % place_name,
                         save_as=target)
Exemplo n.º 8
0
def plot_start_time_day():
    """Draw, for each place category, the number of visits per weekday.

    Every place label in 1..10 gets one query for the start timestamps of
    its rows in the joined ``visits_10min`` table.  Each timestamp is turned
    into a weekday index via ``datetime.fromtimestamp(...).weekday()`` and
    counted; the seven counts go to ``draw_barplot`` with ``save_as`` set to
    ``/local/thesis/plots/<place>_day.png``.
    """
    weekday_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
                     "Saturday", "Sunday"]
    metadata, connection = setup_database()
    for table in ["visits_10min"]:
        consolidated = return_joined_table(table, metadata)
        start_column = consolidated.c["visits_joined_" + table + "_time_start"]
        label_column = consolidated.c["visits_joined_places_place_label"]
        for place_label in xrange(1, 11):
            query = select([start_column], label_column == place_label)
            rows = connection.execute(query).fetchall()

            # Index i holds the number of visits starting on weekday i.
            per_weekday = [0] * 7
            for row in rows:
                weekday = datetime.fromtimestamp(row[0]).weekday()
                per_weekday[weekday] += 1

            place_name = LABEL_PLACE_MAPPING[place_label]
            filename = place_name.replace(';', '_').replace(" ", "_") + "_day.png"
            draw_barplot(per_weekday,
                         x_ticks=weekday_names,
                         xlabel="Day of week",
                         ylabel="Number of Checkins",
                         title="%s Visits by Days" % place_name,
                         save_as=os.path.join("/local", "thesis", "plots",
                                              filename))
Exemplo n.º 9
0
def plot_working_groups():
    """Draw, for each place category, the visit counts per working group.

    For place labels 1..10, one ``COUNT`` query per working-group code 1..8
    is run against the joined ``visits_10min`` table.  The counts, tick
    labels from ``WORKING_MAPPING`` (wrapped at 10 characters) and a
    ``save_as`` path of ``/local/thesis/plots/working/<place>_workgroup.png``
    are passed to ``draw_barplot``.
    """
    metadata, connection = setup_database()
    for table in ["visits_10min"]:
        consolidated = return_joined_table(table, metadata)
        for place_label in xrange(1, 11):
            working_checkins = []
            for working_group in xrange(1, 9):
                matches_place = (
                    consolidated.c["visits_joined_places_place_label"] ==
                    place_label)
                matches_group = (
                    consolidated.c["demographics_working"] == working_group)
                query = select([func.count()],
                               and_(matches_place, matches_group))
                working_checkins.append(
                    connection.execute(query).fetchall()[0][0])

            tick_labels = [textwrap.fill(WORKING_MAPPING[i], 10)
                           for i in xrange(1, 9)]
            place_name = LABEL_PLACE_MAPPING[place_label]
            filename = place_name.replace(";", "_").replace(
                " ", "_") + "_" + "workgroup.png"
            draw_barplot(working_checkins,
                         x_ticks=tick_labels,
                         xlabel="Working Status",
                         ylabel="Visits",
                         title=place_name + " Visits across Employment Status",
                         save_as=os.path.join("/local", "thesis", "plots",
                                              "working", filename),
                         width=0.35)
Exemplo n.º 10
0
def plot_gender():
    """Show a grouped bar chart of visits per place category, split by gender.

    One ``COUNT`` query is run against the joined ``visits_10min`` table for
    every combination of gender code (1, 2) and place label (1..10).  The
    counts for code 1 are drawn in red and labelled 'Men', those for code 2
    in green and labelled 'Women'.  The figure is displayed with
    ``plt.show()`` and is not saved.
    """
    metadata, connection = setup_database()
    width = 0.35
    place_labels = xrange(1, 11)
    tables = ["visits_10min"]
    for table in tables:
        consolidated = return_joined_table(table, metadata)
        place_column = consolidated.c["visits_joined_places_place_label"]
        gender_column = consolidated.c["demographics_gender"]

        # Diagnostic output: number of rows with gender code 2.  The call
        # form prints the same text on Python 2 and, unlike the print
        # statement, is not a syntax error on Python 3.
        print(connection.execute(
            select([func.count()], gender_column == 2)).fetchall())

        gender_checkins = []
        for gender_code in (1, 2):
            counts = []
            for place_label in place_labels:
                query = select(
                    [func.count()],
                    and_(place_column == place_label,
                         gender_column == gender_code))
                result = connection.execute(query).fetchall()
                counts.append(result[0][0])
            gender_checkins.append(counts)

        fig, ax = plt.subplots()
        # The two series sit next to each other at x and x + width, with the
        # tick placed where they meet.  This only lines up for edge-aligned
        # bars; Matplotlib 2.0 made 'center' the default, so be explicit.
        rects1 = ax.bar(place_labels,
                        gender_checkins[0],
                        width,
                        color='r',
                        align='edge')
        rects2 = ax.bar([i + width for i in place_labels],
                        gender_checkins[1],
                        width,
                        color='g',
                        align='edge')
        ax.legend((rects1[0], rects2[0]), ('Men', 'Women'))
        ax.set_ylabel("Count", fontsize=24, fontweight='bold')
        ax.set_xlabel("Place Category", fontsize=24, fontweight='bold')
        ax.set_title("Visits Across Gender", fontsize=32, fontweight='bold')

        # Wrap long category names so adjacent tick labels stay readable.
        xticks_values = [textwrap.fill(LABEL_PLACE_MAPPING[i], 10)
                         for i in place_labels]
        ax.set_xticks([i + width for i in place_labels])
        ax.set_xticklabels(xticks_values)
        plt.show()
Exemplo n.º 11
0
def plot_start_time_hour():
    """Draw, for each place category, the number of visits per hour of day.

    Every place label in 1..10 gets one query for the start timestamps of
    its rows in the joined ``visits_10min`` table.  Each timestamp is
    converted with ``datetime.fromtimestamp`` and counted under its ``hour``;
    the 24 counts go to ``draw_barplot`` with ``save_as`` set to
    ``/local/thesis/plots/<place>_hours.png``.
    """
    metadata, connection = setup_database()
    for table in ["visits_10min"]:
        consolidated = return_joined_table(table, metadata)
        start_column = consolidated.c["visits_joined_" + table + "_time_start"]
        label_column = consolidated.c["visits_joined_places_place_label"]
        for place_label in xrange(1, 11):
            query = select([start_column], label_column == place_label)
            rows = connection.execute(query).fetchall()

            # Index h holds the number of visits that start during hour h.
            per_hour = [0] * 24
            for row in rows:
                hour = datetime.fromtimestamp(row[0]).hour
                per_hour[hour] += 1

            place_name = LABEL_PLACE_MAPPING[place_label]
            filename = place_name.replace(';', '_').replace(" ", "_") + "_hours.png"
            draw_barplot(per_hour,
                         x_ticks=xrange(24),
                         xlabel="Hour of Day",
                         ylabel="Number of Checkins",
                         title="%s Visits by Hours" % place_name,
                         save_as=os.path.join("/local", "thesis", "plots",
                                              filename))
Exemplo n.º 12
0
import sys
import csv
import datetime
import math
from spams.db.utils import setup_database, get_table
from sqlalchemy.sql import select
from sqlalchemy import and_, func
from sklearn.neighbors import DistanceMetric

# Module-level database handles, created once at import time and shared by
# the functions below.
metadata, connection = setup_database()
# Table objects looked up by name through the shared metadata.
places_location = get_table("places_location", metadata)
visits_10min = get_table("visits_10min", metadata)
records = get_table("records", metadata)


def relative_frequency(place, user):
    """Return the fraction of ``user``'s visits that were made to ``place``.

    Both totals are obtained with ``COUNT(*)`` queries on ``visits_10min``.
    The row count of a SELECT result is not used because ``rowcount`` is
    only defined for UPDATE/DELETE statements; for a SELECT its value
    depends on the database driver and is commonly -1, which would make the
    ratio come out as 1.0.

    :param place: value compared against ``visits_10min.placeid``
    :param user: value compared against ``visits_10min.userid``
    :returns: ``visits to place / all visits`` as a float, or ``0.0`` when
        the user has no visits at all (instead of dividing by zero)
    """
    by_user = visits_10min.c.userid == user

    count_all_places = connection.execute(
        select([func.count()]).select_from(visits_10min).where(
            by_user)).scalar()
    if not count_all_places:
        # No visits recorded for this user: the frequency is defined as 0.
        return 0.0

    count_place = connection.execute(
        select([func.count()]).select_from(visits_10min).where(
            and_(by_user, visits_10min.c.placeid == place))).scalar()
    return (count_place * 1.0) / (count_all_places * 1.0)


def distance_from_most_visited_place(place, user):
    # NOTE(review): only the opening of this function is visible here; how
    # ``place`` and the ranked list are used afterwards cannot be seen.
    # Count the rows of visits_10min for ``user`` per place id and order the
    # groups by that count, largest first.
    q = select([func.count(), visits_10min.c.placeid
                ]).where(visits_10min.c.userid == user).group_by(
                    visits_10min.c.placeid).order_by(func.count().desc())
    # Keep only the place id (second selected column) of each row, in the
    # order returned by the query.
    most_visited_places = [r[1] for r in connection.execute(q).fetchall()]
Exemplo n.º 13
0
import csv
import datetime
import math
from spams.db.utils import setup_database, get_table
from sqlalchemy.sql import select
from sqlalchemy import and_, func
from sklearn.neighbors import DistanceMetric
import numpy as np
from itertools import izip
from extract_features import write_features_to_csv

import logging
# Send log records of level DEBUG and above to the file 'motion.log'.
logging.basicConfig(filename='motion.log',level=logging.DEBUG)


# Module-level database handles, created once at import time.
metadata, connection = setup_database()
# Table objects looked up by name through the shared metadata.
places_location = get_table("places_location", metadata)
visits_10min = get_table("visits_10min", metadata)
records = get_table("records", metadata)
accel = get_table("accel", metadata)

def get_features(data):
    # NOTE(review): only the opening of this function is visible here; what
    # it does with these statistics and what it returns cannot be seen.
    # Split the samples into three series: elements 0, 1 and 2 of each item
    # in ``data``.
    X = [d[0] for d in data] 
    Y = [d[1] for d in data]
    Z = [d[2] for d in data]
    # Mean of each series.
    x_mean = np.mean(X)
    y_mean = np.mean(Y)
    z_mean = np.mean(Z)
    # Variance of each series (np.var with its default arguments).
    x_var  = np.var(X)
    y_var =  np.var(Y)
    z_var =  np.var(Z)
Exemplo n.º 14
0
def plot_demographics():
    """Plot the distributions of the columns of the ``demographics`` table.

    ``draw_barplot`` is called five times, each time with a ``save_as`` path
    under ``/local/thesis/plots``:

    * ``gender.png``  - rows per ``gender`` value (``GENDER_MAPPING`` labels)
    * ``age.png``     - rows per ``age_group`` (``AGE_MAPPING`` labels)
    * ``working.png`` - rows per ``working`` value (``WORKING_MAPPING``
      labels, wrapped at 10 characters)
    * ``bill.png``    - rows per ``phone_bill`` (``BILL_MAPPING`` labels)
    * ``family.png``  - rows per family size, the size of a row being the
      sum of its non-NULL ``nb_*`` columns

    NULL values are excluded from the first four plots; family sizes above
    13 are excluded from the last.
    """
    from collections import Counter

    metadata, connection = setup_database()
    demographics = get_table("demographics", metadata)
    plot_dir = os.path.join("/local", "thesis", "plots")

    def plot_distribution(column, mapping, filename, xlabel, title,
                          wrap=False, debug=False):
        """Bar-plot the row count for every non-NULL value of ``column``."""
        query = select([column, func.count(column)]).group_by(column)
        rows = connection.execute(query).fetchall()
        if debug:
            print(rows)
        # Drop the NULL group and order the bars by the column value.
        rows = sorted((r for r in rows if r[0] is not None),
                      key=lambda r: r[0])
        x_ticks = [mapping[r[0]] for r in rows]
        if wrap:
            x_ticks = [textwrap.fill(text, 10) for text in x_ticks]
        draw_barplot([r[1] for r in rows],
                     x_ticks=x_ticks,
                     xlabel=xlabel,
                     ylabel="Count",
                     title=title,
                     save_as=os.path.join(plot_dir, filename),
                     width=0.35)

    plot_distribution(demographics.c.gender,
                      GENDER_MAPPING,
                      "gender.png",
                      "Gender",
                      "Gender Distribution")
    plot_distribution(demographics.c.age_group,
                      AGE_MAPPING,
                      "age.png",
                      "Age Group",
                      "Age Distribution")
    # The raw working-status rows are printed before filtering, as before.
    plot_distribution(demographics.c.working,
                      WORKING_MAPPING,
                      "working.png",
                      "Employment Status",
                      "Employment Status Distribution",
                      wrap=True,
                      debug=True)
    plot_distribution(demographics.c.phone_bill,
                      BILL_MAPPING,
                      "bill.png",
                      "Bill",
                      "Bill Distribution")

    family_query = select([
        demographics.c.nb_12, demographics.c.nb_12_18, demographics.c.nb_18_30,
        demographics.c.nb_30_40, demographics.c.nb_40_50,
        demographics.c.nb_50_65, demographics.c.nb_65
    ])
    rows = connection.execute(family_query).fetchall()
    # Family size of a row = sum of its nb_* columns, NULLs counted as absent.
    family_sizes = [
        sum(a for a in r if a is not None) for r in rows if r is not None
    ]
    size_counts = Counter(family_sizes)
    print(set(family_sizes))

    # Sort explicitly: set iteration order is unspecified, and the bars
    # should run from the smallest family size to the largest.
    # NOTE(review): sizes above 13 are dropped, presumably as outliers -
    # confirm the intended cutoff.
    x_ticks = sorted(size for size in size_counts if not size > 13)
    vals = [size_counts[size] for size in x_ticks]
    draw_barplot(vals,
                 x_ticks=x_ticks,
                 xlabel="Number of members in family",
                 ylabel="Count",
                 title="Number of Family Members Distribution",
                 save_as=os.path.join(plot_dir, "family.png"),
                 width=0.35)