Пример #1
0
def user_gps_records():
    """Export each user's GPS, GPSWLAN and accelerometer records to CSV files.

    For every user id returned by ``DBDAO.users_df()`` up to three files are
    written (``outputs/user_gps/<id>_gps.csv``,
    ``outputs/user_gpswlan/<id>_gpswlan.csv`` and
    ``outputs/user_accel/<id>_accel.csv``). A file that already exists is
    left untouched, so an interrupted run can be resumed.
    """
    my_dao = dbdao.DBDAO()
    try:
        user_df = my_dao.users_df()
    finally:
        my_dao.close_connection()

    # (record type, columns taken from the joined table, output folder, file suffix)
    exports = [
        (RecordType.GPS,
         ["latitude", "longitude", "speed", "horizontal_accuracy",
          "vertical_accuracy", "speed_accuracy", "altitude", "horizontal_dop", "vertical_dop"],
         "outputs/user_gps/", "_gps.csv"),
        (RecordType.GPSWLAN,
         ["latitude", "longitude"],
         "outputs/user_gpswlan/", "_gpswlan.csv"),
        (RecordType.ACCEL,
         ["start", "stop", "avdelt", "data"],
         "outputs/user_accel/", "_accel.csv"),
    ]

    total = len(user_df["userid"])
    for count, userid in enumerate(user_df["userid"], start=1):
        print("\n")
        print("USERID:", userid)

        for record_type, right_cols, folder, suffix in exports:
            _export_user_records(userid, record_type, right_cols, folder + str(userid) + suffix)

        print("Instances:", count, "out of", total)
        print("--")


def _export_user_records(userid, record_type, right_cols, csv_path):
    """Write one user's joined records to ``csv_path`` unless the file already exists."""
    if os.path.isfile(csv_path):
        return

    my_dao = dbdao.DBDAO()
    try:
        result = my_dao.records_join_df(join_to_table=record_type.value,
                                        right_cols=right_cols,
                                        userids=[userid], verbose=True)
        result.to_csv(csv_path, index=False)

        # Show a small random sample as a visual sanity check.
        if len(result) > 5:
            print(result.sample(5))
    finally:
        # Release the connection even when the query or the write fails.
        my_dao.close_connection()
Пример #2
0
def time_resolution_gps(userids=None):
    """Summarise the gaps between consecutive GPS timestamps for each user.

    Reads ``outputs/user_gps/<userid>_gps.csv`` for every user, orders the
    rows by ``local_time`` and passes the differences between neighbouring
    timestamps to ``quantiles``. One row per user is written to
    ``outputs/time_resolution_gps.csv``.

    :param userids: iterable of user ids; when ``None`` all ids from
        ``DBDAO.users_df()`` are used.
    :return: None
    """
    if userids is None:
        dao = dbdao.DBDAO()
        userids = dao.users_df()["userid"]
        dao.close_connection()

    per_user_rows = []

    for userid in userids:
        print(userid)
        csv_path = "outputs/user_gps/" + str(userid) + "_gps.csv"
        try:
            timestamps = pd.read_csv(csv_path).sort_values(by="local_time")["local_time"]

            # Align every timestamp (from the second one on) with its predecessor.
            later = timestamps.iloc[1:].reset_index(drop=True)
            earlier = timestamps.iloc[:-1].reset_index(drop=True)
            gaps = later - earlier

            per_user_rows.append({"userid": userid, "time_diff_percentiles": quantiles(gaps)})

        except pd.errors.EmptyDataError:
            # A zero-byte export: nothing to measure for this user.
            print("Empty CSV")
        print("")

    pd.DataFrame(per_user_rows).to_csv("outputs/time_resolution_gps.csv", index=False)
Пример #3
0
def gps_accuracy(userids=None):
    """Summarise the ``horizontal_accuracy`` column of each user's GPS export.

    For every user, ``outputs/user_gps/<userid>_gps.csv`` is read, sorted by
    ``local_time`` and de-duplicated. The result of ``quantiles`` on the
    accuracy column is extended with the ``userid`` and the proportion of
    missing (NaN) accuracy values. All rows are written to
    ``outputs/horizontal_accuracy.csv`` and, when at least one user produced
    a row, a ``describe()`` summary is printed.

    :param userids: iterable of user ids; when ``None`` all ids from
        ``DBDAO.users_df()`` are used.
    :return: None
    """
    if userids is None:
        my_dao = dbdao.DBDAO()
        try:
            userids = my_dao.users_df()["userid"]
        finally:
            my_dao.close_connection()

    horizontal_accuracy_list_dict = []

    for userid in userids:
        print(userid)
        try:
            user_gps_df = pd.read_csv("outputs/user_gps/" + str(userid) + "_gps.csv")
        except pd.errors.EmptyDataError:
            print("Empty CSV")
        else:
            user_gps_df = user_gps_df.sort_values(by="local_time").drop_duplicates()
            accuracy = user_gps_df["horizontal_accuracy"]

            sp = quantiles(accuracy)
            sp["userid"] = userid

            # Share of rows without an accuracy value. A column that is
            # entirely NaN yields 1.0; only a file with no rows yields 0.
            n = len(accuracy)
            if n > 0:
                sp["nan_proportion"] = (n - float(accuracy.count())) / n
            else:
                sp["nan_proportion"] = 0

            horizontal_accuracy_list_dict.append(sp)
        print("")

    summary = pd.DataFrame(horizontal_accuracy_list_dict)
    summary.to_csv("outputs/horizontal_accuracy.csv", index=False)

    # describe() raises ValueError on a frame without columns, which is what
    # we get when no user contributed a row.
    if not summary.empty:
        print(summary.describe())
Пример #4
0
def places_home(userid, do_remove_outliers=False):
    '''
    Returns the GPS data matching the intervals the user reported as being at home.

    The home intervals come from ``DBDAO.places_home_df`` and are sorted by
    ``time_start`` before being passed, together with the user's GPS data,
    to ``places``.

    :param userid: id of the user whose home intervals are looked up
    :param do_remove_outliers: when True, the matched data is passed through
        ``geo.remove_outliers`` before being returned
    :return: an empty ``pd.DataFrame`` when the user has no home intervals,
        otherwise the result of ``places`` (optionally outlier-filtered)
    '''
    user_gps_data = load_user_gps_data(userid)

    # Query once on a single connection and release it afterwards.
    my_dao = dbdao.DBDAO()
    try:
        home_visit_data = my_dao.places_home_df(userid=userid)
    finally:
        my_dao.close_connection()

    if len(home_visit_data) == 0:
        return pd.DataFrame()

    home_gps_data = places(home_visit_data.sort_values("time_start"), user_gps_data)

    if do_remove_outliers:
        return geo.remove_outliers(home_gps_data)

    return home_gps_data
Пример #5
0
def places_work(userid, do_remove_outliers=True):
    '''
    Returns the GPS data matching the intervals the user reported as work time.

    The work intervals come from ``DBDAO.places_work_df`` and are sorted by
    ``time_start`` before being passed, together with the user's GPS data,
    to ``places``.

    :param userid: id of the user whose work intervals are looked up
    :param do_remove_outliers: when True (the default), the matched data is
        passed through ``geo.remove_outliers`` before being returned
    :return: an empty ``pd.DataFrame`` when the user has no work intervals,
        otherwise the result of ``places`` (optionally outlier-filtered)
    '''
    user_gps_data = load_user_gps_data(userid)

    # Query once on a single connection and release it afterwards.
    my_dao = dbdao.DBDAO()
    try:
        work_visit_data = my_dao.places_work_df(userid=userid)
    finally:
        my_dao.close_connection()

    if len(work_visit_data) == 0:
        return pd.DataFrame()

    work_gps_data = places(work_visit_data.sort_values("time_start"), user_gps_data)

    if do_remove_outliers:
        return geo.remove_outliers(work_gps_data)

    return work_gps_data
Пример #6
0
def speed_nan(userids=None):
    """Export, per user, the displacement between consecutive GPS fixes.

    Each user's ``outputs/user_gps/<userid>_gps.csv`` is sorted by
    ``local_time`` and de-duplicated. For every row except the first, the
    distance (``dS``) and time difference (``dT``) to the previous row are
    recorded together with a ``speed_valid`` flag (0 when the row's ``speed``
    is NaN, 1 otherwise). Rows with a missing speed are written first, then
    the remaining rows, each group in index order, to
    ``outputs/user_gps/speeds/<userid>_user_gps_speeds.csv``.

    :param userids: iterable of user ids; when ``None`` all ids from
        ``DBDAO.users_df()`` are used.
    :return: None
    """
    if userids is None:
        my_dao = dbdao.DBDAO()
        try:
            userids = my_dao.users_df()["userid"]
        finally:
            my_dao.close_connection()

    for userid in userids:
        print(userid)
        try:
            user_gps_df = pd.read_csv("outputs/user_gps/" + str(userid) + "_gps.csv")
            user_gps_df = user_gps_df.sort_values(by="local_time").drop_duplicates().reset_index(drop=True)

            speed_missing = user_gps_df["speed"].isnull()
            nan_indexes = user_gps_df.index[speed_missing].tolist()
            not_nan_indexes = user_gps_df.index[~speed_missing].tolist()

            # Index 0 has no predecessor, so it never produces a record.
            records = [_speed_nan_record(user_gps_df, index, userid, 0)
                       for index in nan_indexes if index > 0]
            records += [_speed_nan_record(user_gps_df, index, userid, 1)
                        for index in not_nan_indexes if index > 0]

            pd.DataFrame(records).to_csv("outputs/user_gps/speeds/" + str(userid) + "_user_gps_speeds.csv", index=False)

        except pd.errors.EmptyDataError:
            print("Empty CSV")
        print("")


def _speed_nan_record(user_gps_df, index, userid, speed_valid):
    """Build the record describing the step from row ``index - 1`` to row ``index``."""
    columns = ["latitude", "longitude", "local_time"]
    prev_loc = user_gps_df.loc[index - 1][columns]
    loc = user_gps_df.loc[index][columns]
    dS = haversine_vectorized(loc["longitude"], loc["latitude"], prev_loc["longitude"], prev_loc["latitude"])
    dT = loc["local_time"] - prev_loc["local_time"]

    return {"userid": userid, "current_time": loc["local_time"], "dS": dS, "dT": dT,
            "speed_valid": speed_valid, "lon": loc["longitude"], "lat": loc["latitude"],
            "prev_lon": prev_loc["longitude"], "prev_lat": prev_loc["latitude"]}
Пример #7
0
def user_total_records(save_to_filepath="outputs/user_total_records.csv"):
    """Count the records of every user and store the counts in a CSV file.

    If ``save_to_filepath`` already exists, the user ids listed in it are
    skipped and their rows kept, so an interrupted run can be resumed. The
    file is rewritten after every user so progress survives a crash.

    :param save_to_filepath: path of the CSV holding ``userid`` and
        ``n_records`` columns
    :return: None
    """
    my_dao = dbdao.DBDAO()
    try:
        user_df = my_dao.users_df()

        try:
            already_computed = pd.read_csv(save_to_filepath)
        except FileNotFoundError:
            # First run: nothing computed yet.
            user_records_list = []
            use_userids = user_df["userid"]
        else:
            user_records_list = already_computed.to_dict(orient="records")
            use_userids = set(user_df["userid"]) - set(already_computed["userid"])

        for userid in use_userids:
            records = my_dao.records_df(userids=[userid])
            user_records_list.append({"userid": userid, "n_records": len(records)})
            # Checkpoint after each user.
            pd.DataFrame(user_records_list).to_csv(save_to_filepath, index=False)

            print("userid:", userid, " - ", len(user_records_list), "out of", len(user_df))
            print("n_records:", len(records))
            print("")
    finally:
        # The connection was previously left open when the function returned or failed.
        my_dao.close_connection()