def process_trip(x, start_time):
    """Build the feature list for a single trip.

    Args:
        x: 2-D indexable of trip points (rows are points); indexed as
            ``x[i, :]`` and ``x[-1, 0]`` / ``x[-1, 1]``.
        start_time: trip start as seconds since the epoch, converted with
            ``time.localtime``.

    Returns:
        A list ``[weekday, hour, last_col0, last_col1, d_st, h_st, v_mn,
        head]``.
    """
    local = time.localtime(start_time)

    # Displacement and heading between the first and the last point of the
    # trip; both stay 0 when there are not at least two points.
    if len(x) > 1:
        v_mn = haversine_distance(x[0, :], x[-1, :])[0]
        head = heading(x[0, :], x[-1, :])
    else:
        v_mn = 0
        head = 0

    # Distance and heading of the first point relative to the city center.
    # NOTE(review): unlike v_mn, d_st is not indexed with [0], so it keeps
    # whatever container haversine_distance returns -- confirm intended.
    first_point = x[0, :]
    d_st = haversine_distance(first_point, CITY_CENTER)
    h_st = heading(first_point, CITY_CENTER[0])

    return [
        local.tm_wday, local.tm_hour,
        x[-1, 0], x[-1, 1], d_st, h_st, v_mn, head,
    ]
Пример #2
0
def distance_intra_cluster(transformed_df, main_df, centroids):
    """Sum customer-to-centroid distances over all zone assignments.

    Args:
        transformed_df: wide frame with an ``Id_Cliente`` column plus one
            column per zone; only cells with a value > 0 are kept
            (presumably these mark an assignment -- confirm with caller).
        main_df: frame providing ``Id_Cliente``, ``lat`` and ``lon``.
        centroids: sequence of ``(lat, lon)`` pairs, paired positionally with
            the zone labels ``D1``..``D6``.

    Returns:
        Tuple ``(total_manhattan, total_haversine)``. Per-zone sums are
        printed as a side effect.
    """
    zone_labels = ["D1", "D2", "D3", "D4", "D5", "D6"]
    centroid_by_zone = dict(zip(zone_labels, centroids))

    # One row per zone: zones | lat_centroids | lon_centroids
    centroids_df = (
        pd.DataFrame.from_dict(
            centroid_by_zone,
            orient="index",
            columns=["lat_centroids", "lon_centroids"],
        )
        .reset_index()
        .rename(columns={"index": "zones"})
    )

    # Wide -> long, then keep only the positive cells.
    long_df = pd.melt(transformed_df, id_vars=['Id_Cliente'], var_name="zones")
    assigned_df = long_df[long_df["value"] > 0]

    # Attach the customer coordinates and the coordinates of their zone.
    customer_coords = main_df[["Id_Cliente", "lat", "lon"]]
    joined_df = assigned_df.merge(customer_coords, on="Id_Cliente", how="left")
    joined_df = joined_df.merge(centroids_df, on="zones", how="left")

    point_and_centroid = (
        joined_df["lat"],
        joined_df["lon"],
        joined_df["lat_centroids"],
        joined_df["lon_centroids"],
    )
    joined_df["manhattan"] = manhattan_distance(*point_and_centroid)
    joined_df["haversine"] = haversine_distance(*point_and_centroid)

    for title, column in (("Manhattan Distance", "manhattan"),
                          ("Haversine Distance", "haversine")):
        print(title)
        print(joined_df.groupby("zones")[column].sum())

    total_manhattan = joined_df["manhattan"].sum()
    total_haversine = joined_df["haversine"].sum()
    return (total_manhattan, total_haversine)
Пример #3
0
def process_trip(x, start_time):
    """Build the feature list for a single point.

    Args:
        x: indexable whose first two elements are emitted as features and
            which is passed whole to ``haversine_distance`` / ``heading``.
        start_time: seconds since the epoch, converted with
            ``time.localtime``.

    Returns:
        ``[weekday, hour, x[0], x[1], distance_to_center, heading_to_center]``
    """
    local = time.localtime(start_time)

    # Position of the point relative to the city center.
    center_distance = haversine_distance(x, CITY_CENTER)
    center_heading = heading(x, CITY_CENTER[0])

    return [
        local.tm_wday, local.tm_hour,
        x[0], x[1], center_distance, center_heading,
    ]
Пример #4
0
 def _compute_or_fetch(self, code_one, loc_one, code_two, loc_two):
     """Computes the distance between two languages or fetches it."""
     key_one = (code_one, code_two)
     key_two = (code_two, code_one)
     if key_one in self._distance_cache or key_two in self._distance_cache:
         return self._distance_cache[key_one]
     else:
         dist = utils.haversine_distance(loc_one, loc_two)
         self._distance_cache[key_one] = dist
         self._distance_cache[key_two] = dist
         return dist
def process_trip(x, start_time):
    """Build the feature list for a (possibly truncated) trip.

    Args:
        x: 2-D array of trip points, one row per point (``x.shape[0]`` is the
            number of points).
        start_time: trip start as seconds since the epoch, converted with
            ``time.localtime``.

    Returns:
        A list with weekday, hour, number of points, first point, last point,
        distance/heading of the first point w.r.t. the city center,
        distance/heading of the last point w.r.t. the city center, summed
        step distance, median step distance, last step distance and the
        heading of the last step.
    """
    local = time.localtime(start_time)
    n_points = x.shape[0]

    # Step-wise statistics need at least two points; otherwise they stay 0.
    total_dist = 0
    median_step = 0
    last_step = 0
    last_heading = 0
    if n_points > 1:
        steps = haversine_distance(x[:-1, :], x[1:, :])
        total_dist = np.sum(steps)
        median_step = np.median(steps)
        last_step = steps[-1]
        last_heading = heading(x[-2, :], x[-1, :])

    # First point relative to the city center.
    start_dist = haversine_distance(x[0, :], CITY_CENTER)[0]
    start_heading = heading(x[0, :], CITY_CENTER[0])

    # Last point relative to the city center. Note the argument order of
    # heading() is (center, point) here, the reverse of the start heading.
    cut_dist = haversine_distance(x[-1, :], CITY_CENTER)[0]
    cut_heading = heading(CITY_CENTER[0], x[-1, :])

    return [
        local.tm_wday, local.tm_hour,
        n_points, x[0, 0], x[0, 1], x[-1, 0], x[-1, 1],
        start_dist, start_heading, cut_dist, cut_heading,
        total_dist, median_step, last_step, last_heading,
    ]
Пример #6
0
    def SearchNearestPharmacy(cls, currentLocation: Dict, range: int,
                              limit: int) -> tuple:
        """Find the pharmacies closest to ``currentLocation``.

        The pharmacy list is downloaded from the URL stored in the
        ``DATA_URL`` environment setting (1 second timeout).

        Args:
            currentLocation: mapping with ``latitude`` and ``longitude``.
            range: maximum distance (inclusive) for a pharmacy to be kept;
                same unit as ``utils.haversine_distance`` returns.
            limit: maximum number of pharmacies to return.

        Returns:
            ``(body, status)``:
            422 with a message when the location keys are missing,
            503 on a connection error, 500 on any other request error,
            the upstream status code when it is not 200,
            404 with the plain string ``"No resources"`` when nothing is in
            range, otherwise 200 with ``{"pharmacies": [...]}`` sorted by
            ascending distance.
        """
        required_keys = {"latitude", "longitude"}
        if not required_keys.issubset(currentLocation.keys()):
            return {
                "message":
                "Validation error: currentLocation must contain 'latitude' and 'longitude'"
            }, 422

        uri = env("DATA_URL")
        try:
            res = requests.get(uri, timeout=1)
        except requests.ConnectionError:
            logger.error("Connection error")
            return {"message": "Connection Error"}, 503
        except Exception as e:
            logger.error(str(e))
            return {"message": str(e)}, 500

        if res.status_code != 200:
            logger.error(res.status_code)
            return {"message": "Service error"}, res.status_code

        here_lat = currentLocation['latitude']
        here_lon = currentLocation['longitude']

        in_range = []
        for feature in res.json()['features']:
            # Coordinates are stored as [longitude, latitude].
            lon_lat = feature['geometry']['coordinates']
            name = feature['properties']['Descrizione']
            dist = utils.haversine_distance(here_lat, here_lon,
                                            lon_lat[1], lon_lat[0])
            if dist > range:
                continue
            in_range.append({
                "name": name,
                "distance": dist,
                "location": {
                    "latitude": lon_lat[1],
                    "longitude": lon_lat[0]
                }
            })

        if not in_range:
            return "No resources", 404

        # Stable sort, nearest first; then cap the result at `limit` entries.
        in_range.sort(key=lambda entry: entry['distance'])
        if limit < len(in_range):
            in_range = in_range[:limit]
        return {"pharmacies": in_range}, 200
Пример #7
0
    def match_telemetry(self):
        """
        Match visitors telemetry fields

        Every previous visitor in ``self.prev_vs`` is compared against
        ``self.new_v``; the best per-visitor score is returned.

        Degenerate inputs no longer raise: a visitor pair without any usable
        IP comparison gets ``ip_match`` / ``geographic_proximity`` of 0, and an
        empty ``self.prev_vs`` yields a score of 0.

        :return score: calculated telemetry score based on all attributes
        :return ip_timing_red_flag: True if IP timing/geographic location values indicate it can't
                                    be the same person
        """
        ip_timing_red_flag = False
        match_scores = []
        for prev_v in self.prev_vs:
            distance_scores, ip_matches = [], []
            for previous_ip_data in prev_v["ips"]:
                for new_ip_data in self.new_v["ips"]:
                    distance = haversine_distance(previous_ip_data["props"], new_ip_data["props"])
                    time_delta = timestamp_difference(
                        previous_ip_data["updated_at"], new_ip_data["updated_at"]
                    )
                    ip_matches.append(exact_match(previous_ip_data["ip"], new_ip_data["ip"]))
                    # A zero time delta over a real distance is an infinite
                    # speed: treat it as implausible instead of dividing by 0.
                    implausible = distance > 10 and (
                        time_delta == 0
                        or (distance / time_delta) > self.MAX_PLAUSIBLE_SPEED
                    )
                    if implausible:
                        ip_timing_red_flag = True
                    elif distance < 10:
                        # NOTE(review): a score of 1 here turns into a
                        # geographic_proximity of 0 for the *closest* pairs,
                        # while the else-branch maps small distances to ~0.
                        # Looks inverted -- confirm before changing.
                        distance_scores.append(1)
                    else:
                        distance_scores.append(min(1000, distance) / 1000)

            # Red-flagged pairs contribute no distance score, so the list can
            # be empty even when IPs were compared; report no proximity then.
            if distance_scores:
                geographic_proximity = 1 - (
                    sum(distance_scores) / len(distance_scores)
                )  # average distance score
            else:
                geographic_proximity = 0

            results = {
                # default covers visitors without any IP records
                "ip_match": max(ip_matches, default=0),
                "geographic_proximity": geographic_proximity,
                "creation_time_proximity": 1
                - min(
                    1,
                    timestamp_difference(
                        prev_v["visitors"]["createdAt"], self.new_v["visitors"]["createdAt"]
                    )
                    / 86400,  # seconds in a day
                ),
                "visitor_age_proximity": 1
                - age_difference(
                    prev_v["visitors"]["createdAt"], self.new_v["visitors"]["createdAt"]
                ),
            }

            match_scores.append(generate_match_score(results, self.weights["telemetry"]))

        # No previous visitors means nothing matched: score 0.
        return {
            "score": max(match_scores, default=0),
            "ip_timing_red_flag": ip_timing_red_flag,
        }
def find_close_languages(lat1, lng1, languages, distance_cache):
    """Return the indices of all languages close to the given coordinates.

    A language counts as close when its haversine distance to
    ``(lat1, lng1)`` is strictly below ``FLAGS.close_enough``.

    Args:
        lat1: latitude of the reference point (anything ``float()`` accepts).
        lng1: longitude of the reference point.
        languages: sequence of mappings with ``latitude`` and ``longitude``.
        distance_cache: dict keyed by ``(loc_a, loc_b)`` tuples; it is read
            and updated in place, with every distance stored under both
            orderings.

    Returns:
        List of indices into ``languages``, in input order.
    """
    # The reference point does not change, so build it once.
    loc1 = (float(lat1), float(lng1))
    close_language_indices = []
    for i, language in enumerate(languages):
        loc2 = (float(language["latitude"]), float(language["longitude"]))
        key = (loc1, loc2)
        if key in distance_cache:
            dist = distance_cache[key]
        else:
            dist = utils.haversine_distance(loc1, loc2)
            distance_cache[key] = dist
            distance_cache[(loc2, loc1)] = dist
        if dist < FLAGS.close_enough:
            close_language_indices.append(i)
    return close_language_indices
Пример #9
0
def get_user_shop_distance(result):
    """Add user-to-shop coordinate-difference and distance features.

    Args:
        result: frame (or mapping) providing ``user_latitude``,
            ``user_longitude``, ``shop_latitude`` and ``shop_longitude``.
            It is modified in place.

    Returns:
        The same ``result`` object with seven ``feature_user_shop_*``
        entries added.
    """
    user_lat = result['user_latitude']
    user_lon = result['user_longitude']
    shop_lat = result['shop_latitude']
    shop_lon = result['shop_longitude']

    # Signed coordinate differences and their magnitudes.
    lon_diff = user_lon - shop_lon
    lat_diff = user_lat - shop_lat
    result['feature_user_shop_lon_sub'] = lon_diff
    result['feature_user_shop_lat_sub'] = lat_diff
    result['feature_user_shop_lon_sub_abs'] = abs(lon_diff)
    result['feature_user_shop_lat_sub_abs'] = abs(lat_diff)

    # Three distance measures over the same pair of points.
    endpoints = (user_lat, user_lon, shop_lat, shop_lon)
    result['feature_user_shop_uclidean_dis'] = euclidean_distance(*endpoints)
    result['feature_user_shop_haversine_dis'] = haversine_distance(*endpoints)
    result['feature_user_shop_manhattan_dis'] = manhattan_distance(*endpoints)
    return result
Пример #10
0
def get_user_shop_average_distance(refer, result):
    """Add distance features between the user and the shop's mean position.

    The mean ``longitude`` / ``latitude`` per ``shop_id`` is computed from
    ``refer`` and left-joined onto ``result``; shops absent from ``refer``
    therefore get NaN coordinates.

    Args:
        refer: frame with ``shop_id``, ``longitude`` and ``latitude``.
        result: frame with ``shop_id``, ``user_latitude`` and
            ``user_longitude``. The input frame is not modified.

    Returns:
        A new frame equal to ``result`` plus three
        ``feature_user_shop_aver_*`` distance columns.
    """
    # mean() + rename replaces the dict-renaming form of SeriesGroupBy.agg,
    # which raises SpecificationError since pandas 1.0.
    shop_centers = (
        refer.groupby('shop_id', as_index=False)[['longitude', 'latitude']]
        .mean()
        .rename(columns={
            'longitude': 'shop_average_longitude',
            'latitude': 'shop_average_latitude',
        })
    )
    result = pd.merge(result, shop_centers, on=['shop_id'], how='left')

    endpoints = (
        result['user_latitude'], result['user_longitude'],
        result['shop_average_latitude'], result['shop_average_longitude'],
    )
    result['feature_user_shop_aver_uclidean_dis'] = euclidean_distance(*endpoints)
    result['feature_user_shop_aver_haversine_dis'] = haversine_distance(*endpoints)
    result['feature_user_shop_aver_manhattan_dis'] = manhattan_distance(*endpoints)

    # The averaged coordinates were only needed to compute the distances.
    return result.drop(
        columns=['shop_average_longitude', 'shop_average_latitude'])
Пример #11
0
import os
import time
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

from utils import haversine_distance

# Directory holding the pre-processed training CSV files.
DATA_DIR = '../data'

t0 = time.time()
# One regressor is set up per pre-processed training file.
for filename in ['train_pp_N2.csv', 'train_pp_N3.csv', 'train_pp_N1.csv']:
    print('reading training data from %s ...' % filename)

    df = pd.read_csv(os.path.join(DATA_DIR, filename))
    # Distance between the (xs, ys) and (xe, ye) points of every row; used
    # below to filter outliers.
    d1 = haversine_distance(df[['xs', 'ys']].values, df[['xe', 'ye']].values)

    # create training set: log of (len - 1) * 15
    # NOTE(review): presumably 'len' counts samples taken 15 s apart, making
    # this the log trip duration; a row with len == 1 yields -inf -- confirm.
    y = np.log((df['len'] - 1) * 15)
    # remove non-predictive features
    df.drop(['CALL_TYPE', 'TAXI_ID', 'xe', 'ye', 'len'], axis=1, inplace=True)
    # The np.float alias was removed in NumPy 1.24; the builtin float selects
    # the same float64 dtype.
    X = np.array(df, dtype=float)

    # clean data by removing long distance tracks (top 0.1 % by distance)
    th1 = np.percentile(d1, [99.9])
    X = X[(d1 < th1), :]
    y = y[(d1 < th1)]

    print('training a random forest regressor ...')
    # Initialize the famous Random Forest Regressor from scikit-learn
    clf = RandomForestRegressor(n_estimators=200, n_jobs=3, random_state=21)
Пример #12
0
 def getDistanceTo(self, baum):
     """Return the distance between this object and ``baum``.

     The haversine distance of the two latitude/longitude pairs is scaled
     by 1000; per the original comment the result is in metres, which
     assumes ``haversine_distance`` returns kilometres.
     """
     distance = haversine_distance(self.latitude, self.longitude,
                                   baum.latitude, baum.longitude)
     return 1000 * distance
Пример #13
0
    if not os.path.isfile(filename):
        continue
    
    df = pd.read_csv(filename)
    if df.shape[0] < 1000:
        print('skipping key point %i (%i)' % (id_, df.shape[0]))
        continue
    
    # factorize categorical columns in training set
    #df['CALL_TYPE'], ct_index = pd.factorize(df['CALL_TYPE'])
    #df = df[df['CALL_TYPE'] == 0]    # A=2, B=1, C=0
    # fill all NaN values with -1
    #df = df.fillna(-1)
        
    # remove long distance
    d1 = haversine_distance(df[['xs', 'ys']], df[['xe', 'ye']])
    th1 = np.percentile(d1, [99.9])
    df = df.loc[d1 < th1]

    y = np.ravel(np.log(df['len']*15 + 1))
    df.drop(['CALL_TYPE', 'TAXI_ID', 'xe', 'ye', 'len'], axis=1, inplace=True)
    X = np.array(df, dtype=np.float)

    print('training classifier of key point %i  (sz=%i) ...' % (id_, X.shape[0]))                                            
    # Initialize the famous Random Forest Regressor from scikit-learn
    clf = RandomForestRegressor(n_estimators=200, n_jobs=3, random_state=21)
    clf.fit(X, y)
    pred_rf = clf.predict(X_tst[id_:id_+1, :])

    clf = GradientBoostingRegressor(n_estimators=200, max_depth=3, random_state=21)
    clf.fit(X, y)