def r_cal(self, df, lon, lat, r):
    """Select the rows of ``df`` whose ``distance`` is smaller than ``r``.

    Candidate rows are first taken from the 4-character geohash cell of the
    query point.  While ``self.find_largest`` reports a value smaller than
    ``r`` for the current candidates, the search widens to the next shorter
    prefix (h4 -> h3 -> h2 -> h1) and finally to a copy of the whole frame.

    Args:
        df: DataFrame with the geohash prefix columns ``h1`` .. ``h4``.
        lon: Longitude of the query point.
        lat: Latitude of the query point.
        r: Threshold compared with the result of ``find_largest`` and with
            the ``distance`` column.

    Returns:
        The rows of the chosen candidate frame where ``distance < r``.
    """
    cell = encode(lat, lon, 4)
    # (column, index into the geohash) from the finest to the coarsest cell.
    levels = (
        ("h4", slice(0, 4)),
        ("h3", slice(0, 3)),
        ("h2", slice(0, 2)),
        ("h1", 0),
    )
    for column, key in levels:
        candidates = df[df[column] == cell[key]].reset_index(drop=True)
        # NOTE(review): find_largest presumably writes the ``distance``
        # column onto the frame it receives -- confirm against its source.
        if not (self.find_largest(candidates, lon, lat, r) < r):
            break
    else:
        # Every prefix level was too small: fall back to the full data set.
        candidates = df.copy()
        self.find_largest(candidates, lon, lat, r)
    return candidates[candidates.distance < r]
def preprocessing(df):
    """Add the geohash prefix columns ``h1`` .. ``h4`` to ``df`` in place.

    Every row's ``lat``/``lon`` pair is encoded to a 4-character geohash; the
    full hash and its 3-, 2- and 1-character prefixes are inserted as the
    first columns of the frame, in the order ``h1, h2, h3, h4``.

    Rows are visited by position, so the frame may carry any index (for
    example after filtering or sorting) -- a label lookup with
    ``range(len(df))`` would fail or mis-pair rows in that case.

    Args:
        df: DataFrame with ``lat`` and ``lon`` columns.  It is modified in
            place.

    Returns:
        The same ``df`` object, now including the four prefix columns.

    Raises:
        ValueError: Raised by ``DataFrame.insert`` if one of the prefix
            columns already exists.
    """
    codes = [
        encode(lat, lon, 4)
        for lat, lon in zip(df['lat'].to_numpy(), df['lon'].to_numpy())
    ]
    # Build every prefix list before touching ``df`` so a failure while
    # slicing cannot leave the frame half-updated.
    prefixes3 = [code[0:3] for code in codes]
    prefixes2 = [code[0:2] for code in codes]
    prefixes1 = [code[0] for code in codes]

    # Each insert goes to position 0, so the final order is h1, h2, h3, h4.
    df.insert(0, 'h4', codes)
    df.insert(0, 'h3', prefixes3)
    df.insert(0, 'h2', prefixes2)
    df.insert(0, 'h1', prefixes1)
    return df
def topk(self, df, lon, lat, k):
    """Narrow ``df`` by geohash prefix, then delegate to ``self.k_cal``.

    Starting from the whole frame, the candidate set is restricted to the
    query point's 1-, 2-, 3- and 4-character geohash cell in turn.  A
    restriction is only applied while it keeps more than ``k`` rows; the
    first level that does not stops the narrowing.

    Args:
        df: DataFrame with the geohash prefix columns ``h1`` .. ``h4``.
        lon: Longitude of the query point.
        lat: Latitude of the query point.
        k: Row-count threshold, also forwarded to ``k_cal``.

    Returns:
        Whatever ``self.k_cal`` returns for the narrowed, re-indexed frame.
    """
    cell = encode(lat, lon, 4)
    pool = df
    # (column, index into the geohash) from the coarsest to the finest cell.
    levels = (
        ("h1", 0),
        ("h2", slice(0, 2)),
        ("h3", slice(0, 3)),
        ("h4", slice(0, 4)),
    )
    for column, key in levels:
        narrowed = pool[pool[column] == cell[key]]
        if not (narrowed.shape[0] > k):
            # Too few rows in this cell: keep the previous, wider pool.
            break
        pool = narrowed
    pool = pool.reset_index(drop=True)
    return self.k_cal(pool, lon, lat, k)
def update_and_transform(self, data_type):
    """Flatten one section of every record in ``self.data_dict`` into a frame.

    For each record the requested section (``alerts``, or the ``data`` list
    of ``hourly`` / ``daily``) is read, and every entry is tagged in place
    with the record's ``latitude``, ``longitude`` and ``tzone``.  All entries
    are concatenated into one DataFrame, which then gets a random UUID
    ``id`` column, UTC timestamps and a ``geohash`` column.

    Args:
        data_type: Section selector; one of the spellings of ``alerts`` /
            ``alert``, ``hourly`` or ``daily`` listed in the checks below.

    Returns:
        The combined DataFrame, or an empty DataFrame when no record
        yields any entries.

    Raises:
        ValueError: If ``data_type`` is not a recognised section name (only
            detected once at least one record is processed).
        KeyError: If a record lacks ``latitude``, ``longitude``,
            ``timezone`` or the requested ``hourly`` / ``daily`` section.
    """
    refined_data = []
    for item in self.data_dict:
        lat = item['latitude']
        lng = item['longitude']
        tz = item['timezone']
        if data_type in ('alerts', 'ALERTS', 'Alerts', 'alert', 'ALERT'):
            data = item.get('alerts')
        elif data_type in ('hourly', 'Hourly', 'HOURLY'):
            data = item['hourly'].get('data')
        elif data_type in ('daily', 'DAILY', 'Daily'):
            data = item['daily'].get('data')
        else:
            # ValueError is still caught by callers using ``except Exception``.
            raise ValueError('Unrecognized data section passed')
        try:
            for v in data:
                v.update({'latitude': lat, 'longitude': lng, 'tzone': tz})
        except (TypeError, AttributeError):
            # Best effort: a missing section (``None``) or entries that are
            # not dict-like cause the record to be skipped.
            continue
        refined_data.append(data)

    df_list = [pd.DataFrame(item) for item in refined_data if item]
    if not df_list:
        return pd.DataFrame()

    df = pd.concat(df_list).reset_index(drop=True)
    df['id'] = [uuid.uuid4() for _ in range(len(df.index))]

    # Both columns must be present; the previous
    # ``'precipType' and 'precipAccumulation' in df.columns`` only tested
    # the second name because a non-empty string literal is always truthy.
    if 'precipType' in df.columns and 'precipAccumulation' in df.columns:
        df = self.null_handler(df)

    time_cols = [
        col for col in df.columns
        if 'time' in col.lower() or 'expires' in col.lower()
    ]
    for col in time_cols:
        # Values are read as epoch seconds, localised with the row's own
        # time zone and then normalised to UTC.
        df[col] = df.apply(
            lambda x: pd.Timestamp(
                x[col], unit='s', tz=x['tzone']).tz_convert(tz='UTC'),
            axis=1)

    df['geohash'] = df.apply(
        lambda x: encode(x['latitude'], x['longitude']), axis=1)
    return df
""" # convert decimal degrees to radians lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) # haversine formula dlon = lon2 - lon1 dlat = lat2 - lat1 a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2 c = 2 * asin(sqrt(a)) r = 6371 # Radius of earth in kilometers. Use 3956 for miles return c * r if len(argv) == 3: lat,lon = float(argv[1]), float(argv[2]) # print(lat, lon, Geohash.encode(lat, lon)) print(lat, lon, encode(lat, lon)) else: for k,v in th.items(): lat,lon = v[0],v[1] # print(k, encode(lat, lon), lat, lon) encoded = encode(lat, lon) th[k] = (encoded, encoded[:8],v) for k,v in th.items(): print(k,v, len(v[0])) for k,v in th.items(): pass for a,b in comb(th.items(),2): n1, n2, c1, c2 = a[0], b[0], a[1][2],b[1][2] # print(a[1][2],b[1][2]) lat1,lon1 = c1