def read_data():
    """Load train + test operation logs and derive feature columns.

    Relies on module-level ``path``, ``hour_bin``, ``pd`` and ``geohash``.
    The ``x == x`` tests are the idiomatic NaN guard (NaN != NaN), so NaN
    cells pass through unchanged unless noted otherwise.
    """
    train_data = pd.read_csv(path + 'operation_train_new.csv')
    test_data = pd.read_csv(path + 'operation_round1_new.csv')
    ope_data = pd.concat([train_data, test_data])
    # First two geohash characters give a coarse region code.
    ope_data['ope_appro_geo_code'] = ope_data['geo_code'].apply(
        lambda x: x[0:2] if (x == x) else x)
    # decode_exactly returns (lat, lon, lat_err, lon_err).
    ope_data['ope_latitude'] = ope_data['geo_code'].apply(
        lambda x: geohash.decode_exactly(x)[0] if (x == x) else x)
    ope_data['ope_longitude'] = ope_data['geo_code'].apply(
        lambda x: geohash.decode_exactly(x)[1] if (x == x) else x)
    # 'time' is an HH:MM:SS-style string; keep the hour only.
    ope_data['ope_hour'] = ope_data['time'].apply(lambda x: int(x[0:2]))
    ope_data['ope_device_main_kind'] = ope_data['device2'].apply(
        lambda x: x.split(' ')[0] if (x == x) else x)
    # Unlike the columns above, a missing version maps to -1, not NaN.
    ope_data['main_version'] = ope_data['version'].apply(
        lambda x: x.split('.')[0] if (x == x) else -1)
    ope_data['os'] = ope_data['os'].astype(str)
    # Flags for specific identifier values — presumably fraud-correlated
    # magic values found during EDA; confirm against the dataset.
    ope_data['is_strange_mac1'] = ope_data['mac1'].apply(
        lambda x: 1 if x == 'a8dc52f65085212e' else 0)
    ope_data['is_strange_mac2'] = ope_data['mac2'].apply(
        lambda x: 1 if x == 'a8dc52f65085212e' else 0)
    ope_data['is_strange_device_code3'] = ope_data['device_code3'].apply(
        lambda x: 1 if x == '14c09cc8ce23d46c' else 0)
    # hour_bin is a module-level lookup mapping hour -> coarse bucket.
    ope_data['hour_bin'] = ope_data['ope_hour'].apply(lambda x: hour_bin[x])
    return ope_data
def geohash_shape(shape, precision, mode='intersect', threshold=None):
    """
    Find list of geohashes to cover the shape
    :param shape: shape to cover
    :type shape: BaseGeometry
    :param precision: geohash precision
    :type precision: int
    :param mode: 'intersect' - all geohashes intersect the shape
                 use 'threshold' option to specify a percentage of least coverage
                 'inside' - all geohashes inside the shape
                 'center' - all geohashes whose center is inside the shape
    :type mode: str
    :param threshold: percentage of least coverage
    :type threshold: float
    :return: list of geohashes
    :rtype: list
    """
    (min_lon, min_lat, max_lon, max_lat) = shape.bounds
    # Geohash cells containing the SW and NE corners of the bounding box.
    hash_south_west = geohash.encode(min_lat, min_lon, precision)
    hash_north_east = geohash.encode(max_lat, max_lon, precision)
    box_south_west = geohash.decode_exactly(hash_south_west)
    box_north_east = geohash.decode_exactly(hash_north_east)
    # decode_exactly -> (lat, lon, lat_err, lon_err); a full cell spans
    # twice the error in each axis.
    per_lat = box_south_west[2] * 2
    per_lon = box_south_west[3] * 2
    # Number of whole cells between the two corner cells, per axis.
    lat_step = int(round((box_north_east[0] - box_south_west[0]) / per_lat))
    lon_step = int(round((box_north_east[1] - box_south_west[1]) / per_lon))
    hash_list = []
    # Walk the cell grid NE-wards from the SW corner, testing each cell.
    for lat in range(0, lat_step + 1):
        for lon in range(0, lon_step + 1):
            next_hash = neighbor(hash_south_west, [lat, lon])
            if mode == 'center':
                (lat_center, lon_center) = decode(next_hash)
                if shape.contains(Point(lon_center, lat_center)):
                    hash_list.append(next_hash)
            else:
                # Build the cell's rectangle for geometric tests.
                next_bbox = geohash.bbox(next_hash)
                next_bbox_geom = box(next_bbox['w'], next_bbox['s'],
                                     next_bbox['e'], next_bbox['n'])
                if mode == 'inside':
                    if shape.contains(next_bbox_geom):
                        hash_list.append(next_hash)
                elif mode == 'intersect':
                    if shape.intersects(next_bbox_geom):
                        if threshold is None:
                            hash_list.append(next_hash)
                        else:
                            # Keep only cells covered by at least
                            # `threshold` fraction of their own area.
                            intersected_area = shape.intersection(
                                next_bbox_geom).area
                            if (intersected_area / next_bbox_geom.area) >= threshold:
                                hash_list.append(next_hash)
    return hash_list
def data_process():
    """Load and preprocess the train/test trip records.

    Reads the CSVs at module-level ``train_path``/``test_path``, concatenates
    them (test rows get a NaN ``geohashed_end_loc``), and derives time
    features, a day-type flag, decoded start coordinates, and a coarse
    period-of-day bucket.

    Returns the combined DataFrame.
    """
    train = pd.read_csv(train_path)
    test = pd.read_csv(test_path)
    # Test rows carry no destination; mark it missing so the frames align.
    test["geohashed_end_loc"] = np.nan
    # DataFrame.append is deprecated/removed in modern pandas;
    # pd.concat([train, test]) is the exact equivalent.  The local was also
    # renamed from `all`, which shadowed the builtin.
    df = train if test is None else pd.concat([train, test])
    df["starttime"] = pd.to_datetime(df["starttime"])
    df["day"] = df["starttime"].dt.day
    df["hour"] = df["starttime"].dt.hour
    df["minute"] = df["starttime"].dt.minute
    # Minutes since midnight.
    df["minute_from"] = df["hour"] * 60 + df["minute"]
    # NOTE(review): despite the name, the listed days look like weekend /
    # holiday dates in the competition window — confirm against the calendar.
    df["work_day"] = df["day"].apply(
        lambda x: 1 if x in [13, 14, 20, 21, 28, 29, 30] else 0)
    # decode_exactly -> (lat, lon, lat_err, lon_err); round to 7 decimals.
    df["lon_start"] = df["geohashed_start_loc"].map(
        lambda x: round(geohash.decode_exactly(x)[1], 7))
    df["lat_start"] = df["geohashed_start_loc"].map(
        lambda x: round(geohash.decode_exactly(x)[0], 7))

    def split(x):
        """Bucket an hour-of-day into one of six coarse periods."""
        if x < 7:
            return 0
        if x < 10:
            return 1
        if x == 12:
            return 2
        if x < 17:
            return 3
        if x < 20:
            return 4
        return 5

    df["period"] = df["hour"].apply(split)
    return df
def read_data():
    """Load operation train/test logs, compute popularity degrees and
    geo features, and return the concatenated frame.

    Uses module-level ``path``, ``pd``, ``geohash`` and
    ``get_ope_popular_degree``.  ``x == x`` is the NaN guard.
    """
    # Columns whose popularity degree is (apparently) meant to be computed.
    pop_col_names = [
        'day', 'mode', 'ope_hour', 'version', 'device_code1', 'device_code2',
        'device_code3', 'mac1', 'ip1', 'ip2', 'mac2', 'wifi',
        'ope_appro_geo_code', 'ip1_sub', 'ip2_sub'
    ]
    train_data = pd.read_csv(path + 'operation_train_new.csv',
                             dtype={
                                 'device1': str,
                                 'geo_code': str
                             })
    # Coarse region code: first two geohash characters.
    train_data['ope_appro_geo_code'] = train_data['geo_code'].apply(
        lambda x: x[0:2] if (x == x) else x)
    train_data['ope_hour'] = train_data['time'].apply(lambda x: int(x[0:2]))
    test_data = pd.read_csv(path + 'operation_round1_new.csv',
                            dtype={
                                'device1': str,
                                'geo_code': str
                            })
    test_data['ope_appro_geo_code'] = test_data['geo_code'].apply(
        lambda x: x[0:2] if (x == x) else x)
    test_data['ope_hour'] = test_data['time'].apply(lambda x: int(x[0:2]))
    # NOTE(review): the loop variable `col` is never used, so
    # get_ope_popular_degree is called once per column name with the whole
    # frame each time — presumably it should receive `col`; confirm its
    # signature before changing.
    for col in pop_col_names:
        get_ope_popular_degree(ope_data=train_data)
        get_ope_popular_degree(ope_data=test_data)
    ope_data = pd.concat([train_data, test_data])
    # decode_exactly -> (lat, lon, lat_err, lon_err).
    ope_data['ope_latitude'] = ope_data['geo_code'].apply(
        lambda x: geohash.decode_exactly(x)[0] if (x == x) else x)
    ope_data['ope_longitude'] = ope_data['geo_code'].apply(
        lambda x: geohash.decode_exactly(x)[1] if (x == x) else x)
    return ope_data
def read_data():
    """Load transaction train/test logs, compute popularity degrees and
    geo features, and return the concatenated frame.

    Uses module-level ``path``, ``pd``, ``geohash`` and
    ``get_trans_popular_degree``.  ``x == x`` is the NaN guard.
    """
    pop_col_names = ['day', 'merchant', 'code1', 'code2', 'acc_id1',
                     'device_code1', 'device_code2', 'device_code3', 'mac1',
                     'ip1', 'acc_id2', 'acc_id3', 'market_code', 'ip1_sub']
    train_data = pd.read_csv(path + 'transaction_train_new.csv',
                             dtype={
                                 'device1': str,
                                 'geo_code': str
                             })
    train_data['trans_appro_geo_code'] = train_data['geo_code'].apply(
        lambda x: x[0:2] if (x == x) else x)
    test_data = pd.read_csv(path + 'transaction_round1_new.csv',
                            dtype={
                                'device1': str,
                                'geo_code': str
                            })
    # NOTE(review): the loop variable `col` is never used, so the popularity
    # helper runs once per column name over the whole frame — presumably it
    # should receive `col`; confirm its signature.
    for col in pop_col_names:
        get_trans_popular_degree(trans_data=train_data)
        get_trans_popular_degree(trans_data=test_data)
    # NOTE(review): this duplicates the assignment above, and test_data never
    # gets 'trans_appro_geo_code', so test rows end up NaN after the concat.
    train_data['trans_appro_geo_code'] = train_data['geo_code'].apply(
        lambda x: x[0:2] if (x == x) else x)
    trans_data = pd.concat([train_data, test_data])
    # decode_exactly -> (lat, lon, lat_err, lon_err).
    trans_data['trans_latitude'] = trans_data['geo_code'].apply(
        lambda x: geohash.decode_exactly(x)[0] if (x == x) else x)
    trans_data['trans_longitude'] = trans_data['geo_code'].apply(
        lambda x: geohash.decode_exactly(x)[1] if (x == x) else x)
    trans_data['trans_hour'] = trans_data['time'].apply(lambda x: int(x[0:2]))
    return trans_data
def geohash_decode(hashed_code_l):
    """Decode each geohash in *hashed_code_l* to a [lat, lon] centroid pair,
    rounded to 4 decimal places."""
    decoded = (gh.decode_exactly(code) for code in hashed_code_l)
    return [[round(lat, 4), round(lon, 4)] for lat, lon, _, _ in decoded]
def encdec():
    """Demo of geohash encode/decode round-trips.

    The original used Python-2-only ``print`` statements; the parenthesized
    single-expression form below behaves identically on Python 2 and 3.
    """
    print('geohash.encode ' + geohash.encode(12.963787, 77.637789))
    print('geohash.encode with precision-5: ' +
          geohash.encode(12.963787, 77.637789, precision=5))
    print('geohash.decode ' + str(geohash.decode('tdr1wxype953')))
    print('geohash.decode exactly' + str(geohash.decode_exactly('tdr1wxype953')))
def bbox_query(extent, tree, precision):
    """Given an extent and tree loaded with geohashes, return all geohashes
    which intersect the extent

    NOTE(review): the indexing below implies `extent` is ordered
    (west, east, south, north) — confirm against the caller.
    """
    # Hashes of the four extent corners; their common prefix bounds the search.
    tl_hash = geohash.encode(extent[3], extent[0], precision=precision)
    tr_hash = geohash.encode(extent[3], extent[1], precision=precision)
    br_hash = geohash.encode(extent[2], extent[1], precision=precision)
    bl_hash = geohash.encode(extent[2], extent[0], precision=precision)
    common_hash = commonprefix([tl_hash, tr_hash, br_hash, bl_hash])
    # Candidates sharing the prefix; may overshoot the extent, filtered below.
    intersecting_hashes = tree.prefix_query(common_hash)
    # decode_exactly()[:2] is (lat, lon); reversed to (lon, lat) = (x, y).
    centroids = [
        geohash.decode_exactly(x)[:2][::-1] for x in intersecting_hashes
    ]
    xspace = x_spacing(centroids)
    yspace = y_spacing(centroids)
    valid_list = []
    # Keep hashes whose centroid lies within half a cell of the extent edges.
    for idx, hash in enumerate(intersecting_hashes):
        centroid = centroids[idx]
        if centroid[0] < extent[1] + xspace * 0.5 and centroid[
                0] > extent[0] - xspace * 0.5 and centroid[
                    1] < extent[3] + yspace * 0.5 and centroid[
                        1] > extent[2] - yspace * 0.5:
            valid_list.append(hash)
    return list(set(valid_list))
def __init__(self, *args, **kwargs):
    # Build a geojson.Feature for a parcel aggregation bucket: the bucket's
    # 'key' field is a geohash whose centroid becomes the Point geometry.
    __parcel_bucket_document = kwargs.get('parcelBucketDocument', '')
    __request = kwargs.get('request', None)
    # decode_exactly -> (lat, lon, lat_err, lon_err).
    (__lat, __lng, __lat_err, __lng_err) = geohash.decode_exactly(__parcel_bucket_document['key'])
    # self.__bbox / self.__properties are private helpers on this class.
    __bbox = self.__bbox(__parcel_bucket_document)
    __properties = self.__properties(__parcel_bucket_document, __bbox, request=__request)
    # Present the subclass as a plain 'Feature' when introspected/serialized.
    self.__class__.__name__ = 'Feature'
    # GeoJSON points are (lng, lat) order.
    geojson.Feature.__init__(self, geometry=geojson.Point((float(__lng), float(__lat))), properties=__properties, bbox=__bbox)
def get_neighbor_by_direction(hashcode, direction):
    """Return the geohash cell adjacent to *hashcode*, stepped by
    (y_step, x_step) whole cells; precision is preserved."""
    if not isinstance(direction, tuple):
        raise TypeError(
            "direction should be a tuple of form (y_step, x_step).")
    lat, lon, lat_delta, lon_delta = geohash.decode_exactly(hashcode)
    lat_step, lon_step = direction
    # Offset each axis independently via the shared helper.
    neighbor_lat = _get_neighbor(lat, lat_delta, lat_step)
    neighbor_lon = _get_neighbor(lon, lon_delta, lon_step)
    return geohash.encode(neighbor_lat, neighbor_lon, len(hashcode))
def on_message(mosq, userdata, msg):
    """MQTT callback: look up the reported position in the geohash cache
    ``cdb`` and print any cached entry at, or within ~100 m of, it."""
    # print("%s (qos=%s, r=%s) %s" % (msg.topic, str(msg.qos), msg.retain, str(msg.payload)))
    '''
    "position": {
        "id": 18300,
        "attributes": {
            "t": "I",
            "ignition": false,
            "distance": 0,
            "totalDistance": 28212813.97,
            "motion": false,
            "hours": 55000
        },
        "deviceId": 7,
        "protocol": "owntracks",
        "serverTime": null,
        "deviceTime": "2018-09-10T07:47:45.000+0000",
        "fixTime": "2018-09-10T07:47:45.000+0000",
        "outdated": false,
        "valid": true,
        "latitude": 49.0156556,
        "longitude": 8.3975169,
        "altitude": 0,
    '''
    try:
        d = json.loads(msg.payload)
    except:
        # Silently ignore payloads that are not valid JSON.
        return
    if 'position' in d:
        p = d['position']
        if 'latitude' not in p or 'longitude' not in p:
            return
        # Keep the original report in olat/olon; lat/lon get reused below.
        olat = lat = float(p['latitude'])
        olon = lon = float(p['longitude'])
        print("lat=", lat, "lon=", lon)
        ghash = geohash.encode(lat, lon, GEO_PREC)
        t = datetime.datetime.now()
        R = ''
        if ghash in cdb:
            # Exact cell hit.
            data = json.loads(cdb[ghash])
            print(t, "R=", R, msg.topic, ghash, json.dumps(data, indent=4))
        else:
            # Check all precision-7 cells within a 100 m radius.
            hash_list = proximityhash.create_geohash(lat, lon, 100, 7).split(',')
            for neighbor in hash_list:
                # print("--->", neighbor)
                lat, lon, a, b = geohash.decode_exactly(neighbor)
                # Distance (m) from the original report to the cell centroid.
                R = haversine(olon, olat, lon, lat) * 1000.0
                # print("N=",neighbor, lat, ",", lon)
                if neighbor in cdb:
                    data = json.loads(cdb[neighbor])
                    print(t, "R=%6.1fm" % R, msg.topic, neighbor, json.dumps(data, indent=4))
def points_from_geohash4(geohashlist):
    """Return a DataFrame with the four corner points (NE, SW, SE, NW) of
    every geohash cell in *geohashlist*."""
    records = []
    for cell in geohashlist:
        lat, lon, lat_d, lon_d = geohash.decode_exactly(cell)
        records.append([cell, lon + lon_d, lat + lat_d])  # ne
        records.append([cell, lon - lon_d, lat - lat_d])  # sw
        records.append([cell, lon + lon_d, lat - lat_d])  # se
        records.append([cell, lon - lon_d, lat + lat_d])  # nw
    return pd.DataFrame(records, columns=['GEOHASH', 'LONG', 'LAT'])
def points_from_geohash4(geohashlist):
    """Corner points of each geohash cell (NE, SW, SE, NW order) as a
    DataFrame with columns GEOHASH, LONG, LAT."""
    # Sign pairs applied to (lon_err, lat_err): ne, sw, se, nw.
    signs = ((1, 1), (-1, -1), (1, -1), (-1, 1))
    rows = []
    for ghash in geohashlist:
        lat, lon, dlat, dlon = geohash.decode_exactly(ghash)
        for sx, sy in signs:
            rows.append([ghash, lon + sx * dlon, lat + sy * dlat])
    return pd.DataFrame(rows, columns=['GEOHASH', 'LONG', 'LAT'])
def get_position(ghash, xsign, ysign):
    """Offset the geohash centroid by one half-cell per signed axis.

    *xsign*/*ysign* are '-' or '+' (anything else leaves that axis at the
    centroid).  Returns (x, y) = (longitude, latitude).
    """
    y, x, ydelta, xdelta = geohash.decode_exactly(ghash)
    if xsign == '-':
        x -= xdelta
    elif xsign == '+':
        x += xdelta
    if ysign == '-':
        y -= ydelta
    elif ysign == '+':
        y += ydelta
    return x, y
def get_hashsize(ul, size):
    """Diagonal length (in degrees) of one geohash cell at precision *size*,
    measured at the upper-left point *ul* given as (lon, lat)."""
    cell = geohash.encode(ul[1], ul[0], size)
    # decode_exactly errors are half-widths; double for the full cell span.
    _, _, half_lat, half_lon = geohash.decode_exactly(cell)
    span_lat = half_lat * 2.0
    span_lon = half_lon * 2.0
    return (span_lat ** 2 + span_lon ** 2) ** .5
def read_data():
    """Load train + test transaction logs and derive feature columns.

    Relies on module-level ``path``, ``hour_bin``, ``pd`` and ``geohash``.
    ``x == x`` is the NaN guard (NaN != NaN).
    """
    train_data = pd.read_csv(path + 'transaction_train_new.csv')
    test_data = pd.read_csv(path + 'transaction_round1_new.csv')
    trans_data = pd.concat([train_data, test_data])
    # Coarse region code: first two geohash characters.
    trans_data['trans_appro_geo_code'] = trans_data['geo_code'].apply(
        lambda x: x[0:2] if (x == x) else x)
    # decode_exactly -> (lat, lon, lat_err, lon_err).
    trans_data['trans_latitude'] = trans_data['geo_code'].apply(
        lambda x: geohash.decode_exactly(x)[0] if (x == x) else x)
    trans_data['trans_longitude'] = trans_data['geo_code'].apply(
        lambda x: geohash.decode_exactly(x)[1] if (x == x) else x)
    # 'time' is an HH:MM:SS-style string; keep the hour.
    trans_data['trans_hour'] = trans_data['time'].apply(lambda x: int(x[0:2]))
    trans_data['trans_device_main_kind'] = trans_data['device2'].apply(
        lambda x: x.split(' ')[0] if (x == x) else x)
    trans_data['hour_bin'] = trans_data['trans_hour'].apply(
        lambda x: hour_bin[x])
    # Magic values flagged during EDA — presumably fraud-correlated; confirm.
    trans_data['is_strange_mac1'] = trans_data['mac1'].apply(
        lambda x: 1 if x == 'a8dc52f65085212e' else 0)
    trans_data['is_strange_bal'] = trans_data['bal'].apply(lambda x: 1
                                                           if x == 100 else 0)
    return trans_data
def get_corner(hash, corner):
    """Geohash of the cell three cells diagonally away toward the given
    corner ('ul' = upper-left, 'lr' = lower-right); returns None otherwise."""
    lat, lon, dlat, dlon = geohash.decode_exactly(hash)
    if corner == 'ul':
        return geohash.encode(lat + (3 * dlat), lon - (3 * dlon), len(hash))
    if corner == 'lr':
        return geohash.encode(lat - (3 * dlat), lon + (3 * dlon), len(hash))
def read_data():
    """Load round-2 operation train/test logs, compute popularity degrees
    and geo features, and return the concatenated frame.

    Uses module-level ``pd``, ``geohash`` and ``get_ope_popular_degree``.
    ``x == x`` is the NaN guard.
    """
    pop_col_names = [
        'day', 'mode', 'ope_hour', 'version', 'device_code1', 'device_code2',
        'device_code3', 'mac1', 'ip1', 'ip2', 'mac2', 'wifi',
        'ope_appro_geo_code', 'ip1_sub', 'ip2_sub'
    ]
    train_data = pd.read_csv("../../data/train/operation_train_new.csv",
                             dtype={
                                 'device1': str,
                                 'geo_code': str
                             })
    # Coarse region code: first two geohash characters.
    train_data['ope_appro_geo_code'] = train_data['geo_code'].apply(
        lambda x: x[0:2] if (x == x) else x)
    train_data['ope_hour'] = train_data['time'].apply(lambda x: int(x[0:2]))
    test_data = pd.read_csv("../../data/test/test_operation_round2.csv",
                            dtype={
                                'device1': str,
                                'geo_code': str
                            })
    test_data['ope_appro_geo_code'] = test_data['geo_code'].apply(
        lambda x: x[0:2] if (x == x) else x)
    test_data['ope_hour'] = test_data['time'].apply(lambda x: int(x[0:2]))
    # NOTE(review): the loop variable `col` is never used — the helper runs
    # once per column name over the whole frame; confirm its signature.
    for col in pop_col_names:
        get_ope_popular_degree(ope_data=train_data)
        get_ope_popular_degree(ope_data=test_data)
    # Sampling turned out useless here — scores dropped sharply.
    #test_data = test_data.sample(n=len(train_data), random_state=0)
    ope_data = pd.concat([train_data, test_data])
    # decode_exactly -> (lat, lon, lat_err, lon_err).
    ope_data['ope_latitude'] = ope_data['geo_code'].apply(
        lambda x: geohash.decode_exactly(x)[0] if (x == x) else x)
    ope_data['ope_longitude'] = ope_data['geo_code'].apply(
        lambda x: geohash.decode_exactly(x)[1] if (x == x) else x)
    #get_ip_pop_degree(ope_data)
    return ope_data
def geohash_to_polygon(geo):
    """
    :param geo: String that represents the geohash.
    :return: Returns a Shapely's Polygon instance that represents the geohash.
    """
    lat, lng, dlat, dlng = geohash.decode_exactly(geo)
    # Corners in (lng, lat) order, walked SW -> SE -> NE -> NW and closed.
    sw = (lng - dlng, lat - dlat)
    se = (lng + dlng, lat - dlat)
    ne = (lng + dlng, lat + dlat)
    nw = (lng - dlng, lat + dlat)
    return geometry.Polygon([sw, se, ne, nw, sw])
def augment_data(raw_data):
    """
    raw_data is whatever data provided (input) from the csv file
    This function converts raw_data into train data
    """
    temp, n_day, n_time = get_14_days_data(raw_data)
    temp_norm = prepare_data_for_cluster(temp)
    clustered = cluster(temp_norm)
    # get demand for each unique time "t"
    by_t = temp.pivot_table(index="geohash6", columns="t",
                            values="demand").fillna(0)
    # get fourier values
    top_5_freqs, top_5_amplis, agg_fouriers = get_fourier(by_t)
    # get latlon for each geohash
    # NOTE(review): on Python 3, map() returns a one-shot iterator — the
    # first comprehension below exhausts it and the second sees nothing;
    # this only works as written on Python 2 (or if latlon is list()-ed).
    latlon = map(lambda x: geohash.decode_exactly(x), top_5_amplis.index)
    # NOTE(review): decode_exactly returns (lat, lon, ...), so x[1] is the
    # longitude — 'lat' and 'lon' below look swapped; confirm intent.
    loc = pd.DataFrame(
        {
            'lat': [x[1] for x in latlon],
            'lon': [x[0] for x in latlon]
        },
        index=top_5_amplis.index)
    # merge fourier and location
    var_add = pd.concat([top_5_freqs, top_5_amplis, loc], axis=1)
    # get previous demand values (lags in 15-min steps; 96 = 1 day, 192 = 2)
    selected_periods = [1, 2, 3, 4, 5, 6, 7, 8, 96, 192]
    ds = get_n_previous_demand(by_t, selected_periods)
    # create training data
    base = temp[[
        "geohash6", "t", "demand", "dayofweek", "hour", "minutes", "timestamp",
        "day"
    ]]
    base = pd.merge(ds.reset_index(), base, how="right",
                    on=["geohash6", "t"]).dropna()
    base = pd.merge(base, clustered, on=["geohash6", "dayofweek"], how="left")
    base = pd.merge(
        base,
        var_add.reset_index().rename(columns={'index': 'geohash6'}),
        how="left",
        on="geohash6")
    base = base.sort_values(["geohash6", "t"])
    base.index = range(len(base))
    # return training data and table of all previous demands
    return base, by_t
def fill_geohashs(data, size):
    """Densify a line table so consecutive points are no further apart than
    roughly 1/5 of a geohash cell at precision *size*, then map the result.

    Relies on sibling helpers: first_last, get_extrema, distance,
    generate_points and map_table.
    """
    # function for linting whether the first point and lastpoint are the same if not appends talbe
    data = first_last(data)
    extrema = get_extrema(data)
    # getting upper lefft and lowerright point
    ul = [extrema['w'], extrema['n']]
    lr = [extrema['e'], extrema['s']]
    # getting geohash for ul and lr
    # assuming 8 for the time being as abs min
    ulhash = geohash.encode(ul[1], ul[0], size)
    lrhash = geohash.encode(lr[1], lr[0], size)
    # Cell diagonal (degrees) at this precision, used as the spacing target.
    lat, long, latdelta, longdelta = geohash.decode_exactly(ulhash)
    latdelta, longdelta = latdelta * 2.0, longdelta * 2.0
    hashsize = ((latdelta ** 2) + (longdelta ** 2)) ** .5
    # Locate the lat/long columns by name (last match wins).
    count = 0
    for row in data.columns.values.tolist():
        if 'lat' in str(row).lower():
            latheader = row
        elif 'long' in str(row).lower():
            longheader = row
        count += 1
    count = 0
    newlist = []
    # Walk consecutive point pairs; interpolate where the gap is too large.
    for row in data[[longheader, latheader]].values.tolist():
        if count == 0:
            # First point has no predecessor to measure against.
            count = 1
        else:
            dist = distance(oldrow, row)
            if dist > hashsize / 5.0:
                # Number of intermediate points, ~5 per cell diagonal.
                number = (dist / hashsize) * 5.0
                number = int(number)
                # Drop the first generated point (== oldrow, already added).
                newlist += generate_points(number, oldrow, row)[1:]
            else:
                newlist.append(row)
        oldrow = row
    newlist = pd.DataFrame(newlist, columns=['LONG', 'LAT'])
    newlist = map_table(newlist, size, map_only=True)
    return newlist
def neighbor(geo_hash, direction):
    """
    Find neighbor of a geohash string in certain direction.
    :param geo_hash: geohash string
    :type geo_hash: str
    :param direction: Direction is a two-element array, i.e. [1,0] means north, [1,1] means northeast
    :type direction: list
    :return: geohash string
    :rtype: str
    """
    lat, lon, lat_err, lon_err = geohash.decode_exactly(geo_hash)
    # Step a full cell (2 * half-width) per unit of direction on each axis.
    shifted_lat = lat + direction[0] * lat_err * 2
    shifted_lon = lon + direction[1] * lon_err * 2
    return geohash.encode(shifted_lat, shifted_lon, len(geo_hash))
def __init__(self, *args, **kwargs):
    # Build a geojson.Feature for a parcel aggregation bucket: the bucket's
    # 'key' field is a geohash whose centroid becomes the Point geometry.
    __parcel_bucket_document = kwargs.get('parcelBucketDocument', '')
    __request = kwargs.get('request', None)
    # decode_exactly -> (lat, lon, lat_err, lon_err).
    (__lat, __lng, __lat_err, __lng_err) = geohash.decode_exactly(__parcel_bucket_document['key'])
    # self.__bbox / self.__properties are private helpers on this class.
    __bbox = self.__bbox(__parcel_bucket_document)
    __properties = self.__properties(__parcel_bucket_document, __bbox, request=__request)
    # Present the subclass as a plain 'Feature' when introspected/serialized.
    self.__class__.__name__ = 'Feature'
    # GeoJSON points are (lng, lat) order.
    geojson.Feature.__init__(self,
                             geometry=geojson.Point(
                                 (float(__lng), float(__lat))),
                             properties=__properties,
                             bbox=__bbox)
def getImages(self, request):
    """Return every DropletImage record as a responseMessage, decoding each
    stored geohash back to latitude/longitude strings."""
    imageSet = db.GqlQuery("SELECT * FROM DropletImage")
    logging.info("Image count from database" + str(imageSet.count()))
    # NOTE(review): `list` shadows the builtin for the rest of this method.
    list = []
    for e in imageSet:
        # decode_exactly -> (lat, lon, lat_err, lon_err).
        location = geohash.decode_exactly(e.geoHash)
        logging.info("image")
        a = imageData(imageID=e.imageID,
                      url=images.get_serving_url(e.blobKey),
                      flag=e.flag,
                      like=e.like,
                      score=1,
                      latitude=str(location[0]),
                      longitude=str(location[1]))
        list.append(a)
    logging.info("Image count for response" + str(len(list)))
    return responseMessage(images=list)
def get_alignment_geohash(hash):
    """Closed ring of the geohash cell corners as [lat, lon] pairs, in
    SE, SW, NW, NE order with the first point repeated to close the ring."""
    lat, lon, dlat, dlon = geohash.decode_exactly(hash)
    se = [lat - dlat, lon + dlon]
    sw = [lat - dlat, lon - dlon]
    nw = [lat + dlat, lon - dlon]
    ne = [lat + dlat, lon + dlon]
    return [se, sw, nw, ne, se]
def get_alignment_geohash(hash):
    """Return the geohash cell outline as five [lat, lon] points
    (SE, SW, NW, NE, then SE again to close the ring)."""
    mid_lat, mid_lon, half_lat, half_lon = geohash.decode_exactly(hash)
    # Sign pairs applied to (half_lat, half_lon): SE, SW, NW, NE.
    ring = [[mid_lat + s_lat * half_lat, mid_lon + s_lon * half_lon]
            for s_lat, s_lon in ((-1, 1), (-1, -1), (1, -1), (1, 1))]
    ring.append(ring[0])
    return ring
def get_data(file_path):
    """Load a demand CSV and enrich it with time features and per-geohash
    coordinates (lat/long, their errors, and unit-sphere x/y/z)."""
    print("Loading data..", file_path)
    with file_io.FileIO(file_path, mode='rb') as input_f:
        df = pd.read_csv(input_f)
    # 'timestamp' is "H:M"; derive minutes-of-day and a normalized form.
    df[['h', 'm']] = df['timestamp'].str.split(':', expand=True)
    df['h'] = df['h'].astype('int64')
    df['m'] = df['m'].astype('int64')
    df['mins'] = (df['h'] * 60) + df['m']
    df['mins_norm'] = df['mins'] / 1440
    df['dow'] = df['day'] % 7
    # Resolve GeoCodes: one row per unique geohash6, most frequent first.
    geohashes_df = df.groupby('geohash6', as_index=False).agg({
        'day': 'count'
    }).rename(columns={
        'day': 'count'
    }).sort_values(by='count', ascending=False)
    geohashes_df['lat'] = None
    geohashes_df['lat_err'] = None
    geohashes_df['long'] = None
    geohashes_df['long_err'] = None
    geohashes_df['x'] = None
    geohashes_df['y'] = None
    geohashes_df['z'] = None
    for i in range(len(geohashes_df)):
        # decode_exactly -> (lat, lon, lat_err, lon_err).
        geo_decoded = geohash.decode_exactly(geohashes_df.loc[i, 'geohash6'])
        geohashes_df.loc[i, 'lat'] = geo_decoded[0]
        geohashes_df.loc[i, 'long'] = geo_decoded[1]
        geohashes_df.loc[i, 'lat_err'] = geo_decoded[2]
        geohashes_df.loc[i, 'long_err'] = geo_decoded[3]
        # https://datascience.stackexchange.com/a/13575
        # NOTE(review): math.cos/sin expect radians but decode_exactly
        # returns degrees — x/y/z are not true unit-sphere coords as
        # written; confirm whether this was intentional as a raw feature.
        geohashes_df.loc[i, 'x'] = cos(geo_decoded[0]) * cos(
            geo_decoded[1])  # cos(lat) * cos(lon)
        geohashes_df.loc[i, 'y'] = cos(geo_decoded[0]) * sin(
            geo_decoded[1])  # cos(lat) * sin(lon)
        geohashes_df.loc[i, 'z'] = sin(geo_decoded[0])  # sin(lat)
    df = df.merge(geohashes_df.drop(columns=['count']),
                  on='geohash6',
                  how='inner')
    return df
def make_points_geohash(c1, c2, size):
    """Build a grid of points spanning from cell *c1* to cell *c2*, spaced by
    one geohash cell, and map it at precision *size* via map_table_new.

    :param c1: geohash of one corner cell (its exact decode anchors the grid)
    :param c2: geohash of the opposite corner cell
    :param size: geohash precision passed through to map_table_new
    """
    lat1, long1, latdelta, longdelta = geohash.decode_exactly(c1)
    lat2, long2 = geohash.decode(c2)
    # Full cell spans (decode errors are half-widths).
    latdelta, longdelta = latdelta * 2, longdelta * 2
    # BUG FIX: np.linspace requires an integer sample count — the original
    # passed a float, which raises TypeError on modern NumPy.  int() here
    # truncates exactly as old NumPy used to.
    longs = np.linspace(long1, long2,
                        int(abs(long2 - long1) / longdelta) + 1)
    lats = np.linspace(lat1, lat2, int(abs(lat2 - lat1) / latdelta) + 1)
    total = []
    count = 0
    # One column of points per longitude; X tags the column index.
    for long in longs:
        total += zip(lats, [long] * len(lats), [count] * len(lats))
        count += 1
    total = pd.DataFrame(total, columns=['LAT', 'LONG', 'X'])
    return map_table_new(total, precision=size)
def geohash_to_polygon(self, geo: str, Geometry: bool = True, sequence: bool = True) -> [list, BaseGeometry]:
    """
    Convert a geohash string into its bounding rectangle.

    Parameters
    ----------
    geo : str
        The geohash string to convert.
    Geometry : boolean, optional
        Output format — True (default) returns a shapely.geometry.Polygon,
        False returns a list of corner points.
    sequence : bool, optional
        Point ordering — True (default) for [lon, lat], False for [lat, lon].

    Returns
    ----------
    list or shapely.geometry.base.BaseGeometry
        The rectangle corresponding to the geohash.

    Example
    ---------
    >>> G = GeohashOperator()
    >>> G.geohash_to_polygon('wx4ervz', True, True)
    POLYGON ((116.3658142089844 39.97787475585938, 116.3671875 39.97787475585938, 116.3671875 39.979248046875, 116.3658142089844 39.979248046875, 116.3658142089844 39.97787475585938))
    """
    # decode_exactly -> (lat, lon, lat_err, lon_err).
    lat_centroid, lng_centroid, lat_offset, lng_offset = geohash.decode_exactly(geo)
    # Corners in (lat, lon) order: SW, SE, NE, NW.
    corner_1 = (lat_centroid - lat_offset, lng_centroid - lng_offset)
    corner_2 = (lat_centroid - lat_offset, lng_centroid + lng_offset)
    corner_3 = (lat_centroid + lat_offset, lng_centroid + lng_offset)
    corner_4 = (lat_centroid + lat_offset, lng_centroid - lng_offset)
    if sequence:
        # Flip each corner to (lon, lat).
        corner_1, corner_2, corner_3, corner_4 = corner_1[::-1], corner_2[::-1], corner_3[::-1], corner_4[::-1]
    if Geometry:
        return Polygon([corner_1, corner_2, corner_3, corner_4, corner_1])
    else:
        return [corner_1, corner_2, corner_3, corner_4, corner_1]
def make_squares(data, presicion):
    """Write per-geohash corner squares with occurrence counts to
    'squares<precision>.csv' and return *data* unchanged.

    Each input row's last element is taken to be a geohash.
    NOTE(review): the parameter name 'presicion' is a typo but is part of
    the public signature, so it is kept.
    """
    import numpy as np
    import pandas as pd
    hashs = []
    count = 0
    boxes = [['HASH', 'LAT1', 'LONG1', 'LAT2', 'LONG2', 'LAT3', 'LONG3',
              'LAT4', 'LONG4']]
    for row in data[1:]:
        #processing out the 4 points
        hashreturn = geohash.decode_exactly(row[-1])
        #getting lat and long datum
        latdatum = hashreturn[0]
        longdatum = hashreturn[1]
        #getting delta (half-width of the cell on each axis)
        latdelta = hashreturn[2]
        longdelta = hashreturn[3]
        point1 = [latdatum - latdelta, longdatum + longdelta]
        point2 = [latdatum - latdelta, longdatum - longdelta]
        point3 = [latdatum + latdelta, longdatum + longdelta]
        point4 = [latdatum + latdelta, longdatum - longdelta]
        pointrow = [row[-1]] + point1 + point2 + point3 + point4
        boxes.append(pointrow)
        hashs.append(row[-1])
    newlist = [['GEOHASH', 'LAT1', 'LONG1', 'LAT2', 'LONG2', 'LAT3', 'LONG3',
                'LAT4', 'LONG4']]
    boxes = pd.DataFrame(boxes[1:], columns=newlist[0])
    # Group identical squares; COUNT sums to the occurrence frequency.
    boxes['COUNT'] = 1
    boxes = boxes.groupby(newlist[0], sort=True).sum()
    boxes = boxes.sort_values(by=['COUNT'], ascending=False)
    boxes.to_csv('squares' + str(presicion) + '.csv')
    return data
def ind_dec_points(alignmentdf):
    """Append fractional in-cell coordinates ('x', 'y') for each point,
    measured from the upper-left corner of its geohash cell, and return the
    (mutated) DataFrame."""
    # Locate the lat / long / geohash columns by name (last match wins).
    for idx, col in enumerate(alignmentdf.columns.values.tolist()):
        lowered = col.lower()
        if 'lat' in lowered:
            latpos = idx
        elif 'long' in lowered:
            longpos = idx
        elif 'geohash' in lowered:
            hashpos = idx
    xs = []
    ys = []
    for record in alignmentdf.values.tolist():
        lat = record[latpos]
        long = record[longpos]
        ghash = record[hashpos]
        midlat, midlong, latdelta, longdelta = geohash.decode_exactly(ghash)
        # Upper-left corner of the cell; deltas are half-widths.
        ulcornerpoint = [midlat + latdelta, midlong - longdelta]
        latsize = latdelta * 2
        longsize = longdelta * 2
        xs.append(abs(ulcornerpoint[1] - long) / longsize)
        ys.append(abs(ulcornerpoint[0] - lat) / latsize)
    alignmentdf['x'] = xs
    alignmentdf['y'] = ys
    return alignmentdf
def ind_dec_points(alignmentdf):
    """Add per-point fractional cell coordinates ('x', 'y') to *alignmentdf*
    in place: each point's offset from its geohash cell's upper-left corner,
    scaled by the cell size.  Returns the same DataFrame."""
    # Map role -> column index; later name matches override earlier ones.
    positions = {}
    for i, name in enumerate(alignmentdf.columns.values.tolist()):
        key = name.lower()
        if 'lat' in key:
            positions['lat'] = i
        elif 'long' in key:
            positions['long'] = i
        elif 'geohash' in key:
            positions['hash'] = i
    xs = []
    ys = []
    for values in alignmentdf.values.tolist():
        point_lat = values[positions['lat']]
        point_long = values[positions['long']]
        cell = values[positions['hash']]
        center_lat, center_long, half_lat, half_long = geohash.decode_exactly(cell)
        # Upper-left corner; half-widths double to the full cell span.
        corner_lat = center_lat + half_lat
        corner_long = center_long - half_long
        xs.append(abs(corner_long - point_long) / (half_long * 2))
        ys.append(abs(corner_lat - point_lat) / (half_lat * 2))
    alignmentdf['x'] = xs
    alignmentdf['y'] = ys
    return alignmentdf
def get_lon(geohash):
    """Longitude of the geohash centroid.

    Note: the parameter shadows the geohash module, which is why the module
    is accessed via its ``geo`` alias here.
    decode_exactly -> (lat, lon, lat_err, lon_err); index 1 is longitude.
    """
    return geo.decode_exactly(geohash)[1]
def get_aggregates():
    """Flask view: render a Google Maps heatmap of precision-7 geohash
    aggregates stored in ``collection`` for the requested datetime.

    Query param 'datetime' is parsed with the module-level ``fmt``.
    """
    start = datetime.strptime(request.args.get('datetime'), fmt)
    points = collection.find({"$and": [{"aggtime": start}, {"hashlen": 7}]})
    output = []
    heatdata = ""
    for p in points:
        # decode_exactly -> (lat, lon, lat_err, lon_err).
        loc = gh.decode_exactly(p['geohash7'])
        # NOTE(review): this local `str` shadows the builtin for the rest of
        # the loop body (harmless here, but fragile).
        str = "{" + "location: new google.maps.LatLng({1}, {2}), weight: {0}".format(
            p['count'] * 20, loc[0], loc[1]) + "},"
        heatdata += str
    # Splice the accumulated JS point literals into the page template.
    return """
<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <title>Heatmaps</title>
    <style>
      #map {
        height: 100%;
      }
      html, body {
        height: 100%;
        margin: 0;
        padding: 0;
      }
    </style>
  </head>
  <body>
    <div id="map"></div>
    <script>
      var map, heatmap;
      function initMap() {
        map = new google.maps.Map(document.getElementById('map'), {
          zoom: 13,
          center: {lat: 37.775, lng: -122.434},
          mapTypeId: 'satellite'
        });
        heatmap = new google.maps.visualization.HeatmapLayer({
          data: getPoints(),
          map: map,
          opacity: 1
        });
      }
      function getPoints() {
        return [
          #HEATDATA#
        ];
      }
    </script>
    <script async defer
        src="https://maps.googleapis.com/maps/api/js?key=AIzaSyDA4bPlz1-ity0j9rUXInpx4nvUxJVpQ28&libraries=visualization&callback=initMap">
    </script>
  </body>
</html>
""".replace("#HEATDATA#", heatdata)
def Tucker(datain, _x, _y, _z):
    """Non-negative Tucker decomposition of a (cell x hour x day) tensor of
    trip counts read from the CSV at *datain*.

    Ranks per mode are (_x, _y, _z).  Side effects: saves mode plots to
    pic/M1.png and pic/M2.png, writes the spatial factors to space.csv and
    calls generateShp on it.  Returns (hour_factors, day_factors).
    """
    # cell size and data range of coordinates (degrees; 200 cells per degree)
    xmax, xmin, ymax, ymin = 117.073, 115.668, 40.465, 39.465
    xsize, ysize = round((xmax - xmin) * 200), round((ymax - ymin) * 200)
    # import data and decode geohashed location
    df = pd.read_csv(datain)  #,date_parser='starttime')
    # decode_exactly -> (lat, lon, ...): start_x is longitude, start_y latitude.
    loc = [geohash.decode_exactly(i) for i in df['geohashed_start_loc']]
    df['start_x'], df['start_y'] = [i[1] for i in loc], [i[0] for i in loc]
    ## prepare a new dataset for group calculation
    df['datetime'] = pd.to_datetime(df['starttime'])
    dt = pd.DataFrame({
        'ts': df['datetime'],
        'id': df['orderid'],
        'x': df['start_x'],
        'y': df['start_y']
    })
    #dt=dt.query("ts >= '{}' and ts <= '{}'".format('2018-03-08 00:00:00', '2018-04-10 00:00:00'))
    dt = dt.set_index(['ts'])
    # Day index relative to May 10th: (month-5)*31 + day - 10.
    dt['date'] = [(month - 5) * 31 + day - 10
                  for month, day in zip(dt.index.month, dt.index.day)]
    dt['hour'] = dt.index.hour
    # Clip to the study bounding box.
    dt = dt[dt['x'] <= xmax]
    dt = dt[dt['x'] >= xmin]
    dt = dt[dt['y'] <= ymax]
    dt = dt[dt['y'] >= ymin]
    x = ((np.array(dt['x']) - xmin) * ysize) // 1
    y = ((np.array(dt['y']) - ymin) * ysize) // 1
    dt['loc'] = y * xsize + x  # transform x,y to cell id
    # join data: full (hour, day-index) grid so missing combinations become 0
    idx = [(x, y) for x in range(24)
           for y in [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14]]
    dt_join = pd.DataFrame({'idx': idx})
    dt_new = dt.groupby(['loc', 'hour', 'date']).count()['id']
    group_count = dt_new.unstack(['loc'])
    group_count['idx'] = group_count.index
    group_count = pd.merge(dt_join, group_count, how='left', on='idx')
    group_count = group_count.set_index(['idx'])
    group_count = group_count.fillna(0)
    # generate tensor to decomposition
    X = np.array(group_count)
    #max_cell=np.max(b,axis=0)
    #m=np.tile(max_cell,(15*24,1))
    #b=b/m
    X = X.reshape(24, 14, -1)  #15327
    X = X.transpose([2, 0, 1])  # tensor transpose to (cell, hour, day)
    # tensor decomposition
    X = tl.tensor(X)
    core, factors = non_negative_tucker(X, rank=[_x, _y, _z])  #non_negative_
    # plot hour-mode factors
    for i in range(factors[1].shape[1]):
        plt.plot(factors[1][:, i])
    plt.ylabel('Mode Value')
    plt.xlabel('Hour')
    plt.savefig('pic/M1.png', dpi=300)
    plt.show()
    # plot day-mode factors
    for i in range(factors[2].shape[1]):
        plt.plot(factors[2][:, i])
    plt.ylabel('Mode Value')
    plt.xlabel('Day')
    plt.savefig('pic/M2.png', dpi=300)
    plt.show()
    # export the spatial (cell) factors
    space_out = pd.DataFrame(factors[0], index=group_count.columns)
    space_out['id'] = space_out.index
    space_out.to_csv('space.csv', index=False)
    generateShp('space.csv')
    return factors[1], factors[2]
# Tail of a numeric-coercion helper whose definition lies above this chunk.
else: return int(i)

# Script: decode each geohash in the input file and write
# "hash,lat,lon,lat_err,lon_err" lines to the output file.
filt_in = "test_geostart.txt"
file_output = 'test_geostart_latlng.txt'
radar_echos = []
# outputfile = open(file_output, "w")
# NOTE(review): the open() above is commented out but `outputfile` is used
# below — as written this raises NameError unless outputfile exists globally.
i = 0
with open(filt_in, "r") as infile:
    for line in infile:
        context = line.split(",")
        #geohash.decode(context[0])
        print(context[0])
        print(geohash.decode(context[0]))
        outputfile.write(context[0])
        outputfile.write(',')
        # decode_exactly -> (lat, lon, lat_err, lon_err).
        geolatlng = geohash.decode_exactly(context[0])
        outputfile.write(str(geolatlng[0]))
        outputfile.write(',')
        outputfile.write(str(geolatlng[1]))
        outputfile.write(',')
        outputfile.write(str(geolatlng[2]))
        outputfile.write(',')
        outputfile.write(str(geolatlng[3]))
        outputfile.write('\n')
        #break
outputfile.close()
def encdec():
    """Demo of geohash encode/decode round-trips.

    The original used Python-2-only ``print`` statements; the parenthesized
    single-expression form below behaves identically on Python 2 and 3.
    """
    print('geohash.encode ' + geohash.encode(12.963787, 77.637789))
    print('geohash.encode with precision-5: ' +
          geohash.encode(12.963787, 77.637789, precision=5))
    print('geohash.decode ' + str(geohash.decode('tdr1wxype953')))
    print('geohash.decode exactly' + str(geohash.decode_exactly('tdr1wxype953')))
def map_points(point):
    """Decode a geohash to a 'lat,long' string of its centroid."""
    decoded = geohash.decode_exactly(point)
    return ','.join([str(decoded[0]), str(decoded[1])])
def make_line_index(data, h5filename, multiprocessing=False, split=1, processes=1, appendsize=5000, gidheader='gid'):
    """Build a geohash line index over *data* (a DataFrame of line geometries)
    and write it to intermediate process CSVs and, optionally, an HDF5 file.

    Relies on sibling helpers: remove_process_files, make_processes,
    get_cords_json, get_max_distance, fill_geohashs, make_csvs,
    make_line_mask and make_meta_lines.
    """
    # renaming columns
    gidbool = False
    newlist = []
    # checking to see if gid exists
    for row in data.columns:
        if row == gidheader:
            gidbool = True
            newlist.append('gid')
        else:
            newlist.append(row)
    # creating new column list
    data.columns = newlist
    # making the default number of processing for the platform
    if processes == 1 and multiprocessing == False:
        processes = cpu_count()
    if not processes == 1:
        multiprocessing = True
    # logic for making gid non comma separated:
    # '[a, b]'-style gids become 'a|b'.
    data['BOOL'] = data.gid.astype(str).str.contains('[', regex=False)
    if not len(data[data['BOOL'] == True]) == 0:
        newlist = []
        for i in data['gid'].values:
            if '[' in i:
                i = i[1:-1]
                i = i.replace(' ', '')
                i = str.split(i, ',')
                i = '|'.join(i)
            newlist.append(i)
        data['gid'] = newlist
    # removing processfiles
    remove_process_files()
    print('Creating multiple processes.')
    make_processes(data, h5filename, processes, appendsize)
    # NOTE(review): as reconstructed, everything below this return is
    # unreachable — the original source was whitespace-mangled and most
    # likely guarded the three lines above with a multiprocessing check.
    return []
    csvfilenamealignment = 'process%sa.csv' % split
    csvfilenameneighbor = 'process%sn.csv' % split
    # Seed both per-process CSVs with their header row.
    with open(csvfilenamealignment, 'wb') as b:
        b.write(b'"GEOHASH","TEXT"')
    with open(csvfilenameneighbor, 'wb') as b:
        b.write(b'"GEOHASH","TEXT"')
    try:
        os.remove(h5filename)
    except:
        pass
    coordbool = False
    # checking to see if a coords column exists
    for row in data.columns:
        if row.lower() == 'coords':
            coordbool = True
            coordheader = row
    # getting maxdistance if applicable
    maxdistance = False
    maxdistancebool = False
    for i in data.columns:
        if 'maxdistance' in str(i).lower():
            maxdistanceheader = i
            maxdistancebool = True
    # adding the correct maxdistance field
    if maxdistancebool == False:
        newlist = []
        for i in data['coords'].values:
            newlist.append(get_max_distance(get_cords_json(i)))
        data['maxdistance'] = newlist
        maxdistancebool = True
        maxdistanceheader = 'maxdistance'
    # logic for zipping together the right objects
    if gidbool == True:
        iterdata = data[['gid', coordheader, maxdistanceheader]].values
    else:
        iterdata = zip(range(len(data)), data[coordheader], data[maxdistanceheader])
    # retrieving the size of 1 geohash (precision 9) at the first point
    firstpoint = get_cords_json(data[coordheader].values[0])[0]
    lat, long, latdelta, longdelta = geohash.decode_exactly(geohash.encode(firstpoint[1], firstpoint[0], 9))
    ghashsize = ((latdelta * 2) ** 2 + (longdelta * 2) ** 2) ** .5
    # checking for cardinal coords and getting extrema if applicable
    ind = 0
    for row in data.columns:
        if 'north' in str(row).lower():
            nhead = row
            ind += 1
        if 'south' in str(row).lower():
            shead = row
            ind += 1
        if 'east' in str(row).lower():
            ehead = row
            ind += 1
        if 'west' in str(row).lower():
            whead = row
            ind += 1
    # logic for getting the extrema (only if all four cardinal columns exist)
    if ind == 4:
        extrema = {
            'n': data[nhead].max(),
            's': data[shead].min(),
            'w': data[whead].min(),
            'e': data[ehead].max()}
    else:
        extrema = []
    size = len(data)
    addgeohashs = []
    total = 0
    count = 0
    msgsize = 0
    # Fill geohashes per line, flushing to CSV every `appendsize` rows.
    for gid, coords, maxdistance in iterdata:
        coords = get_cords_json(coords)
        addgeohashs += fill_geohashs(coords, gid, 9, maxdistance, ghashsize)
        count += 1
        if count == appendsize:
            count = 0
            total += appendsize
            make_csvs(addgeohashs, csvfilenamealignment, csvfilenameneighbor, split, total, size)
            addgeohashs = []
    # appending add geohashs that are left over from the last append size
    if not count == 0:
        make_csvs(addgeohashs, csvfilenamealignment, csvfilenameneighbor, split, size, size)
    if multiprocessing == False:
        # making lines mask for no reason
        linemask1, linemask2 = make_line_mask(data)
        # make line index metadata
        metadata = make_meta_lines(8, 9, len(data), extrema)
        df = pd.DataFrame(['stringdf', json.dumps(linemask2), json.dumps(metadata)], index=['ultindex', 'areamask', 'metadata'])
        # writing output to h5 file
        if not h5filename == False:
            with pd.HDFStore(h5filename) as out:
                out['combined'] = df
                out['alignmentdf'] = data
            print('Made output h5 file containing datastructures:')
            print('\t- alignmentdf (type: pd.DataFrame)')
            print('\t- areamask (type: dict)')
            print('\t- ultindex (type: dict)')
            print('\t- metadata (type: dict)')
def geohash_decode(hashed_code):
    """Decode a single geohash into its centroid as [lat, lon]."""
    lat, lon = gh.decode_exactly(hashed_code)[:2]
    return [lat, lon]
import geohash

# Compute the bounding box of a set of geohash cells: for each edge, pick the
# cell whose (centroid +/- half-cell error) is extremal and print that edge.
# decode_exactly -> (lat, lon, lat_err, lon_err).
a = ['dre9', 'drft', 'dr8g', 'drfw', 'dr8j', 'dr8k', 'drfz', 'dre2', 'dr8n',
     'dre7', 'dr8s', 'drdm', 'dr8v', 'drdk', 'dr8t', 'dr8u', 'dr95', 'dr8y',
     'dr7b', 'dr9h', 'drk0', 'drhp', 'dre3', 'dpxf', 'dr7r', 'dr7k', 'dr7j',
     'dr7h', 'dr7m', 'drk8', 'dr5x', 'dr5z', 'dr7w', 'dpxu', 'dpxv', 'dr7t',
     'dr5q', 'dr5r', 'dr9g', 'dr5w', 'dre0', 'dr8h', 'drgw', 'drf5', 'dr9k',
     'drd9', 'dr9e', 'dres', 'dreh', 'drek', 'dr8e', 'dr85', 'dr9t', 'dr9v',
     'dree', 'dred', 'dr9s', 'dr6g', 'drfh', 'dpxg', 'dr72', 'dr77', 'dr75',
     'dr78']
# North edge: max over (lat + lat_err).
y1 = max(a, key=lambda x: geohash.decode_exactly(x)[0] + geohash.decode_exactly(x)[2])
print(geohash.decode_exactly(y1)[0] + geohash.decode_exactly(y1)[2])
# East edge: max over (lon + lon_err).
y1 = max(a, key=lambda x: geohash.decode_exactly(x)[1] + geohash.decode_exactly(x)[3])
print(geohash.decode_exactly(y1)[1] + geohash.decode_exactly(y1)[3])
# South edge: min over (lat - lat_err).
y1 = min(a, key=lambda x: geohash.decode_exactly(x)[0] - geohash.decode_exactly(x)[2])
print(geohash.decode_exactly(y1)[0] - geohash.decode_exactly(y1)[2])
# West edge: min over (lon - lon_err).
y1 = min(a, key=lambda x: geohash.decode_exactly(x)[1] - geohash.decode_exactly(x)[3])
# BUG FIX: the original printed lon + lon_err here (the EAST side of the
# westernmost cell); the west edge is lon - lon_err, matching the key above.
print(geohash.decode_exactly(y1)[1] - geohash.decode_exactly(y1)[3])