def build_geo_index_from_coord_index(coord_index, precision=5):
    """Build a ``GeoGridIndex`` holding one ``GeoPoint`` per coordinate pair.

    Parameters
    ----------
    coord_index : iterable of (lat, lng) pairs
        Each element must unpack into exactly two values.
    precision : int
        Forwarded unchanged to ``GeoGridIndex``.

    Returns
    -------
    GeoGridIndex
        The populated index.
    """
    grid = GeoGridIndex(precision=precision)
    for coords in coord_index:
        latitude, longitude = coords
        grid.add_point(GeoPoint(latitude, longitude))
    return grid
def __init__(self):
    """Create an empty geo index and open the SQLite database.

    Sets ``geo_index`` (precision 3), ``conn`` / ``cursor`` for the
    ``rockstar_02.db`` file, and ``debug`` (off by default).
    """
    self.geo_index = GeoGridIndex(precision=3)

    connection = sqlite3.connect('rockstar_02.db', isolation_level="DEFERRED")
    # Have text columns handed back through ``str``.
    connection.text_factory = str
    self.conn = connection
    self.cursor = connection.cursor()

    self.debug = False
def __load_geo_index(self):
    """Replace ``self.geo_index`` with a fresh index built from ``self.data``.

    Every entry of ``self.data`` must expose ``"lat"`` and ``"lng"`` values
    convertible with ``float``; the entry itself is attached to its point
    as ``ref``.
    """
    index = GeoGridIndex()
    self.geo_index = index
    for entry in self.data:
        point = GeoPoint(float(entry["lat"]), float(entry["lng"]), ref=entry)
        index.add_point(point)
def test_distance_km(self, precision=3):
    """Check the distances yielded by ``get_nearest_points`` with no unit given.

    Queries a radius of 10 around ``self.point_1bluxome`` and verifies the
    distance reported for the query point itself and for
    ``self.point_market_street``.  (Per the test name the default unit is
    presumably kilometres -- confirm against ``GeoGridIndex``.)
    """
    index = GeoGridIndex(precision=precision)
    # Explicit loop: ``map`` is lazy on Python 3, so using it for its side
    # effects would leave the index empty and the assertions below would
    # silently never execute.
    for point in self.points:
        index.add_point(point)

    for pt, distance in index.get_nearest_points(self.point_1bluxome, 10):
        if pt == self.point_1bluxome:
            self.assertEqual(distance, 0)
        if pt == self.point_market_street:
            self.assertEqual(distance, 1.301272755220718)
def build_geo_index_from_point_index(index, precision=5):
    """Build a ``GeoGridIndex`` from a mapping of point records.

    Parameters
    ----------
    index : mapping
        Values are dict-like records.  The latitude is read from
        ``'latitude'`` (falling back to ``'lat'``), the longitude from
        ``'longitude'`` (falling back to ``'lon'``), and the record's
        ``'id'`` is passed as the third ``GeoPoint`` argument.  The
        mapping's own keys are not used.
    precision : int
        Forwarded unchanged to ``GeoGridIndex``.

    Returns
    -------
    GeoGridIndex
        The populated index.
    """
    geo_index = GeoGridIndex(precision=precision)
    # ``items()`` exists on both Python 2 and 3 mappings, unlike
    # ``iteritems()`` which is Python 2 only.
    for _, point_info in index.items():
        lat = point_info.get('latitude', point_info.get('lat'))
        lon = point_info.get('longitude', point_info.get('lon'))
        geo_index.add_point(GeoPoint(lat, lon, point_info['id']))
    return geo_index
def test_distance_mi(self, precision=3):
    """Check the distances yielded by ``get_nearest_points`` with unit ``'mi'``.

    Queries a radius of 10 around ``self.point_1bluxome`` and verifies the
    distance reported for the query point itself and for
    ``self.point_market_street``.
    """
    index = GeoGridIndex(precision=precision)
    # Explicit loop: ``map`` is lazy on Python 3, so using it for its side
    # effects would leave the index empty and the assertions below would
    # silently never execute.
    for point in self.points:
        index.add_point(point)

    for pt, distance in index.get_nearest_points(self.point_1bluxome, 10, 'mi'):
        if pt == self.point_1bluxome:
            self.assertEqual(distance, 0)
        if pt == self.point_market_street:
            self.assertEqual(distance, 0.808573403337458)
def test_simple_accurate(self, precision=3):
    """Check how many points ``get_nearest_points`` yields per search radius.

    Around ``self.point_1bluxome``: radius 10 must yield exactly the query
    point and ``self.point_market_street``; radius 15 must yield 3 results
    and radius 34 must yield 4.
    """
    def glen(iterable):
        # Length of an arbitrary (possibly lazy) iterable.
        return len(list(iterable))

    index = GeoGridIndex(precision=precision)
    # Explicit loop: ``map`` is lazy on Python 3 and would add nothing.
    for point in self.points:
        index.add_point(point)

    ls = list(index.get_nearest_points(self.point_1bluxome, 10))
    eq_(glen(ls), 2)

    # Materialise the points: a lazy ``map`` object would be partially
    # consumed by the first membership check on Python 3.
    points = [pair[0] for pair in ls]
    self.assertIn(self.point_1bluxome, points)
    self.assertIn(self.point_market_street, points)

    eq_(glen(index.get_nearest_points(self.point_1bluxome, 15)), 3)
    eq_(glen(index.get_nearest_points(self.point_1bluxome, 34)), 4)
def test_bounds(self):
    """Check nearest-point lookups for points sitting on grid-cell borders.

    Five points share one latitude; four lie at (or just beside) two
    longitude boundaries and the fifth lies halfway between them.  With a
    radius of 57, each boundary point must see 3 points and the middle
    point must see all 5.
    """
    def glen(iterable):
        # Length of an arbitrary (possibly lazy) iterable.
        return len(list(iterable))

    # ezv block
    point1 = GeoPoint(43.59375, -4.21875)  # ezv
    point2 = GeoPoint(43.59375, -4.218750001)  # ezu
    point3 = GeoPoint(43.59375, -2.812500001)  # ezv
    point4 = GeoPoint(43.59375, -2.8125)  # ezy
    point5 = GeoPoint(43.59375, (-4.21875 + -2.8125) / 2)
    points = [point1, point2, point3, point4, point5]

    index = GeoGridIndex(precision=3)
    # Explicit loop: ``map`` is lazy on Python 3 and would add nothing.
    for point in points:
        index.add_point(point)

    self.assertEqual(glen(index.get_nearest_points(point1, 57)), 3)
    self.assertEqual(glen(index.get_nearest_points(point2, 57)), 3)
    self.assertEqual(glen(index.get_nearest_points(point3, 57)), 3)
    self.assertEqual(glen(index.get_nearest_points(point4, 57)), 3)
    self.assertEqual(glen(index.get_nearest_points(point5, 57)), 5)
def match_stops_to_nodes(gtfs, walk_network):
    """
    Match every stop that has straight-line transfers to its closest
    walk network node.

    Parameters
    ----------
    gtfs : a GTFS object
    walk_network : networkx.Graph
        every node must carry ``lat`` and ``lon`` attributes

    Returns
    -------
    stop_I_to_node: dict
        maps stop_I to closest walk_network node
        (None when no node lies within the largest search radius)
    stop_I_to_dist: dict
        maps stop_I to the distance in meters to the closest
        walk_network node (inf when no node was found)
    """
    # data=True yields (node, attribute_dict) pairs.  A string value such as
    # "true" would instead be interpreted by newer networkx versions as the
    # name of a single attribute to look up.
    network_nodes = walk_network.nodes(data=True)
    stop_Is = set(gtfs.get_straight_line_transfer_distances()['from_stop_I'])
    stops_df = gtfs.stops()

    geo_index = GeoGridIndex(precision=6)
    for net_node, data in network_nodes:
        geo_index.add_point(GeoPoint(data['lat'], data['lon'], ref=net_node))

    # Search radii in kilometres, tried in order; a wider radius is only
    # used when the previous one found no node at all.
    search_distances_km = [0.100, 0.500]

    stop_I_to_node = {}
    stop_I_to_dist = {}
    for stop_I in stop_Is:
        stop_row = stops_df[stops_df.stop_I == stop_I]
        stop_lat = float(stop_row.lat)
        stop_lon = float(stop_row.lon)
        geo_point = GeoPoint(stop_lat, stop_lon)

        min_dist = float('inf')  # meters
        min_dist_node = None
        for search_distance_km in search_distances_km:
            for point, distance_km in geo_index.get_nearest_points(
                    geo_point, search_distance_km, "km"):
                # Convert before comparing so both sides are in meters;
                # comparing km against a stored meter value would let a
                # farther node replace the current best.
                distance_m = distance_km * 1000
                if distance_m < min_dist:
                    min_dist = distance_m
                    min_dist_node = point.ref
            if min_dist_node is not None:
                break

        if min_dist_node is None:
            warn("No OSM node found for stop: " + str(stop_row))
        stop_I_to_node[stop_I] = min_dist_node
        stop_I_to_dist[stop_I] = min_dist
    return stop_I_to_node, stop_I_to_dist
def calc_transfers(conn, threshold_meters=1000):
    """Fill ``stop_distances`` with all stop pairs within a distance threshold.

    Every stop from the ``stops`` table is put into a geo grid index.  For
    each stop, candidate neighbours are fetched from the index, the
    pairwise distance is recomputed with ``wgs84_distance`` (rounded up),
    and pairs whose distance does not exceed ``threshold_meters`` are
    written with ``INSERT OR REPLACE``.  The remaining three columns of
    each row are written as NULL.

    Parameters
    ----------
    conn : database connection
        Used both for the ``GTFS`` wrapper and for the inserts.  This
        function does not commit.
    threshold_meters : int
        Maximum stop-to-stop distance to store.
    """
    threshold_km = threshold_meters / 1000.0
    geohash_precision = _get_geo_hash_precision(threshold_km)
    geo_index = GeoGridIndex(precision=geohash_precision)
    g = GTFS(conn)
    stops = g.get_table("stops")
    cursor = conn.cursor()

    stop_geopoints = []
    for stop in stops.itertuples():
        stop_geopoint = GeoPoint(stop.lat, stop.lon, ref=stop.stop_I)
        geo_index.add_point(stop_geopoint)
        stop_geopoints.append(stop_geopoint)

    for stop_geopoint in stop_geopoints:
        # NOTE(review): the "dirty" lookup presumably returns unfiltered
        # candidates, hence the exact distance check below -- confirm.
        nearby_stop_geopoints = geo_index.get_nearest_points_dirty(
            stop_geopoint, threshold_km, "km")
        from_stop_I = int(stop_geopoint.ref)
        from_lat = stop_geopoint.latitude
        from_lon = stop_geopoint.longitude

        rows = []
        for nearby_stop_geopoint in nearby_stop_geopoints:
            to_stop_I = int(nearby_stop_geopoint.ref)
            if to_stop_I == from_stop_I:
                # A stop is never stored as its own transfer target.
                continue
            distance = math.ceil(
                wgs84_distance(from_lat, from_lon,
                               nearby_stop_geopoint.latitude,
                               nearby_stop_geopoint.longitude))
            if distance <= threshold_meters:
                rows.append((from_stop_I, to_stop_I, distance, None, None, None))

        cursor.executemany(
            'INSERT OR REPLACE INTO stop_distances VALUES (?, ?, ?, ?, ?, ?);',
            rows)

    # The statement is idempotent, so issuing it once after all inserts is
    # sufficient.
    cursor.execute(
        'CREATE INDEX IF NOT EXISTS idx_sd_fsid ON stop_distances (from_stop_I);')
def initialize_matching(overwrite=False):
    """Load both hotel sets, drop already-matched rows and index the rest.

    Parameters
    ----------
    overwrite : bool
        When false, previously stored matches are loaded with
        ``load_matches()`` and their hotels are excluded from both sets.
        When true, matching starts from an empty dict.

    Returns
    -------
    tuple
        ``(amdh, bkgh, matches, geo_index, namdh)``: the remaining Amadeus
        and Booking frames, the matches dict (Amadeus id -> Booking id, as
        used by the exclusion step), a geo index over the remaining Booking
        hotels, and the number of Amadeus hotels before exclusion.
    """
    # 1. fetch all unmatched Amadeus hotels
    print("Loading Amadeus hotels")
    amdh = load_amadeus_from_db()
    namdh = len(amdh)
    print("Loaded %d hotels" % namdh)

    # 2. fetch all unmatched Booking.com hotels
    print("Loading Booking hotels")
    bkgh = load_booking()
    # bkgh = load_booking_from_mysql()
    print("Loaded %d hotels" % len(bkgh))

    # 3. load existing matches
    if not overwrite:
        print("Loading previous matches")
        matches = load_matches()
    else:
        matches = {}
    matched_amdids = list(matches)
    matched_bkgids = set(matches.values())

    # 4. Exclude already matched
    amdh = amdh[~amdh.amd_id.isin(matched_amdids)]
    bkgh = bkgh[~bkgh.bkg_id.isin(matched_bkgids)]
    # Single pre-formatted argument so the call behaves the same whether
    # ``print`` is a statement (Python 2) or a function (Python 3).
    print("%d Amadeus hotels left to match to %d Booking hotels"
          % (len(amdh), len(bkgh)))

    # 5. Build geo index
    print("Building Geo Index")
    geo_index = GeoGridIndex()
    for i, hb in bkgh.iterrows():
        # NOTE(review): a latitude of exactly 90 is rewritten to -90.0
        # before indexing -- presumably a workaround for a boundary
        # limitation of the index; confirm this is intended.
        if hb['lat'] == 90:
            hb['lat'] = -90.0
        geo_index.add_point(GeoPoint(hb['lat'], hb['lng'], ref=hb))
    return amdh, bkgh, matches, geo_index, namdh
# Load the Amadeus property sheet and convert it to the matching format.
amdh_full = pd.read_excel(
    './Amadeus All Properties - FEB 2017 17022017 .xlsx',
    header=0,
    index_col='PROPERTY_CODE',
    skiprows=1,
)
amdh = convert_amd_df_to_matching_format(amdh_full)
namdh = len(amdh)
print("Loaded %d hotels" % namdh)

print("Loading Booking hotels")
bkgh = load_booking()
# bkgh = load_booking_from_mysql()
print("Loaded %d hotels" % len(bkgh))

# Index every Booking hotel by position, keeping the row itself as ``ref``.
print("Building Geo Index")
geo_index = GeoGridIndex()
for i, hb in bkgh.iterrows():
    # NOTE(review): latitude 90 is rewritten to -90.0 before indexing --
    # presumably a boundary workaround; confirm.
    if hb['lat'] == 90:
        hb['lat'] = -90.0
    geo_index.add_point(GeoPoint(hb['lat'], hb['lng'], ref=hb))

# Three successive passes accumulate into the same ``matches`` dict.
# NOTE(review): the two numbers look like a neighbourhood size and a
# similarity cutoff -- confirm against ``match_in_neighborhood``.
matches = {}
for pass_label, pass_arg_a, pass_arg_b in (
        ("1st pass", 1, 0.6),
        ("2nd pass", 2, 0.75),
        ("3rd pass", 4, 0.8)):
    print(pass_label)
    match_in_neighborhood(amdh, geo_index, pass_arg_a, pass_arg_b, matches, namdh)
def test_wrong_precision(self):
    """A radius-100 query on a precision-4 index must be rejected.

    The raised exception's message has to match ``'precision=2'``.
    """
    index = GeoGridIndex(precision=4)
    with self.assertRaisesRegexp(Exception, 'precision=2'):
        # Force evaluation in case the lookup is lazy.
        list(index.get_nearest_points(self.point_market_street, 100))
def test_big_distance(self):
    """A radius-600 query around ``self.point_la`` must return every point.

    Uses a precision-2 index filled with all of ``self.points``.
    """
    index = GeoGridIndex(precision=2)
    # Explicit loop: ``map`` is lazy on Python 3 and would add nothing.
    for point in self.points:
        index.add_point(point)

    ls = list(index.get_nearest_points(self.point_la, 600))
    self.assertEqual(len(ls), len(self.points))