Example #1
async def getToilet(request):
    pool = request.app["pool"]
    data = await request.post()
    try:
        maxSize = int(data["mx"])
        w = float(data["w"])
        j = float(data["j"])
    except (KeyError, ValueError):  # fall back to query-string parameters
        data = request.query
        maxSize = int(data["mx"])
        w = float(data["w"])
        j = float(data["j"])
    geo = geohash.encode(w, j)
    neib = getN(geo)
    neib.append(geo)
    sql = ""
    for item in neib:
        sql += f"or geo like '{item[:6]}%' "
    sql = sql[3:]
    try:
        async with pool.acquire() as conn:
            values = await conn.fetch("select * from geo where " + sql)
            to = [[x["tid"], x["w"], x["j"]] for x in values]
            to.sort(
                key=lambda x: haversine(float(j), float(w), float(x[1]), float(x[2]))
            )
            res = {"mx": min(maxSize, len(to)), "ans": to[:maxSize]}
    except asyncpg.exceptions.UniqueViolationError:
        return web.Response(text="Already exist")
    return web.json_response(data=res)
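Most of the snippets in this collection assume a module-level haversine helper from the surrounding repo. Its argument order and return unit vary from repo to repo: some call sites pass longitude first, others latitude first, and while most treat the result as kilometers, a few (Examples #8, #27 and #34) clearly expect meters. For reference only, here is a minimal sketch of the lon-first, kilometers variant; it is an assumption, not the helper any of these repos actually ship:

import math

def haversine(lon1, lat1, lon2, lat2):
    """Great-circle distance in kilometers between two (lon, lat) points."""
    lon1, lat1, lon2, lat2 = map(math.radians, (lon1, lat1, lon2, lat2))
    a = (math.sin((lat2 - lat1) / 2) ** 2
         + math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2)
    return 2 * 6371 * math.asin(math.sqrt(a))  # mean Earth radius in km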
Example #2
 def height(self):
     # Corners of the multipolygon's envelope (bounding box).
     left = self.mpoly.envelope.tuple[0][0][0]
     bottom = self.mpoly.envelope.tuple[0][0][1]
     right = self.mpoly.envelope.tuple[0][1][0]
     top = self.mpoly.envelope.tuple[0][2][1]

     # North-south extent, measured along the box's center meridian.
     return haversine((left + right) / 2, top, (left + right) / 2, bottom)
Example #3
def get_retailer_closest_wholesalers(retailer, wholesalers):
    # Shamelessly pulled from the GeoRasterViewer code because it works really well.

    # Convert the wholesalers into arrays of lats/lons.
    wholesale_lats = wholesalers.Latitude.values
    wholesale_lons = wholesalers.Longitude.values
    wholesale_names = wholesalers.Wholesale_Name.values
    wholesale_ids = wholesalers.Location_ID.values

    # Calculate all distances, then isolate the closest wholesaler.
    distances = np.ndarray.flatten(
        haversine(retailer[0], retailer[1], wholesale_lats, wholesale_lons))
    closest_index = np.argmin(distances)
    closest_distance = distances[closest_index]

    # Consolidate into a dictionary and return.
    closest = {
        "Wholesaler": [wholesale_lats[closest_index], wholesale_lons[closest_index]],
        "Wholesaler_Name": wholesale_names[closest_index],
        "Wholesaler_ID": wholesale_ids[closest_index],
        "Distance": closest_distance,
    }
    return closest
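Calls like the one above pass NumPy arrays for one endpoint, so this repo's haversine evidently broadcasts. A vectorized sketch under that assumption (lat-first to match this call site, kilometers; the original's flatten() hints that its version returns a 2-D result, whereas this sketch returns a plain 1-D array):

import numpy as np

def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in km; any argument may be a scalar or ndarray."""
    lat1, lon1, lat2, lon2 = map(np.radians, (lat1, lon1, lat2, lon2))
    a = (np.sin((lat2 - lat1) / 2) ** 2
         + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2)
    return 2 * 6371 * np.arcsin(np.sqrt(a))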
Example #4
 def width(self):
     # Corners of the multipolygon's envelope (bounding box).
     left = self.mpoly.envelope.tuple[0][0][0]
     bottom = self.mpoly.envelope.tuple[0][0][1]
     right = self.mpoly.envelope.tuple[0][1][0]
     top = self.mpoly.envelope.tuple[0][2][1]

     # East-west extent, measured along the box's center parallel.
     return haversine(left, (top + bottom) / 2, right, (top + bottom) / 2)
Example #5
def home(request):
	location = request.GET.get('location')

	if location:
		lat, lon = get_location(location)
		a = get_restaurants(lat, lon)
		custom = []

		for restaurant in a['restaurants']:
			restaurant['restaurant']['review'] = get_reviews_details(restaurant['restaurant']['url'].split("?")[0])
			rlat = float(restaurant['restaurant']['location']['latitude'])
			rlong = float(restaurant['restaurant']['location']['longitude'])

			restaurant['restaurant']['distance'] = str(haversine(lon, lat, rlong, rlat)) + ' km'

		restaurants = json_normalize(a['restaurants'])
		return HttpResponse(restaurants.to_html())

		# return render_to_response("result.html", {'a': custom}, context_instance=RequestContext(request))
	else:
		return render_to_response("home.html", {}, context_instance=RequestContext(request))
Example #6
    def process_item(self, item, spider):
        col_loc = get_mongodb('geo', 'Locality', profile='mongodb-general')

        # get country
        country_code = item['country_code']
        if country_code not in QyerCityProcPipeline.country_map:
            col_country = get_mongodb('geo', 'Country', profile='mongodb-general')
            country = col_country.find_one({'code': country_code})
            assert country is not None
            QyerCityProcPipeline.country_map[country_code] = country
        else:
            country = QyerCityProcPipeline.country_map[country_code]

        city_id = item['city_id']
        city = col_loc.find_one({'source.qyer.id': city_id})

        if not city:
            city = col_loc.find_one({'alias': item['zh_name'].lower(),
                                     'location': {
                                         '$near': {'type': 'Point', 'coordinates': [item['lng'], item['lat']]}},
                                     'country._id': country['_id']})
            if city:
                dist = utils.haversine(city['location']['coordinates'][0], city['location']['coordinates'][1],
                                       item['lng'], item['lat'])
                if dist > 100:
                    city = {}

        if not city:
            city = {}

        city['enName'] = item['en_name']
        zh_name = item['zh_name']
        short_name = utils.get_short_loc(zh_name)
        city['zhName'] = short_name

        alias1 = city['alias'] if 'alias' in city and city['alias'] else []
        alias2 = item['alias'] if 'alias' in item and item['alias'] else []
        alias1.extend(alias2)
        alias1.append(short_name)
        city['alias'] = list(set(filter(lambda val: val, [tmp.lower().strip() for tmp in alias1])))

        source = city['source'] if 'source' in city else {}
        source['qyer'] = {'id': item['city_id'], 'url': item['url']}
        city['source'] = source
        city['country'] = {'id': country['_id'], '_id': country['_id']}
        for k in ('enName', 'zhName'):
            if k in country:
                city['country'][k] = country[k]

        city['level'] = 2
        city['desc'] = item['desc']
        city['imageList'] = item['imageList']
        city['images'] = []
        city['location'] = {'type': 'Point', 'coordinates': [item['lng'], item['lat']]}
        city['abroad'] = country_code != 'CN'
        city['isHot'] = item['is_hot'] > 0

        col_loc.save(city)

        return item
Example #7
    def search_proximity(cls, lat=50.848, lon=4.351, radius=8):
        """List stations within given radius from a location.

        Args:
            lat: latitude of the center of search, in decimal degrees
            lon: longitude of the center of search, in decimal degrees
            radius: maximum distance from center, in kilometers

        Default values are the approximate center and radius of Brussels.

        Returns:
            Dataframe of matching stations, listing sensor types,
                locations and distances in kilometers from the search
                center, indexed by station ID

        The search is based on the station list retrieved as part of the
        metadata.

        The irceline.be API offers an alternative way to get an
        (unordered) list of stations near a location:
        `https://geo.irceline.be/sos/api/v1/stations?
        near={{"center":{{"type":"Point","coordinates":[{lon},
        {lat}]}},"radius":{radius}}}`
        """
        near_stations = cls.stations.copy()
        near_stations["distance"] = (near_stations.apply(
            lambda x: haversine(lon, lat, x["lon"], x["lat"]), axis=1))
        near_stations = near_stations[near_stations["distance"] <= radius]
        near_stations.sort_values("distance", inplace=True)
        return near_stations
Example #8
def distance():
    # in meters
    lon1 = float(request.args.get('lon1'))
    lat1 = float(request.args.get('lat1'))
    lon2 = float(request.args.get('lon2'))
    lat2 = float(request.args.get('lat2'))
    d = round(utils.haversine(lon1, lat1, lon2, lat2), 2)
    return json.dumps(d)
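A hedged usage sketch for the endpoint above, assuming it is mounted on a Flask app at /distance (the route, host and port are assumptions, not shown in the snippet):

# GET http://localhost:5000/distance?lon1=4.35&lat1=50.85&lon2=4.40&lat2=50.90
# -> the JSON-encoded distance between the two points, rounded to 2 decimals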
Example #9
    def parse_geocode(self, response):
        item = response.meta['item']
        lang = response.meta['lang']
        try:
            data = json.loads(response.body)
            if data['status'] == 'OVER_QUERY_LIMIT':
                return Request(url=response.url, callback=self.parse_geocode, meta={'item': item, 'lang': lang},
                               headers={'Accept-Language': response.request.headers['Accept-Language'][0]},
                               dont_filter=True)
            elif data['status'] == 'ZERO_RESULTS':
                return
            elif data['status'] != 'OK':
                self.log('ERROR GEOCODING. STATUS=%s, URL=%s' % (data['status'], response.url))
                return

            city_result = None
            location = None
            for result in data['results']:
                # Only trust a result that is reasonably close to the original coordinates
                geometry = result['geometry']
                lat = geometry['location']['lat']
                lng = geometry['location']['lng']
                dist = utils.haversine(lng, lat, item['lng'], item['lat'])
                if dist > 100:
                    continue
                else:
                    city_result = result
                    location = [lng, lat]
                    break

            if city_result:
                # Take the first result entry whose types include 'political'
                address_components = list(filter(lambda val: 'political' in val['types'],
                                                 city_result['address_components']))
                data = address_components[0]

                short_name = data['short_name']
                long_name = data['long_name']
                s = set(item['alias'])
                s.add(short_name.lower())
                s.add(long_name.lower())
                k = 'zh_name' if lang == 'zh' else 'en_name'
                s.add(item[k].lower())
                item[k] = long_name
                item['alias'] = list(s)
                if location:
                    item['lng'] = location[0]
                    item['lat'] = location[1]

        except (KeyError, IndexError):
            self.log('ERROR GEOCODING: %s' % response.url, log.WARNING)

        if lang == 'zh':
            return Request(url='http://maps.googleapis.com/maps/api/geocode/json?address=%s,%s&sensor=false' % (
                item['en_name'], item['en_country']), callback=self.parse_geocode, meta={'item': item, 'lang': 'en'},
                           headers={'Accept-Language': 'en-US'}, dont_filter=True)
        else:
            return item
Example #10
    def way(self, w):  # default function called on edge

        # Skip closed ways, i.e. ways that start and end at the same node.
        if w.nodes[0].ref == w.nodes[-1].ref:
            return
        for i in range(len(w.nodes) - 1):
            start = GraphNode(w.nodes[i].ref, w.nodes[i].location)
            end = GraphNode(w.nodes[i + 1].ref, w.nodes[i + 1].location)
            distance = haversine(start, end)
            edge = GraphEdge(start.node_id, end.node_id, distance, 4)
            self.neo_handler.add_edge(edge)
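Here haversine is called with two GraphNode objects rather than four scalars, so this repo's helper presumably unpacks the node locations internally. A hedged wrapper sketch, assuming GraphNode exposes the osmium Location (with .lon/.lat) as .location and delegating to a four-argument scalar helper like the one sketched under Example #1 (renamed haversine_scalar here, a hypothetical name, to avoid shadowing):

def haversine(start, end):
    # Unpack each GraphNode's osmium Location and delegate to the
    # scalar lon/lat helper.
    return haversine_scalar(start.location.lon, start.location.lat,
                            end.location.lon, end.location.lat)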
Example #11
 def get_bounds(self, indices):
     tolerance = .0001  # about 11 meters; without it we lose some records
     xmin, ymin, xmax, ymax = self.gdf.iloc[indices].total_bounds
     xmin = xmin - tolerance
     ymin = ymin - tolerance
     xmax = xmax + tolerance
     ymax = ymax + tolerance
     partition_size = len(indices)  # keep regions small enough that cross-product matching stays fast
     partition_span = utils.haversine(xmin, ymin, xmax, ymax)
     return xmin, ymin, xmax, ymax, partition_size, partition_span
Example #12
def maps_directions(start_lat, start_lon, end_lat, end_lon, gmaps):
    # directions API can take many types of input, including strings
    start_string = "{},{}".format(start_lat, start_lon)
    end_string = "{},{}".format(end_lat, end_lon)
    result = gmaps.directions(origin=start_string, destination=end_string)

    # Pull the travelled (driving) length from the JSON output and convert
    # meters to kilometers (* 0.001).
    manhattan_distance = float(
        result[0]['legs'][0]['distance']['value']) * 0.001
    euclidean_distance = haversine(float(start_lat), float(start_lon),
                                   float(end_lat), float(end_lon))
    ratio = manhattan_distance / euclidean_distance
    return euclidean_distance, manhattan_distance, ratio
Example #13
def get_data(data, lon1, lat1, iata1, departure_date):
    iata2 = data.iata
    if iata1 != iata2:
        lat2 = data.lat
        lon2 = data.lon
        distance = haversine(lon1, lat1, lon2, lat2)
        url_mockup = 'http://stub.2xt.com.br/air/search/qhjvlDvYOwbbu9yq9Dq9DpfCqEbqWfvO/' + iata1 + '/' + iata2 + '/' + departure_date
        value = requests.get(url_mockup,
                             auth=HTTPBasicAuth('leandroalmeida', 'tefvlD'))
        options = pd.DataFrame(value.json()['options'])
        if not options.empty:
            x = options.apply(lambda column: aircraft_values(column, distance), axis=1)
            save(x, url_mockup, iata1, iata2)
Example #14
    def find_closest_store(self, address, units='mi'):
        store = utils.convert_to_1D({'address': address})

        (lo, hi) = self.t.closest(store['id'])

        if lo:
            dist1 = utils.haversine(store['lat'], store['lon'], lo['lat'],
                                    lo['lon'], units)
        else:
            dist1 = float('inf')

        if hi:
            dist2 = utils.haversine(store['lat'], store['lon'], hi['lat'],
                                    hi['lon'], units)
        else:
            dist2 = float('inf')

        if dist1 < dist2:
            lo['distance'] = "%0.3f %s" % (dist1, units)
            return lo
        else:
            hi['distance'] = "%0.3f %s" % (dist2, units)
            return hi
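Example #14 threads a units string through to utils.haversine. A hedged sketch of such a variant, inferred from the call sites above (lat-first, fifth positional argument selecting miles or kilometers); the actual utils module is not shown, so this is an assumption:

import math

def haversine(lat1, lon1, lat2, lon2, units='mi'):
    """Great-circle distance; returns miles when units == 'mi', else km."""
    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))
    a = (math.sin((lat2 - lat1) / 2) ** 2
         + math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2)
    km = 2 * 6371 * math.asin(math.sqrt(a))
    return km * 0.621371 if units == 'mi' else km  # 1 km = 0.621371 mi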
Example #15
def search_proximity(lat=50.848, lon=4.351, radius=8):
    """Find sensors within given radius from a location.

    Args:
        lat: latitude of the center of search, in decimal degrees
        lon: longitude of the center of search, in decimal degrees
        radius: maximum distance from center, in kilometers

    Default values are the approximate center and radius of Brussels.

    Returns:
        Dataframe of matching sensors, listing sensor types, locations
        and distances in kilometers from the search center, indexed by
        sensor ID
    """
    url = (API_ENDPOINTS["proximity search pattern"].format(lat=lat,
                                                            lon=lon,
                                                            radius=radius))
    _json = requests.get(url).json()
    sensors = json_normalize(_json)
    if len(sensors) == 0:
        sensors = pd.DataFrame(
            columns=["sensor_type", "latitude", "longitude", "distance"])
        sensors.index.name = "sensor_id"
        return sensors
    sensors = (sensors[[
        "sensor.id", "sensor.sensor_type.name", "location.latitude",
        "location.longitude"
    ]].rename(
        columns={
            "sensor.id": "sensor_id",
            "sensor.sensor_type.name": "sensor_type",
            "location.latitude": "latitude",
            "location.longitude": "longitude"
        }))
    for col in "latitude", "longitude":
        sensors[col] = pd.to_numeric(sensors[col], downcast="float")
    sensors.set_index("sensor_id", inplace=True)

    # Drop duplicates - sensors appear once for each measurement in past 5 mins
    sensors = sensors[~sensors.index.duplicated()]

    # Calculate distances from search center and sort by those distances
    sensors["distance"] = sensors.apply(lambda x: haversine(
        lat, lon, float(x["latitude"]), float(x["longitude"])),
                                        axis=1)
    sensors.sort_values("distance", inplace=True)

    return sensors
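A hedged usage note: with the defaults, the call below would fetch all sensors within 8 km of central Brussels and hand back a DataFrame already sorted by distance:

sensors = search_proximity()  # defaults: lat=50.848, lon=4.351, radius=8
nearest = sensors.iloc[0]     # closest sensor: type, latitude, longitude, distance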
Example #16
 def __get_centroid_medoid_node_region(region):
     lat_tot = lon_tot = 0
     for v in region:
         lat_tot += region[v][2]['lat']
         lon_tot += region[v][2]['lon']
     lat_centroid = lat_tot / len(region)
     lon_centroid = lon_tot / len(region)
     min_distance = float('inf')
     medoid = None
     for v in region:
         dist = haversine(region[v][2]['lat'], region[v][2]['lon'], lat_centroid, lon_centroid)
         if dist < min_distance:
             min_distance = dist
             medoid = v
     return (lat_centroid, lon_centroid), medoid
Example #17
def reverse(db, lon, lat):
    min_distance = 50000000  # bigger than Earth's circumference
    match = None
    h = geohash(lon, lat)
    arg = find_index(h, db.numbers_geohash_index, db.numbers['geohash'])
    k = 100  # k-nearest
    lo = max(0, arg - k // 2)
    hi = min(db.numbers_geohash_index.size, arg + k // 2)
    for hyp in db.numbers_geohash_index[lo:hi]:
        hlon, hlat = reverse_geohash(db.numbers[hyp]['geohash'])
        # Haversine gives the real orthodromic (great-circle) distance
        d = haversine(hlon, hlat, lon, lat)
        if d < min_distance:
            min_distance = d
            match = hyp
    return Result.from_plate(db, match, 0, distance=min_distance)
Example #18
    def dianping_match(self, entry):
        """
        进行match操作
        """
        city_info = self.city_map[entry['locality']['_id']]
        city_id = city_info['city_id']
        shop_name = entry['zhName']

        context = {'city_info': city_info, 'shop_name': shop_name}

        url = 'http://www.dianping.com/search/keyword/%d/0_%s' % (city_id,
                                                                  shop_name)
        search_response = self.request.get(url,
                                           user_data={
                                               'ProxyMiddleware': {
                                                   'validator':
                                                   self.default_validator
                                               }
                                           })
        if search_response.status_code == 404:
            return

        shop_list = list(self.parse_search_list(search_response, context))
        if not shop_list:
            return

        self.store_shops(shop_list)

        the_shop = shop_list[0]
        # Check whether the coordinates are consistent
        try:
            coords1 = entry['location']['coordinates']
            coords2 = [the_shop['lng'], the_shop['lat']]
        except KeyError:
            return

        from utils import haversine

        # Allow at most 1 km of error
        max_distance = 1
        try:
            if haversine(coords1[0], coords1[1], coords2[0],
                         coords2[1]) < max_distance:
                self.bind_shop_id(entry, the_shop['shop_id'])
        except TypeError:
            self.log('Unable to locate shop: %d' % the_shop['shop_id'],
                     logging.WARN)
Example #19
async def get_data_test(data1, data2, departure_date, session, df):
    iata1 = df[data1].iata
    iata2 = df[data2].iata
    if iata1 != iata2:
        lat1 = df[data1].lat
        lat2 = df[data2].lat
        lon1 = df[data1].lon
        lon2 = df[data2].lon
        distance = haversine(lon1, lat1, lon2, lat2)
        url_mockup = 'http://stub.2xt.com.br/air/search/qhjvlDvYOwbbu9yq9Dq9DpfCqEbqWfvO/' + iata1 + '/' + iata2 + '/' + departure_date
        async with session.get(url_mockup,
                               auth=aiohttp.BasicAuth('leandroalmeida',
                                                      'tefvlD')) as response:
            value = await response.json()
        options = pd.DataFrame(value['options'])
        if not options.empty:
            return opapply(options, distance, url_mockup, iata1, iata2)
Example #20
def villages_in_range(villages, origin):
    near_lat = np.array([origin['lat']])
    near_lon = np.array([origin['lon']])
    far_lats = villages.Latitude.values
    far_lons = villages.Longitude.values

    distance = np.ndarray.flatten(haversine(near_lat, near_lon, far_lats, far_lons))
    villages["From_Center"] = distance

    within_range = villages.loc[villages["From_Center"] <= origin['radius']]
    within_range.to_csv("villages_within_{}.csv".format(origin['radius']), index=False)

    # Print quick values for the user to feed into georasterviewer, with a little extra buffer space.
    print("\nGeoraster Values:\n===================\nMin Lat: {}\nMax Lat: {}\nMin Lon: {}\nMax Lon: {}".format(
        within_range['Latitude'].min() - 0.15,
        within_range['Latitude'].max() + 0.15,
        within_range['Longitude'].min() - 0.15,
        within_range['Longitude'].max() + 0.15,
    ))
Example #21
 def get_neighbor(self, lat, lng):
     '''
         rtype: tuple(error, json(result))
         Find the nearest stations by calculating the distance from (lat, lng)
     '''
     self.check_full()
     dist_dict = {i:utils.haversine(self.crawl_data[i]['latlng'] ,float(lat), float(lng)) \
             for i in self.crawl_data if self.crawl_data[i]['num_ubike']}
     sort_dict = OrderedDict(sorted(dist_dict.items(), key=lambda t: t[1]))
     near_station = []
     # Debug output: station names in order of distance
     for idx in sort_dict:
         print(self.crawl_data[idx]['sna'])
     for idx in sort_dict:
         Order = OrderedDict(
             (('station', self.crawl_data[idx]['sna']),
              ('num_ubike', self.crawl_data[idx]['num_ubike'])))
         near_station.append(Order)
         if len(near_station) == 2:
             return 0, near_station
Example #22
def main(api_key):
	# Set up gmaps object with API key
	gmaps = googlemaps.Client(key=api_key)

	## Get global variables
	start_string = 'The Anchor, 34 Park St, Southwark, London SE1 9EF'
	end_string = 'Southwark Brewing Company'
	n_places = 2
	radius_factor = 1
	keywords = ['bar','beer']
	mode="bicycling"

	## Get location info on start and end points
	start = get_lat_lng(gmaps,start_string)
	end = get_lat_lng(gmaps,end_string)

	print(start_string, start)
	print(end_string, end)

	##Find centre and radius
	c_cen = get_centre_lat_long(start,end)
	
	print "centre coord", c_cen

	# You may prefer to use the text_search API, instead.
	radius = haversine(c_cen['lat'],c_cen['lng'],start['lat'],start['lng'])*1000

	print "radius metres", radius
	
	##Get nearby places
	nearby = gmaps.places_radar(
		(c_cen['lat'], c_cen['lng']),
		radius=radius*radius_factor,
		keyword=keywords
		)

	###get info lists
	place_ids = [x['place_id'] for x in nearby['results']]
	place_info = [gmaps.place(x) for x in place_ids]
	place_names = [x['result']['formatted_address'] for x in place_info]
Example #23
def localization_error(camera_measured, camera_true, alt, subsampling=100):
    """
    Compute localization errors for a given camera wrt a given true camera.

    Args:
        camera_measured: measured camera
        camera_true: true camera, the error is computed with respect to that one
        alt: altitude (above the sea level) at which the localization error is
            computed
        subsampling (default 100): number of localization error samples

    Returns:
        list of localization errors.
    """
    out = []
    c = camera_true.instrument.n_pix / 2  # middle of the row
    for r in range(0, np.round(camera_true.lig_f).astype(int), subsampling):
        lon1, lat1 = camera_measured.locdir(r, c, alt)
        lon2, lat2 = camera_true.locdir(r, c, alt)
        radius = ps.PhysicalConstants.earth_radius + alt
        out.append(utils.haversine(radius, lon1, lat1, lon2, lat2))
    return out
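Unlike the other snippets, localization_error passes an explicit sphere radius (Earth radius plus altitude) as the first argument, so this repo's utils.haversine is evidently generalized to an arbitrary sphere. A hedged sketch of that signature, an inference from the call site rather than the repo's actual code:

import math

def haversine(radius, lon1, lat1, lon2, lat2):
    """Great-circle distance on a sphere of the given radius (same unit out)."""
    lon1, lat1, lon2, lat2 = map(math.radians, (lon1, lat1, lon2, lat2))
    a = (math.sin((lat2 - lat1) / 2) ** 2
         + math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2)
    return 2 * radius * math.asin(math.sqrt(a))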
Example #24
    def query_time_series(cls, phenomenon, lat_nearest=None, lon_nearest=None):
        """Convenience method to filter time series for those that
        measure a given phenomenon, and sort by distance to a point if
        given.

        Args:
            phenomenon: character sequence or regular expression to
                filter phenomena by; operates on the "phenomenon" column
                of the time_series dataframe
            lat_nearest: latitude of the reference point
            lon_nearest: longitude of the reference point

        Returns:
            Subset of time_series property. If lat_nearest and
                lon_nearest are given, the result has an additional
                column indicating distance in km from that point, and is
                sorted by that distance.

        Raises:
            ValueError if only one of lat_nearest, lon_nearest is given
        """
        if (lat_nearest is None) != (lon_nearest is None):
            raise ValueError("Provide both or none of lat_nearest, "
                             "lon_nearest")
        phenomena_lower = cls.time_series["phenomenon"].str.lower()
        matches = phenomena_lower.str.contains(phenomenon.lower())
        results = cls.time_series[matches].copy()
        if lat_nearest is None:
            return results
        if len(results) == 0:
            results["distance"] = None
            return results
        results["distance"] = results.apply(lambda row:
                                            haversine(lat_nearest, lon_nearest,
                                                      row["station_lat"],
                                                      row["station_lon"]),
                                            axis=1)
        results = results.sort_values("distance")
        return results
Example #26
    def locate_point(point, nodes):
        """
        Find the best corresponding node to the point in the given list.
        Parameters:
            point Point to evaluate.
            nodes List of nodes.
        Returns:
            The best node in the list or None if not considered in a node.
        """

        best_dist = 10000  # sentinel larger than any expected distance
        best_node = None

        for node in nodes:
            dist = haversine(point, node.position)

            # Take the closest node, make sure the point is in the city circle
            if dist < best_dist and dist < node.radius:
                best_dist = dist
                best_node = node

        return best_node
Example #27
    def trackSummary(self):
        """ Parse only the key data needed for the model in GPXViewer.
        Details will only be read on demand.

        This function populates the summary dictionary.

        """
        # populate the summary dict w/ the filename
        self.summary["file"] = self._source

        # date as ISO string
        # with some sources the date is only in the metadata and not each track point
        # self.summary["date"] = self.allPoints[0].find('gpx:time', self.namespaces).text
        metaData = self._root.findall('gpx:metadata', self.namespaces)
        for md in metaData:
            time = md.findall('gpx:time', self.namespaces)

        self.summary["date"] = time[0].text

        # duration
        t0 = dateutil.parser.parse(self.allPoints[0].find('gpx:time',
            self.namespaces).text)
        t1 = dateutil.parser.parse(self.allPoints[-1].find('gpx:time',
            self.namespaces).text)
        self.summary["duration"] = (t1-t0).total_seconds()

        # distance
        lat = []
        lon = []
        for point in self.allPoints:
            lat.append(float(point.attrib['lat']))
            lon.append(float(point.attrib['lon']))
        _dist = sum([haversine(y0, x0, y1, x1)  for x0, x1, y0, y1 in zip(
            lat[:-1], lat[1:], lon[:-1], lon[1:])])
        self.summary["distance"] = _dist

        # and average speed in km/h
        self.summary["speed"] = 3.6 * self.summary["distance"]/self.summary["duration"]
Example #28
 def calc_length(self):
     return utils.haversine(self.n_station.lng, self.n_station.lat, self.s_station.lng, self.s_station.lat)
Example #29
def RestaurantsListView(request, username):

    user = User.objects.get(username=username)
    user_restaurants_ids = [r.id for r in user.restaurants_following.all()]

    restaurants = Restaurant.objects.all().select_related('user', 'restaurant', 'food')
    restaurants_list = []

    if request.GET.get('search', False):
        query_string = request.GET.get('search', False)
        restaurants = restaurants.filter(Q(name__icontains=query_string) | Q(description__icontains=query_string) | Q(location_name__icontains=query_string))
    if request.GET.get('following', False):
        restaurants = restaurants.filter(restaurants_following__in=[user])
    if request.GET.get('friends_following', False):
        friends = [u['id'] for u in user.following.values('id')]
        restaurants = restaurants.filter(restaurants_following__in=friends)
    if request.GET.get('recommended', False):
        restaurants = restaurants.filter(is_recommended=True)
    if request.GET.get('me_like', False):
        restaurant_ids = user.foods_liked.values('restaurant__id')
        seen = set()
        unique_rids = [r['restaurant__id'] for r in restaurant_ids if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('friends_like', False):
        restaurant_ids = user.following.values('foods_liked__restaurant__id')
        seen = set()
        unique_rids = [r['foods_liked__restaurant__id'] for r in restaurant_ids if r['foods_liked__restaurant__id'] not in seen and not seen.add(r['foods_liked__restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('me_review', False):
        restaurant_ids = Review.objects.filter(user=user).values('restaurant__id')
        seen = set()
        unique_rids = [r['restaurant__id'] for r in restaurant_ids if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('recommended_people_review', False):
        users = User.objects.filter(is_recommended=True)
        restaurant_ids = Review.objects.filter(user__in=users).values('restaurant__id')
        seen = set()
        unique_rids = [r['restaurant__id'] for r in restaurant_ids if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('friends_review', False):
        friends = user.following.all()
        restaurant_ids = Review.objects.filter(user__in=friends).values('restaurant__id')
        seen = set()
        unique_rids = [r['restaurant__id'] for r in restaurant_ids if r['restaurant__id'] not in seen and not seen.add(r['restaurant__id'])]
        restaurants = restaurants.filter(id__in=unique_rids)
    if request.GET.get('amenity_ids', False):
        amenity_ids = request.GET.get('amenity_ids', False).split(',')
        restaurants = restaurants.filter(amenities__in=amenity_ids)
    if request.GET.get('dietary_ids', False):
        dietary_ids = request.GET.get('dietary_ids', False).split(',')
        food_ids = Food.objects.filter(dietary__in=dietary_ids)
        restaurants = restaurants.filter(food__in=food_ids)
    if request.GET.get('cuisine_ids', False):
        cuisine_ids = request.GET.get('cuisine_ids', False).split(',')
        food_ids = Food.objects.filter(cuisine__in=cuisine_ids)
        restaurants = restaurants.filter(food__in=food_ids)

    # filter by range
    if request.GET.get('price_max', False):
        price_max = request.GET.get('price_max', False)
        restaurants = restaurants.filter(price_high__lte=int(price_max))
    if request.GET.get('price_min', False):
        price_min = request.GET.get('price_min', False)
        restaurants = restaurants.filter(price_low__gte=int(price_min))

    distance_max = request.GET.get('distance_max', False)
    if distance_max:
        distance_max = float(distance_max)
    distance_min = request.GET.get('distance_min', False)
    if distance_min:
        distance_min = float(distance_min)

    # sorting by non-derived field
    sort = request.GET.get('sort', False)
    if sort == 'price':
        restaurants = restaurants.extra(select={'price_range': 'price_high + price_low'}).extra(order_by=['price_range'])

    # get distinct restaurants
    restaurants = unique(restaurants)

    for restaurant in restaurants:
        restaurant_obj = {}
        restaurant_obj['name'] = restaurant.name
        restaurant_obj['id'] = restaurant.id
        restaurant_obj['location_name'] = restaurant.location_name
        restaurant_obj['location'] = {'x':restaurant.location_x, 'y':restaurant.location_y}

        restaurant_obj['dist'] = haversine(float(user.location_x), float(user.location_y), float(restaurant.location_x), float(restaurant.location_y))
        restaurant_obj['distance'] = '{0:0.2f}km'.format(restaurant_obj['dist'])

        # if a distance filter has been set, we only add qualifying restaurants
        if distance_max:
            if restaurant_obj['dist'] > distance_max:
                continue
        if distance_min:
            if restaurant_obj['dist'] < distance_min:
                continue

        restaurant_obj['photo'] = restaurant.photo
        restaurant_obj['price_low'] = '${0:0.0f}'.format(restaurant.price_low)
        restaurant_obj['price_high'] = '${0:0.0f}'.format(restaurant.price_high)
        # restaurant_obj['amenities'] = [{'id': res.id, 'image': res.image} for res in restaurant.amenities.all()]


        # get the people following this restaurant
        restaurant_obj['followed_by'] = [{'user_id':person.id, 'username': person.username, 'photo': person.photo} for person in User.objects.filter(restaurants_following__in=[restaurant])[:7]]
        restaurant_obj['following_count'] = User.objects.filter(restaurants_following__in=[restaurant]).count()

        restaurant_obj['is_following'] = (restaurant.id in user_restaurants_ids)
        restaurant_obj['is_recommended'] = restaurant.is_recommended

        # ratings
        reviews = Review.objects.filter(restaurant__in=[restaurant])
        if reviews.count():
            rating = 0
            for review in reviews:
                rating = rating + review.rating
            rating = rating / reviews.count()
        else:
            rating = 0

        restaurant_obj['rating'] = rating
        restaurant_obj['reviews_count'] = reviews.count()

        restaurants_list.append(restaurant_obj)

    # sorting by derived field
    if sort == 'followers':
        restaurants_list = sorted(restaurants_list, key=lambda x: x['following_count'], reverse=True)
    elif sort == 'location':
        restaurants_list = sorted(restaurants_list, key=lambda x: x['dist'])
    elif sort == 'ratings':
        restaurants_list = sorted(restaurants_list, key=lambda x: x['rating'])

    return HttpResponse(json.dumps(restaurants_list), content_type="application/json")
Example #30
def FoodListView(request, username):

    food_list_serialized = []
    user = User.objects.get(username=username)

    food_list = Food.objects.all().select_related('user', 'restaurant', 'food')

    if request.GET.get('search', False):
        query_string = request.GET.get('search', False)
        food_list = food_list.filter(Q(name__icontains=query_string) | Q(description__icontains=query_string))
    if request.GET.get('liked', False):
        food_list = food_list.filter(foods_liked__in=[user]).order_by('-id')
    if request.GET.get('friends_like', False):
        friends = [u['id'] for u in user.following.values('id')]
        food_list = food_list.filter(foods_liked__in=friends).order_by('-id')
    if request.GET.get('recommended', False):
        restaurants = Restaurant.objects.filter(is_recommended=True)
        food_list = food_list.filter(restaurant__in=restaurants).order_by('-id')
    if request.GET.get('following', False):
        restaurants = Restaurant.objects.filter(restaurants_following__in=[user])
        food_list = food_list.filter(restaurant__in=restaurants).order_by('-id')
    if request.GET.get('friends_following', False):
        friends = user.following.all()
        restaurants = Restaurant.objects.filter(restaurants_following__in=friends)
        food_list = food_list.filter(restaurant__in=restaurants).order_by('-id')
    if request.GET.get('disliked', False):
        food_list = food_list.filter(foods_disliked__in=[user]).order_by('-id')
    if request.GET.get('explore', False):
        food_list = food_list.exclude(foods_liked__in=[user]).exclude(foods_disliked__in=[user]).order_by('id')
    if request.GET.get('dietary_ids', False):
        dietary_ids = request.GET.get('dietary_ids', False).split(',')
        food_list = food_list.filter(dietary__in=dietary_ids)
    if request.GET.get('cuisine_ids', False):
        cuisine_ids = request.GET.get('cuisine_ids', False).split(',')
        food_list = food_list.filter(cuisine__in=cuisine_ids)

    # filter by range
    if request.GET.get('price_max', False):
        price_max = request.GET.get('price_max', False)
        food_list = food_list.filter(price__lte=int(price_max))
    if request.GET.get('price_min', False):
        price_min = request.GET.get('price_min', False)
        food_list = food_list.filter(price__gte=int(price_min))

    distance_max = request.GET.get('distance_max', False)
    if distance_max:
        distance_max = float(distance_max)
    distance_min = request.GET.get('distance_min', False)
    if distance_min:
        distance_min = float(distance_min)

    # sorting by non-derived field
    sort = request.GET.get('sort', False)
    if sort == 'price':
        food_list = food_list.order_by('price')

    food_list = unique(food_list)

    for food in food_list:

        food_obj = {}
        food_obj['id'] = food.id
        food_obj['name'] = food.name
        # food_obj['description'] = food.description
        food_obj['price'] = '${0:0.2f}'.format(food.price)

        food_obj['dist'] = haversine(float(user.location_x), float(user.location_y), float(food.restaurant.location_x), float(food.restaurant.location_y))
        food_obj['distance'] = '{0:0.2f}km'.format(food_obj['dist'])

        # if a distance filter has been set, we only add qualifying restaurants
        if distance_max:
            if food_obj['dist'] > distance_max:
                continue
        if distance_min:
            if food_obj['dist'] < distance_min:
                continue

        food_obj['photo'] = food.photo
        food_obj['restaurant'] = food.restaurant.name
        food_obj['restaurant_id'] = food.restaurant.id
        food_obj['dietary_ids'] = [{'id':i.id, 'name':i.name} for i in food.dietary.all()]
        food_obj['cuisine_ids'] = [{'id':i.id, 'name':i.name} for i in food.cuisine.all()]

        food_obj['is_liked'] = food in user.foods_liked.all()
        food_obj['num_likes'] = User.objects.filter(foods_liked__in=[food]).count()

        food_list_serialized.append(food_obj)

    # sorting by derived field
    if sort == 'likes':
        food_list_serialized = sorted(food_list_serialized, key=lambda x: x['num_likes'], reverse=True)
    elif sort == 'location':
        food_list_serialized = sorted(food_list_serialized, key=lambda x: x['dist'])

    return HttpResponse(json.dumps(food_list_serialized), content_type="application/json")
Example #31
 def dist(self, other):
     """ Returns the distance between two nodes in kilometers. """
     return haversine(self.position, other.position)
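Examples #26 and #31 call haversine with two position objects instead of four scalars, which matches the two-point signature of the PyPI haversine package (two (lat, lon) tuples in, kilometers out). A hedged usage sketch assuming that package; whether these repos actually use it is an assumption:

from haversine import haversine  # pip install haversine

brussels = (50.848, 4.351)  # (lat, lon)
antwerp = (51.219, 4.402)
print(haversine(brussels, antwerp))  # roughly 41 km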
Example #32
def compare():
    """
    将a问与c问结果比较
    :return:
    """
    ll_data_2g = utils.gongcan_to_ll()
    train_data = utils.ll_to_grid(ll_data_2g)

    # print(train_data)
    # Drop the original IDs; they are not used as training features
    for i in range(1, 8):
        train_data.drop(['RNCID_' + str(i)], axis=1, inplace=True)
        train_data.drop(['CellID_' + str(i)], axis=1, inplace=True)
    # Fill missing signal strengths with 0
    train_data = train_data.fillna(0)

    # features and labels
    X_ = train_data.drop(
        ['MRTime', 'Longitude', 'Latitude', 'Num_connected', 'grid_num'],
        axis=1,
        inplace=False).values
    y_ = train_data[['grid_num', 'Longitude', 'Latitude']].values
    # Seed each split so every classifier sees exactly the same data sets
    random_states = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

    start = datetime.datetime.now()
    errors_all = []

    for i in range(10):

        # Split into training and validation sets
        X_train, X_test, y_train, y_test = train_test_split(
            X_, y_, test_size=0.2, random_state=random_states[i])

        clf = RandomForestClassifier(max_depth=20, random_state=0)
        y_pred = clf.fit(np.delete(X_train, 0, axis=1),
                         y_train[:, 0]).predict(np.delete(X_test, 0, axis=1))

        ll_pred = []
        for y in y_pred:
            X_box = int(y % X_box_num)
            y_box = int(y / X_box_num) + 1
            if X_box == 0:
                X_box = X_box_num
                y_box -= 1
            lon = lb_Longitude + per_lon * X_box - 0.5 * per_lon
            lat = lb_Latitude + per_lat * y_box - 0.5 * per_lat

            ll_pred.append([lon, lat])
        ll_true = np.delete(y_test, 0, axis=1).tolist()
        errors = []
        for (true, pred) in zip(ll_true, ll_pred):
            error = utils.haversine(true[0], true[1], pred[0], pred[1])
            errors.append(error)
        errors.sort()
        errors_all.append(errors)

    print("RandomForest")
    print("Median error: {}".format(
        np.percentile(np.array(errors_all).mean(axis=0), 50)))
    print("Time: {}".format(datetime.datetime.now() - start))
    print("****************************")

    # Get the results for question (c)
    start = datetime.datetime.now()
    c_errors = main()
    print("Time: {}".format(datetime.datetime.now() - start))

    plt.figure('Comparison 2G DATA')
    plt.xlabel('Comparison 2G DATA - CDF figure')
    plt.ylabel('Error(meters)')

    # Plot the overall CDF curve for the question (c) results
    mean_errors = []
    for i in range(len(c_errors)):
        errors = np.array(c_errors[i][1])
        mean_error = errors.mean(axis=0)
        mean_errors.extend(mean_error)
    mean_errors.sort()
    plt.plot(
        [float(i) / float(len(mean_errors)) for i in range(len(mean_errors))],
        list(mean_errors),
        '--',
        linewidth=1,
        alpha=0.6,
        label="c-method median error(m): %.3f" %
        np.percentile(mean_errors, 50))

    # Plot the overall CDF curve for the question (a) results
    errors = np.array(errors_all)
    mean_errors = errors.mean(axis=0)
    # print(mean_errors)
    plt.plot(
        [float(i) / float(len(mean_errors)) for i in range(len(mean_errors))],
        list(mean_errors),
        '--',
        linewidth=1,
        alpha=0.6,
        label="a-method median error: %.3f" % np.percentile(mean_errors, 50))
    plt.legend()
    plt.show()
Example #33
    def isRelevant(self, record):
        """
        Checks whether record passes the specified filters
        """
        
        if record["status"]["name"] == "Abandoned":
            if self.startTS is not None:             #Probes abandoned before measurement period should be dropped
                if record["status"]["since"] < self.startTS:
                    return False
            else:
                return False

        if record["status"]["name"] == "Never Connected":       #Never connected probes should be dropped
            return False

        if record["status"]["name"] == "Noisy":       # Probe constantly disconnecting
            return False

        if (record["status"]["name"] == "Connected") or (record["status"]["name"] == "Disconnected"):
            if self.endTS is not None:          
                if "first_connected" not in record or record['first_connected'] is None:
                    return False
                #Probes connected after end of measurement period should be dropped
                if record["first_connected"] > self.endTS:
                    return False

        if (record["status"]["name"] == "Disconnected"): 
            if self.startTS is not None:     #Probes disconnected before start of measurement period should be dropped
                if record["status"]["since"] < self.startTS:
                    return False
            else:
                return False

        probeASNv4 = record["asn_v4"]
        probeASNv6 = record["asn_v6"]
        
        asnCheckPassed = False

        if self.asnFilters == []:
            asnCheckPassed = True
        else:
            for asn in self.asnFilters:
                if (probeASNv4 == asn) or (probeASNv6 == asn):
                    asnCheckPassed = True
                    break

        probeCountry = record["country_code"]
        probeAdmin1 = record["admin1"]

        countryCheckPassed = False

        if self.countryFilters == []:
            countryCheckPassed = True
        else:
            for country in self.countryFilters:
                if probeCountry == country:
                    countryCheckPassed = True
                    break

                if not probeCountry:
                    countryCheckPassed = True

                if probeAdmin1:
                    if country in probeAdmin1:
                        countryCheckPassed = True


        probeLocation = record["geometry"]["coordinates"]

        probeLat = probeLocation[0]
        probeLongt = probeLocation[1]

        proximityCheckPassed = False

        if self.proximityFilters == []:
            proximityCheckPassed = True
        elif (probeLat is None) or (probeLongt is None):
            #A lot of cases without coordinates. Currently discarding these probes
            proximityCheckPassed = False
        else:
            for coordinates in self.proximityFilters:
                lat = coordinates[0]
                longt = coordinates[1]

                distance = haversine(probeLat,probeLongt,lat,longt)

                if distance <= self.proximityThreshold:
                    proximityCheckPassed = True
                    break


        return asnCheckPassed and countryCheckPassed and proximityCheckPassed
Example #34
    def trackDetails(self):
        """Read the track in the GPX file and populate the `track` dictionary.
        The time is read twice once local (as datetime so we can easily compute
        offsets) and once as an ISO 8601 string (as it is written) to be stored
        in the `track` dict.

        """
        # empty list for all the vars for the details
        self.track["source"] = self._source
        self.track["lat"] = []
        self.track["lon"] = []
        self.track["ele"] = []
        self.track["time"] = []
        self.track["distances"] = []    # steps in space
        self.track["durations"] = []    # steps in time (sec)
        self.track["speed"] = []        # speed in km/h

        # locate time version for datetime objects
        times = []

        # extract original lat/lon and cast as float
        for point in self.allPoints:

            # lat/lon/ele are just list of floats
            self.track["lat"].append(float(point.attrib['lat']))
            self.track["lon"].append(float(point.attrib['lon']))
            self.track["ele"].append(float(point.find('gpx:ele',
                self.namespaces).text))

            # self.time are datetime instances whereas track["time"] are
            # ISO strings (so they can be serialized)
            times.append(dateutil.parser.parse(
                point.find('gpx:time', self.namespaces).text
                ))
            self.track["time"].append(point.find('gpx:time',
                self.namespaces).text)

        # here we already compute individual steps between trackpoints

        # 1) distance (haversine)
        # list comprehension is fancy but a tad unreadable;
        # also decide whether we need haversine or if euclidean is enough
        self.track["distances"] = [haversine(y0, x0, y1, x1) for x0, x1, y0, y1 in zip(
            self.track["lat"][:-1], self.track["lat"][1:],
            self.track["lon"][:-1], self.track["lon"][1:])]

        # # the euclidean should work for small distances too and is less demanding
        # self.track["distances"] = [euclidean(y0,x0,y1,x1)  for x0,x1,y0,y1 in zip(
        #                     self.track["lat"][:-1],self.track["lat"][1:],
        #                     self.track["lon"][:-1],self.track["lon"][1:])]

        # 2) durations (these have to be converted to datatime object)
        # actually we could just substract the seconds since the GPS *should* be
        # sampled every few secs but you never know
        self.track["durations"] = [(t1-t0).total_seconds() for t0, t1 in zip(
            times[:-1], times[1:])]

        # 3) speed in segments
        # d/t * 3.6 since we are in m/s but want km/h
        self.track["speed"] = [(d/t)*3.6 if t > 0.0 else 0.0 for d, t in zip(
            self.track["distances"], self.track["durations"])]

        # distance, duration and speed have one entry per segment (n-1 values
        # for n points) and thus are too short; pad with a leading zero
        self.track["distances"] = [0] + self.track["distances"]
        self.track["durations"] = [0] + self.track["durations"]
        self.track["speed"] = [0] + self.track["speed"]

        # sum distances consecutively
        self.track["distances"] = [sum(self.track["distances"][:i+1]) for i in
                                    range(len(self.track["distances"]))]
        # sum durations consecutively
        self.track["durations"] = [sum(self.track["durations"][:i+1]) for i in
                                    range(len(self.track["durations"]))]
Example #35
    def _calc(self):
        # find lap length
        max_time = 0
        end_time = 0
        start_time = 0
        min_time = 999999
        utc = ""
        total_distance = 0
        is_utc = self.fixes[0].is_utc
        if self.fixes and is_utc:
            tzname = " UTC"
        else:
            tzname = tzlocal.get_localzone()._tzname

        # Peak speed / g-force tracking state
        peek_state = {}

        for fix in self.fixes:

            def peek_metric_calc(state, metric_name, storage_name):
                last_metric_name = "last_%s" % metric_name
                last_direction_name = "last_%s_direction" % metric_name
                last_metric = state.get(last_metric_name, None)
                last_direction = state.get(last_direction_name, None)
                last_fix = state.get('last_fix', None)

                cur_metric = getattr(fix, metric_name)
                cur_direction = None

                if last_metric is not None:
                    if cur_metric > last_metric:
                        cur_direction = 1
                    elif cur_metric < last_metric:
                        cur_direction = -1
                    else:
                        cur_direction = last_direction

                    if (last_direction is not None
                            and cur_direction is not None
                            and cur_direction != last_direction):

                        # We've found either a straight-line vmax or a corner vmin
                        getattr(self, storage_name).append({
                            "metric": last_metric,
                            "direction": last_direction,
                            "fix": last_fix,
                            "seconds": last_fix.lap_time,
                        })

                state[last_direction_name] = cur_direction
                state[last_metric_name] = cur_metric

            if 'last_fix' in peek_state:
                peek_metric_calc(peek_state, 'speed_mph', 'speed_markers')
                peek_metric_calc(peek_state, 'lat_g', 'lat_g_markers')
                peek_metric_calc(peek_state, 'lin_g', 'lin_g_markers')

                last_fix = peek_state['last_fix']
                hav = utils.haversine(last_fix.lat, last_fix.long, fix.lat,
                                      fix.long)

                total_distance += hav
                self.distance_at_fix[fix] = total_distance

            if fix.lap_time <= min_time:
                min_time = fix.lap_time
                start_time = fix.wall_time
                self.date = parser.parse(fix.date)

            if fix.lap_time > max_time:
                max_time = fix.lap_time
                end_time = fix.wall_time

            peek_state['last_fix'] = fix

        self.total_distance = total_distance
        self.lap_time = max_time
        datestr = "%s %s %s" % (fix.date, start_time, tzname)
        self.start_time = parser.parse(datestr)
        self.end_time = parser.parse("%s %s %s" % (fix.date, end_time, tzname))
        if is_utc:
            self.start_time = self.start_time.astimezone(
                tzlocal.get_localzone())
            self.end_time = self.end_time.astimezone(tzlocal.get_localzone())
        else:
            lz = tzlocal.get_localzone()
            self.start_time = lz.localize(self.start_time)
            self.end_time = lz.localize(self.end_time)
Example #36
# We can then get the number of people covered by each location.
people_per_market = odisha_population.sum_by_labels(closest_villages)
for market_index in range(0, len(villages)):
    if market_index in people_per_market.keys():
        villages_service.append(people_per_market[market_index])
    else:
        villages_service.append(0)

# From here, we want to find the closest Wholesaler to each Village.
villages_closest_wm_name = []
villages_closest_wm_lat = []
villages_closest_wm_lon = []
villages_closest_wm_distance = []
for village in villages.itertuples():
    # Amass distances, calculate the closest, and append the data to lists.
    distances = haversine(village.Latitude, village.Longitude,
                          wholesales["Latitude"], wholesales["Longitude"])
    closest = np.argmin(np.ndarray.flatten(distances))
    raw_distance = distances[closest][0][0]
    closest_wm = wholesales.iloc[closest]

    villages_closest_wm_name.append(closest_wm['Wholesale_Name'])
    villages_closest_wm_lat.append(closest_wm['Latitude'])
    villages_closest_wm_lon.append(closest_wm['Longitude'])
    villages_closest_wm_distance.append(raw_distance)

# And produce an accessible village market list with the population served and closest wholesaler data.
villages["Closest_Wholesaler"] = villages_closest_wm_name
villages["Wholesaler_Latitude"] = villages_closest_wm_lat
villages["Wholesaler_Longitude"] = villages_closest_wm_lon
villages["Wholesaler_Distance"] = villages_closest_wm_distance
villages["Population_Served"] = villages_service
Example #37
 def __init__(self,
              graph,
              terminals,
              hot_spots=None,
              generator=None,
              distances=None):
      # Check whether graph is node-weighted.
      if not graph.is_node_weighted():
          raise ValueError(
              "Lazy Steiner Tree only works with node-weighted graphs.")
     # Extract POI from the terminals list.
     if len(terminals) > 0:
         self.__poi = terminals[0]
     else:
         return
     # Set object variables.
     self.__graph = SuitabilityGraph()
     self.__graph.append_graph(graph)
     self.__terminals = terminals
     self.__hot_spots = None
     self.__nodes = None
     self.__s_d = {}
     self.__paths = {}
     self.__refs = {}
     # Set hot spots.
     if hot_spots is None:
         if generator is None:
             generator = SuitableNodeWeightGenerator()
         self.__hot_spots = self.__graph.get_suitable_nodes(
             generator, excluded_nodes=terminals)
     else:
         self.__hot_spots = list(hot_spots)
     # Set nodes = hot spots + terminals.
     self.__nodes = list(self.__hot_spots)
     for t in terminals:
         self.__nodes.append(t)
     # Set distances.
     if distances is None:
         len_hot_spots = len(self.__hot_spots)
         self.__distances = {}
         for t in self.__terminals:
             dist, paths = dijkstra(self.__graph, t, self.__nodes)
             for n in self.__nodes:
                 try:
                     self.__distances[tuple(sorted([t,
                                                    n]))] = (dist[n], 'N')
                     self.__paths[tuple(sorted([t, n]))] = paths[n]
                 except KeyError:
                      # sys.maxsize stands in for "unreachable" (sys.maxint is Python 2 only).
                      self.__distances[tuple(sorted([t, n]))] = (sys.maxsize, 'N')
                     self.__paths[tuple(sorted([t, n]))] = []
          # enumerate avoids the O(n) index() lookup on every iteration.
          for i1, h1 in enumerate(self.__hot_spots):
              for i2 in range(i1, len_hot_spots):
                  h2 = self.__hot_spots[i2]
                 distance = 0
                 d_type = 'E'
                 if h1 == h2:
                     d_type = 'N'
                 else:
                     distance = haversine(self.__graph[h1][2]['lat'],
                                          self.__graph[h1][2]['lon'],
                                          self.__graph[h2][2]['lat'],
                                          self.__graph[h2][2]['lon'])
                 self.__distances[tuple(sorted([h1,
                                                h2]))] = (distance, d_type)
     else:
         self.__distances = dict(distances)
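Because pair distances are keyed on tuple(sorted([u, v])), every lookup has to normalize the key the same way. A minimal sketch of the symmetric lookup this class relies on (the helper name is hypothetical):

import sys

def pair_distance(distances, u, v, default=(sys.maxsize, 'N')):
    # (u, v) and (v, u) hit the same entry because the key is the sorted pair.
    return distances.get(tuple(sorted([u, v])), default)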
Exemplo n.º 38
0
            def func(entry=val):
                self.log('Parsing: %s, id=%d' % (entry['title'], entry['id']),
                         logging.DEBUG)
                data = {}

                tmp = self.parse_name(entry['title'])
                if not tmp:
                    self.log('Failed to get names for id=%d' % entry['id'],
                             logging.ERROR)
                    return

                for key in ['enName', 'zhName', 'locName']:
                    if key in tmp:
                        data[key] = tmp[key]

                alias = set()
                # Drop alias entries that are actually country names.
                for a in tmp['alias']:
                    c = col_country.find_one({'alias': a}, {'_id': 1})
                    if not c:
                        alias.add(a)
                data['alias'] = list(alias)

                if 'tags' in entry:
                    data['tags'] = list(
                        set(
                            filter(
                                lambda val: val,
                                [tmp.lower().strip()
                                 for tmp in entry['tags']])))

                # Popularity counters.
                if 'comment_cnt' in entry:
                    data['commentCnt'] = entry['comment_cnt']
                if 'vs_cnt' in entry:
                    data['visitCnt'] = entry['vs_cnt']

                # Compute hotness as an empirical-CDF percentile.
                def calc_hotness(key):
                    if key not in entry:
                        return 0.5
                    x = entry[key]
                    sig = '%s:%d' % (key, x)
                    if sig not in hotness_cache:
                        hotness_cache[sig] = col_raw.find({
                            key: {
                                '$lt': x
                            }
                        }).count() / float(tot_num)
                    return hotness_cache[sig]

                # A list (not a lazy map object) so len() below also works on Python 3.
                hotness_terms = [calc_hotness(key)
                                 for key in ('comment_cnt', 'images_tot', 'vs_cnt')]
                data['hotness'] = sum(hotness_terms) / float(
                    len(hotness_terms))

                crumb_ids = []
                for crumb_entry in entry['crumb']:
                    if isinstance(crumb_entry, int):
                        cid = crumb_entry
                    else:
                        cid = int(
                            re.search(
                                r'travel-scenic-spot/mafengwo/(\d+)\.html',
                                crumb_entry['url']).group(1))
                    if cid not in crumb_ids:
                        crumb_ids.append(cid)

                data['crumbIds'] = crumb_ids

                data['source'] = {'mafengwo': {'id': entry['id']}}

                if 'lat' in entry and 'lng' in entry:
                    data['location'] = {
                        'type': 'Point',
                        'coordinates': [entry['lng'], entry['lat']]
                    }
                else:
                    if self.args.type == 'mdd':
                        tmp = self.retrieve_loc(entry['id'])
                        if tmp:
                            data['location'] = tmp
                    else:
                        tmp = self.poi_info(entry['id'])
                        if tmp:
                            data['location'] = {
                                'type': 'Point',
                                'coordinates': [tmp['lng'], tmp['lat']]
                            }

                # Fetch the associated images.
                sig = 'MafengwoMdd-%d' % data['source']['mafengwo']['id']
                image_list = [{
                    'key': md5(tmp['url']).hexdigest()
                } for tmp in col_raw_im.find({
                    'itemIds': sig
                }).limit(10)]
                if image_list:
                    data['images'] = image_list

                if self.args.type == 'mdd':
                    self.parse_mdd_contents(entry, data)
                else:
                    self.parse_vs_contents(entry, data)

                if self.args.baidu_match:
                    if 'location' in data:
                        coords = data['location']['coordinates']
                        ret = self.get_baidu_sug(data['zhName'], coords)
                        if not ret:
                            ret = []

                        for val in ret:
                            val['dist'] = haversine(coords[0], coords[1],
                                                    val['lng'], val['lat'])

                        # Parenthesized threshold: the original trailing if/else bound the whole condition.
                        max_dist = 400 if self.args.type == 'mdd' else 200
                        ret = [val for val in ret
                               if val['sname'] == data['zhName'] and
                               (5 >= val['type_code'] >= 3 if self.args.type == 'mdd'
                                else val['type_code'] >= 5) and
                               val['dist'] < max_dist]
                        ret = sorted(ret,
                                     key=lambda val:
                                     (val['type_code'], val['dist']))
                        if ret:
                            data['source']['baidu'] = {
                                'id': ret[0]['sid'],
                                'surl': ret[0]['surl']
                            }
                            self.log('Matched: %s => %s' %
                                     (data['zhName'], ret[0]['sname']))

                    if 'baidu' not in data['source']:
                        self.log('Not matched: %s' % data['zhName'])

                self.log('Parsing done: %s / %s / %s' %
                         tuple(data[key] if key in data else None
                               for key in ['zhName', 'enName', 'locName']))

                col_proc.update(
                    {'source.mafengwo.id': data['source']['mafengwo']['id']},
                    {'$set': data},
                    upsert=True)
Exemplo n.º 39
0
def generate_graph(results, generator, cost_type="distance", capacitated=False):
    graph = SuitabilityGraph(capacitated=capacitated)
    #
    prev_way_id = None
    prev_node_id = None
    hotspots = set()
    pois = set()
    for r in results:
        way_id = r[0]
        node_id = r[1]
        type_ = r[3]
        stype = r[4]
        poi_name = r[5]
        lat = float(r[6])
        lon = float(r[7])
        sa1_code = r[8]
        sa2_code = r[9]
        hw_type = r[10]
        if node_id not in graph:
            if type_ == "hotspot":
                graph[node_id] = (generator.weights["VERY_SUITABLE"][0], {}, {'lat': lat, 'lon': lon, 'sa1': sa1_code,
                                                                              'sa2': sa2_code, 'subtype': stype})
                hotspots.add(node_id)
            else:
                if type_ == "poi":
                    pois.add(node_id)
                graph[node_id] = (generator.weights["WARNING"][0], {}, {'lat': lat, 'lon': lon, 'sa1': sa1_code,
                                                                        'sa2': sa2_code, 'subtype': stype,
                                                                        'name': poi_name})
        if prev_way_id == way_id:
            prev_lat = graph[prev_node_id][2]['lat']
            prev_lon = graph[prev_node_id][2]['lon']
            # Cost estimation
            cost = 0
            distance = haversine(lat, lon, prev_lat, prev_lon)
            if cost_type == "distance":
                cost = distance
            elif cost_type == "travel_time":
                cost = osm_avg(distance, hw_type)
            #
            graph[node_id][1][prev_node_id] = cost
            graph[prev_node_id][1][node_id] = cost
        prev_way_id = way_id
        prev_node_id = node_id
    #
    isolated = []
    # Both dictionaries will INCLUDE HOT SPOTS AND POIs.
    nodes_by_sa1_code = {}
    nodes_by_sa2_code = {}
    #
    for node_id, info in graph.items():
        # Isolated: no neighbours, or the only "neighbour" is a self-loop.
        if len(info[1]) == 0 or (len(info[1]) == 1 and next(iter(info[1])) == node_id):
            isolated.append(node_id)
        else:
            sa1_code = info[2]['sa1']
            sa2_code = info[2]['sa2']
            if sa1_code in nodes_by_sa1_code:
                nodes_by_sa1_code[sa1_code].append(node_id)
            else:
                nodes_by_sa1_code[sa1_code] = [node_id]
            if sa2_code in nodes_by_sa2_code:
                nodes_by_sa2_code[sa2_code].append(node_id)
            else:
                nodes_by_sa2_code[sa2_code] = [node_id]
    for node_id in isolated:
        del graph[node_id]
        if node_id in hotspots:
            hotspots.remove(node_id)
        if node_id in pois:
            pois.remove(node_id)
    #
    print "h:", len(hotspots), "p:", len(pois)

    return graph, list(hotspots), list(pois), nodes_by_sa1_code, nodes_by_sa2_code
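The travel_time branch depends on an osm_avg helper that is not shown here. A hypothetical sketch that turns distance into travel time using per-highway-type average speeds (both the helper body and the speed table are assumptions):

# Hypothetical average speeds (km/h) per OSM highway type.
AVG_SPEED_KMH = {"motorway": 90.0, "primary": 60.0, "residential": 30.0}

def osm_avg(distance_km, hw_type, default_kmh=40.0):
    # Travel time in hours at the highway type's assumed average speed.
    return distance_km / AVG_SPEED_KMH.get(hw_type, default_kmh)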
Exemplo n.º 40
0
import csv
import os
from collections import defaultdict

import numpy as np
# config and haversine are project-local imports assumed by this snippet.


def split_data_in_user_groups():
    """
    Read raw reviews of the form 'shopId,userId,score,"date",filtered,latitude,longitude',
    group them by user, and, based on config.distance_mode, write two CSV files:
    mode<x>_data.csv and mode<x>_label.csv.

    Per user:
        count     -- number of bad (filtered) reviews.
        cur_label -- 1 if more than half of the user's reviews are bad, else 0.
        loc_arr   -- (latitude, longitude) of each review, in date order.
        cur_data  -- distances derived from loc_arr: to the centroid when
                     distance_mode == 0, otherwise between consecutive reviews.

    :return: 0 on success.
    """

    raw_data = defaultdict(list)
    data = []
    label = []

    os.chdir(config.dataset_dir)
    with open(config.raw_data_name, "r", encoding="latin-1") as csvfile:
        csvfile.readline()
        read = csv.reader(csvfile)
        for i in read:
            i[3] = i[3].replace('-', '')
            raw_data[i[1]].append(i[1:])

    for key in raw_data:
        if len(raw_data[key]) > config.filter_num:
            sorted_data = sorted(raw_data[key], key=lambda x: int(x[2]))
            count = sum([int(item[3]) for item in sorted_data])
            cur_label = 0 if count / len(sorted_data) <= 0.5 else 1
            loc_arr = [[float(item[-2]), float(item[-1])]
                       for item in sorted_data]

            if config.distance_mode == 0:
                center = list(zip(*loc_arr))
                center_point = np.array([
                    np.mean(np.array(center[0])),
                    np.mean(np.array(center[1]))
                ])
                loc_arr = np.array(loc_arr)
                cur_data = [haversine(center_point, item) for item in loc_arr]
            else:
                loc_arr = np.array(loc_arr)
                cur_data = [
                    haversine(loc_arr[i - 1], loc_arr[i])
                    for i in range(1, len(loc_arr))
                ]
            data.append(cur_data)
            label.append(cur_label)

    prefix = 'mode{}_'.format(config.distance_mode)
    with open(prefix + 'data.csv', "w", newline="") as data_csv:
        data_writer = csv.writer(data_csv)
        for item in data:
            data_writer.writerow(item)
    with open(prefix + 'label.csv', "w", newline="") as label_csv:
        label_writer = csv.writer(label_csv)
        label_writer.writerow(label)

    return 0
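Unlike the earlier examples, this one calls haversine with two (lat, lon) points rather than four scalars, which matches the signature of the PyPI haversine package. A minimal usage sketch under that assumption:

from haversine import haversine

center = (20.95, 85.09)    # hypothetical (lat, lon) centroid
review = (20.27, 85.84)    # hypothetical review location
print(haversine(center, review))  # great-circle distance in kilometres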
Exemplo n.º 41
0
 def distance(self, otherLocation):
     return utils.haversine(self, otherLocation)
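A brief usage sketch, assuming a location class that exposes this method and a utils.haversine that accepts two such objects (all names below are hypothetical):

loc_a = Location(lat=52.52, lon=13.405)   # hypothetical constructor
loc_b = Location(lat=48.86, lon=2.35)
print(loc_a.distance(loc_b))              # distance between the two locations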