Example #1
    def _search_get_page(self, point, start_index):
        """Load one page of search results and return it as an HTML string."""

        logging.debug("Loading page from start_index: %d", start_index)

        if start_index == 0:
            # first request has to load normal search page
            logging.debug("Using normal search endpoint")

            params = urlencode({"origin": point.format(None, "", "", "")})
            url = self._urls["search"] + "?" + params

            # make request
            try:
                return str(self._browser.get(url).soup.find(id="geocaches"))
            except requests.exceptions.ConnectionError as e:
                raise Error("Cannot load search results.") from e

        else:
            # other requests can use AJAX endpoint
            logging.debug("Using AJAX search endpoint")

            params = urlencode({
                "inputOrigin": point.format(None, "", "", ""),
                "startIndex": start_index,
                "originTreatment": 0
            })
            url = self._urls["search_more"] + "?" + params

            # make request
            try:
                return self._browser.get(url).json()["HtmlString"].strip()
            except requests.exceptions.ConnectionError as e:
                raise Error("Cannot load search results.") from e
Example #2
    def download(self, get_png_first=False):
        """Download UTFGrid from geocaching.com

        Return a generator of Cache instances and also store the
        geocaches in self.geocaches.

        It appears to be mandatory to first download the map tile (.png
        file) and only then the UTFGrid.  However, this is not enforced
        all the time.  There is probably some time limit since the
        previous loading of the same tile and also a general traffic
        regulator involved.  Try first to download the grid and, if
        that does not work, get the .png first and then try again.

        TODO It might be useful to store the time when a tile was last
        downloaded and act based on that.  Logging some statistics
        (time when the tile is loaded + received status code + content
        length + time spent on the request) might help in algorithm
        design and in evaluating whether the additional traffic from
        .png loading is tolerable and whether this should be done all
        the time.  Requesting the UTFGrid and waiting for a 204
        response also takes time."""

        logging.info("Downloading UTFGrid for tile ({}, {}, {})".format(
            self.x, self.y, self.z))
        try:
            if get_png_first:
                logging.debug(".. getting .png file first")
                self._gc._browser.get(self._urls["tile"])
            logging.debug(".. getting UTFGrid")
            res = self._gc._browser.get(self._urls["grid"])
            if res.status_code == 204:
                if get_png_first:
                    logging.debug("There is really no content! Returning 0 caches.")
                    return
                logging.debug("Cannot load UTFgrid: no content. "
                              "Trying to load .png tile first")
                new_caches = self.download(get_png_first=True)
        except requests.exceptions.ConnectionError as e:
            raise Error("Cannot load UTFgrid.") from e
        if res.status_code == 200:
            try:
                json_grid = res.json()
            except ValueError as e:
                # This happened during testing, don't know why.
                if get_png_first:
                    raise Error("Cannot load UTFgrid.") from e
                else:
                    logging.debug("JSON parsing failed, trying .png first")
                    return self.download(get_png_first=True)
            new_caches = self._parse_utfgrid(json_grid)
        for c in new_caches:
            self.geocaches.append(c)
            yield c
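
Since download() is a generator, caches are parsed lazily and are also appended to self.geocaches as a side effect, so the generator should be consumed only once. A minimal consumption sketch, assuming a freshly constructed tile object (the name tile is a placeholder):

    tile.geocaches = []              # assuming the list starts empty
    found = list(tile.download())    # draining the generator parses the grid
    assert found == tile.geocaches   # the yields mirror the side effect
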
Example #3
    def _request(self, url, *, expect="soup", method="GET", login_check=True, **kwargs):
        """
        Do an HTTP request and return a response based on the expect param.

        :param str url: Request target.
        :param str method: HTTP method to use.
        :param str expect: Expected type of data (either :code:`soup`, :code:`json` or :code:`raw`).
        :param bool login_check: Whether to check if user is logged in or not.
        :param kwargs: Passed to `requests.request
            <http://docs.python-requests.org/en/latest/api/#requests.request>`_ as is.
        """
        # check login unless explicitly turned off
        if login_check and not self._logged_in:
            raise NotLoggedInException("Login is needed.")

        url = url if "//" in url else urljoin(self._baseurl, url)

        try:
            res = self._session.request(method, url, **kwargs)
            res.raise_for_status()

            # return bs4.BeautifulSoup, JSON dict or raw requests.Response
            if expect == "soup":
                return bs4.BeautifulSoup(res.text, "html.parser")
            elif expect == "json":
                return res.json()
            elif expect == "raw":
                return res

        except requests.exceptions.RequestException as e:
            raise Error("Cannot load page: {}".format(url)) from e
Example #4
    def _search_get_page(self, point, page_num):
        """Returns one page of caches as a list.

        Searches for a caches around a point and returns N-th page (specifiend by page argument)."""

        assert isinstance(point, Point)
        assert type(page_num) is int

        logging.info("Fetching page %d.", page_num)

        # assemble request
        params = urlencode({"lat": point.latitude, "lng": point.longitude})
        url = self._urls["caches_nearest"] + "?" + params

        # pages other than the first require POST data
        if page_num == 1:
            post = None
        else:
            # TODO handle searching on second page without first
            post = self._pagging_helpers
            post["__EVENTTARGET"] = self._pagging_postbacks[page_num]
            post["__EVENTARGUMENT"] = ""

        # make request
        try:
            root = self._browser.post(url, post).soup
        except requests.exceptions.ConnectionError as e:
            raise Error("Cannot load search page #{}.".format(page_num)) from e

        # root of a few following elements
        widget_general = root.find_all("td", "PageBuilderWidget")

        # parse the paging widget
        caches_total, page_num, page_count = [
            int(elm.text) for elm in widget_general[0].find_all("b")
        ]
        logging.debug("Found %d results. Showing page %d of %d.", caches_total,
                      page_num, page_count)

        # save search postbacks for future usage
        if page_num == 1:
            pagging_links = [
                _ for _ in widget_general[1].find_all("a") if _.get("id")
            ]
            self._pagging_postbacks = {
                int(link.text): link.get("href").split("'")[1]
                for link in pagging_links
            }

            # other necessary fields
            self._pagging_helpers = {
                field["name"]: field["value"]
                for field in root.find_all("input", type="hidden")
            }

        # parse results table
        data = root.find("table", "SearchResultsTable").find_all("tr", "Data")
        return [self._search_parse_cache(c) for c in data]
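
Note the ordering constraint baked into the postback handling: page 1 must be fetched before any other page, because only the first response populates _pagging_postbacks and _pagging_helpers (the TODO above tracks lifting this). A minimal sketch of a caller that respects it:

    page1 = self._search_get_page(point, 1)   # primes the postback data
    page2 = self._search_get_page(point, 2)   # the POST fields now exist
    caches = page1 + page2
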
Example #5
    def _download_utfgrid(self, *, get_png=False):
        """Load UTFGrid tile from geocaching.com.

        It appears to be mandatory to first download the map tile (.png file) and only then the
        UTFGrid. However, this is not enforced all the time. There is probably some time limit
        since the previous loading of the same tile and also a general traffic regulator involved.
        Try first to download the grid and, if that does not work, get the .png first and then
        try again.

        :param bool get_png: Whether to download .png first.
        :return: JSON with raw tile data, or None if the tile has no content.
        :rtype: :class:`dict`
        """
        # TODO: It might be useful to store the time when a tile was last downloaded and act based
        # on that. Logging some statistics (time when the tile is loaded + received status code +
        # content length + time spent on the request) might help in algorithm design and in
        # evaluating whether the additional traffic from .png loading is tolerable and whether
        # this should be done all the time. Requesting the UTFGrid and waiting for a 204 response
        # also takes time.

        logging.debug("Downloading UTFGrid for {}".format(self))

        params = {"x": self.x, "y": self.y, "z": self.z}

        if get_png:
            logging.debug("Getting .png file")
            self.geocaching._request(self._urls["tile"],
                                     params=params,
                                     expect="raw")

        logging.debug("Getting UTFGrid")
        res = self.geocaching._request(self._urls["grid"],
                                       params=params,
                                       expect="raw")

        if res.status_code == 204:
            if get_png:
                logging.debug(
                    "There is really no content! Returning 0 caches.")
                return
            logging.debug(
                "Cannot load UTFgrid: no content. Trying to load .png tile first"
            )
            return self._download_utfgrid(get_png=True)

        if res.status_code == 200:
            try:
                return res.json()
            except ValueError as e:
                # this happened during testing, don't know why
                if get_png:
                    raise Error("Cannot load UTFgrid.") from e
                else:
                    logging.debug("JSON parsing failed, trying .png first")
                    return self._download_utfgrid(get_png=True)
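
The retry is bounded by construction: _download_utfgrid recurses at most once, because the second call always runs with get_png=True and then either returns the JSON, returns None on a repeated 204, or raises. A hedged usage sketch; tile and parse_grid are placeholder names:

    grid = tile._download_utfgrid()
    if grid is None:
        caches = []                # the tile genuinely has no content
    else:
        caches = parse_grid(grid)  # e.g. hand the dict to a UTFGrid parser
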
Example #6
    def geocode(self, query):
        """Try to fetch coordinates for the given query."""

        assert type(query) is str

        # URL-encode the query, otherwise spaces and special characters break the request
        params = urlencode({"q": query})
        url = self._urls["geocode"] + "?" + params
        try:
            res = self._browser.get(url).json()
        except requests.exceptions.ConnectionError as e:
            raise Error("Cannot load geocode page.") from e

        if res["status"] != "success":
            raise GeocodeError(res["msg"])

        return Point(float(res["data"]["lat"]), float(res["data"]["lng"]))
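
Callers can tell a failed lookup from a failed request: GeocodeError means the service answered with a non-success status, while Error wraps a connection problem. A minimal sketch, assuming a client object gc:

    try:
        point = gc.geocode("Prague, Czech Republic")
    except GeocodeError:
        point = None   # the service replied but found nothing for the query
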
Example #7
    def _get_middle_point(self):
        """Get middle point from list of x, y coordinates

        The points form a rectangular matrix, whose maximum size is
        self.size ** 2, but it can be smaller if the matrix is at the
        edge of UTFGrid.  Investigate block and return x, y coordinates
        of uncut square block middle point."""

        check_status = self._check_block()
        if check_status == 0:
            raise Error("Something went wrong with geocache coordinate "
                        "parsing from UTFGrid.  Either the JSON parsing "
                        "failed or Groundspeak has changed something.")
        elif check_status == 1:
            return [sum(i) / 2 for i in [self._xlim, self._ylim]]
        else:
            return [sum(self._find_limits(axis)) / 2 for axis in ["x", "y"]]
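
A worked instance of the uncut-block branch (check_status == 1), with made-up limits:

    # e.g. self._xlim == (10, 14) and self._ylim == (20, 24)
    assert [sum(i) / 2 for i in [(10, 14), (20, 24)]] == [12.0, 22.0]
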
Example #8
    def load_trackable_by_url(self, url, destination=None):
        """Load trackable details from its page and return a Trackable object."""
        try:
            root = self._browser.get(url).soup
        except requests.exceptions.ConnectionError as e:
            raise Error("Cannot load trackable details page.") from e

        # split the page title on parentheses and dashes: index 1 holds the
        # TID, index 2 the trackable type, and the rest belongs to the name
        title_tuple = re.split(r"[()-]", root.title.string)
        tid = title_tuple[1]
        trackable_type = title_tuple[2]
        name = "-".join(title_tuple[3:])

        owner_raw = root.find_all(
            "a", {"id": "ctl00_ContentBody_BugDetails_BugOwner"})
        owner = re.split(r"[<>]", str(owner_raw))[2]

        location_raw = root.find_all(
            "a", {"id": "ctl00_ContentBody_BugDetails_BugLocation"})
        location_url = location_raw[0].get("href")
        if "cache_details" in location_url:
            # the trackable sits in a cache, reuse that cache's coordinates
            location = self.load_cache_by_url(location_url).location
        else:
            location = re.split(r"[<>]", str(location_raw))[2]

        description_raw = root.findAll("div", {"id": "TrackableDetails"})
        description = description_raw[0].text

        goal_raw = root.findAll("div", {"id": "TrackableGoal"})
        goal = goal_raw[0].text

        # create trackable object
        t = destination or Trackable(tid, self)
        assert isinstance(t, Trackable)
        t.tid = tid
        t.name = name
        t.owner = owner
        t.location = location
        t.type = trackable_type
        t.description = description
        t.goal = goal
        return t
Example #9
    def load_cache_quick(self, wp, destination=None):
        """Loads details from map server.

        Loads just basic cache details, but very quickly."""

        assert type(wp) is str and wp.startswith("GC")
        logging.info("Loading quick details about %s...", wp)

        # assemble request
        params = urlencode({"i": wp})
        url = self._urls["map"] + "?" + params

        try:
            res = self._browser.get(url).json()
        except requests.exceptions.ConnectionError as e:
            raise Error("Cannot load quick cache details page.") from e

        if res["status"] == "failed" or len(res["data"]) != 1:
            raise LoadError("Waypoint '{}' cannot be loaded: {}".format(
                wp, res["msg"]))

        data = res["data"][0]

        # create cache object
        c = destination or Cache(wp, self)
        assert isinstance(c, Cache)

        # prettify data
        c.name = data["name"]
        c.cache_type = data["type"]["text"]
        c.state = data["available"]
        c.size = data["container"]["text"]
        c.difficulty = data["difficulty"]["text"]
        c.terrain = data["terrain"]["text"]
        c.hidden = Util.parse_date(data["hidden"])
        c.author = data["owner"]["text"]
        c.favorites = int(data["fp"])
        c.pm_only = data["subrOnly"]

        logging.debug("Cache loaded: %r", c)
        return c
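
The quick loader fills only the fields present in the map server's JSON; anything parsed from the full details page (location, hint, description) stays unset. A hedged sketch of combining the two, assuming a client object gc and a placeholder waypoint:

    c = gc.load_cache_quick("GC12345")           # fast: name, D/T, size, ...
    c = gc.load_cache("GC12345", destination=c)  # full: location, hint, ...
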
Example #10
    def load_trackable_list(self, url):
        """Load a list of Trackable objects from the given page."""
        try:
            root = self._browser.get(url).soup
        except requests.exceptions.ConnectionError as e:
            raise Error("Cannot load trackable list page.") from e

        trackable_table = root.find_all("table")[1]
        links = trackable_table.find_all("a")
        # keep only the links that point to trackables
        urls = [
            link.get("href") for link in links if "track" in link.get("href")
        ]
        # find the names matching the trackable urls
        names = [
            re.split(r"[<>]", str(link))[2] for link in links
            if "track" in link.get("href")
        ]
        # create trackables and build list to return
        trackables = []
        for n, u in zip(names, urls):
            trackables.append(Trackable(None, self, name=n, trackable_page=u))
        return trackables
Example #11
    def login(self, username, password):
        """Logs the user in.

        Downloads the relevant cookies to keep the user logged in."""

        logging.info("Logging in...")

        try:
            login_page = self._browser.get(self._urls["login_page"])
        except requests.exceptions.ConnectionError as e:
            raise Error("Cannot load login page.") from e

        logging.debug("Checking for previous login.")
        logged = self.get_logged_user(login_page)
        if logged:
            if logged == username:
                logging.info("Already logged as %s.", logged)
                self._logged_in = True
                return
            else:
                logging.info("Already logged as %s, but want to log in as %s.",
                             logged, username)
                self.logout()

        # continue logging in
        post = {}
        logging.debug("Assembling POST data.")

        # login fields
        login_elements = login_page.soup.find_all(
            "input", type=["text", "password", "checkbox"])
        post.update({
            field["name"]: val
            for field, val in zip(login_elements, [username, password, 1])
        })

        # other necessary fields
        other_elements = login_page.soup.find_all("input",
                                                  type=["hidden", "submit"])
        post.update(
            {field["name"]: field["value"]
             for field in other_elements})

        # log in to the site
        logging.debug("Submitting login form.")

        try:
            after_login_page = self._browser.post(self._urls["login_page"],
                                                  post)
        except requests.exceptions.ConnectionError as e:
            raise Error(
                "Cannot load response after submitting login form.") from e

        logging.debug("Checking the result.")
        if self.get_logged_user(after_login_page):
            logging.info("Logged in successfully as %s.", username)
            self._logged_in = True
            return
        else:
            self.logout()
            raise LoginFailedException(
                "Cannot login to the site (probably wrong username or password)."
            )
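
A failed login is distinguishable from a failed request: LoginFailedException means the site answered but did not log the user in (and the session was already cleaned up via logout()), while Error wraps connection problems. A minimal usage sketch with placeholder credentials, assuming a client object gc:

    try:
        gc.login("myusername", "mypassword")
    except LoginFailedException:
        print("Wrong username or password.")
    except Error:
        print("Network problem while logging in.")
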
Example #12
    def load_cache(self, wp, destination=None):
        """Loads details from cache page.

        Loads all cache details and return fully populated cache object."""

        assert type(wp) is str and wp.startswith("GC")
        logging.info("Loading details about %s...", wp)

        # assemble request
        params = urlencode({"wp": wp})
        url = self._urls["cache_details"] + "?" + params

        try:
            root = self._browser.get(url).soup
        except requests.exceptions.ConnectionError as e:
            raise Error("Cannot load cache details page.") from e

        cache_details = root.find(id="cacheDetails")

        # check for PM-only caches when using a free account
        if cache_details is None:
            if root.select(".PMOWarning"):
                raise PMOnlyException("Premium Members only.")
            raise LoadError("Cache details block not found on the page.")

        # parse raw data
        name = cache_details.find("h2")
        cache_type = cache_details.find("img").get("alt")
        author = cache_details("a")[1]
        hidden = cache_details.find("div",
                                    "minorCacheDetails").find_all("div")[1]
        location = root.find(id="uxLatLon")
        state = root.find("ul", "OldWarning")
        found = root.find("div", "FoundStatus")
        D_T = root.find("div", "CacheStarLabels").find_all("img")
        size = root.find("div", "CacheSize").find("img")
        attributes_raw = root.find_all(
            "div", "CacheDetailNavigationWidget")[0].find_all("img")
        user_content = root.find_all("div", "UserSuppliedContent")
        hint = root.find(id="div_hint")
        favorites = root.find("span", "favorite-value")

        # create cache object
        c = destination or Cache(wp, self)
        assert isinstance(c, Cache)

        # prettify data
        c.name = name.text
        c.cache_type = cache_type
        c.author = author.text
        c.hidden = Util.parse_date(hidden.text.split()[2])
        c.location = Point.from_string(location.text)
        c.state = state is None
        c.found = bool(found and "Found It!" in found.text)
        c.difficulty, c.terrain = [float(_.get("alt").split()[0]) for _ in D_T]
        c.size = " ".join(size.get("alt").split()[1:])
        attributes_raw = [
            _.get("src").split('/')[-1].rsplit("-", 1) for _ in attributes_raw
        ]
        c.attributes = {
            attribute_name: appendix.startswith("yes")
            for attribute_name, appendix in attributes_raw
            if not appendix.startswith("blank")
        }
        c.summary = user_content[0].text
        c.description = str(user_content[1])
        c.hint = Util.rot13(hint.text.strip())
        c.favorites = int(favorites.text)

        logging.debug("Cache loaded: %r", c)
        return c
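
One parsing detail worth a note: geocaching.com serves the hint ROT13-encoded, which is why it goes through Util.rot13 above. A standalone sketch of that decoding step using the stdlib codec (a stand-in for Util.rot13, not its actual implementation):

    import codecs

    def rot13(text):
        # ROT13 is its own inverse, so this both encodes and decodes
        return codecs.decode(text, "rot13")

    assert rot13("Haqre gur oevqtr") == "Under the bridge"
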
Example #13
    def load_cache_by_url(self, url, destination=None):
        """Load cache details from the given URL and return a Cache object."""
        try:
            root = self._browser.get(url).soup
        except requests.exceptions.ConnectionError as e:
            raise Error("Cannot load cache details page.") from e

        cache_details = root.find(id="cacheDetails")

        # check for PM-only caches when using a free account
        if cache_details is None:
            if root.select(".PMOWarning"):
                raise PMOnlyException("Premium Members only.")
            raise LoadError("Cache details block not found on the page.")

        # parse raw data
        wp = root.title.string.split(' ')[0]

        name = cache_details.find("h2")
        cache_type = cache_details.find("img").get("src")
        author = cache_details("a")[1]
        hidden = cache_details.find("div",
                                    "minorCacheDetails").find_all("div")[1]
        location = root.find(id="uxLatLon")
        state = root.find("ul", "OldWarning")
        found = root.find("div", "FoundStatus")
        D_T = root.find("div", "CacheStarLabels").find_all("img")
        size = root.find("div", "CacheSize").find("img")
        attributes_raw = root.find_all(
            "div", "CacheDetailNavigationWidget")[0].find_all("img")
        user_content = root.find_all("div", "UserSuppliedContent")
        hint = root.find(id="div_hint")
        favorites = root.find("span", "favorite-value")

        # check for trackables
        inventory_raw = root.find_all("div", "CacheDetailNavigationWidget")
        inventory_links = inventory_raw[1].find_all("a")
        if len(inventory_links) >= 3:
            trackable_page = (self._urls["trackable_base"] +
                              inventory_links[-3].get("href"))
        else:
            trackable_page = None

        # create cache object
        c = destination or Cache(wp, self)
        assert isinstance(c, Cache)

        # prettify data
        c.name = name.text
        c.cache_type = Cache.get_cache_type_by_img(cache_type)
        c.author = author.text
        c.hidden = Util.parse_date(hidden.text.split(":")[-1])
        c.location = Point.from_string(location.text)
        c.state = state is None
        c.found = bool(found and "Found It!" in found.text)
        c.difficulty, c.terrain = [float(_.get("alt").split()[0]) for _ in D_T]
        c.size = size.get("src").split("/")[-1].rsplit(
            ".", 1)[0]  # filename of img[src]
        attributes_raw = [
            _.get("src").split('/')[-1].rsplit("-", 1) for _ in attributes_raw
        ]
        c.attributes = {
            attribute_name: appendix.startswith("yes")
            for attribute_name, appendix in attributes_raw
            if not appendix.startswith("blank")
        }
        c.summary = user_content[0].text
        c.description = str(user_content[1])
        c.hint = Util.rot13(hint.text.strip())
        if favorites is None:
            c.favorites = 0
        else:
            c.favorites = int(favorites.text)
        if trackable_page is not None:
            c.trackables = self.load_trackable_list(trackable_page)
        else:
            c.trackables = []
        logging.debug("Cache loaded: %r", c)
        return c