示例#1
0
    def _get_log_counts_from_print_page(soup):
        """Build the mapping of log types to log counts from the cache print page.

        :param bs4.BeautifulSoup soup: Parsed html document of the cache print page.
        :raise .ValueError: If no paragraph with log type images is found, or if the number of
            parsed counts differs from the number of parsed log types.
        """
        # There are no IDs on the print page, so take the first "Meta" paragraph whose markup
        # mentions the log type images.
        element = next(
            (paragraph for paragraph in soup.find_all("p", "Meta")
             if "images/logtypes" in str(paragraph)),
            None,
        )
        if not element:
            raise errors.ValueError("Log counts could not be found.")

        # The paragraph text mixes an introduction, verbose descriptions and the numbers
        # themselves. Strip possible thousand separators from every word and keep only the words
        # which are pure numbers afterwards.
        cleaned_words = (
            word.replace(",", "").replace(".", "") for word in element.text.split()
        )
        counts = [int(word) for word in cleaned_words if word.isdigit()]

        # Every image source looks like "../images/logtypes/2.png"; the bare file name ("2")
        # identifies the log type.
        log_types = [
            LogType.from_filename(image["src"].split("/")[-1].split(".")[0])
            for image in element.find_all("img")
        ]

        # Refuse to pair counts with types when the list sizes differ for some unknown reason.
        if len(counts) != len(log_types):
            raise errors.ValueError(
                "Different list sizes getting log counts: {} types and {} counts."
                .format(len(log_types), len(counts)))

        return dict(zip(log_types, counts))
示例#2
0
    def test_from_filename(self):
        """Check ``Type.from_filename`` for regular, special and unknown file names."""
        with self.subTest("valid types"):
            for filename, expected in (("2", Type.found_it), ("75", Type.visit)):
                self.assertEqual(expected, Type.from_filename(filename))

        with self.subTest("special valid types"):
            for filename, expected in (("1001", Type.visit), ("1003", Type.publish_listing)):
                self.assertEqual(expected, Type.from_filename(filename))

        with self.subTest("invalid type"):
            self.assertRaises(PycachingValueError, Type.from_filename, "6666")
示例#3
0
    def test_from_filename(self):
        """Check ``Type.from_filename`` for regular, special and unknown file names."""
        with self.subTest("valid types"):
            for filename, expected in (("2", Type.found_it), ("75", Type.visit)):
                self.assertEqual(expected, Type.from_filename(filename))

        with self.subTest("special valid types"):
            for filename, expected in (("1001", Type.visit), ("1003", Type.publish_listing)):
                self.assertEqual(expected, Type.from_filename(filename))

        with self.subTest("invalid type"):
            self.assertRaises(PycachingValueError, Type.from_filename, "6666")
示例#4
0
    def load_logbook(self, limit=float("inf")):
        """Return a generator of logs for this cache.

        Yield instances of :class:`.Log` filled with log data. The logbook is requested page by
        page; generating stops when ``limit`` logs were yielded or when an empty page is
        returned. No page is requested once the limit is exhausted.

        :param int limit: Maximum number of logs to generate.
        """
        logging.info("Loading logbook for {}...".format(self))

        if limit < 1:
            # not even a single log may be generated, so do not request any page
            return

        remaining = limit
        page = 0
        per_page = min(limit, 100)  # max number to fetch in one request is 100 items

        while True:
            # get one page
            logbook_page = self._logbook_get_page(page, per_page)
            page += 1

            if not logbook_page:
                # result is empty - no more logs
                return

            for log_data in logbook_page:
                img_filename = log_data["LogTypeImage"].rsplit(".", 1)[0]  # filename w/o extension

                # create and fill log object
                log = Log()
                log.type = LogType.from_filename(img_filename)
                log.text = log_data["LogText"]
                log.visited = log_data["Visited"]
                log.author = log_data["UserName"]
                yield log

                # handle limit right after yielding, so that no further page is requested when
                # the limit is reached exactly at the end of a page
                remaining -= 1
                if remaining < 1:
                    return
示例#5
0
    def _get_log_counts_from_cache_details(soup):
        """Build the mapping of log types to log counts from the cache details page.

        :param bs4.BeautifulSoup soup: Parsed html document of the cache details page.
        :raise .ValueError: If the number of parsed counts differs from the number of parsed
            log types.
        """
        find_counts_label = soup.find("span", {"id": "ctl00_ContentBody_lblFindCounts"})
        totals = find_counts_label.find("p", "LogTotals")

        # The text consists of numbers separated by a lot of whitespace. Each number may carry
        # thousand separators, which are dropped before the conversion to int.
        counts = [
            int(token.replace(",", "").replace(".", "")) for token in totals.text.split()
        ]

        # Every image source looks like "../images/logtypes/2.png"; the bare file name ("2")
        # identifies the log type.
        log_types = [
            LogType.from_filename(image["src"].split("/")[-1].split(".")[0])
            for image in totals.find_all("img")
        ]

        # Refuse to pair counts with types when the list sizes differ for some unknown reason.
        if len(counts) != len(log_types):
            raise errors.ValueError(
                "Different list sizes getting log counts: {} types and {} counts."
                .format(len(log_types), len(counts)))

        return dict(zip(log_types, counts))
示例#6
0
    def load_logbook(self, limit=float("inf")):
        """Return a generator of logs for this cache.

        Yield instances of :class:`.Log` filled with log data. The logbook is requested page by
        page; generating stops when ``limit`` logs were yielded or when an empty page is
        returned. No page is requested once the limit is exhausted.

        :param int limit: Maximum number of logs to generate.
        """
        logging.info("Loading logbook for {}...".format(self))

        if limit < 1:
            # not even a single log may be generated, so do not request any page
            return

        remaining = limit
        page = 0
        per_page = min(limit, 100)  # max number to fetch in one request is 100 items

        while True:
            # get one page
            logbook_page = self._logbook_get_page(page, per_page)
            page += 1

            if not logbook_page:
                # result is empty - no more logs
                return

            for log_data in logbook_page:
                img_filename = log_data["LogTypeImage"].rsplit(".", 1)[0]  # filename w/o extension

                # create and fill log object
                log = Log()
                log.type = LogType.from_filename(img_filename)
                log.text = log_data["LogText"]
                log.visited = log_data["Visited"]
                log.author = log_data["UserName"]
                yield log

                # handle limit right after yielding, so that no further page is requested when
                # the limit is reached exactly at the end of a page
                remaining -= 1
                if remaining < 1:
                    return
示例#7
0
    def load(self):
        """Load all possible cache details.

        Use full cache details page. Therefore all possible properties are filled in, but the
        loading is a bit slow.

        If you want to load basic details about a PM only cache, the :class:`.PMOnlyException` is
        still thrown, but available details are filled in. If you know, that the cache you are
        loading is PM only, please consider using :meth:`load_quick` as it will load the same
        details, but quicker.

        .. note::
           This method is called automatically when you access a property which isn't yet filled in
           (so-called "lazy loading"). You don't have to call it explicitly.

        :raise .PMOnlyException: If cache is PM only and current user is basic member.
        :raise .LoadError: If cache loading fails (probably because of not existing cache).
        """
        try:
            # pick url based on what info we have right now
            if hasattr(self, "url"):
                root = self.geocaching._request(self.url)
            elif hasattr(self, "_wp"):
                root = self.geocaching._request(self._urls["cache_details"],
                                                params={"wp": self._wp})
            else:
                raise errors.LoadError("Cache lacks info for loading")
        except errors.Error as e:
            # probably 404 during cache loading - cache not exists
            raise errors.LoadError("Error in loading cache") from e

        # check for PM only caches if using free account
        self.pm_only = root.find("section", "pmo-banner") is not None

        # the container holding the basic details differs between the two page variants
        if self.pm_only:
            cache_details = root.find(id="ctl00_divContentMain")
        else:
            cache_details = root.find(id="cacheDetails")

        # details also available for basic members for PM only caches -----------------------------

        if self.pm_only:
            self.wp = cache_details.find("li", "li__gccode").text.strip()

            self.name = cache_details.find("h1").text.strip()

            type_src = cache_details.find("img").get("src")  # "/play/Content/images/cache-types/3.png"
            type_filename = type_src.split("/")[-1].rsplit(".", 1)[0]  # "3"
            self.type = Type.from_filename(type_filename)

            author = cache_details.find(id="ctl00_ContentBody_uxCacheBy").text
            self.author = author[len("A cache by "):]

            # parse cache detail list into a python list
            details = cache_details.find("ul", "ul__hide-details").text.split("\n")

            self.difficulty = float(details[2])

            self.terrain = float(details[5])

            self.size = Size.from_string(details[8])

            self.favorites = int(details[11])
        else:
            # parse from <title> - get first word
            try:
                self.wp = root.title.string.split(" ")[0]
            except Exception as e:
                # Do not use a bare except here: it would also swallow KeyboardInterrupt and
                # SystemExit. Keep the original failure as the cause of the LoadError.
                raise errors.LoadError() from e
            self.name = cache_details.find("h2").text

            type_href = cache_details.select_one("svg.cache-icon use").get(
                "xlink:href")  # "cache-types.svg#icon-3-disabled"
            type_filename = type_href.split("#")[-1].replace("_", "-").split("-")[1]  # "3"
            self.type = Type.from_filename(type_filename)

            self.author = cache_details("a")[1].text

            D_and_T_img = root.find("div", "CacheStarLabels").find_all("img")
            self.difficulty, self.terrain = [
                float(img.get("alt").split()[0]) for img in D_and_T_img
            ]

            size = root.find("div", "CacheSize")
            size = size.find("img").get("src")  # size img src
            size = size.split("/")[-1].rsplit(".", 1)[0]  # filename w/o extension
            self.size = Size.from_filename(size)

        if self.pm_only:
            raise errors.PMOnlyException()

        # details not available for basic members for PM only caches ------------------------------
        pm_only_warning = root.find("p", "Warning NoBottomSpacing")
        self.pm_only = (pm_only_warning is not None
                        and "Premium Member Only" in pm_only_warning.text)

        # only the first two navigation widgets are used; any further ones are ignored
        attributes_widget, inventory_widget, *_ = root.find_all(
            "div", "CacheDetailNavigationWidget")

        hidden = cache_details.find("div", "minorCacheDetails").find_all("div")[1].text
        self.hidden = parse_date(hidden.split(":")[-1])

        self.location = Point.from_string(root.find(id="uxLatLon").text)

        self.state = root.find("ul", "OldWarning") is None

        log_image = root.find(id="ctl00_ContentBody_GeoNav_logTypeImage")
        if log_image:
            log_image_filename = log_image.get("src").split("/")[-1].rsplit(
                ".", 1)[0]  # filename w/o extension
            self._found_status = Log(type=LogType.from_filename(log_image_filename))
        else:
            self._found_status = None

        # split every attribute image file name at its last "-" into (name, appendix)
        attributes_raw = [
            img.get("src").split("/")[-1].rsplit("-", 1)
            for img in attributes_widget.find_all("img")
        ]

        # appendixes starting with "blank" are skipped, the rest map to yes / not yes
        self.attributes = {
            attribute_name: appendix.startswith("yes")
            for attribute_name, appendix in attributes_raw
            if not appendix.startswith("blank")
        }

        self.summary = root.find(id="ctl00_ContentBody_ShortDescription").text
        self.description = root.find(id="ctl00_ContentBody_LongDescription").text

        self.hint = rot13(root.find(id="div_hint").text.strip())

        favorites = root.find("span", "favorite-value")
        if favorites:
            self.favorites = int(favorites.text)
        else:
            self.favorites = 0

        # the logbook token and the original location are embedded in inline scripts
        js_content = "\n".join(script.text for script in root.find_all("script"))
        self._logbook_token = re.findall("userToken\\s*=\\s*'([^']+)'", js_content)[0]
        # find original location if any
        if "oldLatLng\":" in js_content:
            old_lat_long = js_content.split("oldLatLng\":")[1].split(']')[0].split('[')[1]
            self.original_location = Point(old_lat_long)
        else:
            self.original_location = None

        # if there are some trackables
        if len(inventory_widget.find_all("a")) >= 3:
            trackable_page_url = inventory_widget.find(
                id="ctl00_ContentBody_uxTravelBugList_uxViewAllTrackableItems")
            self._trackable_page_url = trackable_page_url.get("href")[3:]  # has "../" on start
        else:
            self._trackable_page_url = None

        # Additional Waypoints
        self.waypoints = Waypoint.from_html(root, "ctl00_ContentBody_Waypoints")

        logging.debug("Cache loaded: {}".format(self))
示例#8
0
    def load(self):
        """Load all possible cache details.

        Use full cache details page. Therefore all possible properties are filled in, but the
        loading is a bit slow.

        If you want to load basic details about a PM only cache, the :class:`.PMOnlyException` is
        still thrown, but available details are filled in. If you know, that the cache you are
        loading is PM only, please consider using :meth:`load_quick` as it will load the same
        details, but quicker.

        .. note::
           This method is called automatically when you access a property which isn't yet filled in
           (so-called "lazy loading"). You don't have to call it explicitly.

        :raise .PMOnlyException: If cache is PM only and current user is basic member.
        :raise .LoadError: If cache loading fails (probably because of not existing cache).
        """
        try:
            # pick url based on what info we have right now
            if hasattr(self, "url"):
                root = self.geocaching._request(self.url)
            elif hasattr(self, "_wp"):
                root = self.geocaching._request(self._urls["cache_details"],
                                                params={"wp": self._wp})
            else:
                raise errors.LoadError("Cache lacks info for loading")
        except errors.Error as e:
            # probably 404 during cache loading - cache not exists
            raise errors.LoadError("Error in loading cache") from e

        # check for PM only caches if using free account
        self.pm_only = root.find("section", "pmo-banner") is not None

        # the container holding the basic details differs between the two page variants
        if self.pm_only:
            cache_details = root.find(id="ctl00_divContentMain")
        else:
            cache_details = root.find(id="cacheDetails")

        # details also available for basic members for PM only caches -----------------------------

        if self.pm_only:
            self.wp = cache_details.find("li", "li__gccode").text.strip()

            self.name = cache_details.find("h1").text.strip()

            type_src = cache_details.find("img").get("src")  # "/play/Content/images/cache-types/3.png"
            type_filename = type_src.split("/")[-1].rsplit(".", 1)[0]  # "3"
            self.type = Type.from_filename(type_filename)

            author = cache_details.find(id="ctl00_ContentBody_uxCacheBy").text
            self.author = author[len("A cache by "):]

            # parse cache detail list into a python list
            details = cache_details.find("ul", "ul__hide-details").text.split("\n")

            self.difficulty = float(details[2])

            self.terrain = float(details[5])

            self.size = Size.from_string(details[8])

            self.favorites = int(details[11])
        else:
            # parse from <title> - get first word
            try:
                self.wp = root.title.string.split(" ")[0]
            except Exception as e:
                # Do not use a bare except here: it would also swallow KeyboardInterrupt and
                # SystemExit. Keep the original failure as the cause of the LoadError.
                raise errors.LoadError() from e
            self.name = cache_details.find("h2").text

            type_href = cache_details.select_one("svg.cache-icon use").get(
                "xlink:href")  # "cache-types.svg#icon-3-disabled"
            type_filename = type_href.split("#")[-1].replace("_", "-").split("-")[1]  # "3"
            self.type = Type.from_filename(type_filename)

            self.author = cache_details("a")[1].text

            D_and_T_img = root.find("div", "CacheStarLabels").find_all("img")
            self.difficulty, self.terrain = [
                float(img.get("alt").split()[0]) for img in D_and_T_img
            ]

            size = root.find("div", "CacheSize")
            size = size.find("img").get("src")  # size img src
            size = size.split("/")[-1].rsplit(".", 1)[0]  # filename w/o extension
            self.size = Size.from_filename(size)

        if self.pm_only:
            raise errors.PMOnlyException()

        # details not available for basic members for PM only caches ------------------------------
        pm_only_warning = root.find("p", "Warning NoBottomSpacing")
        self.pm_only = (pm_only_warning is not None
                        and "Premium Member Only" in pm_only_warning.text)

        # only the first two navigation widgets are used; any further ones are ignored
        attributes_widget, inventory_widget, *_ = root.find_all(
            "div", "CacheDetailNavigationWidget")

        hidden = cache_details.find("div", "minorCacheDetails").find_all("div")[1].text
        self.hidden = parse_date(hidden.split(":")[-1])

        self.location = Point.from_string(root.find(id="uxLatLon").text)

        self.state = root.find("ul", "OldWarning") is None

        log_image = root.find(id="ctl00_ContentBody_GeoNav_logTypeImage")
        if log_image:
            log_image_filename = log_image.get("src").split("/")[-1].rsplit(
                ".", 1)[0]  # filename w/o extension
            self._found_status = Log(type=LogType.from_filename(log_image_filename))
        else:
            self._found_status = None

        # split every attribute image file name at its last "-" into (name, appendix)
        attributes_raw = [
            img.get("src").split("/")[-1].rsplit("-", 1)
            for img in attributes_widget.find_all("img")
        ]

        # appendixes starting with "blank" are skipped, the rest map to yes / not yes
        self.attributes = {
            attribute_name: appendix.startswith("yes")
            for attribute_name, appendix in attributes_raw
            if not appendix.startswith("blank")
        }

        self.summary = root.find(id="ctl00_ContentBody_ShortDescription").text
        self.description = root.find(id="ctl00_ContentBody_LongDescription").text

        self.hint = rot13(root.find(id="div_hint").text.strip())

        favorites = root.find("span", "favorite-value")
        if favorites:
            self.favorites = int(favorites.text)
        else:
            self.favorites = 0

        # the logbook token and the original location are embedded in inline scripts
        js_content = "\n".join(script.text for script in root.find_all("script"))
        self._logbook_token = re.findall("userToken\\s*=\\s*'([^']+)'", js_content)[0]
        # find original location if any
        if "oldLatLng\":" in js_content:
            old_lat_long = js_content.split("oldLatLng\":")[1].split(']')[0].split('[')[1]
            self.original_location = Point(old_lat_long)
        else:
            self.original_location = None

        # if there are some trackables
        if len(inventory_widget.find_all("a")) >= 3:
            trackable_page_url = inventory_widget.find(
                id="ctl00_ContentBody_uxTravelBugList_uxViewAllTrackableItems")
            self._trackable_page_url = trackable_page_url.get("href")[3:]  # has "../" on start
        else:
            self._trackable_page_url = None

        # Additional Waypoints
        self.waypoints = Waypoint.from_html(root, "ctl00_ContentBody_Waypoints")

        logging.debug("Cache loaded: {}".format(self))