def _get_log_counts_from_print_page(soup):
    """Return a dictionary of all log counts found in the page representation,
    based on the print page.

    :param bs4.BeautifulSoup soup: Parsed html document of the cache print page.
    """
    # The print page does not use any IDs, so we need a more complicated approach here and
    # search for the paragraph containing the log type images.
    p_meta = soup.find_all("p", "Meta")
    element = None
    for entry in p_meta:
        if "images/logtypes" in str(entry):
            element = entry
            break
    if not element:
        raise errors.ValueError("Log counts could not be found.")

    # The text gives numbers and verbose descriptions of the current values, as well as an
    # introductory text, so we have to perform a number check on each element and only keep
    # the numbers.
    # The values might contain thousands separators, which we have to remove before converting
    # them to real numbers.
    words = element.text.split()
    values = []
    for word in words:
        word = word.replace(",", "").replace(".", "")
        if word and word.isdigit():
            values.append(int(word))

    # Retrieve the list of image sources.
    images = element.find_all("img")
    types = []
    for image in images:
        type = image["src"]  # "../images/logtypes/2.png"
        type = type.split("/")[-1].split(".")[0]  # "2"
        type = LogType.from_filename(type)
        types.append(type)

    # Prevent possible wrong assignments when the list sizes differ for some unknown reason.
    if not len(values) == len(types):
        raise errors.ValueError(
            "Different list sizes getting log counts: {} types and {} counts."
            .format(len(types), len(values)))

    # Finally create the mapping.
    log_counts = dict(zip(types, values))
    return log_counts
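# A hedged usage sketch for the parser above: fetch the print page and feed the
# parsed soup in. The ``session`` object and URL are illustrative placeholders,
# not part of this module; any requests-compatible session would do.
def _example_print_page_log_counts(session, print_page_url):
    import bs4  # local import to keep the sketch self-contained
    html = session.get(print_page_url).text
    soup = bs4.BeautifulSoup(html, "html.parser")
    return _get_log_counts_from_print_page(soup)  # e.g. {LogType.found_it: 47, ...}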
def test_from_filename(self):
    with self.subTest("valid types"):
        self.assertEqual(Type.found_it, Type.from_filename("2"))
        self.assertEqual(Type.visit, Type.from_filename("75"))

    with self.subTest("special valid types"):
        self.assertEqual(Type.visit, Type.from_filename("1001"))
        self.assertEqual(Type.publish_listing, Type.from_filename("1003"))

    with self.subTest("invalid type"):
        with self.assertRaises(PycachingValueError):
            Type.from_filename("6666")
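# A minimal sketch of the behaviour the test above exercises, assuming the enum
# members carry their image filename as value and that the special filename
# "1001" aliases the visit icon; the real mapping lives in pycaching's Type
# enum and may differ.
import enum

class _TypeSketch(enum.Enum):
    found_it = "2"
    visit = "75"
    publish_listing = "1003"

    @classmethod
    def from_filename(cls, filename):
        if filename == "1001":  # assumed alias for the visit icon
            filename = "75"
        try:
            return cls(filename)
        except ValueError as e:
            raise ValueError("Unknown log type filename: {}".format(filename)) from e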
def load_logbook(self, limit=float("inf")):
    """Return a generator of logs for this cache.

    Yield instances of :class:`.Log` filled with log data.

    :param int limit: Maximum number of logs to generate.
    """
    logging.info("Loading logbook for {}...".format(self))

    page = 0
    per_page = min(limit, 100)  # max number to fetch in one request is 100 items

    while True:
        # get one page
        logbook_page = self._logbook_get_page(page, per_page)
        page += 1

        if not logbook_page:
            # result is empty - no more logs
            return

        for log_data in logbook_page:
            limit -= 1  # handle limit
            if limit < 0:
                return

            img_filename = log_data["LogTypeImage"].rsplit(".", 1)[0]  # filename w/o extension

            # create and fill log object
            log = Log()
            log.type = LogType.from_filename(img_filename)
            log.text = log_data["LogText"]
            log.visited = log_data["Visited"]
            log.author = log_data["UserName"]

            yield log
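# Usage sketch for the generator above, assuming an authenticated ``Geocaching``
# instance; the instance and the cache code are placeholders.
def _example_recent_logs(geocaching, cache_code="GC12345"):
    cache = geocaching.get_cache(cache_code)
    for log in cache.load_logbook(limit=5):
        print(log.visited, log.type, log.author)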
def _get_log_counts_from_cache_details(soup):
    """Return a dictionary of all log counts found in the page representation,
    based on the cache details page.

    :param bs4.BeautifulSoup soup: Parsed html document of the cache details page.
    """
    lbl_find_counts = soup.find("span", {"id": "ctl00_ContentBody_lblFindCounts"})
    log_totals = lbl_find_counts.find("p", "LogTotals")

    # The text gives numbers separated by a lot of spaces; splitting retrieves the numbers.
    # The values might contain thousands separators, which we have to remove before converting
    # them to real numbers.
    values = log_totals.text.split()
    values = [int(value.replace(",", "").replace(".", "")) for value in values]

    # Retrieve the list of image sources.
    images = log_totals.find_all("img")
    types = []
    for image in images:
        type = image["src"]  # "../images/logtypes/2.png"
        type = type.split("/")[-1].split(".")[0]  # "2"
        type = LogType.from_filename(type)
        types.append(type)

    # Prevent possible wrong assignments when the list sizes differ for some unknown reason.
    if not len(values) == len(types):
        raise errors.ValueError(
            "Different list sizes getting log counts: {} types and {} counts."
            .format(len(types), len(values)))

    # Finally create the mapping.
    log_counts = dict(zip(types, values))
    return log_counts
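# Sketch of how a caller might dispatch between the two parsers above, depending
# on which page the soup came from; the keyword flag is an assumption for
# illustration, not pycaching's actual interface.
def _get_log_counts(soup, from_print_page=False):
    if from_print_page:
        return _get_log_counts_from_print_page(soup)
    return _get_log_counts_from_cache_details(soup)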
def load(self):
    """Load all possible cache details.

    Use the full cache details page. Therefore all possible properties are filled in, but the
    loading is a bit slow.

    If you want to load basic details about a PM only cache, the :class:`.PMOnlyException` is
    still thrown, but available details are filled in. If you know that the cache you are
    loading is PM only, please consider using :meth:`load_quick` as it will load the same
    details, but quicker.

    .. note::
       This method is called automatically when you access a property which isn't yet filled
       in (so-called "lazy loading"). You don't have to call it explicitly.

    :raise .PMOnlyException: If the cache is PM only and the current user is a basic member.
    :raise .LoadError: If cache loading fails (probably because the cache does not exist).
    """
    try:
        # pick the url based on what info we have right now
        if hasattr(self, "url"):
            root = self.geocaching._request(self.url)
        elif hasattr(self, "_wp"):
            root = self.geocaching._request(self._urls["cache_details"], params={"wp": self._wp})
        else:
            raise errors.LoadError("Cache lacks info for loading")
    except errors.Error as e:
        # probably a 404 during cache loading - the cache does not exist
        raise errors.LoadError("Error in loading cache") from e

    # check for PM only caches if using a free account
    self.pm_only = root.find("section", "pmo-banner") is not None
    cache_details = root.find(id="ctl00_divContentMain") if self.pm_only \
        else root.find(id="cacheDetails")

    # details also available to basic members for PM only caches ------------------------------

    if self.pm_only:
        self.wp = cache_details.find("li", "li__gccode").text.strip()
        self.name = cache_details.find("h1").text.strip()

        type = cache_details.find("img").get("src")  # "/play/Content/images/cache-types/3.png"
        type = type.split("/")[-1].rsplit(".", 1)[0]  # "3"
        self.type = Type.from_filename(type)

        author = cache_details.find(id="ctl00_ContentBody_uxCacheBy").text
        self.author = author[len("A cache by "):]

        # parse the cache detail list into a python list
        details = cache_details.find("ul", "ul__hide-details").text.split("\n")
        self.difficulty = float(details[2])
        self.terrain = float(details[5])
        self.size = Size.from_string(details[8])
        self.favorites = int(details[11])
    else:
        # parse from <title> - get the first word
        try:
            self.wp = root.title.string.split(" ")[0]
        except AttributeError:
            raise errors.LoadError()

        self.name = cache_details.find("h2").text

        type = cache_details.select_one("svg.cache-icon use").get("xlink:href")
        # e.g. "cache-types.svg#icon-3-disabled"
        type = type.split("#")[-1].replace("_", "-").split("-")[1]  # "3"
        self.type = Type.from_filename(type)

        self.author = cache_details("a")[1].text

        D_and_T_img = root.find("div", "CacheStarLabels").find_all("img")
        self.difficulty, self.terrain = [float(img.get("alt").split()[0]) for img in D_and_T_img]

        size = root.find("div", "CacheSize")
        size = size.find("img").get("src")  # size img src
        size = size.split("/")[-1].rsplit(".", 1)[0]  # filename w/o extension
        self.size = Size.from_filename(size)

    if self.pm_only:
        raise errors.PMOnlyException()

    # details not available to basic members for PM only caches -------------------------------

    pm_only_warning = root.find("p", "Warning NoBottomSpacing")
    self.pm_only = bool(pm_only_warning and "Premium Member Only" in pm_only_warning.text)

    attributes_widget, inventory_widget, *_ = root.find_all("div", "CacheDetailNavigationWidget")

    hidden = cache_details.find("div", "minorCacheDetails").find_all("div")[1].text
    self.hidden = parse_date(hidden.split(":")[-1])

    self.location = Point.from_string(root.find(id="uxLatLon").text)

    self.state = root.find("ul", "OldWarning") is None

    log_image = root.find(id="ctl00_ContentBody_GeoNav_logTypeImage")
    if log_image:
        log_image_filename = log_image.get("src").split("/")[-1].rsplit(".", 1)[0]
        # filename w/o extension
        self._found_status = Log(type=LogType.from_filename(log_image_filename))
    else:
        self._found_status = None

    attributes_raw = attributes_widget.find_all("img")
    attributes_raw = [_.get("src").split("/")[-1].rsplit("-", 1) for _ in attributes_raw]
    self.attributes = {
        attribute_name: appendix.startswith("yes")
        for attribute_name, appendix in attributes_raw
        if not appendix.startswith("blank")
    }

    self.summary = root.find(id="ctl00_ContentBody_ShortDescription").text
    self.description = root.find(id="ctl00_ContentBody_LongDescription").text
    self.hint = rot13(root.find(id="div_hint").text.strip())

    favorites = root.find("span", "favorite-value")
    self.favorites = int(favorites.text) if favorites else 0

    js_content = "\n".join(map(lambda i: i.text, root.find_all("script")))
    self._logbook_token = re.findall(r"userToken\s*=\s*'([^']+)'", js_content)[0]

    # find the original location, if any
    if "oldLatLng\":" in js_content:
        old_lat_long = js_content.split("oldLatLng\":")[1].split("]")[0].split("[")[1]
        self.original_location = Point(old_lat_long)
    else:
        self.original_location = None

    # if there are some trackables
    if len(inventory_widget.find_all("a")) >= 3:
        trackable_page_url = inventory_widget.find(
            id="ctl00_ContentBody_uxTravelBugList_uxViewAllTrackableItems")
        self._trackable_page_url = trackable_page_url.get("href")[3:]  # has "../" at the start
    else:
        self._trackable_page_url = None

    # additional waypoints
    self.waypoints = Waypoint.from_html(root, "ctl00_ContentBody_Waypoints")

    logging.debug("Cache loaded: {}".format(self))