def test_load_url_string():
    """
    Checks Commons.load_url_string against httpbin.org, once without
    headers and once with a custom User-Agent header.

    Needs network access to httpbin.org.
    """
    # Plain request: only the start of the response body is inspected
    plain_lines = Commons.load_url_string("https://httpbin.org/get").split("\n")
    assert plain_lines[0] == "{", "First line incorrect."
    assert plain_lines[1] == ' "args": {}, ', "String response incorrect."

    # Request with a custom header, which should show up in the response body
    custom_headers = [["User-Agent", "Example data"]]
    header_body = Commons.load_url_string(
        "https://httpbin.org/headers", custom_headers)
    header_lines = header_body.split("\n")
    assert header_lines[0] == "{", "First line incorrect."
    assert header_lines[1] == ' "headers": {', "String response incorrect."
    assert '"User-Agent": "Example data"' in header_body, "Headers missing from request."
def update_from_european_bank_data(self, repo):
    """
    Refreshes currency unit values from the European bank (ecb.europa.eu)
    daily rates XML.

    The reciprocal of each listed rate is stored on the matching currency
    unit. Currency codes with no matching unit are ignored.
    :type repo: ConvertRepo
    """
    # Currency ConvertType whose units will be updated
    currencies = repo.get_type_by_name("currency")
    # Download the rates document
    feed_xml = Commons.load_url_string(
        "https://www.ecb.europa.eu/stats/eurofxref/eurofxref-daily.xml")
    # Walk down: envelope -> first Cube -> first nested Cube -> rate Cubes
    envelope = minidom.parseString(feed_xml).getElementsByTagName(
        "gesmes:Envelope")[0]
    outer_cube = envelope.getElementsByTagName("Cube")[0]
    inner_cube = outer_cube.getElementsByTagName("Cube")[0]
    for rate_cube in inner_cube.getElementsByTagName("Cube"):
        # Currency code comes from the "currency" attribute
        code = rate_cube.getAttributeNode("currency").nodeValue
        # Stored value is the reciprocal of the "rate" attribute
        inverse_rate = 1 / float(rate_cube.getAttributeNode("rate").nodeValue)
        # Only update currencies the repo knows about
        unit = currencies.get_unit_by_name(code)
        if unit is not None:
            unit.update_value(inverse_rate)
def update_from_forex_data(self, repo):
    """
    Refreshes currency unit values from the Forex (rates.fxcm.com) rates XML.

    Only rates whose symbol starts with "EUR" are used. The stored value is
    the reciprocal of the midpoint between bid and ask. Currency codes with
    no matching unit are ignored.
    :type repo: ConvertRepo
    """
    # Currency ConvertType whose units will be updated
    currencies = repo.get_type_by_name("currency")
    # Download the rates document
    feed_xml = Commons.load_url_string("https://rates.fxcm.com/RatesXML3")
    # Parse it and find the container of Rate elements
    rates_root = minidom.parseString(feed_xml).getElementsByTagName("Rates")[0]
    for rate in rates_root.getElementsByTagName("Rate"):
        symbol = rate.getElementsByTagName("Symbol")[0].firstChild.data
        # Only EUR-based pairs are of interest
        if not symbol.startswith("EUR"):
            continue
        bid = float(rate.getElementsByTagName("Bid")[0].firstChild.data)
        ask = float(rate.getElementsByTagName("Ask")[0].firstChild.data)
        # Symbol is "EUR" followed by the other currency's code
        code = symbol[3:]
        midpoint = 0.5 * (bid + ask)
        inverse_rate = 1 / midpoint
        # Only update currencies the repo knows about
        unit = currencies.get_unit_by_name(code)
        if unit is not None:
            unit.update_value(inverse_rate)
def get_rss_data(self) -> str:
    """
    Downloads the feed document from self.url and applies per-site fixes.

    :return: The feed content as a string, after any site-specific cleanup
    """
    headers = None
    # Tumblr feeds need "GoogleBot" in the User-Agent header, or they'll give a GDPR notice
    if "tumblr.com" in self.url:
        headers = [[
            "User-Agent", "Hallo IRCBot [email protected] (GoogleBot/4.5.1)"
        ]]
    # Actually get the data
    rss_data = Commons.load_url_string(self.url, headers)
    # PHDComics doesn't always escape ampersands correctly: a bare "&"
    # followed by a space is not valid XML, so swap it for the entity.
    # (The previous replacement mapped "& " to itself and did nothing.)
    if "phdcomics" in self.url:
        rss_data = rss_data.replace("& ", "&amp; ")
    # Chainsaw suit has a blank first line
    if "chainsawsuit" in self.url and rss_data.startswith("\r\n"):
        rss_data = rss_data[2:]
    return rss_data
def _direct_url_red(url: str) -> Optional[str]:
    """
    Finds a direct video URL for a redgifs watch page link.

    The watch page is downloaded and its video <source> tags inspected.
    Preference order: an mp4 source that is not the "-mobile.mp4" variant,
    then any mp4 source, then the first source that has a src at all.

    :param url: Link which should start with a redgifs.com/watch/ address
    :return: The chosen source URL, or None if the link is not a redgifs
        watch link or the page has no usable source
    """
    redgifs_regex = re.compile(
        r"(?:https?://)?(?:www\.)?redgifs\.com/watch/([a-z]+)", re.IGNORECASE)
    redgifs_match = redgifs_regex.match(url)
    if redgifs_match is None:
        return None
    page_url = redgifs_match.group(0)
    # The pattern accepts links without a scheme, but those cannot be
    # requested as-is, so give them a default scheme.
    if not page_url.lower().startswith(("http://", "https://")):
        page_url = f"https://{page_url}"
    page_source = Commons.load_url_string(page_url)
    page_soup = BeautifulSoup(page_source, "html.parser")
    sources = page_soup.select(".video-player-wrapper video source")
    # Skip <source> tags with no src; use .get() so a tag missing an
    # attribute is passed over instead of raising KeyError.
    usable_sources = [source for source in sources if source.get("src")]
    if not usable_sources:
        return None
    mp4_sources = [
        source["src"] for source in usable_sources
        if source.get("type") == "video/mp4"
    ]
    # Prefer the full-size mp4 over the "-mobile" variant
    for mp4_src in mp4_sources:
        if not mp4_src.endswith("-mobile.mp4"):
            return mp4_src
    if mp4_sources:
        return mp4_sources[0]
    return usable_sources[0]["src"]
def _format_custom_sites(
        self, server: Server, channel: Optional[Channel],
        user: Optional[User],
        item: ElementTree.Element) -> Optional[EventMessage]:
    """
    Builds the update message for feeds which get site-specific formatting.

    The site is picked by substring checks on self.url (or self.feed_title
    for awoocomic). Several branches download extra data for the item.

    :param server: Passed through to the created event
    :param channel: Passed through to the created event
    :param user: Passed through to the created event
    :param item: Feed item element, from which the title and link are read
    :return: The event to send, or None when no custom site matches or the
        data a custom format needs could not be found on the page
    """
    if "xkcd.com" in self.url:
        item_title = item.find("title").text
        item_link = item.find("link").text
        # The comic number is the last path segment of the item link
        comic_number = item_link.strip("/").split("/")[-1]
        json_link = f"https://xkcd.com/{comic_number}/info.0.json"
        comic_json = Commons.load_url_json(json_link)
        alt_text = comic_json["alt"]
        output = f'Update on "{self.feed_title}" RSS feed. "{item_title}" {item_link}\nAlt text: {alt_text}'
        return EventMessage(server, channel, user, output, inbound=False)
    if "awoocomic" in self.feed_title:
        item_title = item.find("title").text
        # Keep only the part of the title before the first " - "
        if " - " in item_title:
            item_title = item_title.split(" - ")[0]
        item_link = item.find("link").text
        output = f'Update on "{self.feed_title}" RSS feed. "{item_title}" {item_link}'
        return EventMessage(server, channel, user, output, inbound=False)
    if "smbc-comics.com" in self.url:
        item_title = item.find("title").text
        item_link = item.find("link").text
        page_code = Commons.load_url_string(item_link)
        soup = BeautifulSoup(page_code, "html.parser")
        comic_img = soup.select_one("img#cc-comic")
        # select_one gives None when nothing matches; without the comic
        # image there is nothing to format, so return None like the
        # rss.app branch does when its page data is missing.
        if comic_img is None:
            return None
        alt_text = comic_img["title"]
        photo_urls = [comic_img["src"]]
        # The after-comic image is only attached if the page has one
        after_comic_img = soup.select_one("#aftercomic img")
        if after_comic_img is not None:
            photo_urls.append(after_comic_img["src"])
        return EventMessageWithPhoto(
            server,
            channel,
            user,
            f'Update on "{self.feed_title}" RSS feed. "{item_title}" {item_link}\nAlt text: {alt_text}',
            photo_urls,
            inbound=False)
    if "rss.app" in self.url:
        item_title = _get_item_title(item)
        item_link = get_rss_item_link(item)
        page_code = Commons.load_url_string(item_link)
        soup = BeautifulSoup(page_code, "html.parser")
        head_script = soup.select_one("head script")
        if head_script is None:
            return None
        # The linked page stores the real target in a `var url = "..."` script line
        url_regex = re.compile(r"var url = \"([^\"]+)\";", re.IGNORECASE)
        url_result = url_regex.search(head_script.text)
        if url_result is None:
            return None
        # Kept as one literal: a single-quoted string cannot span lines
        output = f'Update on "{self.feed_title}" RSS feed. "{item_title}" {url_result.group(1)}'
        return EventMessage(server, channel, user, output, inbound=False)
    if "nitter.net" in self.url:
        item_title = _get_item_title(item)
        # Link to twitter.com rather than the nitter.net address
        item_link = get_rss_item_link(item).replace(
            "nitter.net", "twitter.com")
        # Construct output
        output = f'Update on "{self.feed_title}" RSS feed. "{item_title}" {item_link}'
        output_evt = EventMessage(server, channel, user, output, inbound=False)
        return output_evt
    return None