Python Commons.load_url_string示例

def test_load_url_string():
    """
    Check that Commons.load_url_string returns the response body as a string,
    first without and then with a custom list of request headers.

    Both requests are made against httpbin.org.
    """
    # Request without any custom headers
    plain_body = Commons.load_url_string("https://httpbin.org/get")
    plain_lines = plain_body.split("\n")
    assert plain_lines[0] == "{", "First line incorrect."
    assert plain_lines[1] == '  "args": {}, ', "String response incorrect."

    # Request with a custom User-Agent; the response is expected to contain it
    custom_headers = [["User-Agent", "Example data"]]
    header_body = Commons.load_url_string(
        "https://httpbin.org/headers", custom_headers)
    header_lines = header_body.split("\n")
    assert header_lines[0] == "{", "First line incorrect."
    assert header_lines[1] == '  "headers": {', "String response incorrect."
    assert '"User-Agent": "Example data"' in header_body, "Headers missing from request."

示例#2

显示文件

文件： update_currencies.py 项目： wirenic/Hallo

 def update_from_european_bank_data(self, repo):
     """
     Refresh the values of currency units from the European bank's daily
     exchange rate XML (ecb.europa.eu).

     Every rate found in the feed is stored, as its reciprocal, on the
     currency unit whose name matches the currency code. Codes with no
     matching unit are ignored.
     :type repo: ConvertRepo
     """
     currencies = repo.get_type_by_name("currency")
     # Download the XML document from the european bank website
     feed_xml = Commons.load_url_string(
         "https://www.ecb.europa.eu/stats/eurofxref/eurofxref-daily.xml")
     # Walk down: envelope -> first Cube -> first Cube inside it
     envelope = minidom.parseString(feed_xml).getElementsByTagName(
         "gesmes:Envelope")[0]
     first_cube = envelope.getElementsByTagName("Cube")[0]
     second_cube = first_cube.getElementsByTagName("Cube")[0]
     # Each remaining Cube carries one currency code and its rate
     for rate_cube in second_cube.getElementsByTagName("Cube"):
         code = rate_cube.getAttributeNode("currency").nodeValue
         rate = float(rate_cube.getAttributeNode("rate").nodeValue)
         # The stored value is the reciprocal of the published rate
         reciprocal = 1 / rate
         unit = currencies.get_unit_by_name(code)
         # Only currencies known to the repo get updated
         if unit is not None:
             unit.update_value(reciprocal)

示例#3

显示文件

文件： update_currencies.py 项目： wirenic/Hallo

 def update_from_forex_data(self, repo):
     """
     Refresh the values of currency units from the Forex rates XML
     (rates.fxcm.com).

     Only symbols beginning with "EUR" are used. The stored value is the
     reciprocal of the midpoint between the bid and ask prices, and the
     remainder of the symbol after "EUR" is used as the currency code.
     Codes with no matching unit are ignored.
     :type repo: ConvertRepo
     """
     currencies = repo.get_type_by_name("currency")
     # Download the XML document from the forex website
     feed_xml = Commons.load_url_string("https://rates.fxcm.com/RatesXML3")
     rates_root = minidom.parseString(feed_xml).getElementsByTagName(
         "Rates")[0]
     for rate_node in rates_root.getElementsByTagName("Rate"):
         symbol = rate_node.getElementsByTagName("Symbol")[0].firstChild.data
         # Only pairs quoted against EUR are of interest
         if symbol.startswith("EUR"):
             bid = float(
                 rate_node.getElementsByTagName("Bid")[0].firstChild.data)
             ask = float(
                 rate_node.getElementsByTagName("Ask")[0].firstChild.data)
             # Everything after the "EUR" prefix is the other currency's code
             code = symbol[3:]
             # Reciprocal of the bid/ask midpoint
             value = 1 / (0.5 * (bid + ask))
             unit = currencies.get_unit_by_name(code)
             # Only currencies known to the repo get updated
             if unit is not None:
                 unit.update_value(value)

示例#4

显示文件

文件： source_rss.py 项目： wirenic/Hallo

 def get_rss_data(self) -> str:
     """
     Download the raw feed text for this source's URL, applying a few
     per-site workarounds.

     :return: The feed content, after any site-specific clean-up.
     """
     feed_url = self.url
     # Tumblr feeds are requested with a User-Agent mentioning "GoogleBot",
     # otherwise they give a GDPR notice
     headers = ([[
         "User-Agent",
         "Hallo IRCBot [email protected] (GoogleBot/4.5.1)"
     ]] if "tumblr.com" in feed_url else None)
     content = Commons.load_url_string(feed_url, headers)
     # PHDComics doesn't always escape ampersands correctly
     if "phdcomics" in feed_url:
         content = content.replace("& ", "&amp; ")
     # Chainsaw suit has a blank first line, which is dropped
     if "chainsawsuit" in feed_url:
         if content.startswith("\r\n"):
             content = content[len("\r\n"):]
     return content

示例#5

显示文件

文件： source_reddit.py 项目： wirenic/Hallo

def _direct_url_red(url: str) -> Optional[str]:
    redgifs_regex = re.compile(
        r"(?:https?://)?(?:www\.)?redgifs\.com/watch/([a-z]+)", re.IGNORECASE)
    redgifs_match = redgifs_regex.match(url)
    if redgifs_match is None:
        return None
    page_source = Commons.load_url_string(redgifs_match.group(0))
    page_soup = BeautifulSoup(page_source, "html.parser")
    sources = page_soup.select(".video-player-wrapper video source")
    if not sources:
        return None
    best_sources = [
        source["src"] for source in sources if source["type"] == "video/mp4"
        and not source["src"].endswith("-mobile.mp4")
    ]
    if best_sources:
        return best_sources[0]
    mp4_sources = [
        source["src"] for source in sources if source["type"] == "video/mp4"
    ]
    if mp4_sources:
        return mp4_sources[0]
    return sources[0]["src"]

示例#6

显示文件

文件： source_rss.py 项目： wirenic/Hallo

 def _format_custom_sites(
         self, server: Server, channel: Optional[Channel],
         user: Optional[User],
         item: ElementTree.Element) -> Optional[EventMessage]:
     """
     Build a site-specific update message for feeds that get special handling.

     The feed is recognised by substrings of ``self.url`` (xkcd.com,
     smbc-comics.com, rss.app, nitter.net) or of ``self.feed_title``
     (awoocomic). Some branches download extra data for the item.

     :param server: Passed through to the created event.
     :param channel: Passed through to the created event.
     :param user: Passed through to the created event.
     :param item: The feed item element to format.
     :return: The custom event, or None when the feed is not one of the
         special-cased sites or the expected content could not be found on
         the downloaded page.
     """
     if "xkcd.com" in self.url:
         item_title = item.find("title").text
         item_link = item.find("link").text
         # The comic number is the last path segment of the item link
         comic_number = item_link.strip("/").split("/")[-1]
         json_link = f"https://xkcd.com/{comic_number}/info.0.json"
         comic_json = Commons.load_url_json(json_link)
         alt_text = comic_json["alt"]
         output = f'Update on "{self.feed_title}" RSS feed. "{item_title}" {item_link}\nAlt text: {alt_text}'
         return EventMessage(server, channel, user, output, inbound=False)
     if "awoocomic" in self.feed_title:
         item_title = item.find("title").text
         # Keep only the part of the title before the first " - "
         if " - " in item_title:
             item_title = item_title.split(" - ")[0]
         item_link = item.find("link").text
         output = f'Update on "{self.feed_title}" RSS feed. "{item_title}" {item_link}'
         return EventMessage(server, channel, user, output, inbound=False)
     if "smbc-comics.com" in self.url:
         item_title = item.find("title").text
         item_link = item.find("link").text
         page_code = Commons.load_url_string(item_link)
         soup = BeautifulSoup(page_code, "html.parser")
         comic_img = soup.select_one("img#cc-comic")
         # select_one() returns None when nothing matches; without the comic
         # image there is nothing custom to build, so give up like the
         # rss.app branch does rather than raising TypeError.
         if comic_img is None:
             return None
         alt_text = comic_img["title"]
         photo_urls = [comic_img["src"]]
         after_comic_img = soup.select_one("#aftercomic img")
         # The after-comic image is attached only when the page has one
         if after_comic_img is not None:
             photo_urls.append(after_comic_img["src"])
         return EventMessageWithPhoto(
             server,
             channel,
             user,
             f'Update on "{self.feed_title}" RSS feed. "{item_title}" {item_link}\nAlt text: {alt_text}',
             photo_urls,
             inbound=False)
     if "rss.app" in self.url:
         item_title = _get_item_title(item)
         item_link = get_rss_item_link(item)
         page_code = Commons.load_url_string(item_link)
         soup = BeautifulSoup(page_code, "html.parser")
         head_script = soup.select_one("head script")
         if head_script is None:
             return None
         # The target URL is read from a `var url = "...";` script statement
         url_regex = re.compile(r"var url = \"([^\"]+)\";", re.IGNORECASE)
         url_result = url_regex.search(head_script.text)
         if url_result is None:
             return None
         output = f'Update on "{self.feed_title}" RSS feed. "{item_title}" {url_result.group(1)}'
         return EventMessage(server, channel, user, output, inbound=False)
     if "nitter.net" in self.url:
         item_title = _get_item_title(item)
         # Link to twitter.com instead of the nitter.net mirror
         item_link = get_rss_item_link(item).replace(
             "nitter.net", "twitter.com")
         # Construct output
         output = f'Update on "{self.feed_title}" RSS feed. "{item_title}" {item_link}'
         output_evt = EventMessage(server,
                                   channel,
                                   user,
                                   output,
                                   inbound=False)
         return output_evt
     return None