示例#1
0
def test_make_absolute():
    """Check Page.make_absolute() against a table of (base, relative, expected) URLs."""
    # One Page object is reused for every case; only its private base URL
    # is swapped before each call.
    page = Page(
        Response(status_code=200, request=Request("GET", "http://base.url"))
    )

    scenarios = (
        # Plain relative reference and "current directory" references.
        ("http://base.url", "relative", "http://base.url/relative"),
        ("http://base.url", ".", "http://base.url/"),
        ("http://base.url/with_folder", ".", "http://base.url/"),
        (
            "http://base.url/with_folder",
            "./with_dot",
            "http://base.url/with_dot",
        ),
        # "Parent directory" references.
        ("http://base.url/with_folder", "..", "http://base.url/"),
        (
            "http://base.url/with_folder",
            "../folder",
            "http://base.url/folder",
        ),
        # Fully qualified URLs, with and without an explicit port.
        ("http://base.url", "http://whole.url", "http://whole.url/"),
        ("http://base.url", "https://whole.url", "https://whole.url/"),
        ("http://base.url", "http://whole.url:987", "http://whole.url:987/"),
        (
            "http://base.url",
            "https://whole.url:987",
            "https://whole.url:987/",
        ),
        # Root and scheme-relative references; a bare "//" is expected to
        # produce an empty string.
        ("http://base.url", "/", "http://base.url/"),
        ("http://base.url", "//", ""),
        ("http://base.url", "//only_this", "http://only_this/"),
        # Mixed dot segments.
        ("http://base.url", "./..//", "http://base.url/"),
        (
            "http://base.url",
            "./wrong_folder/../good_folder/",
            "http://base.url/good_folder/",
        ),
    )

    for base_url, relative_url, expected in scenarios:
        page._base = base_url
        resolved = page.make_absolute(relative_url)
        assert resolved == expected, (
            f"Absolute url from base_url='{base_url}' and relative_url='{relative_url}' is not '{expected}'"
        )
示例#2
0
    def _extract_disconnect_urls(self, page: Page) -> List[str]:
        """
        Collect the links of the given page that look like disconnect urls.

        A link is kept when is_in_scope() does not return False for it and it
        matches DISCONNECT_REGEX. Each kept link is passed through
        page.make_absolute() before being returned.
        """
        return [
            page.make_absolute(candidate)
            for candidate in page.links
            # Only an explicit False from is_in_scope() rejects a link; the
            # regex is tried only on links that pass the scope check.
            if self.is_in_scope(candidate) is not False
            and re.search(DISCONNECT_REGEX, candidate) is not None
        ]