def testRequestAllLink(self):
     """Print LinkChecker.get_valid_link's result for each link on a page.

     Fetches the source of a hard-coded URL through
     LinkChecker.get_page_source (custom agent string, retries=0), extracts
     its links with LinkChecker.get_all_links_from_source, and prints one
     get_valid_link result per link. Nothing is asserted, so this only
     fails if one of the calls raises.
     """
     url = "http://www.jehovahs-witness.com"
     agent = "VegeBot-Careful"
     source = LinkChecker.get_page_source(url, agent=agent, from_src="*****@*****.**", retries=0)
     links = LinkChecker.get_all_links_from_source(source)
     for link in links:
         # Strip once and parse the stripped text, so the scheme and domain
         # handed to get_valid_link come from the same string as the link
         # itself (stray whitespace would otherwise leak into the parse).
         cleaned = link.strip()
         parts = urlsplit(cleaned)
         print(LinkChecker.get_valid_link(parts.netloc, cleaned, parts.scheme))
示例#2
0
 def testRequestAllLink(self):
     """Print LinkChecker.get_valid_link's result for each link on a page.

     Fetches the source of a hard-coded URL through
     LinkChecker.get_page_source (custom agent string, retries=0), extracts
     its links with LinkChecker.get_all_links_from_source, and prints one
     get_valid_link result per link. Nothing is asserted, so this only
     fails if one of the calls raises.
     """
     url = "http://www.jehovahs-witness.com"
     agent = "VegeBot-Careful"
     source = LinkChecker.get_page_source(
         url,
         agent=agent,
         from_src="*****@*****.**",
         retries=0)
     links = LinkChecker.get_all_links_from_source(source)
     for link in links:
         # Strip once and parse the stripped text, so the scheme and domain
         # handed to get_valid_link come from the same string as the link
         # itself (stray whitespace would otherwise leak into the parse).
         cleaned = link.strip()
         parts = urlsplit(cleaned)
         print(
             LinkChecker.get_valid_link(parts.netloc, cleaned,
                                        parts.scheme))
 def test_get_all_links(self):
     """Print every link LinkChecker extracts from one archived page.

     The page source is obtained with LinkChecker.get_page_source and the
     links with LinkChecker.get_all_links_from_source. Nothing is asserted;
     the links are only written to stdout, one per line.
     """
     archived_url = "http://web.archive.org/web/20140711025724/http://susodigital.com/"
     page_source = LinkChecker.get_page_source(archived_url)
     for found in LinkChecker.get_all_links_from_source(page_source):
         print(found)
示例#4
0
 def test_get_all_links(self):
     """Print every link LinkChecker extracts from one archived page.

     The page source is obtained with LinkChecker.get_page_source and the
     links with LinkChecker.get_all_links_from_source. Nothing is asserted;
     the links are only written to stdout, one per line.
     """
     archived_url = "http://web.archive.org/web/20140711025724/http://susodigital.com/"
     page_source = LinkChecker.get_page_source(archived_url)
     for found in LinkChecker.get_all_links_from_source(page_source):
         print(found)