Example #1
          triage.add_resource(page,resource,str(status))
  return suite

def crawl():
  """
    The crawl function runs a crawler to gather data about a website; that data can then be used by other test suites.
  """
  global pages
  try:
    driver=webdriver.Remote(command_executor='http://127.0.0.1:4444/wd/hub',desired_capabilities=DesiredCapabilities.FIREFOX)
  except urllib2.URLError,e:
    print >> stderr, "Could not open connection to Selenium.  Did you start it?"
    exit(1)
  if delay != 0:
    print >> stderr, "Crawler request delay: %f seconds" % delay
  crawler=qa_nettools.crawler(driver,domain_filter=domain_filter,delay=delay,excludes=crawler_excludes)
  pages=crawler.crawl(start_url)
  driver.quit()

if __name__ == '__main__':
  STATUS=0
  total=0
  failures=0

  #option parsing
  usage="""\
Usage: %prog --target-url URL --domain-filter STRING --wrong-url-excludes LIST

Description:
  %prog can be used to crawl domains or sub-urls to find dead links and
  bad resources.
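
The truncated triage.add_resource(page,resource,str(status)) call at the top of this example implies that every resource gathered by the crawler is fetched and recorded together with its HTTP status. Below is a minimal sketch of such a check, assuming pages maps each crawled page to the links found on it (the structure shown in Example #2); check_links is a hypothetical helper that uses plain urllib2 rather than the triage object, which is not shown here.

from sys import stderr
import urllib2

def check_links(pages):
  """Return {page:[(link,status),...]} for links that fail or answer with a 4xx/5xx status.

  pages is assumed to be the mapping produced by crawler.crawl(): page URL -> list of links.
  """
  bad={}
  for page in pages.keys():
    for link in pages[page]:
      try:
        #urlopen raises HTTPError for 4xx/5xx responses and URLError when the host is unreachable
        status=urllib2.urlopen(link,timeout=10).getcode()
      except urllib2.HTTPError,e:
        status=e.code
      except urllib2.URLError,e:
        print >> stderr, "Could not reach %s: %s" % (link,e.reason)
        status=None
      if status is None or status >= 400:
        bad.setdefault(page,[]).append((link,status))
  return bad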
Example #2
#!/usr/bin/env python
#Created by Sam Gleske
#Mon Feb 17 17:51:02 EST 2014
#Ubuntu 13.10
#Linux 3.11.0-12-generic x86_64
#Python 2.7.5+
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from qa_nettools import crawler
# connect to a locally running Selenium server and drive Firefox through it
driver=webdriver.Remote(command_executor='http://127.0.0.1:4444/wd/hub',desired_capabilities=DesiredCapabilities.FIREFOX)
# crawl example.com, restricting the crawl to that domain
crawler=crawler(driver,domain_filter="example.com")
pages=crawler.crawl('http://example.com/')
# pages maps each crawled page URL to the links found on it
for page in pages.keys():
  for link in pages[page]:
    print "page:%(page)s, link:%(link)s" % {'page':page,'link':link}