def test_inclusion(app, client):
    wanted_urls = {'/', '/page-a'}
    crawler = Crawler(
        client=client,
        initial_paths=['/'],
        rules=[
            Rule(ANCHOR, '^/$', GET, Request()),
            Rule(ANCHOR, '^/page-a$', GET, Request()),
        ],
    )
    crawler.crawl()
    assert crawler.graph.visited_paths == wanted_urls

def test_50x_trapped_but_allowed(app, client):
    # 400 and 500 responses are explicitly allowed, so the crawl should complete
    crawler = Crawler(
        client=client,
        initial_paths=['/'],
        rules=[
            Rule(ANCHOR, ".*", GET, Request()),
            Rule(ANCHOR, ".*", GET, Allow([400, 500])),
        ],
    )
    crawler.crawl()

def test_50x_raising_exception(app, client):
    # only 400 is allowed; with capture_exceptions=False the unallowed 500
    # propagates as an HttpStatusError
    crawler = Crawler(
        client=client,
        initial_paths=['/'],
        capture_exceptions=False,
        rules=[
            Rule(ANCHOR, ".*", GET, Request()),
            Rule(ANCHOR, ".*", GET, Allow([400])),
        ],
    )
    with pytest.raises(HttpStatusError) as excinfo:
        crawler.crawl()
    assert excinfo.value.status_code == 500

def test_submit_forms_with_extra_data(app, client):
    crawler = Crawler(
        client=client,
        initial_paths=['/'],
        rules=(
            PERMISSIVE_HYPERLINKS_ONLY_RULE_SET
            + SUBMIT_POST_FORMS_RULE_SET
            + [
                Rule(FORM, ".*", GET, Request(params={'extra': 'extra'})),
                Rule(FORM, ".*", POST, Request(params={'extra': 'extra'})),
            ]
        ),
    )
    crawler.crawl()

    # check we always submitted extra data when we submitted any
    submitted_forms = [
        form for form in crawler.graph.get_nodes_by_source(FORM)
        if form.requested
    ]
    assert len(submitted_forms) > 1
    for form in submitted_forms:
        entries = lookup_requests(app, form.path, method=form.method)
        for entry in entries:
            if entry.params:
                assert 'extra' in {key for key, val in entry.params}

def test_other_crawl(client, auth):
    auth.login()
    crawler = Crawler(
        client=client,
        initial_paths=['/'],
        rules=(
            ALL_ELEMENTS_RULE_SET
            + SUBMIT_GET_FORMS_RULE_SET
            + SUBMIT_POST_FORMS_RULE_SET
            + [
                # don't log out during the crawl
                Rule(".*", r"/auth/logout", GET, Ignore()),
                # submit some data when creating a post
                Rule(".*", r"/create", POST, Request(params={
                    "title": "A Title",
                    "body": "body text",
                })),
                # add the missing body field when updating
                Rule(".*", r"/\d+/update", POST, Request(params={"body": "updated body"})),
            ]
        ),
    )
    crawler.crawl()

from python_testing_crawler import (
    Crawler,
    Rule,
    Request,
    Ignore,
    Allow,
)

GET = "GET"
POST = "POST"

ALL_ELEMENTS_RULE_SET = [Rule('.*', '/.*', GET, Request())]
SUBMIT_GET_FORMS_RULE_SET = [Rule("form", '.*', GET, Request())]
SUBMIT_POST_FORMS_RULE_SET = [Rule("form", '.*', POST, Request())]


def _crawl(client):
    crawler = Crawler(
        client=client,
        initial_paths=['/'],
        rules=(
            ALL_ELEMENTS_RULE_SET
            + SUBMIT_GET_FORMS_RULE_SET
            + SUBMIT_POST_FORMS_RULE_SET
            + [
                # don't log out during the crawl
                Rule(".*", r"/auth/logout", GET, Ignore()),
                # allow 400 responses on create and update (submitted without data)
                Rule(".*", r"/create", POST, Allow([400])),
                Rule(".*", r"/\d+/update", POST, Allow([400])),
            ]
        ),
    )
    crawler.crawl()
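

# A minimal sketch of how the _crawl helper above might be driven from
# individual tests. The test names are hypothetical; the `client` and `auth`
# fixtures are assumed to be the same ones used in test_other_crawl above.
def test_crawl_all_pages_anonymously(client):
    # crawl as an anonymous visitor: only public pages are reachable
    _crawl(client)


def test_crawl_all_pages_logged_in(client, auth):
    # log in first so the crawler can also reach /create and /<id>/update
    auth.login()
    _crawl(client)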