Пример #1
0
    def test_set_header(self):
        """
        Verifies that CarpetBag().set_header() hands back a dict and that the header ends up in CarpetBag.headers.

        """
        carpet_bag = CarpetBag()
        returned = carpet_bag.set_header("Test-Header", "Test Header Value")
        assert isinstance(returned, dict)
        assert carpet_bag.headers.get("Test-Header") == "Test Header Value"
Пример #2
0
    def test_get_public_proxies(self):
        """
        Tests BaseCarpetBag().get_public_proxies(): the proxy bag starts out empty, is filled by the call, the
        continent argument filters the returned proxies, and NoRemoteServicesConnection is raised once
        remote_service_api points at UNIT_TEST_URL_BROKEN.

        """
        carpet_bag = CarpetBag()
        carpet_bag.user_agent = UNIT_TEST_AGENT

        # A fresh bag has no proxy selected and nothing collected yet.
        assert not carpet_bag.proxy
        assert isinstance(carpet_bag.proxy_bag, list)
        assert len(carpet_bag.proxy_bag) == 0

        # The call returns the proxies and also leaves the bag populated.
        fetched = carpet_bag.get_public_proxies()
        for collection in (fetched, carpet_bag.proxy_bag):
            assert isinstance(collection, list)
            assert len(collection) > 5

        # Every proxy returned for a continent must belong to that continent.
        for continent in ("Asia", "North America"):
            for proxy in carpet_bag.get_public_proxies(continent):
                assert proxy["continent"] == continent

        # With the remote service API set to the broken URL the call has to raise.
        carpet_bag.remote_service_api = UNIT_TEST_URL_BROKEN
        with pytest.raises(errors.NoRemoteServicesConnection):
            carpet_bag.get_public_proxies()
    def test__handle_sleep(self):
        """
        Tests the _handle_sleep() method: with mininum_wait_time set, the very first request finishes quickly, while
        a following _handle_sleep() call for the same URL pauses for (roughly) the configured wait.
        @note: This test DOES make outbound web requests.
        @unit-tested: carpetbag/carpetbag/base_carpetbag.py._handle_sleep

        """
        wait_seconds = 10
        carpet_bag = CarpetBag()
        carpet_bag.use_skip_ssl_verify(force=True)
        carpet_bag.mininum_wait_time = wait_seconds

        # First request: it is expected to finish in under 5 seconds.
        first_started = datetime.now()
        carpet_bag.get(UNIT_TEST_URL)
        first_elapsed = (datetime.now() - first_started).seconds
        assert first_elapsed < 5

        # Second call targets the same URL, so this one is expected to pause.
        second_started = datetime.now()
        carpet_bag._handle_sleep(UNIT_TEST_URL)
        second_elapsed = (datetime.now() - second_started).seconds
        # Whole seconds only, hence the one second of tolerance.
        assert second_elapsed >= wait_seconds - 1
    def test__after_request(self):
        """
        Smoke test for CarpetBag._after_request(): passes a start time 5000 ms in the past, a URL and a canned
        response object, and checks that an int is returned.
        @todo: This needs real asserts on the class vars; right now it only catches a complete failure of the method.

        """
        five_seconds_ago_ms = int(round(time.time() * 1000)) - 5000
        carpet_bag = CarpetBag()
        canned_response = GoogleDotComResponse()
        result = carpet_bag._after_request(five_seconds_ago_ms,
                                           UNIT_TEST_URL_BROKEN,
                                           canned_response)
        assert isinstance(result, int)
Пример #5
0
    def test_use_skip_ssl_verify(self):
        """
        Tests CarpetBag().use_skip_ssl_verify() to make sure its return value and CarpetBag.ssl_verify flip together.

        """
        carpet_bag = CarpetBag()
        # Verification is on for a new bag.
        assert carpet_bag.ssl_verify

        # Called without arguments: truthy return, verification off.
        skipping = carpet_bag.use_skip_ssl_verify()
        assert skipping
        assert not carpet_bag.ssl_verify

        # Called with False: falsy return, verification back on.
        skipping = carpet_bag.use_skip_ssl_verify(False)
        assert not skipping
        assert carpet_bag.ssl_verify
Пример #6
0
def demo_tor_usage():
    """
    Example setting an HTTPS proxy of "https://tor:8118", reporting the outcome of check_tor(), then printing the
    outbound IP.

    """
    carpet_bag = CarpetBag()
    carpet_bag.proxy["https"] = "https://tor:8118"

    if carpet_bag.check_tor():
        status = "Congratulations, Tor is working properly"
    else:
        status = "Sorry, something is not working connecting to Tor."
    print(status)

    print(carpet_bag.get_outbound_ip())
    def test__get_headers(self):
        """
        Tests that _get_headers() returns both the end-user supplied headers and a User-Agent header matching the
        configured user agent.
        @unit-tested: carpetbag/carpetbag/base_carpetbag.py._get_headers

        """
        carpet_bag = CarpetBag()
        carpet_bag.headers = {"Content-Type": "application/html"}
        carpet_bag.user_agent = "Mozilla/5.0 (Windows NT 10.0)"

        outgoing = carpet_bag._get_headers()
        expected = (
            ("Content-Type", "application/html"),
            ("User-Agent", "Mozilla/5.0 (Windows NT 10.0)"),
        )
        for header_name, header_value in expected:
            assert outgoing[header_name] == header_value
Пример #8
0
    def test_get_outbound_ip(self):
        """
        Tests the CarpetBag().get_outbound_ip method: both the returned value and CarpetBag.outbound_ip must look like
        a dotted-quad IPv4 address.

        """
        # Something to the tune of "184.153.235.188"
        dotted_quad = r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$"

        carpet_bag = CarpetBag()
        returned_ip = carpet_bag.get_outbound_ip()
        assert re.match(dotted_quad, returned_ip)
        assert re.match(dotted_quad, carpet_bag.outbound_ip)
    def test__internal_proxies_filter_continent_param(self):
        """
        Tests the BaseCarpetBagger()._internal_proxies_filter_continent_param() to make sure a continent payload is
        turned into a name/op/val filter.

        """
        carpet_bag = CarpetBag()
        continent_filter = carpet_bag._internal_proxies_filter_continent_param(
            {"continent": "Asia"})

        expected = (("name", "continent"), ("op", "eq"), ("val", "Asia"))
        for key, value in expected:
            assert continent_filter[key] == value
Пример #10
0
    def test_get_new_user_agent(self):
        """
        Tests the CarpetBag().get_new_user_agent() method to make sure it returns a string user agent and does not hand
        back the user agent CarpetBag is currently using.

        """
        carpet_bag = CarpetBag()
        first_agent = carpet_bag.get_new_user_agent()

        # Make the first pick the current agent before asking for another one.
        carpet_bag.user_agent = first_agent
        second_agent = carpet_bag.get_new_user_agent()

        assert isinstance(first_agent, str)
        assert first_agent != second_agent
    def test___repr__(self):
        """
        Test CarpetBag's object representation (read through str()), with no proxy, an http proxy and an https proxy.
        @unit-tested: carpetbag/carpetbag/base_carpetbag.py.__repr__

        """
        carpet_bag = CarpetBag()
        assert str(carpet_bag) == "<CarpetBag>"

        # An http proxy shows up in the representation.
        carpet_bag.proxy["http"] = "http://1.20.101.234:33085"
        assert str(carpet_bag) == "<CarpetBag Proxy:http://1.20.101.234:33085>"

        # Swap the http proxy for an https one.
        del carpet_bag.proxy["http"]
        carpet_bag.proxy["https"] = "https://1.20.101.234:33085"
        assert str(carpet_bag) == "<CarpetBag Proxy:https://1.20.101.234:33085>"
    def test__internal_proxies_filter_last_test_param(self):
        """
        Tests the BaseCarpetBagger()._internal_proxies_filter_last_test_param() to make sure the last_tested filter
        uses the ">" operator and a date newer than public_proxies_max_last_test_weeks + 1 weeks ago.

        """
        carpet_bag = CarpetBag()
        last_test_filter = carpet_bag._internal_proxies_filter_last_test_param({})

        # One week further back than the configured maximum.
        look_back = timedelta(
            weeks=carpet_bag.public_proxies_max_last_test_weeks + 1)
        oldest_allowed = arrow.utcnow().datetime - look_back

        assert last_test_filter["name"] == "last_tested"
        assert last_test_filter["op"] == ">"
        assert ct.json_to_date(last_test_filter["val"]) > oldest_allowed
    def test__make(self):
        """
        Tests the _make() method of CarpetBag by issuing the same GET twice and expecting a 200 both times. This is
        one of the primary methods of CarpetBag, and could always use more tests!
        @note: This test DOES make outbound web requests.
        @unit-tested: carpetbag/carpetbag/base_carpetbag.py._make

        """
        proxies_url = ct.url_join(UNIT_TEST_URL, 'proxies')
        carpet_bag = CarpetBag()
        carpet_bag.use_skip_ssl_verify()
        carpet_bag._start_request_manifest("GET", proxies_url, {})

        def make_get_request():
            # Builds fresh header/payload dicts on every call.
            return carpet_bag._make(
                method="GET",
                url=proxies_url,
                headers={"Content-Type": "application/html"},
                payload={},
                retry=0)

        first_response = make_get_request()
        # NOTE(review): presumably gives the second _make() call a manifest entry to work with - confirm.
        carpet_bag.manifest.append({})
        assert first_response
        assert first_response.status_code == 200

        second_response = make_get_request()
        assert second_response
        assert second_response.status_code == 200
    def test__make_internal(self):
        """
        Tests the BaseCarpetBagger()._make_internal() method: the "ip" call must return JSON carrying an "ip" value,
        and NoRemoteServicesConnection must be raised once remote_service_api points at UNIT_TEST_URL_BROKEN.
        @note: This test DOES make outbound web requests.

        """
        carpet_bag = CarpetBag()
        ip_payload = carpet_bag._make_internal("ip").json()
        assert str(ip_payload["ip"])

        carpet_bag.remote_service_api = UNIT_TEST_URL_BROKEN
        with pytest.raises(errors.NoRemoteServicesConnection):
            carpet_bag._make_internal("ip")
    def test__make_request(self):
        """
        Tests the BaseCarpetBag._make_request() method with a GET that must come back truthy, with text and a 200.
        @note: This test DOES make outbound web requests.
        @unit-tested: carpetbag/carpetbag/base_carpetbag.py._make_request

        """
        carpet_bag = CarpetBag()
        carpet_bag.use_skip_ssl_verify()

        proxies_url = ct.url_join(UNIT_TEST_URL, 'proxies')
        response = carpet_bag._make_request("GET", proxies_url)

        assert response
        assert response.text
        assert response.status_code == 200
    def test__start_manifest(self):
        """
        Tests the BaseCarpetBag._start_request_manifest() to make sure it returns a new manifest record with the
        request details filled in, the outcome fields still empty, and one entry stored on the bag.

        """
        carpet_bag = CarpetBag()
        entry = carpet_bag._start_request_manifest("GET", UNIT_TEST_URL)

        assert isinstance(entry, dict)
        assert entry["method"] == "GET"
        assert entry["url"] == UNIT_TEST_URL
        assert isinstance(entry["date_start"], datetime)

        # Nothing about the outcome is known when the record is started.
        for unfilled in ("date_end", "response", "errors"):
            assert not entry[unfilled]

        assert len(carpet_bag.manifest) == 1
    def test__internal_proxies_params(self):
        """
        Tests BaseCarpetBagger()._internal_proxies_params() method to make sure a continent payload produces request
        params with a "q" entry that contains "filters".
        @todo: This should test other filters, order_by and limit query params.

        """
        carpet_bag = CarpetBag()
        params = carpet_bag._internal_proxies_params(
            {"continent": "North America"})

        assert "q" in params
        assert "filters" in params["q"]
    def test__determine_save_file_name(self):
        """
        Tests the BaseCarpetBag()._determine_save_file_name() for a URL that already ends in a file name and for one
        that is expected to get a ".json" extension.
        @todo: Needs more test cases

        """
        carpet_bag = CarpetBag()
        destination = "/opt/carpetbag/tests/data"

        # (path under UNIT_TEST_URL, content type, expected full file name)
        cases = (
            ("test/download/hacker-man.gif", "image/gif",
             "/opt/carpetbag/tests/data/hacker-man.gif"),
            ("ip", "application/json",
             "/opt/carpetbag/tests/data/ip.json"),
        )
        for url_path, content_type, expected in cases:
            saved_as = carpet_bag._determine_save_file_name(
                ct.url_join(UNIT_TEST_URL, url_path), content_type, destination)
            assert saved_as == expected
    def test__handle_connection_error(self):
        """
        Tests the BaseCarpetBag._handle_connection_error() method, expecting a ConnectionError for the broken unit
        test URL.
        @todo: Needs more test cases.

        """
        carpet_bag = CarpetBag()
        # An empty manifest entry stands in for a _start_request_manifest() call.
        carpet_bag.manifest.append({})

        failing_request = {
            "method": "GET",
            "url": UNIT_TEST_URL_BROKEN,
            "headers": {},
            "payload": {},
            "retry": 0,
        }
        with pytest.raises(requests.exceptions.ConnectionError):
            carpet_bag._handle_connection_error(**failing_request)
Пример #20
0
    def test_retry_on_bad_connection(self):
        """
        Tests that when wait_and_retry_on_connection_error has a value, a failing request takes at least four times
        that many seconds before the ConnectionError surfaces, and is counted as a single request.

        """
        started = datetime.now()

        carpet_bag = CarpetBag()
        carpet_bag.wait_and_retry_on_connection_error = 3
        with pytest.raises(requests.exceptions.ConnectionError):
            carpet_bag.get("http://0.0.0.0:90/api/symbols/1")

        elapsed_seconds = (datetime.now() - started).seconds
        minimum_expected = carpet_bag.wait_and_retry_on_connection_error * 4
        assert elapsed_seconds >= minimum_expected

        assert carpet_bag.request_total == 1
        assert carpet_bag.request_count == 1
    def test__prep_destination(self):
        """
        Tests the BaseCarpetBag._prep_destination method to make sure we have dirs ready to go when needed to store
        files into.

        Asks for a nested "one/two" directory next to this test file's directory and checks that it exists afterwards.
        The created "one" tree is removed in a finally block, so it is cleaned up even if _prep_destination() raises
        or the assertion fails.

        """
        bagger = CarpetBag()
        dirname = os.path.dirname(os.path.abspath(__file__))
        top_new_dir = os.path.join(dirname, "..", "one")
        new_dirs = os.path.join(top_new_dir, "two")

        # Start from a state where the target dir does not exist.
        if os.path.exists(new_dirs):
            shutil.rmtree(new_dirs)

        try:
            # Check that we build the dir
            bagger._prep_destination(new_dirs)
            assert os.path.isdir(new_dirs)
        finally:
            # Remove whatever part of the tree got created.
            if os.path.exists(top_new_dir):
                shutil.rmtree(top_new_dir)
    def test__set_user_agent(self):
        """
        Tests to make sure _set_user_agent keeps a manually set user_agent: send_user_agent must equal whatever
        user_agent was assigned before the call.
        @unit-tested: carpetbag/carpetbag/base_carpetbag.py._set_user_agent

        """
        carpet_bag = CarpetBag()
        for manual_agent in ("My test user agent 1", "My test user agent 2"):
            carpet_bag.user_agent = manual_agent
            carpet_bag._set_user_agent()
            assert carpet_bag.send_user_agent == manual_agent
Пример #23
0
    def test_get(self):
        """
        Tests the CarpetBag.get() method: configures a bag, fetches the api/proxies endpoint under UNIT_TEST_URL, then
        hands the bag to the _run_get_successful_test and _run_unable_to_connect helper checks.

        """
        carpet_bag = CarpetBag()
        carpet_bag.mininum_wait_time = 50
        carpet_bag.retries_on_connection_failure = 0
        carpet_bag.use_skip_ssl_verify(force=True)
        carpet_bag.user_agent = UNIT_TEST_AGENT
        carpet_bag.remote_service_api = UNIT_TEST_URL

        proxies_endpoint = ct.url_join(UNIT_TEST_URL, 'api/proxies')
        successful_response = carpet_bag.get(proxies_endpoint)

        assert self._run_get_successful_test(carpet_bag, successful_response)
        # The _run_inspect_manifest(carpet_bag) check is currently disabled.
        assert self._run_unable_to_connect(carpet_bag)
Пример #24
0
def public_proxy_with_reset():
    """
    Example grabbing a site with a random user agent and free public proxy.
    If the first request raises a ConnectionError, the proxy is reset from the bag and the request is tried once
    more. The response is printed at the end.

    """
    print("Setup the bagger.")
    bagger = CarpetBag()

    print("Configure the bagger to use a random user agent.")
    bagger.use_random_user_agent()

    print("Configure the bagger to use a random public proxy.")
    bagger.use_random_public_proxy()

    try:
        response = bagger.get("http://www.google.com")
    except requests.exceptions.ConnectionError:
        # Connection failed: reset the proxy from the bag and try the same URL once more.
        print("resetting bag")
        bagger.reset_proxy_from_bag()
        response = bagger.get("http://www.google.com")
    print(response)
Пример #25
0
    def test_use_random_user_agent(self):
        """
        Tests CarpetBag.use_random_user_agent(): the default and manually set user agents, switching the random user
        agent on and off, and the user agent that is sent after it has been switched off.

        """
        carpet_bag = CarpetBag()
        default_agent = "CarpetBag v%s" % carpet_bag.__version__
        assert carpet_bag.user_agent == default_agent

        carpet_bag.user_agent = UNIT_TEST_AGENT
        assert carpet_bag.user_agent == UNIT_TEST_AGENT
        assert not carpet_bag.random_user_agent

        # Turn on random user agent.
        switched_on = carpet_bag.use_random_user_agent()
        assert switched_on
        assert carpet_bag.random_user_agent

        # Turn it off again; the user agent is expected to be blank afterwards.
        still_on = carpet_bag.use_random_user_agent(False)
        assert not still_on
        assert carpet_bag.user_agent == ""

        # Test that we send the chosen user agent
        carpet_bag.use_skip_ssl_verify()
        carpet_bag.get(carpet_bag.remote_service_api)
        assert carpet_bag.send_user_agent == ""
    def test__end_manifest(self):
        """
        Tests the BaseCarpetBag()._end_manifest() method to make sure it fills in the end date and roundtrip of the
        manifest record stored on the class, without adding a second record.

        """
        carpet_bag = CarpetBag()

        # An open record, i.e. one whose outcome fields are still empty.
        # NOTE(review): the "payload_size " key carries a trailing space - confirm whether that is intentional.
        open_record = {
            "method": "GET",
            "url": UNIT_TEST_URL,
            "payload_size ": 0,
            "date_start": arrow.utcnow(),
            "date_end": None,
            "roundtrip": None,
            "response": None,
            "retry": 0,
            "errors": [],
        }
        carpet_bag.manifest.insert(0, open_record)

        carpet_bag._end_manifest("5", 1.54)

        assert isinstance(carpet_bag.manifest, list)
        assert len(carpet_bag.manifest) == 1
        closed_record = carpet_bag.manifest[0]
        assert closed_record["date_end"]
        assert closed_record["roundtrip"] == 1.54
    def test__validate_continents(self):
        """
        Tests the BaseCarpetBag._validate_continents() method: valid continent names pass, a misspelled one raises
        InvalidContinent.
        @unit-tested: carpetbag/carpetbag/base_carpetbag.py._validate_continents

        """
        carpet_bag = CarpetBag()

        valid_inputs = (
            ["North America"],
            ["North America", "South America"],
        )
        for continents in valid_inputs:
            assert carpet_bag._validate_continents(continents)

        with pytest.raises(errors.InvalidContinent):
            carpet_bag._validate_continents(["Nortf America"])
    def test___init__(self):
        """
        Tests that a freshly constructed CarpetBag carries the expected default values.
        This test makes no outbound requests.
        @unit-tested: carpetbag/carpetbag/base_carpetbag.py.__init__

        """
        carpet_bag = CarpetBag()

        # Attributes that must equal an exact default.
        expected_defaults = (
            ("headers", {}),
            ("user_agent", "CarpetBag v%s" % carpet_bag.__version__),
            ("mininum_wait_time", 0),  # @todo: cover usage in unit test
            ("wait_and_retry_on_connection_error", 0),  # @todo: cover usage in unit test
            ("retries_on_connection_failure", 5),  # @todo: cover usage in unit test
            ("max_content_length", 200000000),  # @todo: cover usage in unit test
            ("change_identity_interval", 0),  # @todo: build and test this functionality
            ("remote_service_api", UNIT_TEST_URL),
            ("request_count", 0),
            ("request_total", 0),
            ("manifest", []),
            ("proxy", {}),
            ("proxy_bag", []),
            ("send_user_agent", ""),
            ("paginatation_map", {
                "field_name_page": "page",
                "field_name_total_pages": "total_pages",
                "field_name_data": "objects",
            }),
        )
        for attribute, default in expected_defaults:
            assert getattr(carpet_bag, attribute) == default

        # Attributes that only need to be falsy.
        unset_attributes = (
            "random_user_agent",
            "username",
            "password",
            "auth_type",
            "outbound_ip",
            "last_request_time",
            "last_response",
            "random_proxy_bag",
            "send_usage_stats_val",
            "usage_stats_api_key",
            "force_skip_ssl_verify",
        )
        for attribute in unset_attributes:
            assert not getattr(carpet_bag, attribute)

        # Remaining checks: SSL verification is on, plus two type checks.
        assert carpet_bag.ssl_verify
        assert isinstance(carpet_bag.usage_stats_api_key, str)
        assert isinstance(carpet_bag.one_time_headers, list)
    def test__increment_counters(self):
        """
        Tests that each _increment_counters() call raises both request_count and request_total by one.

        """
        carpet_bag = CarpetBag()
        assert carpet_bag.request_count == 0
        assert carpet_bag.request_total == 0

        for expected in (1, 2):
            carpet_bag._increment_counters()
            assert carpet_bag.request_count == expected
            assert carpet_bag.request_total == expected
    def test__cleanup_one_time_headers(self):
        """
        Tests the BaseCarpetBag()._cleanup_one_time_headers() to make sure headers listed in one_time_headers are
        removed from the headers while every other header is kept.

        """
        carpet_bag = CarpetBag()
        carpet_bag.one_time_headers = ["Test", "Headers"]
        carpet_bag.headers = {
            "Test": "Some value",
            "Headers": "Some other value",
            "One that Stays": "Value",
        }

        carpet_bag._cleanup_one_time_headers()

        for removed in ("Test", "Headers"):
            assert removed not in carpet_bag.headers
        assert "One that Stays" in carpet_bag.headers