Exemplo n.º 1
0
    def handle_request_denied(self, status_code):
        """Handle a response that looks like the search engine's captcha page.

        The parent handler is always invoked with a fixed ``'400'`` status,
        because selenium webdriver objects expose no HTTP status code. The
        current URL and page source are then compared against the
        ``malicious_request_needles`` entry for this search engine; if they do
        not both match, nothing else happens.

        On a match there are two modes:

        * ``manual_captcha_solving`` enabled in the config: a screenshot of
          the page is written to a temporary PNG file and opened with
          ``webbrowser``, the user is prompted on the command line for the
          captcha solution, and the solution is typed into the element named
          ``submit``. The method then waits up to 5 seconds for the search
          input field to become visible and stores it in ``self.search_input``.
        * otherwise: the method blocks for up to 10 hours until the search
          input field appears, i.e. until someone solves the captcha in the
          browser window.

        Args:
            status_code: Not used by this implementation; kept for interface
                compatibility with callers.

        Returns:
            Whatever ``_wait_until_search_input_field_appears`` returns when
            waiting for the user to solve the captcha in the browser. ``None``
            when no block was detected or when the manual command-line flow
            was used (the field is then available as ``self.search_input``).

        Raises:
            MaliciousRequestDetected: When the search input field does not
                become visible after the captcha solution was submitted.
        """
        # selenium webdriver objects have no status code :/
        super().handle_request_denied('400')

        needles = self.malicious_request_needles[self.search_engine_name]
        blocked = (
            needles
            and needles['inurl'] in self.webdriver.current_url
            and needles['inhtml'] in self.webdriver.page_source
        )
        if not blocked:
            return None

        if not self.config.get('manual_captcha_solving', False):
            # Just wait until the user solves the captcha in the browser
            # window, 10 hours if needed.
            logger.info('Waiting for user to solve captcha')
            return self._wait_until_search_input_field_appears(10 * 60 * 60)

        import tempfile
        import webbrowser

        # NOTE(review): the lock presumably serializes prompts between
        # concurrent scraper workers -- confirm against where it is created.
        with self.captcha_lock:
            # The context manager removes the temp file even if anything
            # below raises; the '.png' suffix lets the viewer pick the
            # right handler for the file.
            with tempfile.NamedTemporaryFile('wb', suffix='.png') as tf:
                tf.write(self.webdriver.get_screenshot_as_png())
                # Without flushing, the buffered bytes may not be on disk yet
                # and the viewer would open an empty or truncated image.
                tf.flush()
                webbrowser.open('file://{}'.format(tf.name))
                # Keep the file alive while the user is looking at it.
                solution = input('enter the captcha please...')

            self.webdriver.find_element_by_name('submit').send_keys(
                solution + Keys.ENTER)
            try:
                self.search_input = WebDriverWait(self.webdriver, 5).until(
                    EC.visibility_of_element_located(
                        self._get_search_input_field()))
            except TimeoutException as exc:
                raise MaliciousRequestDetected(
                    'Requesting with this ip is not possible at the moment.'
                ) from exc

        return None
Exemplo n.º 2
0
    def handle_request_denied(self):
        """Handle a response that looks like the search engine's captcha page.

        Does nothing unless ``self.malicious_request_detected()`` is true.
        When it is, the parent handler is invoked with a fixed ``'400'``
        status (selenium webdriver objects expose no HTTP status code) and
        one of three config-driven paths is taken:

        * ``manual_captcha_solving`` enabled and ``browser_mode`` is not
          ``'headless'``: the user is asked on the command line to solve the
          captcha in the browser window and confirm. The method then waits up
          to 7 seconds for the search input field to become visible and stores
          it in ``self.search_input``.
        * ``captcha_solving_service`` enabled: not implemented yet; a warning
          is logged and the captcha is left unsolved.
        * otherwise: the method blocks for up to 10 hours until the search
          input field appears.

        Returns:
            Whatever ``_wait_until_search_input_field_appears`` returns on the
            last path; ``None`` on every other path.

        Raises:
            MaliciousRequestDetected: When the search input field does not
                become visible after the user confirmed solving the captcha.
        """
        if not self.malicious_request_detected():
            return None

        # selenium webdriver objects have no status code :/
        super().handle_request_denied('400')

        # Interactive solving only makes sense when there is a visible
        # browser window, i.e. in non headless mode.
        solve_in_browser = (
            self.config.get('manual_captcha_solving', False)
            and self.config.get('browser_mode') != 'headless'
        )

        if solve_in_browser:
            with self.captcha_lock:
                # The entered text is irrelevant; the prompt only blocks
                # until the user confirms the captcha has been solved.
                input(
                    'Please solve the captcha in the browser! Enter any key when done...'
                )
                try:
                    self.search_input = WebDriverWait(self.webdriver, 7).until(
                        EC.visibility_of_element_located(
                            self._get_search_input_field()))
                except TimeoutException as exc:
                    raise MaliciousRequestDetected(
                        'Requesting with this IP address or cookies is not possible at the moment.'
                    ) from exc
            return None

        if self.config.get('captcha_solving_service', False):
            # TODO: implement request to a captcha solving service such
            # as https://2captcha.com/
            logger.warning(
                'captcha_solving_service is enabled but not implemented; '
                'the captcha was not solved')
            return None

        # Just wait until the user solves the captcha in the browser window,
        # 10 hours if needed.
        logger.info('Waiting for user to solve captcha')
        return self._wait_until_search_input_field_appears(10 * 60 * 60)