Example #1
def search_one_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
    # set timeout for all HTTP requests
    requests_lib.set_timeout_for_thread(timeout_limit, start_time=start_time)
    # reset the HTTP total time
    requests_lib.reset_time_for_thread()

    # look up the engine by name
    engine = engines[engine_name]

    # suppose everything will be alright
    requests_exception = False

    try:
        # send requests and parse the results
        search_results = search_one_request(engine, query, request_params)

        # add results
        result_container.extend(engine_name, search_results)

        # update engine time when there is no exception
        with threading.RLock():
            engine.stats['engine_time'] += time() - start_time
            engine.stats['engine_time_count'] += 1
            # update stats with the total HTTP time
            engine.stats['page_load_time'] += requests_lib.get_time_for_thread()
            engine.stats['page_load_count'] += 1

    except Exception as e:
        search_duration = time() - start_time

        with threading.RLock():
            engine.stats['errors'] += 1

        if (issubclass(e.__class__, requests.exceptions.Timeout)):
            result_container.add_unresponsive_engine((engine_name, gettext('timeout')))
            # requests timeout (connect or read)
            logger.error("engine {0} : HTTP requests timeout "
                         "(search duration : {1} s, timeout: {2} s) : {3}"
                         .format(engine_name, search_duration, timeout_limit, e.__class__.__name__))
            requests_exception = True
        elif (issubclass(e.__class__, requests.exceptions.RequestException)):
            result_container.add_unresponsive_engine((engine_name, gettext('request exception')))
            # other requests exception
            logger.exception("engine {0} : requests exception "
                             "(search duration : {1} s, timeout: {2} s) : {3}"
                             .format(engine_name, search_duration, timeout_limit, e))
            requests_exception = True
        else:
            result_container.add_unresponsive_engine((
                engine_name,
                u'{0}: {1}'.format(gettext('unexpected crash'), e),
            ))
            # other errors
            logger.exception('engine {0} : exception : {1}'.format(engine_name, e))

    # decide whether to suspend the engine after HTTP errors
    with threading.RLock():
        if requests_exception:
            # update continuous_errors / suspend_end_time
            engine.continuous_errors += 1
            engine.suspend_end_time = time() + min(60, engine.continuous_errors)
        else:
            # no HTTP error (perhaps an engine error)
            # anyway, reset the suspend variables
            engine.continuous_errors = 0
            engine.suspend_end_time = 0
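
The function above runs in its own thread, one per engine, with every worker sharing the same start_time and timeout_limit. Below is a minimal sketch of how such workers could be driven against a common deadline; run_engines_with_deadline and the job tuple layout are illustrative rather than the actual searx API, but the _timeout thread attribute matches the flag checked in the later examples.

import threading
from time import time

def run_engines_with_deadline(jobs, result_container, timeout_limit):
    # jobs: iterable of (engine_name, query, request_params) tuples (assumed shape)
    start_time = time()
    threads = []
    for engine_name, query, request_params in jobs:
        th = threading.Thread(
            target=search_one_request_safe,
            args=(engine_name, query, request_params,
                  result_container, start_time, timeout_limit),
            name='search_request_' + engine_name,
        )
        th.start()
        threads.append(th)
    # wait until the shared deadline; workers still alive afterwards are
    # flagged so they can record the timeout themselves
    for th in threads:
        remaining = timeout_limit - (time() - start_time)
        th.join(max(remaining, 0))
        if th.is_alive():
            th._timeout = True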
Example #2
    def search(self, query, params, result_container, start_time,
               timeout_limit):
        # set timeout for all HTTP requests
        poolrequests.set_timeout_for_thread(timeout_limit,
                                            start_time=start_time)
        # reset the HTTP total time
        poolrequests.reset_time_for_thread()
        # enable HTTP only if explicitly enabled
        poolrequests.set_enable_http_protocol(self.engine.enable_http)

        # suppose everything will be alright
        requests_exception = False
        suspended_time = None

        try:
            # send requests and parse the results
            search_results = self._search_basic(query, params)

            # check if the engine accepted the request
            if search_results is not None:
                # yes, so add results
                result_container.extend(self.engine_name, search_results)

                # update engine time when there is no exception
                engine_time = time() - start_time
                page_load_time = poolrequests.get_time_for_thread()
                result_container.add_timing(self.engine_name, engine_time,
                                            page_load_time)
                with threading.RLock():
                    self.engine.stats['engine_time'] += engine_time
                    self.engine.stats['engine_time_count'] += 1
                    # update stats with the total HTTP time
                    self.engine.stats['page_load_time'] += page_load_time
                    self.engine.stats['page_load_count'] += 1
        except Exception as e:
            record_exception(self.engine_name, e)

            # Timing
            engine_time = time() - start_time
            page_load_time = poolrequests.get_time_for_thread()
            result_container.add_timing(self.engine_name, engine_time,
                                        page_load_time)

            # Record the errors
            with threading.RLock():
                self.engine.stats['errors'] += 1

            if (issubclass(e.__class__, requests.exceptions.Timeout)):
                result_container.add_unresponsive_engine(
                    self.engine_name, 'HTTP timeout')
                # requests timeout (connect or read)
                logger.error(
                    "engine {0} : HTTP requests timeout"
                    "(search duration : {1} s, timeout: {2} s) : {3}".format(
                        self.engine_name, engine_time, timeout_limit,
                        e.__class__.__name__))
                requests_exception = True
            elif (issubclass(e.__class__,
                             requests.exceptions.RequestException)):
                result_container.add_unresponsive_engine(
                    self.engine_name, 'HTTP error')
                # other requests exception
                logger.exception(
                    "engine {0} : requests exception"
                    "(search duration : {1} s, timeout: {2} s) : {3}".format(
                        self.engine_name, engine_time, timeout_limit, e))
                requests_exception = True
            elif (issubclass(e.__class__, SearxEngineCaptchaException)):
                result_container.add_unresponsive_engine(
                    self.engine_name, 'CAPTCHA required')
                logger.exception('engine {0} : CAPTCHA'.format(
                    self.engine_name))
                suspended_time = e.suspended_time  # pylint: disable=no-member
            elif (issubclass(e.__class__,
                             SearxEngineTooManyRequestsException)):
                result_container.add_unresponsive_engine(
                    self.engine_name, 'too many requests')
                logger.exception('engine {0} : Too many requests'.format(
                    self.engine_name))
                suspended_time = e.suspended_time  # pylint: disable=no-member
            elif (issubclass(e.__class__, SearxEngineAccessDeniedException)):
                result_container.add_unresponsive_engine(
                    self.engine_name, 'blocked')
                logger.exception('engine {0} : Searx is blocked'.format(
                    self.engine_name))
                suspended_time = e.suspended_time  # pylint: disable=no-member
            else:
                result_container.add_unresponsive_engine(
                    self.engine_name, 'unexpected crash')
                # other errors
                logger.exception('engine {0} : exception : {1}'.format(
                    self.engine_name, e))
        else:
            if getattr(threading.current_thread(), '_timeout', False):
                record_error(self.engine_name, 'Timeout')

        # suspend the engine if there is an HTTP error
        # or suspended_time is defined
        with threading.RLock():
            if requests_exception or suspended_time:
                # update continuous_errors / suspend_end_time
                self.engine.continuous_errors += 1
                if suspended_time is None:
                    suspended_time = min(
                        settings['search']['max_ban_time_on_fail'],
                        self.engine.continuous_errors *
                        settings['search']['ban_time_on_fail'])
                self.engine.suspend_end_time = time() + suspended_time
            else:
                # reset the suspend variables
                self.engine.continuous_errors = 0
                self.engine.suspend_end_time = 0
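
The suspension block at the end implements a capped linear backoff: every consecutive failure lengthens the ban until it reaches max_ban_time_on_fail. A self-contained sketch of that schedule, with assumed values standing in for the settings['search'] entries:

BAN_TIME_ON_FAIL = 5        # seconds added per consecutive failure (assumption)
MAX_BAN_TIME_ON_FAIL = 120  # hard cap on the suspension (assumption)

def suspension_seconds(continuous_errors):
    # same formula as above: linear in the error streak, capped
    return min(MAX_BAN_TIME_ON_FAIL, continuous_errors * BAN_TIME_ON_FAIL)

for streak in (1, 2, 3, 10, 50):
    print(streak, suspension_seconds(streak))  # -> 5, 10, 15, 50, 120

An exception that carries its own suspended_time (CAPTCHA, rate limiting, access denied) bypasses this formula entirely.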
Example #3
def search_one_http_request_safe(engine_name, query, request_params,
                                 result_container, start_time, timeout_limit):
    # set timeout for all HTTP requests
    requests_lib.set_timeout_for_thread(timeout_limit, start_time=start_time)
    # reset the HTTP total time
    requests_lib.reset_time_for_thread()

    # look up the engine by name
    engine = engines[engine_name]

    # suppose everything will be alright
    requests_exception = False

    try:
        # send requests and parse the results
        search_results = search_one_http_request(engine, query, request_params)

        # check if the engine accepted the request
        if search_results is not None:
            # yes, so add results
            result_container.extend(engine_name, search_results)

            # update engine time when there is no exception
            engine_time = time() - start_time
            page_load_time = requests_lib.get_time_for_thread()
            result_container.add_timing(engine_name, engine_time,
                                        page_load_time)
            with threading.RLock():
                engine.stats['engine_time'] += engine_time
                engine.stats['engine_time_count'] += 1
                # update stats with the total HTTP time
                engine.stats['page_load_time'] += page_load_time
                engine.stats['page_load_count'] += 1
    except Exception as e:
        record_exception(engine_name, e)

        # Timing
        engine_time = time() - start_time
        page_load_time = requests_lib.get_time_for_thread()
        result_container.add_timing(engine_name, engine_time, page_load_time)

        # Record the errors
        with threading.RLock():
            engine.stats['errors'] += 1

        if (issubclass(e.__class__, requests.exceptions.Timeout)):
            result_container.add_unresponsive_engine(engine_name,
                                                     'HTTP timeout')
            # requests timeout (connect or read)
            logger.error(
                "engine {0} : HTTP requests timeout"
                "(search duration : {1} s, timeout: {2} s) : {3}".format(
                    engine_name, engine_time, timeout_limit,
                    e.__class__.__name__))
            requests_exception = True
        elif (issubclass(e.__class__, requests.exceptions.RequestException)):
            result_container.add_unresponsive_engine(engine_name, 'HTTP error')
            # other requests exception
            logger.exception(
                "engine {0} : requests exception"
                "(search duration : {1} s, timeout: {2} s) : {3}".format(
                    engine_name, engine_time, timeout_limit, e))
            requests_exception = True
        elif (issubclass(e.__class__, SearxEngineCaptchaException)):
            result_container.add_unresponsive_engine(engine_name,
                                                     'CAPTCHA required')
            logger.exception('engine {0} : CAPTCHA'.format(engine_name))
        else:
            result_container.add_unresponsive_engine(engine_name,
                                                     'unexpected crash')
            # other errors
            logger.exception('engine {0} : exception : {1}'.format(
                engine_name, e))
    else:
        if getattr(threading.current_thread(), '_timeout', False):
            record_error(engine_name, 'Timeout')

    # decide whether to suspend the engine after HTTP errors
    with threading.RLock():
        if requests_exception:
            # update continuous_errors / suspend_end_time
            engine.continuous_errors += 1
            engine.suspend_end_time = time() + min(
                settings['search']['max_ban_time_on_fail'],
                engine.continuous_errors *
                settings['search']['ban_time_on_fail'])
        else:
            # no HTTP error (perhaps an engine error)
            # anyway, reset the suspend variables
            engine.continuous_errors = 0
            engine.suspend_end_time = 0
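
All of these variants depend on requests_lib (or poolrequests) keeping a per-thread deadline and a per-thread accumulator of HTTP time. A plausible sketch of that mechanism built on threading.local follows; it is an assumption about the shape of those helpers, not the actual searx implementation.

import threading
from time import time

_state = threading.local()

def set_timeout_for_thread(timeout, start_time=None):
    # each engine thread stores its own deadline
    _state.timeout = timeout
    _state.start_time = start_time if start_time is not None else time()

def reset_time_for_thread():
    # zero the per-thread HTTP time accumulator before a new search
    _state.total_http_time = 0.0

def add_time_for_thread(duration):
    # hypothetical helper, called around each outgoing HTTP request
    _state.total_http_time = getattr(_state, 'total_http_time', 0.0) + duration

def get_time_for_thread():
    # total HTTP time for this thread, as read by the stats code above
    return getattr(_state, 'total_http_time', 0.0)

Because the state lives in threading.local, concurrent engine threads never see each other's deadlines or timings.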
Example #4
def search_one_http_request_safe(
    engine_name, query, request_params, result_container, start_time, timeout_limit
):
    # set timeout for all HTTP requests
    requests_lib.set_timeout_for_thread(timeout_limit, start_time=start_time)
    # reset the HTTP total time
    requests_lib.reset_time_for_thread()

    # look up the engine by name
    engine = engines[engine_name]

    # suppose everything will be alright
    requests_exception = False
    suspended_time = None

    try:
        # send requests and parse the results
        search_results = search_one_http_request(engine, query, request_params)

        # check if the engine accepted the request
        if search_results is not None:
            # yes, so add results
            result_container.extend(engine_name, search_results)

            # update engine time when there is no exception
            engine_time = time() - start_time
            page_load_time = requests_lib.get_time_for_thread()
            result_container.add_timing(engine_name, engine_time, page_load_time)
            with threading.RLock():
                engine.stats["engine_time"] += engine_time
                engine.stats["engine_time_count"] += 1
                # update stats with the total HTTP time
                engine.stats["page_load_time"] += page_load_time
                engine.stats["page_load_count"] += 1
    except Exception as e:
        record_exception(engine_name, e)

        # Timing
        engine_time = time() - start_time
        page_load_time = requests_lib.get_time_for_thread()
        result_container.add_timing(engine_name, engine_time, page_load_time)

        # Record the errors
        with threading.RLock():
            engine.stats["errors"] += 1

        if issubclass(e.__class__, requests.exceptions.Timeout):
            result_container.add_unresponsive_engine(engine_name, "HTTP timeout")
            # requests timeout (connect or read)
            logger.error(
                "engine {0} : HTTP requests timeout"
                "(search duration : {1} s, timeout: {2} s) : {3}".format(
                    engine_name, engine_time, timeout_limit, e.__class__.__name__
                )
            )
            requests_exception = True
        elif issubclass(e.__class__, requests.exceptions.RequestException):
            result_container.add_unresponsive_engine(engine_name, "HTTP error")
            # other requests exception
            logger.exception(
                "engine {0} : requests exception"
                "(search duration : {1} s, timeout: {2} s) : {3}".format(
                    engine_name, engine_time, timeout_limit, e
                )
            )
            requests_exception = True
        elif issubclass(e.__class__, SearxEngineCaptchaException):
            result_container.add_unresponsive_engine(engine_name, "CAPTCHA required")
            logger.exception("engine {0} : CAPTCHA")
            suspended_time = e.suspended_time  # pylint: disable=no-member
        elif issubclass(e.__class__, SearxEngineTooManyRequestsException):
            result_container.add_unresponsive_engine(engine_name, "too many requests")
            logger.exception("engine {0} : Too many requests")
            suspended_time = e.suspended_time  # pylint: disable=no-member
        elif issubclass(e.__class__, SearxEngineAccessDeniedException):
            result_container.add_unresponsive_engine(engine_name, "blocked")
            logger.exception("engine {0} : Searx is blocked")
            suspended_time = e.suspended_time  # pylint: disable=no-member
        else:
            result_container.add_unresponsive_engine(engine_name, "unexpected crash")
            # other errors
            logger.exception("engine {0} : exception : {1}".format(engine_name, e))
    else:
        if getattr(threading.current_thread(), "_timeout", False):
            record_error(engine_name, "Timeout")

    # suspend the engine if there is an HTTP error
    # or suspended_time is defined
    with threading.RLock():
        if requests_exception or suspended_time:
            # update continuous_errors / suspend_end_time
            engine.continuous_errors += 1
            if suspended_time is None:
                suspended_time = min(
                    settings["search"]["max_ban_time_on_fail"],
                    engine.continuous_errors * settings["search"]["ban_time_on_fail"],
                )
            engine.suspend_end_time = time() + suspended_time
        else:
            # reset the suspend variables
            engine.continuous_errors = 0
            engine.suspend_end_time = 0
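
The example above reads a suspended_time attribute off the CAPTCHA, too-many-requests and access-denied exceptions, letting each failure class suggest its own suspension instead of the backoff formula. An illustrative shape for those classes follows; the durations are assumptions, not the real searx.exceptions defaults.

class SearxEngineResponseException(Exception):
    """Illustrative base class for engine-side failures."""

class SearxEngineAccessDeniedException(SearxEngineResponseException):
    # read by the handler above as e.suspended_time
    suspended_time = 86400  # assumed: suspend for one day

class SearxEngineCaptchaException(SearxEngineAccessDeniedException):
    suspended_time = 86400  # assumed: CAPTCHA also suspends for a day

class SearxEngineTooManyRequestsException(SearxEngineAccessDeniedException):
    suspended_time = 3600   # assumed: rate limiting suspends for an hour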