Example #1
0
def search_task():
    """Handle a search HTTP request.

    Builds a task dict (query, selected categories, page number, settings)
    from the request parameters, runs a Search over it and returns results,
    suggestions, answers and infoboxes as JSON.  Invalid input yields a
    JSON error payload with HTTP status 500.
    """
    task = dict(query='',
                selected_categories=['general'],
                pageno=1,
                settings=get_default_settings())

    # Determine the client IP: behind a proxy, X-Forwarded-For carries the
    # originating address.  The header value may be a comma-separated chain
    # (client, proxy1, proxy2, ...) — take the first entry.
    x_forwarded_for = request.headers.getlist("X-Forwarded-For")
    if x_forwarded_for:
        ip = x_forwarded_for[0].split(',')[0].strip()
    else:
        ip = request.remote_addr

    user_data = {
        'method': request.method,
        'ip': ip,
        'ua': request.user_agent
    }
    task['user_data'] = user_data

    # Override the defaults with whatever the request supplied.
    if 'query' in request.values:
        task['query'] = request.values['query']
    if 'selected_categories' in request.values:
        task['selected_categories'].append(request.values['selected_categories'])
    if 'selected_categories[]' in request.values:
        task['selected_categories'] = request.values.getlist('selected_categories[]')
    if 'pageno' in request.values:
        task['pageno'] = request.values['pageno']
    if 'settings' in request.values:
        task['settings'] = request.values['settings']

    if not task['query']:
        return make_response(jsonify({'error': 'query empty'}), 500)

    # pageno arrives as a string from the request; reject anything that is
    # not a positive integer.  (BUGFIX: int() previously raised an uncaught
    # ValueError on non-numeric input instead of returning this error.)
    try:
        if not task['pageno'] or int(task['pageno']) < 1:
            return make_response(jsonify({'error': 'wrong pageno'}), 500)
    except (TypeError, ValueError):
        return make_response(jsonify({'error': 'wrong pageno'}), 500)

    try:
        search = Search(task)
    # BUGFIX: narrowed from a bare `except:`, which also swallowed
    # SystemExit and KeyboardInterrupt.
    except Exception:
        return make_response(jsonify(dict(error='task ???')), 500)

    # Plugins may veto the search in the pre_search hook.
    if plugins.callAPI('pre_search', task, locals()):
        search.search(task)

    plugins.callAPI('post_search', task, locals())

    return jsonify({'results': search.results,
                    'suggestions': search.suggestions,
                    'answers': search.answers,
                    'infoboxes': search.infoboxes
                    })
Example #2
0
    def search(self, task):
        """Run the query in *task* against every selected engine.

        Sends one HTTP request per eligible engine, pulls suggestions,
        answers and infoboxes out of the raw engine results, scores and
        de-duplicates the remaining results, then post-processes each
        result (plugin hooks, pretty URL, highlighting, HTML stripping).

        Returns self, with self.results / self.suggestions / self.answers /
        self.infoboxes populated.
        """
        global number_of_searches

        # init vars
        requests = []
        results_queue = Queue()
        results = {}

        # increase number of searches
        number_of_searches += 1

        # set default useragent
        user_agent = gen_useragent()

        # start search-request for all selected engines
        for selected_engine in self.engines:
            if selected_engine['name'] not in engines:
                continue

            engine = engines[selected_engine['name']]

            # if paging is not supported, skip
            if self.pageno > 1 and not engine.paging:
                continue

            # if search-language is set and engine does not
            # provide language-support, skip
            if self.lang != 'all' and not engine.language_support:
                continue

            # set default request parameters
            request_params = default_request_params()
            request_params['headers']['User-Agent'] = user_agent
            request_params['category'] = selected_engine['category']
            request_params['started'] = time()
            request_params['pageno'] = self.pageno

            # an engine may pin its own language; otherwise fall back to
            # the language selected for this search
            if hasattr(engine, 'language') and engine.language:
                request_params['language'] = engine.language
            else:
                request_params['language'] = self.lang

            # 0 = None, 1 = Moderate, 2 = Strict
            request_params['safesearch'] = settings['search']['safe_search']

            # update request parameters dependent on
            # search-engine (contained in engines folder)
            engine.request(task['query'].encode('utf-8'), request_params)

            if request_params['url'] is None:
                # TODO add support of offline engines
                pass

            # create a callback wrapper for the search engine results
            callback = make_callback(
                selected_engine['name'],
                results_queue,
                engine.response,
                request_params)

            # create dictionary which contains all
            # information about the request
            request_args = dict(
                headers=request_params['headers'],
                hooks=dict(response=callback),
                cookies=request_params['cookies'],
                timeout=engine.timeout,
                verify=request_params['verify']
            )

            # specific type of request (GET or POST)
            if request_params['method'] == 'GET':
                req = requests_lib.get
            else:
                req = requests_lib.post
                request_args['data'] = request_params['data']

            # ignoring empty urls
            if not request_params['url']:
                continue

            # append request to list
            requests.append((req, request_params['url'],
                             request_args,
                             selected_engine['name']))

        if not requests:
            return self
        # send all search-requests
        threaded_requests(requests)

        # Drain the queue, separating suggestions, answers and infoboxes
        # from plain results.  Iterate over a copy because matched items
        # are removed from engine_results in place.  Precedence mirrors
        # the original pass order: suggestion, then answer, then infobox.
        while not results_queue.empty():
            engine_name, engine_results = results_queue.get_nowait()

            # TODO type checks
            for item in list(engine_results):
                if 'suggestion' in item:
                    engine_results.remove(item)
                    self.suggestions.append(item['suggestion'])
                elif 'answer' in item:
                    engine_results.remove(item)
                    self.answers.append(item['answer'])
                elif 'infobox' in item:
                    engine_results.remove(item)
                    self.infoboxes.append(item)

            results[engine_name] = engine_results

        # update engine-specific stats
        for engine_name, engine_results in results.items():
            engines[engine_name].stats['search_count'] += 1
            engines[engine_name].stats['result_count'] += len(engine_results)

        # score results and remove duplications
        self.results = score_results(results)

        # merge infoboxes according to their ids
        self.infoboxes = merge_infoboxes(self.infoboxes)

        # update engine stats, using calculated score
        for result in self.results:
            plugins.callAPI('on_result', self.plugins, locals())

            # Credit the score to every engine that returned this result.
            # BUGFIX: previously indexed engines[result['engine']] inside
            # this loop, crediting a single engine once per entry in
            # result['engines'] instead of crediting each engine.
            for res_engine in result['engines']:
                engines[res_engine].stats['score_count'] += result['score']

            result['pretty_url'] = prettify_url(result['url'])

            # TODO, check if timezone is calculated right
            if 'publishedDate' in result:
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')

            # enable paging for the whole search as soon as one of the
            # contributing engines supports it
            if not self.paging and engines[result['engine']].paging:
                self.paging = True

            if 'content' in result:
                result['content_html'] = highlight_content(result['content'],
                                                           self.query.encode('utf-8'))  # noqa
            result['title_html'] = highlight_content(result['title'],
                                                     self.query.encode('utf-8'))

            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        return self