Exemplo n.º 1
0
def request(query, params):
    offset = (params['pageno'] - 1) * 10

    language = match_language(params['language'], supported_languages)
    language_array = language.split('-')
    if params['language'].find('-') > 0:
        country = params['language'].split('-')[1]
    elif len(language_array) == 2:
        country = language_array[1]
    else:
        country = 'US'

    url_lang = 'lang_' + language

    if use_locale_domain:
        google_hostname = country_to_hostname.get(country.upper(), default_hostname)
    else:
        google_hostname = default_hostname

    # original format: ID=3e2b6616cee08557:TM=5556667580:C=r:IP=4.1.12.5-:S=23ASdf0soFgF2d34dfgf-_22JJOmHdfgg
    params['cookies']['GOOGLE_ABUSE_EXEMPTION'] = 'x'
    params['url'] = search_url.format(offset=offset,
                                      query=urlencode({'q': query}),
                                      hostname=google_hostname,
                                      lang=url_lang,
                                      lang_short=language)
    if params['time_range'] in time_range_dict:
        params['url'] += time_range_search.format(range=time_range_dict[params['time_range']])

    params['headers']['Accept-Language'] = language + ',' + language + '-' + country
    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'

    params['google_hostname'] = google_hostname

    return params
Exemplo n.º 2
0
def request(query, params):
    locale = match_language(params['language'], supported_languages)

    params['url'] = search_url.format(
        query=urlencode({'search': query, 'localization': locale}),
        pageno=params['pageno'])

    return params
Exemplo n.º 3
0
def get_region_code(lang, lang_list=[]):
    if lang == 'all':
        return None

    lang_code = match_language(lang, lang_list, language_aliases, 'wt-WT')
    lang_parts = lang_code.split('-')

    # country code goes first
    return lang_parts[1].lower() + '-' + lang_parts[0].lower()
Exemplo n.º 4
0
def request(query, params):
    if params['time_range'] and params['time_range'] not in time_range_dict:
        return params

    offset = (params['pageno'] - 1) * 10 + 1

    language = match_language(params['language'], supported_languages, language_aliases)

    params['url'] = _get_url(query, language, offset, params['time_range'])

    return params
Exemplo n.º 5
0
Arquivo: bing.py Projeto: cy8aer/searx
def request(query, params):
    offset = (params['pageno'] - 1) * 10 + 1

    lang = match_language(params['language'], supported_languages, language_aliases)

    query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')

    search_path = search_string.format(
        query=urlencode({'q': query}),
        offset=offset)

    params['url'] = base_url + search_path
    return params
Exemplo n.º 6
0
def request(query, params):
    region = match_language(params['language'], supported_languages)
    ui_language = region.split('-')[0]

    search_path = search_string.format(
        query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
        page=params['pageno']
    )

    # image search query is something like 'image?{query}&page={page}'
    if params['category'] == 'images':
        search_path = 'image' + search_path

    params['url'] = base_url + search_path

    return params
Exemplo n.º 7
0
def request(query, params):
    offset = (params['pageno'] - 1) * 10 + 1

    if params['language'] == 'all':
        language = 'en'
    else:
        language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]

    params['url'] = search_url.format(offset=offset,
                                      query=urlencode({'p': query}),
                                      lang=language)

    # TODO required?
    params['cookies']['sB'] = '"v=1&vm=p&fl=1&vl=lang_{lang}&sh=1&pn=10&rw=new'\
        .format(lang=language)
    return params
Exemplo n.º 8
0
def response(resp):
    results = []
    html = fromstring(resp.text)
    search_results = html.xpath(wikidata_ids_xpath)

    language = match_language(resp.search_params['language'], supported_languages).split('-')[0]

    # TODO: make requests asynchronous to avoid timeout when result_count > 1
    for search_result in search_results[:result_count]:
        wikidata_id = search_result.split('/')[-1]
        url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
        htmlresponse = get(url)
        jsonresponse = loads(htmlresponse.text)
        results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])

    return results
Exemplo n.º 9
0
def request(query, params):
    if params['time_range'] and params['time_range'] not in time_range_dict:
        return params

    offset = (params['pageno'] - 1) * 10 + 1
    language = match_language(params['language'], supported_languages, language_aliases)
    if language not in language_aliases.values():
        language = language.split('-')[0]
    language = language.replace('-', '_').lower()

    params['url'] = _get_url(query, offset, language, params['time_range'])

    # TODO required?
    params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
        .format(lang=language)

    return params
Exemplo n.º 10
0
def request(query, params):
    offset = (params['pageno'] - 1) * 10

    if categories[0] and categories[0] in category_to_keyword:

        params['url'] = url.format(keyword=category_to_keyword[categories[0]],
                                   query=urlencode({'q': query}),
                                   offset=offset)
    else:
        params['url'] = url.format(keyword='web',
                                   query=urlencode({'q': query}),
                                   offset=offset)

    # add language tag
    language = match_language(params['language'], supported_languages)
    params['url'] += '&locale=' + language.replace('-', '_').lower()

    return params
Exemplo n.º 11
0
def request(query, params):
    offset = (params['pageno'] - 1) * 10

    if params['language'] == 'all' or params['language'] == 'en-US':
        language = 'en-GB'
    else:
        language = match_language(params['language'], supported_languages, language_aliases)

    language_array = language.split('-')
    if params['language'].find('-') > 0:
        country = params['language'].split('-')[1]
    elif len(language_array) == 2:
        country = language_array[1]
    else:
        country = 'US'

    url_lang = 'lang_' + language

    if use_locale_domain:
        google_hostname = country_to_hostname.get(country.upper(), default_hostname)
    else:
        google_hostname = default_hostname

    # original format: ID=3e2b6616cee08557:TM=5556667580:C=r:IP=4.1.12.5-:S=23ASdf0soFgF2d34dfgf-_22JJOmHdfgg
    params['cookies']['GOOGLE_ABUSE_EXEMPTION'] = 'x'
    params['url'] = search_url.format(offset=offset,
                                      query=urlencode({'q': query}),
                                      hostname=google_hostname,
                                      lang=url_lang,
                                      lang_short=language)
    if params['time_range'] in time_range_dict:
        params['url'] += time_range_search.format(range=time_range_dict[params['time_range']])

    params['headers']['Accept-Language'] = language + ',' + language + '-' + country
    params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'

    # Force Internet Explorer 12 user agent to avoid loading the new UI that Searx can't parse
    params['headers']['User-Agent'] = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"

    params['google_hostname'] = google_hostname

    return params
Exemplo n.º 12
0
def request(query, params):

    search_options = {
        'start': (params['pageno'] - 1) * number_of_results
    }

    if params['time_range'] in time_range_dict:
        search_options['tbs'] = time_range_attr.format(range=time_range_dict[params['time_range']])

    if safesearch and params['safesearch']:
        search_options['safe'] = 'on'

    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      search_options=urlencode(search_options))

    language = match_language(params['language'], supported_languages).split('-')[0]
    if language:
        params['url'] += '&lr=lang_' + language

    return params
Exemplo n.º 13
0
def request(query, params):
    offset = (params['pageno'] - 1) * 10 + 1

    search_path = search_string.format(
        query=urlencode({'q': query}),
        offset=offset)

    language = match_language(params['language'], supported_languages, language_aliases).lower()

    params['cookies']['SRCHHPGUSR'] = \
        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

    params['cookies']['_EDGE_S'] = 'mkt=' + language +\
        '&ui=' + language + '&F=1'

    params['url'] = base_url + search_path
    if params['time_range'] in time_range_dict:
        params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])

    return params
Exemplo n.º 14
0
def request(query, params):
    offset = (params['pageno'] - 1) * 10 + 1

    # safesearch cookie
    params['cookies']['SRCHHPGUSR'] = \
        'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

    # language cookie
    language = match_language(params['language'], supported_languages, language_aliases).lower()
    params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'

    # query and paging
    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      offset=offset,
                                      number_of_results=number_of_results)

    # time range
    if params['time_range'] in time_range_dict:
        params['url'] += time_range_string.format(interval=time_range_dict[params['time_range']])

    return params
Exemplo n.º 15
0
def render(template_name, override_theme=None, **kwargs):
    # values from the HTTP requests
    kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
    kwargs['cookies'] = request.cookies
    kwargs['errors'] = request.errors

    # values from the preferences
    kwargs['preferences'] = request.preferences
    kwargs['method'] = request.preferences.get_value('method')
    kwargs['autocomplete'] = request.preferences.get_value('autocomplete')
    kwargs['results_on_new_tab'] = request.preferences.get_value(
        'results_on_new_tab')
    kwargs['advanced_search'] = request.preferences.get_value(
        'advanced_search')
    kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))
    kwargs['theme'] = get_current_theme_name(override=override_theme)
    kwargs['all_categories'] = _get_ordered_categories()
    kwargs['categories'] = _get_enable_categories(kwargs['all_categories'])

    # i18n
    kwargs['language_codes'] = languages  # from searx.languages
    kwargs['translations'] = json.dumps(get_translations(),
                                        separators=(',', ':'))

    locale = request.preferences.get_value('locale')
    if locale in rtl_locales and 'rtl' not in kwargs:
        kwargs['rtl'] = True
    if 'current_language' not in kwargs:
        kwargs['current_language'] = match_language(
            request.preferences.get_value('language'), LANGUAGE_CODES)

    # values from settings
    kwargs['search_formats'] = [
        x for x in settings['search']['formats'] if x != 'html'
    ]

    # brand
    kwargs['instance_name'] = settings['general']['instance_name']
    kwargs['searx_version'] = VERSION_STRING
    kwargs['brand'] = brand

    # helpers to create links to other pages
    kwargs['url_for'] = url_for_theme  # override url_for function in templates
    kwargs['image_proxify'] = image_proxify
    kwargs['proxify'] = proxify if settings.get('result_proxy',
                                                {}).get('url') else None
    kwargs['proxify_results'] = settings.get('result_proxy',
                                             {}).get('proxify_results', True)
    kwargs['get_result_template'] = get_result_template
    kwargs['opensearch_url'] = (
        url_for('opensearch') + '?' +
        urlencode({
            'method': kwargs['method'],
            'autocomplete': kwargs['autocomplete']
        }))

    # scripts from plugins
    kwargs['scripts'] = set()
    for plugin in request.user_plugins:
        for script in plugin.js_dependencies:
            kwargs['scripts'].add(script)

    # styles from plugins
    kwargs['styles'] = set()
    for plugin in request.user_plugins:
        for css in plugin.css_dependencies:
            kwargs['styles'].add(css)

    start_time = default_timer()
    result = render_template('{}/{}'.format(kwargs['theme'], template_name),
                             **kwargs)
    request.render_time += default_timer() - start_time  # pylint: disable=assigning-non-slot

    return result
Exemplo n.º 16
0
def request(query, params):
    params['url'] = url.format(query=urlencode({'q': query}))
    language = match_language(params['language'], supported_languages, language_aliases)
    params['headers']['Accept-Language'] = language.split('-')[0]
    return params
Exemplo n.º 17
0
def search():
    """Search query in q and return results.

    Supported outputs: html, json, csv, rss.
    """
    # pylint: disable=too-many-locals, too-many-return-statements, too-many-branches
    # pylint: disable=too-many-statements

    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in OUTPUT_FORMATS:
        output_format = 'html'

    if output_format not in settings['search']['formats']:
        flask.abort(403)

    # check if there is query (not None and not an empty string)
    if not request.form.get('q'):
        if output_format == 'html':
            return render(
                'index.html',
                selected_categories=get_selected_categories(
                    request.preferences, request.form),
            )
        return index_error(output_format, 'No query'), 400

    # search
    search_query = None
    raw_text_query = None
    result_container = None
    try:
        search_query, raw_text_query, _, _ = get_search_query_from_webapp(
            request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)  # pylint: disable=redefined-outer-name

        result_container = search.search()

    except SearxParameterException as e:
        logger.exception('search error: SearxParameterException')
        return index_error(output_format, e.message), 400
    except Exception as e:  # pylint: disable=broad-except
        logger.exception(e, exc_info=True)
        return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    # checkin for a external bang
    if result_container.redirect_url:
        return redirect(result_container.redirect_url)

    # Server-Timing header
    request.timings = result_container.get_timings()  # pylint: disable=assigning-non-slot

    # output
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(
                    escape(result['content'][:1024]), search_query.query)
            if 'title' in result and result['title']:
                result['title'] = highlight_content(
                    escape(result['title'] or ''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(
                html_to_text(result['title']).strip().split())

        if 'url' in result:
            result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right  # pylint: disable=fixme
        if result.get(
                'publishedDate'
        ):  # do not try to get a date from an empty string or a None type
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime(
                    '%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(
                        tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now(
                    ) - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(
                            '{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(
                            '{hours} hour(s), {minutes} minute(s) ago').format(
                                hours=hours, minutes=minutes)
                else:
                    result['publishedDate'] = format_date(
                        result['publishedDate'])

    if output_format == 'json':
        x = {
            'query':
            search_query.query,
            'number_of_results':
            number_of_results,
            'results':
            results,
            'answers':
            list(result_container.answers),
            'corrections':
            list(result_container.corrections),
            'infoboxes':
            result_container.infoboxes,
            'suggestions':
            list(result_container.suggestions),
            'unresponsive_engines':
            __get_translated_errors(result_container.unresponsive_engines)
        }
        response = json.dumps(x,
                              default=lambda item: list(item)
                              if isinstance(item, set) else item)
        return Response(response, mimetype='application/json')

    if output_format == 'csv':
        csv = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            row['type'] = 'result'
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.answers:
            row = {'title': a, 'type': 'answer'}
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.suggestions:
            row = {'title': a, 'type': 'suggestion'}
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.corrections:
            row = {'title': a, 'type': 'correction'}
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(
            search_query.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response

    if output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            answers=result_container.answers,
            corrections=result_container.corrections,
            suggestions=result_container.suggestions,
            q=request.form['q'],
            number_of_results=number_of_results,
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')

    # HTML output format

    # suggestions: use RawTextQuery to get the suggestion URLs with the same bang
    suggestion_urls = list(
        map(
            lambda suggestion: {
                'url': raw_text_query.changeQuery(suggestion).getFullQuery(),
                'title': suggestion
            }, result_container.suggestions))

    correction_urls = list(
        map(
            lambda correction: {
                'url': raw_text_query.changeQuery(correction).getFullQuery(),
                'title': correction
            }, result_container.corrections))

    return render('results.html',
                  results=results,
                  q=request.form['q'],
                  selected_categories=search_query.categories,
                  pageno=search_query.pageno,
                  time_range=search_query.time_range,
                  number_of_results=format_decimal(number_of_results),
                  suggestions=suggestion_urls,
                  answers=result_container.answers,
                  corrections=correction_urls,
                  infoboxes=result_container.infoboxes,
                  engine_data=result_container.engine_data,
                  paging=result_container.paging,
                  unresponsive_engines=__get_translated_errors(
                      result_container.unresponsive_engines),
                  current_language=match_language(
                      search_query.lang,
                      LANGUAGE_CODES,
                      fallback=request.preferences.get_value("language")),
                  theme=get_current_theme_name(),
                  favicons=global_favicons[themes.index(
                      get_current_theme_name())],
                  timeout_limit=request.form.get('timeout_limit', None))
Exemplo n.º 18
0
def url_lang(lang):
    return match_language(lang, supported_languages).split('-')[0]
Exemplo n.º 19
0
def _get_browser_language(request, lang_list):
    for lang in request.headers.get("Accept-Language", "en").split(","):
        locale = match_language(lang, lang_list, fallback=None)
        if locale is not None:
            return locale
Exemplo n.º 20
0
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # check if there is query
    if request.form.get('q') is None:
        if output_format == 'html':
            return render(
                'index.html',
            )
        else:
            return index_error(output_format, 'No query'), 400

    # search
    search_query = None
    result_container = None
    try:
        search_query = get_search_query_from_webapp(request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)
        result_container = search.search()
    except Exception as e:
        # log exception
        logger.exception('search error')

        # is it an invalid input parameter or something else ?
        if (issubclass(e.__class__, SearxParameterException)):
            return index_error(output_format, e.message), 400
        else:
            return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    # UI
    advanced_search = request.form.get('advanced_search', None)

    # output
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
            result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(html_to_text(result['title']).strip().split())

        result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(result['publishedDate'])

    if output_format == 'json':
        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
                                    'number_of_results': number_of_results,
                                    'results': results,
                                    'answers': list(result_container.answers),
                                    'corrections': list(result_container.corrections),
                                    'infoboxes': result_container.infoboxes,
                                    'suggestions': list(result_container.suggestions),
                                    'unresponsive_engines': list(result_container.unresponsive_engines)},
                                   default=lambda item: list(item) if isinstance(item, set) else item),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url(),
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')

    return render(
        'results.html',
        results=results,
        q=request.form['q'],
        selected_categories=search_query.categories,
        pageno=search_query.pageno,
        time_range=search_query.time_range,
        number_of_results=format_decimal(number_of_results),
        advanced_search=advanced_search,
        suggestions=result_container.suggestions,
        answers=result_container.answers,
        corrections=result_container.corrections,
        infoboxes=result_container.infoboxes,
        paging=result_container.paging,
        unresponsive_engines=result_container.unresponsive_engines,
        current_language=match_language(search_query.lang,
                                        LANGUAGE_CODES,
                                        fallback=settings['search']['language']),
        base_url=get_base_url(),
        theme=get_current_theme_name(),
        favicons=global_favicons[themes.index(get_current_theme_name())]
    )
Exemplo n.º 21
0
def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
    ret_val = {}

    _lang = params['language']
    _any_language = _lang.lower() == 'all'
    if _any_language:
        _lang = 'en-US'

    language = match_language(_lang, lang_list, custom_aliases)
    ret_val['language'] = language

    # the requested language from params (en, en-US, de, de-AT, fr, fr-CA, ...)
    _l = _lang.split('-')

    # the country code (US, AT, CA)
    if len(_l) == 2:
        country = _l[1]
    else:
        country = _l[0].upper()
        if country == 'EN':
            country = 'US'

    ret_val['country'] = country

    # the combination (en-US, en-EN, de-DE, de-AU, fr-FR, fr-FR)
    lang_country = '%s-%s' % (language, country)

    # subdomain
    ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(),
                                                       'google.com')

    ret_val['params'] = {}
    ret_val['headers'] = {}

    if _any_language and supported_any_language:
        # based on whoogle
        ret_val['params']['source'] = 'lnt'
    else:
        # Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5
        ret_val['headers']['Accept-Language'] = ','.join([
            lang_country,
            language + ';q=0.8,',
            'en;q=0.6',
            '*;q=0.5',
        ])

        # lr parameter:
        #   https://developers.google.com/custom-search/docs/xml_results#lrsp
        # Language Collection Values:
        #   https://developers.google.com/custom-search/docs/xml_results_appendices#languageCollections
        ret_val['params'][
            'lr'] = "lang_" + lang_country if lang_country in lang_list else language

    ret_val['params'][
        'hl'] = lang_country if lang_country in lang_list else language

    # hl parameter:
    #   https://developers.google.com/custom-search/docs/xml_results#hlsp The
    # Interface Language:
    #   https://developers.google.com/custom-search/docs/xml_results_appendices#interfaceLanguages
    return ret_val
Exemplo n.º 22
0
def render(template_name, override_theme=None, **kwargs):
    disabled_engines = request.preferences.engines.get_disabled()

    enabled_categories = set(category for engine_name in engines
                             for category in engines[engine_name].categories
                             if (engine_name,
                                 category) not in disabled_engines)

    if 'categories' not in kwargs:
        kwargs['categories'] = [
            x for x in _get_ordered_categories() if x in enabled_categories
        ]

    if 'autocomplete' not in kwargs:
        kwargs['autocomplete'] = request.preferences.get_value('autocomplete')

    locale = request.preferences.get_value('locale')

    if locale in rtl_locales and 'rtl' not in kwargs:
        kwargs['rtl'] = True

    kwargs['searx_version'] = VERSION_STRING

    kwargs['method'] = request.preferences.get_value('method')

    kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))

    kwargs['language_codes'] = languages
    if 'current_language' not in kwargs:
        kwargs['current_language'] = match_language(
            request.preferences.get_value('language'), LANGUAGE_CODES)

    # override url_for function in templates
    kwargs['url_for'] = url_for_theme

    kwargs['image_proxify'] = image_proxify

    kwargs['proxify'] = proxify if settings.get('result_proxy',
                                                {}).get('url') else None

    kwargs['opensearch_url'] = url_for('opensearch') + '?' \
        + urlencode({'method': kwargs['method'], 'autocomplete': kwargs['autocomplete']})

    kwargs['get_result_template'] = get_result_template

    kwargs['theme'] = get_current_theme_name(override=override_theme)

    kwargs['template_name'] = template_name

    kwargs['cookies'] = request.cookies

    kwargs['errors'] = request.errors

    kwargs['instance_name'] = settings['general']['instance_name']

    kwargs['results_on_new_tab'] = request.preferences.get_value(
        'results_on_new_tab')

    kwargs['preferences'] = request.preferences

    kwargs['brand'] = brand

    kwargs['translations'] = json.dumps(get_translations(),
                                        separators=(',', ':'))

    kwargs['scripts'] = set()
    kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
    for plugin in request.user_plugins:
        for script in plugin.js_dependencies:
            kwargs['scripts'].add(script)

    kwargs['styles'] = set()
    for plugin in request.user_plugins:
        for css in plugin.css_dependencies:
            kwargs['styles'].add(css)

    return render_template('{}/{}'.format(kwargs['theme'], template_name),
                           **kwargs)
Exemplo n.º 23
0
def load_engine(engine_data):

    if '_' in engine_data['name']:
        logger.error('Engine name conains underscore: "{}"'.format(engine_data['name']))
        sys.exit(1)

    engine_module = engine_data['engine']

    try:
        engine = load_module(engine_module + '.py', engine_dir)
    except:
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    for param_name in engine_data:
        if param_name == 'engine':
            continue
        if param_name == 'categories':
            if engine_data['categories'] == 'none':
                engine.categories = []
            else:
                engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
            continue
        setattr(engine, param_name, engine_data[param_name])

    for arg_name, arg_value in engine_default_args.items():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)

    # checking required variables
    for engine_attr in dir(engine):
        if engine_attr.startswith('_'):
            continue
        if engine_attr == 'inactive' and getattr(engine, engine_attr) is True:
            return None
        if getattr(engine, engine_attr) is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'
                         .format(engine.name, engine_attr))
            sys.exit(1)

    # assign supported languages from json file
    if engine_data['name'] in languages:
        setattr(engine, 'supported_languages', languages[engine_data['name']])

    # find custom aliases for non standard language codes
    if hasattr(engine, 'supported_languages'):
        if hasattr(engine, 'language_aliases'):
            language_aliases = getattr(engine, 'language_aliases')
        else:
            language_aliases = {}

        for engine_lang in getattr(engine, 'supported_languages'):
            iso_lang = match_language(engine_lang, babel_langs, fallback=None)
            if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \
               iso_lang not in getattr(engine, 'supported_languages'):
                language_aliases[iso_lang] = engine_lang

        setattr(engine, 'language_aliases', language_aliases)

    # assign language fetching method if auxiliary method exists
    if hasattr(engine, '_fetch_supported_languages'):
        setattr(engine, 'fetch_supported_languages',
                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))

    engine.stats = {
        'result_count': 0,
        'search_count': 0,
        'page_load_time': 0,
        'page_load_count': 0,
        'engine_time': 0,
        'engine_time_count': 0,
        'score_count': 0,
        'errors': 0
    }

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)

    if engine.shortcut in engine_shortcuts:
        logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut))
        sys.exit(1)

    engine_shortcuts[engine.shortcut] = engine.name

    return engine
Exemplo n.º 24
0
def _is_selected_language_supported(engine, preferences):
    language = preferences.get_value('language')
    return (language == 'all' or match_language(
        language, getattr(engine, 'supported_languages', []),
        getattr(engine, 'language_aliases', {}), None))
Exemplo n.º 25
0
def url_lang(lang):
    lang_pre = lang.split('-')[0]
    if lang_pre == 'all' or lang_pre not in supported_languages and lang_pre not in language_aliases:
        return 'en'
    return match_language(lang, supported_languages,
                          language_aliases).split('-')[0]
Exemplo n.º 26
0
def index():
    """Render index page.

    Supported outputs: html, json, csv, rss.
    """

    # output_format
    output_format = request.form.get('format', 'html')
    if output_format not in ['html', 'csv', 'json', 'rss']:
        output_format = 'html'

    # check if there is query
    if request.form.get('q') is None:
        if output_format == 'html':
            return render('index.html', )
        else:
            return index_error(output_format, 'No query'), 400

    # search
    search_query = None
    raw_text_query = None
    result_container = None
    try:
        search_query, raw_text_query = get_search_query_from_webapp(
            request.preferences, request.form)
        # search = Search(search_query) #  without plugins
        search = SearchWithPlugins(search_query, request.user_plugins, request)
        result_container = search.search()
    except Exception as e:
        # log exception
        logger.exception('search error')

        # is it an invalid input parameter or something else ?
        if (issubclass(e.__class__, SearxParameterException)):
            return index_error(output_format, e.message), 400
        else:
            return index_error(output_format, gettext('search error')), 500

    # results
    results = result_container.get_ordered_results()
    number_of_results = result_container.results_number()
    if number_of_results < result_container.results_length():
        number_of_results = 0

    # UI
    advanced_search = request.form.get('advanced_search', None)

    # Server-Timing header
    request.timings = result_container.get_timings()

    # output
    for result in results:
        if output_format == 'html':
            if 'content' in result and result['content']:
                result['content'] = highlight_content(
                    escape(result['content'][:1024]), search_query.query)
            if 'title' in result and result['title']:
                result['title'] = highlight_content(
                    escape(result['title'] or u''), search_query.query)
        else:
            if result.get('content'):
                result['content'] = html_to_text(result['content']).strip()
            # removing html content and whitespace duplications
            result['title'] = ' '.join(
                html_to_text(result['title']).strip().split())

        if 'url' in result:
            result['pretty_url'] = prettify_url(result['url'])

        # TODO, check if timezone is calculated right
        if 'publishedDate' in result:
            try:  # test if publishedDate >= 1900 (datetime module bug)
                result['pubdate'] = result['publishedDate'].strftime(
                    '%Y-%m-%d %H:%M:%S%z')
            except ValueError:
                result['publishedDate'] = None
            else:
                if result['publishedDate'].replace(
                        tzinfo=None) >= datetime.now() - timedelta(days=1):
                    timedifference = datetime.now(
                    ) - result['publishedDate'].replace(tzinfo=None)
                    minutes = int((timedifference.seconds / 60) % 60)
                    hours = int(timedifference.seconds / 60 / 60)
                    if hours == 0:
                        result['publishedDate'] = gettext(
                            u'{minutes} minute(s) ago').format(minutes=minutes)
                    else:
                        result['publishedDate'] = gettext(
                            u'{hours} hour(s), {minutes} minute(s) ago'
                        ).format(hours=hours, minutes=minutes)  # noqa
                else:
                    result['publishedDate'] = format_date(
                        result['publishedDate'])

    if output_format == 'json':
        return Response(json.dumps(
            {
                'query': search_query.query.decode('utf-8'),
                'number_of_results': number_of_results,
                'results': results,
                'answers': list(result_container.answers),
                'corrections': list(result_container.corrections),
                'infoboxes': result_container.infoboxes,
                'suggestions': list(result_container.suggestions),
                'unresponsive_engines': list(
                    result_container.unresponsive_engines)
            },
            default=lambda item: list(item)
            if isinstance(item, set) else item),
                        mimetype='application/json')
    elif output_format == 'csv':
        csv = UnicodeWriter(StringIO())
        keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type')
        csv.writerow(keys)
        for row in results:
            row['host'] = row['parsed_url'].netloc
            row['type'] = 'result'
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.answers:
            row = {'title': a, 'type': 'answer'}
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.suggestions:
            row = {'title': a, 'type': 'suggestion'}
            csv.writerow([row.get(key, '') for key in keys])
        for a in result_container.corrections:
            row = {'title': a, 'type': 'correction'}
            csv.writerow([row.get(key, '') for key in keys])
        csv.stream.seek(0)
        response = Response(csv.stream.read(), mimetype='application/csv')
        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(
            search_query.query.decode('utf-8'))
        response.headers.add('Content-Disposition', cont_disp)
        return response
    elif output_format == 'rss':
        print(results)
        response_rss = render(
            'opensearch_response_rss.xml',
            results=results,
            answers=result_container.answers,
            corrections=result_container.corrections,
            suggestions=result_container.suggestions,
            q=request.form['q'],
            number_of_results=number_of_results,
            base_url=get_base_url(),
            override_theme='__common__',
        )
        return Response(response_rss, mimetype='text/xml')

    # HTML output format

    # suggestions: use RawTextQuery to get the suggestion URLs with the same bang
    suggestion_urls = list(
        map(
            lambda suggestion: {
                'url': raw_text_query.changeSearchQuery(suggestion).
                getFullQuery(),
                'title': suggestion
            }, result_container.suggestions))

    correction_urls = list(
        map(
            lambda correction: {
                'url': raw_text_query.changeSearchQuery(correction).
                getFullQuery(),
                'title': correction
            }, result_container.corrections))
    #
    return render('results.html',
                  results=results,
                  q=request.form['q'],
                  selected_categories=search_query.categories,
                  pageno=search_query.pageno,
                  time_range=search_query.time_range,
                  number_of_results=format_decimal(number_of_results),
                  advanced_search=advanced_search,
                  suggestions=suggestion_urls,
                  answers=result_container.answers,
                  corrections=correction_urls,
                  infoboxes=result_container.infoboxes,
                  paging=result_container.paging,
                  unresponsive_engines=result_container.unresponsive_engines,
                  current_language=match_language(
                      search_query.lang,
                      LANGUAGE_CODES,
                      fallback=request.preferences.get_value("language")),
                  base_url=get_base_url(),
                  theme=get_current_theme_name(),
                  favicons=global_favicons[themes.index(
                      get_current_theme_name())],
                  timeout_limit=request.form.get('timeout_limit', None))
Exemplo n.º 27
0
def render(template_name, override_theme=None, **kwargs):
    disabled_engines = request.preferences.engines.get_disabled()

    enabled_categories = set(category for engine_name in engines
                             for category in engines[engine_name].categories
                             if (engine_name,
                                 category) not in disabled_engines)

    if 'categories' not in kwargs:
        kwargs['categories'] = ['general']
        kwargs['categories'].extend(
            x for x in sorted(categories.keys())
            if x != 'general' and x in enabled_categories)

    if 'all_categories' not in kwargs:
        kwargs['all_categories'] = ['general']
        kwargs['all_categories'].extend(x for x in sorted(categories.keys())
                                        if x != 'general')

    if 'selected_categories' not in kwargs:
        kwargs['selected_categories'] = []
        for arg in request.args:
            if arg.startswith('category_'):
                c = arg.split('_', 1)[1]
                if c in categories:
                    kwargs['selected_categories'].append(c)

    if not kwargs['selected_categories']:
        cookie_categories = request.preferences.get_value('categories')
        for ccateg in cookie_categories:
            kwargs['selected_categories'].append(ccateg)

    if not kwargs['selected_categories']:
        kwargs['selected_categories'] = ['general']

    if 'autocomplete' not in kwargs:
        kwargs['autocomplete'] = request.preferences.get_value('autocomplete')

    locale = request.preferences.get_value('locale')

    if locale in rtl_locales and 'rtl' not in kwargs:
        kwargs['rtl'] = True

    kwargs['searx_version'] = VERSION_STRING

    kwargs['method'] = request.preferences.get_value('method')

    kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))

    kwargs['language_codes'] = languages
    if 'current_language' not in kwargs:
        kwargs['current_language'] = match_language(
            request.preferences.get_value('language'), LANGUAGE_CODES)

    # override url_for function in templates
    kwargs['url_for'] = url_for_theme

    kwargs['image_proxify'] = image_proxify

    kwargs['proxify'] = proxify if settings.get('result_proxy',
                                                {}).get('url') else None

    kwargs['get_result_template'] = get_result_template

    kwargs['theme'] = get_current_theme_name(override=override_theme)

    kwargs['template_name'] = template_name

    kwargs['cookies'] = request.cookies

    kwargs['errors'] = request.errors

    kwargs['instance_name'] = settings['general']['instance_name']

    kwargs['results_on_new_tab'] = request.preferences.get_value(
        'results_on_new_tab')

    kwargs['unicode'] = unicode

    kwargs['preferences'] = request.preferences

    kwargs['scripts'] = set()
    for plugin in request.user_plugins:
        for script in plugin.js_dependencies:
            kwargs['scripts'].add(script)

    kwargs['styles'] = set()
    for plugin in request.user_plugins:
        for css in plugin.css_dependencies:
            kwargs['styles'].add(css)

    return render_template('{}/{}'.format(kwargs['theme'], template_name),
                           **kwargs)
    def test_match_language(self):
        self.assertEqual(utils.match_language('es', ['es']), 'es')
        self.assertEqual(utils.match_language('es', [], fallback='fallback'),
                         'fallback')
        self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}),
                         'jp')

        aliases = {'en-GB': 'en-UK', 'he': 'iw'}

        # guess country
        self.assertEqual(utils.match_language('de-DE', ['de']), 'de')
        self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE')
        self.assertEqual(
            utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']),
            'es-ES')
        self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX')
        self.assertEqual(
            utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']),
            'en-GB')
        self.assertEqual(
            utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'],
                                 aliases), 'en-UK')

        # language aliases
        self.assertEqual(utils.match_language('iw', ['he']), 'he')
        self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw')
        self.assertEqual(utils.match_language('iw-IL', ['he']), 'he')
        self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw')
        self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL')
        self.assertEqual(utils.match_language('he', ['iw-IL'], aliases),
                         'iw-IL')
        self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL')
        self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases),
                         'iw-IL')
Exemplo n.º 29
0
def url_lang(lang):
    return match_language(lang, supported_languages).split('-')[0]
Exemplo n.º 30
0
def load_engine(engine_data):
    engine_name = engine_data['name']
    if '_' in engine_name:
        logger.error('Engine name contains underscore: "{}"'.format(engine_name))
        sys.exit(1)

    if engine_name.lower() != engine_name:
        logger.warn('Engine name is not lowercase: "{}", converting to lowercase'.format(engine_name))
        engine_name = engine_name.lower()
        engine_data['name'] = engine_name

    engine_module = engine_data['engine']

    try:
        engine = load_module(engine_module + '.py', engine_dir)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError, ImportError, RuntimeError) as e:
        logger.exception('Fatal exception in engine "{}"'.format(engine_module))
        sys.exit(1)
    except:
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    for param_name in engine_data:
        if param_name == 'engine':
            continue
        if param_name == 'categories':
            if engine_data['categories'] == 'none':
                engine.categories = []
            else:
                engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
            continue
        setattr(engine, param_name, engine_data[param_name])

    for arg_name, arg_value in engine_default_args.items():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)

    # checking required variables
    for engine_attr in dir(engine):
        if engine_attr.startswith('_'):
            continue
        if engine_attr == 'inactive' and getattr(engine, engine_attr) is True:
            return None
        if getattr(engine, engine_attr) is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'
                         .format(engine.name, engine_attr))
            sys.exit(1)

    # assign supported languages from json file
    if engine_data['name'] in ENGINES_LANGUAGES:
        setattr(engine, 'supported_languages', ENGINES_LANGUAGES[engine_data['name']])

    # find custom aliases for non standard language codes
    if hasattr(engine, 'supported_languages'):
        if hasattr(engine, 'language_aliases'):
            language_aliases = getattr(engine, 'language_aliases')
        else:
            language_aliases = {}

        for engine_lang in getattr(engine, 'supported_languages'):
            iso_lang = match_language(engine_lang, babel_langs, fallback=None)
            if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \
               iso_lang not in getattr(engine, 'supported_languages'):
                language_aliases[iso_lang] = engine_lang

        setattr(engine, 'language_aliases', language_aliases)

    # assign language fetching method if auxiliary method exists
    if hasattr(engine, '_fetch_supported_languages'):
        setattr(engine, 'fetch_supported_languages',
                lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))

    engine.stats = {
        'result_count': 0,
        'search_count': 0,
        'engine_time': 0,
        'engine_time_count': 0,
        'score_count': 0,
        'errors': 0
    }

    if not engine.offline:
        engine.stats['page_load_time'] = 0
        engine.stats['page_load_count'] = 0

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)

    if engine.shortcut in engine_shortcuts:
        logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut))
        sys.exit(1)

    engine_shortcuts[engine.shortcut] = engine.name

    return engine
Exemplo n.º 31
0
def render(template_name, override_theme=None, **kwargs):
    disabled_engines = request.preferences.engines.get_disabled()

    enabled_categories = set(category for engine_name in engines
                             for category in engines[engine_name].categories
                             if (engine_name, category) not in disabled_engines)

    if 'categories' not in kwargs:
        kwargs['categories'] = ['general']
        kwargs['categories'].extend(x for x in
                                    sorted(categories.keys())
                                    if x != 'general'
                                    and x in enabled_categories)

    if 'all_categories' not in kwargs:
        kwargs['all_categories'] = ['general']
        kwargs['all_categories'].extend(x for x in
                                        sorted(categories.keys())
                                        if x != 'general')

    if 'selected_categories' not in kwargs:
        kwargs['selected_categories'] = []
        for arg in request.args:
            if arg.startswith('category_'):
                c = arg.split('_', 1)[1]
                if c in categories:
                    kwargs['selected_categories'].append(c)

    if not kwargs['selected_categories']:
        cookie_categories = request.preferences.get_value('categories')
        for ccateg in cookie_categories:
            kwargs['selected_categories'].append(ccateg)

    if not kwargs['selected_categories']:
        kwargs['selected_categories'] = ['general']

    if 'autocomplete' not in kwargs:
        kwargs['autocomplete'] = request.preferences.get_value('autocomplete')

    if get_locale() in rtl_locales and 'rtl' not in kwargs:
        kwargs['rtl'] = True

    kwargs['searx_version'] = VERSION_STRING

    kwargs['method'] = request.preferences.get_value('method')

    kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))

    kwargs['language_codes'] = languages
    if 'current_language' not in kwargs:
        kwargs['current_language'] = match_language(request.preferences.get_value('language'),
                                                    LANGUAGE_CODES,
                                                    fallback=settings['search']['language'])

    # override url_for function in templates
    kwargs['url_for'] = url_for_theme

    kwargs['image_proxify'] = image_proxify

    kwargs['proxify'] = proxify if settings.get('result_proxy', {}).get('url') else None

    kwargs['get_result_template'] = get_result_template

    kwargs['theme'] = get_current_theme_name(override=override_theme)

    kwargs['template_name'] = template_name

    kwargs['cookies'] = request.cookies

    kwargs['errors'] = request.errors

    kwargs['instance_name'] = settings['general']['instance_name']

    kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')

    kwargs['unicode'] = unicode

    kwargs['preferences'] = request.preferences

    kwargs['scripts'] = set()
    for plugin in request.user_plugins:
        for script in plugin.js_dependencies:
            kwargs['scripts'].add(script)

    kwargs['styles'] = set()
    for plugin in request.user_plugins:
        for css in plugin.css_dependencies:
            kwargs['styles'].add(css)

    return render_template(
        '{}/{}'.format(kwargs['theme'], template_name), **kwargs)
Exemplo n.º 32
0
    def test_match_language(self):
        self.assertEqual(utils.match_language('es', ['es']), 'es')
        self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback')
        self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp')

        aliases = {'en-GB': 'en-UK', 'he': 'iw'}

        # guess country
        self.assertEqual(utils.match_language('de-DE', ['de']), 'de')
        self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE')
        self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
        self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX')
        self.assertEqual(utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']), 'en-GB')
        self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK')

        # language aliases
        self.assertEqual(utils.match_language('iw', ['he']), 'he')
        self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw')
        self.assertEqual(utils.match_language('iw-IL', ['he']), 'he')
        self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw')
        self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL')
        self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL')
        self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL')
        self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL')
Exemplo n.º 33
0
def _is_selected_language_supported(engine, preferences):
    language = preferences.get_value('language')
    return (language == 'all'
            or match_language(language,
                              getattr(engine, 'supported_languages', []),
                              getattr(engine, 'language_aliases', {}), None))
Exemplo n.º 34
0
def load_engine(engine_data):
    engine_name = engine_data['name']
    if '_' in engine_name:
        logger.error(
            'Engine name contains underscore: "{}"'.format(engine_name))
        sys.exit(1)

    if engine_name.lower() != engine_name:
        logger.warn(
            'Engine name is not lowercase: "{}", converting to lowercase'.
            format(engine_name))
        engine_name = engine_name.lower()
        engine_data['name'] = engine_name

    engine_module = engine_data['engine']

    try:
        engine = load_module(engine_module + '.py', engine_dir)
    except (SyntaxError, KeyboardInterrupt, SystemExit, SystemError,
            ImportError, RuntimeError):
        logger.exception(
            'Fatal exception in engine "{}"'.format(engine_module))
        sys.exit(1)
    except:
        logger.exception('Cannot load engine "{}"'.format(engine_module))
        return None

    for param_name, param_value in engine_data.items():
        if param_name == 'engine':
            pass
        elif param_name == 'categories':
            if param_value == 'none':
                engine.categories = []
            else:
                engine.categories = list(map(str.strip,
                                             param_value.split(',')))
        elif param_name == 'proxies':
            engine.proxies = get_proxy_cycles(param_value)
        else:
            setattr(engine, param_name, param_value)

    for arg_name, arg_value in engine_default_args.items():
        if not hasattr(engine, arg_name):
            setattr(engine, arg_name, arg_value)

    # checking required variables
    for engine_attr in dir(engine):
        if engine_attr.startswith('_'):
            continue
        if engine_attr == 'inactive' and getattr(engine, engine_attr) is True:
            return None
        if getattr(engine, engine_attr) is None:
            logger.error('Missing engine config attribute: "{0}.{1}"'.format(
                engine.name, engine_attr))
            sys.exit(1)

    # assign supported languages from json file
    if engine_data['name'] in ENGINES_LANGUAGES:
        setattr(engine, 'supported_languages',
                ENGINES_LANGUAGES[engine_data['name']])

    # find custom aliases for non standard language codes
    if hasattr(engine, 'supported_languages'):
        if hasattr(engine, 'language_aliases'):
            language_aliases = getattr(engine, 'language_aliases')
        else:
            language_aliases = {}

        for engine_lang in getattr(engine, 'supported_languages'):
            iso_lang = match_language(engine_lang, babel_langs, fallback=None)
            if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \
               iso_lang not in getattr(engine, 'supported_languages'):
                language_aliases[iso_lang] = engine_lang

        setattr(engine, 'language_aliases', language_aliases)

    # language_support
    setattr(engine, 'language_support',
            len(getattr(engine, 'supported_languages', [])) > 0)

    # assign language fetching method if auxiliary method exists
    if hasattr(engine, '_fetch_supported_languages'):
        headers = {
            'User-Agent': gen_useragent(),
            'Accept-Language':
            'ja-JP,ja;q=0.8,en-US;q=0.5,en;q=0.3',  # bing needs a non-English language
        }
        setattr(
            engine, 'fetch_supported_languages',
            lambda: engine._fetch_supported_languages(
                get(engine.supported_languages_url, headers=headers)))

    engine.stats = {
        'sent_search_count': 0,  # sent search
        'search_count': 0,  # succesful search
        'result_count': 0,
        'engine_time': 0,
        'engine_time_count': 0,
        'score_count': 0,
        'errors': 0
    }

    engine_type = getattr(engine, 'engine_type', 'online')

    if engine_type != 'offline':
        engine.stats['page_load_time'] = 0
        engine.stats['page_load_count'] = 0

    # tor related settings
    if settings['outgoing'].get('using_tor_proxy'):
        # use onion url if using tor.
        if hasattr(engine, 'onion_url'):
            engine.search_url = engine.onion_url + getattr(
                engine, 'search_path', '')
    elif 'onions' in engine.categories:
        # exclude onion engines if not using tor.
        return None

    engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0)

    for category_name in engine.categories:
        categories.setdefault(category_name, []).append(engine)

    if engine.shortcut in engine_shortcuts:
        logger.error('Engine config error: ambigious shortcut: {0}'.format(
            engine.shortcut))
        sys.exit(1)

    engine_shortcuts[engine.shortcut] = engine.name

    return engine
Exemplo n.º 35
0
def url_lang(lang):
    lang_pre = lang.split('-')[0]
    if lang_pre == 'all' or lang_pre not in supported_languages and lang_pre not in language_aliases:
        return 'en'
    return match_language(lang, supported_languages, language_aliases).split('-')[0]
Exemplo n.º 36
0
 def _validate_selection(self, selection):
     if selection != "" and not match_language(
             # pylint: disable=no-member
             selection, self.choices, fallback=None):
         raise ValidationException('Invalid language code: "{0}"'.format(selection))