def _compare_searches(self, invenio_syntax, spires_syntax):
    """Assert that a SPIRES query and an Invenio query give equal search units.

    The SPIRES query is converted to Invenio syntax first. Both queries are
    then parsed with the parenthesised query parser and turned into basic
    search units, which must compare equal.

    For comparison of actual search results (regression testing), see the
    tests in the Inspire module.
    """
    query_parser = search_engine_query_parser.SearchQueryParenthesisedParser()
    syntax_converter = search_engine_query_parser.SpiresToInvenioSyntaxConverter()

    def units_for(query):
        # parse_query returns a list of tokens (the original note says it
        # also removes parens added by convert_query); rejoin the tokens,
        # drop the '+ ' markers and build the basic search units.
        rejoined = ' '.join(query_parser.parse_query(query))
        return create_basic_search_units(None,
                                         rejoined.replace('+ ',''),
                                         '',
                                         None)

    result_obtained = units_for(syntax_converter.convert_query(spires_syntax))
    # The wanted query goes through the parser too, in case it has parens.
    result_wanted = units_for(invenio_syntax)

    assert result_obtained == result_wanted, \
        """SPIRES parsed as %s instead of %s""" % \
        (repr(result_obtained), repr(result_wanted))
def calculate_hosted_collections_search_params(req,
                                               pattern_list,
                                               field,
                                               hosted_collections,
                                               verbosity_level=0):
    """Calculate the search parameters for the selected hosted collections,
    i.e. the actual hosted search engines and the basic search units.

    An empty pattern is accepted on purpose: the search goes on even when
    there is no pattern (an empty pattern_list and field).

    @param req: the request object; must be truthy, and is handed to
        get_verbose_print for the verbose messages
    @param pattern_list: the patterns handed to bind_patterns
    @param field: the field handed to create_basic_search_units
    @param hosted_collections: handed to select_hosted_search_engines
    @param verbosity_level: the verbosity handed to get_verbose_print
    @return: tuple (hosted_search_engines, basic_search_units)
    """
    from xml.sax.saxutils import escape

    from invenio.search_engine import create_basic_search_units
    assert req
    vprint = get_verbose_print(req, 'Hosted collections (calculate_hosted_collections_search_params): ', verbosity_level)

    pattern = bind_patterns(pattern_list)
    # The pattern comes from the search query, so escape &, < and > before
    # it is echoed in the verbose output (presumably rendered as HTML).
    vprint(3, 'pattern = ' + escape(pattern))

    # No early return on an empty pattern: the search is allowed to go on
    # without one.

    # calculate the basic search units
    basic_search_units = create_basic_search_units(None, pattern, field)
    # The units are derived from the pattern, so they are escaped as well.
    vprint(3, 'basic_search_units = ' + escape(str(basic_search_units)))

    # calculate the set of hosted search engines
    hosted_search_engines = select_hosted_search_engines(hosted_collections)
    vprint(3, 'hosted_search_engines = ' + str(hosted_search_engines))

    # A sorted list of the hosted search engines is deliberately not printed.

    return (hosted_search_engines, basic_search_units)
 def _check(self, p, f, m, result_wanted):
     """Assert that create_basic_search_units(None, p, f, m) equals result_wanted.

     The assertion message shows the repr of both the obtained and the
     wanted value.
     """
     actual = search_engine.create_basic_search_units(None, p, f, m)
     assert actual == result_wanted, \
         'obtained %s instead of %s' % (repr(actual), repr(result_wanted))
def print_external_results_overview(req, current_collection, pattern_list, field,
        external_collection, verbosity_level=0, lang=CFG_SITE_LANG):
    """Print the external collection overview box. Return the selected external collections and parsed query.

    @param req: the request object; must be truthy. The overview HTML is
        written to it.
    @param current_collection: handed to select_external_engines
    @param pattern_list: the patterns handed to bind_patterns
    @param field: the field handed to create_basic_search_units
    @param external_collection: handed to select_external_engines
    @param verbosity_level: the verbosity handed to get_verbose_print
    @param lang: language handed to the overview template
    @return: tuple (search_engines, seealso_engines, pattern,
        basic_search_units), or (None, None, None, None) when the bound
        pattern is empty
    """
    from xml.sax.saxutils import escape

    from invenio.search_engine import create_basic_search_units
    assert req
    vprint = get_verbose_print(req, 'External collection (print_external_results_overview): ', verbosity_level)

    pattern = bind_patterns(pattern_list)
    # The pattern comes from the search query, so escape &, < and > before
    # it is echoed in the verbose output (presumably rendered as HTML).
    vprint(3, 'pattern = ' + escape(pattern))

    if not pattern:
        return (None, None, None, None)

    basic_search_units = create_basic_search_units(None, pattern, field)
    # The units are derived from the pattern, so they are escaped as well.
    vprint(3, 'basic_search_units = ' + escape(str(basic_search_units)))

    (search_engines, seealso_engines) = select_external_engines(current_collection, external_collection)
    vprint(3, 'search_engines = ' + str(search_engines))
    vprint(3, 'seealso_engines = ' + str(seealso_engines))

    search_engines_list = external_collection_sort_engine_by_name(search_engines)
    vprint(3, 'search_engines_list (sorted) : ' + str(search_engines_list))
    html = template.external_collection_overview(lang, search_engines_list)
    req.write(html)

    return (search_engines, seealso_engines, pattern, basic_search_units)
 def test_search_Nucl_Phys_B75_1974_461_with_spaces(self):
     """websearch - search '  Nucl.  Phys.   B75   (1974)  461   ', with JournalHintService"""
     requester = collect_user_info(1)
     query = '  Nucl.  Phys.   B75   (1974)  461   '
     units = create_basic_search_units(None, query, '')
     # The same user-info object serves as both the request and the user info.
     answer = self.plugin.answer(
         req=requester,
         user_info=requester,
         of='hb',
         cc=CFG_SITE_NAME,
         colls_to_search='',
         p=query,
         f='',
         search_units=units,
         ln='en')
     self.assertEqual(answer, (0, ''))
 def test_search_D_S_Salopek_J_R_Bond_and_J_M_Bardeen_Phys_Rev_D40_1989_1753(self):
     """websearch - search 'D.S. Salopek, J.R.Bond and J.M.Bardeen,Phys.Rev.D40(1989)1753.', with JournalHintService"""
     requester = collect_user_info(1)
     query = 'D.S. Salopek, J.R.Bond and J.M.Bardeen,Phys.Rev.D40(1989)1753.'
     units = create_basic_search_units(None, query, '')
     # The same user-info object serves as both the request and the user info.
     answer = self.plugin.answer(
         req=requester,
         user_info=requester,
         of='hb',
         cc=CFG_SITE_NAME,
         colls_to_search='',
         p=query,
         f='',
         search_units=units,
         ln='en')
     self.assertEqual(answer, (0, ''))
 def test_search_restricted_submission(self):
     """websearch - search for restricted submission, with SubmissionNameSearchService"""
     user_info = collect_user_info(get_uid_from_email('*****@*****.**'))
     pattern = 'submit thesis'
     search_units = create_basic_search_units(None, pattern, '')
     # The same user-info object serves as both the request and the user info.
     response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                   cc=CFG_SITE_NAME, colls_to_search='', p=pattern,
                                   f='', search_units=search_units, ln='en')
     # assertTrue replaces the deprecated assert_ alias, which newer
     # unittest versions no longer provide.
     self.assertTrue(response[0] >= 50)
     self.assertTrue('doctype=DEMOTHE' in response[1])
 def test_search_restricted_submission_as_guest(self):
     """websearch - search for restricted submission as guest, with SubmissionNameSearchService"""
     guest = collect_user_info(0)
     query = 'submit thesis'
     units = create_basic_search_units(None, query, '')
     # The same user-info object serves as both the request and the user info.
     answer = self.plugin.answer(
         req=guest,
         user_info=guest,
         of='hb',
         cc=CFG_SITE_NAME,
         colls_to_search='',
         p=query,
         f='',
         search_units=units,
         ln='en')
     expected = (0, '')
     self.assertEqual(answer, expected)
 def test_search_author_Tom(self):
     """websearch - search for an author using Invenio syntax, with JournalHintService"""
     guest = collect_user_info(0)
     query = 'author:Tom'
     units = create_basic_search_units(None, query, '')
     # The same user-info object serves as both the request and the user info.
     answer = self.plugin.answer(
         req=guest,
         user_info=guest,
         of='hb',
         cc=CFG_SITE_NAME,
         colls_to_search='',
         p=query,
         f='',
         search_units=units,
         ln='en')
     expected = (0, '')
     self.assertEqual(answer, expected)
 def test_search_Pais_Valencia_utf8(self):
     """websearch - search 'País Valencià' utf8, with JournalHintService"""
     guest = collect_user_info(0)
     # The query is passed on as UTF-8 encoded bytes, not as a unicode object.
     query = u'País Valencià'.encode('utf8')
     units = create_basic_search_units(None, query, '')
     answer = self.plugin.answer(
         req=guest,
         user_info=guest,
         of='hb',
         cc=CFG_SITE_NAME,
         colls_to_search='',
         p=query,
         f='',
         search_units=units,
         ln='en')
     expected = (0, '')
     self.assertEqual(answer, expected)
 def test_search_public_collection_as_guest(self):
     """websearch - search for public collection as guest, from search page"""
     user_info = collect_user_info(0)
     pattern = 'Atlantis Times Arts'
     search_units = create_basic_search_units(None, pattern, '')
     # The same user-info object serves as both the request and the user info.
     response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                   cc=CFG_SITE_NAME, colls_to_search='', p=pattern,
                                   f='', search_units=search_units, ln='en')
     # assertTrue replaces the deprecated assert_ alias, which newer
     # unittest versions no longer provide.
     self.assertTrue(response[0] > 50)
     self.assertTrue('collection/Atlantis%20Times%20Arts' in response[1])
 def test_search_Nucl_Instrum_Methods_Phys_Res_A_445_2000_456_462(self):
     """websearch - search 'Nucl. Instrum. Methods Phys. Res., A :445 2000 456-462', with JournalHintService"""
     user_info = collect_user_info(1)
     pattern = 'Nucl. Instrum. Methods Phys. Res., A :445 2000 456-462'
     search_units = create_basic_search_units(None, pattern, '')
     # The same user-info object serves as both the request and the user info.
     response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                   cc=CFG_SITE_NAME, colls_to_search='', p=pattern,
                                   f='', search_units=search_units, ln='en')
     # assertTrue replaces the deprecated assert_ alias, which newer
     # unittest versions no longer provide.
     self.assertTrue(response[0] >= 50)
     self.assertTrue('Development of photon beam diagnostics for VUV radiation from a SASE FEL' in response[1])
 def test_search_public_submission_as_guest(self):
     """websearch - search for public submission as guest, with SubmissionNameSearchService"""
     user_info = collect_user_info(0)
     pattern = 'submit article'
     search_units = create_basic_search_units(None, pattern, '')
     # The same user-info object serves as both the request and the user info.
     response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                   cc=CFG_SITE_NAME, colls_to_search='', p=pattern,
                                   f='', search_units=search_units, ln='en')
     # assertTrue replaces the deprecated assert_ alias, which newer
     # unittest versions no longer provide.
     self.assertTrue(response[0] >= 50)
     self.assertTrue('doctype=DEMOART' in response[1])
 def test_search_empty_string(self):
     """websearch - search empty string, with JournalHintService"""
     guest = collect_user_info(0)
     query = ''
     units = create_basic_search_units(None, query, '')
     # The same user-info object serves as both the request and the user info.
     answer = self.plugin.answer(
         req=guest,
         user_info=guest,
         of='hb',
         cc=CFG_SITE_NAME,
         colls_to_search='',
         p=query,
         f='',
         search_units=units,
         ln='en')
     expected = (0, '')
     self.assertEqual(answer, expected)
 def test_search_restricted_submission_category(self):
     """websearch - search for restricted submission, with SubmissionNameSearchService"""
     user_info = collect_user_info(1)
     pattern = 'submit news'
     search_units = create_basic_search_units(None, pattern, '')
     # The same user-info object serves as both the request and the user info.
     response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                   cc=CFG_SITE_NAME, colls_to_search='', p=pattern,
                                   f='', search_units=search_units, ln='en')
     # assertTrue replaces the deprecated assert_ alias, which newer
     # unittest versions no longer provide.
     self.assertTrue(response[0] >= 50)
     self.assertTrue('doctype=DEMOJRN' in response[1])
Пример #16
0
def calculate_external_search_params(pattern_list, field, hosted_colls):
    """Compute the basic search units for the search pattern and select the
    hosted collection engines.

    The patterns are bound into one pattern with bind_patterns before the
    basic search units are created for the given field.

    Returns a tuple (external_search_engines, basic_search_units).
    """
    from invenio.search_engine import create_basic_search_units
    from invenio.websearch_external_collections import (
        bind_patterns,
        select_hosted_search_engines,
    )

    units = create_basic_search_units(None, bind_patterns(pattern_list), field)
    engines = select_hosted_search_engines(hosted_colls)
    return (engines, units)
Пример #17
0
 def test_search_public_collection_as_guest(self):
     """websearch - search for public collection as guest, from search page"""
     user_info = collect_user_info(0)
     pattern = 'Atlantis Times Arts'
     search_units = create_basic_search_units(None, pattern, '')
     # The same user-info object serves as both the request and the user info.
     response = self.plugin.answer(req=user_info,
                                   user_info=user_info,
                                   of='hb',
                                   cc=CFG_SITE_NAME,
                                   colls_to_search='',
                                   p=pattern,
                                   f='',
                                   search_units=search_units,
                                   ln='en')
     # assertTrue replaces the deprecated assert_ alias, which newer
     # unittest versions no longer provide.
     self.assertTrue(response[0] > 50)
     self.assertTrue('collection/Atlantis%20Times%20Arts' in response[1])
Пример #18
0
 def test_search_restricted_submission(self):
     """websearch - search for restricted submission, with SubmissionNameSearchService"""
     user_info = collect_user_info(get_uid_from_email('*****@*****.**'))
     pattern = 'submit thesis'
     search_units = create_basic_search_units(None, pattern, '')
     # The same user-info object serves as both the request and the user info.
     response = self.plugin.answer(req=user_info,
                                   user_info=user_info,
                                   of='hb',
                                   cc=CFG_SITE_NAME,
                                   colls_to_search='',
                                   p=pattern,
                                   f='',
                                   search_units=search_units,
                                   ln='en')
     # assertTrue replaces the deprecated assert_ alias, which newer
     # unittest versions no longer provide.
     self.assertTrue(response[0] >= 50)
     self.assertTrue('doctype=DEMOTHE' in response[1])
Пример #19
0
def get_fulltext_terms_from_search_pattern(search_pattern):
    """Return the terms of a search pattern that apply to the full text.

    The pattern is UTF-8 encoded and split into basic search units. A unit
    contributes its pattern when its operator is not '-' and its field is
    either None or 'fulltext'. Returns an empty list when search_pattern is
    None.
    """
    if search_pattern is None:
        return []

    from invenio.search_engine import create_basic_search_units

    terms = []
    for unit in create_basic_search_units(None,
                                          search_pattern.encode('utf-8'),
                                          None):
        operator, term, field, match_type = unit[0], unit[1], unit[2], unit[3]
        if operator == '-' or field not in [None, 'fulltext']:
            continue
        wrapped_in_percent = (match_type == 'a'
                              and term.startswith('%')
                              and term.endswith('%'))
        if wrapped_in_percent:
            # drop the leading and trailing `%' representing partial phrase search
            term = term[1:-1]
        terms.append(term)
    return terms
Пример #20
0
def print_external_results_overview(req,
                                    current_collection,
                                    pattern_list,
                                    field,
                                    external_collection,
                                    verbosity_level=0,
                                    lang=CFG_SITE_LANG,
                                    print_overview=True):
    """Print the external collection overview box and return the selected
    external collections together with the parsed query.

    The overview HTML is written to req only when print_overview is true.
    Returns (search_engines, seealso_engines, pattern, basic_search_units),
    or (None, None, None, None) when the bound pattern is empty.
    """
    from invenio.search_engine import create_basic_search_units
    assert req
    vprint = get_verbose_print(
        req,
        'External collection (print_external_results_overview): ',
        verbosity_level)

    bound_pattern = bind_patterns(pattern_list)
    # Values derived from the query are escaped before being echoed.
    vprint(3, 'pattern = %s' % cgi.escape(bound_pattern))
    if not bound_pattern:
        return (None, None, None, None)

    units = create_basic_search_units(None, bound_pattern, field)
    vprint(3, 'basic_search_units = %s' % cgi.escape(repr(units)))

    engines, seealso = select_external_engines(current_collection,
                                               external_collection)
    vprint(3, 'search_engines = ' + str(engines))
    vprint(3, 'seealso_engines = ' + str(seealso))

    engines_by_name = external_collection_sort_engine_by_name(engines)
    vprint(3, 'search_engines_list (sorted) : ' + str(engines_by_name))
    if print_overview:
        req.write(template.external_collection_overview(lang, engines_by_name))

    return (engines, seealso, bound_pattern, units)
def word_similarity_solr(pattern, hitset, params, verbose, explicit_field, ranked_result_amount):
    """
    Rank the records of a hitset with Solr and return them as a list.

    input:
    pattern - iterable of query parts; each part is converted with str() and
              the parts are joined with spaces
    hitset - hits for the query found by search_engine
    params - ranking configuration; the keys read here are "fields" (per-field
             settings holding a "weight"), "default_field",
             "find_similar_to_recid" (holding "hitset_cutoff"), "prefix" and
             "postfix"
    verbose - verbose value; details are collected in voutput when > 0
    explicit_field - field to search (selected in GUI)
    ranked_result_amount - amount of results to be ranked
    output:
    recset - a list of sorted records: [[23, 34], [344, 24], [1, 1]];
             [] when the hitset is empty, None when no ranking was possible
    prefix - what to show before the rank value (an explanatory message
             when recset is None)
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value
    """
    voutput = ""
    not_ranked_msg = "Records not ranked. The query is not detailed enough, or not enough records found, for ranking to be possible."
    error_msg = "Records not ranked. An error occurred. Please check the query."

    if not len(hitset):
        return ([], "", "", voutput)

    if not pattern:
        return (None, not_ranked_msg, "", voutput)

    pattern = " ".join(map(str, pattern))
    from invenio.search_engine import create_basic_search_units
    search_units = create_basic_search_units(None, pattern, explicit_field)

    if verbose > 0:
        voutput += "Hitset: %s<br/>" % hitset
        voutput += "Pattern: %s<br/>" % pattern
        voutput += "Search units: %s<br/>" % search_units

    # A pattern that yields no search units cannot be ranked; report this
    # instead of failing on search_units[0] below.
    if not search_units:
        return (None, not_ranked_msg, "", voutput)

    # Ranks similar records
    if search_units[0][2] == 'recid':
        recid = search_units[0][1]
        if verbose > 0:
            voutput += "Ranked amount: %s<br/>" % ranked_result_amount

        try:
            (ranked_result, matched_recs) = solr_get_similar_ranked(recid, hitset, params, ranked_result_amount)
        except Exception:
            # Exception (not a bare except) so that SystemExit and
            # KeyboardInterrupt are not swallowed.
            register_exception()
            return (None, error_msg, "", voutput)

        # Cutoffs potentially large hitset
        it = itertools.islice(hitset, params['find_similar_to_recid']['hitset_cutoff'])
        hitset = intbitset(list(it))

    # Regular word similarity ranking
    else:
        query = ""
        # Distinct loop names keep the pattern parameter from being clobbered.
        for (operator, unit_pattern, unit_field, unit_type) in search_units:
            # Any field
            if unit_field == '':
                unit_field = 'global'
            # Field might not exist
            elif unit_field not in params["fields"]:
                unit_field = params["default_field"]

            if unit_type == "a":
                # Eliminates leading and trailing %; startswith also copes
                # with an empty pattern.
                if unit_pattern.startswith("%"):
                    unit_pattern = unit_pattern[1:-1]
                unit_pattern = "\"" + unit_pattern + "\""

            weighting = "^" + str(params["fields"][unit_field]["weight"])

            # Only the part after the last ':' is kept.
            # NOTE(review): for a quoted phrase this drops the opening
            # quote - confirm this is intended.
            if ':' in unit_pattern:
                unit_pattern = unit_pattern.rsplit(':', 1)[1]
            query_part = unit_field + ":" + unit_pattern + weighting

            # Considers boolean operator from the second part on, allows negation from the first part on
            if query or operator == "-":
                query += " " + BOOLEAN_EQUIVALENTS[operator] + " "
            query += query_part + " "

        if verbose > 0:
            voutput += "Solr query: %s<br/>" % query

        try:
            (ranked_result, matched_recs) = solr_get_ranked(query, hitset, params, ranked_result_amount)
        except Exception:
            register_exception()
            return (None, error_msg, "", voutput)

    if verbose > 0:
        voutput += "All matched records: %s<br/>" % matched_recs

    # Considers not ranked records: they are put in front with rank 0.
    not_ranked = hitset.difference(matched_recs)
    if not_ranked:
        # A list is built explicitly because zip() + list only works on
        # Python 2.
        ranked_result = [(rec_id, 0) for rec_id in not_ranked] + ranked_result

    if verbose > 0:
        voutput += "Not ranked: %s<br/>" % not_ranked

    return (ranked_result, params["prefix"], params["postfix"], voutput)
def word_similarity_solr(pattern, hitset, params, verbose, explicit_field, ranked_result_amount):
    """
    Rank the records of a hitset with Solr and return them as a list.

    input:
    pattern - iterable of query parts; each part is converted with str() and
              the parts are joined with spaces
    hitset - hits for the query found by search_engine
    params - ranking configuration; the keys read here are "fields" (per-field
             settings holding a "weight"), "default_field",
             "find_similar_to_recid" (holding "hitset_cutoff"), "prefix" and
             "postfix"
    verbose - verbose value; details are collected in voutput when > 0
    explicit_field - field to search (selected in GUI)
    ranked_result_amount - amount of results to be ranked
    output:
    recset - a list of sorted records: [[23, 34], [344, 24], [1, 1]];
             [] when the hitset is empty, None when no ranking was possible
    prefix - what to show before the rank value (an explanatory message
             when recset is None)
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value
    """
    voutput = ""
    not_ranked_msg = "Records not ranked. The query is not detailed enough, or not enough records found, for ranking to be possible."
    error_msg = "Records not ranked. An error occurred. Please check the query."

    if not len(hitset):
        return ([], "", "", voutput)

    if not pattern:
        return (None, not_ranked_msg, "", voutput)

    pattern = " ".join(map(str, pattern))
    from invenio.search_engine import create_basic_search_units
    search_units = create_basic_search_units(None, pattern, explicit_field)

    if verbose > 0:
        voutput += "Hitset: %s<br/>" % hitset
        voutput += "Pattern: %s<br/>" % pattern
        voutput += "Search units: %s<br/>" % search_units

    # A pattern that yields no search units cannot be ranked; report this
    # instead of failing on search_units[0] below.
    if not search_units:
        return (None, not_ranked_msg, "", voutput)

    similar_to_recid = search_units[0][2] == 'recid'

    # Ranks similar records
    if similar_to_recid:
        recid = search_units[0][1]
        if verbose > 0:
            voutput += "Ranked amount: %s<br/>" % ranked_result_amount

        try:
            (ranked_result, matched_recs) = solr_get_similar_ranked(recid, hitset, params, ranked_result_amount)
        except Exception:
            # Exception (not a bare except) so that SystemExit and
            # KeyboardInterrupt are not swallowed.
            register_exception()
            return (None, error_msg, "", voutput)

        # Cutoffs potentially large hitset
        it = itertools.islice(hitset, params['find_similar_to_recid']['hitset_cutoff'])
        hitset = intbitset(list(it))

    # Regular word similarity ranking
    else:
        query = ""
        # Distinct loop names keep the pattern parameter from being clobbered.
        for (operator, unit_pattern, unit_field, unit_type) in search_units:
            # Any field
            if unit_field == '':
                unit_field = 'global'
            # Field might not exist
            elif unit_field not in params["fields"]:
                unit_field = params["default_field"]

            if unit_type == "a":
                # Eliminates leading and trailing %; startswith also copes
                # with an empty pattern.
                if unit_pattern.startswith("%"):
                    unit_pattern = unit_pattern[1:-1]
                unit_pattern = "\"" + unit_pattern + "\""

            weighting = "^" + str(params["fields"][unit_field]["weight"])

            # Only the part after the last ':' is kept.
            # NOTE(review): for a quoted phrase this drops the opening
            # quote - confirm this is intended.
            if ':' in unit_pattern:
                unit_pattern = unit_pattern.rsplit(':', 1)[1]
            query_part = unit_field + ":" + unit_pattern + weighting

            # Considers boolean operator from the second part on, allows negation from the first part on
            if query or operator == "-":
                query += " " + BOOLEAN_EQUIVALENTS[operator] + " "
            query += query_part + " "

        if verbose > 0:
            voutput += "Solr query: %s<br/>" % query

        try:
            (ranked_result, matched_recs) = solr_get_ranked(query, hitset, params, ranked_result_amount)
        except Exception:
            register_exception()
            return (None, error_msg, "", voutput)

    if verbose > 0:
        voutput += "All matched records: %s<br/>" % matched_recs

    # Considers not ranked records: they are put in front with rank 0.
    not_ranked = hitset.difference(matched_recs)
    if not_ranked:
        # A list is built explicitly because zip() + list only works on
        # Python 2.
        ranked_result = [(rec_id, 0) for rec_id in not_ranked] + ranked_result

    if verbose > 0:
        voutput += "Not ranked: %s<br/>" % not_ranked

    # Similar-to-recid requires reverse order
    if similar_to_recid:
        ranked_result.reverse()

    return (ranked_result, params["prefix"], params["postfix"], voutput)
Пример #23
0
def format_template_show_preview_or_save(
    req,
    bft,
    ln=CFG_SITE_LANG,
    code=None,
    ln_for_preview=CFG_SITE_LANG,
    pattern_for_preview="",
    content_type_for_preview="text/html",
    save_action=None,
    navtrail="",
):
    """
    Preview a record rendered with a format template, optionally saving the
    template code first. Meant to be included inside the Format template
    editor.

    The code is saved only when both save_action and code are given. When no
    code is given, the stored code of the template is previewed. The record
    previewed is one popped from the search result for the preview pattern
    (deleted records excluded).

    @param req: the request object
    @param bft: the filename of the template to save
    @param ln: language
    @param code: the code of a template to use for formatting
    @param ln_for_preview: the language for the preview (for bfo)
    @param pattern_for_preview: the search pattern to be used for the preview (for bfo)
    @param content_type_for_preview: the content-type to use to serve the preview page
    @param save_action: has a value if the code has to be saved
    @param navtrail: navigation trail
    @return: a web page; None when the preview is written directly to req
        with a non-HTML content type
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)

    (auth_code, auth_msg) = check_user(req, "cfgbibformat")
    if auth_code:
        # A non-zero auth code means the user may not configure BibFormat.
        return page_not_authorized(req=req, text=auth_msg)

    user_info = collect_user_info(req)
    uid = user_info["uid"]
    bft = wash_url_argument(bft, "str")
    if save_action is not None and code is not None:
        # save
        bibformatadminlib.update_format_template_code(bft, code=code)
    bibformat_engine.clear_caches()
    if code is None:
        code = bibformat_engine.get_format_template(bft)["code"]

    ln_for_preview = wash_language(ln_for_preview)
    pattern_for_preview = wash_url_argument(pattern_for_preview, "str")

    no_pattern_given = pattern_for_preview == ""
    if no_pattern_given:
        query = "-collection:DELETED"
    else:
        query = pattern_for_preview + " -collection:DELETED"

    try:
        recID = search_pattern(p=query).pop()
    except KeyError:
        # Nothing to pop: no record matches, so show a "not found" page.
        not_found_args = dict(
            body="",
            uid=uid,
            language=ln_for_preview,
            navtrail="",
            lastupdated=__lastupdated__,
            req=req,
        )
        if no_pattern_given:
            return page(title="No Document Found", navmenuid="search", **not_found_args)
        return page(title="No Record Found for %s" % pattern_for_preview, **not_found_args)

    if no_pattern_given:
        # Without a user pattern, the keywords come from the record id query.
        pattern_for_preview = "recid:%s" % recID

    units = create_basic_search_units(None, pattern_for_preview, None)
    # Patterns of negated units ('-' operator) are not used as keywords.
    keywords = [unit[1] for unit in units if unit[0] != "-"]
    bfo = bibformat_engine.BibFormatObject(
        recID=recID, ln=ln_for_preview, search_pattern=keywords, xml_record=None, user_info=user_info
    )
    (body, _errors) = bibformat_engine.format_with_format_template(bft, bfo, verbose=7, format_template_code=code)

    if content_type_for_preview != "text/html":
        # Output with chosen content-type.
        req.content_type = content_type_for_preview
        req.send_http_header()
        req.write(body)
        return None

    # Standard page display with CDS headers, etc.
    return page(
        title="",
        body=body,
        uid=uid,
        language=ln_for_preview,
        navtrail=navtrail,
        lastupdated=__lastupdated__,
        req=req,
        navmenuid="search",
    )
Пример #24
0
def format_template_show_preview_or_save(req,
                                         bft,
                                         ln=CFG_SITE_LANG,
                                         code=None,
                                         ln_for_preview=CFG_SITE_LANG,
                                         pattern_for_preview="",
                                         content_type_for_preview='text/html',
                                         save_action=None,
                                         navtrail=""):
    """
    Preview a record rendered with a format template, optionally saving the
    template code first. Meant to be included inside the Format template
    editor.

    The code is saved only when both save_action and code are given. When no
    code is given, the stored code of the template is previewed. The record
    previewed is one popped from the search result for the preview pattern
    (deleted records excluded).

    @param req: the request object
    @param bft: the filename of the template to save
    @param ln: language
    @param code: the code of a template to use for formatting
    @param ln_for_preview: the language for the preview (for bfo)
    @param pattern_for_preview: the search pattern to be used for the preview (for bfo)
    @param content_type_for_preview: the content-type to use to serve the preview page
    @param save_action: has a value if the code has to be saved
    @param navtrail: navigation trail
    @return: a web page; None when the preview is written directly to req
        with a non-HTML content type
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)

    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if auth_code:
        # A non-zero auth code means the user may not configure BibFormat.
        return page_not_authorized(req=req, text=auth_msg)

    user_info = collect_user_info(req)
    uid = user_info['uid']
    bft = wash_url_argument(bft, 'str')
    if save_action is not None and code is not None:
        #save
        bibformatadminlib.update_format_template_code(bft, code=code)
    bibformat_engine.clear_caches()
    if code is None:
        code = bibformat_engine.get_format_template(bft)['code']

    ln_for_preview = wash_language(ln_for_preview)
    pattern_for_preview = wash_url_argument(pattern_for_preview, 'str')

    no_pattern_given = pattern_for_preview == ""
    if no_pattern_given:
        query = '-collection:DELETED'
    else:
        query = pattern_for_preview + ' -collection:DELETED'

    try:
        recID = search_pattern(p=query).pop()
    except KeyError:
        # Nothing to pop: no record matches, so show a "not found" page.
        not_found_args = dict(body="",
                              uid=uid,
                              language=ln_for_preview,
                              navtrail="",
                              lastupdated=__lastupdated__,
                              req=req)
        if no_pattern_given:
            return page(title="No Document Found",
                        navmenuid='search',
                        **not_found_args)
        return page(title="No Record Found for %s" % pattern_for_preview,
                    **not_found_args)

    if no_pattern_given:
        # Without a user pattern, the keywords come from the record id query.
        pattern_for_preview = "recid:%s" % recID

    units = create_basic_search_units(None, pattern_for_preview, None)
    # Patterns of negated units ('-' operator) are not used as keywords.
    keywords = [unit[1] for unit in units if unit[0] != '-']
    bfo = bibformat_engine.BibFormatObject(recID=recID,
                                           ln=ln_for_preview,
                                           search_pattern=keywords,
                                           xml_record=None,
                                           user_info=user_info)
    body = format_with_format_template(bft,
                                       bfo,
                                       verbose=7,
                                       format_template_code=code)

    if content_type_for_preview != 'text/html':
        #Output with chosen content-type.
        req.content_type = content_type_for_preview
        req.send_http_header()
        req.write(body)
        return None

    #Standard page display with CDS headers, etc.
    return page(title="",
                body=body,
                uid=uid,
                language=ln_for_preview,
                navtrail=navtrail,
                lastupdated=__lastupdated__,
                req=req,
                navmenuid='search')
def word_similarity_xapian(pattern, hitset, params, verbose, field,
                           ranked_result_amount):
    """
    Rank the records of a hitset against the Xapian indexes and return
    them sorted by ascending score.

    input:
    pattern - sequence of pattern pieces; they are joined with spaces and
              split into basic search units
    hitset - hits for the query found by search_engine; must provide
             .difference() (presumably an intbitset - confirm with caller)
    params - dict read for the keys "fields", "default_field", "prefix"
             and "postfix"
    verbose - verbose value; any value > 0 adds debug lines to voutput
    field - field to search (selected in GUI)
    ranked_result_amount - amount of results to be ranked
    output:
    recset - list of (recid, score) pairs sorted by ascending score;
             records of hitset that were not matched get the score 0
    prefix - what to show before the rank value
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value

    When hitset is empty the result is ([], "", "", voutput).
    """
    voutput = ""
    search_units = []

    if pattern:
        xapian_init_databases()
        pattern = " ".join(map(str, pattern))
        from invenio.search_engine import create_basic_search_units
        search_units = create_basic_search_units(None, pattern, field)

    if verbose > 0:
        voutput += "Hitset: %s<br/>" % hitset
        voutput += "Pattern: %s<br/>" % pattern
        voutput += "Search units: %s<br/>" % search_units

    all_ranked_results = []
    included_hits = intbitset()
    excluded_hits = intbitset()
    # Dedicated loop names: the original reused "pattern" and "field" and
    # thereby silently overwrote the function parameters.
    for (operator, unit_pattern, unit_field, unit_type) in search_units:
        # Field might not exist
        if unit_field not in params["fields"]:
            unit_field = params["default_field"]

        if unit_type == "a":
            # Eliminates leading and trailing %. startswith() is safe on an
            # empty pattern, and the trailing character is only dropped
            # when it really is a wildcard.
            if unit_pattern.startswith("%"):
                unit_pattern = unit_pattern[1:]
                if unit_pattern.endswith("%"):
                    unit_pattern = unit_pattern[:-1]
            unit_pattern = "\"" + unit_pattern + "\""

        (ranked_result_part,
         matched_recs) = xapian_get_ranked_index(unit_field, unit_pattern,
                                                 params["fields"][unit_field],
                                                 hitset, ranked_result_amount)

        if verbose > 0:
            voutput += "Index %s: %s<br/>" % (unit_field, ranked_result_part)
            voutput += "Index records %s: %s<br/>" % (unit_field,
                                                      matched_recs)

        # Excludes - results
        if operator == "-":
            excluded_hits = excluded_hits.union(matched_recs)
        # + and | are interpreted as OR
        else:
            included_hits = included_hits.union(matched_recs)
            all_ranked_results.extend(ranked_result_part)

    ranked_result = []
    if hitset:
        # Removes the excluded records
        result_hits = included_hits.difference(excluded_hits)

        # Avoids duplicate results and normalises scores
        ranked_result = get_greatest_ranked_records(all_ranked_results)
        ranked_result = get_normalized_ranking_scores(ranked_result)

        # Considers not ranked records: they are prepended with score 0.
        # A comprehension (instead of zip(...) + list) also works where
        # zip() returns an iterator.
        not_ranked = hitset.difference(result_hits)
        if not_ranked:
            unranked = [(recid, 0) for recid in not_ranked]
            ranked_result = unranked + ranked_result

        if verbose > 0:
            voutput += "All matched records: %s<br/>" % result_hits
            voutput += "All ranked records: %s<br/>" % ranked_result
            voutput += "All not ranked records: %s<br/>" % not_ranked

        # Stable ascending sort on the score; key= gives the same order as
        # the former cmp-based sort and does not rely on the cmp builtin.
        ranked_result.sort(key=lambda entry: entry[1])
        return (ranked_result, params["prefix"], params["postfix"], voutput)

    return (ranked_result, "", "", voutput)
def word_similarity_xapian(pattern, hitset, params, verbose, field, ranked_result_amount):
    """
    Rank the records of a hitset against the Xapian indexes and return
    them sorted by ascending score.

    input:
    pattern - sequence of pattern pieces; they are joined with spaces and
              split into basic search units
    hitset - hits for the query found by search_engine; must provide
             .difference() (presumably an intbitset - confirm with caller)
    params - dict read for the keys "fields", "default_field", "prefix"
             and "postfix"
    verbose - verbose value; any value > 0 adds debug lines to voutput
    field - field to search (selected in GUI)
    ranked_result_amount - amount of results to be ranked
    output:
    recset - list of (recid, score) pairs sorted by ascending score;
             records of hitset that were not matched get the score 0
    prefix - what to show before the rank value
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value

    When hitset is empty the result is ([], "", "", voutput).
    """
    voutput = ""
    search_units = []

    if pattern:
        xapian_init_databases()
        pattern = " ".join(map(str, pattern))
        from invenio.search_engine import create_basic_search_units
        search_units = create_basic_search_units(None, pattern, field)

    if verbose > 0:
        voutput += "Hitset: %s<br/>" % hitset
        voutput += "Pattern: %s<br/>" % pattern
        voutput += "Search units: %s<br/>" % search_units

    all_ranked_results = []
    included_hits = intbitset()
    excluded_hits = intbitset()
    # Dedicated loop names: the original reused "pattern" and "field" and
    # thereby silently overwrote the function parameters.
    for (operator, unit_pattern, unit_field, unit_type) in search_units:
        # Field might not exist
        if unit_field not in params["fields"]:
            unit_field = params["default_field"]

        if unit_type == "a":
            # Eliminates leading and trailing %. startswith() is safe on an
            # empty pattern, and the trailing character is only dropped
            # when it really is a wildcard.
            if unit_pattern.startswith("%"):
                unit_pattern = unit_pattern[1:]
                if unit_pattern.endswith("%"):
                    unit_pattern = unit_pattern[:-1]
            unit_pattern = "\"" + unit_pattern + "\""

        (ranked_result_part, matched_recs) = xapian_get_ranked_index(
            unit_field, unit_pattern, params["fields"][unit_field],
            hitset, ranked_result_amount)

        if verbose > 0:
            voutput += "Index %s: %s<br/>" % (unit_field, ranked_result_part)
            voutput += "Index records %s: %s<br/>" % (unit_field, matched_recs)

        # Excludes - results
        if operator == "-":
            excluded_hits = excluded_hits.union(matched_recs)
        # + and | are interpreted as OR
        else:
            included_hits = included_hits.union(matched_recs)
            all_ranked_results.extend(ranked_result_part)

    ranked_result = []
    if hitset:
        # Removes the excluded records
        result_hits = included_hits.difference(excluded_hits)

        # Avoids duplicate results and normalises scores
        ranked_result = get_greatest_ranked_records(all_ranked_results)
        ranked_result = get_normalized_ranking_scores(ranked_result)

        # Considers not ranked records: they are prepended with score 0.
        # A comprehension (instead of zip(...) + list) also works where
        # zip() returns an iterator.
        not_ranked = hitset.difference(result_hits)
        if not_ranked:
            unranked = [(recid, 0) for recid in not_ranked]
            ranked_result = unranked + ranked_result

        if verbose > 0:
            voutput += "All matched records: %s<br/>" % result_hits
            voutput += "All ranked records: %s<br/>" % ranked_result
            voutput += "All not ranked records: %s<br/>" % not_ranked

        # Stable ascending sort on the score; key= gives the same order as
        # the former cmp-based sort and does not rely on the cmp builtin.
        ranked_result.sort(key=lambda entry: entry[1])
        return (ranked_result, params["prefix"], params["postfix"], voutput)

    return (ranked_result, "", "", voutput)