Exemplo n.º 1
0
def calculate_hosted_collections_search_params(req,
                                               pattern_list,
                                               field,
                                               hosted_collections,
                                               verbosity_level=0):
    """Work out what is needed to search the selected hosted collections.

    The patterns in ``pattern_list`` are bound into a single pattern, which
    is then split into basic search units for ``field``; the hosted search
    engines are selected from ``hosted_collections``. Intermediate values
    are reported through the verbose printer at level 3.

    @param req: the request object (must be truthy)
    @param pattern_list: patterns handed to bind_patterns()
    @param field: field handed to create_basic_search_units()
    @param hosted_collections: collections handed to
        select_hosted_search_engines()
    @param verbosity_level: verbosity handed to get_verbose_print()
    @return: tuple (hosted_search_engines, basic_search_units)
    """
    from invenio.legacy.search_engine import create_basic_search_units

    assert req
    log_prefix = 'Hosted collections (calculate_hosted_collections_search_params): '
    vprint = get_verbose_print(req, log_prefix, verbosity_level)

    # NOTE: an empty pattern is deliberately NOT a reason to bail out here;
    # the search goes on even with an empty pattern_list and field.
    pattern = bind_patterns(pattern_list)
    vprint(3, 'pattern = %s' % cgi.escape(pattern))

    units = create_basic_search_units(None, pattern, field)
    vprint(3, 'basic_search_units = %s' % cgi.escape(repr(units)))

    # The engines are reported as selected; no sorted listing is printed.
    engines = select_hosted_search_engines(hosted_collections)
    vprint(3, 'hosted_search_engines = ' + str(engines))

    return (engines, units)
Exemplo n.º 2
0
def print_external_results_overview(req, current_collection, pattern_list, field,
        external_collection, verbosity_level=0, lang=CFG_SITE_LANG, print_overview=True):
    """Write the external collection overview box and return the search setup.

    @param req: the request object (must be truthy); the overview HTML is
        written to it when print_overview is true
    @param current_collection: handed to select_external_engines()
    @param pattern_list: patterns handed to bind_patterns()
    @param field: field handed to create_basic_search_units()
    @param external_collection: handed to select_external_engines()
    @param verbosity_level: verbosity handed to get_verbose_print()
    @param lang: language handed to the overview template
    @param print_overview: when false, nothing is written to req
    @return: tuple (search_engines, seealso_engines, pattern,
        basic_search_units), or four Nones when the bound pattern is empty
    """
    from invenio.legacy.search_engine import create_basic_search_units

    assert req
    log_prefix = 'External collection (print_external_results_overview): '
    vprint = get_verbose_print(req, log_prefix, verbosity_level)

    pattern = bind_patterns(pattern_list)
    vprint(3, 'pattern = %s' % cgi.escape(pattern))
    if not pattern:
        # Nothing to search for: no engines, no pattern, no units.
        return (None, None, None, None)

    units = create_basic_search_units(None, pattern, field)
    vprint(3, 'basic_search_units = %s' % cgi.escape(repr(units)))

    selected = select_external_engines(current_collection, external_collection)
    (search_engines, seealso_engines) = selected
    vprint(3, 'search_engines = ' + str(search_engines))
    vprint(3, 'seealso_engines = ' + str(seealso_engines))

    # The overview lists the engines sorted by name.
    engines_by_name = external_collection_sort_engine_by_name(search_engines)
    vprint(3, 'search_engines_list (sorted) : ' + str(engines_by_name))
    if print_overview:
        req.write(template.external_collection_overview(lang, engines_by_name))

    return (search_engines, seealso_engines, pattern, units)
Exemplo n.º 3
0
def calculate_hosted_collections_search_params(req,
                                               pattern_list,
                                               field,
                                               hosted_collections,
                                               verbosity_level=0):
    """Work out what is needed to search the selected hosted collections.

    The patterns in ``pattern_list`` are bound into a single pattern, which
    is then split into basic search units for ``field``; the hosted search
    engines are selected from ``hosted_collections``. Intermediate values
    are reported through the verbose printer at level 3.

    @param req: the request object (must be truthy)
    @param pattern_list: patterns handed to bind_patterns()
    @param field: field handed to create_basic_search_units()
    @param hosted_collections: collections handed to
        select_hosted_search_engines()
    @param verbosity_level: verbosity handed to get_verbose_print()
    @return: tuple (hosted_search_engines, basic_search_units)
    """
    from invenio.legacy.search_engine import create_basic_search_units

    assert req
    log_prefix = 'Hosted collections (calculate_hosted_collections_search_params): '
    vprint = get_verbose_print(req, log_prefix, verbosity_level)

    # NOTE: an empty pattern is deliberately NOT a reason to bail out here;
    # the search goes on even with an empty pattern_list and field.
    pattern = bind_patterns(pattern_list)
    vprint(3, 'pattern = %s' % cgi.escape(pattern))

    units = create_basic_search_units(None, pattern, field)
    vprint(3, 'basic_search_units = %s' % cgi.escape(repr(units)))

    # The engines are reported as selected; no sorted listing is printed.
    engines = select_hosted_search_engines(hosted_collections)
    vprint(3, 'hosted_search_engines = ' + str(engines))

    return (engines, units)
Exemplo n.º 4
0
def get_answers(req, user_info, of, cc, colls_to_search, p, f, ln):
    """Yield (relevance, html) answers from the registered search services.

    Every service in ``registry.services`` is asked for an answer; the
    answers are sorted in descending order and yielded until one of the
    cut-off rules below stops the iteration.
    """
    search_units = []
    if p:
        from invenio.legacy.search_engine import create_basic_search_units

        search_units = create_basic_search_units(req, p, f)

    # sorted() drains the generator, so all services are queried up front.
    ranked_answers = sorted(
        (service.answer(req, user_info, of, cc, colls_to_search, p, f,
                        search_units, ln)
         for service in registry.services),
        reverse=True)

    best_relevance = None
    for position, (answer_relevance, answer_html) in enumerate(ranked_answers, 1):
        if best_relevance is None:
            # First (i.e. highest ranked) answer sets the reference score.
            best_relevance = answer_relevance

        if (best_relevance <= CFG_WEBSEARCH_SERVICE_MIN_RELEVANCE_TO_DISPLAY
                # ^ even the best answer is not relevant enough
                or position > CFG_WEBSEARCH_SERVICE_MAX_NB_SERVICE_DISPLAY
                # ^ the maximum number of services to display is reached
                or best_relevance - answer_relevance >
                CFG_WEBSEARCH_SERVICE_MAX_RELEVANCE_DIFFERENCE):
            # ^ this answer is way less good than the previous ones
            break

        yield answer_relevance, answer_html

        if answer_relevance == CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE:
            # The service assumes it has given the definitive answer.
            break
Exemplo n.º 5
0
def calculate_external_search_params(pattern_list, field, hosted_colls):
    """Compute the basic search units and the hosted collection engines.

    @param pattern_list: patterns handed to bind_patterns()
    @param field: field handed to create_basic_search_units()
    @param hosted_colls: collections handed to select_hosted_search_engines()
    @return: tuple (external_search_engines, basic_search_units)
    """
    from invenio.legacy.search_engine import create_basic_search_units
    from invenio.legacy.websearch_external_collections import bind_patterns
    from invenio.legacy.websearch_external_collections import select_hosted_search_engines

    # Bind the patterns first, then split the result into search units.
    units = create_basic_search_units(None, bind_patterns(pattern_list), field)
    engines = select_hosted_search_engines(hosted_colls)

    return (engines, units)
Exemplo n.º 6
0
def calculate_external_search_params(pattern_list, field, hosted_colls):
    """Compute the basic search units and the hosted collection engines.

    @param pattern_list: patterns handed to bind_patterns()
    @param field: field handed to create_basic_search_units()
    @param hosted_colls: collections handed to select_hosted_search_engines()
    @return: tuple (external_search_engines, basic_search_units)
    """
    from invenio.legacy.search_engine import create_basic_search_units
    from invenio.legacy.websearch_external_collections import bind_patterns
    from invenio.legacy.websearch_external_collections import select_hosted_search_engines

    # Bind the patterns first, then split the result into search units.
    units = create_basic_search_units(None, bind_patterns(pattern_list), field)
    engines = select_hosted_search_engines(hosted_colls)

    return (engines, units)
Exemplo n.º 7
0
def get_answers(req, user_info, of, cc, colls_to_search, p, f, ln):
    """Yield (relevance, html) answers from the registered search services.

    Every service in ``registry.services`` is asked for an answer; the
    answers are sorted in descending order and yielded until one of the
    cut-off rules below stops the iteration.
    """
    search_units = []
    if p:
        from invenio.legacy.search_engine import create_basic_search_units
        search_units = create_basic_search_units(req, p, f)

    # sorted() drains the generator, so all services are queried up front.
    ranked_answers = sorted(
        (service.answer(req, user_info, of, cc, colls_to_search, p, f,
                        search_units, ln)
         for service in registry.services),
        reverse=True)

    best_relevance = None
    for position, (answer_relevance, answer_html) in enumerate(ranked_answers, 1):
        if best_relevance is None:
            # First (i.e. highest ranked) answer sets the reference score.
            best_relevance = answer_relevance

        if (best_relevance <= CFG_WEBSEARCH_SERVICE_MIN_RELEVANCE_TO_DISPLAY
                # ^ even the best answer is not relevant enough
                or position > CFG_WEBSEARCH_SERVICE_MAX_NB_SERVICE_DISPLAY
                # ^ the maximum number of services to display is reached
                or best_relevance - answer_relevance >
                CFG_WEBSEARCH_SERVICE_MAX_RELEVANCE_DIFFERENCE):
            # ^ this answer is way less good than the previous ones
            break

        yield answer_relevance, answer_html

        if answer_relevance == \
                CFG_WEBSEARCH_SERVICE_MAX_SERVICE_ANSWER_RELEVANCE:
            # The service assumes it has given the definitive answer.
            break
Exemplo n.º 8
0
def print_external_results_overview(req,
                                    current_collection,
                                    pattern_list,
                                    field,
                                    external_collection,
                                    verbosity_level=0,
                                    lang=CFG_SITE_LANG,
                                    print_overview=True):
    """Write the external collection overview box and return the search setup.

    @param req: the request object (must be truthy); the overview HTML is
        written to it when print_overview is true
    @param current_collection: handed to select_external_engines()
    @param pattern_list: patterns handed to bind_patterns()
    @param field: field handed to create_basic_search_units()
    @param external_collection: handed to select_external_engines()
    @param verbosity_level: verbosity handed to get_verbose_print()
    @param lang: language handed to the overview template
    @param print_overview: when false, nothing is written to req
    @return: tuple (search_engines, seealso_engines, pattern,
        basic_search_units), or four Nones when the bound pattern is empty
    """
    from invenio.legacy.search_engine import create_basic_search_units

    assert req
    log_prefix = 'External collection (print_external_results_overview): '
    vprint = get_verbose_print(req, log_prefix, verbosity_level)

    pattern = bind_patterns(pattern_list)
    vprint(3, 'pattern = %s' % cgi.escape(pattern))
    if not pattern:
        # Nothing to search for: no engines, no pattern, no units.
        return (None, None, None, None)

    units = create_basic_search_units(None, pattern, field)
    vprint(3, 'basic_search_units = %s' % cgi.escape(repr(units)))

    selected = select_external_engines(current_collection, external_collection)
    (search_engines, seealso_engines) = selected
    vprint(3, 'search_engines = ' + str(search_engines))
    vprint(3, 'seealso_engines = ' + str(seealso_engines))

    # The overview lists the engines sorted by name.
    engines_by_name = external_collection_sort_engine_by_name(search_engines)
    vprint(3, 'search_engines_list (sorted) : ' + str(engines_by_name))
    if print_overview:
        req.write(template.external_collection_overview(lang, engines_by_name))

    return (search_engines, seealso_engines, pattern, units)
def word_similarity_solr(pattern, hitset, params, verbose, explicit_field,
                         ranked_result_amount):
    """
    Ranks the records of a hitset with Solr and returns them as a sorted list.
    input:
    pattern - sequence of query words; joined with spaces before parsing
    hitset - a list of hits for the query found by search_engine
             (must offer difference(); presumably an intbitset - confirm)
    params - ranking configuration; keys read here: "fields" (per field a
             dict with "weight"), "default_field", "prefix", "postfix" and
             "find_similar_to_recid" -> "hitset_cutoff"
    verbose - verbose value
    explicit_field - field to search (selected in GUI)
    ranked_result_amount - amount of results to be ranked
    output:
    recset - a list of sorted records: [[23,34], [344,24], [1,01]];
             None when the records could not be ranked
    prefix - what to show before the rank value (or an explanatory message
             when recset is None)
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value
    """
    voutput = ""

    if not len(hitset):
        return ([], "", "", voutput)

    not_ranked_msg = ("Records not ranked. The query is not detailed enough, "
                      "or not enough records found, for ranking to be possible.")
    error_msg = "Records not ranked. An error occurred. Please check the query."

    if not pattern:
        return (None, not_ranked_msg, "", voutput)

    from invenio.legacy.search_engine import create_basic_search_units
    pattern = " ".join(map(str, pattern))
    search_units = create_basic_search_units(None, pattern, explicit_field)

    if verbose > 0:
        voutput += "Hitset: %s<br/>" % hitset
        voutput += "Pattern: %s<br/>" % pattern
        voutput += "Search units: %s<br/>" % search_units

    if not search_units:
        # A non-empty pattern may still parse into no search unit at all;
        # treat it like an empty pattern instead of failing on [0] below.
        return (None, not_ranked_msg, "", voutput)

    similar_to_recid = search_units[0][2] == 'recid'

    # Ranks similar records
    if similar_to_recid:
        recid = search_units[0][1]
        if verbose > 0:
            voutput += "Ranked amount: %s<br/>" % ranked_result_amount

        try:
            (ranked_result,
             matched_recs) = solr_get_similar_ranked(recid, hitset, params,
                                                     ranked_result_amount)
        except Exception:
            # Exception (not a bare except) so that SystemExit and
            # KeyboardInterrupt are not swallowed.
            register_exception()
            return (None, error_msg, "", voutput)

        # Cutoffs potentially large hitset
        cutoff = params['find_similar_to_recid']['hitset_cutoff']
        hitset = intbitset(list(itertools.islice(hitset, cutoff)))

    # Regular word similarity ranking
    else:
        query = ""
        for (operator, unit_pattern, field, unit_type) in search_units:
            # Any field
            if field == '':
                field = 'global'
            # Field might not exist
            elif field not in params["fields"]:
                field = params["default_field"]

            if unit_type == "a":
                # Eliminates leading and trailing %
                # (assumes a leading % always comes with a trailing one)
                if unit_pattern.startswith("%"):
                    unit_pattern = unit_pattern[1:-1]
                unit_pattern = "\"" + unit_pattern + "\""

            weighting = "^" + str(params["fields"][field]["weight"])

            # Keeps only what follows the last colon of the pattern
            if ':' in unit_pattern:
                unit_pattern = unit_pattern.rsplit(':', 1)[1]
            query_part = field + ":" + unit_pattern + weighting

            # Considers boolean operator from the second part on, allows negation from the first part on
            if query or operator == "-":
                query += " " + BOOLEAN_EQUIVALENTS[operator] + " "
            query += query_part + " "

        if verbose > 0:
            voutput += "Solr query: %s<br/>" % query

        try:
            (ranked_result,
             matched_recs) = solr_get_ranked(query, hitset, params,
                                             ranked_result_amount)
        except Exception:
            register_exception()
            return (None, error_msg, "", voutput)

    if verbose > 0:
        voutput += "All matched records: %s<br/>" % matched_recs

    # Considers not ranked records: they are put in front with a score of 0
    not_ranked = hitset.difference(matched_recs)
    if not_ranked:
        # Built as a real list so that the concatenation also works where
        # zip() returns an iterator.
        ranked_result = [(recid, 0) for recid in not_ranked] + ranked_result

    if verbose > 0:
        voutput += "Not ranked: %s<br/>" % not_ranked

    # Similar-to-recid requires reverse order
    if similar_to_recid:
        ranked_result.reverse()

    return (ranked_result, params["prefix"], params["postfix"], voutput)
Exemplo n.º 10
0
def word_similarity_solr(pattern, hitset, params, verbose, explicit_field, ranked_result_amount):
    """
    Ranks the records of a hitset with Solr and returns them as a sorted list.
    input:
    pattern - sequence of query words; joined with spaces before parsing
    hitset - a list of hits for the query found by search_engine
             (must offer difference(); presumably an intbitset - confirm)
    params - ranking configuration; keys read here: "fields" (per field a
             dict with "weight"), "default_field", "prefix", "postfix" and
             "find_similar_to_recid" -> "hitset_cutoff"
    verbose - verbose value
    explicit_field - field to search (selected in GUI)
    ranked_result_amount - amount of results to be ranked
    output:
    recset - a list of sorted records: [[23,34], [344,24], [1,01]];
             None when the records could not be ranked
    prefix - what to show before the rank value (or an explanatory message
             when recset is None)
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value
    """
    voutput = ""

    if not len(hitset):
        return ([], "", "", voutput)

    not_ranked_msg = ("Records not ranked. The query is not detailed enough, "
                      "or not enough records found, for ranking to be possible.")
    error_msg = "Records not ranked. An error occurred. Please check the query."

    if not pattern:
        return (None, not_ranked_msg, "", voutput)

    from invenio.legacy.search_engine import create_basic_search_units
    pattern = " ".join(map(str, pattern))
    search_units = create_basic_search_units(None, pattern, explicit_field)

    if verbose > 0:
        voutput += "Hitset: %s<br/>" % hitset
        voutput += "Pattern: %s<br/>" % pattern
        voutput += "Search units: %s<br/>" % search_units

    if not search_units:
        # A non-empty pattern may still parse into no search unit at all;
        # treat it like an empty pattern instead of failing on [0] below.
        return (None, not_ranked_msg, "", voutput)

    similar_to_recid = search_units[0][2] == 'recid'

    # Ranks similar records
    if similar_to_recid:
        recid = search_units[0][1]
        if verbose > 0:
            voutput += "Ranked amount: %s<br/>" % ranked_result_amount

        try:
            (ranked_result, matched_recs) = solr_get_similar_ranked(recid, hitset, params, ranked_result_amount)
        except Exception:
            # Exception (not a bare except) so that SystemExit and
            # KeyboardInterrupt are not swallowed.
            register_exception()
            return (None, error_msg, "", voutput)

        # Cutoffs potentially large hitset
        cutoff = params['find_similar_to_recid']['hitset_cutoff']
        hitset = intbitset(list(itertools.islice(hitset, cutoff)))

    # Regular word similarity ranking
    else:
        query = ""
        for (operator, unit_pattern, field, unit_type) in search_units:
            # Any field
            if field == '':
                field = 'global'
            # Field might not exist
            elif field not in params["fields"]:
                field = params["default_field"]

            if unit_type == "a":
                # Eliminates leading and trailing %
                # (assumes a leading % always comes with a trailing one)
                if unit_pattern.startswith("%"):
                    unit_pattern = unit_pattern[1:-1]
                unit_pattern = "\"" + unit_pattern + "\""

            weighting = "^" + str(params["fields"][field]["weight"])

            # Keeps only what follows the last colon of the pattern
            if ':' in unit_pattern:
                unit_pattern = unit_pattern.rsplit(':', 1)[1]
            query_part = field + ":" + unit_pattern + weighting

            # Considers boolean operator from the second part on, allows negation from the first part on
            if query or operator == "-":
                query += " " + BOOLEAN_EQUIVALENTS[operator] + " "
            query += query_part + " "

        if verbose > 0:
            voutput += "Solr query: %s<br/>" % query

        try:
            (ranked_result, matched_recs) = solr_get_ranked(query, hitset, params, ranked_result_amount)
        except Exception:
            register_exception()
            return (None, error_msg, "", voutput)

    if verbose > 0:
        voutput += "All matched records: %s<br/>" % matched_recs

    # Considers not ranked records: they are put in front with a score of 0
    not_ranked = hitset.difference(matched_recs)
    if not_ranked:
        # Built as a real list so that the concatenation also works where
        # zip() returns an iterator.
        ranked_result = [(recid, 0) for recid in not_ranked] + ranked_result

    if verbose > 0:
        voutput += "Not ranked: %s<br/>" % not_ranked

    # Similar-to-recid requires reverse order
    if similar_to_recid:
        ranked_result.reverse()

    return (ranked_result, params["prefix"], params["postfix"], voutput)
def word_similarity_xapian(pattern, hitset, params, verbose, field, ranked_result_amount):
    """
    Ranks the records of a hitset with Xapian and returns them as a sorted list.
    input:
    pattern - sequence of query words; joined with spaces before parsing
    hitset - a list of hits for the query found by search_engine
             (must offer difference(); presumably an intbitset - confirm)
    params - ranking configuration; keys read here: "fields" (per field the
             settings handed to xapian_get_ranked_index), "default_field",
             "prefix" and "postfix"
    verbose - verbose value
    field - field to search (selected in GUI)
    ranked_result_amount - amount of results to be ranked
    output:
    recset - a list of sorted records: [[23,34], [344,24], [1,01]],
             in ascending order of rank value
    prefix - what to show before the rank value
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value
    """
    voutput = ""
    search_units = []

    if pattern:
        xapian_init_databases()
        pattern = " ".join(map(str, pattern))
        from invenio.legacy.search_engine import create_basic_search_units
        search_units = create_basic_search_units(None, pattern, field)

    if verbose > 0:
        voutput += "Hitset: %s<br/>" % hitset
        voutput += "Pattern: %s<br/>" % pattern
        voutput += "Search units: %s<br/>" % search_units

    all_ranked_results = []
    included_hits = intbitset()
    excluded_hits = intbitset()
    # Loop variables get their own names so that the 'pattern' and 'field'
    # arguments are not silently rebound.
    for (operator, unit_pattern, unit_field, unit_type) in search_units:
        # Field might not exist
        if unit_field not in params["fields"]:
            unit_field = params["default_field"]

        if unit_type == "a":
            # Eliminates leading and trailing %
            # (assumes a leading % always comes with a trailing one)
            if unit_pattern.startswith("%"):
                unit_pattern = unit_pattern[1:-1]
            unit_pattern = "\"" + unit_pattern + "\""

        (ranked_result_part, matched_recs) = xapian_get_ranked_index(
            unit_field, unit_pattern, params["fields"][unit_field], hitset,
            ranked_result_amount)

        if verbose > 0:
            voutput += "Index %s: %s<br/>" % (unit_field, ranked_result_part)
            voutput += "Index records %s: %s<br/>" % (unit_field, matched_recs)

        # Excludes - results
        if operator == "-":
            excluded_hits = excluded_hits.union(matched_recs)
        # + and | are interpreted as OR
        else:
            included_hits = included_hits.union(matched_recs)
            all_ranked_results.extend(ranked_result_part)

    ranked_result = []
    if hitset:
        # Removes the excluded records
        result_hits = included_hits.difference(excluded_hits)

        # Avoids duplicate results and normalises scores
        ranked_result = get_greatest_ranked_records(all_ranked_results)
        ranked_result = get_normalized_ranking_scores(ranked_result)

        # Considers not ranked records: they get a score of 0
        not_ranked = hitset.difference(result_hits)
        if not_ranked:
            # Built as a real list so that the concatenation also works
            # where zip() returns an iterator.
            ranked_result = [(recid, 0) for recid in not_ranked] + ranked_result

        if verbose > 0:
            voutput += "All matched records: %s<br/>" % result_hits
            voutput += "All ranked records: %s<br/>" % ranked_result
            voutput += "All not ranked records: %s<br/>" % not_ranked

        # Stable ascending sort on the rank value; a key function gives the
        # same order as the former cmp-based sort.
        ranked_result.sort(key=lambda record: record[1])
        return (ranked_result, params["prefix"], params["postfix"], voutput)

    return (ranked_result, "", "", voutput)
Exemplo n.º 12
0
def format_template_show_preview_or_save(req, bft, ln=CFG_SITE_LANG, code=None,
                                         ln_for_preview=CFG_SITE_LANG,
                                         pattern_for_preview="",
                                         content_type_for_preview='text/html',
                                         save_action=None,
                                         navtrail=""):
    """
    Render the preview of a record formatted with a format template, for
    inclusion in the Format template editor. When both save_action and code
    are given, the template code is saved first.

    @param req: the request object
    @param bft: the filename of the template to save
    @param ln: language
    @param code: the code of a template to use for formatting; when None the
                 stored code of the template is used
    @param ln_for_preview: the language for the preview (for bfo)
    @param pattern_for_preview: the search pattern to be used for the preview
                                (for bfo); when empty, a record that is not
                                in the DELETED collection is picked
    @param content_type_for_preview: the content-type to use to serve the preview page
    @param save_action: has a value if the code has to be saved
    @param navtrail: navigation trail
    @return: a web page; nothing when the preview is written straight to req
             with a content-type other than text/html
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)

    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if auth_code:
        # Any non-zero auth code means the user may not configure BibFormat.
        return page_not_authorized(req=req, text=auth_msg)

    user_info = collect_user_info(req)
    uid = user_info['uid']
    bft = wash_url_argument(bft, 'str')

    must_save = save_action is not None and code is not None
    if must_save:
        bibformatadminlib.update_format_template_code(bft, code=code)
    bibformat_engine.clear_caches()
    if code is None:
        code = bibformat_engine.get_format_template(bft)['code']

    ln_for_preview = wash_language(ln_for_preview)
    pattern_for_preview = wash_url_argument(pattern_for_preview, 'str')

    # Pick the record to preview; pop() raises KeyError on an empty result.
    if pattern_for_preview == "":
        try:
            recID = search_pattern(p='-collection:DELETED').pop()
        except KeyError:
            return page(title="No Document Found",
                        body="",
                        uid=uid,
                        language=ln_for_preview,
                        navtrail="",
                        lastupdated=__lastupdated__,
                        req=req,
                        navmenuid='search')

        pattern_for_preview = "recid:%s" % recID
    else:
        query = pattern_for_preview + ' -collection:DELETED'
        try:
            recID = search_pattern(p=query).pop()
        except KeyError:
            return page(title="No Record Found for %s" % pattern_for_preview,
                        body="",
                        uid=uid,
                        language=ln_for_preview,
                        navtrail="",
                        lastupdated=__lastupdated__,
                        req=req)

    # The non-negated search terms are handed to the format object.
    keywords = []
    for unit in create_basic_search_units(None, pattern_for_preview, None):
        if unit[0] != '-':
            keywords.append(unit[1])

    bfo = bibformat_engine.BibFormatObject(recID=recID,
                                           ln=ln_for_preview,
                                           search_pattern=keywords,
                                           xml_record=None,
                                           user_info=user_info)
    body = bibformat_engine.format_with_format_template(
        bft, bfo, verbose=7, format_template_code=code)

    if content_type_for_preview != 'text/html':
        # Output with chosen content-type.
        req.content_type = content_type_for_preview
        req.send_http_header()
        req.write(body)
        return

    # Standard page display with CDS headers, etc.
    return page(title="",
                body=body,
                uid=uid,
                language=ln_for_preview,
                navtrail=navtrail,
                lastupdated=__lastupdated__,
                req=req,
                navmenuid='search')
Exemplo n.º 13
0
def word_similarity_xapian(pattern, hitset, params, verbose, field,
                           ranked_result_amount):
    """
    Ranks the records of a hitset with Xapian and returns them as a sorted list.
    input:
    pattern - sequence of query words; joined with spaces before parsing
    hitset - a list of hits for the query found by search_engine
             (must offer difference(); presumably an intbitset - confirm)
    params - ranking configuration; keys read here: "fields" (per field the
             settings handed to xapian_get_ranked_index), "default_field",
             "prefix" and "postfix"
    verbose - verbose value
    field - field to search (selected in GUI)
    ranked_result_amount - amount of results to be ranked
    output:
    recset - a list of sorted records: [[23,34], [344,24], [1,01]],
             in ascending order of rank value
    prefix - what to show before the rank value
    postfix - what to show after the rank value
    voutput - contains extra information, content dependent on verbose value
    """
    voutput = ""
    search_units = []

    if pattern:
        xapian_init_databases()
        pattern = " ".join(map(str, pattern))
        from invenio.legacy.search_engine import create_basic_search_units
        search_units = create_basic_search_units(None, pattern, field)

    if verbose > 0:
        voutput += "Hitset: %s<br/>" % hitset
        voutput += "Pattern: %s<br/>" % pattern
        voutput += "Search units: %s<br/>" % search_units

    all_ranked_results = []
    included_hits = intbitset()
    excluded_hits = intbitset()
    # Loop variables get their own names so that the 'pattern' and 'field'
    # arguments are not silently rebound.
    for (operator, unit_pattern, unit_field, unit_type) in search_units:
        # Field might not exist
        if unit_field not in params["fields"]:
            unit_field = params["default_field"]

        if unit_type == "a":
            # Eliminates leading and trailing %
            # (assumes a leading % always comes with a trailing one)
            if unit_pattern.startswith("%"):
                unit_pattern = unit_pattern[1:-1]
            unit_pattern = "\"" + unit_pattern + "\""

        (ranked_result_part,
         matched_recs) = xapian_get_ranked_index(unit_field, unit_pattern,
                                                 params["fields"][unit_field],
                                                 hitset, ranked_result_amount)

        if verbose > 0:
            voutput += "Index %s: %s<br/>" % (unit_field, ranked_result_part)
            voutput += "Index records %s: %s<br/>" % (unit_field, matched_recs)

        # Excludes - results
        if operator == "-":
            excluded_hits = excluded_hits.union(matched_recs)
        # + and | are interpreted as OR
        else:
            included_hits = included_hits.union(matched_recs)
            all_ranked_results.extend(ranked_result_part)

    ranked_result = []
    if hitset:
        # Removes the excluded records
        result_hits = included_hits.difference(excluded_hits)

        # Avoids duplicate results and normalises scores
        ranked_result = get_greatest_ranked_records(all_ranked_results)
        ranked_result = get_normalized_ranking_scores(ranked_result)

        # Considers not ranked records: they get a score of 0
        not_ranked = hitset.difference(result_hits)
        if not_ranked:
            # Built as a real list so that the concatenation also works
            # where zip() returns an iterator.
            ranked_result = [(recid, 0) for recid in not_ranked] + ranked_result

        if verbose > 0:
            voutput += "All matched records: %s<br/>" % result_hits
            voutput += "All ranked records: %s<br/>" % ranked_result
            voutput += "All not ranked records: %s<br/>" % not_ranked

        # Stable ascending sort on the rank value; a key function gives the
        # same order as the former cmp-based sort.
        ranked_result.sort(key=lambda record: record[1])
        return (ranked_result, params["prefix"], params["postfix"], voutput)

    return (ranked_result, "", "", voutput)