Пример #1
0
 def get_facets_for_query(self, qid, limit=20, parent=None):
     """Return the leading facet values for the query identified by `qid`.

     The record ids come from ``self.get_recids(qid)`` and the tags from
     ``get_field_tags(self.name)``; both are handed over to
     ``get_most_popular_field_values``.

     :param qid: query identifier forwarded to ``self.get_recids``.
     :param limit: upper bound of the slice taken from the result.
     :param parent: accepted but not used here.
     :return: the first `limit` items of the popular-values result.
     """
     from invenio.legacy.search_engine import get_most_popular_field_values,\
         get_field_tags

     recids = self.get_recids(qid)
     facet_tags = get_field_tags(self.name)
     popular_values = get_most_popular_field_values(recids, facet_tags)
     return popular_values[0:limit]
Пример #2
0
 def get_facets_for_query(self, qid, limit=20, parent=None):
     """Compute facet data for a query, truncated to `limit` entries.

     :param qid: query identifier; ``self.get_recids(qid)`` supplies the
         record ids to inspect.
     :param limit: number of leading entries to keep.
     :param parent: ignored by this implementation.
     :return: slice ``[0:limit]`` of what ``get_most_popular_field_values``
         returns for the record ids and the tags of ``self.name``.
     """
     from invenio.legacy.search_engine import get_most_popular_field_values,\
         get_field_tags

     # Record ids are resolved before the tags, as in a plain nested call.
     hit_recids = self.get_recids(qid)
     name_tags = get_field_tags(self.name)
     ranked = get_most_popular_field_values(hit_recids, name_tags)
     return ranked[0:limit]
Пример #3
0
def format_element(bfo,
                   tag="909C4",
                   label="",
                   separator="<br/> ",
                   description_location=""):
    """
    Return HTML links to the DOIs found in the record.

    The tags associated to the 'doi' logical field are tried first, in
    order; the first one for which the record has fields is used. Only when
    none of them yields anything is `tag` looked up as a fallback.

    @param tag: fallback field (tag + indicators) where the DOI can be found when the tags of the 'doi' logical field give nothing
    @param separator: the separator between multiple links
    @param description_location: where should the description (subfield 'y') be added: if empty, the description is not printed; possible values: 'front', 'label', 'end'
    @param label: label to use for the DOI link. If not specified, use the DOI number as label for the link.
    @return: the links joined with `separator`, or an empty string when no DOI was found
    """
    fields = []
    # First check the tags table.
    for doi_tag in get_field_tags('doi'):
        # We want only the tag, without the subfields.
        fields = bfo.fields(doi_tag[:5])
        if fields:
            break
    if not fields:
        fields = bfo.fields(tag)

    doi_list = []
    for field in fields:
        # A field without subfield '2' is taken to be a DOI.
        if field.get('2', 'DOI') != 'DOI' or 'a' not in field:
            continue
        desc = field.get('y', '')
        front = end = ''
        # Work on a per-field copy of the label: rebinding `label` itself
        # would leak one field's description into the links of the
        # following fields.
        link_label = label
        if desc:
            if description_location == 'front':
                front = desc + ': '
            elif description_location == 'label':
                link_label = desc
            elif description_location == 'end':
                end = ' (' + desc + ')'
        doi_list.append((field['a'], front, end, link_label))

    if not doi_list:
        return ""

    doi_link = """%s<a href="http://dx.doi.org/%s" title="DOI" target="_blank">%s</a>%s"""
    # Everything interpolated comes from the record, so every piece is
    # escaped -- including the trailing description.
    return separator.join([
        doi_link % (escape(front),
                    escape(doi, True),
                    escape(link_label) if link_label else escape(doi),
                    escape(end))
        for (doi, front, end, link_label) in doi_list
    ])
Пример #4
0
def get_existing_records_for_reportnumber(reportnum):
    """Return the recids of the real (live) records carrying a report number.

       A record that does not exist (perhaps deleted, for example) is left
       out of the returned list.

       @param reportnum: the report number for which recids are to be returned.
       @type reportnum: string
       @return: list of recids.
       @rtype: list
       @note: If reportnum was not found in phrase indexes, the function searches
           directly in bibxxx tables via MARC tags, so that the record does not
           have to be phrase-indexed.
    """
    ## Look the report number up in the phrase indexes first:
    candidates = list(search_pattern(req=None,
                                     p=reportnum,
                                     f="reportnumber",
                                     m="e"))

    if not candidates:
        ## Maybe the record has not been indexed yet? (look in bibxxx tables)
        for marc_tag in get_field_tags("reportnumber"):
            candidates += list(search_pattern(req=None,
                                              p=reportnum,
                                              f=marc_tag,
                                              m="e"))
        ## Several tags may hit the same record: remove duplicates.
        candidates = list(dict.fromkeys(candidates))

    ## Keep only the recids that record_exists() reports as 1 (live record).
    ## An empty result means no record has this report number; more than
    ## one entry means several records share it.
    return [recid for recid in candidates if record_exists(recid) == 1]
Пример #5
0
def get_nbhits_in_bibxxx(p, f, in_hitset=None):
    """Return the number of records found in the bibxxx tables for field 'f'.

    When 'f' is empty and 'p' looks like ``field:pattern``, the field is
    taken from 'p'. The 'journal' field is delegated to
    ``get_nbhits_in_bibwords``. Otherwise 'f' is either a MARC tag (it
    starts with two digits) or a logical field that ``get_field_tags``
    turns into MARC tags, and the records having these tags are counted.

    NOTE(review): 'p' is only used on the 'journal' path; the bibxxx query
    below filters on the tag alone. Confirm whether a filter on the value
    is missing.

    @param p: pattern, optionally prefixed with ``field:``.
    @param f: MARC tag or logical field name; may be empty.
    @param in_hitset: if given, only records also in this set are counted.
    @return: number of hits; 0 when no field could be determined.
    """
    # determine browse field:
    if not f and p.find(":") > 0:
        # 'p' contains ':' (and not as its first character)
        f, p = p.split(":", 1)

    # FIXME: quick hack for the journal index
    if f == 'journal':
        return get_nbhits_in_bibwords(p, f)

    if not f:
        # No field at all: there is no tag to search in, hence no hit.
        return 0

    # construct 'tl' which defines the tag list (MARC tags) to search in:
    if len(f) >= 2 and f[0].isdigit() and f[1].isdigit():
        tl = [f]  # 'f' seems to be okay as it starts by two digits
    else:
        # deduce desired MARC tags on the basis of chosen 'f'
        tl = get_field_tags(f)

    # start searching:
    hitlist = intbitset()
    for t in tl:
        # deduce into which bibxxx table we will search:
        digit1, digit2 = int(t[0]), int(t[1])
        model = getattr(models, 'Bib{0}{1}x'.format(digit1, digit2))

        if len(t) != 6 or t[-1:] == '%':
            # only the beginning of field 't' is defined, so add wildcard
            # character:
            condition = model.tag.like(t + '%')
        else:
            condition = model.tag == t

        res = model.query.join(model.bibrecs).filter(condition).values(
            'id_bibrec')

        hitlist |= intbitset([row[0] for row in res])

    if in_hitset is None:
        return len(hitlist)
    return len(hitlist & in_hitset)
Пример #6
0
def get_nbhits_in_bibxxx(p, f, in_hitset=None):
    """Count the records that carry the MARC tags designated by field 'f'.

    If 'f' is empty and 'p' has the form ``field:pattern``, 'f' and 'p' are
    taken from the two halves of 'p'. Field 'journal' is handed over to
    ``get_nbhits_in_bibwords``. Any other 'f' is used as a MARC tag when it
    starts with two digits, and is otherwise resolved to MARC tags with
    ``get_field_tags``.

    NOTE(review): apart from the 'journal' shortcut, 'p' does not take part
    in the query -- only the tag is filtered on. Verify that this is
    intended.

    @param p: pattern, optionally prefixed with ``field:``.
    @param f: MARC tag or logical field name; may be empty.
    @param in_hitset: optional set the hits are intersected with before
        counting.
    @return: number of hits; 0 when there is no field to search in.
    """
    # determine browse field:
    if not f and p.find(":") > 0:
        # does 'p' contain ':'? (str methods replace the `string` module
        # functions, which no longer exist in Python 3)
        f, p = p.split(":", 1)

    # FIXME: quick hack for the journal index
    if f == 'journal':
        return get_nbhits_in_bibwords(p, f)

    # Without a field there are no tags to look into; indexing f[0] below
    # would raise instead of reporting zero hits.
    if not f:
        return 0

    # construct 'tl' which defines the tag list (MARC tags) to search in:
    starts_with_two_digits = (len(f) >= 2 and f[0].isdigit()
                              and f[1].isdigit())
    if starts_with_two_digits:
        tl = [f]
    else:
        # deduce desired MARC tags on the basis of chosen 'f'
        tl = get_field_tags(f)

    # start searching:
    hitlist = intbitset()
    for t in tl:
        # deduce into which bibxxx table we will search:
        digit1, digit2 = int(t[0]), int(t[1])
        model = getattr(models, 'Bib{0}{1}x'.format(digit1, digit2))

        if len(t) != 6 or t[-1:] == '%':
            # only the beginning of field 't' is defined, so add wildcard
            # character:
            condition = model.tag.like(t + '%')
        else:
            condition = model.tag == t

        res = model.query.join(
            model.bibrecs).filter(condition).values('id_bibrec')

        hitlist |= intbitset([row[0] for row in res])

    if in_hitset is None:
        return len(hitlist)
    return len(hitlist & in_hitset)
Пример #7
0
def references_nb_counts():
    """Count the reference fields of the record named by the request.

    The record id is read from ``request.view_args['recid']``; nothing
    (None) is returned when it is absent. When CFG_CERN_SITE is set, or
    when the first tag of the "reference" field is missing or shorter than
    five characters, the count is 0.
    """
    record_id = request.view_args.get('recid')
    if record_id is None:
        return None

    from invenio.legacy.bibrecord import record_get_field_instances
    from invenio.legacy.search_engine import get_field_tags
    from invenio.modules.records.api import get_record

    if CFG_CERN_SITE:
        return 0

    reference_tags = get_field_tags("reference")
    first_tag = reference_tags[0] if reference_tags else ""
    record = get_record(record_id)
    if not first_tag or len(first_tag) <= 4:
        return 0

    # Split the tag into its 3-character code and the two indicators.
    code, ind1, ind2 = first_tag[0:3], first_tag[3], first_tag[4]
    return len(record_get_field_instances(record, code, ind1, ind2))
Пример #8
0
def references_nb_counts():
    """Return how many reference fields the requested record has.

    The record is identified by the `recid` view argument of the current
    request. Returns None without a `recid`, and 0 when CFG_CERN_SITE is
    set or when no usable "reference" tag (at least five characters long)
    is available.
    """
    recid = request.view_args.get('recid')
    if recid is None:
        return

    from invenio.legacy.bibrecord import record_get_field_instances
    from invenio.legacy.search_engine import get_field_tags
    from invenio.modules.records.api import get_record

    nb_references = 0
    if not CFG_CERN_SITE:
        tags = get_field_tags("reference")
        # Only the first tag of the "reference" field is considered.
        tag = tags[0] if tags else ""
        rec = get_record(recid)
        if tag and len(tag) > 4:
            instances = record_get_field_instances(rec, tag[0:3], tag[3],
                                                   tag[4])
            nb_references = len(instances)
    return nb_references
Пример #9
0
def get_field_data(recids, method_name, definition):
    """Return the data associated with the definition for recids.

    The returned dictionary will contain ONLY the recids for which
    a value has been found in the database.

    The definition is dispatched on its prefix:
      - 'MARC:'   comma-separated MARC tags;
      - 'FIELD:'  a logical field (ex: author, title) resolved to tags
                  with get_field_tags;
      - 'RNK:'    the name of a ranking method;
      - 'BIBREC:' a column name of the bibrec table.

    @param recids: the record ids to look at; a copy is handed to the helpers.
    @param method_name: name of the method, used in messages and for 'RNK'.
    @param definition: the prefixed definition string.
    @return: what the matching get_data_for_definition_* helper returns, or
        an empty dictionary when no tag is found or the definition is not
        recognized.
    """
    recids_copy = recids.copy()
    # if we are dealing with a MARC definition
    if definition.startswith('MARC'):
        raw_tags = definition.replace('MARC:', '').replace(' ', '').strip()
        # ''.split(',') is [''], which is truthy: drop the empty entries so
        # that the "no tags" check below can actually trigger.
        tags = [tag for tag in raw_tags.split(',') if tag]
        if not tags:
            write_message('No MARC tags found for method %s.'
                          % method_name, verbose=5)
            return {}
        write_message('The following MARC tags will be queried: %s'
                      % (tags,), verbose=5)
        return get_data_for_definition_marc(tags, recids_copy)
    # if we are dealing with tags (ex: author, title)
    elif definition.startswith('FIELD'):
        tags = get_field_tags(definition.replace('FIELD:', '').strip())
        if not tags:
            write_message('No tags found for method %s.'
                          % method_name, verbose=5)
            return {}
        # Wrapped in a tuple so that the message also formats correctly
        # should get_field_tags return a tuple.
        write_message('The following tags will be queried: %s' % (tags,),
                      verbose=5)
        return get_data_for_definition_marc(tags, recids_copy)
    # if we are dealing with ranking data
    elif definition.startswith('RNK'):
        rnk_name = definition.replace('RNK:', '').strip()
        return get_data_for_definition_rnk(method_name, rnk_name)
    # if we are looking into bibrec table
    elif definition.startswith('BIBREC'):
        column_name = definition.replace('BIBREC:', '').strip()
        return get_data_for_definition_bibrec(column_name, recids_copy)
    else:
        # The second placeholder used to read '% c', which made this line
        # raise TypeError instead of reporting the problem.
        write_message("The definition %s for method %s could not be recognized"
                      % (definition, method_name), stream=sys.stderr)
        return {}
Пример #10
0
def format_element(bfo, tag="909C4", label="", separator="<br/> ", description_location=""):
    """
    Return HTML links to the DOIs found in the record.

    The tags associated to the 'doi' logical field are looked up first, in
    order, stopping at the first one for which the record has fields. `tag`
    is only used when none of them gives a result.

    @param tag: fallback field (tag + indicators) where the DOI can be found when the tags of the 'doi' logical field give nothing
    @param separator: the separator between multiple links
    @param description_location: where should the description (subfield 'y') be added: if empty, the description is not printed; possible values: 'front', 'label', 'end'
    @param label: label to use for the DOI link. If not specified, use the DOI number as label for the link.
    @return: the links joined with `separator`, or an empty string when no DOI was found
    """
    fields = []
    doi_tags = get_field_tags('doi')  # first check the tags table
    for doi_tag in doi_tags:
        fields = bfo.fields(doi_tag[:5])  # we want only the tag, without the subfields
        if fields:
            break
    if not fields:
        fields = bfo.fields(tag)

    doi_list = []
    for field in fields:
        # A missing subfield '2' counts as 'DOI'.
        if field.get('2', 'DOI') == 'DOI' and 'a' in field:
            desc = field.get('y', '')
            front = end = ''
            # Start again from the caller's label for every field, so that
            # a description used as label does not carry over to the
            # fields that follow.
            field_label = label
            if desc:
                if description_location == 'front':
                    front = desc + ': '
                elif description_location == 'label':
                    field_label = desc
                elif description_location == 'end':
                    end = ' (' + desc + ')'
            doi_list.append((field['a'], front, end, field_label))

    if not doi_list:
        return ""

    doi_link = """%s<a href="http://dx.doi.org/%s" title="DOI" target="_blank">%s</a>%s"""
    links = []
    for (doi, front, end, field_label) in doi_list:
        shown = escape(field_label) if field_label else escape(doi)
        # `end` is built from record data like `front`, so it is escaped too.
        links.append(doi_link % (escape(front), escape(doi, True), shown, escape(end)))
    return separator.join(links)
Пример #11
0
def book_title_from_MARC(recid):
    """
    Build the title of a book from its MARC fields.

    The tags of the 'title' field are tried in order; the values found
    under the first tag that has any are glued together with ': '.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's title, or an empty string when no tag has a value
    """
    for title_tag in get_field_tags('title'):
        # Each value gets a ': ' suffix; the last suffix is cut off below.
        pieces = [value + ': ' for value in get_fieldvalues(recid, title_tag)]
        if pieces:
            return ''.join(pieces)[:-2]

    return ''
Пример #12
0
def book_title_from_MARC(recid):
    """
    Retrieve the title of a book from MARC.

    Goes through the tags of the 'title' field and stops at the first one
    that produces a non-empty text; the values of that tag are separated
    by ': '.

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's title (empty string if nothing was found)
    """
    suffix = ': '
    assembled = ''
    for marc_tag in get_field_tags('title'):
        assembled = ''.join(
            [value + suffix for value in get_fieldvalues(recid, marc_tag)])
        if assembled != '':
            break

    # Drop the suffix left after the last value (a no-op on '').
    return assembled[:-len(suffix)]
Пример #13
0
def get_field_data(recids, method_name, definition):
    """Return the data associated with the definition for recids.

    The returned dictionary will contain ONLY the recids for which
    a value has been found in the database.

    Supported definitions, recognized by their prefix:
      - 'MARC:'   a comma-separated list of MARC tags;
      - 'FIELD:'  a logical field (ex: author, title), turned into tags
                  by get_field_tags;
      - 'RNK:'    the name of a ranking method;
      - 'BIBREC:' the name of a column of the bibrec table.

    @param recids: the record ids to look at; the helpers receive a copy.
    @param method_name: name of the method, used in messages and for 'RNK'.
    @param definition: the prefixed definition string.
    @return: the result of the matching get_data_for_definition_* helper;
        an empty dictionary if there are no tags or if the definition is
        not recognized.
    """
    recids_copy = recids.copy()

    # if we are dealing with a MARC definition
    if definition.startswith('MARC'):
        tag_spec = definition.replace('MARC:', '').replace(' ', '').strip()
        # Splitting an empty string yields [''], so empty entries are
        # filtered out; otherwise the check below could never be true.
        tags = [tag for tag in tag_spec.split(',') if tag]
        if not tags:
            write_message('No MARC tags found for method %s.'
                          % method_name, verbose=5)
            return {}
        write_message('The following MARC tags will be queried: %s' % (tags,),
                      verbose=5)
        return get_data_for_definition_marc(tags, recids_copy)

    # if we are dealing with tags (ex: author, title)
    if definition.startswith('FIELD'):
        tags = get_field_tags(definition.replace('FIELD:', '').strip())
        if not tags:
            write_message('No tags found for method %s.'
                          % method_name, verbose=5)
            return {}
        # One-element tuple: keeps the formatting valid whatever sequence
        # type get_field_tags returns.
        write_message('The following tags will be queried: %s' % (tags,), verbose=5)
        return get_data_for_definition_marc(tags, recids_copy)

    # if we are dealing with ranking data
    if definition.startswith('RNK'):
        rnk_name = definition.replace('RNK:', '').strip()
        return get_data_for_definition_rnk(method_name, rnk_name)

    # if we are looking into bibrec table
    if definition.startswith('BIBREC'):
        column_name = definition.replace('BIBREC:', '').strip()
        return get_data_for_definition_bibrec(column_name, recids_copy)

    # '%s' for the method name: the former '% c' conversion raised
    # TypeError here instead of writing the message.
    write_message("The definition %s for method %s could not be recognized"
                  % (definition, method_name), stream=sys.stderr)
    return {}