Example #1
def get_parent_blog(recid):
    """ This function returns the parent blog of any 
    post or comment given its recid
    @param recid: comment or post recid
    @type recid: int
    @return: parent blog recid
    @rtype: int
    """

    coll = get_fieldvalues(recid, '980__a')[0]
    if coll == 'BLOG':
        return recid
    elif coll == 'COMMENT':
        parent_post = get_parent_post(recid)
        recid = parent_post

    parent_blog = get_fieldvalues(recid, '760__w')

    if parent_blog:
        if parent_blog[0]:
            return int(parent_blog[0])
        else:
            return None
    else:
        return None
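A minimal usage sketch for the helper above; the recid 12345 is made up for illustration and the Invenio helpers (get_fieldvalues, get_parent_post) are assumed to be importable.

# Hypothetical call; 12345 stands in for the recid of a blog post or comment.
blog_recid = get_parent_blog(12345)
if blog_recid is None:
    print "no parent blog found"
else:
    print "parent blog recid: %d" % blog_recid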
Example #2
def book_information_from_MARC(recid):
    """
    Retrieve book's information from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return tuple with title, year, author, isbn and editor.
    """

    book_title = ' '.join(get_fieldvalues(recid, "245__a") + \
                          get_fieldvalues(recid, "245__b") + \
                          get_fieldvalues(recid, "245__n") + \
                          get_fieldvalues(recid, "245__p"))

    book_year = ' '.join(get_fieldvalues(recid, "260__c"))

    book_author = '  '.join(get_fieldvalues(recid, "100__a") + \
                            get_fieldvalues(recid, "100__u"))

    book_isbn = ' '.join(get_fieldvalues(recid, "020__a"))

    book_editor = ' , '.join(get_fieldvalues(recid, "260__a") + \
                             get_fieldvalues(recid, "260__b"))


    return (book_title, book_year, book_author, book_isbn, book_editor)
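For illustration, one way the returned tuple might be consumed; the recid 4711 is hypothetical.

# Unpack the metadata tuple of a hypothetical record.
title, year, author, isbn, editor = book_information_from_MARC(4711)
print "%s (%s), ISBN: %s" % (title, year, isbn)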
Example #3
def create_signature_blocks(record_id):
    """Create signature blocks given the record_id.

    :param int record_id: record-id
        Example:
            record_id = 1369415

    :return: list of strings representing phonetic blocks for the author's and
        co-authors' full names. Empty list if no authors are found.
        Example:
            [u'ELj', u'MCLAGHLANm', u'VARBASTj']
    """
    signature_blocks = []

    author = get_fieldvalues(record_id, "100__a")
    coauthors = get_fieldvalues(record_id, "700__a")

    authors = []
    authors.extend(author)
    authors.extend(coauthors)

    for author in authors:
        signature_block = create_signature_block(author)
        if signature_block:
            signature_blocks.append(signature_block)

    return signature_blocks
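A possible call, reusing the record id given in the docstring example (it only exists on the instance the snippet was written for).

blocks = create_signature_blocks(1369415)
for block in blocks:
    print block    # e.g. u'ELj', u'MCLAGHLANm', u'VARBASTj'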
Example #4
def book_information_from_MARC(recid):
    """
    Retrieve book's information from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return tuple with title, year, author, isbn and editor.
    """
    # FIXME do the same as book_title_from_MARC

    book_title  = book_title_from_MARC(recid)

    book_year   =   ''.join(get_fieldvalues(recid, "260__c"))


    author_tags = ['100__a', '700__a', '721__a']
    book_author = ''

    for tag in author_tags:
        l = get_fieldvalues(recid, tag)
        for c in l:
            book_author += c + '; '
    book_author = book_author[:-2]

    l = get_fieldvalues(recid, "020__a")
    book_isbn = ''
    for isbn in l:
        book_isbn += isbn + ', '
    book_isbn = book_isbn[:-2]

    book_editor = ', '.join(get_fieldvalues(recid, "260__a") + \
                            get_fieldvalues(recid, "260__b"))

    return (book_title, book_year, book_author, book_isbn, book_editor)
Example #5
def build_issns_from_local_site():
    """
    Retrieve the ISSNs from the local database and
    store the "journal name -> issn" relation.

    Normalize journal names a little bit:
        - strip whitespace chars (left and right)
        - all lower case
        - remove "[Online]" suffix

    Print the result as Python dict structure.
    """

    rec_id_list = perform_request_search(cc='Periodicals',
                                         of='id')
    built_issns = {}
    #built_issns = issns # Uncomment this to extend existing issns dict
                         # (e.g. in case of manual addition)
    for rec_id in rec_id_list:
        journal_name_list = get_fieldvalues(rec_id, '210__%')
        issn_list = get_fieldvalues(rec_id, '022__a')
        if issn_list:
            issn = issn_list[0] # There should be only one ISSN
            for journal_name in journal_name_list:
                # Depending on how journal names are entered into the database,
                # you might want to do some processing before saving:
                journal_name = journal_name.lower().strip()
                if journal_name.endswith("[online]"):
                    journal_name = journal_name[:-8].rstrip()

                built_issns[journal_name] = issn

    prtyp = pprint.PrettyPrinter(indent=4)
    prtyp.pprint(built_issns)
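The helper above relies on a few module-level imports; a sketch of what they might look like. The perform_request_search import path is an assumption based on the usual Invenio 1.x layout, while get_fieldvalues is imported as in Example #7.

import pprint
from invenio.search_engine import perform_request_search  # assumed import path
from invenio.search_engine_utils import get_fieldvalues

build_issns_from_local_site()  # prints the {journal name: ISSN} dict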
Example #6
def book_information_from_MARC(recid):
    """
    Retrieve book's information from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return tuple with title, year, author, isbn and editor.
    """
    # FIXME do the same as book_title_from_MARC

    book_title  = book_title_from_MARC(recid)

    book_year   =   ''.join(get_fieldvalues(recid, "260__c"))


    author_tags = ['100__a', '700__a', '721__a']
    book_author = ''

    for tag in author_tags:
        l = get_fieldvalues(recid, tag)
        for c in l:
            book_author += c + '; '
    book_author = book_author[:-2]

    l = get_fieldvalues(recid, "020__a")
    book_isbn = ''
    for isbn in l:
        book_isbn += isbn + ', '
    book_isbn = book_isbn[:-2]

    book_editor = ', '.join(get_fieldvalues(recid, "260__a") + \
                            get_fieldvalues(recid, "260__b"))

    return (book_title, book_year, book_author, book_isbn, book_editor)
Example #7
def us_affiliations(req):
    from invenio.search_engine_utils import get_fieldvalues

    req.content_type = "text/html"
    print >> req, pageheaderonly("USA affiliations", req=req)

    affiliations = []
    tmp = []
    tmp.extend(get_fieldvalues(perform_request_search(p="*"), '100__u', False))
    tmp.extend(get_fieldvalues(perform_request_search(p="*"), '100__v', False))
    tmp.extend(get_fieldvalues(perform_request_search(p="*"), '700__u', False))
    tmp.extend(get_fieldvalues(perform_request_search(p="*"), '700__v', False))

    def _find_usa(x):
        return ("United States of America" in x or "United States" in x
                or "USA" in x or "U.S.A" in x)

    affiliations.extend(filter(_find_usa, tmp))
    affiliations = set(affiliations)

    replaces = [('United States of America', ''), ("United States", ''),
                ("USA", ''), ("U.S.A", ''), ("University", ''), ("State", ''),
                ('Department of Physics and Astronomy', ""),
                ('Department of Physics', ""), ('Department', ''), (",", '')]

    affs = map(lambda x: multi_replace(x, replaces).strip(), affiliations)
    affiliations2 = zip(affiliations, affs)

    for a in sorted(affiliations2, key=lambda aff: aff[1]):
        req.write(a[0] + '<br />')
    req.write(pagefooteronly(req=req))
    return ""
Example #8
def _record_in_files_p(recid, filenames):
    """Search XML files for given record."""
    # Get id tags of record in question
    rec_oaiid = rec_sysno = -1
    rec_oaiid_tag = get_fieldvalues(recid, OAIID_TAG)
    if rec_oaiid_tag:
        rec_oaiid = rec_oaiid_tag[0]
    rec_sysno_tag = get_fieldvalues(recid, SYSNO_TAG)
    if rec_sysno_tag:
        rec_sysno = rec_sysno_tag[0]

    # For each record in each file, compare ids and abort if match is found
    for filename in filenames:
        try:
            file_ = open(filename)
            records = create_records(file_.read(), 0, 0)
            for i in range(0, len(records)):
                record, all_good = records[i][:2]
                if record and all_good:
                    if _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
                        return True
            file_.close()
        except IOError:
            continue
    return False
Example #9
def book_information_from_MARC(recid):
    """
    Retrieve book's information from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return tuple with title, year, author, isbn and editor.
    """

    book_title = ' '.join(get_fieldvalues(recid, "245__a") + \
                          get_fieldvalues(recid, "245__b") + \
                          get_fieldvalues(recid, "245__n") + \
                          get_fieldvalues(recid, "245__p"))

    book_year = ' '.join(get_fieldvalues(recid, "260__c"))

    book_author = '  '.join(get_fieldvalues(recid, "100__a") + \
                            get_fieldvalues(recid, "100__u"))

    book_isbn = ' '.join(get_fieldvalues(recid, "020__a"))

    book_editor = ' , '.join(get_fieldvalues(recid, "260__a") + \
                             get_fieldvalues(recid, "260__b"))


    return (book_title, book_year, book_author, book_isbn, book_editor)
Example #10
def check_records(records, amend_case=False):
    for record in records:
        for position, value in record.iterfields(['100__a', '700__a']):
            value = value.decode('utf8')
            new_value = NAME_CACHE.get(value)
            if new_value is None:
                search_value = value
                if ',' in value:
                    splitted_values = search_value.split(',', 1)
                    search_value = u"%s %s" % (splitted_values[1].strip(), splitted_values[0].strip())
                original_family_name = value.split(',')[0].strip()
                search_value = RE_SPACES.sub(' ', search_value).strip()
                if len(search_value.split()) < 3:
                    # Simple name
                    continue
                i = perform_request_search(p=u'author:"%s"' % search_value, cc='HepNames')
                possible_values = get_fieldvalues(i, '100__a', sort=False) + get_fieldvalues(i, '400__a', sort=False)
                for correct_value in possible_values:
                    correct_value = correct_value.decode('utf8')
                    if search_value.lower().endswith(" " + correct_value.lower().split(',')[0]):
                        family_name = correct_value.split(',')[0].strip()
                        if len(family_name) < len(original_family_name):
                            continue
                        first_name = search_value[:-(len(family_name) + 1)].strip()
                        new_value = u'%s, %s' % (family_name, first_name)
                        NAME_CACHE[value] = new_value
                        break
                else:
                    NAME_CACHE[value] = value
            if new_value:
                if amend_case and new_value == value:
                    continue
                elif new_value.lower() == value.lower():
                    continue
                record.amend_field(position, new_value.encode('utf8'))
Example #11
def get_authors_from_record(
        recID, tags, use_bibauthorid=CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID):
    """Get all authors for a record

    We need this function because there are 3 different types of authors
    and to fetch each one of them we need to look through MARC tags
    """
    if use_bibauthorid:
        authors = get_personids_from_record(recID)
    else:

        def get_id(table, author):
            """Get id from bibxxx tables"""
            return run_sql("SELECT id FROM bib%s WHERE value = %%s" % table,
                           (author, ))

        authors = set()

        def add_ids(table, authors_list):
            for author in authors_list:
                if len(authors) > CFG_SELFCITES_AUTHOR_LIMIT:
                    break
                authors.add(get_id(table, author))

        add_ids('10x', get_fieldvalues(recID, tags['first_author']))
        add_ids('70x', get_fieldvalues(recID, tags['additional_author']))
        add_ids('72x', get_fieldvalues(recID, tags['alternative_author_name']))

    return authors
Example #12
def build_issns_from_local_site():
    """
    Retrieve the ISSNs from the local database and
    store the "journal name -> issn" relation.

    Normalize journal names a little bit:
        - strip whitespace chars (left and right)
        - all lower case
        - remove "[Online]" suffix

    Print the result as Python dict structure.
    """

    rec_id_list = perform_request_search(cc='Periodicals',
                                         of='id')
    built_issns = {}
    #built_issns = issns # Uncomment this to extend existing issns dict
                         # (e.g. in case of manual addition)
    for rec_id in rec_id_list:
        journal_name_list = get_fieldvalues(rec_id, '210__%')
        issn_list = get_fieldvalues(rec_id, '022__a')
        if issn_list:
            issn = issn_list[0] # There should be only one ISSN
            for journal_name in journal_name_list:
                # Depending on how journal names are entered into the database,
                # you might want to do some processing before saving:
                journal_name = journal_name.lower().strip()
                if journal_name.endswith("[online]"):
                    journal_name = journal_name[:-8].rstrip()

                built_issns[journal_name] = issn

    prtyp = pprint.PrettyPrinter(indent=4)
    prtyp.pprint(built_issns)
Example #13
def get_video_thumbnail(recid):
    """ Returns the URL and ALT text for a video thumbnail of a given record
    """
    comments = get_fieldvalues(recid, '8564_z')
    descriptions =  get_fieldvalues(recid, '8564_y')
    urls = get_fieldvalues(recid, '8564_u')
    for pos, comment in enumerate(comments):
        if comment in ('SUGGESTIONTHUMB', 'BIGTHUMB', 'THUMB', 'SMALLTHUMB', 'POSTER'):
            return (urls[pos], descriptions[pos])
    return ("", "")
Example #14
def get_journal_info(recid, tags):
    record_info = []
    # TODO: handle records with multiple journals
    tagsvalues = {}  # we store the tags and their values here
                     # like c->444 y->1999 p->"journal of foo",
                     # v->20
    tmp = get_fieldvalues(recid, tags['publication']['journal'])
    if tmp:
        tagsvalues["p"] = tmp[0]
    tmp = get_fieldvalues(recid, tags['publication']['volume'])
    if tmp:
        tagsvalues["v"] = tmp[0]
    tmp = get_fieldvalues(recid, tags['publication']['year'])
    if tmp:
        tagsvalues["y"] = tmp[0]
    tmp = get_fieldvalues(recid, tags['publication']['pages'])
    if tmp:
        # if the page numbers have "x-y" take just x
        pages = tmp[0]
        hpos = pages.find("-")
        if hpos > 0:
            pages = pages[:hpos]
        tagsvalues["c"] = pages

    # check if we have the required data
    ok = True
    for c in tags['publication_format']:
        if c in ('p', 'v', 'y', 'c'):
            if c not in tagsvalues:
                ok = False

    if ok:
        publ = format_journal(tags['publication_format'], tagsvalues)
        record_info += [publ]

        alt_volume = get_alt_volume(tagsvalues['v'])
        if alt_volume:
            tagsvalues2 = tagsvalues.copy()
            tagsvalues2['v'] = alt_volume
            publ = format_journal(tags['publication_format'], tagsvalues2)
            record_info += [publ]

        # Add codens
        for coden in get_kb_mappings('CODENS',
                                     value=tagsvalues['p']):
            tagsvalues2 = tagsvalues.copy()
            tagsvalues2['p'] = coden['key']
            publ = format_journal(tags['publication_format'], tagsvalues2)
            record_info += [publ]

    return record_info
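The tags argument is expected to carry the MARC tags and the output format for the publication data; an illustrative shape is sketched below. The concrete tags come from the BibRank configuration, so treat these values as assumptions.

tags = {
    'publication': {
        'journal': '773__p',
        'volume': '773__v',
        'year': '773__y',
        'pages': '773__c',
    },
    'publication_format': 'p v (y) c',
}
record_info = get_journal_info(12345, tags)  # 12345 is a hypothetical recid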
Example #15
def get_journal_info(recid, tags):
    record_info = []
    # TODO: handle records with multiple journals
    tagsvalues = {}  # we store the tags and their values here
                     # like c->444 y->1999 p->"journal of foo",
                     # v->20
    tmp = get_fieldvalues(recid, tags['publication']['journal'])
    if tmp:
        tagsvalues["p"] = tmp[0]
    tmp = get_fieldvalues(recid, tags['publication']['volume'])
    if tmp:
        tagsvalues["v"] = tmp[0]
    tmp = get_fieldvalues(recid, tags['publication']['year'])
    if tmp:
        tagsvalues["y"] = tmp[0]
    tmp = get_fieldvalues(recid, tags['publication']['pages'])
    if tmp:
        # if the page numbers have "x-y" take just x
        pages = tmp[0]
        hpos = pages.find("-")
        if hpos > 0:
            pages = pages[:hpos]
        tagsvalues["c"] = pages

    # check if we have the required data
    ok = True
    for c in tags['publication_format']:
        if c in ('p', 'v', 'y', 'c'):
            if c not in tagsvalues:
                ok = False

    if ok:
        publ = format_journal(tags['publication_format'], tagsvalues)
        record_info += [publ]

        alt_volume = get_alt_volume(tagsvalues['v'])
        if alt_volume:
            tagsvalues2 = tagsvalues.copy()
            tagsvalues2['v'] = alt_volume
            publ = format_journal(tags['publication_format'], tagsvalues2)
            record_info += [publ]

        # Add codens
        for coden in get_kb_mappings('CODENS',
                                     value=tagsvalues['p']):
            tagsvalues2 = tagsvalues.copy()
            tagsvalues2['p'] = coden['key']
            publ = format_journal(tags['publication_format'], tagsvalues2)
            record_info += [publ]

    return record_info
Example #16
def get_authors_from_record(recID, tags):
    """Get all authors for a record

    We need this function because there are 3 different types of authors
    and to fetch each one of them we need to look through MARC tags
    """
    mainauth_list = get_fieldvalues(recID, tags['first_author'])
    coauth_list = get_fieldvalues(recID, tags['additional_author'])
    extauth_list = get_fieldvalues(recID, tags['alternative_author_name'])

    authors = set(mainauth_list)
    authors.update(coauth_list)
    authors.update(extauth_list)

    return authors
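An illustrative tags mapping for the call above; the real values come from the BibRank configuration, and the 721__a tag simply mirrors the author tags used in Example #4.

tags = {
    'first_author': '100__a',
    'additional_author': '700__a',
    'alternative_author_name': '721__a',
}
authors = get_authors_from_record(12345, tags)  # hypothetical recid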
Example #17
def format_element(bfo):
    """
    Returns all the links used as references in a post
    """

    current_language = bfo.lang
    links = bfo.fields('856_0')
    menu_out = ""

    if links:
        menu_out = '<h4>%s:</h4>' % cfg_messages["in_issue"][current_language]
        for link in links:
            link_url   = link.get('u')
            link_data  = link.get('y', link_url)
            link_title = link.get('z', '')

            menu_out += """<div class="litem"><a href="%s"%s>%s</a></div>""" % (link_url, link_title and ' title="%s"' % link_title or '' , link_data)

            recid_in_archive = perform_request_search(p = link_url, f = '520__u')
            # differentiate between links to sources inside
            # the archive and sources outside
            if recid_in_archive:
                menu_out += """<div style="padding-left:20px;"><h4>This content is also available in the archive: </h4>"""
                try:
                    title = get_fieldvalues(recid_in_archive[0], "245__a")[0]
                except:
                    title = "Untitled"
                menu_out += """<span class="moreinfo"><a href="%s/record/%s">%s</a></span></div></br>""" % (CFG_SITE_URL, recid_in_archive[0], title)

    return menu_out
Example #18
    def tokenize_for_phrases(self, recID):
        """Get the country names and country codes of the institutions
           affiliated with the authors of the publication
        """

        # Get the names of the affiliated institutions
        institution_names = []
        for tag in self.institution_tags:
            institution_names += get_fieldvalues(recID, tag)

        # Get the hitset of all the institutes
        institution_collection_hitset = intbitset([])
        for collection in CFG_WEBSEARCH_INSTITUTION_COLLECTIONS:
            institution_collection_hitset += get_collection_reclist(collection)

        # Search for the institution name and get a list of institution ids
        institution_ids = intbitset([])
        for name in institution_names:
            result_hitset = search_pattern(
                p=name,
                f=self.institution_name_field
            )
            institution_hitset = result_hitset & institution_collection_hitset
            institution_ids += list(institution_hitset)

        # Get the country tokens
        tokens = []
        for instID in institution_ids:
            tokens += self._tokenize_from_country_name_tag(instID)
            tokens += self._tokenize_from_country_code_tag(instID)

        # Remove duplicates
        tokens = list(set(tokens))

        return tokens
Example #19
def get_index_strings_by_control_no(control_no):
    """extracts the index-relevant strings from the authority record referenced by
    the 'control_no' parameter and returns it as a list of strings

    :param control_no: a (INVENIO) MARC internal control_no to an authority record
    :type control_no: string (e.g. 'author:(ABC)1234')

    :param expected_type: the type of authority record expected
    :type expected_type: string, e.g. 'author', 'journal' etc.

    :return: list of index-relevant strings from the referenced authority record

    """

    from invenio.bibindex_engine import list_union

    #return value
    string_list = []
    #1. get recID and authority type corresponding to control_no
    rec_IDs = get_low_level_recIDs_from_control_no(control_no)
    #2. concatenate and return all the info from the interesting fields for this record
    for rec_id in rec_IDs:  # in case we get multiple authority records
        for tag in CFG_BIBAUTHORITY_AUTHORITY_SUBFIELDS_TO_INDEX.get(
                get_type_from_control_no(control_no), []):
            new_strings = get_fieldvalues(rec_id, tag)
            string_list = list_union(new_strings, string_list)
    #return
    return string_list
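A possible call, reusing the control number format shown in the docstring.

for index_string in get_index_strings_by_control_no('author:(ABC)1234'):
    print index_string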
Example #20
def task_run_core():
    """
    run daemon
    """

    #write_message("Getting expired loans ...", verbose=9)
    expired_loans = get_expired_loan()

    for (borrower_id, loan_id, recid) in expired_loans:
        (number_of_letters, date_letters) = get_overdue_letters_info(loan_id)

        if number_of_letters == 0:
            content = generate_email_body(CFG_BIBCIRCULATION_TEMPLATES['RECALL1'], loan_id)
        elif number_of_letters == 1 and send_second_recall(date_letters):
            content = generate_email_body(CFG_BIBCIRCULATION_TEMPLATES['RECALL2'], loan_id)
        elif number_of_letters == 2 and send_third_recall(date_letters):
            content = generate_email_body(CFG_BIBCIRCULATION_TEMPLATES['RECALL3'], loan_id)
        else:
            content = generate_email_body(CFG_BIBCIRCULATION_TEMPLATES['RECALL3'], loan_id)

        title = ''.join(get_fieldvalues(recid, "245__a"))
        subject = "LOAN RECALL: " + title
        update_expired_loan(loan_id)
        #write_message("Updating information about expired loans")
        send_overdue_letter(borrower_id, subject, content)
        #write_message("Sending overdue letter")

    #write_message("Done!!")

    return 1
Example #21
    def tokenize_for_phrases(self, recID):
        """Get the country names and country codes of the institutions
           affiliated with the authors of the publication
        """

        # Get the names of the affiliated institutions
        institution_names = []
        for tag in self.institution_tags:
            institution_names += get_fieldvalues(recID, tag)

        # Get the hitset of all the institutes
        institution_collection_hitset = intbitset([])
        for collection in CFG_WEBSEARCH_INSTITUTION_COLLECTIONS:
            institution_collection_hitset += get_collection_reclist(collection)

        # Search for the institution name and get a list of institution ids
        institution_ids = intbitset([])
        for name in institution_names:
            if name.strip():
                result_hitset = search_pattern(p=name,
                                               f=self.institution_name_field)
                institution_hitset = result_hitset & institution_collection_hitset
                institution_ids += list(institution_hitset)

        # Get the country tokens
        tokens = []
        for instID in institution_ids:
            tokens += self._tokenize_from_country_name_tag(instID)
            tokens += self._tokenize_from_country_code_tag(instID)

        # Remove duplicates
        tokens = list(set(tokens))

        return tokens
Example #22
def main():
    to_update = []
    to_update_recids = []

    recids = perform_request_search(p="970__a:'SPIRES'")
    for done, recid in enumerate(recids):

        if done % 50 == 0:
            print 'done %s of %s' % (done + 1, len(recids))

        existing_fields = set(get_fieldvalues(recid, '980__a'))
        if 'HEP' in existing_fields:
            continue

        xml = create_our_record(recid)
        to_update.append(xml)
        to_update_recids.append(recid)

        if len(to_update) == 1000 or done + 1 == len(recids) and len(to_update) > 0:
            task_id = submit_task(to_update)
            print 'submitted task id %s' % task_id
            wait_for_task(task_id)
            task_id = submit_bibindex_task(to_update_recids)
            print 'submitted task id %s' % task_id
            wait_for_task(task_id)
            to_update = []
            to_update_recids = []
Example #23
def create_xml(recid):
    """
    Searches for duplicate instances of 773 and keeps the good one.
    """
    tag = '773__'
    tag_value = tag + 'p'
    journal = get_fieldvalues(recid, tag_value)
    if len(journal) == 2 and journal[0] == journal[1]:
        record = get_record(recid)
        correct_record = {}
        record_add_field(correct_record, '001', \
            controlfield_value=str(recid))
        field_instances = record_get_field_instances(record, \
                              tag[0:3], tag[3], tag[4])
        correct_subfields = []
        c_value = False
        for field_instance in field_instances:
            for code, value in field_instance[0]:
                if value == 'To appear in the proceedings of':
                    pass
                elif (code, value) not in correct_subfields:
                    if code == 'c':
                        if c_value:
                            if len(value) > len(c_value):
                                c_value = value
                        else:
                            c_value = value
                    else:
                        correct_subfields.append((code, value))
        if c_value:
            correct_subfields.append(('c', c_value))
        record_add_field(correct_record, tag[0:3], tag[3], tag[4], \
                    subfields=correct_subfields)
        return print_rec(correct_record)
    return None
Example #24
def get_record_year(recid):
    record_date = []
    for tag in DATE_TAGS:
        record_date = get_fieldvalues(recid, tag)
        if record_date:
            break
    return record_date
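DATE_TAGS is assumed to be a module-level list of MARC date fields tried in order of preference; the value below is purely illustrative.

# Illustrative only: tags are tried in order until one yields a value.
DATE_TAGS = ['269__c', '260__c', '502__d']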
Example #25
    def setUp(self, recid=RECID, arxiv_id=ARXIV_ID):
        self.recid = recid
        self.arxiv_id = arxiv_id
        self.arxiv_version = 1
        self.bibupload_xml = """<record>
            <controlfield tag="001">%s</controlfield>
            <datafield tag="037" ind1=" " ind2=" ">
                <subfield code="a">arXiv:%s</subfield>
                <subfield code="9">arXiv</subfield>
                <subfield code="c">hep-ph</subfield>
            </datafield>
        </record>""" % (recid, arxiv_id)

        bibtask.setup_loggers()
        bibtask.task_set_task_param('verbose', 0)
        recs = bibupload.xml_marc_to_records(self.bibupload_xml)
        status, dummy, err = bibupload.bibupload(recs[0], opt_mode='correct')
        assert status == 0, err.strip()
        assert len(get_fieldvalues(recid, '037__a')) == 1

        def mocked_oai_harvest_get(prefix, baseurl, harvestpath,
                                   verb, identifier):
            temp_fd, temp_path = mkstemp()
            os.write(temp_fd, ARXIV_OAI_RESPONSE % self.arxiv_version)
            os.close(temp_fd)
            return [temp_path]

        self.oai_harvest_get = oai_harvest_daemon.oai_harvest_get
        oai_harvest_daemon.oai_harvest_get = mocked_oai_harvest_get

        def mocked_get_oai_src(params={}):
            return [{'baseurl': ''}]

        self.get_oai_src = oai_harvest_dblayer.get_oai_src
        oai_harvest_dblayer.get_oai_src = mocked_get_oai_src
Example #26
def task_run_core():
    """
    run daemon
    """

    #write_message("Getting expired loans ...", verbose=9)
    expired_loans = get_expired_loan()

    for (borrower_id, loan_id, recid) in expired_loans:
        (number_of_letters, date_letters) = get_overdue_letters_info(loan_id)

        if number_of_letters == 0:
            content = generate_email_body(
                CFG_BIBCIRCULATION_TEMPLATES['RECALL1'], loan_id)
        elif number_of_letters == 1 and send_second_recall(date_letters):
            content = generate_email_body(
                CFG_BIBCIRCULATION_TEMPLATES['RECALL2'], loan_id)
        elif number_of_letters == 2 and send_third_recall(date_letters):
            content = generate_email_body(
                CFG_BIBCIRCULATION_TEMPLATES['RECALL3'], loan_id)
        else:
            content = generate_email_body(
                CFG_BIBCIRCULATION_TEMPLATES['RECALL3'], loan_id)

        title = ''.join(get_fieldvalues(recid, "245__a"))
        subject = "LOAN RECALL: " + title
        update_expired_loan(loan_id)
        #write_message("Updating information about expired loans")
        send_overdue_letter(borrower_id, subject, content)
        #write_message("Sending overdue letter")

    #write_message("Done!!")

    return 1
Example #27
def book_title_from_MARC(recid):
    """
    Retrieve book's title from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's title
    """

    book_title = ' '.join(get_fieldvalues(recid, "245__a") + \
                          get_fieldvalues(recid, "245__b") + \
                          get_fieldvalues(recid, "245__n") + \
                          get_fieldvalues(recid, "245__p"))

    return book_title
Example #28
def format_element(bfo, newline=False, show_doi=False):
    """
    Prints a link to the proceedings if they exist.
    If not, nothing is returned.

    @param newline: if True, add <br /> at the end
    @param show_doi: if True, show DOI of the proceeding in brackets
    """
    cnum = str(bfo.field('111__g'))
    out = ""
    if not cnum:
        #Something is wrong, return empty string
        return out
    search_result = perform_request_search(p="773__w:" + cnum + " and 980__a:proceedings")
    if search_result:
        if len(search_result) > 1:
            # multiple proceedings
            proceedings = []
            for i, recID in enumerate(search_result):
                # check for the DOI and put it in brackets in the output
                doi = get_fieldvalues(recID, '0247_a')
                if show_doi and doi:
                    proceedings.append('<a href="/record/%(ID)s">#%(number)s</a> (DOI: <a href="https://doi.org/%(doi)s">%(doi)s</a>)'
                                       % {'ID': recID, 'number': i+1, 'doi': doi[0]})
                else:
                    proceedings.append('<a href="/record/%(ID)s">#%(number)s</a>' % {'ID': recID, 'number': i+1})
            out = 'Proceedings: '
            out += ', '.join(proceedings)
        elif len(search_result) == 1:
            # only one proceeding
            out += '<a href="/record/' + str(search_result[0]) + '">Proceedings</a>'
        if newline:
            out += '<br/>'
    return out
Example #29
def update_references(recid, overwrite=True):
    """Update references for a record

    First, we extract references from a record.
    Then, instead of updating the record directly, we add a bibupload
    task in -c mode which takes care of updating the record.

    Parameters:
    * recid: the id of the record
    """

    if not overwrite:
        # Check for references in record
        record = get_record(recid)
        if record and record_has_field(record, '999'):
            raise RecordHasReferences('Record has references and overwrite ' \
                                      'mode is disabled: %s' % recid)

    if get_fieldvalues(recid, '999C59'):
        raise RecordHasReferences('Record has been curated: %s' % recid)

    # Parse references
    references_xml = extract_references_from_record_xml(recid)

    # Save new record to file
    (temp_fd, temp_path) = mkstemp(prefix=CFG_REFEXTRACT_FILENAME,
                                   dir=CFG_TMPSHAREDDIR)
    temp_file = os.fdopen(temp_fd, 'w')
    temp_file.write(references_xml.encode('utf-8'))
    temp_file.close()

    # Update record
    task_low_level_submission('bibupload', 'refextract', '-P', '5', '-c',
                              temp_path)
Example #30
def get_record_year(recid):
    record_date = []
    for tag in DATE_TAGS:
        record_date = get_fieldvalues(recid, tag)
        if record_date:
            break
    return record_date
Example #31
def format_element(bfo, newline=False, show_doi=False):
    """
    Prints a link to the proceedings if they exist.
    If not, nothing is returned.

    @param newline: if True, add <br /> at the end
    @param show_doi: if True, show DOI of the proceeding in brackets
    """
    cnum = str(bfo.field('111__g'))
    out = ""
    if not cnum:
        #Something is wrong, return empty string
        return out
    search_result = search_pattern(p="773__w:" + cnum + " and 980__a:proceedings")
    if search_result:
        if len(search_result) > 1:
            # multiple proceedings
            proceedings = []
            for i, recID in enumerate(search_result):
                # check for the DOI and put it in brackets in the output
                doi = get_fieldvalues(recID, '0247_a')
                if show_doi and doi:
                    proceedings.append('<a href="/record/%(ID)s">#%(number)s</a> (DOI: <a href="http://dx.doi.org/%(doi)s">%(doi)s</a>)'
                                       % {'ID': recID, 'number': i+1, 'doi': doi[0]})
                else:
                    proceedings.append('<a href="/record/%(ID)s">#%(number)s</a>' % {'ID': recID, 'number': i+1})
            out = 'Proceedings: '
            out += ', '.join(proceedings)
        elif len(search_result) == 1:
            # only one proceeding
            out += '<a href="/record/' + str(search_result[0]) + '">Proceedings</a>'
        if newline:
            out += '<br/>'
    return out
Example #32
def create_xml(recid):
    """
    Searches for duplicate instances of 773 and keeps the good one.
    """
    tag = '773__'
    tag_value = tag + 'p'
    journal = get_fieldvalues(recid, tag_value)
    if len(journal) == 2 and journal[0] == journal[1]:
        record = get_record(recid)
        correct_record = {}
        record_add_field(correct_record, '001', \
            controlfield_value=str(recid))
        field_instances = record_get_field_instances(record, \
                              tag[0:3], tag[3], tag[4])
        correct_subfields = []
        c_value = False
        for field_instance in field_instances:
            for code, value in field_instance[0]:
                if value == 'To appear in the proceedings of':
                    pass
                elif (code, value) not in correct_subfields:
                    if code == 'c':
                        if c_value:
                            if len(value) > len(c_value):
                                c_value = value
                        else:
                            c_value = value
                    else:
                        correct_subfields.append((code, value))
        if c_value:
            correct_subfields.append(('c', c_value))
        record_add_field(correct_record, tag[0:3], tag[3], tag[4], \
                    subfields=correct_subfields)
        return print_rec(correct_record)
    return None
Example #33
def update_references(recid, overwrite=True):
    """Update references for a record

    First, we extract references from a record.
    Then, instead of updating the record directly, we add a bibupload
    task in -c mode which takes care of updating the record.

    Parameters:
    * recid: the id of the record
    """

    if not overwrite:
        # Check for references in record
        record = get_record(recid)
        if record and record_has_field(record, "999"):
            raise RecordHasReferences("Record has references and overwrite " "mode is disabled: %s" % recid)

    if get_fieldvalues(recid, "999C59"):
        raise RecordHasReferences("Record has been curated: %s" % recid)

    # Parse references
    references_xml = extract_references_from_record_xml(recid)

    # Save new record to file
    (temp_fd, temp_path) = mkstemp(prefix=CFG_REFEXTRACT_FILENAME, dir=CFG_TMPSHAREDDIR)
    temp_file = os.fdopen(temp_fd, "w")
    temp_file.write(references_xml.encode("utf-8"))
    temp_file.close()

    # Update record
    task_low_level_submission("bibupload", "refextract", "-P", "5", "-c", temp_path)
Example #34
def book_title_from_MARC(recid):
    """
    Retrieve book's title from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's title
    """

    book_title = ' '.join(get_fieldvalues(recid, "245__a") + \
                          get_fieldvalues(recid, "245__b") + \
                          get_fieldvalues(recid, "245__n") + \
                          get_fieldvalues(recid, "245__p"))

    return book_title
Example #35
def get_index_strings_by_control_no(control_no):
    """extracts the index-relevant strings from the authority record referenced by
    the 'control_no' parameter and returns it as a list of strings

    @param control_no: a (INVENIO) MARC internal control_no to an authority record
    @type control_no: string (e.g. 'author:(ABC)1234')

    @param expected_type: the type of authority record expected
    @type expected_type: string, e.g. 'author', 'journal' etc.

    @return: list of index-relevant strings from the referenced authority record

    """

    from invenio.bibindex_engine import list_union

    #return value
    string_list = []
    #1. get recID and authority type corresponding to control_no
    rec_IDs = get_low_level_recIDs_from_control_no(control_no)
    #2. concatenate and return all the info from the interesting fields for this record
    for rec_id in rec_IDs: # in case we get multiple authority records
        for tag in CFG_BIBAUTHORITY_AUTHORITY_SUBFIELDS_TO_INDEX.get(get_type_from_control_no(control_no)):
            new_strings = get_fieldvalues(rec_id, tag)
            string_list = list_union(new_strings, string_list)
    #return
    return string_list
Example #36
def _record_in_files_p(recid, filenames):
    """Search XML files for given record."""
    # Get id tags of record in question
    rec_oaiid = rec_sysno = -1
    rec_oaiid_tag = get_fieldvalues(recid, OAIID_TAG)
    if rec_oaiid_tag:
        rec_oaiid = rec_oaiid_tag[0]
    rec_sysno_tag = get_fieldvalues(recid, SYSNO_TAG)
    if rec_sysno_tag:
        rec_sysno = rec_sysno_tag[0]

    # For each record in each file, compare ids and abort if match is found
    for filename in filenames:
        try:
            if CFG_BIBEDIT_QUEUE_CHECK_METHOD == "regexp":
                # check via regexp: this is fast, but may not be precise
                re_match_001 = re.compile('<controlfield tag="001">%s</controlfield>' % (recid))
                re_match_oaiid = re.compile(
                    '<datafield tag="%s" ind1=" " ind2=" ">(\s*<subfield code="a">\s*|\s*<subfield code="9">\s*.*\s*</subfield>\s*<subfield code="a">\s*)%s'
                    % (OAIID_TAG[0:3], rec_oaiid)
                )
                re_match_sysno = re.compile(
                    '<datafield tag="%s" ind1=" " ind2=" ">(\s*<subfield code="a">\s*|\s*<subfield code="9">\s*.*\s*</subfield>\s*<subfield code="a">\s*)%s'
                    % (SYSNO_TAG[0:3], rec_sysno)
                )
                file_content = open(filename).read()
                if re_match_001.search(file_content):
                    return True
                if rec_oaiid_tag:
                    if re_match_oaiid.search(file_content):
                        return True
                if rec_sysno_tag:
                    if re_match_sysno.search(file_content):
                        return True
            else:
                # by default, check via bibrecord: this is accurate, but may be slow
                file_ = open(filename)
                records = create_records(file_.read(), 0, 0)
                for i in range(0, len(records)):
                    record, all_good = records[i][:2]
                    if record and all_good:
                        if _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
                            return True
                file_.close()
        except IOError:
            continue
    return False
Example #37
def _record_in_files_p(recid, filenames):
    """Search XML files for given record."""
    # Get id tags of record in question
    rec_oaiid = rec_sysno = -1
    rec_oaiid_tag = get_fieldvalues(recid, OAIID_TAG)
    if rec_oaiid_tag:
        rec_oaiid = rec_oaiid_tag[0]
    rec_sysno_tag = get_fieldvalues(recid, SYSNO_TAG)
    if rec_sysno_tag:
        rec_sysno = rec_sysno_tag[0]

    # For each record in each file, compare ids and abort if match is found
    for filename in filenames:
        try:
            if CFG_BIBEDIT_QUEUE_CHECK_METHOD == 'regexp':
                # check via regexp: this is fast, but may not be precise
                re_match_001 = re.compile(
                    '<controlfield tag="001">%s</controlfield>' % (recid))
                re_match_oaiid = re.compile(
                    '<datafield tag="%s" ind1=" " ind2=" ">(\s*<subfield code="a">\s*|\s*<subfield code="9">\s*.*\s*</subfield>\s*<subfield code="a">\s*)%s'
                    % (OAIID_TAG[0:3], rec_oaiid))
                re_match_sysno = re.compile(
                    '<datafield tag="%s" ind1=" " ind2=" ">(\s*<subfield code="a">\s*|\s*<subfield code="9">\s*.*\s*</subfield>\s*<subfield code="a">\s*)%s'
                    % (SYSNO_TAG[0:3], rec_sysno))
                file_content = open(filename).read()
                if re_match_001.search(file_content):
                    return True
                if rec_oaiid_tag:
                    if re_match_oaiid.search(file_content):
                        return True
                if rec_sysno_tag:
                    if re_match_sysno.search(file_content):
                        return True
            else:
                # by default, check via bibrecord: this is accurate, but may be slow
                file_ = open(filename)
                records = create_records(file_.read(), 0, 0)
                for i in range(0, len(records)):
                    record, all_good = records[i][:2]
                    if record and all_good:
                        if _record_has_id_p(record, recid, rec_oaiid,
                                            rec_sysno):
                            return True
                file_.close()
        except IOError:
            continue
    return False
Example #38
def get_authors_from_record(recID, tags,
                                use_bibauthorid=CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID):
    """Get all authors for a record

    We need this function because there are 3 different types of authors
    and to fetch each one of them we need to look through MARC tags
    """
    if use_bibauthorid:
        authors = get_personids_from_record(recID)
    else:
        authors_list = chain(
             get_fieldvalues(recID, tags['first_author']),
             get_fieldvalues(recID, tags['additional_author']),
             get_fieldvalues(recID, tags['alternative_author_name']))
        authors = set(hash(author) for author in list(authors_list)[:20])

    return authors
Example #39
    def test_bibauthority_get_dependent_records_for_control_no(self):
        """bibauthority - test get_dependent_records_for_control_no()"""
        control_no_field = CFG_BIBAUTHORITY_RECORD_CONTROL_NUMBER_FIELD
        control_nos = get_fieldvalues(118, control_no_field)
        count = 0
        for control_no in control_nos:
            count += len(get_dependent_records_for_control_no(control_no))
        self.assertTrue(count)
Example #40
def is_periodical(recid):
    rec_type = get_fieldvalues(recid, "690C_a")
    if len(rec_type) > 0:
        for value in rec_type:
            if value == 'PERI':
                return True

    return False
Example #41
def get_authors_from_record(recID, tags,
                                use_bibauthorid=CFG_BIBRANK_SELFCITES_USE_BIBAUTHORID):
    """Get all authors for a record

    We need this function because there are 3 different types of authors
    and to fetch each one of them we need to look through MARC tags
    """
    if use_bibauthorid:
        authors = get_personids_from_record(recID)
    else:
        authors_list = chain(
             get_fieldvalues(recID, tags['first_author']),
             get_fieldvalues(recID, tags['additional_author']),
             get_fieldvalues(recID, tags['alternative_author_name']))
        authors = set(hash(author) for author in list(authors_list)[:21])

    return authors
Example #42
def is_periodical(recid):
    rec_type = get_fieldvalues(recid, "690C_a")
    if len(rec_type) > 0:
        for value in rec_type:
            if value == 'PERI':
                return True

    return False
Example #43
    def test_bibauthority_get_dependent_records_for_control_no(self):
        """bibauthority - test get_dependent_records_for_control_no()"""
        control_no_field = CFG_BIBAUTHORITY_RECORD_CONTROL_NUMBER_FIELD
        control_nos = get_fieldvalues(118, control_no_field)
        count = 0
        for control_no in control_nos:
            count += len(get_dependent_records_for_control_no(control_no))
        self.assertTrue(count)
Example #44
def main():
    bibcatalog_system = BibCatalogSystemRT()

    max_id = run_sql("SELECT max(id) FROM bibrec")[0][0]
    recids = xrange(1, max_id + 1)

    for done, recid in enumerate(recids):
        if recid < 1124295:
            continue
        if recid >= 1183878:
            break
        if get_fieldvalues(recid, '999C6a') \
                                      and not get_fieldvalues(recid, '999C59'):
            print '* processing', recid
            create_ticket(recid, bibcatalog_system)

        if (done + 1) % 25 == 0:
            print 'done %s of %s' % (done + 1, len(recids))
Example #45
def goto(type, document='', number=0, lang='en', modif=0):
    today = time.strftime('%Y-%m-%d')
    if type == 'SSR':
        ## We would like a CERN Staff Rules and Regulations
        recids = perform_request_search(cc='Staff Rules and Regulations', f="925__a:1996-01-01->%s 925__b:%s->9999-99-99" % (today, today))
        recid = recids[-1]
        reportnumber = get_fieldvalues(recid, '037__a')[0]
        edition = int(reportnumber[-2:]) ## e.g. CERN-STAFF-RULES-ED08
        return BibRecDocs(recid).get_bibdoc(make_cern_ssr_docname(lang, edition, modif)).get_file('.pdf').get_url()
    elif type == "OPER-CIRC":
        recids = perform_request_search(cc="Operational Circulars", p="reportnumber=\"CERN-OPER-CIRC-%s-*\"" % number, sf="925__a")
        recid = recids[-1]
        documents = {}
        bibrecdocs = BibRecDocs(recid)
        for docname in bibrecdocs.get_bibdoc_names():
            ldocname = docname.lower()
            if 'implementation' in ldocname:
                _register_document(documents, docname, 'implementation_en')
            elif 'application' in ldocname:
                _register_document(documents, docname, 'implementation_fr')
            elif 'archiving' in ldocname:
                _register_document(documents, docname, 'archiving_en')
            elif 'archivage' in ldocname:
                _register_document(documents, docname, 'archiving_fr')
            elif 'annexe' in ldocname or 'annexes_fr' in ldocname:
                _register_document(documents, docname, 'annex_fr')
            elif 'annexes_en' in ldocname or 'annex' in ldocname:
                _register_document(documents, docname, 'annex_en')
            elif '_en_' in ldocname or '_eng_' in ldocname or '_angl_' in ldocname:
                _register_document(documents, docname, 'en')
            elif '_fr_' in ldocname:
                _register_document(documents, docname, 'fr')
        return bibrecdocs.get_bibdoc(documents[document]).get_file('.pdf').get_url()
    elif type == 'ADMIN-CIRC':
        recids = perform_request_search(cc="Administrative Circulars", p="reportnumber=\"CERN-ADMIN-CIRC-%s-*\"" % number, sf="925__a")
        recid = recids[-1]
        documents = {}
        bibrecdocs = BibRecDocs(recid)
        for docname in bibrecdocs.get_bibdoc_names():
            ldocname = docname.lower()
            if 'implementation' in ldocname:
                _register_document(documents, docname, 'implementation-en')
            elif 'application' in ldocname:
                _register_document(documents, docname, 'implementation-fr')
            elif 'archiving' in ldocname:
                _register_document(documents, docname, 'archiving-en')
            elif 'archivage' in ldocname:
                _register_document(documents, docname, 'archiving-fr')
            elif 'annexe' in ldocname or 'annexes_fr' in ldocname:
                _register_document(documents, docname, 'annex-fr')
            elif 'annexes_en' in ldocname or 'annex' in ldocname:
                _register_document(documents, docname, 'annex-en')
            elif '_en_' in ldocname or '_eng_' in ldocname or '_angl_' in ldocname:
                _register_document(documents, docname, 'en')
            elif '_fr_' in ldocname:
                _register_document(documents, docname, 'fr')
        return bibrecdocs.get_bibdoc(documents[document]).get_file('.pdf').get_url()
Example #46
def search_result_info(recid):
    """Return report number of a record or if it doen't exist return the recid
    itself.
    """
    report_numbers = get_fieldvalues(recid, '037__a')
    if len(report_numbers) == 0:
        return "#"+str(recid)
    else:
        return report_numbers[0]
Example #47
def get_authors_from_record(recID, tags):
    """Get all authors for a record

    We need this function because there are 3 different types of authors
    and to fetch each one of them we need to look through MARC tags
    """
    authors = get_personids_from_bibrec(recID)

    if not authors:
        mainauth_list = get_fieldvalues(recID, tags['first_author'])
        coauth_list   = get_fieldvalues(recID, tags['additional_author'])
        extauth_list  = get_fieldvalues(recID, tags['alternative_author_name'])

        authors = set(mainauth_list)
        authors.update(coauth_list)
        authors.update(extauth_list)

    return authors
Example #48
def search_result_info(recid):
    """Return report number of a record or if it doen't exist return the recid
    itself.
    """
    report_numbers = get_fieldvalues(recid, '037__a')
    if len(report_numbers) == 0:
        return "#" + str(recid)
    else:
        return report_numbers[0]
Example #49
def get_video_duration(recid):
    """ Return the duration of a video
    """
    duration = get_fieldvalues(recid, '950__d')
    if duration:
        duration = duration[0]
        duration = timecode_to_seconds(duration)
        return human_readable_time(duration)
    else:
        return ""
Example #50
def extract_arxiv_ids_from_recid(recid):
    for report_number in get_fieldvalues(recid, '037__a'):
        if not report_number.startswith('arXiv'):
            continue

        # Extract arxiv id
        try:
            yield report_number.split(':')[1]
        except IndexError:
            raise InvalidReportNumber(report_number)
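Because this helper is a generator, the arXiv identifiers can be consumed lazily; a sketch with a hypothetical recid.

for arxiv_id in extract_arxiv_ids_from_recid(12345):
    print arxiv_id  # e.g. '1101.1234' for a report number 'arXiv:1101.1234'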
Example #51
def check_record_for_refextract(recid):
    if get_fieldvalues(recid, '999C6v'):
        # References extracted by refextract
        if get_fieldvalues(recid, '999C59'):
            # They have been curated
            # To put in the HP and create ticket in the future
            needs_submitting = False
        else:
            # They haven't been curated, we safely extract from the new pdf
            needs_submitting = True
    elif not get_fieldvalues(recid, '999C5_'):
        # No references in the record, we can safely extract
        # new references
        needs_submitting = True
    else:
        # Old record, with either no curated references or references
        # curated by SLAC. We cannot distinguish, so we do nothing
        needs_submitting = False

    return needs_submitting
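A sketch of how this check might gate re-extraction, pairing it with update_references from Example #29; the recid is hypothetical.

if check_record_for_refextract(12345):
    update_references(12345)  # queue a refextract bibupload as shown above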
Example #52
def get_control_nos_from_recID(recID):
    """
    get a list of control numbers from the record ID

    @param recID: record ID
    @type recID: int

    @return: authority record control number
    """
    return get_fieldvalues(recID, CFG_BIBAUTHORITY_RECORD_CONTROL_NUMBER_FIELD,
                           repetitive_values=False)
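A possible call, reusing recid 118 from the authority tests elsewhere in these examples.

control_nos = get_control_nos_from_recID(118)
print control_nos  # list of authority control numbers, or [] if none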
Example #53
def check_records(records, amend_case=False):
    for record in records:
        for position, value in record.iterfields(['100__a', '700__a']):
            value = value.decode('utf8')
            new_value = NAME_CACHE.get(value)
            if new_value is None:
                search_value = value
                if ',' in value:
                    splitted_values = search_value.split(',', 1)
                    search_value = u"%s %s" % (splitted_values[1].strip(),
                                               splitted_values[0].strip())
                original_family_name = value.split(',')[0].strip()
                search_value = RE_SPACES.sub(' ', search_value).strip()
                if len(search_value.split()) < 3:
                    # Simple name
                    continue
                i = perform_request_search(p=u'author:"%s"' % search_value,
                                           cc='HepNames')
                possible_values = get_fieldvalues(
                    i, '100__a', sort=False) + get_fieldvalues(
                        i, '400__a', sort=False)
                for correct_value in possible_values:
                    correct_value = correct_value.decode('utf8')
                    if search_value.lower().endswith(
                            " " + correct_value.lower().split(',')[0]):
                        family_name = correct_value.split(',')[0].strip()
                        if len(family_name) < len(original_family_name):
                            continue
                        first_name = search_value[:-(len(family_name) +
                                                     1)].strip()
                        new_value = u'%s, %s' % (family_name, first_name)
                        NAME_CACHE[value] = new_value
                        break
                else:
                    NAME_CACHE[value] = value
            if new_value:
                if amend_case and new_value == value:
                    continue
                elif new_value.lower() == value.lower():
                    continue
                record.amend_field(position, new_value.encode('utf8'))
Example #54
def bst_hal():
    doi_map, arxiv_map = get_hal_maps()
    matchable_records = get_record_ids_to_export()
    write_message("Total matchable records: %s" % len(matchable_records))
    hal_records = get_hal_records()
    write_message("Already matched records: %s" % len(hal_records))
    bibupload = ChunkedBibUpload(mode='a', notimechange=True, user='******')
    tot_records = matchable_records - hal_records
    write_message("Records to be checked: %s" % len(tot_records))
    for i, recid in enumerate(tot_records):
        if i % 1000 == 0:
            write_message("%s records done out of %s" % (i, len(tot_records)))
            task_sleep_now_if_required()
        dois = get_fieldvalues(recid, tag='0247__a', sort=False)
        arxivs = get_fieldvalues(recid, tag='037__a', sort=False)
        matched_hal = [doi_map[doi] for doi in dois if doi in doi_map]
        matched_hal += [
            arxiv_map[arxiv] for arxiv in arxivs if arxiv in arxiv_map
        ]

        # Let's assert that we matched only one single hal document at most
        matched_hal_id = set(id(entry) for entry in matched_hal)
        if len(matched_hal) > 1:
            write_message(
                "WARNING: record %s matches more than 1 HAL record: %s" %
                (recid, matched_hal),
                stream=sys.stderr)
            continue
        elif not matched_hal:
            continue
        hal_id = matched_hal[0]['halId_s']

        rec = {}
        record_add_field(rec, '001', controlfield_value=str(recid))
        record_add_field(rec, '035', subfields=[('a', hal_id), ('9', 'HAL')])

        write_message("Record %s matched HAL record %s" % (recid, hal_id))

        bibupload.add(record_xml_output(rec))

    return True
Example #55
def holdings(recid):
    acquisition_src = get_fieldvalues(recid, AMZ_ACQUISITION_IDENTIFIER_TAG)
    if acquisition_src and acquisition_src[0].startswith(
            'AMZ') and db.has_copies(recid) == False:
        action = "proposal"
    else:
        action = "borrowal"

    holdings_information = perform_get_holdings_information(recid, request, \
                                                            action=action, ln=g.ln)
    return render_template('record_holdings.html',
                           holdings_information=holdings_information)
Example #56
def get_item_info_for_search_result(recid):
    """
    Get the item's info from MARC in order to create a
    search result with more details

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's information (author, editor and number of copies)
    """

    book_author = '  '.join(get_fieldvalues(recid, "100__a") + \
                            get_fieldvalues(recid, "100__u"))

    book_editor = ' , '.join(get_fieldvalues(recid, "260__a") + \
                             get_fieldvalues(recid, "260__b") + \
                             get_fieldvalues(recid, "260__c"))

    book_copies = '  '.join(get_fieldvalues(recid, "964__a"))

    if not book_copies:
        book_copies = db.get_number_copies(recid)

    book_infos = (book_author, book_editor, book_copies)

    return book_infos
Example #57
def record_can_overwrite_refs(recid):
    if get_fieldvalues(recid, '999C6v'):
        # References extracted by refextract
        if 'curator' in [
                value.lower().strip() for value in get_fieldvalues(
                    recid, '999C59', repetitive_values=False) if value.strip()
        ]:
            # They have been curated
            # To put in the HP and create ticket in the future
            needs_submitting = False
        else:
            # They haven't been curated, we safely extract from the new pdf
            needs_submitting = True
    elif not get_fieldvalues(recid, '999C5_'):
        # No references in the record, we can safely extract
        # new references
        needs_submitting = True
    else:
        # Old record, with either no curated references or references
        # curated by SLAC. We cannot distinguish, so we do nothing
        needs_submitting = False

    return needs_submitting