Exemplo n.º 1
0
def format_element(bfo, limit, separator=' ; ', extension='[...]', print_links="yes"):
    """
    Prints the list of editors of a record.

    An editor is the first $a value of a 100 field whose first $e value is
    exactly "ed.".  Editor-flagged fields without any $a value are skipped.

    @param limit: the maximum number of editors to display, as a string of
        digits or an integer; any other value disables the limit
    @param separator: the separator between editors.
    @param extension: a text printed if more editors than 'limit' exist
    @param print_links: if yes, print the editors as HTML link to their publications
    @return: the formatted editors, or None if the record has no editor
    """
    try:
        from urllib import quote
    except ImportError:
        # Python 3 moved quote() into urllib.parse
        from urllib.parse import quote
    from invenio.config import CFG_SITE_URL
    from invenio.legacy import bibrecord

    authors = bibrecord.record_get_field_instances(bfo.get_record(), '100')

    editors = []
    for author in authors:
        # Look the role up once instead of twice per field
        roles = bibrecord.field_get_subfield_values(author, "e")
        if not roles or roles[0] != "ed.":
            continue
        names = bibrecord.field_get_subfield_values(author, 'a')
        # A field flagged as editor may still lack a name: skip it rather
        # than fail with an IndexError
        if names:
            editors.append(names[0])

    if print_links.lower() == "yes":
        # NOTE(review): the editor name is inserted into the HTML without
        # escaping; presumably escaping is handled elsewhere - confirm.
        editors = ['<a href="' + CFG_SITE_URL + '/search?f=author&p=' +
                   quote(editor) +
                   '&amp;ln=' + bfo.lang +
                   '">' + editor + '</a>'
                   for editor in editors]

    # Accept an integer limit as well as the usual string form
    if isinstance(limit, int):
        limit = str(limit)

    if limit.isdigit() and len(editors) > int(limit):
        return separator.join(editors[:int(limit)]) + extension

    if editors:
        return separator.join(editors)

    return None
Exemplo n.º 2
0
def format_element(bfo, limit, separator=' ; ', extension='[...]', print_links="yes"):
    """
    Prints the list of editors of a record.

    An editor is the first $a value of a 100 field whose first $e value is
    exactly "ed.".  Editor-flagged fields without any $a value are skipped.

    @param limit: the maximum number of editors to display, as a string of
        digits or an integer; any other value disables the limit
    @param separator: the separator between editors.
    @param extension: a text printed if more editors than 'limit' exist
    @param print_links: if yes, print the editors as HTML link to their publications
    @return: the formatted editors, or None if the record has no editor
    """
    try:
        from urllib import quote
    except ImportError:
        # Python 3 moved quote() into urllib.parse
        from urllib.parse import quote
    from invenio.config import CFG_BASE_URL
    from invenio.legacy import bibrecord

    authors = bibrecord.record_get_field_instances(bfo.get_record(), '100')

    editors = []
    for author in authors:
        # Look the role up once instead of twice per field
        roles = bibrecord.field_get_subfield_values(author, "e")
        if not roles or roles[0] != "ed.":
            continue
        names = bibrecord.field_get_subfield_values(author, 'a')
        # A field flagged as editor may still lack a name: skip it rather
        # than fail with an IndexError
        if names:
            editors.append(names[0])

    if print_links.lower() == "yes":
        # NOTE(review): the editor name is inserted into the HTML without
        # escaping; presumably escaping is handled elsewhere - confirm.
        editors = ['<a href="' + CFG_BASE_URL + '/search?f=author&p=' +
                   quote(editor) +
                   '&amp;ln=' + bfo.lang +
                   '">' + editor + '</a>'
                   for editor in editors]

    # Accept an integer limit as well as the usual string form
    if isinstance(limit, int):
        limit = str(limit)

    if limit.isdigit() and len(editors) > int(limit):
        return separator.join(editors[:int(limit)]) + extension

    if editors:
        return separator.join(editors)

    return None
Exemplo n.º 3
0
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """Extend the record rec with the contents of the template and return it.

    Template fields whose tag is absent from rec are added to rec.  For a
    tag present on both sides, each field instance of rec receives, for
    every template subfield code it lacks, the first template value of
    that code.  Subfields of rec are reordered at the end.

    @param rec: the record to extend
    @param template_name: name handed to get_record_template()
    @param is_hp_record: if true (holding pen record), all subfields of the
        template are made volatile before merging
    @return: rec, or None if no template was found
    """
    template = get_record_template(template_name)
    if not template:
        return None

    template_bibrec = create_record(template)[0]
    if is_hp_record:
        # holding pen record: every template subfield becomes volatile
        record_make_all_subfields_volatile(template_bibrec)

    for tag in template_bibrec:
        template_fields = template_bibrec[tag]

        if not record_has_field(rec, tag):
            # tag unknown to rec: take over each template field
            for new_field in template_fields:
                subfields, ind1, ind2 = new_field[0], new_field[1], new_field[2]
                record_add_field(rec, tag, ind1, ind2, subfields=subfields)
            continue

        for template_field in template_fields:
            template_codes = field_get_subfield_codes(template_field)
            for target_field in rec[tag]:
                # codes are read once, before any subfield is added
                present_codes = field_get_subfield_codes(target_field)
                for code in template_codes:
                    if code in present_codes:
                        continue
                    first_value = field_get_subfield_values(
                        template_field, code)[0]
                    field_add_subfield(target_field, code, first_value)

    record_order_subfields(rec)
    return rec
Exemplo n.º 4
0
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """Extend the record rec with the contents of the template and return it.

    Template fields whose tag is absent from rec are added to rec.  For a
    tag present on both sides, each field instance of rec receives, for
    every template subfield code it lacks, the first template value of
    that code.  Subfields of rec are reordered at the end.

    @param rec: the record to extend
    @param template_name: name handed to get_record_template()
    @param is_hp_record: if true (holding pen record), all subfields of the
        template are made volatile before merging
    @return: rec, or None if no template was found
    """
    template = get_record_template(template_name)
    if not template:
        return None

    template_bibrec = create_record(template)[0]
    if is_hp_record:
        # holding pen record: every template subfield becomes volatile
        record_make_all_subfields_volatile(template_bibrec)

    for tag in template_bibrec:
        template_fields = template_bibrec[tag]

        if not record_has_field(rec, tag):
            # tag unknown to rec: take over each template field
            for new_field in template_fields:
                subfields, ind1, ind2 = new_field[0], new_field[1], new_field[2]
                record_add_field(rec, tag, ind1, ind2, subfields=subfields)
            continue

        for template_field in template_fields:
            template_codes = field_get_subfield_codes(template_field)
            for target_field in rec[tag]:
                # codes are read once, before any subfield is added
                present_codes = field_get_subfield_codes(target_field)
                for code in template_codes:
                    if code in present_codes:
                        continue
                    first_value = field_get_subfield_values(
                        template_field, code)[0]
                    field_add_subfield(target_field, code, first_value)

    record_order_subfields(rec)
    return rec
Exemplo n.º 5
0
def _create_ticket(recid, bibcatalog_system, queue):
    """Submit a "Refs for #<recid>" ticket for the given record.

    When CFG_INSPIRE_SITE is set, no ticket is submitted if the record has
    'arXiv' among its 980 $a values, lacks 'CORE' among them, was created
    more than 30 * 4 days ago, or has a 037 $c value starting with
    'astro-ph'.  Otherwise the first $a value of every 037 field is
    appended to the ticket subject.

    @param recid: id of the record the ticket is about
    @param bibcatalog_system: object whose ticket_submit() is called
    @param queue: queue handed to ticket_submit()
    """
    subject = "Refs for #%s" % recid

    if CFG_INSPIRE_SITE:
        record = get_bibrecord(recid)

        # Gather every collection (980 $a) the record belongs to
        collections = []
        for collection_tag in record_get_field_instances(record, "980"):
            collections.extend(field_get_subfield_values(collection_tag, 'a'))

        if 'arXiv' in collections:
            # Do not create tickets for arXiv papers here; per the original
            # note they are created in bibcatalog
            write_message("arXiv paper", verbose=1)
            return

        # Only records flagged CORE get a ticket
        if 'CORE' not in collections:
            write_message("not in hep", verbose=1)
            return

        # Do not create tickets for old records
        rows = run_sql("""SELECT creation_date FROM bibrec
                                   WHERE id = %s""", [recid])
        creation_date = rows[0][0]
        oldest_allowed = datetime.now() - timedelta(days=30 * 4)
        if creation_date < oldest_allowed:
            return

        # Add the report numbers to the subject
        for report_tag in record_get_field_instances(record, "037"):
            categories = field_get_subfield_values(report_tag, 'c')
            if any(category.startswith('astro-ph')
                   for category in categories):
                # We do not curate astro-ph
                write_message("astro-ph", verbose=1)
                return

            report_numbers = field_get_subfield_values(report_tag, 'a')
            if report_numbers:
                subject += " " + report_numbers[0]

    text = '%s/record/edit/#state=edit&recid=%s' % (CFG_SITE_SECURE_URL, recid)
    bibcatalog_system.ticket_submit(
        subject=subject, queue=queue, text=text, recordid=recid)
Exemplo n.º 6
0
def _create_ticket(recid, bibcatalog_system, queue):
    """Submit a "Refs for #<recid>" ticket for the given record.

    When CFG_INSPIRE_SITE is set, no ticket is submitted if the record has
    'arXiv' among its 980 $a values, lacks 'CORE' among them, was created
    more than 30 * 4 days ago, or has a 037 $c value starting with
    'astro-ph'.  Otherwise the first $a value of every 037 field is
    appended to the ticket subject.

    @param recid: id of the record the ticket is about
    @param bibcatalog_system: object whose ticket_submit() is called
    @param queue: queue handed to ticket_submit()
    """
    subject = "Refs for #%s" % recid

    if CFG_INSPIRE_SITE:
        record = get_bibrecord(recid)

        # Gather every collection (980 $a) the record belongs to
        collections = []
        for collection_tag in record_get_field_instances(record, "980"):
            collections.extend(field_get_subfield_values(collection_tag, 'a'))

        if 'arXiv' in collections:
            # Do not create tickets for arXiv papers here; per the original
            # note they are created in bibcatalog
            write_message("arXiv paper", verbose=1)
            return

        # Only records flagged CORE get a ticket
        if 'CORE' not in collections:
            write_message("not in hep", verbose=1)
            return

        # Do not create tickets for old records
        rows = run_sql("""SELECT creation_date FROM bibrec
                                   WHERE id = %s""", [recid])
        creation_date = rows[0][0]
        oldest_allowed = datetime.now() - timedelta(days=30*4)
        if creation_date < oldest_allowed:
            return

        # Add the report numbers to the subject
        for report_tag in record_get_field_instances(record, "037"):
            categories = field_get_subfield_values(report_tag, 'c')
            if any(category.startswith('astro-ph')
                   for category in categories):
                # We do not curate astro-ph
                write_message("astro-ph", verbose=1)
                return

            report_numbers = field_get_subfield_values(report_tag, 'a')
            if report_numbers:
                subject += " " + report_numbers[0]

    text = '%s/record/edit/#state=edit&recid=%s' % (CFG_SITE_SECURE_URL, recid)
    bibcatalog_system.ticket_submit(
        subject=subject, queue=queue, text=text, recordid=recid)
Exemplo n.º 7
0
    def check_arxiv(recid):
        """Tell whether the record has a 037 $a value starting with 'arXiv'.

        @param recid: id of the record, loaded through get_record()
        @return: True if such a value exists, False otherwise
        """
        report_fields = record_get_field_instances(get_record(recid), "037")
        return any(value.startswith('arXiv')
                   for report_tag in report_fields
                   for value in field_get_subfield_values(report_tag, 'a'))
Exemplo n.º 8
0
    def check_arxiv(recid):
        """Tell whether the record has a 037 $a value starting with 'arXiv'.

        @param recid: id of the record, loaded through get_record()
        @return: True if such a value exists, False otherwise
        """
        report_fields = record_get_field_instances(get_record(recid), "037")
        return any(value.startswith('arXiv')
                   for report_tag in report_fields
                   for value in field_get_subfield_values(report_tag, 'a'))
Exemplo n.º 9
0
def record_in_collection(record, collection):
    """
    Check whether a record belongs to a collection.

    The 980 $a values of the record are compared with the collection
    name, ignoring case.

    @param record: the record whose 980 fields are inspected
    @param collection: name of the collection to look for
    @return: True if one 980 $a value matches, False otherwise
    """
    return any(
        coll.lower() == collection.lower()
        for collection_tag in record_get_field_instances(record, "980")
        for coll in field_get_subfield_values(collection_tag, 'a'))
Exemplo n.º 10
0
def record_in_collection(record, collection):
    """
    Check whether a record belongs to a collection.

    The 980 $a values of the record are compared with the collection
    name, ignoring case.

    @param record: the record whose 980 fields are inspected
    @param collection: name of the collection to look for
    @return: True if one 980 $a value matches, False otherwise
    """
    return any(
        coll.lower() == collection.lower()
        for collection_tag in record_get_field_instances(record, "980")
        for coll in field_get_subfield_values(collection_tag, 'a'))
Exemplo n.º 11
0
def tarballs_by_recids(recids, sdir):
    """
    Take a string representing one recid or several and get the associated
    tarballs for those ids.

    recids is a comma-separated list whose items are either a single
    record id ("12") or a range written "low-high".  For every record, the
    first $a value of each 037 field whose first $9 value is 'arXiv' is
    collected and handed to tarballs_by_arXiv_id().

    @param: recids (string): the record id or ids
    @param: sdir (string): where the tarballs should live

    @return: tarballs ([string, string, ...]): locations of tarballs;
        an empty list when recids is empty
    """
    if not recids:
        return []

    list_of_ids = []
    # split(',') also copes with a single item, so one loop serves both
    # the "several ids" and the "one id or one range" cases
    for item in recids.split(','):
        if '-' in item:
            low, high = item.split('-')
            # NOTE(review): range() excludes 'high', so "1-5" yields 1..4;
            # kept as is - confirm whether the bound should be inclusive.
            list_of_ids.extend(range(int(low), int(high)))
        else:
            list_of_ids.append(int(item))

    arXiv_ids = []

    for recid in list_of_ids:
        rec = get_record(recid)
        for afieldinstance in record_get_field_instances(rec, tag='037'):
            sources = field_get_subfield_values(afieldinstance, '9')
            # 037 fields without a $9 subfield are not arXiv ones
            if not sources or sources[0] != 'arXiv':
                continue
            report_numbers = field_get_subfield_values(afieldinstance, 'a')
            if report_numbers:
                arXiv_ids.append(report_numbers[0])

    return tarballs_by_arXiv_id(arXiv_ids, sdir)
Exemplo n.º 12
0
def create_ticket(recid, bibcatalog_system, queue=CFG_REFEXTRACT_TICKET_QUEUE):
    """Submit a "Refs for #<recid>" ticket for the given record.

    Nothing is submitted when bibcatalog_system or queue is false, when the
    record has no 980 $a value equal to 'HEP', or when one of its 037 $c
    values starts with 'astro-ph'.  The first $a value of every 037 field
    is appended to the ticket subject.

    @param recid: id of the record the ticket is about
    @param bibcatalog_system: object whose ticket_submit() is called
    @param queue: queue handed to ticket_submit()
    """
    write_message('bibcatalog_system %s' % bibcatalog_system, verbose=1)
    write_message('queue %s' % queue, verbose=1)
    if not (bibcatalog_system and queue):
        return

    subject = "Refs for #%s" % recid
    record = get_bibrecord(recid)

    # Only create tickets for HEP
    in_hep = any(
        collection == 'HEP'
        for collection_tag in record_get_field_instances(record, "980")
        for collection in field_get_subfield_values(collection_tag, 'a'))
    if not in_hep:
        write_message("not in hep", verbose=1)
        return

    # Add the report numbers to the subject
    for report_tag in record_get_field_instances(record, "037"):
        categories = field_get_subfield_values(report_tag, 'c')
        if any(category.startswith('astro-ph') for category in categories):
            # We do not curate astro-ph
            write_message("astro-ph", verbose=1)
            return

        report_numbers = field_get_subfield_values(report_tag, 'a')
        if report_numbers:
            subject += " " + report_numbers[0]

    text = '%s/record/edit/#state=edit&recid=%s' % (CFG_SITE_SECURE_URL, recid)
    bibcatalog_system.ticket_submit(
        subject=subject, queue=queue, text=text, recordid=recid)
Exemplo n.º 13
0
def merge_record_with_template(rec, template_name):
    """Extend the record rec with the contents of the template and return it.

    Template fields whose tag is absent from rec are added to rec.  For a
    tag present on both sides, each field instance of rec receives, for
    every template subfield code it lacks, the first template value of
    that code.

    @param rec: the record to extend
    @param template_name: name handed to get_record_template()
    @return: rec, or None if no template was found
    """
    template = get_record_template(template_name)
    if not template:
        return None

    template_bibrec = create_record(template)[0]

    for tag in template_bibrec:
        template_fields = template_bibrec[tag]

        if not record_has_field(rec, tag):
            # tag unknown to rec: take over each template field
            for new_field in template_fields:
                subfields, ind1, ind2 = new_field[0], new_field[1], new_field[2]
                record_add_field(rec, tag, ind1, ind2, subfields=subfields)
            continue

        for template_field in template_fields:
            template_codes = field_get_subfield_codes(template_field)
            for target_field in rec[tag]:
                # codes are read once, before any subfield is added
                present_codes = field_get_subfield_codes(target_field)
                for code in template_codes:
                    if code in present_codes:
                        continue
                    first_value = field_get_subfield_values(
                        template_field, code)[0]
                    field_add_subfield(target_field, code, first_value)

    return rec
Exemplo n.º 14
0
def tarballs_by_recids(recids, sdir, docname=None, doctype=None, docformat=None):
    """
    Resolve a string of record ids into tarball locations.

    recids holds one item or several comma-separated items; an item is a
    record id or a range "low-high" (high excluded).  Without any of
    C{docname}, C{doctype}, C{docformat}, the first $a value of each 037
    field whose first $9 value is 'arXiv' is collected and passed to
    tarballs_by_arXiv_id().  Otherwise the latest files attached to each
    record are filtered by the given criteria.

    @param: recids (string): the record id or ids
    @param: sdir (string): where the tarballs should live
    @param docname: select tarball for given recid(s) that match docname
    @param doctype: select tarball for given recid(s) that match doctype
    @param docformat: select tarball for given recid(s) that match docformat
    @return: the result of tarballs_by_arXiv_id(), or a list of
        (path, recid) tuples when a doc* filter is given; an empty list
        when recids is empty
    """
    if not recids:
        return []

    def expand(item):
        # "low-high" -> the ids from low up to, but excluding, high
        if '-' in item:
            low, high = item.split('-')
            return range(int(low), int(high))
        return [int(item)]

    if ',' in recids:
        list_of_ids = []
        for item in recids.split(','):
            list_of_ids.extend(expand(item))
    else:
        list_of_ids = expand(recids)

    use_local_files = doctype or docname or docformat

    arXiv_ids = []
    local_files = []
    for recid in list_of_ids:
        rec = get_record(recid)

        if use_local_files:
            candidates = BibRecDocs(recid).list_latest_files()
            if doctype:
                candidates = [candidate for candidate in candidates
                              if candidate.get_type() == doctype]
            if docname:
                candidates = [candidate for candidate in candidates
                              if candidate.get_name() == docname]
            if docformat:
                candidates = [candidate for candidate in candidates
                              if candidate.get_format() == docformat]
            local_files.extend([(candidate.get_path(), recid)
                                for candidate in candidates])
            continue

        for report_field in record_get_field_instances(rec, tag='037'):
            sources = field_get_subfield_values(report_field, '9')
            if sources and sources[0] == 'arXiv':
                arXiv_ids.append(
                    field_get_subfield_values(report_field, 'a')[0])

    if use_local_files:
        return local_files

    return tarballs_by_arXiv_id(arXiv_ids, sdir)
Exemplo n.º 15
0
def task_run_core():
    """Assign a texkey to the HEP records that lack one.

    Searches for records without a texkey, double-checks their 035 fields,
    generates a new texkey for each remaining record and hands the
    resulting XML to process_chunk().

    @return: True
    """
    recids = perform_request_search(p='-035:spirestex -035:inspiretex',
                                    cc='HEP')
    total = len(recids)

    write_message("Found %s records to assign texkeys" % total)
    # 035 $9 values that mark a field as carrying a texkey
    provenances = ("SPIRESTeX", "INSPIRETeX")
    processed_recids = []
    xml_to_process = []
    for count, recid in enumerate(recids):
        write_message("processing recid %s" % recid)

        # Check that the record does not have already a texkey
        has_texkey = False
        recstruct = get_record(recid)
        for instance in record_get_field_instances(recstruct,
                                                   tag="035",
                                                   ind1="",
                                                   ind2=""):
            try:
                provenance = field_get_subfield_values(instance, "9")[0]
            except IndexError:
                provenance = ""
            # The key is read from $z, falling back to $a
            try:
                value = field_get_subfield_values(instance, "z")[0]
            except IndexError:
                try:
                    value = field_get_subfield_values(instance, "a")[0]
                except IndexError:
                    value = ""
            if provenance in provenances and value:
                has_texkey = True
                write_message("INFO: Record %s has already texkey %s" %
                              (recid, value))

        if not has_texkey:
            texkey_seq = TexkeySeq()
            new_texkey = ""
            try:
                new_texkey = texkey_seq.next_value(recid)
            except TexkeyNoAuthorError:
                write_message(
                    "WARNING: Record %s has no first author or collaboration" %
                    recid)
                continue
            except TexkeyNoYearError:
                write_message("WARNING: Record %s has no year" % recid)
                continue
            write_message("Created texkey %s for record %d" %
                          (new_texkey, recid))
            xml = create_xml(recid, new_texkey)
            processed_recids.append(recid)
            xml_to_process.append(xml)

        # enumerate() counts from 0: report the number of records actually
        # handled so far, so that the last message reads "N out of N"
        task_update_progress("Done %d out of %d." % (count + 1, total))
        task_sleep_now_if_required()

    # sequence ID to be used in all subsequent tasks
    sequence_id = str(random.randrange(1, 4294967296))
    if xml_to_process:
        process_chunk(xml_to_process, sequence_id)

    # Finally, index all the records processed
    # FIXME: Waiting for sequence id to be fixed
    # if processed_recids:
    #     submit_bibindex_task(processed_recids, sequence_id)

    return True
Exemplo n.º 16
0
def task_run_core():
    """Assign a texkey to the HEP records that lack one.

    Searches for records without a texkey, double-checks their 035 fields,
    generates a new texkey for each remaining record and hands the
    resulting XML to process_chunk().

    @return: True
    """
    recids = perform_request_search(
        p='-035:spirestex -035:inspiretex', cc='HEP')
    total = len(recids)

    write_message("Found %s records to assign texkeys" % total)
    # 035 $9 values that mark a field as carrying a texkey
    provenances = ("SPIRESTeX", "INSPIRETeX")
    processed_recids = []
    xml_to_process = []
    for count, recid in enumerate(recids):
        write_message("processing recid %s" % recid)

        # Check that the record does not have already a texkey
        has_texkey = False
        recstruct = get_record(recid)
        for instance in record_get_field_instances(recstruct, tag="035",
                                                   ind1="", ind2=""):
            try:
                provenance = field_get_subfield_values(instance, "9")[0]
            except IndexError:
                provenance = ""
            # The key is read from $z, falling back to $a
            try:
                value = field_get_subfield_values(instance, "z")[0]
            except IndexError:
                try:
                    value = field_get_subfield_values(instance, "a")[0]
                except IndexError:
                    value = ""
            if provenance in provenances and value:
                has_texkey = True
                write_message(
                    "INFO: Record %s has already texkey %s" % (recid, value))

        if not has_texkey:
            texkey_seq = TexkeySeq()
            new_texkey = ""
            try:
                new_texkey = texkey_seq.next_value(recid)
            except TexkeyNoAuthorError:
                write_message((
                    "WARNING: Record %s has no first author or "
                    "collaboration") % recid)
                continue
            except TexkeyNoYearError:
                write_message("WARNING: Record %s has no year" % recid)
                continue
            write_message("Created texkey %s for record %d" %
                          (new_texkey, recid))
            xml = create_xml(recid, new_texkey)
            processed_recids.append(recid)
            xml_to_process.append(xml)

        # enumerate() counts from 0: report the number of records actually
        # handled so far, so that the last message reads "N out of N"
        task_update_progress("Done %d out of %d." % (count + 1, total))
        task_sleep_now_if_required()

    # sequence ID to be used in all subsequent tasks
    sequence_id = str(random.randrange(1, 4294967296))
    if xml_to_process:
        process_chunk(xml_to_process, sequence_id)

    # Finally, index all the records processed
    # FIXME: Waiting for sequence id to be fixed
    # if processed_recids:
    #     submit_bibindex_task(processed_recids, sequence_id)

    return True
Exemplo n.º 17
0
def tarballs_by_recids(recids,
                       sdir,
                       docname=None,
                       doctype=None,
                       docformat=None):
    """
    Resolve a string of record ids into tarball locations.

    recids holds one item or several comma-separated items; an item is a
    record id or a range "low-high" (high excluded).  Without any of
    C{docname}, C{doctype}, C{docformat}, the first $a value of each 037
    field whose first $9 value is 'arXiv' is collected and passed to
    tarballs_by_arXiv_id().  Otherwise the latest files attached to each
    record are filtered by the given criteria.

    @param: recids (string): the record id or ids
    @param: sdir (string): where the tarballs should live
    @param docname: select tarball for given recid(s) that match docname
    @param doctype: select tarball for given recid(s) that match doctype
    @param docformat: select tarball for given recid(s) that match docformat
    @return: the result of tarballs_by_arXiv_id(), or a list of
        (path, recid) tuples when a doc* filter is given; an empty list
        when recids is empty
    """
    if not recids:
        return []

    def ids_of(item):
        # "low-high" -> the ids from low up to, but excluding, high
        if '-' in item:
            low, high = item.split('-')
            return range(int(low), int(high))
        return [int(item)]

    if ',' in recids:
        list_of_ids = []
        for item in recids.split(','):
            list_of_ids.extend(ids_of(item))
    else:
        list_of_ids = ids_of(recids)

    filtering = doctype or docname or docformat
    # (wanted value, name of the docfile getter it is compared with),
    # applied in the same order as the original filters
    criteria = (
        (doctype, 'get_type'),
        (docname, 'get_name'),
        (docformat, 'get_format'),
    )

    arXiv_ids = []
    local_files = []
    for recid in list_of_ids:
        rec = get_record(recid)

        if not filtering:
            for report_field in record_get_field_instances(rec, tag='037'):
                sources = field_get_subfield_values(report_field, '9')
                if sources and sources[0] == 'arXiv':
                    arXiv_ids.append(
                        field_get_subfield_values(report_field, 'a')[0])
            continue

        matching = BibRecDocs(recid).list_latest_files()
        for wanted, getter in criteria:
            if wanted:
                matching = [
                    docfile for docfile in matching
                    if getattr(docfile, getter)() == wanted
                ]
        local_files.extend([(docfile.get_path(), recid)
                            for docfile in matching])

    if filtering:
        return local_files

    return tarballs_by_arXiv_id(arXiv_ids, sdir)