Example #1
def bst_openaire_altmetric():
    """
    """
    recids = search_pattern(p="0->Z", f="0247_a")
    a = Altmetric()

    for recid in recids:
        try:
            # Check if we already have an Altmetric id
            sysno_inst = get_fieldvalues(recid, "035__9")
            if 'Altmetric' in sysno_inst:
                continue

            doi_val = get_fieldvalues(recid, "0247_a")[0]
            json_res = a.doi(doi_val)

            rec = {}
            record_add_field(rec, "001", controlfield_value=str(recid))

            if json_res:
                record_add_field(rec, '035', subfields=[('a',
                    str(json_res['altmetric_id'])), ('9', 'Altmetric')])
                bibupload(rec, opt_mode='correct')
        except AltmetricHTTPException, e:
            register_exception(prefix='Altmetric error (status code %s): %s' %
                (e.status_code, str(e)), alert_admin=False)
Example #2
def _record_in_files_p(recid, filenames):
    """Search XML files for given record."""
    # Get id tags of record in question
    rec_oaiid = rec_sysno = -1
    rec_oaiid_tag = get_fieldvalues(recid, OAIID_TAG)
    if rec_oaiid_tag:
        rec_oaiid = rec_oaiid_tag[0]
    rec_sysno_tag = get_fieldvalues(recid, SYSNO_TAG)
    if rec_sysno_tag:
        rec_sysno = rec_sysno_tag[0]

    # For each record in each file, compare ids and abort if match is found
    for filename in filenames:
        try:
            file_ = open(filename)
            records = create_records(file_.read(), 0, 0)
            file_.close()
            for i in range(0, len(records)):
                record, all_good = records[i][:2]
                if record and all_good:
                    if _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
                        return True
        except IOError:
            continue
    return False
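
A minimal usage sketch for the helper above; the file paths and recid are invented for illustration:

staging_files = ['/tmp/batch_0001.xml', '/tmp/batch_0002.xml']  # hypothetical paths
if _record_in_files_p(12345, staging_files):
    print 'Record 12345 already appears in one of the staging files'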
Example #3
def get_recid_and_reportnumber(recid=None, reportnumber=None):
    """
    Given at least a recid or a reportnumber, this function will look into
    the system for the matching record and will return a normalized
    recid and the primary reportnumber.
    @raises ValueError: in case of no record matched.
    """
    if recid:
        ## Recid specified receives priority.
        recid = int(recid)
        values = get_fieldvalues(recid, CFG_PRIMARY_REPORTNUMBER)
        if values:
            ## Let's take whatever reportnumber is stored in the matching record
            reportnumber = values[0]
            return recid, reportnumber
        else:
            raise ValueError("The record %s does not have a primary report number" % recid)
    elif reportnumber:
        ## Ok reportnumber specified, let's better try 1st with primary and then
        ## with other reportnumber
        recids = search_pattern(p='%s:"%s"' % (CFG_PRIMARY_REPORTNUMBER, reportnumber))
        if not recids:
            ## Not found as primary
            recids = search_pattern(p='reportnumber:"%s"' % reportnumber)
        if len(recids) > 1:
            raise ValueError('More than one record matches the reportnumber "%s": %s' % (reportnumber, ', '.join([str(i) for i in recids])))
        elif len(recids) == 1:
            recid = list(recids)[0]
            reportnumbers = get_fieldvalues(recid, CFG_PRIMARY_REPORTNUMBER)
            if not reportnumbers:
                raise ValueError("The matched record %s does not have a primary report number" % recid)
            return recid, reportnumbers[0]
        else:
            raise ValueError("No records are matched by the provided reportnumber: %s" % reportnumber)
    raise ValueError("At least the recid or the reportnumber must be specified")
Example #4
def build_issns_from_local_site():
    """
    Retrieves the ISSNs from the local database.
    Store the "journal name -> issn" relation.

    Normalize journal names a little bit:
        - strip whitespace chars (left and right)
        - all lower case
        - remove "[Online]" suffix

    Print the result as Python dict structure.
    """

    rec_id_list = perform_request_search(cc='Periodicals',
                                         of='id')
    built_issns = {}
    #built_issns = issns # Uncomment this to extend existing issns dict
                         # (e.g. in case of manual addition)
    for rec_id in rec_id_list:
        journal_name_list = get_fieldvalues(rec_id, '210__%')
        issn_list = get_fieldvalues(rec_id, '022__a')
        if issn_list:
            issn = issn_list[0] # There should be only one ISSN
            for journal_name in journal_name_list:
                # Depending on how journal names are entered into the database,
                # you might want to do some processing before saving:
                journal_name = journal_name.lower().strip()
                if journal_name.endswith("[online]"):
                    journal_name = journal_name[:-8].rstrip()

                built_issns[journal_name] = issn

    prtyp = pprint.PrettyPrinter(indent=4)
    prtyp.pprint(built_issns)
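
The journal-name normalization in the loop above can be illustrated standalone; the sample value is made up:

name = '  Nucl. Phys. B [Online] '  # hypothetical value from 210__%
name = name.lower().strip()
if name.endswith("[online]"):
    name = name[:-8].rstrip()
print name  # -> 'nucl. phys. b'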
def get_date(recid, product_type):
    """Get date in format mm/dd/yyyy, yyyy or yyyy Month."""
    try:
        date = get_fieldvalues(recid, '260__c')[0]
    except IndexError:
        try:
            date = get_fieldvalues(recid, '269__c')[0]
        except IndexError:
            try:
                date = get_fieldvalues(recid, '502__d')[0]
            except IndexError:
                date = '1900'
    try:
        date_object = datetime.datetime.strptime(date, '%Y-%m-%d')
        date = date_object.strftime('%m/%d/%Y')
    except ValueError:
        try:
            date_object = datetime.datetime.strptime(date, '%Y-%m')
            date = date_object.strftime('%Y %B')
            if product_type in ['TR', 'TD', 'JA']:
                date = date_object.strftime('%m/01/%Y')
        except ValueError:
            if product_type in ['TR', 'TD', 'JA']:
                date = '01/01/' + str(date)
    return date
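
The fallback formatting applied by get_date can be checked in isolation with plain datetime (no Invenio needed; the sample strings are invented):

import datetime

print datetime.datetime.strptime('2012-05-17', '%Y-%m-%d').strftime('%m/%d/%Y')  # 05/17/2012
print datetime.datetime.strptime('2012-05', '%Y-%m').strftime('%Y %B')           # 2012 May
print datetime.datetime.strptime('2012-05', '%Y-%m').strftime('%m/01/%Y')        # 05/01/2012 for TR/TD/JA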
Example #6
def render_dataverse_dataset_html(recid, display_link = True):
    """ Rendering a single Dataverse dataset, both for the tab and the record
    @param display_link Indicates if a link to the data record should be displayed
    @type display_link boolean
    """
    from invenio.search_engine import get_fieldvalues

    # rendering the HTML code

    c = [] #collecting parts of the output
    c.append("<div style=\"background-color: #ececec; padding:10px;\">")

    comments = get_fieldvalues(recid, '520__h')[0]
    publisher = get_fieldvalues(recid, '520__9')

    c.append("<br />")
    c.append("<b>Description: </b> " + comments + "<br />")
    c.append("<br />")

    link_txt = "Go to the record"
    if display_link:
        c.append("<a href=\"%s/record/%s\">%s</a>" % (CFG_SITE_URL, str(recid), link_txt))

    c.append("<br /><br />")
    if publisher and publisher[0] == 'Dataverse' and not display_link:
        c.append("<div class=\"hepdataTablePlaceholder\">")
        c.append("<table cellpadding=\"0\" cellspacing=\"0\" class=\"hepdataTable\">")
        c.append("<tr><td style=\"text-align: center;\">Preview not available</td>")
        c.append("</tr>")
        c.append("</table>")
        c.append("</div>")
        c.append("<br /><br />")

    c.append("</div>")
    return "\n".join(c)
Example #7
def build_issns_from_local_site():
    """
    Retrieves the ISSNs from the local database.
    Store the "journal name -> issn" relation.

    Normalize journal names a little bit:
        - strip whitespace chars (left and right)
        - all lower case
        - remove "[Online]" suffix

    Print the result as Python dict structure.
    """

    rec_id_list = perform_request_search(cc='Periodicals',
                                         of='id')
    built_issns = {}
    #built_issns = issns # Uncomment this to extend existing issns dict
                         # (e.g. in case of manual addition)
    for rec_id in rec_id_list:
        journal_name_list = get_fieldvalues(rec_id, '210__%')
        issn_list = get_fieldvalues(rec_id, '022__a')
        if issn_list:
            issn = issn_list[0] # There should be only one ISSN
            for journal_name in journal_name_list:
                # Depending on how journal names are entered into the database,
                # you might want to do some processing before saving:
                journal_name = journal_name.lower().strip()
                if journal_name.endswith("[online]"):
                    journal_name = journal_name[:-8].rstrip()

                built_issns[journal_name] = issn

    prtyp = pprint.PrettyPrinter(indent=4)
    prtyp.pprint(built_issns)
Example #8
def bst_openaire_altmetric():
    """
    """
    recids = search_pattern(p="0->Z", f="0247_a")
    a = Altmetric()

    for recid in recids:
        try:
            # Check if we already have an Altmetric id
            sysno_inst = get_fieldvalues(recid, "035__9")
            if 'Altmetric' in sysno_inst:
                continue

            doi_val = get_fieldvalues(recid, "0247_a")[0]
            json_res = a.doi(doi_val)

            rec = {}
            record_add_field(rec, "001", controlfield_value=str(recid))

            if json_res:
                record_add_field(rec,
                                 '035',
                                 subfields=[('a',
                                             str(json_res['altmetric_id'])),
                                            ('9', 'Altmetric')])
                bibupload(rec, opt_mode='correct')
        except AltmetricHTTPException, e:
            register_exception(prefix='Altmetric error (status code %s): %s' %
                               (e.status_code, str(e)),
                               alert_admin=False)
Example #9
def main(experiment, collaboration):
    authors = {}
    affiliations = []
    affiliation_count = 1
    search = "693__e:" + experiment
    x = perform_request_search(p = search, cc = 'HepNames')
    for r in x:
        foaf_name = get_fieldvalues(r, '100__q')
        cal_authorNameNative = get_fieldvalues(r, '400__a')
        name = get_fieldvalues(r, '100__a')[0]
        foaf_givenName  = re.sub(r'.*\, ', '', name)
        foaf_familyName =  re.sub(r'\,.*', '', name)
        author_id = find_inspire_id_from_record(r)
        orcid      = get_hepnames_anyid_from_recid(r, 'ORCID')
        if VERBOSE:
            print r
        affiliation = get_hepnames_aff_from_recid(r, 'Current')
        if not affiliation: print 'No aff - find recid', r
        d = {}
        d['foaf_givenName']  = foaf_givenName
        d['foaf_familyName'] = foaf_familyName
        d['affiliation']     = affiliation
        d['author_id']       = author_id
        authors[name.lower()] = d
        affiliations.append(affiliation)
    affiliations = affiliations_process(affiliations)
    for key in authors:
        affiliation = authors[key]['affiliation']
        affiliation_number = affiliations.index(affiliation) + 1
        authors[key]['affiliation_id'] = affiliation_number
    print xml_frontmatter(experiment, collaboration)
    print xml_affiliations(affiliations)
    print xml_authors(authors)
Example #10
def _record_in_files_p(recid, filenames):
    """Search XML files for given record."""
    # Get id tags of record in question
    rec_oaiid = rec_sysno = -1
    rec_oaiid_tag = get_fieldvalues(recid, OAIID_TAG)
    if rec_oaiid_tag:
        rec_oaiid = rec_oaiid_tag[0]
    rec_sysno_tag = get_fieldvalues(recid, SYSNO_TAG)
    if rec_sysno_tag:
        rec_sysno = rec_sysno_tag[0]

    # For each record in each file, compare ids and abort if match is found
    for filename in filenames:
        try:
            file_ = open(filename)
            records = create_records(file_.read(), 0, 0)
            file_.close()
            for i in range(0, len(records)):
                record, all_good = records[i][:2]
                if record and all_good:
                    if _record_has_id_p(record, recid, rec_oaiid, rec_sysno):
                        return True
        except IOError:
            continue
    return False
Example #11
def main(search):
    """This module returns a Google-like result showing the most
       highly cited papers from a given result."""

    all_refs = []
    if not search:
        search = 'standard model'
        search = '"dark matter"'
        search = 'qcd sum rules'
    print 'Your search is', search
    result = perform_request_search(p=search, cc='HEP')
    print 'The result is', len(result)
    for recid in result:
        try:
            search = 'citedby:recid:' + str(recid)
            refs = perform_request_search(p=search, cc='HEP')
            all_refs += refs
        except:
            print 'problem with', recid
    all_refs.sort()
    counted_all_refs = Counter(all_refs)
    sorted_count = sorted(counted_all_refs.items(), key=operator.itemgetter(1),
                          reverse=True)
    for recid_count, count in sorted_count[:10]:
        url = 'http://inspirehep.net/record/' + str(recid_count)
        print count, url
        title = get_fieldvalues(recid_count, '245__a')[0]
        try:
            author = get_fieldvalues(recid_count, '710__g')[0]
        except:
            try:
                author = get_fieldvalues(recid_count, '100__a')[0]
            except:
                author = 'No Author'
        print '  ', author, ':', title
def main(search):
    """This module returns a Google-like result showing the most
       highly cited papers from a given result."""

    all_refs = []
    if not search:
        search = 'standard model'
        search = '"dark matter"'
        search = 'qcd sum rules'
    print 'Your search is', search
    result = perform_request_search(p=search, cc='HEP')
    print 'The result is', len(result)
    for recid in result:
        try:
            search = 'citedby:recid:' + str(recid)
            refs = perform_request_search(p=search, cc='HEP')
            all_refs += refs
        except:
            print 'problem with', recid
    all_refs.sort()
    counted_all_refs = Counter(all_refs)
    sorted_count = sorted(counted_all_refs.items(), key=operator.itemgetter(1))
    for recid_count, count in sorted_count[-10:]:
        url = 'http://inspirehep.net/record/' + str(recid_count)
        print count, url
        title = get_fieldvalues(recid_count, '245__a')[0]
        try:
            author = get_fieldvalues(recid_count, '710__g')[0]
        except:
            try:
                author = get_fieldvalues(recid_count, '100__a')[0]
            except:
                author = 'No Author'
        print '  ', author, ':', title
def migrate_bibdoc_status(recid, is_public, access_right):
    from invenio.search_engine import get_fieldvalues
    from invenio.bibdocfile import BibRecDocs

    # Generate firerole
    fft_status = []
    if is_public:
        email = get_fieldvalues(recid, "8560_f")[0]
        if access_right == "open":
            # Access to everyone
            fft_status = ["allow any"]
        elif access_right == "embargoed":
            # Access to submitter, deny everyone else until embargo date,
            # then allow all
            date = get_fieldvalues(recid, "942__a")[0]
            fft_status = ['allow email "%s"' % email, 'deny until "%s"' % date, "allow any"]
        elif access_right in ("closed", "restricted"):
            # Access to submitter, deny everyone else
            fft_status = ['allow email "%s"' % email, "deny all"]
    else:
        # Access to submitter, deny everyone else
        fft_status = None

    if fft_status:
        fft_status = "firerole: %s" % "\n".join(fft_status)

        brd = BibRecDocs(recid)
        for d in brd.list_bibdocs():
            d.set_status(fft_status)
def ccreate_xml(recid, rawstring):
    found = False
    record = {}
    record_add_field(record, '001', controlfield_value=str(recid))
    rawstring = rawstring.lower()
    for old, new in (('proc. of the', ''), ('proc. of', ''), ('.', ' '),
                     ('(', ''), (')', ''), (' -', '')):
        rawstring = rawstring.replace(old, new)
    for k, v in term_dict.items():
        if k in rawstring:
            rawstring = rawstring.replace(k, v)
    matchobj = re.search('(.*?\d{4})', rawstring)
    if matchobj:
        search = perform_request_search(p=matchobj.group(), cc='Conferences')
        if len(search) == 1:
            for s in search:
                cnums = get_fieldvalues(s, '111__g')
                cnum = cnums[0]
                existing_cnum = get_fieldvalues(recid, '773__w')
                if cnum not in existing_cnum:
                    print recid, cnum
                    found = True

    if found:
        record_add_field(record, '773', '', '', subfields=[('w', cnum)])
        return print_rec(record)
Example #15
def solr_add_range(lower_recid, upper_recid):
    """
    Adds the regarding field values of all records from the lower recid to the upper one to Solr.
    It preserves the fulltext information.
    """
    for recid in range(lower_recid, upper_recid + 1):
        if record_exists(recid):
            try:
                abstract = unicode(remove_control_characters(get_fieldvalues(recid, CFG_MARC_ABSTRACT)[0]), 'utf-8')
            except:
                abstract = ""
            try:
                first_author = remove_control_characters(get_fieldvalues(recid, CFG_MARC_AUTHOR_NAME)[0])
                additional_authors = remove_control_characters(reduce(lambda x, y: x + " " + y, get_fieldvalues(recid, CFG_MARC_ADDITIONAL_AUTHOR_NAME), ''))
                author = unicode(first_author + " " + additional_authors, 'utf-8')
            except:
                author = ""
            try:
                bibrecdocs = BibRecDocs(recid)
                fulltext = unicode(remove_control_characters(bibrecdocs.get_text()), 'utf-8')
            except:
                fulltext = ""
            try:
                keyword = unicode(remove_control_characters(reduce(lambda x, y: x + " " + y, get_fieldvalues(recid, CFG_MARC_KEYWORD), '')), 'utf-8')
            except:
                keyword = ""
            try:
                title = unicode(remove_control_characters(get_fieldvalues(recid, CFG_MARC_TITLE)[0]), 'utf-8')
            except:
                title = ""
            solr_add(recid, abstract, author, fulltext, keyword, title)

    SOLR_CONNECTION.commit()
    task_sleep_now_if_required(can_stop_too=True)
def get_date(recid, product_type):
    """Get date in format mm/dd/yyyy, yyyy or yyyy Month."""
    try:
        date = get_fieldvalues(recid, '260__c')[0]
    except IndexError:
        try:
            date = get_fieldvalues(recid, '269__c')[0]
        except IndexError:
            try:
                date = get_fieldvalues(recid, '502__d')[0]
            except IndexError:
                date = '1900'
    try:
        date_object = datetime.datetime.strptime(date, '%Y-%m-%d')
        date = date_object.strftime('%m/%d/%Y')
    except ValueError:
        try:
            date_object = datetime.datetime.strptime(date, '%Y-%m')
            date = date_object.strftime('%Y %B')
            if product_type in ['TR', 'TD', 'JA']:
                date = date_object.strftime('%m/01/%Y')
        except ValueError:
            if product_type in ['TR', 'TD', 'JA']:
                date = '01/01/' + str(date)
    return date
def main(experiment, collaboration):
    authors = {}
    affiliations = []
    affiliation_count = 1
    search = "693__e:" + experiment
    x = perform_request_search(p = search, cc = 'HepNames')
    for r in x:
        foaf_name = get_fieldvalues(r, '100__q')
        cal_authorNameNative = get_fieldvalues(r, '400__a')
        name = get_fieldvalues(r, '100__a')[0]
        foaf_givenName  = re.sub(r'.*\, ', '', name)
        foaf_familyName =  re.sub(r'\,.*', '', name)
        author_id = find_inspire_id_from_record(r)
        if VERBOSE:
            print r
        affiliation = get_hepnames_affiliation_from_recid(r, 'Current')
        if not affiliation: print 'find recid', r
        d = {}
        d['foaf_givenName']  = foaf_givenName
        d['foaf_familyName'] = foaf_familyName
        d['affiliation']     = affiliation
        d['author_id']       = author_id
        authors[name.lower()] = d
        affiliations.append(affiliation)
    affiliations = affiliations_process(affiliations)
    for key in authors:
        affiliation = authors[key]['affiliation']
        affiliation_number = affiliations.index(affiliation) + 1
        authors[key]['affiliation_id'] = affiliation_number
    print xml_frontmatter(experiment, collaboration)
    print xml_affiliations(affiliations)
    print xml_authors(authors)
Example #18
def exp4coll(collaboration):
    from invenio.search_engine import perform_request_search
    from invenio.search_engine import get_fieldvalues
    experiment = None
    experiments = []
    correct_coll = None
    
    exp_candidates = perform_request_search(p='710__g:"%s"' % collaboration, cc='Experiments')
    if len(exp_candidates) == 1:
        experiment = get_fieldvalues(exp_candidates[0], '119__a')[0]
        return experiment, experiments, correct_coll
    
    if len(exp_candidates) > 1:
        experiments = [get_fieldvalues(exp_recid, '119__a')[0] for exp_recid in exp_candidates]
        return experiment, experiments, correct_coll
        
    # search in EXP-name instead
    exp_candidates = perform_request_search(p='119__a:"%s"' % collaboration, cc='Experiments')
    if len(exp_candidates) == 1:
        experiment = collaboration
        m710g = get_fieldvalues(exp_candidates[0], '710__g')
        if m710g:
            correct_coll = m710g
        return experiment, experiments, correct_coll
        
    # fuzzy search
    exp_candidates = perform_request_search(p="710__g:'%s'" % collaboration, cc='Experiments')
    exp_candidates += perform_request_search(p="419__a:'%s'" % collaboration, cc='Experiments')
    short_coll = re.sub(r'^(...[^ /_-]*)[ /_-].*',r'\1', collaboration)
    exp_candidates += perform_request_search(p="710__g:'%s'" % short_coll, cc='Experiments')
    exp_candidates = set(exp_candidates)
    experiments = [get_fieldvalues(exp_recid, '119__a')[0] for exp_recid in exp_candidates]
    return experiment, experiments, correct_coll
def book_information_from_MARC(recid):
    """
    Retrieve book's information from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return tuple with title, year, author, isbn and editor.
    """

    book_title = ' '.join(get_fieldvalues(recid, "245__a") + \
                          get_fieldvalues(recid, "245__b") + \
                          get_fieldvalues(recid, "245__n") + \
                          get_fieldvalues(recid, "245__p"))

    book_year = ' '.join(get_fieldvalues(recid, "260__c"))

    book_author = '  '.join(get_fieldvalues(recid, "100__a") + \
                            get_fieldvalues(recid, "100__u"))

    book_isbn = ' '.join(get_fieldvalues(recid, "020__a"))

    book_editor = ' , '.join(get_fieldvalues(recid, "260__a") + \
                             get_fieldvalues(recid, "260__b"))


    return (book_title, book_year, book_author, book_isbn, book_editor)
Example #20
def book_information_from_MARC(recid):
    """
    Retrieve book's information from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return tuple with title, year, author, isbn and editor.
    """

    book_title = ' '.join(get_fieldvalues(recid, "245__a") + \
                          get_fieldvalues(recid, "245__b") + \
                          get_fieldvalues(recid, "245__n") + \
                          get_fieldvalues(recid, "245__p"))

    book_year = ' '.join(get_fieldvalues(recid, "260__c"))

    book_author = '  '.join(get_fieldvalues(recid, "100__a") + \
                            get_fieldvalues(recid, "100__u"))

    book_isbn = ' '.join(get_fieldvalues(recid, "020__a"))

    book_editor = ' , '.join(get_fieldvalues(recid, "260__a") + \
                             get_fieldvalues(recid, "260__b"))

    return (book_title, book_year, book_author, book_isbn, book_editor)
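
A small usage sketch unpacking the tuple returned above; the recid is arbitrary:

title, year, author, isbn, editor = book_information_from_MARC(12345)  # hypothetical recid
print '%s (%s) by %s. ISBN %s. %s' % (title, year, author, isbn, editor)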
Example #21
def get_author_number(recid):
    """Gets number of authors."""
    author_list = get_fieldvalues(recid, "100__a") + \
                  get_fieldvalues(recid, "700__a")
    try:
        return len(author_list)
    except IndexError:
        return 0
def get_corporate_author(recid):
    """Check to see if there is a corporte author and return it."""
    try:
        #return get_fieldvalues(recid, "110__a")[0]
        author_list = get_fieldvalues(recid, "110__a") \
                    + get_fieldvalues(recid, "710__a")
        return '; '.join([unicode(a, "utf-8") for a in author_list])
    except IndexError:
        return None
Example #23
def get_corporate_author(recid):
    """Check to see if there is a corporte author and return it."""
    try:
        #return get_fieldvalues(recid, "110__a")[0]
        author_list = get_fieldvalues(recid, "110__a") \
                    + get_fieldvalues(recid, "710__a")
        return '; '.join([unicode(a, "utf-8") for a in author_list])
    except IndexError:
        return None
def main(recids):
    """
    Gets name and email from each HEPNames record.
    """

    if VERBOSE:
        print recids

    icount = 1
    for recid in recids:
        if recid in BAD_RECIDS:
            break
        recid_str = str(recid)
        recid_int = int(recid)
        if re.search(r'INSPIRE-', recid_str):
            search = '035__a:' + recid_str
            result = perform_request_search(p=search, cc='HepNames')
            recid = result[0]
            recid_str = str(recid)
            recid_int = int(recid)
        if get_hepnames_anyid_from_recid(recid_int, 'ORCID'):
            print recid_str, 'already has an ORCID\n'
            icount += 1
            continue
        try:
            contact_email = get_fieldvalues(recid_int, '371__m')[0]
        except:
            contact_email = '*****@*****.**'
        try:
            contact_name = get_fieldvalues(recid_int, '100__a')[0]
            if "," in contact_name:
                contact_name = " ".join(contact_name.split(", ")[::-1])
        except:
            contact_name = 'Sir or Madam'
        #contact_email = '*****@*****.**'
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"

        print icount, '/', len(recids)
        print 'recid = ', recid_str
        print 'email = ', contact_email
        print 'name  = ', contact_name
        print ' '
        try:
            send_jobs_mail(recid_str, contact_email, contact_name)
            time.sleep(1)
        except IOError as e:
            print "I/O error({0}): {1}".format(e.errno, e.strerror)
            print 'PROBLEM sending mail to:'
            print recid, contact_email, contact_name, '\n'
        icount += 1
Example #25
def doi_to_pbn():
    print "<?xml version=\"1.0\" ?>"
    print "<collection>"

    searches = ['0247_a:/PhysRev.*/ -773__p:/Phys.Rev./',
                '0247_a:/PhysRev.*/ -773__c:/[0-9]/',
                '0247_a:/PhysRev.*/ -773__y:/[0-9]{4}/',
                '0247_a:/RevModPhys.*/ -773__p:/Rev.Mod.Phys./',
                '0247_a:/RevModPhys.*/ -773__c:/[0-9]/',
                '0247_a:/RevModPhys.*/ -773__y:/[0-9]{4}/']

    for search in searches:
        x = perform_request_search(p=search,cc='HEP')
        x = x[:200]
        for r in x:   
            if VERBOSE:
                print r
            doi = get_fieldvalues(r,'0247_a')[0]
            pbn = get_fieldvalues(r,'773__p')
            [publisher,jvp] = re.split('/',doi)
            try:
                [journal,volume,page] = re.split('\.',jvp)
            except ValueError:
                print "Error in:", r, journal,volume,page
            except UnboundLocalError:
                print "Error in:", r, "with the journal,volume,page"
            try:
                volumeNumber = int(volume)
            except ValueError:
                print "Error:", volume, " is not a number"
            if journal == 'PhysRevSTAB': 
                journal = 'Phys.Rev.ST Accel.Beams'
            elif journal == 'PhysRevSTPER': 
                journal = 'Phys.Rev.ST Phys.Educ.Res.'
            elif journal == 'RevModPhys': 
                journal = 'Rev.Mod.Phys.'
            else:
                matchObj = re.search("[A-EX]$", journal)
                if matchObj:
                    volume =  matchObj.group() + volume
                    journal = re.sub(r'[A-EX]$',r'',journal)
                    journal = journal + "."
                    journal = re.sub(r'([a-z])([A-Z])',r'\1.\2',journal)
                    year = str(yearCalc(journal,volume))
                    print '<record>'
                    print '  <controlfield tag="001">'+str(r)+'</controlfield>'
                    print '  <datafield tag="773" ind1=" " ind2=" ">'
                    print '    <subfield code="p">' + journal + '</subfield>'
                    print '    <subfield code="v">' + volume + '</subfield>'
                    print '    <subfield code="c">' + page + '</subfield>'
                    print '    <subfield code="y">' + year + '</subfield>'
                    print '  </datafield>'
                    print '</record>'
    print "</collection>"
def get_fermilab_report(recid):
    """Get the Fermilab report number."""

    report = None
    report_numbers = get_fieldvalues(recid, "037__a") + \
                     get_fieldvalues(recid, "037__z")
    for report_number in report_numbers:
        if report_number.startswith('FERMILAB'):
            report = report_number
    return report
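
The selection logic above keeps the last FERMILAB-prefixed value it sees; a standalone illustration with made-up report numbers:

report_numbers = ['arXiv:1207.7214', 'FERMILAB-PUB-12-345-E']  # hypothetical values
report = None
for report_number in report_numbers:
    if report_number.startswith('FERMILAB'):
        report = report_number
print report  # -> FERMILAB-PUB-12-345-E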
Example #27
def migrate_980__ab(recid, rec):
    from invenio.bibrecord import record_add_field
    from invenio.search_engine import get_fieldvalues

    collections = get_fieldvalues(recid, "980__a")
    subcollections = get_fieldvalues(recid, "980__b")

    upload_type = []
    extras = []
    curated = True

    for val in collections:
        if val in [
                'DARK',
                'DELETED',
                'DUPLICATE',
                'PENDING',
                'REJECTED',
                'PROVISIONAL',
        ]:
            curated = False
            extras.append(val)
        if val in collection_mapping:
            upload_type.append(collection_mapping[val])
        elif val in newcolls:
            upload_type.append(val)

    for val in subcollections:
        if val in collection_mapping:
            upload_type.append(collection_mapping[val])
        elif val in newsubcolls:
            upload_type.append(val)

    if upload_type:
        upload_type = [upload_type[0]]

    is_public = False
    if curated:
        upload_type.append(('curated', ''))
        is_public = True

    for a, b in upload_type:
        if b:
            record_add_field(rec, '980', subfields=[('a', a), ('b', b)])
        else:
            record_add_field(rec, '980', subfields=[
                ('a', a),
            ])
    if extras:
        for e in extras:
            record_add_field(rec, '980', subfields=[
                ('a', e),
            ])
    return (rec, is_public)
Example #28
def get_eprint(recid):
    """Get the eprintt number from a record."""
    report_fermilab = None
    eprint = None
    url = None
    reports = get_fieldvalues(recid, '037__a')
    reports = reports + get_fieldvalues(recid, '037__z')
    if VERBOSE:
        print reports
    for report in reports:
        if re.search("FERMILAB", report):
            report_fermilab = report
            if VERBOSE:
                print report_fermilab
    if not report_fermilab:
        return None
    bfo = BibFormatObject(recid)
    eprint = bfe_arxiv.get_arxiv(bfo, category="no")
    if VERBOSE:
        print eprint
    if eprint:
        eprint = eprint[0]
        print report_fermilab, eprint
        return None
    for url_i in get_fieldvalues(recid, '8564_u'):
        if re.match(r'https?://inspirehep.net.*pdf', url_i):
            url = url_i
    for item in BibFormatObject(int(recid)).fields('8564_'):
        if item.has_key('y') or item.has_key('z') and item.has_key('u'):
            try:
                if re.search('fermilab', item['y'].lower()):
                    return None
            except KeyError:
                pass
            if item['u'].endswith('pdf'):
                url = item['u']
            try:
                if item['y'].lower() == 'fulltext':
                    url = item['u']
                if item['y'].lower() == 'poster':
                    url = None
                if item['y'].lower() == 'slides':
                    url = None
            except KeyError:
                pass
            try:
                if item['z'].lower() == 'openaccess':
                    url = item['u']
            except KeyError:
                pass
    if url:
        print report_fermilab, url
Example #29
def main(recids):
    icount = 1
    for recid in recids:
        recid = str(recid)
        title = get_fieldvalues(recid, '245__a')[0]
        title = title[:50]
        try:
            contact_email = get_fieldvalues(recid, '270__m')[0]
        except IndexError:
            contact_email = '*****@*****.**'
        try:
            contact_name = get_fieldvalues(recid, '270__p')[0]
            if "," in contact_name:
                contact_name = " ".join(contact_name.split(", ")[::-1])
            #contact_name = contact_name
        except IndexError:
            contact_name = 'Sir or Madam'
        if contact_email == '*****@*****.**':
            contact_email = '*****@*****.**'
            #contact_email = '*****@*****.**'
        elif contact_email == 'recruitment.service@cern':
            contact_email = 'Caroline.Dumont@cern'
        #contact_email = '*****@*****.**'
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"
        #contact_email = "*****@*****.**"

        try:      
            deadline = get_fieldvalues(recid, '046__i')[0]
        except IndexError:
            print 'PROBLEM: no deadline'
            print recid, contact_email, contact_name, title
            print ''
        try:
            print icount, '/', len(recids)
            print 'recid = ', recid
            print 'title = ', title
            print 'email = ', contact_email
            print 'name  = ', contact_name
            print 'dline = ', deadline
            print ' '
            send_jobs_mail(recid, contact_email, contact_name, title,
                           deadline)
        except:
            print 'PROBLEM'
            print recid, contact_email, contact_name, title, deadline
        icount += 1
def MBI_Mail_Blog_Modified_to_User(parameters, curdir, form, user_info=None):
    """
    This function sends an email to the user who modified any metadata of
    a blog record saying that the blog was successfully modified

    Parameters:

      * emailFile: Name of the file containing the email of the user

    """

    global rn, sysno
    FROMADDR = '%s Submission Engine <%s>' % (CFG_SITE_NAME,CFG_SITE_SUPPORT_EMAIL)
    sequence_id = bibtask_allocate_sequenceid(curdir)

    blog_title = "".join(["%s" % title.strip() for title in \
                         get_fieldvalues(int(sysno), "245__a")])

    blog_url = "".join(["%s" % url.strip() for url in \
                        get_fieldvalues(int(sysno), "520__u")])

    # The submitters email address is read from the file specified by 'emailFile'
    try:
        fp = open("%s/%s" % (curdir,parameters['emailFile']),"r")
        m_recipient = fp.read().replace ("\n"," ")
        fp.close()
    except:
        m_recipient = ""

    # create email body
    email_txt = "\nModifications done on the metadata of the blog record with URL [%s] and title '%s' have been correctly applied.\n\n" % (blog_url, blog_title)
    email_txt += "It will be soon accessible here: <%s/%s/%s>\n" % (CFG_SITE_URL, CFG_SITE_RECORD, sysno)
 
    # email_txt += get_nice_bibsched_related_message(curdir)
    email_txt = email_txt + "\nThank you for using %s Submission Interface.\n" % CFG_SITE_NAME
    
    email_subject = "Blog record modification done: [%(id)s]"

    if blog_title:
        email_subject = email_subject % {'id' : blog_title}
    else:
        email_subject = email_subject % {'id' : blog_url}

    ## send the mail, if there are any recipients or copy to admin
    if m_recipient or CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN:
        scheduled_send_email(FROMADDR, m_recipient.strip(), email_subject, email_txt,
                             copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN,
                             other_bibtasklet_arguments=['-I', str(sequence_id)])

    return ""
Example #31
def get_recid_and_reportnumber(recid=None,
                               reportnumber=None,
                               keep_original_reportnumber=True):
    """
    Given at least a recid or a reportnumber, this function will look into
    the system for the matching record and will return a normalized
    recid and the primary reportnumber.
    @raises ValueError: in case of no record matched.
    """
    if recid:
        ## Recid specified receives priority.
        recid = int(recid)
        values = get_fieldvalues(recid, CFG_PRIMARY_REPORTNUMBER)
        if values:
            ## Let's take whatever reportnumber is stored in the matching record
            reportnumber = values[0]
            return recid, reportnumber
        else:
            raise ValueError(
                "The record %s does not have a primary report number" % recid)
    elif reportnumber:
        ## Ok reportnumber specified, let's better try 1st with primary and then
        ## with other reportnumber
        recids = search_pattern(p='%s:"%s"' %
                                (CFG_PRIMARY_REPORTNUMBER, reportnumber))
        if not recids:
            ## Not found as primary
            recids = search_pattern(p='reportnumber:"%s"' % reportnumber)
        if len(recids) > 1:
            raise ValueError(
                'More than one record matches the reportnumber "%s": %s' %
                (reportnumber, ', '.join([str(i) for i in recids])))
        elif len(recids) == 1:
            recid = list(recids)[0]
            if keep_original_reportnumber:
                return recid, reportnumber
            else:
                reportnumbers = get_fieldvalues(recid,
                                                CFG_PRIMARY_REPORTNUMBER)
                if not reportnumbers:
                    raise ValueError(
                        "The matched record %s does not have a primary report number"
                        % recid)
                return recid, reportnumbers[0]
        else:
            raise ValueError(
                "No records are matched by the provided reportnumber: %s" %
                reportnumber)
    raise ValueError(
        "At least the recid or the reportnumber must be specified")
def get_ref_metadata_inspire(cls, ref, dois):
    """Get the metadata for a particular reference from INSPIRE."""
    if ref not in dois:
        return None
    ref = ref.replace('doi:', '')
    recid = perform_request_search(p='0247_a:' + ref, cc='HEP') + \
            perform_request_search(p='0247_a:' + ref, cc='Fermilab')
    try:
        recid = recid[0]
        title = get_fieldvalues(recid, '245__a')[0]
        author = get_fieldvalues(recid, '100__a')[0]
        return """This DOI is in INSPIRE
{0} : {1}""".format(author, title)
    except IndexError:
        return 'DOI should be in HEP but is not: ' + ref
def get_eprint(recid):
    """Get the eprintt number from a record."""
    report_fermilab = None
    eprint = None
    url = None
    reports = get_fieldvalues(recid, '037__a')
    reports = reports + get_fieldvalues(recid, '037__z')
    if VERBOSE:
        print reports
    for report in reports:
        if re.search("FERMILAB", report):
            report_fermilab = report
            if VERBOSE:
                print report_fermilab
    if not report_fermilab:
        return None
    bfo = BibFormatObject(recid)
    eprint = bfe_arxiv.get_arxiv(bfo, category = "no")
    if VERBOSE:
        print eprint
    if eprint:
        eprint = eprint[0]
        print report_fermilab, eprint
        return None
    for url_i in get_fieldvalues(recid, '8564_u'):
        if re.match(r'https?://inspirehep.net.*pdf', url_i):
            url = url_i
    for item in BibFormatObject(int(recid)).fields('8564_'):
        if item.has_key('y') or item.has_key('z') and item.has_key('u'):
            try:
                if re.search('fermilab', item['y'].lower()):
                    return None
            except KeyError:
                pass
            if item['u'].endswith('pdf'):
                url = item['u']
            try:
                if item['y'].lower() == 'fulltext':
                    url = item['u']
            except KeyError:
                pass
            try:
                if item['z'].lower() == 'openaccess':
                    url = item['u']
            except KeyError:
                pass
    if url:
        print report_fermilab, url
def main():
    counter = 0
    filename = 'ADS_eprints_missing_in_INSPIRE.csv'
    mismatch_filename = 'ADS_eprints_missing_in_INSPIRE_mismatch.csv'
    output = open(filename, 'w')
    mismatch_output = open(mismatch_filename, 'w')
    records = collections.defaultdict(dict)
    search = '0247_2:doi -037__9:arxiv'
    results = perform_request_search(p=search, cc='HEP')
    for r in results:
        doi = get_fieldvalues(r, '0247_a')
        if doi:
            records[r]['doi'] = doi
    eprints = []
    eprint_search = perform_request_search(p='037__9:arxiv', cc='HEP')
    for e in eprint_search:
        eprint = get_eprint_id(e)
        if eprint:
            eprint = eprint.replace('arxiv:', '')
            eprints.append(eprint)
    tree = ET.parse(DOCUMENT)
    root = tree.getroot()
    for child in root:
        if counter < 10:
            if 'doi' in child.attrib and 'preprint_id' in child.attrib:
                found_eprint = check_doi(child.attrib, records, eprints)
                if found_eprint:
                    if found_eprint[0] is True:
                        counter += 1
                        output.write('%s,%s,%s\n' % (found_eprint[0], found_eprint[1], found_eprint[2]))
                    else:
                        mismatch_output.write('%s,%s,%s\n' % (found_eprint[0], found_eprint[1], found_eprint[2]))
    output.close()
    print counter
Example #35
def openaire_register_doi(recid):
    """
    Register a DOI for new publication

    If it fails, it will retry every 10 minutes for 1 hour.
    """
    doi_val = get_fieldvalues(recid, "0247_a")[0]
    logger.debug("Found DOI %s in record %s" % (doi_val, recid))

    pid = PersistentIdentifier.get("doi", doi_val)
    if not pid:
        logger.debug("DOI not locally managed.")
        return
    else:
        logger.debug("DOI locally managed.")

    if not pid.has_object("rec", recid):
        raise Exception("DOI %s is not assigned to record %s." %
                        (doi_val, recid))

    if pid.is_new() or pid.is_reserved():
        logger.info("Registering DOI %s for record %s" % (doi_val, recid))

        url = "%s/record/%s" % (CFG_DATACITE_SITE_URL, recid)
        doc = format_record(recid, 'dcite')

        if not pid.register(url=url, doc=doc):
            m = "Failed to register DOI %s" % doi_val
            logger.error(m + "\n%s\n%s" % (url, doc))
            if not openaire_register_doi.request.is_eager:
                raise openaire_register_doi.retry(exc=Exception(m))
        else:
            logger.info("Successfully registered DOI %s." % doi_val)
Example #36
def main():
    from_base = 'http://openaire.cern.ch'
    to_base = config.CFG_SITE_URL

    # All records
    recids = search_pattern(p="0->Z", f="8564_u")

    print "<collection>"
    for recid in recids:
        # Get record information
        touched = False
        file_links = get_fieldvalues(recid, "8564_u")

        new_file_links = map(replace_link_func(from_base, to_base), file_links)

        # Print correcting to record
        rec = {}
        record_add_field(rec, "001", controlfield_value=str(recid))
        for old_link, new_link in zip(file_links, new_file_links):
            if old_link != new_link:
                touched = True
            record_add_field(rec, '856', ind1='4', subfields=[('u', new_link)])

        if touched:
            print record_xml_output(rec)
    print "</collection>"
def check_record_status(recid):
    """Checks to see if a PDF has already been sent
       or if we have an accepted manuscript.
    """

    if check_already_sent(recid):
        return True

    try:
        JOURNALS.append(get_fieldvalues(recid, '773__p')[0])
    except IndexError:
        print 'No journal on:\nhttp://inspirehep.net/record/' + \
               str(recid)

    if not PDF_CHECK:
        return False
    print "Checking accepted status", recid
    accepted_status = get_url(recid)
    if True in accepted_status:
        return True
    elif None in accepted_status:
        if VERBOSE:
            print 'No url on:\nhttp://inspirehep.net/record/' + str(recid)
        return False
    else:
        if VERBOSE:
            print recid, accepted_status
        return False
Example #38
def openaire_delete_doi(recid):
    """
    Delete DOI in DataCite

    If it fails, it will retry every 10 minutes for 1 hour.
    """
    doi_val = get_fieldvalues(recid, "0247_a")[0]
    logger.debug("Found DOI %s in record %s" % (doi_val, recid))

    pid = PersistentIdentifier.get("doi", doi_val)
    if not pid:
        logger.debug("DOI not locally managed.")
        return
    else:
        logger.debug("DOI locally managed.")

    if not pid.has_object("rec", recid):
        raise Exception("DOI %s is not assigned to record %s." %
                        (doi_val, recid))

    if pid.is_registered():
        logger.info("Inactivating DOI %s for record %s" % (doi_val, recid))

        if not pid.delete():
            m = "Failed to inactive DOI %s" % doi_val
            logger.error(m)
            if not openaire_delete_doi.request.is_eager:
                raise openaire_delete_doi.retry(exc=Exception(m))
        else:
            logger.info("Successfully inactivated DOI %s." % doi_val)
def generate_list_to_send(search):
    '''
    Generate a list to send to MSNET.
    '''

    filename = 'tmp_' + __file__
    filename = re.sub('.py', '_send.txt', filename)
    output = open(filename,'w')

    recids_nomatch = find_recids_nomatch()

    print search
    result_m = perform_request_search(p=search, cc='HEP')
    print search, len(result_m)
    search = "035__9:msnet"
    result_i = perform_request_search(p=search, cc='HEP')
    search = "0247_2:doi"
    result_d = perform_request_search(p=search, cc='HEP')
    result = intbitset(result_m) & intbitset(result_d) - intbitset(result_i)
    result = result - intbitset(recids_nomatch)
    for recid in result:
        try:
            doi = get_fieldvalues(recid, '0247_a')[0]
        except IndexError:
            print 'Problem with:', recid, doi
            break
        output.write(str(recid) + ',' + doi + '\n')
    output.close()
    print filename
def get_collaborations(recid):
    """Get the collaboration information"""
    try:
        collaborations = get_fieldvalues(recid, "710__g")
        return '; '.join([unicode(a, "utf-8") for a in collaborations])
    except StandardError:
        return None
def get_affiliations(recid, long_flag):
    """Get affiliations using OSTI institution names."""
    affiliations = get_fieldvalues(recid, "100__u") \
                 + get_fieldvalues(recid, "700__u")
    affiliations.append("Fermilab")
    doe_affs = []
    doe_affs_long = []
    for aff in set(affiliations):
        #if aff in INSPIRE_AFF_DICT and not INSPIRE_AFF_DICT[aff] in doe_affs:
        if aff in INSPIRE_AFF_DICT:
            doe_affs.append(INSPIRE_AFF_DICT[aff])
            doe_affs_long.append(DOE_AFF_DICT[INSPIRE_AFF_DICT[aff]])
    if long_flag:
        return '; '.join([a for a in doe_affs_long])
    else:
        return '; '.join([a for a in doe_affs])
def get_collaborations(recid):
    """Get the collaboration information"""
    try:
        collaborations = get_fieldvalues(recid, "710__g")
        return '; '.join([unicode(a, "utf-8") for a in collaborations])
    except StandardError:
        return None
Example #43
def openaire_register_doi(recid):
    """
    Register a DOI for new publication

    If it fails, it will retry every 10 minutes for 1 hour.
    """
    doi_val = get_fieldvalues(recid, "0247_a")[0]
    logger.debug("Found DOI %s in record %s" % (doi_val, recid))

    pid = PersistentIdentifier.get("doi", doi_val)
    if not pid:
        logger.debug("DOI not locally managed.")
        return
    else:
        logger.debug("DOI locally managed.")

    if not pid.has_object("rec", recid):
        raise Exception("DOI %s is not assigned to record %s." % (doi_val, recid))

    if pid.is_new() or pid.is_reserved():
        logger.info("Registering DOI %s for record %s" % (doi_val, recid))

        url = "%s/record/%s" % (CFG_DATACITE_SITE_URL, recid)
        doc = format_record(recid, 'dcite')

        if not pid.register(url=url, doc=doc):
            m = "Failed to register DOI %s" % doi_val
            logger.error(m + "\n%s\n%s" % (url, doc))
            if not openaire_register_doi.request.is_eager:
                raise openaire_register_doi.retry(exc=Exception(m))
        else:
            logger.info("Successfully registered DOI %s." % doi_val)
def get_author_number(recid):
    """Gets number of authors."""
    author_list = get_fieldvalues(recid, "700__a")
    try:
        return len(author_list)
    except IndexError:
        return 0
Example #45
def openaire_delete_doi(recid):
    """
    Delete DOI in DataCite

    If it fails, it will retry every 10 minutes for 1 hour.
    """
    doi_val = get_fieldvalues(recid, "0247_a")[0]
    logger.debug("Found DOI %s in record %s" % (doi_val, recid))

    pid = PersistentIdentifier.get("doi", doi_val)
    if not pid:
        logger.debug("DOI not locally managed.")
        return
    else:
        logger.debug("DOI locally managed.")

    if not pid.has_object("rec", recid):
        raise Exception("DOI %s is not assigned to record %s." % (doi_val, recid))

    if pid.is_registered():
        logger.info("Inactivating DOI %s for record %s" % (doi_val, recid))

        if not pid.delete():
            m = "Failed to inactive DOI %s" % doi_val
            logger.error(m)
            if not openaire_delete_doi.request.is_eager:
                raise openaire_delete_doi.retry(exc=Exception(m))
        else:
            logger.info("Successfully inactivated DOI %s." % doi_val)
def create_xml(recid):
    record = get_record(recid)
    correct_record = {}
    record_add_field(correct_record, '001', controlfield_value=str(recid))
    field_instances = record_get_field_instances(record, tag[0:3],
                                                     tag[3], tag[4])
    correct_subfields = []
    for field_instance in field_instances:
        correct_subfields = []
        for code, value in field_instance[0]:
            if volume_letter:
                if code == 'p':
                    correct_subfields.append(('p', repl_journal))
                elif code == 'v':
                    volume = get_fieldvalues(recid, '773__v')
                    for v in volume:
                        if v[0].isalpha():
                            correct_subfields.append(('v', v))
                        else: 
                            new_volume = volume_letter + v
                            correct_subfields.append(('v', new_volume))
                else:
                    correct_subfields.append((code, value))
            else:
                if code == 'p':
                    correct_subfields.append(('p', repl_journal))
                else:
                    correct_subfields.append((code, value))
        record_add_field(correct_record, tag[0:3], tag[3], tag[4],
                             subfields=correct_subfields)
    return print_rec(correct_record)
Example #47
def unlinked(req):
    """
    Return an id-ordered list of citation log entries of at most 10000
    rows.
    """
    from invenio.dbquery import run_sql
    from invenio.search_engine import get_fieldvalues, get_collection_reclist
    useful_personids1 = intbitset(run_sql("SELECT distinct personid FROM aidPERSONIDDATA WHERE tag LIKE 'extid:%'"))
    useful_personids2 = intbitset(run_sql("SELECT distinct personid from aidPERSONIDPAPERS where flag=2"))
    linked_personids = intbitset(run_sql("SELECT personid FROM aidPERSONIDDATA WHERE tag='extid:INSPIREID'"))
    names = dict(run_sql("SELECT personid, data FROM aidPERSONIDDATA WHERE tag='canonical_name'"))
    matched_names = [name.lower().strip() for name in get_fieldvalues(get_collection_reclist('HepNames'), '035__a')]
    personid_to_match = (useful_personids1 | useful_personids2) - linked_personids

    body = ['<ol>']
    for personid in personid_to_match:
        name = names.get(personid, str(personid))
        if name.lower().strip() in matched_names:
            continue
        body.append('<li><a href="%(siteurl)s/author/profile/%(bai)s" target="_blank">%(bai)s</a></li>' % {
                'siteurl': escape(CFG_SITE_SECURE_URL, True),
                'bai': escape(name, True)})
    body.append('</ol>')
    body = '\n'.join(body)

    return page(req=req, body=body, title="Unlinked useful BAIs")
Example #48
def render_other_dataset_html(recid, display_link = True):
    """ Try to render the basic content of an unknown dataset, both for the tab and the record
    @param display_link Indicates if a link to the data record should be displayed
    @type display_link boolean
    """
    from invenio.search_engine import get_fieldvalues

    c = [] #collecting parts of the output
    c.append("<div style=\"background-color: #ececec; padding:10px;\">")

    comments = get_fieldvalues(recid, '520__h')
    if comments:
        comments = comments[0]
    else:
        comments = ''

    c.append("<br />")
    c.append("<b>Description: </b> " + comments + "<br />")
    c.append("<br />")

    link_txt = "Go to the record"
    if display_link:
        c.append("<a href=\"%s/record/%s\">%s</a>" % (CFG_SITE_URL, str(recid), link_txt))

    c.append("<br /><br />")
    c.append("</div>")
    return "\n".join(c)
Example #49
def book_title_from_MARC(recid):
    """
    Retrieve book's title from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's title
    """

    book_title = ' '.join(get_fieldvalues(recid, "245__a") + \
                          get_fieldvalues(recid, "245__b") + \
                          get_fieldvalues(recid, "245__n") + \
                          get_fieldvalues(recid, "245__p"))

    return book_title
Example #50
def check_record(record):
    """ replace old ids in 999C50 with superseeding ids """

    for pos, val in record.iterfield('999C50'):
        if val:
            try:
                val = int(val)
            except ValueError:
                record.warn("invalid non-digit id in %r" % (pos, ))
                continue

            if val in superseeded_recids_cache():
                newrecs = set(get_fieldvalues(val, '970__d'))
                if len(newrecs) == 1:
                    newid = newrecs.pop()
                    try:
                        int(newid)
                    except ValueError:
                        record.warn("non digit value in 970__d for %r" %
                                    (pos, ))
                        continue
                    record.amend_field(pos, newid,
                                       "replaced %s with %s" % (val, newid))

                elif len(newrecs) > 1:
                    record.warn("more than one 970__d for %r" % (pos, ))
Example #51
def task_run_core():
    """
    run daemon
    """

    #write_message("Getting expired loans ...", verbose=9)
    expired_loans = get_expired_loan()

    for (borrower_id, loan_id, recid) in expired_loans:
        (number_of_letters, date_letters) = get_overdue_letters_info(loan_id)

        if number_of_letters == 0:
            content = generate_email_body(CFG_BIBCIRCULATION_TEMPLATES['RECALL1'], loan_id)
        elif number_of_letters == 1 and send_second_recall(date_letters):
            content = generate_email_body(CFG_BIBCIRCULATION_TEMPLATES['RECALL2'], loan_id)
        elif number_of_letters == 2 and send_third_recall(date_letters):
            content = generate_email_body(CFG_BIBCIRCULATION_TEMPLATES['RECALL3'], loan_id)
        else:
            content = generate_email_body(CFG_BIBCIRCULATION_TEMPLATES['RECALL3'], loan_id)

        title = ''.join(get_fieldvalues(recid, "245__a"))
        subject = "LOAN RECALL: " + title
        update_expired_loan(loan_id)
        #write_message("Updating information about expired loans")
        send_overdue_letter(borrower_id, subject, content)
        #write_message("Sending overdue letter")

    #write_message("Done!!")

    return 1
Example #52
def task_run_core():
    """
    run daemon
    """

    #write_message("Getting expired loans ...", verbose=9)
    expired_loans = get_expired_loan()

    for (borrower_id, loan_id, recid) in expired_loans:
        (number_of_letters, date_letters) = get_overdue_letters_info(loan_id)

        if number_of_letters == 0:
            content = generate_email_body(
                CFG_BIBCIRCULATION_TEMPLATES['RECALL1'], loan_id)
        elif number_of_letters == 1 and send_second_recall(date_letters):
            content = generate_email_body(
                CFG_BIBCIRCULATION_TEMPLATES['RECALL2'], loan_id)
        elif number_of_letters == 2 and send_third_recall(date_letters):
            content = generate_email_body(
                CFG_BIBCIRCULATION_TEMPLATES['RECALL3'], loan_id)
        else:
            content = generate_email_body(
                CFG_BIBCIRCULATION_TEMPLATES['RECALL3'], loan_id)

        title = ''.join(get_fieldvalues(recid, "245__a"))
        subject = "LOAN RECALL: " + title
        update_expired_loan(loan_id)
        #write_message("Updating information about expired loans")
        send_overdue_letter(borrower_id, subject, content)
        #write_message("Sending overdue letter")

    #write_message("Done!!")

    return 1
def get_affiliations(recid, long_flag):
    """Get affiliations using OSTI institution names."""
    affiliations = get_fieldvalues(recid, "100__u") \
                 + get_fieldvalues(recid, "700__u")
    affiliations.append("Fermilab")
    doe_affs = []
    doe_affs_long = []
    for aff in set(affiliations):
        #if aff in INSPIRE_AFF_DICT and not INSPIRE_AFF_DICT[aff] in doe_affs:
        if aff in INSPIRE_AFF_DICT:
            doe_affs.append(INSPIRE_AFF_DICT[aff])
            doe_affs_long.append(DOE_AFF_DICT[INSPIRE_AFF_DICT[aff]])
    if long_flag:
        return '; '.join([a for a in doe_affs_long])
    else:
        return '; '.join([a for a in doe_affs])
def book_title_from_MARC(recid):
    """
    Retrieve book's title from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's title
    """

    book_title = ' '.join(get_fieldvalues(recid, "245__a") + \
                          get_fieldvalues(recid, "245__b") + \
                          get_fieldvalues(recid, "245__n") + \
                          get_fieldvalues(recid, "245__p"))

    return book_title
Example #55
def generate_list_to_send(search):
    '''
    Generate a list to send to MSNET.
    '''

    filename = 'tmp_' + __file__
    filename = re.sub('.py', '_send.txt', filename)
    output = open(filename, 'w')

    recids_nomatch = find_recids_nomatch()

    print search
    result_m = perform_request_search(p=search, cc='HEP')
    print search, len(result_m)
    search = "035__9:msnet"
    result_i = perform_request_search(p=search, cc='HEP')
    search = "0247_2:doi"
    result_d = perform_request_search(p=search, cc='HEP')
    result = intbitset(result_m) & intbitset(result_d) - intbitset(result_i)
    result = result - intbitset(recids_nomatch)
    for recid in result:
        try:
            doi = get_fieldvalues(recid, '0247_a')[0]
        except IndexError:
            print 'Problem with:', recid, doi
            break
        output.write(str(recid) + ',' + doi + '\n')
    output.close()
    print filename