# Exemplo n.º 1
# 0
def solr_add_range(lower_recid, upper_recid):
    """
    Add the indexable field values of every record in the inclusive
    range [lower_recid, upper_recid] to Solr.

    The fulltext information is preserved: it is re-read from the
    record's attached documents via BibRecDocs.

    @param lower_recid: first record id of the range (inclusive)
    @param upper_recid: last record id of the range (inclusive)
    """
    for recid in range(lower_recid, upper_recid + 1):
        if record_exists(recid):
            # Each field is fetched best-effort: a missing field
            # (IndexError on [0]) or undecodable content falls back to "".
            # Use `except Exception` rather than a bare `except:` so that
            # KeyboardInterrupt/SystemExit can still stop the task (this
            # loop cooperates with task_sleep_now_if_required below).
            try:
                abstract = unicode(remove_control_characters(get_fieldvalues(recid, CFG_MARC_ABSTRACT)[0]), 'utf-8')
            except Exception:
                abstract = ""
            try:
                first_author = remove_control_characters(get_fieldvalues(recid, CFG_MARC_AUTHOR_NAME)[0])
                additional_authors = remove_control_characters(reduce(lambda x, y: x + " " + y, get_fieldvalues(recid, CFG_MARC_ADDITIONAL_AUTHOR_NAME), ''))
                author = unicode(first_author + " " + additional_authors, 'utf-8')
            except Exception:
                author = ""
            try:
                bibrecdocs = BibRecDocs(recid)
                fulltext = unicode(remove_control_characters(bibrecdocs.get_text()), 'utf-8')
            except Exception:
                fulltext = ""
            try:
                keyword = unicode(remove_control_characters(reduce(lambda x, y: x + " " + y, get_fieldvalues(recid, CFG_MARC_KEYWORD), '')), 'utf-8')
            except Exception:
                keyword = ""
            try:
                title = unicode(remove_control_characters(get_fieldvalues(recid, CFG_MARC_TITLE)[0]), 'utf-8')
            except Exception:
                title = ""
            solr_add(recid, abstract, author, fulltext, keyword, title)

    SOLR_CONNECTION.commit()
    task_sleep_now_if_required(can_stop_too=True)
def oaigetrecord(args):
    """Return record 'identifier' in 'metadataPrefix' format for OAI harvesting.

    - if the record does not exist, return oai_error 'idDoesNotExist'.

    - if the record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', return only the header, with status
      'deleted'.

    - if the record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      return oai_error 'idDoesNotExist'.
    """
    arg = parse_args(args)
    sysno = oaigetsysno(arg['identifier'])
    _record_exists = record_exists(sysno)
    # A record is served when it exists (1), or when it is deleted (-1)
    # but the deleted-records policy still exposes it.
    servable = (_record_exists == 1 or
                (_record_exists == -1 and CFG_OAI_DELETED_POLICY != 'no'))
    if servable:
        body = print_record(sysno, arg['metadataPrefix'], _record_exists)
        return oai_header(args, "GetRecord") + body + oai_footer("GetRecord")
    body = oai_error("idDoesNotExist", "invalid record Identifier")
    return oai_error_header(args, "GetRecord") + body + oai_error_footer("GetRecord")
# Exemplo n.º 3
# 0
def oaigetrecord(args):
    """Returns record 'identifier' according to 'metadataPrefix' format for OAI metadata harvesting.

    - if record does not exist, return oai_error 'idDoesNotExist'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', then return only header, with status
      'deleted'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      then return oai_error 'idDoesNotExist'.
    """

    arg = parse_args(args)
    out = ""
    # Map the OAI identifier onto an internal system number (recid).
    sysno = oaigetsysno(arg['identifier'])
    # record_exists: 1 = exists, -1 = deleted, 0 = never existed
    # (value codes as handled by the error branches elsewhere in this file).
    _record_exists = record_exists(sysno)
    if _record_exists == 1 or \
           (_record_exists == -1 and CFG_OAI_DELETED_POLICY != 'no'):
        # Existing record, or a deleted one that the policy still exposes.
        out = print_record(sysno, arg['metadataPrefix'], _record_exists)
        out = oai_header(args, "GetRecord") + out + oai_footer("GetRecord")
    else:
        out = oai_error("idDoesNotExist", "invalid record Identifier")
        out = oai_error_header(
            args, "GetRecord") + out + oai_error_footer("GetRecord")
    return out
def oailistrecords(args):
    """Generates response to oailistrecords verb.

    Serves records in batches of CFG_OAI_LOAD; when more records remain,
    a resumptionToken is emitted and the remaining sysnos are cached on
    disk so the next request can resume where this one stopped.
    """

    arg = parse_args(args)

    out = ""
    resumptionToken_printed = False

    sysnos = []  # candidate system numbers to serve in this response
    sysno  = []  # sysnos deferred to the next batch (cached with the token)
    # check if the resumptionToken did not expire
    if arg['resumptionToken']:
        filename = os.path.join(CFG_CACHEDIR, 'RTdata', arg['resumptionToken'])
        if os.path.exists(filename) == 0:
            out = oai_error("badResumptionToken", "ResumptionToken expired")
            out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords")
            return out

    if arg['resumptionToken'] != "":
        # Resuming: the metadataPrefix was stored as the cache's last element.
        sysnos = oaicacheout(arg['resumptionToken'])
        arg['metadataPrefix'] = sysnos.pop()
    else:
        sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until'])

    if len(sysnos) == 0: # noRecordsMatch error

        out = out + oai_error("noRecordsMatch", "no records correspond to the request")
        out = oai_error_header(args, "ListRecords") + out + oai_error_footer("ListRecords")
        return out

    i = 0  # number of records actually emitted so far
    for sysno_ in sysnos:
        if sysno_:
            if i >= CFG_OAI_LOAD:          # batch full: cache the rest
                if not resumptionToken_printed: # emit the token only once
                    arg['resumptionToken'] = oaigenresumptionToken()
                    extdate = oaigetresponsedate(CFG_OAI_EXPIRE)
                    if extdate:
                        out = "%s <resumptionToken expirationDate=\"%s\">%s</resumptionToken>\n" % (out, extdate, arg['resumptionToken'])
                    else:
                        out = "%s <resumptionToken>%s</resumptionToken>\n" % (out, arg['resumptionToken'])
                    resumptionToken_printed = True
                sysno.append(sysno_)
            else:
                _record_exists = record_exists(sysno_)
                if not (_record_exists == -1 and CFG_OAI_DELETED_POLICY == "no"):
                    # Produce output only if record exists and had to be printed
                    i = i + 1 # Increment limit only if record is returned
                    res = print_record(sysno_, arg['metadataPrefix'], _record_exists)
                    if res:
                        out += res

    if resumptionToken_printed:
        # Persist the deferred sysnos (plus the metadataPrefix) under the token.
        oaicacheclean()
        sysno.append(arg['metadataPrefix'])
        oaicachein(arg['resumptionToken'], sysno)

    out = oai_header(args, "ListRecords") + out + oai_footer("ListRecords")
    return out
# Exemplo n.º 5
# 0
def get_bibrecord(recid):
    """Return the record with *recid* wrapped as a BibRecord structure."""
    if not record_exists(recid):
        return None
    revision_ids = get_record_revision_ids(recid)
    if revision_ids:
        # Prefer the MARCXML of the most recent stored revision.
        latest = max(revision_ids)
        return create_record(get_marcxml_of_revision_id(latest))[0]
    # No stored revisions: fall back to the live record.
    return get_record(recid)
# Exemplo n.º 6
# 0
def get_bibrecord(recid):
    """Return record in BibRecord wrapping."""
    if record_exists(recid):
        record_revision_ids = get_record_revision_ids(recid)
        if record_revision_ids:
            # Prefer the MARCXML of the most recent stored revision.
            return create_record(
                get_marcxml_of_revision_id(max(record_revision_ids)))[0]
        else:
            # No stored revisions: fall back to the live record.
            return get_record(recid)
    # Implicitly returns None when the record does not exist.
# Exemplo n.º 7
# 0
def Get_Recid_Number(parameters, curdir, form, user_info=None):
    """
    This function gets the value contained in the [edsrn] file and
    stores it in the 'rn' global variable which is the recid of the
    corresponding record.

    Parameters:

    * edsrn: Name of the file which stores the reference.  This
    value depends on the web form configuration you
    did. It should contain the name of the form element
    used for storing the reference of the document.
    """

    global rn, sysno

    # Path of file containing recid
    edsrn_path = "%s/%s" % (curdir, parameters['edsrn'])
    if os.path.exists(edsrn_path):
        try:
            fp = open(edsrn_path, "r")
            rn = fp.read()
            fp.close()  # was never closed in the original: file-handle leak
            rn = re.sub(r"[\n\r ]+", "", rn)
        except IOError:
            exception_prefix = "Error in WebSubmit function " \
                                "Get_Recid_Number. Tried to open " \
                                "edsrn file [%s/edsrn] but was " \
                                "unable to." % curdir
            register_exception(prefix=exception_prefix)
            rn = ""

    else:
        rn = ""

    if rn:
        act = form['act']
        if act not in ['APS']:
            try:
                if record_exists(int(rn)) == 1:
                    sysno = int(rn)
                    coll = get_fieldvalues(sysno, '980__a')[0]
                    if act == 'MBI':
                        if coll not in ['BLOG']:
                            raise InvenioWebSubmitFunctionStop(CFG_MODIFY_BLOG_ERROR)
                    if act == 'DBI':
                        if coll not in ['BLOG']:
                            raise InvenioWebSubmitFunctionStop(CFG_DELETE_BLOG_ERROR)
                    if act == 'DPI':
                        if coll not in ['BLOGPOST']:
                            raise InvenioWebSubmitFunctionStop(CFG_DELETE_POST_ERROR)
                else:
                    raise InvenioWebSubmitFunctionStop(CFG_ALERT_RECORD_NOT_FOUND % rn)
            except InvenioWebSubmitFunctionStop:
                # Bug fix: the original bare `except:` also caught the
                # InvenioWebSubmitFunctionStop errors raised just above and
                # replaced their specific messages (modify/delete blog/post
                # errors) with the generic "record not found" one.
                # Propagate them unchanged instead.
                raise
            except Exception:
                # E.g. rn is not an integer, or the 980__a lookup failed.
                raise InvenioWebSubmitFunctionStop(CFG_ALERT_RECORD_NOT_FOUND % rn)

    return ""
# Exemplo n.º 8
# 0
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None.
       If mode=='revision' then recid parameter is considered as revid."""
    record = None
    # The literal string 'none' (not None) selects the empty-record mode.
    if recid == 'none':
        mode = 'none'
    if mode == 'recid':
        record_status = record_exists(recid)
        # check for errors: 0 = non-existent, -1 = deleted
        if record_status == 0:
            result['resultCode'], result[
                'resultText'] = 1, 'Non-existent record: %s' % recid
        elif record_status == -1:
            result['resultCode'], result[
                'resultText'] = 1, 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'], result[
                'resultText'] = 1, 'Record %s locked by queue' % recid
        else:
            # Parse the record's MARCXML ('xm') into the BibRecord structure.
            record = create_record(print_record(recid, 'xm'))[0]
            record_order_subfields(record)

    elif mode == 'tmpfile':
        file_path = '%s_%s.xml' % (_get_file_path(
            recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path):  #check if file doesn't exist
            result['resultCode'], result[
                'resultText'] = 1, 'Temporary file doesnt exist'
        else:  #open file
            tmpfile = open(file_path, 'r')
            record = create_record(tmpfile.read())[0]
            tmpfile.close()

    elif mode == 'revision':
        # In this mode `recid` actually carries a revision id.
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'], result[
                    'resultText'] = 1, 'The specified revision does not exist'
        else:
            result['resultCode'], result[
                'resultText'] = 1, 'Invalid revision id'

    elif mode == 'none':
        return {}

    else:
        result['resultCode'], result[
            'resultText'] = 1, 'Invalid record mode for record2'
    return record
# Exemplo n.º 9
# 0
        def getfile(req, form):
            """Serve the files attached to record self.recid, after
            authorization and record-existence checks.
            """
            args = wash_urlargd(form, websubmit_templates.files_default_urlargd)
            ln = args["ln"]

            _ = gettext_set_language(ln)

            uid = getUid(req)
            user_info = collect_user_info(req)

            verbose = args["verbose"]
            if verbose >= 1 and not isUserSuperAdmin(user_info):
                # Only SuperUser can see all the details!
                verbose = 0

            # Reject guests outright when the site access level is strict.
            if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
                return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid), navmenuid="submit")

            # record_exists < 1 covers both non-existent (0) and deleted (-1).
            if record_exists(self.recid) < 1:
                msg = "<p>%s</p>" % _("Requested record does not seem to exist.")
                return warningMsg(msg, req, CFG_SITE_NAME, ln)

            if record_empty(self.recid):
                msg = "<p>%s</p>" % _("Requested record does not seem to have been integrated.")
                return warningMsg(msg, req, CFG_SITE_NAME, ln)

            (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid)
            if auth_code and user_info["email"] == "guest":
                # Restricted record, anonymous user: send through login with a
                # cookie that brings the user back to the current URI.
                cookie = mail_cookie_create_authorize_action(
                    VIEWRESTRCOLL, {"collection": guess_primary_collection_of_a_record(self.recid)}
                )
                target = "/youraccount/login" + make_canonical_urlargd(
                    {"action": cookie, "ln": ln, "referer": CFG_SITE_URL + user_info["uri"]}, {}
                )
                return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                return page_not_authorized(req, "../", text=auth_message)

            readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

            # From now on: either the user provided a specific file
            # name (and a possible version), or we return a list of
            # all the available files. In no case are the docids
            # visible.
            try:
                bibarchive = BibRecDocs(self.recid)
            except InvenioWebSubmitFileError, e:
                register_exception(req=req, alert_admin=True)
                msg = "<p>%s</p><p>%s</p>" % (
                    _("The system has encountered an error in retrieving the list of files for this document."),
                    _("The error has been logged and will be taken in consideration as soon as possible."),
                )
                return warningMsg(msg, req, CFG_SITE_NAME, ln)
            # NOTE(review): the function appears to continue beyond this
            # excerpt; the remainder is not visible here.
# Exemplo n.º 10
# 0
def oai_get_recid(identifier):
    """Returns the recid corresponding to the OAI identifier. Prefer a non deleted
    record if multiple recids matches but some of them are deleted (e.g. in
    case of merging). Returns None if no record matches."""
    if not identifier:
        return None
    recids = search_pattern(p=identifier, f=CFG_OAI_ID_FIELD, m='e')
    if not recids:
        return None
    restricted_recids = get_all_restricted_recids()
    # First existing, non-restricted match wins.
    for candidate in recids:
        if record_exists(candidate) > 0 and candidate not in restricted_recids:
            return candidate
    # Fall back to the last candidate (possibly deleted), unless restricted.
    if candidate not in restricted_recids:
        return candidate
    return None
# Exemplo n.º 11
# 0
def oai_get_recid(identifier):
    """Returns the recid corresponding to the OAI identifier. Prefer a non deleted
    record if multiple recids matches but some of them are deleted (e.g. in
    case of merging). Returns None if no record matches."""
    if identifier:
        recids = search_pattern(p=identifier, f=CFG_OAI_ID_FIELD, m='e')
        if recids:
            restricted_recids = get_all_restricted_recids()
            for recid in recids:
                # First existing (record_exists > 0), non-restricted match wins.
                if record_exists(recid) > 0 and recid not in restricted_recids:
                    return recid
            # Fallback: `recid` still holds the last candidate from the loop;
            # return it (possibly a deleted record) unless it is restricted.
            if recid not in restricted_recids:
                return recid
    return None
# Exemplo n.º 12
# 0
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None.
       If mode=='revision' then recid parameter is considered as revid."""
    # The literal string 'none' (not None) forces the empty-record mode.
    if recid == 'none':
        mode = 'none'
    if mode == 'none':
        return {}

    record = None

    if mode == 'recid':
        record_status = record_exists(recid)
        # Report the precise reason why the record cannot be fetched.
        if record_status == 0:
            result['resultCode'] = 1
            result['resultText'] = 'Non-existent record: %s' % recid
        elif record_status == -1:
            result['resultCode'] = 1
            result['resultText'] = 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'] = 1
            result['resultText'] = 'Record %s locked by queue' % recid
        else:
            record = create_record(print_record(recid, 'xm'))[0]
            record_order_subfields(record)

    elif mode == 'tmpfile':
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path):
            # The temporary merge file was never written (or was removed).
            result['resultCode'] = 1
            result['resultText'] = 'Temporary file doesnt exist'
        else:
            tmpfile = open(file_path, 'r')
            record = create_record(tmpfile.read())[0]
            tmpfile.close()

    elif mode == 'revision':
        # In this mode `recid` actually carries a revision id.
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'] = 1
                result['resultText'] = 'The specified revision does not exist'
        else:
            result['resultCode'] = 1
            result['resultText'] = 'Invalid revision id'

    else:
        result['resultCode'] = 1
        result['resultText'] = 'Invalid record mode for record2'

    return record
def _get_breaking_news(lang, journal_name):
    """
    Gets the 'Breaking News' articles that are currently active according to
    start and end dates.
    """
    # CERN Bulletin only
    if not journal_name.lower() == 'cernbulletin':
        return ''
    # Look for active breaking news (live records only: record_exists == 1)
    breaking_news_recids = [recid for recid in search_pattern(p='980__a:BULLETINBREAKING') \
                            if record_exists(recid) == 1]
    today = time.mktime(time.localtime())
    breaking_news = ""
    for recid in breaking_news_recids:
        temp_rec = BibFormatObject(recid)
        try:
            # 925__b holds the end date in mm/dd/yyyy format.
            end_date = time.mktime(time.strptime(temp_rec.field("925__b"),
                                                 "%m/%d/%Y"))
        except:
            # Unparsable/missing end date: treat the item as long expired.
            end_date = time.mktime(time.strptime("01/01/1970", "%m/%d/%Y"))
        if end_date < today:
            continue
        try:
            # 925__a holds the start date in mm/dd/yyyy format.
            start_date = time.mktime(time.strptime(temp_rec.field("925__a"),
                                                   "%m/%d/%Y"))
        except:
            # Unparsable/missing start date: treat as far in the future.
            start_date = time.mktime(time.strptime("01/01/2050", "%m/%d/%Y"))
        if start_date > today:
            continue
        publish_date = temp_rec.field("269__c")
        if lang == 'fr':
            # French title variant lives in 246_1a.
            title = temp_rec.field("246_1a")
        else:
            title = temp_rec.field("245__a")
        breaking_news += '''
<h2 class="%s">%s<br/>
    <strong>
        <a href="%s/journal/popup?name=%s&amp;type=breaking_news&amp;record=%s&amp;ln=%s" target="_blank">%s</a>
    </strong>
</h2>
''' % ("", publish_date, CFG_SITE_URL, journal_name, recid, lang, title)
    if breaking_news:
        breaking_news = '<li>%s</li>' % breaking_news

    return breaking_news
# Exemplo n.º 14
# 0
def get_existing_records_for_reportnumber(reportnum):
    """Given a report number, return a list of recids of real (live) records
       that are associated with it.
       That's to say if the record does not exist (perhaps deleted, for example)
       its recid will NOT be returned in the list.

       @param reportnum: the report number for which recids are to be returned.
       @type reportnum: string
       @return: list of recids.
       @rtype: list
       @note: If reportnum was not found in phrase indexes, the function searches
           directly in bibxxx tables via MARC tags, so that the record does not
           have to be phrase-indexed.
    """
    existing_records = []  ## List of the recids of existing (live) records

    ## Get list of records with the report-number: (first in phrase indexes)
    reclist = list(search_pattern(req=None,
                                  p=reportnum,
                                  f="reportnumber",
                                  m="e"))
    if not reclist:
        # Maybe the record has not been indexed yet? (look in bibxxx tables)
        tags = get_field_tags("reportnumber")
        for tag in tags:
            recids = list(search_pattern(req=None,
                                         p=reportnum,
                                         f=tag,
                                         m="e"))
            reclist.extend(recids)

        reclist = dict.fromkeys(reclist).keys() # Remove duplicates

    ## Loop through all recids retrieved and testing to see whether the record
    ## actually exists or not. If none of the records exist, there is no record
    ## with this reportnumber; If more than one of the records exists, then
    ## there are multiple records with the report-number; If only one record
    ## exists, then everything is OK,
    for rec in reclist:
        # record_exists == 1 means the record is live (not deleted).
        rec_exists = record_exists(rec)
        if rec_exists == 1:
            ## This is a live record; record the recid:
            existing_records.append(rec)
    return existing_records
# Exemplo n.º 15
# 0
def oailistmetadataformats(args):
    "Generates response to oailistmetadataformats verb."

    arg = parse_args(args)
    out = ""

    # By default the formats are listed; when an identifier is supplied,
    # list them only if that record is visible under the configured
    # deleted-records policy.
    flag = 1

    if arg['identifier'] != "":
        flag = 0
        sysno = oaigetsysno(arg['identifier'])
        _record_exists = record_exists(sysno)
        if _record_exists == 1 or \
           (_record_exists == -1 and CFG_OAI_DELETED_POLICY != "no"):
            flag = 1
        else:
            out += oai_error("idDoesNotExist", "invalid record Identifier")
            return oai_error_header(args, "ListMetadataFormats") + out + \
                oai_error_footer("ListMetadataFormats")

    if flag:
        out += (
            "   <metadataFormat>\n"
            "    <metadataPrefix>oai_dc</metadataPrefix>\n"
            "    <schema>http://www.openarchives.org/OAI/1.1/dc.xsd</schema>\n"
            "    <metadataNamespace>http://purl.org/dc/elements/1.1/</metadataNamespace>\n"
            "   </metadataFormat>\n"
            "   <metadataFormat>\n"
            "    <metadataPrefix>marcxml</metadataPrefix>\n"
            # Infoscience modification
            "    <schema>http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd</schema>\n"
            "    <metadataNamespace>http://www.loc.gov/MARC21/slim</metadataNamespace>\n"
            "   </metadataFormat>\n"
        )

    return oai_header(args, "ListMetadataFormats") + out + \
        oai_footer("ListMetadataFormats")
# Exemplo n.º 16
# 0
def _get_breaking_news(lang, journal_name):
    """
    Gets the 'Breaking News' articles that are currently active according to
    start and end dates.
    """
    # The feature is specific to the CERN Bulletin journal.
    if journal_name.lower() != 'cernbulletin':
        return ''

    now = time.mktime(time.localtime())
    items = ""
    # Candidate records, restricted to live ones.
    active_recids = [r for r in search_pattern(p='980__a:BULLETINBREAKING')
                     if record_exists(r) == 1]
    for recid in active_recids:
        bfo = BibFormatObject(recid)
        try:
            ends = time.mktime(time.strptime(bfo.field("925__b"), "%m/%d/%Y"))
        except:
            # Unparsable end date: treat as long expired.
            ends = time.mktime(time.strptime("01/01/1970", "%m/%d/%Y"))
        if ends < now:
            continue
        try:
            starts = time.mktime(time.strptime(bfo.field("925__a"), "%m/%d/%Y"))
        except:
            # Unparsable start date: treat as far in the future.
            starts = time.mktime(time.strptime("01/01/2050", "%m/%d/%Y"))
        if starts > now:
            continue
        published = bfo.field("269__c")
        if lang == 'fr':
            headline = bfo.field("246_1a")
        else:
            headline = bfo.field("245__a")
        items += '''
<h2 class="%s">%s<br/>
    <strong>
        <a href="%s/journal/popup?name=%s&amp;type=breaking_news&amp;record=%s&amp;ln=%s" target="_blank">%s</a>
    </strong>
</h2>
''' % ("", published, CFG_SITE_URL, journal_name, recid, lang, headline)
    if items:
        items = '<li>%s</li>' % items

    return items
# Exemplo n.º 17
# 0
def _get_record(recid, uid, result, fresh_record=False):
    """Retrieve the record structure for *recid* on behalf of user *uid*.

    Populates `result` with a resultCode/resultText pair (0 = OK,
    1 = error) and, on success, with the cache state ('cacheDirty',
    'cacheMTime', possibly 'cacheOutdated').  Returns the record
    structure, or None when an error was reported.
    """
    record = None
    mtime = None
    cache_dirty = None
    record_status = record_exists(recid)
    existing_cache = cache_exists(recid, uid)
    if record_status == 0:
        result['resultCode'], result[
            'resultText'] = 1, 'Non-existent record: %s' % recid
    elif record_status == -1:
        result['resultCode'], result[
            'resultText'] = 1, 'Deleted record: %s' % recid
    elif not existing_cache and record_locked_by_other_user(recid, uid):
        result['resultCode'], result[
            'resultText'] = 1, 'Record %s locked by user' % recid
    elif existing_cache and cache_expired(recid, uid) and \
        record_locked_by_other_user(recid, uid):
        result['resultCode'], result[
            'resultText'] = 1, 'Record %s locked by user' % recid
    elif record_locked_by_queue(recid):
        result['resultCode'], result[
            'resultText'] = 1, 'Record %s locked by queue' % recid
    else:
        if fresh_record:
            # Caller wants the latest revision: discard any cached copy.
            delete_cache(recid, uid)
            existing_cache = False
        if not existing_cache:
            record_revision, record = create_cache(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            cache_dirty = False
        else:
            tmpRes = get_cache_contents(recid, uid)
            cache_dirty, record_revision, record = tmpRes[0], tmpRes[
                1], tmpRes[2]
            touch_cache(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            if not latest_record_revision(recid, record_revision):
                result['cacheOutdated'] = True
        result['resultCode'], result['resultText'], result[
            'cacheDirty'], result[
                'cacheMTime'] = 0, 'Record OK', cache_dirty, mtime
    # Bug fix: only reorder subfields when a record was actually loaded.
    # On every error branch above `record` is still None; the sibling
    # _get_record_slave likewise orders subfields only on success.
    if record is not None:
        record_order_subfields(record)
    return record
# Exemplo n.º 18
# 0
def get_existing_records_for_reportnumber(reportnum):
    """Given a report number, return a list of recids of real (live) records
       that are associated with it.
       That's to say if the record does not exist (perhaps deleted, for example)
       its recid will NOT be returned in the list.

       @param reportnum: the report number for which recids are to be returned.
       @type reportnum: string
       @return: list of recids.
       @rtype: list
       @note: If reportnum was not found in phrase indexes, the function searches
           directly in bibxxx tables via MARC tags, so that the record does not
           have to be phrase-indexed.
    """
    existing_records = []  ## List of the recids of existing (live) records

    ## Get list of records with the report-number: (first in phrase indexes)
    reclist = list(search_pattern(req=None,
                                  p=reportnum,
                                  f="reportnumber",
                                  m="e"))
    if not reclist:
        # Maybe the record has not been indexed yet? (look in bibxxx tables)
        tags = get_field_tags("reportnumber")
        for tag in tags:
            recids = list(search_pattern(req=None,
                                         p=reportnum,
                                         f=tag,
                                         m="e"))
            reclist.extend(recids)

        reclist = dict.fromkeys(reclist).keys() # Remove duplicates

    ## Loop through all recids retrieved and testing to see whether the record
    ## actually exists or not. If none of the records exist, there is no record
    ## with this reportnumber; If more than one of the records exists, then
    ## there are multiple records with the report-number; If only one record
    ## exists, then everything is OK,
    for rec in reclist:
        # record_exists == 1 means the record is live (not deleted).
        rec_exists = record_exists(rec)
        if rec_exists == 1:
            ## This is a live record; record the recid:
            existing_records.append(rec)
    return existing_records
# Exemplo n.º 19
# 0
def oai_list_metadata_formats(argd):
    """Generates response to oai_list_metadata_formats verb."""

    identifier = argd.get('identifier')
    if identifier:
        recid = oai_get_recid(identifier)
        _record_exists = record_exists(recid)
        # The record is visible when it exists (1), or is deleted (-1)
        # while the deleted-records policy still exposes it.
        visible = (_record_exists == 1 or
                   (_record_exists == -1 and CFG_OAI_DELETED_POLICY != "no"))
        if not visible:
            return oai_error(
                argd,
                [("idDoesNotExist",
                  "invalid record Identifier: %s" % argd['identifier'])])

    # Advertise every configured metadata format.
    parts = []
    for prefix, (dummy, schema, namespace) in CFG_OAI_METADATA_FORMATS.items():
        parts.append(X.metadataFormat()(
            X.metadataPrefix(prefix),
            X.schema(schema),
            X.metadataNamespace(namespace)))
    out = "".join(parts)

    return oai_header(argd, "ListMetadataFormats") + out + oai_footer("ListMetadataFormats")
# Exemplo n.º 20
# 0
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None.
       If mode=='revision' then recid parameter is considered as revid."""
    record = None
    # The literal string "none" (not None) selects the empty-record mode.
    if recid == "none":
        mode = "none"
    if mode == "recid":
        record_status = record_exists(recid)
        # check for errors: 0 = non-existent, -1 = deleted
        if record_status == 0:
            result["resultCode"], result["resultText"] = 1, "Non-existent record: %s" % recid
        elif record_status == -1:
            result["resultCode"], result["resultText"] = 1, "Deleted record: %s" % recid
        elif record_locked_by_queue(recid):
            result["resultCode"], result["resultText"] = 1, "Record %s locked by queue" % recid
        else:
            # Parse the record's MARCXML ("xm") into the BibRecord structure.
            record = create_record(print_record(recid, "xm"))[0]

    elif mode == "tmpfile":
        file_path = "%s_%s.xml" % (_get_file_path(recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path):  # check if file doesn't exist
            result["resultCode"], result["resultText"] = 1, "Temporary file doesnt exist"
        else:  # open file
            tmpfile = open(file_path, "r")
            record = create_record(tmpfile.read())[0]
            tmpfile.close()

    elif mode == "revision":
        # In this mode `recid` actually carries a revision id.
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result["resultCode"], result["resultText"] = 1, "The specified revision does not exist"
        else:
            result["resultCode"], result["resultText"] = 1, "Invalid revision id"

    elif mode == "none":
        return {}

    else:
        result["resultCode"], result["resultText"] = 1, "Invalid record mode for record2"
    return record
# Exemplo n.º 21
# 0
def oai_list_metadata_formats(argd):
    """Generates response to oai_list_metadata_formats verb."""

    if argd.get('identifier'):
        recid = oai_get_recid(argd['identifier'])
        # record_exists: 1 = exists, -1 = deleted, 0 = never existed
        _record_exists = record_exists(recid)
        if _record_exists != 1 and (_record_exists != -1
                                    or CFG_OAI_DELETED_POLICY == "no"):
            # Record is neither live nor an exposed deleted record.
            return oai_error(
                argd, [("idDoesNotExist",
                        "invalid record Identifier: %s" % argd['identifier'])])

    out = ""
    # Advertise every configured metadata format.
    for prefix, (dummy, schema, namespace) in CFG_OAI_METADATA_FORMATS.items():
        out += X.metadataFormat()(X.metadataPrefix(prefix), X.schema(schema),
                                  X.metadataNamespace(namespace))

    return oai_header(
        argd, "ListMetadataFormats") + out + oai_footer("ListMetadataFormats")
# Exemplo n.º 22
# 0
def print_record(recid, prefix='marcxml', verb='ListRecords', set_spec=None):
    """Prints record 'recid' formatted according to 'prefix'.

    - if record does not exist, return nothing.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', then return only header, with status
      'deleted'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      then return nothing.

    Optional parameter 'record_exists_result' has the value of the result
    of the record_exists(recid) function (in order not to call that function
    again if already done.)
    """

    # True only for a live record: record_exists() returns 1 (exists),
    # 0 (never existed) or -1 (deleted).
    record_exists_result = record_exists(recid) == 1
    if record_exists_result:
        sets = get_field(recid, CFG_OAI_SET_FIELD)
        if set_spec is not None and not set_spec in sets and not [
                set_ for set_ in sets if set_.startswith("%s:" % set_spec)
        ]:
            ## the record is not in the requested set, and is not
            ## in any subset
            record_exists_result = False

    if record_exists_result:
        status = None
    else:
        status = 'deleted'

    # Deleted/out-of-set records are only advertised under the 'persistent'
    # or 'transient' deletion policies; otherwise emit nothing at all.
    if not record_exists_result and CFG_OAI_DELETED_POLICY not in (
            'persistent', 'transient'):
        return

    idents = get_field(recid, CFG_OAI_ID_FIELD)
    try:
        # Every OAI-visible record must carry an OAI identifier field.
        assert idents, "No OAI ID for record %s, please do your checks!" % recid
    except AssertionError, err:  # Python 2 except syntax; 'err' unused
        register_exception(alert_admin=True)
        return
    # NOTE(review): 'status' and 'idents' are computed but never used in the
    # visible code -- the rest of this function appears truncated here.
Exemplo n.º 23
0
def solr_add_range(lower_recid, upper_recid, tags_to_index,
                   next_commit_counter):
    """
    Adds the regarding field values of all records from the lower recid
    to the upper one to Solr.  It preserves the fulltext information.

    @param lower_recid: first record id of the range (inclusive)
    @param upper_recid: last record id of the range (inclusive)
    @param tags_to_index: mapping of logical field names to MARC tags,
        as understood by get_field_content_in_utf8()
    @param next_commit_counter: running counter deciding when an
        intermediate Solr commit is due
    @return: the updated commit counter, to thread into the next call
    """
    for recid in range(lower_recid, upper_recid + 1):
        if not record_exists(recid):
            continue
        # Metadata comes from the single UTF-8 helper.  The previous
        # version also recomputed abstract/author in legacy try/except
        # blocks that unconditionally clobbered these values -- and
        # emptied the abstract whenever fulltext extraction failed.
        abstract = get_field_content_in_utf8(recid, 'abstract',
                                             tags_to_index)
        author = get_field_content_in_utf8(recid, 'author', tags_to_index)
        keyword = get_field_content_in_utf8(recid, 'keyword',
                                            tags_to_index)
        title = get_field_content_in_utf8(recid, 'title', tags_to_index)
        # Fulltext is best-effort: records without readable attached
        # documents are indexed with an empty fulltext.
        try:
            fulltext = unicode(get_entire_fulltext(recid), 'utf-8')
        except:
            fulltext = ''

        solr_add(recid, abstract, author, fulltext, keyword, title)
        next_commit_counter = solr_commit_if_necessary(next_commit_counter,
                                                       recid=recid)

    return next_commit_counter
def solr_add_range(lower_recid, upper_recid, tags_to_index, next_commit_counter):
    """
    Feed Solr with the indexable field values of every existing record
    whose id lies between lower_recid and upper_recid (both included).
    The fulltext information is preserved.
    """
    for current_recid in range(lower_recid, upper_recid + 1):
        if not record_exists(current_recid):
            continue
        # Gather the plain metadata fields, already decoded to UTF-8.
        field_values = dict(
            (name, get_field_content_in_utf8(current_recid, name,
                                             tags_to_index))
            for name in ('abstract', 'author', 'keyword', 'title'))
        # Fulltext is best-effort: fall back to an empty string when the
        # record has no readable attached documents.
        try:
            fulltext_value = unicode(BibRecDocs(current_recid).get_text(),
                                     'utf-8')
        except:
            fulltext_value = ''

        solr_add(current_recid, field_values['abstract'],
                 field_values['author'], fulltext_value,
                 field_values['keyword'], field_values['title'])
        next_commit_counter = solr_commit_if_necessary(next_commit_counter,
                                                       recid=current_recid)

    return next_commit_counter
Exemplo n.º 25
0
def solr_add_range(lower_recid, upper_recid, tags_to_index, next_commit_counter):
    """
    Submit to Solr the indexable fields of all existing records in the
    inclusive id range [lower_recid, upper_recid], fulltext included.
    """
    current = lower_recid
    while current <= upper_recid:
        if record_exists(current):
            abstract_text = get_field_content_in_utf8(current, 'abstract',
                                                      tags_to_index)
            author_text = get_field_content_in_utf8(current, 'author',
                                                    tags_to_index)
            keyword_text = get_field_content_in_utf8(current, 'keyword',
                                                     tags_to_index)
            title_text = get_field_content_in_utf8(current, 'title',
                                                   tags_to_index)
            try:
                document_store = BibRecDocs(current)
                fulltext_text = unicode(document_store.get_text(), 'utf-8')
            except:
                # No (readable) fulltext attached -- index without it.
                fulltext_text = ''

            solr_add(current, abstract_text, author_text, fulltext_text,
                     keyword_text, title_text)
            next_commit_counter = solr_commit_if_necessary(
                next_commit_counter, recid=current)
        current += 1

    return next_commit_counter
def oailistmetadataformats(args):
    "Generates response to oailistmetadataformats verb."

    arg = parse_args(args)

    identifier = arg['identifier']
    list_formats = True  # advertise formats unless the identifier is bad

    if identifier != "":
        sysno = oaigetsysno(identifier)
        _record_exists = record_exists(sysno)
        list_formats = (_record_exists == 1 or
                        (_record_exists == -1 and
                         CFG_OAI_DELETED_POLICY != "no"))
        if not list_formats:
            # Unknown (or hidden-deleted) identifier: emit an OAI error.
            out = oai_error("idDoesNotExist", "invalid record Identifier")
            return (oai_error_header(args, "ListMetadataFormats") + out +
                    oai_error_footer("ListMetadataFormats"))

    out = ""
    if list_formats:
        out = "".join([
            "   <metadataFormat>\n",
            "    <metadataPrefix>oai_dc</metadataPrefix>\n",
            "    <schema>http://www.openarchives.org/OAI/1.1/dc.xsd</schema>\n",
            "    <metadataNamespace>http://purl.org/dc/elements/1.1/</metadataNamespace>\n",
            "   </metadataFormat>\n",
            "   <metadataFormat>\n",
            "    <metadataPrefix>marcxml</metadataPrefix>\n",
            "    <schema>http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd</schema>\n",
            "    <metadataNamespace>http://www.loc.gov/MARC21/slim</metadataNamespace>\n",
            "   </metadataFormat>\n",
        ])

    return (oai_header(args, "ListMetadataFormats") + out +
            oai_footer("ListMetadataFormats"))
Exemplo n.º 27
0
def _get_record(recid, uid, result, fresh_record=False):
    """Retrieve record structure.
    """
    record = None
    mtime = None
    cache_dirty = None
    record_status = record_exists(recid)
    existing_cache = cache_exists(recid, uid)

    # Determine the first failing precondition, if any; the checks must
    # stay in this exact order (existence, deletion, lock checks).
    error_text = None
    if record_status == 0:
        error_text = "Non-existent record: %s" % recid
    elif record_status == -1:
        error_text = "Deleted record: %s" % recid
    elif not existing_cache and record_locked_by_other_user(recid, uid):
        error_text = "Record %s locked by user" % recid
    elif existing_cache and cache_expired(recid, uid) and record_locked_by_other_user(recid, uid):
        error_text = "Record %s locked by user" % recid
    elif record_locked_by_queue(recid):
        error_text = "Record %s locked by queue" % recid

    if error_text is not None:
        result["resultCode"], result["resultText"] = 1, error_text
        return record

    if fresh_record:
        # Caller asked for a pristine copy: drop any cached version first.
        delete_cache_file(recid, uid)
        existing_cache = False

    if existing_cache:
        cached = get_cache_file_contents(recid, uid)
        cache_dirty, record_revision, record = cached[0], cached[1], cached[2]
        touch_cache_file(recid, uid)
        mtime = get_cache_mtime(recid, uid)
        if not latest_record_revision(recid, record_revision):
            result["cacheOutdated"] = True
    else:
        record_revision, record = create_cache_file(recid, uid)
        mtime = get_cache_mtime(recid, uid)
        cache_dirty = False

    result["resultCode"] = 0
    result["resultText"] = "Record OK"
    result["cacheDirty"] = cache_dirty
    result["cacheMTime"] = mtime
    return record
Exemplo n.º 28
0
def oai_get_record(argd):
    """Serve the OAI-PMH GetRecord verb for argd['identifier'].

    The record is rendered in the argd['metadataPrefix'] format.

    - if the record does not exist, an 'idDoesNotExist' OAI error is
      returned;
    - if the record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', only the header is returned, with
      status 'deleted';
    - if the record has been deleted and CFG_OAI_DELETED_POLICY is
      'no', an 'idDoesNotExist' OAI error is returned.
    """

    recid = oai_get_recid(argd['identifier'])
    existence = record_exists(recid)
    deleted_but_visible = (existence == -1 and
                           CFG_OAI_DELETED_POLICY != 'no')
    if existence != 1 and not deleted_but_visible:
        return oai_error(argd, [("idDoesNotExist",
                                 "invalid record Identifier: %s" %
                                 argd['identifier'])])
    body = print_record(recid, argd['metadataPrefix'], existence)
    return oai_header(argd, "GetRecord") + body + oai_footer("GetRecord")
Exemplo n.º 29
0
def print_record(recid, prefix='marcxml', verb='ListRecords', set_spec=None):
    """Prints record 'recid' formatted according to 'prefix'.

    - if record does not exist, return nothing.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', then return only header, with status
      'deleted'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      then return nothing.

    Optional parameter 'record_exists_result' has the value of the result
    of the record_exists(recid) function (in order not to call that function
    again if already done.)
    """

    # record_exists() returns 1 (exists), 0 (never existed), -1 (deleted);
    # only a live record passes this test.
    record_exists_result = record_exists(recid) == 1
    if record_exists_result:
        sets = get_field(recid, CFG_OAI_SET_FIELD)
        # A record outside the requested set (and all of its subsets,
        # "set:subset" spellings included) is treated like a deleted one.
        if set_spec is not None and not set_spec in sets and not [set_ for set_ in sets if set_.startswith("%s:" % set_spec)]:
            ## the record is not in the requested set, and is not
            ## in any subset
            record_exists_result = False

    if record_exists_result:
        status = None
    else:
        status = 'deleted'

    # Suppress deleted/out-of-set records entirely unless the deletion
    # policy says they should still be advertised.
    if not record_exists_result and CFG_OAI_DELETED_POLICY not in ('persistent', 'transient'):
        return

    idents = get_field(recid, CFG_OAI_ID_FIELD)
    try:
        # Every OAI-visible record must carry an OAI identifier field.
        assert idents, "No OAI ID for record %s, please do your checks!" % recid
    except AssertionError, err:  # Python 2 except syntax; 'err' unused
        register_exception(alert_admin=True)
        return
    # NOTE(review): 'status' and 'idents' are never consumed in the visible
    # code -- this copy of the function looks truncated at this point.
Exemplo n.º 30
0
def _get_record(recid, uid, result, fresh_record=False):
    """Retrieve record structure.

    Loads the record from the user's BibEdit cache (creating the cache if
    needed), checks existence/locking/revision status, and reports the
    outcome in 'result'.

    @param recid: id of the record to load
    @param uid: id of the user requesting the record
    @param result: dictionary updated in place with 'resultCode' and
        'resultText'; on success also 'cacheDirty'/'cacheMTime', plus
        'cacheOutdated' when the cache lags behind the latest revision
    @param fresh_record: when True, discard any existing cache first
    @return: the record structure, or None on error
    """
    record = None
    mtime = None
    cache_dirty = None
    record_status = record_exists(recid)
    existing_cache = cache_exists(recid, uid)
    if record_status == 0:
        result['resultCode'], result['resultText'] = 1, 'Non-existent record: %s' % recid
    elif record_status == -1:
        result['resultCode'], result['resultText'] = 1, 'Deleted record: %s' % recid
    elif not existing_cache and record_locked_by_other_user(recid, uid):
        result['resultCode'], result['resultText'] = 1, 'Record %s locked by user' % recid
    elif existing_cache and cache_expired(recid, uid) and \
        record_locked_by_other_user(recid, uid):
        result['resultCode'], result['resultText'] = 1, 'Record %s locked by user' % recid
    elif record_locked_by_queue(recid):
        result['resultCode'], result['resultText'] = 1, 'Record %s locked by queue' % recid
    else:
        if fresh_record:
            delete_cache(recid, uid)
            existing_cache = False
        if not existing_cache:
            record_revision, record = create_cache(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            cache_dirty = False
        else:
            tmpRes = get_cache_contents(recid, uid)
            cache_dirty, record_revision, record = tmpRes[0], tmpRes[1], tmpRes[2]
            touch_cache(recid, uid)
            mtime = get_cache_mtime(recid, uid)
            if not latest_record_revision(recid, record_revision):
                result['cacheOutdated'] = True
        result['resultCode'], result['resultText'], result['cacheDirty'], result['cacheMTime'] = 0, 'Record OK', cache_dirty, mtime
    # Fix: only reorder subfields when a record was actually loaded; on the
    # error paths above 'record' is still None and must not be passed on.
    if record is not None:
        record_order_subfields(record)
    return record
Exemplo n.º 31
0
def get_low_level_recIDs_from_control_no(control_no):
    """
    Return the list of EXISTING record IDs of the authority records that
    match the given (INVENIO) MARC control_no (e.g. 'AUTHOR:(XYZ)abc123').

    Normally the list contains exactly one element.

    @param control_no: a (INVENIO) MARC internal control_no to an authority record
    @type control_no: string

    @return:: list containing the record ID(s) of the referenced authority record
        (should be only one)
    """
    candidate_ids = _get_low_level_recIDs_intbitset_from_control_no(control_no)

    # Drop "DELETED" records: record_exists() > 0 means a live record.
    live_ids = [candidate for candidate in candidate_ids
                if record_exists(candidate) > 0]

    # There should be exactly one authority record per control number.
    _assert_unique_control_no(live_ids, control_no)

    return live_ids
Exemplo n.º 32
0
def get_low_level_recIDs_from_control_no(control_no):
    """
    returns the list of EXISTING record ID(s) of the authority records
    corresponding to the given (INVENIO) MARC control_no
    (e.g. 'AUTHOR:(XYZ)abc123')
    (NB: the list should normally contain exactly 1 element)

    :param control_no: a (INVENIO) MARC internal control_no to an authority record
    :type control_no: string

    :return:: list containing the record ID(s) of the referenced authority record
        (should be only one)
    """
    hits = list(_get_low_level_recIDs_intbitset_from_control_no(control_no))

    # Keep only live records; record_exists() returns -1 for deleted ones
    # and 0 for unknown ones.
    surviving = []
    for hit in hits:
        if record_exists(hit) > 0:
            surviving.append(hit)

    # Exactly one authority record is expected per control number.
    _assert_unique_control_no(surviving, control_no)

    return surviving
Exemplo n.º 33
0
def oai_get_record(argd):
    """Answer the OAI-PMH GetRecord verb.

    Returns the record named by argd['identifier'], rendered in the
    argd['metadataPrefix'] format.

    - if the record does not exist, return oai_error 'idDoesNotExist';
    - if the record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', return only the header, with status
      'deleted';
    - if the record has been deleted and CFG_OAI_DELETED_POLICY is
      'no', return oai_error 'idDoesNotExist'.
    """

    identifier = argd['identifier']
    recid = oai_get_recid(identifier)
    _record_exists = record_exists(recid)
    record_is_servable = _record_exists == 1 or \
        (_record_exists == -1 and CFG_OAI_DELETED_POLICY != 'no')
    if not record_is_servable:
        return oai_error(argd,
                         [("idDoesNotExist", "invalid record Identifier: %s" %
                           identifier)])
    payload = print_record(recid, argd['metadataPrefix'], _record_exists)
    return oai_header(argd, "GetRecord") + payload + oai_footer("GetRecord")
Exemplo n.º 34
0
def record_get_xml(recID,
                   format='xm',
                   decompress=zlib.decompress,
                   on_the_fly=False):
    """
    Returns an XML string of the record given by recID.

    The function builds the XML directly from the database,
    without using the standard formatting process.

    'format' allows to define the flavour of XML:
        - 'xm' for standard XML
        - 'marcxml' for MARC XML
        - 'oai_dc' for OAI Dublin Core
        - 'xd' for XML Dublin Core

    If record does not exist, returns empty string.
    If the record is deleted, returns an empty MARCXML (with recid
    controlfield, OAI ID fields and 980__c=DELETED)

    @param recID: the id of the record to retrieve
    @param on_the_fly: if False, try to fetch precreated one in database
    @return: the xml string of the record
    """
    from invenio.search_engine import record_exists

    # The nested helpers below intentionally shadow any module-level
    # functions of the same names, keeping this function self-contained.
    def get_fieldvalues(recID, tag):
        """Return list of field values for field TAG inside record RECID."""
        out = []
        if tag == "001___":
            # we have asked for recID that is not stored in bibXXx tables
            out.append(str(recID))
        else:
            # we are going to look inside bibXXx tables
            digit = tag[0:2]
            bx = "bib%sx" % digit
            bibx = "bibrec_bib%sx" % digit
            # NOTE(review): query built by string interpolation; recID/tag are
            # presumably trusted internal values -- confirm before exposing
            # this helper to user-supplied input.
            query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag LIKE '%s'" \
                    "ORDER BY bibx.field_number, bx.tag ASC" % (bx, bibx, recID, tag)
            res = run_sql(query)
            for row in res:
                out.append(row[0])
        return out

    def get_creation_date(recID, fmt="%Y-%m-%d"):
        "Returns the creation date of the record 'recID'."
        out = ""
        res = run_sql(
            "SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s",
            (fmt, recID), 1)
        if res:
            out = res[0][0]
        return out

    def get_modification_date(recID, fmt="%Y-%m-%d"):
        "Returns the date of last modification for the record 'recID'."
        out = ""
        res = run_sql(
            "SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s",
            (fmt, recID), 1)
        if res:
            out = res[0][0]
        return out

    #_ = gettext_set_language(ln)

    out = ""

    # sanity check: record_exists() returns 1 (exists), 0 (never existed)
    # or -1 (deleted); only the "never existed" case aborts with "".
    record_exist_p = record_exists(recID)
    if record_exist_p == 0:  # doesn't exist
        return out

    # print record opening tags, if needed:
    if format == "marcxml" or format == "oai_dc":
        out += "  <record>\n"
        out += "   <header>\n"
        for identifier in get_fieldvalues(recID, CFG_OAI_ID_FIELD):
            out += "    <identifier>%s</identifier>\n" % identifier
        out += "    <datestamp>%s</datestamp>\n" % get_modification_date(recID)
        out += "   </header>\n"
        out += "   <metadata>\n"

    if format.startswith("xm") or format == "marcxml":
        res = None
        if on_the_fly == False:
            # look for cached format existence:
            # NOTE(review): string-interpolated SQL; recID/format come from
            # internal callers here -- confirm before widening the API.
            query = """SELECT value FROM bibfmt WHERE
            id_bibrec='%s' AND format='%s'""" % (recID, format)
            res = run_sql(query, None, 1)
        if res and record_exist_p == 1:
            # record 'recID' is formatted in 'format', so print it
            out += "%s" % decompress(res[0][0])
        else:
            # record 'recID' is not formatted in 'format' -- they are
            # not in "bibfmt" table; so fetch all the data from
            # "bibXXx" tables:
            if format == "marcxml":
                out += """    <record xmlns="http://www.loc.gov/MARC21/slim">\n"""
                out += "        <controlfield tag=\"001\">%d</controlfield>\n" % int(
                    recID)
            elif format.startswith("xm"):
                out += """    <record>\n"""
                out += "        <controlfield tag=\"001\">%d</controlfield>\n" % int(
                    recID)
            if record_exist_p == -1:
                # deleted record, so display only OAI ID and 980:
                oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD)
                if oai_ids:
                    # CFG_OAI_ID_FIELD is a 6-char MARC spec: tag, ind1,
                    # ind2, subfield code, sliced out below.
                    out += "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">%s</subfield></datafield>\n" % \
                           (CFG_OAI_ID_FIELD[0:3],
                            CFG_OAI_ID_FIELD[3:4],
                            CFG_OAI_ID_FIELD[4:5],
                            CFG_OAI_ID_FIELD[5:6],
                            oai_ids[0])
                out += "<datafield tag=\"980\" ind1=\" \" ind2=\" \"><subfield code=\"c\">DELETED</subfield></datafield>\n"
            else:
                # controlfields
                query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\
                        "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '00%%' "\
                        "ORDER BY bb.field_number, b.tag ASC" % recID
                res = run_sql(query)
                for row in res:
                    field, value = row[0], row[1]
                    value = encode_for_xml(value)
                    out += """        <controlfield tag="%s">%s</controlfield>\n""" % \
                           (encode_for_xml(field[0:3]), value)
                # datafields
                i = 1  # Do not process bib00x and bibrec_bib00x, as
                # they are controlfields. So start at bib01x and
                # bibrec_bib00x (and set i = 0 at the end of
                # first loop)
                for digit1 in range(0, 10):
                    for digit2 in range(i, 10):
                        bx = "bib%d%dx" % (digit1, digit2)
                        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                        query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                                "WHERE bb.id_bibrec='%s' AND b.id=bb.id_bibxxx AND b.tag LIKE '%s%%' "\
                                "ORDER BY bb.field_number, b.tag ASC" % (bx,
                                                                         bibx,
                                                                         recID,
                                                                         str(digit1)+str(digit2))
                        res = run_sql(query)
                        field_number_old = -999
                        field_old = ""
                        for row in res:
                            field, value, field_number = row[0], row[1], row[2]
                            ind1, ind2 = field[3], field[4]
                            # '_' and '' both denote a blank MARC indicator.
                            if ind1 == "_" or ind1 == "":
                                ind1 = " "
                            if ind2 == "_" or ind2 == "":
                                ind2 = " "
                            # print field tag: a new <datafield> starts when
                            # either the field number or the tag (ignoring
                            # the subfield code) changes.
                            if field_number != field_number_old or \
                                   field[:-1] != field_old[:-1]:
                                if field_number_old != -999:
                                    out += """        </datafield>\n"""
                                out += """        <datafield tag="%s" ind1="%s" ind2="%s">\n""" % \
                                       (encode_for_xml(field[0:3]),
                                        encode_for_xml(ind1),
                                        encode_for_xml(ind2))
                                field_number_old = field_number
                                field_old = field
                            # print subfield value
                            value = encode_for_xml(value)
                            out += """            <subfield code="%s">%s</subfield>\n""" % \
                                   (encode_for_xml(field[-1:]), value)

                        # all fields/subfields printed in this run, so close the tag:
                        if field_number_old != -999:
                            out += """        </datafield>\n"""
                    i = 0  # Next loop should start looking at bib%0 and bibrec_bib00x
            # we are at the end of printing the record:
            out += "    </record>\n"

    elif format == "xd" or format == "oai_dc":
        # XML Dublin Core format, possibly OAI -- select only some bibXXx fields:
        out += """    <dc xmlns="http://purl.org/dc/elements/1.1/"
                         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                         xsi:schemaLocation="http://purl.org/dc/elements/1.1/
                                             http://www.openarchives.org/OAI/1.1/dc.xsd">\n"""
        if record_exist_p == -1:
            # deleted record: emit an empty Dublin Core body
            out += ""
        else:
            for f in get_fieldvalues(recID, "041__a"):
                out += "        <language>%s</language>\n" % f

            for f in get_fieldvalues(recID, "100__a"):
                out += "        <creator>%s</creator>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "700__a"):
                out += "        <creator>%s</creator>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "245__a"):
                out += "        <title>%s</title>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "65017a"):
                out += "        <subject>%s</subject>\n" % encode_for_xml(f)

            for f in get_fieldvalues(recID, "8564_u"):
                out += "        <identifier>%s</identifier>\n" % encode_for_xml(
                    f)

            for f in get_fieldvalues(recID, "520__a"):
                out += "        <description>%s</description>\n" % encode_for_xml(
                    f)

            out += "        <date>%s</date>\n" % get_creation_date(recID)
        out += "    </dc>\n"

    # print record closing tags, if needed:
    if format == "marcxml" or format == "oai_dc":
        out += "   </metadata>\n"
        out += "  </record>\n"

    return out
Exemplo n.º 35
0
def format_element(bfo):
    """
    Special contract with US DoE Office of Scientific and Technical Information
    29/08/2013 R.A.

    Serializes selected record info as "OSTI" xml

    @param bfo: BibFormat object for the record being formatted
    @return: the serialized "OSTI" XML string, or None when lxml is
        unavailable
    """

    try:
        from lxml import etree
    except ImportError:
        return

    from invenio.search_engine import perform_request_search, \
        get_fieldvalues, record_exists

    # a dictionary of Inspire subjects mapped to OSTI coded research categories
    osticats = {
        'Accelerators': '43 PARTICLE ACCELERATORS',
        'Computing':
        '99 GENERAL AND MISCELLANEOUS//MATHEMATICS, COMPUTING, AND INFORMATION SCIENCE',
        'Experiment-HEP': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'General Physics':
        '71 CLASSICAL AND QUANTUM MECHANICS, GENERAL PHYSICS',
        'Instrumentation':
        '46 INSTRUMENTATION RELATED TO NUCLEAR SCIENCE AND TECHNOLOGY',
        'Astrophysics': '71 CLASSICAL AND QUANTUM MECHANICS, GENERAL PHYSICS',
        'Lattice': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'Math and Math Physics':
        '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'Theory-Nucl': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'Experiment-Nucl': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'Phenomenology-HEP': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'Theory-HEP': '72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS',
        'Other': '71 CLASSICAL AND QUANTUM MECHANICS, GENERAL PHYSICS',
    }

    rec = etree.Element("rec")
    recid = bfo.recID
    if record_exists(recid) == -1:
        # Deleted record: serialize the empty <rec/> element.
        # Fix: lxml's tostring() keyword is 'xml_declaration', not
        # 'declaration' (the latter raised TypeError).
        return etree.tostring(rec, encoding='UTF-8', xml_declaration=False)

    node = etree.SubElement

    # SPIRES record number is used if available to prevent OSTI loading
    # duplicate records from INSPIRE which they already got from SPIRES.
    spires_ids = [value.replace('SPIRES-', '')
                  for value in get_fieldvalues(recid, "970__a")
                  if value.startswith('SPIRES-')]
    for recnum in spires_ids:
        node(rec, "accession_num").text = recnum
    if not rec.xpath('//accession_num'):
        # use regular inspire recid
        node(rec, "accession_num").text = str(recid)

    for title in get_fieldvalues(recid, "245__a"):
        node(rec, "title").text = unicode(title, "utf-8")

    # The authors in the ostixml are all strung together between author tags
    # delimited by ';' If zero or > 10 authors don't show any authors
    authors = get_fieldvalues(recid, "100__a") \
        + get_fieldvalues(recid, "700__a")

    if len(authors) <= 10 and len(authors) > 0:
        node(rec, 'author').text = \
        '; '.join([unicode(a, "utf-8") for a in authors])

    for category in get_fieldvalues(recid, "65017a"):
        if category in osticats:
            node(rec, 'subj_category').text = osticats[category]
            node(rec, 'subj_keywords').text = category

    for pubdate in get_fieldvalues(recid, "269__c"):
        node(rec, 'date').text = pubdate

    #Fermilab report numbers mapped to OSTI doc types
    for dtype in get_fieldvalues(recid, "037__a"):
        if 'fermilab' in dtype.lower():
            if "PUB" in dtype:
                doctype = 'JA'
            elif "CONF" in dtype:
                doctype = 'CO'
            elif "THESIS" in dtype:
                doctype = 'TD'
            else:
                doctype = 'TR'
            node(rec, 'doctype').text = doctype
    # One MARC field is used for conferences and journals.  So, the following
    # journal coding handles the variations, and outputs journal and
    # conf. cites in a nice order.  If the conf has a cnum, we get the conf
    # info from its separate record in the conf. "collection."  There are a
    # few if-then gymnastics to cover possible missing information and still
    # make a note that looks okay (sort of)
    journals = bfo.fields('773__', repeatable_subfields_p=True)
    for journal in journals:
        if 'p' in journal:
            jinfo = str(journal['p'][0])
            if 'v' in journal:
                jinfo += ' %s' % journal['v'][0]
            if 'c' in journal:
                jinfo += ':%s' % journal['c'][0]
            if 'y' in journal:
                jinfo += ',%s' % journal['y'][0]
            node(rec, 'journal_info').text = unicode(jinfo, "utf-8")

        confstring = ''
        # without t info or cnum don't print anything
        if 't' in journal:
            confstring += '%s: ' % journal['t'][0]
        if 'w' in journal:
            conf_info = {}
            cnum = journal['w'][0].replace("/", "-")
            idrec = perform_request_search(p="111__g:" + str(cnum), \
                                               c='Conferences')
            if idrec:
                for subfield in ('a', 'c', 'd'):
                    val = get_fieldvalues(idrec[0], '111__%s' % subfield, \
                                              repetitive_values=True)
                    if val:
                        conf_info[subfield] = val[0]
                confstring += '%s. %s, %s.' % \
                    tuple(conf_info.get(x, '') for x in ('a','c','d'))
        if 'c' in journal and confstring != '':
            confstring += ' pp: %s' % journal['c'][0]

    for doi in get_fieldvalues(recid, "0247_a"):
        node(rec, 'doi').text = doi

    if journals and confstring != '':
        # because it has to come after doi (?)
        # although order is not guaranteed for XML serialization
        node(rec, 'conf_info').text = unicode(confstring, "utf-8")

    for pages in get_fieldvalues(recid, "300__a"):
        node(rec, 'format').text = '%s pages' % pages

    for lang in get_fieldvalues(recid, "041__a"):
        node(rec, 'language').text = lang
    # As with journals, eprints are in with report nos. in our MARC format
    # so they have to be separated out
    eprint = ''
    for repno in get_fieldvalues(recid, "037__a"):
        if "arXiv" in repno:
            eprint = repno
            node(rec, 'arXiv_eprint').text = \
                'arXiv eprint number %s' % unicode(repno, "utf-8")
        else:
            node(rec, 'report_number').text = unicode(repno, "utf-8")

    urls = bfo.fields('8564_', repeatable_subfields_p=True)
    for url in urls:
        if 'y' in url and "FERMILAB" in url['y'][0] and 'u' in url:
            node(rec, 'url').text = '%s.pdf' % url['u'][0]

    if eprint:
        node(rec, 'availability').text = \
            'http://arXiv.org/abs/%s' % eprint

    node(rec, 'sponsor_org').text = 'DOE Office of Science'

    dt_harvest = get_modification_date(recid)
    if dt_harvest:
        node(rec, 'dt_harvest').text = dt_harvest
    else:
        # fallback to SPIRES era marc
        for date in get_fieldvalues(recid, "961__c"):
            node(rec, 'dt_harvest').text = date

    out = etree.tostring(rec, encoding='UTF-8', xml_declaration=False, \
                              pretty_print=True, method='xml').rstrip('\n')
    return out
Exemplo n.º 36
0
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successfull, 0 if not
    @rtype: int
    """
    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_' + str(batch_job['recid']) + '_' + str(
            uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        xml_file = file(xml_filename, 'w')
        xml_file.write(marcxml)
        xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errrors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                if (comment == m_comment and description == m_description
                        and subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        ## FIX: the original passed a hard-coded recid 124
                        ## here; read the aspect from the record being
                        ## processed instead.
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found" %
                                batch_job['recid'])
            task_update_progress("Video master for record %d not found" %
                                 batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(
        batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job,
                                            'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory auf the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first an then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc" %
                                bibdoc_video_docname)
            master_format = compose_format(
                bibdoc_video_extension,
                getval(batch_job, 'bibdoc_master_subformat', 'master'))
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                batch_job['input'],
                version=1,
                description=getval(batch_job, 'bibdoc_master_description'),
                comment=getval(batch_job, 'bibdoc_master_comment'),
                docformat=master_format)

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------" %
                            (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(
                job['bibdoc_docname']).safe_substitute(
                    {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(
                    job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined fot the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension)
            _task_write_message(
                "Transcoding %s to %s;%s" %
                (bibdoc_slave_video_docname, bibdoc_video_extension,
                 bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            ## FIX: audio and video bitrates were swapped in the original
            ## call (abitrate got 'videobitrate' and vice versa).
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message)
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(
                    bibdoc_video_fullpath,
                    compose_file(bibdoc_video_directory,
                                 bibdoc_video_extension,
                                 bibdoc_video_subformat, 1,
                                 bibdoc_slave_video_docname))
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(
                        getval(job, 'bibdoc_description'), bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                          bibdoc_video_docname)
            tmpfname = (
                tmpdir + "/" + bibdoc_frame_docname + '.' +
                getval(profile, 'extension', getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(
                input_file=batch_job['input'],
                output_file=tmpfname,
                size=getval(job, 'size'),
                positions=getval(job, 'positions'),
                numberof=getval(job, 'numberof'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
            )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directorys, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(
                        filename)
                    _task_write_message("Creating new bibdoc for %s" %
                                        bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(
                            docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)

                    bibdoc_frame_format = compose_format(
                        bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format allready exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(
                            bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc" %
                                        (bibdoc_frame_docname,
                                         getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                        fname,
                        version=1,
                        description=getval(job, 'bibdoc_description'),
                        comment=getval(job, 'bibdoc_comment'),
                        docformat=bibdoc_frame_format)
            ## Remove the temporary folders
            _task_write_message("Removing temporary directory")
            shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        marcxml = marc_snippet.read()
        marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successfull
        ## FIX: return_code is truthy on success (see notification section
        ## below); the original tested `not return_code` and therefore
        ## deleted the input only on FAILURE, contradicting the intent.
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern',
                      '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            ## FIX: the original read `type(getval(...) == type(str()))`,
            ## i.e. the type of a boolean, which is always truthy; test
            ## whether the value itself is a string instead.
            if isinstance(getval(batch_job, 'notify_admin'), str):
                _notify_error_admin(batch_job, getval(batch_job,
                                                      'notify_admin'))

            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
    # NOTE(review): return_code is accumulated above but not returned;
    # callers appear to rely on a constant 1 here — confirm before changing.
    return 1
Exemplo n.º 37
0
def print_record(recid, prefix='marcxml', verb='ListRecords', set_spec=None, set_last_updated=None):
    """Prints record 'recid' formatted according to 'prefix'.

    - if record does not exist, return nothing.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', then return only header, with status
      'deleted'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      then return nothing.

    @param recid: the record identifier to print
    @param prefix: OAI metadataPrefix, a key into CFG_OAI_METADATA_FORMATS
    @param verb: OAI verb; 'ListIdentifiers' returns the header only
    @param set_spec: if given, the record must belong to this set or to
        one of its subsets, otherwise it is treated as non-existing
    @param set_last_updated: optional lower bound for the header datestamp
    """

    record_exists_result = record_exists(recid) == 1
    if record_exists_result:
        sets = get_field(recid, CFG_OAI_SET_FIELD)
        if set_spec is not None and set_spec not in sets and not [set_ for set_ in sets if set_.startswith("%s:" % set_spec)]:
            ## the record is not in the requested set, and is not
            ## in any subset
            record_exists_result = False

    if record_exists_result:
        status = None
    else:
        status = 'deleted'

    if not record_exists_result and CFG_OAI_DELETED_POLICY not in ('persistent', 'transient'):
        return ""

    idents = get_field(recid, CFG_OAI_ID_FIELD)
    if not idents:
        return ""
    ## FIXME: sanity checks (exactly one non-empty OAI ID per record,
    ## with admin alerting on violation) should be moved into a bibtask.
    ident = idents[0]

    header_body = EscapedXMLString('')
    header_body += X.identifier()(ident)
    if set_last_updated:
        header_body += X.datestamp()(max(get_modification_date(recid), set_last_updated))
    else:
        header_body += X.datestamp()(get_modification_date(recid))
    ## FIX: use a dedicated loop variable -- the original reused (and thus
    ## clobbered) the 'set_spec' function parameter here.
    for spec in get_field(recid, CFG_OAI_SET_FIELD):
        if spec and spec != CFG_OAI_REPOSITORY_GLOBAL_SET_SPEC:
            # Print only if field not empty
            header_body += X.setSpec()(spec)

    header = X.header(status=status)(header_body)

    if verb == 'ListIdentifiers':
        return header
    else:
        if record_exists_result:
            metadata_body = format_record(recid, CFG_OAI_METADATA_FORMATS[prefix][0])
            metadata = X.metadata(body=metadata_body)
            provenance_body = get_record_provenance(recid)
            if provenance_body:
                provenance = X.about(body=provenance_body)
            else:
                provenance = ''
            rights_body = get_record_rights(recid)
            if rights_body:
                rights = X.about(body=rights_body)
            else:
                rights = ''
        else:
            metadata = ''
            provenance = ''
            rights = ''
        return X.record()(header, metadata, provenance, rights)
Exemplo n.º 38
0
def get_bibrecord(recid):
    """Return the record `recid` wrapped as a BibRecord structure.

    Returns None when the record does not exist.
    """
    if not record_exists(recid):
        return None
    marcxml = print_record(recid, 'xm')
    return create_record(marcxml)[0]
Exemplo n.º 39
0
        def getfile(req, form):
            """Serve a file attached to record ``self.recid``.

            Performs access checks (site level, record visibility, per-file
            restrictions), resolves the requested docname/format/version
            (from the closure variable ``filename`` or from URL arguments),
            and streams the matching bibdocfile to the client.
            NOTE(review): ``self`` and ``filename`` come from the enclosing
            scope; this chunk appears truncated after the version loop.
            """
            args = wash_urlargd(form,
                                bibdocfile_templates.files_default_urlargd)
            ln = args['ln']

            _ = gettext_set_language(ln)

            uid = getUid(req)
            user_info = collect_user_info(req)

            verbose = args['verbose']
            if verbose >= 1 and not isUserSuperAdmin(user_info):
                # Only SuperUser can see all the details!
                verbose = 0

            # Guests on a locked-down site (access control level > 1) are
            # rejected outright.
            if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
                return page_not_authorized(req,
                                           "/%s/%s" %
                                           (CFG_SITE_RECORD, self.recid),
                                           navmenuid='submit')

            if record_exists(self.recid) < 1:
                msg = "<p>%s</p>" % _(
                    "Requested record does not seem to exist.")
                return warning_page(msg, req, ln)

            if record_empty(self.recid):
                msg = "<p>%s</p>" % _(
                    "Requested record does not seem to have been integrated.")
                return warning_page(msg, req, ln)

            (auth_code,
             auth_message) = check_user_can_view_record(user_info, self.recid)
            # Restricted record + guest user: either the record belongs to a
            # released journal issue (serve it) or redirect to login with a
            # cookie that authorizes the collection view afterwards.
            if auth_code and user_info['email'] == 'guest':
                if webjournal_utils.is_recid_in_released_issue(self.recid):
                    # We can serve the file
                    pass
                else:
                    cookie = mail_cookie_create_authorize_action(
                        VIEWRESTRCOLL, {
                            'collection':
                            guess_primary_collection_of_a_record(self.recid)
                        })
                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                             make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                                     CFG_SITE_SECURE_URL + user_info['uri']}, {})
                    return redirect_to_url(req, target, norobot=True)
            elif auth_code:
                # Logged-in user without rights: same released-issue escape
                # hatch, otherwise a plain "not authorized" page.
                if webjournal_utils.is_recid_in_released_issue(self.recid):
                    # We can serve the file
                    pass
                else:
                    return page_not_authorized(req, "../", \
                                               text = auth_message)

            # Read-only site: downloads are served but not registered below.
            readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1

            # From now on: either the user provided a specific file
            # name (and a possible version), or we return a list of
            # all the available files. In no case are the docids
            # visible.
            try:
                bibarchive = BibRecDocs(self.recid)
            except InvenioBibDocFileError:
                register_exception(req=req, alert_admin=True)
                msg = "<p>%s</p><p>%s</p>" % (
                    _("The system has encountered an error in retrieving the list of files for this document."
                      ),
                    _("The error has been logged and will be taken in consideration as soon as possible."
                      ))
                return warning_page(msg, req, ln)

            if bibarchive.deleted_p():
                req.status = apache.HTTP_GONE
                return warning_page(
                    _("Requested record does not seem to exist."), req, ln)

            docname = ''
            docformat = ''
            version = ''
            warn = ''

            if filename:
                # We know the complete file name, guess which docid it
                # refers to
                ## TODO: Change the extension system according to ext.py from setlink
                ##       and have a uniform extension mechanism...
                docname = file_strip_ext(filename)
                docformat = filename[len(docname):]
                if docformat and docformat[0] != '.':
                    docformat = '.' + docformat
                if args['subformat']:
                    docformat += ';%s' % args['subformat']
            else:
                docname = args['docname']

            if not docformat:
                docformat = args['format']
                if args['subformat']:
                    docformat += ';%s' % args['subformat']

            if not version:
                version = args['version']

            ## Download as attachment
            is_download = False
            if args['download']:
                is_download = True

            # version could be either empty, or all or an integer
            try:
                int(version)
            except ValueError:
                if version != 'all':
                    version = ''

            # Hidden files are listed/served only to superadmins.
            display_hidden = isUserSuperAdmin(user_info)

            if version != 'all':
                # search this filename in the complete list of files
                for doc in bibarchive.list_bibdocs():
                    if docname == bibarchive.get_docname(doc.id):
                        try:
                            try:
                                docfile = doc.get_file(docformat, version)
                            except InvenioBibDocFileError, msg:
                                req.status = apache.HTTP_NOT_FOUND
                                if not CFG_INSPIRE_SITE and req.headers_in.get(
                                        'referer'):
                                    ## There must be a broken link somewhere.
                                    ## Maybe it's good to alert the admin
                                    register_exception(req=req,
                                                       alert_admin=True)
                                warn += write_warning(
                                    _("The format %s does not exist for the given version: %s"
                                      ) % (cgi.escape(docformat),
                                           cgi.escape(str(msg))))
                                break
                            (auth_code,
                             auth_message) = docfile.is_restricted(user_info)
                            # Per-file restriction check; record owners are
                            # always allowed through.
                            if auth_code != 0 and not is_user_owner_of_record(
                                    user_info, self.recid):
                                # Restricted icons get a placeholder image
                                # instead of an auth round-trip.
                                if CFG_BIBDOCFILE_ICON_SUBFORMAT_RE.match(
                                        get_subformat_from_format(docformat)):
                                    return stream_restricted_icon(req)
                                if user_info['email'] == 'guest':
                                    cookie = mail_cookie_create_authorize_action(
                                        'viewrestrdoc',
                                        {'status': docfile.get_status()})
                                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                                    make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \
                                        CFG_SITE_SECURE_URL + user_info['uri']}, {})
                                    redirect_to_url(req, target)
                                else:
                                    req.status = apache.HTTP_UNAUTHORIZED
                                    warn += write_warning(
                                        _("This file is restricted: ") +
                                        str(auth_message))
                                    break

                            if not docfile.hidden_p():
                                if not readonly:
                                    # Log the download before streaming.
                                    ip = str(req.remote_ip)
                                    doc.register_download(
                                        ip, docfile.get_version(), docformat,
                                        uid, self.recid)
                                try:
                                    return docfile.stream(req,
                                                          download=is_download)
                                except InvenioBibDocFileError, msg:
                                    register_exception(req=req,
                                                       alert_admin=True)
                                    req.status = apache.HTTP_INTERNAL_SERVER_ERROR
                                    warn += write_warning(
                                        _("An error has happened in trying to stream the request file."
                                          ))
                            else:
                                req.status = apache.HTTP_UNAUTHORIZED
                                warn += write_warning(
                                    _("The requested file is hidden and can not be accessed."
                                      ))

                        except InvenioBibDocFileError, msg:
                            register_exception(req=req, alert_admin=True)
Exemplo n.º 40
0
    def __call__(self, req, form):
        """Handle a record-display request for ``self.recid``.

        Washes the search arguments, enforces per-user limits and access
        rights, redirects deleted-and-merged records to their successor,
        and finally delegates to ``perform_request_search``.
        """
        argd = wash_search_urlargd(form)

        argd['recid'] = self.recid

        argd['tab'] = self.tab

        # do we really enter here ?

        if self.format is not None:
            argd['of'] = self.format
        req.argd = argd
        uid = getUid(req)
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this record.",
                                       navmenuid='search')
        elif uid > 0:
            pref = get_user_preferences(uid)
            try:
                if not form.has_key('rg'):
                    # fetch user rg preference only if not overridden via URL
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass

        user_info = collect_user_info(req)
        (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)

        # Cap the records-per-group value for users without bibedit rights.
        if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0:
            argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

        #check if the user has rights to set a high wildcard limit
        #if not, reduce the limit set by user, with the default one
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            if acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        # only superadmins can use verbose parameter for obtaining debug information
        if not isUserSuperAdmin(user_info):
            argd['verbose'] = 0

        # Restricted record: guests are redirected to login with an
        # authorization cookie; logged-in users get a refusal page.
        if auth_code and user_info['email'] == 'guest':
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                    make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text=auth_msg, \
                navmenuid='search')

        from invenio.search_engine import record_exists, get_merged_recid
        # check if the current record has been deleted
        # and has been merged, case in which the deleted record
        # will be redirect to the new one
        record_status = record_exists(argd['recid'])
        merged_recid = get_merged_recid(argd['recid'])
        if record_status == -1 and merged_recid:
            url = CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/%s?ln=%s'
            url %= (str(merged_recid), argd['ln'])
            redirect_to_url(req, url)
        elif record_status == -1:
            req.status = apache.HTTP_GONE ## The record is gone!

        # mod_python does not like to return [] in case when of=id:
        out = perform_request_search(req, **argd)
        if isinstance(out, intbitset):
            return out.fastdump()
        elif out == []:
            return str(out)
        else:
            return out
Exemplo n.º 41
0
def format_record(recID,
                  of,
                  ln=CFG_SITE_LANG,
                  verbose=0,
                  search_pattern=None,
                  xml_record=None,
                  user_info=None,
                  on_the_fly=False):
    """
    Format a record in given output format.

    Return a formatted version of the record in the specified
    language, search pattern, and with the specified output format.
    The function will define which format template must be applied.

    The record to be formatted can be specified with its ID (with
    'recID' parameter) or given as XML representation (with
    'xml_record' parameter). If 'xml_record' is specified 'recID' is
    ignored (but should still be given for reference. A dummy recid 0
    or -1 could be used).

    'user_info' allows to grant access to some functionalities on a
    page depending on the user's priviledges. The 'user_info' object
    makes sense only in the case of on-the-fly formatting. 'user_info'
    is the same object as the one returned by
    'webuser.collect_user_info(req)'

    @param recID: the ID of record to format.
    @type recID: int
    @param of: an output format code (or short identifier for the output format)
    @type of: string
    @param ln: the language to use to format the record
    @type ln: string
    @param verbose: the level of verbosity from 0 to 9 (O: silent,
                                                       5: errors,
                                                       7: errors and warnings, stop if error in format elements
                                                       9: errors and warnings, stop if error (debug mode ))
    @type verbose: int
    @param search_pattern: list of strings representing the user request in web interface
    @type search_pattern: list(string)
    @param xml_record: an xml string represention of the record to format
    @type xml_record: string or None
    @param user_info: the information of the user who will view the formatted page (if applicable)
    @param on_the_fly: if False, try to return an already preformatted version of the record in the database
    @type on_the_fly: boolean
    @return: formatted record
    @rtype: string
    """
    from invenio.search_engine import record_exists
    if search_pattern is None:
        search_pattern = []

    out = ""

    if verbose == 9:
        out += """\n<span class="quicknote">
        Formatting record %i with output format %s.
        </span>""" % (recID, of)
    ############### FIXME: REMOVE WHEN MIGRATION IS DONE ###############
    if CFG_BIBFORMAT_USE_OLD_BIBFORMAT and CFG_PATH_PHP:
        return bibformat_engine.call_old_bibformat(recID,
                                                   of=of,
                                                   on_the_fly=on_the_fly)
    ############################# END ##################################
    if not on_the_fly and \
       (ln == CFG_SITE_LANG or \
        of.lower() == 'xm' or \
        CFG_BIBFORMAT_USE_OLD_BIBFORMAT or \
        (of.lower() in CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS)) and \
        record_exists(recID) != -1:
        # Try to fetch preformatted record. Only possible for records
        # formatted in CFG_SITE_LANG language (other are never
        # stored), or of='xm' which does not depend on language.
        # Exceptions are made for output formats defined in
        # CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS, which are
        # always served from the same cache for any language.  Also,
        # do not fetch from DB when record has been deleted: we want
        # to return an "empty" record in that case
        res = bibformat_dblayer.get_preformatted_record(recID, of)
        if res is not None:
            # record 'recID' is formatted in 'of', so return it
            if verbose == 9:
                last_updated = bibformat_dblayer.get_preformatted_record_date(
                    recID, of)
                out += """\n<br/><span class="quicknote">
                Found preformatted output for record %i (cache updated on %s).
                </span><br/>""" % (recID, last_updated)
            if of.lower() == 'xm':
                res = filter_hidden_fields(res, user_info)
            # try to replace language links in pre-cached res, if applicable:
            if ln != CFG_SITE_LANG and of.lower(
            ) in CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS:
                # The following statements try to quickly replace any
                # language arguments in URL links.  Not an exact
                # science, but should work most of the time for most
                # of the formats, with not too many false positives.
                # We don't have time to parse output much here.
                res = res.replace('?ln=' + CFG_SITE_LANG, '?ln=' + ln)
                res = res.replace('&ln=' + CFG_SITE_LANG, '&ln=' + ln)
                res = res.replace('&amp;ln=' + CFG_SITE_LANG, '&amp;ln=' + ln)
            out += res
            return out
        else:
            if verbose == 9:
                out += """\n<br/><span class="quicknote">
                No preformatted output found for record %s.
                </span>""" % recID

    # Live formatting of records in all other cases
    if verbose == 9:
        out += """\n<br/><span class="quicknote">
        Formatting record %i on-the-fly.
        </span>""" % recID

    try:
        out += bibformat_engine.format_record(recID=recID,
                                              of=of,
                                              ln=ln,
                                              verbose=verbose,
                                              search_pattern=search_pattern,
                                              xml_record=xml_record,
                                              user_info=user_info)
        if of.lower() == 'xm':
            out = filter_hidden_fields(out, user_info)
        return out
    except Exception, e:
        register_exception(prefix="An error occured while formatting record %i in %s" % \
                           (recID, of),
                           alert_admin=True)
        #Failsafe execution mode
        import invenio.template
        websearch_templates = invenio.template.load('websearch')
        if verbose == 9:
            out += """\n<br/><span class="quicknote">
            An error occured while formatting record %i. (%s)
            </span>""" % (recID, str(e))
        if of.lower() == 'hd':
            if verbose == 9:
                out += """\n<br/><span class="quicknote">
                Formatting record %i with websearch_templates.tmpl_print_record_detailed.
                </span><br/>""" % recID
                return out + websearch_templates.tmpl_print_record_detailed(
                    ln=ln,
                    recID=recID,
                )
        if verbose == 9:
            out += """\n<br/><span class="quicknote">
            Formatting record %i with websearch_templates.tmpl_print_record_brief.
            </span><br/>""" % recID
        return out + websearch_templates.tmpl_print_record_brief(
            ln=ln,
            recID=recID,
        )
def Ask_For_Record_Details_Confirmation(parameters, \
                                        curdir, \
                                        form, \
                                        user_info=None):
    """
       Display the details of a record on which some operation is to be carried
       out and prompt for the user's confirmation that it is the correct record.
       Upon the clicking of the confirmation button, augment step by one.

       Given the "recid" (001) of a record, retrieve the basic metadata
       (title, report-number(s) and author(s)) and display them in the
       user's browser along with a prompt asking them to confirm that
       it is indeed the record that they expected to see.

       The function depends upon the presence of the "sysno" global and the
       presence of the "step" field in the "form" parameter.
       When the user clicks on the "confirm" button, step will be augmented by
       1 and the form will be submitted.
       @parameters: None.
       @return: None.
       @Exceptions raise: InvenioWebSubmitFunctionError if problems are
        encountered;
        InvenioWebSubmitFunctionStop in order to display the details of the
        record and the confirmation message.
    """
    global sysno

    ## Make sure that we know the current step:
    try:
        current_step = int(form['step'])
    except (KeyError, TypeError, ValueError):
        ## BUGFIX: the original caught only TypeError, but int() raises
        ## ValueError for non-numeric strings and a missing 'step' field
        ## raises KeyError -- both must map to the documented error.
        ## Can't determine step.
        msg = "Unable to determine submission step. Cannot continue."
        raise InvenioWebSubmitFunctionError(msg)
    else:
        newstep = current_step + 1

    ## Make sure that the sysno is valid:
    try:
        working_recid = int(sysno)
    except (TypeError, ValueError):
        ## BUGFIX: also catch ValueError (non-numeric sysno string), not
        ## just TypeError (sysno is None).
        ## Unable to find the details of this record - cannot query the database
        msg = "Unable to retrieve details of record - record id was invalid."
        raise InvenioWebSubmitFunctionError(msg)

    if not record_exists(working_recid):
        ## Record doesn't exist.
        msg = "Unable to retrieve details of record [%s] - record does not " \
              "exist." % working_recid
        raise InvenioWebSubmitFunctionError(msg)

    ## Retrieve the details to be displayed:
    ##
    ## Author(s):
    rec_authors = ""
    rec_first_author    = print_record(int(sysno), 'tm', "100__a")
    rec_other_authors   = print_record(int(sysno), 'tm', "700__a")
    if rec_first_author != "":
        rec_authors += "".join(["%s<br />\n" % cgi.escape(author.strip()) for \
                                author in rec_first_author.split("\n")])
    if rec_other_authors != "":
        rec_authors += "".join(["%s<br />\n" % cgi.escape(author.strip()) for \
                                author in rec_other_authors.split("\n")])

    ## Title:
    rec_title = "".join(["%s<br />\n" % cgi.escape(title.strip()) for title in \
                          print_record(int(sysno), 'tm', "245__a").split("\n")])

    ## Report numbers:
    rec_reportnums = ""
    rec_reportnum        = print_record(int(sysno), 'tm', "037__a")
    rec_other_reportnums = print_record(int(sysno), 'tm', "088__a")
    if rec_reportnum != "":
        rec_reportnums += "".join(["%s<br />\n" % cgi.escape(repnum.strip()) \
                                   for repnum in rec_reportnum.split("\n")])
    if rec_other_reportnums != "":
        rec_reportnums += "".join(["%s<br />\n" % cgi.escape(repnum.strip()) \
                                   for repnum in \
                                   rec_other_reportnums.split("\n")])

    ## Raise the Stop exception that webSubmit uses to display the record
    ## details and the confirmation prompt to the user:
    raise InvenioWebSubmitFunctionStop(CFG_DOCUMENT_DETAILS_MESSAGE % \
                                  { 'report-numbers' : rec_reportnums, \
                                    'title'          : rec_title, \
                                    'author'         : rec_authors, \
                                    'newstep'        : newstep, \
                                    'admin-email'    : CFG_SITE_ADMIN_EMAIL, \
                                  }   )
    def display(self, req, form):
        """
        Show the tab 'holdings'.

        Renders the 'holdings' detailed-record tab for ``self.recid``:
        washes the request arguments, verifies that the record exists,
        checks the user's right to view it, and assembles the tabbed
        detailed-record page around the holdings information.

        @param req: the mod_python request object
        @param form: the raw request form to be washed
        @return: a complete HTML page (or a redirect / error page)
        """

        # Wash the raw form into typed arguments with defaults.
        argd = wash_urlargd(form, {'do': (str, "od"),
                                   'ds': (str, "all"),
                                   'nb': (int, 100),
                                   'p': (int, 1),
                                   'voted': (int, -1),
                                   'reported': (int, -1),
                                   })
        _ = gettext_set_language(argd['ln'])

        # record_exists() returns 1 (exists), -1 (deleted) or 0 (unknown);
        # anything other than 1 yields an explanatory page instead of the tab.
        record_exists_p = record_exists(self.recid)
        if record_exists_p != 1:
            if record_exists_p == -1:
                msg = _("The record has been deleted.")
            else:
                msg = _("Requested record does not seem to exist.")
            msg = '<span class="quicknote">' + msg + '</span>'
            title, description, keywords = \
                   websearch_templates.tmpl_record_page_header_content(req, self.recid, argd['ln'])
            return page(title = title,
                        show_title_p = False,
                        body = msg,
                        description = description,
                        keywords = keywords,
                        uid = getUid(req),
                        language = argd['ln'],
                        req = req,
                        navmenuid='search')

        body = perform_get_holdings_information(self.recid, req, argd['ln'])

        uid = getUid(req)


        # Access control: guests are redirected to the login page with a
        # cookie that authorizes the collection view after authentication;
        # authenticated-but-unauthorized users get an error page.
        user_info = collect_user_info(req)
        (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)
        if auth_code and user_info['email'] == 'guest' and not user_info['apache_user']:
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                CFG_SITE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text = auth_msg)


        # Build the ordered list of detailed-record tabs, marking
        # 'holdings' as the currently selected one.
        unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(self.recid)),
                                                    self.recid,
                                                    ln=argd['ln'])
        ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()]
        ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
        link_ln = ''
        if argd['ln'] != CFG_SITE_LANG:
            link_ln = '?ln=%s' % argd['ln']
        tabs = [(unordered_tabs[tab_id]['label'], \
                 '%s/record/%s/%s%s' % (CFG_SITE_URL, self.recid, tab_id, link_ln), \
                 tab_id in ['holdings'],
                 unordered_tabs[tab_id]['enabled']) \
                for (tab_id, _order) in ordered_tabs_id
                if unordered_tabs[tab_id]['visible'] == True]
        top = webstyle_templates.detailed_record_container_top(self.recid,
                                                               tabs,
                                                               argd['ln'])
        bottom = webstyle_templates.detailed_record_container_bottom(self.recid,
                                                                     tabs,
                                                                     argd['ln'])

        # Page title and navigation trail derived from the record's
        # primary collection.
        title = websearch_templates.tmpl_record_page_header_content(req, self.recid, argd['ln'])[0]
        navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid), ln=argd['ln'])
        navtrail += ' &gt; <a class="navtrail" href="%s/record/%s?ln=%s">'% (CFG_SITE_URL, self.recid, argd['ln'])
        navtrail += title
        navtrail += '</a>'

        # Assemble the final page: header, search page chrome, tab
        # container around the holdings body, then the footer.
        return pageheaderonly(title=title,
                              navtrail=navtrail,
                              uid=uid,
                              verbose=1,
                              req=req,
                              metaheaderadd = "<link rel=\"stylesheet\" href=\"%s/img/jquery-ui.css\" type=\"text/css\" />" % CFG_SITE_URL,
                              language=argd['ln'],
                              navmenuid='search',
                              navtrail_append_title_p=0) + \
                              websearch_templates.tmpl_search_pagestart(argd['ln']) + \
                              top + body + bottom + \
                              websearch_templates.tmpl_search_pageend(argd['ln']) + \
                              pagefooteronly(lastupdated=__lastupdated__, language=argd['ln'], req=req)
Exemplo n.º 44
0
def format_element(bfo):
    """
    Special contract with US DoE Office of Scientific and Technical Information
    29/08/2013 R.A.

    Serializes selected record info as "OSTI" xml

    @param bfo: BibFormat object wrapping the record being formatted
    @return: UTF-8 "OSTI" XML string, or None when lxml is not installed
    """

    try:
        from lxml import etree
    except ImportError:
        # lxml is an optional dependency: produce no output without it.
        return

    from invenio.search_engine import perform_request_search, get_fieldvalues, record_exists

    # a dictionary of Inspire subjects mapped to OSTI coded research categories
    osticats = {
        "Accelerators": "43 PARTICLE ACCELERATORS",
        "Computing": "99 GENERAL AND MISCELLANEOUS//MATHEMATICS, COMPUTING, AND INFORMATION SCIENCE",
        "Experiment-HEP": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "General Physics": "71 CLASSICAL AND QUANTUM MECHANICS, GENERAL PHYSICS",
        "Instrumentation": "46 INSTRUMENTATION RELATED TO NUCLEAR SCIENCE AND TECHNOLOGY",
        "Astrophysics": "71 CLASSICAL AND QUANTUM MECHANICS, GENERAL PHYSICS",
        "Lattice": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "Math and Math Physics": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "Theory-Nucl": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "Experiment-Nucl": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "Phenomenology-HEP": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "Theory-HEP": "72 PHYSICS OF ELEMENTARY PARTICLES AND FIELDS",
        "Other": "71 CLASSICAL AND QUANTUM MECHANICS, GENERAL PHYSICS",
    }

    rec = etree.Element("rec")
    recid = bfo.recID
    if record_exists(recid) == -1:
        # Deleted record: return just the empty <rec/> envelope.
        # BUGFIX: lxml's etree.tostring() has no 'declaration' keyword --
        # the parameter is named 'xml_declaration' (as used correctly at
        # the end of this function); the old call raised TypeError here.
        return etree.tostring(rec, encoding="UTF-8", xml_declaration=False)

    node = etree.SubElement

    for recnum in [
        x.replace("SPIRES-", "") for x in [r for r in get_fieldvalues(recid, "970__a") if r.startswith("SPIRES-")]
    ]:
        # SPIRES record number is used if available to prevent OSTI loading
        # duplicate records from INSPIRE which they already got from SPIRES.
        node(rec, "accession_num").text = recnum
    if not rec.xpath("//accession_num"):
        # use regular inspire recid
        node(rec, "accession_num").text = str(recid)

    for title in get_fieldvalues(recid, "245__a"):
        node(rec, "title").text = unicode(title, "utf-8")

    # The authors in the ostixml are all strung together between author tags
    # delimited by ';' If zero or > 10 authors don't show any authors
    authors = get_fieldvalues(recid, "100__a") + get_fieldvalues(recid, "700__a")

    if len(authors) <= 10 and len(authors) > 0:
        node(rec, "author").text = "; ".join([unicode(a, "utf-8") for a in authors])

    for category in get_fieldvalues(recid, "65017a"):
        if osticats.has_key(category):
            node(rec, "subj_category").text = osticats[category]
            node(rec, "subj_keywords").text = category

    for pubdate in get_fieldvalues(recid, "269__c"):
        node(rec, "date").text = pubdate

    # Fermilab report numbers mapped to OSTI doc types
    for dtype in get_fieldvalues(recid, "037__a"):
        if "fermilab" in dtype.lower():
            if "PUB" in dtype:
                doctype = "JA"
            elif "CONF" in dtype:
                doctype = "CO"
            elif "THESIS" in dtype:
                doctype = "TD"
            else:
                doctype = "TR"
            node(rec, "doctype").text = doctype
    # One MARC field is used for conferences and journals.  So, the following
    # journal coding handles the variations, and outputs journal and
    # conf. cites in a nice order.  If the conf has a cnum, we get the conf
    # info from its separate record in the conf. "collection."  There are a
    # few if-then gymnastics to cover possible missing information and still
    # make a note that looks okay (sort of)
    journals = bfo.fields("773__", repeatable_subfields_p=True)
    for journal in journals:
        if journal.has_key("p"):
            jinfo = str(journal["p"][0])
            if journal.has_key("v"):
                jinfo += " %s" % journal["v"][0]
            if journal.has_key("c"):
                jinfo += ":%s" % journal["c"][0]
            if journal.has_key("y"):
                jinfo += ",%s" % journal["y"][0]
            node(rec, "journal_info").text = unicode(jinfo, "utf-8")

        confstring = ""
        # without t info or cnum don't print anything
        if journal.has_key("t"):
            confstring += "%s: " % journal["t"][0]
        if journal.has_key("w"):
            conf_info = {}
            cnum = journal["w"][0].replace("/", "-")
            idrec = perform_request_search(p="111__g:" + str(cnum), c="Conferences")
            if idrec:
                for subfield in ("a", "c", "d"):
                    val = get_fieldvalues(idrec[0], "111__%s" % subfield, repetitive_values=True)
                    if val:
                        conf_info[subfield] = val[0]
                confstring += "%s. %s, %s." % tuple(conf_info.get(x, "") for x in ("a", "c", "d"))
        if journal.has_key("c") and confstring != "":
            confstring += " pp: %s" % journal["c"][0]

    for doi in get_fieldvalues(recid, "0247_a"):
        node(rec, "doi").text = doi

    if journals and confstring != "":
        # because it has to come after doi (?)
        # although order is not guaranteed for XML serialization
        node(rec, "conf_info").text = unicode(confstring, "utf-8")

    for pages in get_fieldvalues(recid, "300__a"):
        node(rec, "format").text = "%s pages" % pages

    for lang in get_fieldvalues(recid, "041__a"):
        node(rec, "language").text = lang
    # As with journals, eprints are in with report nos. in our MARC format
    # so they have to be separated out
    eprint = ""
    for repno in get_fieldvalues(recid, "037__a"):
        if "arXiv" in repno:
            eprint = repno
            node(rec, "arXiv_eprint").text = "arXiv eprint number %s" % unicode(repno, "utf-8")
        else:
            node(rec, "report_number").text = unicode(repno, "utf-8")

    urls = bfo.fields("8564_", repeatable_subfields_p=True)
    for url in urls:
        if url.has_key("y") and "FERMILAB" in url["y"][0] and url.has_key("u"):
            node(rec, "url").text = "%s.pdf" % url["u"][0]

    if eprint:
        node(rec, "availability").text = "http://arXiv.org/abs/%s" % eprint

    node(rec, "sponsor_org").text = "DOE Office of Science"

    # Harvest date: prefer the record's modification date, fall back to
    # the SPIRES-era MARC field.
    dt_harvest = get_modification_date(recid)
    if dt_harvest:
        node(rec, "dt_harvest").text = dt_harvest
    else:
        # fallback to SPIRES era marc
        for date in get_fieldvalues(recid, "961__c"):
            node(rec, "dt_harvest").text = date

    out = etree.tostring(rec, encoding="UTF-8", xml_declaration=False, pretty_print=True, method="xml").rstrip("\n")
    return out
def oailistidentifiers(args):
    """Prints OAI response to the ListIdentifiers verb.

    Outputs at most CFG_OAI_LOAD headers per request; the remaining
    sysnos are cached under a resumptionToken for the follow-up request.
    Deleted records are announced with a 'deleted' header status unless
    CFG_OAI_DELETED_POLICY is 'no', in which case they are suppressed.
    """

    arg = parse_args(args)

    out = ""
    resumptionToken_printed = False

    sysno  = []   # sysnos deferred to the resumptionToken cache
    sysnos = []   # sysnos selected by this request

    if arg['resumptionToken']:
        filename = os.path.join(CFG_CACHEDIR, 'RTdata', arg['resumptionToken'])
        if os.path.exists(filename) == 0:
            out = out + oai_error("badResumptionToken", "ResumptionToken expired")
            out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers")
            return out

    if arg['resumptionToken']:
        sysnos = oaicacheout(arg['resumptionToken'])
    else:
        sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until'])

    if len(sysnos) == 0: # noRecordsMatch error
        out = out + oai_error("noRecordsMatch", "no records correspond to the request")
        out = oai_error_header(args, "ListIdentifiers") + out + oai_error_footer("ListIdentifiers")
        return out

    i = 0
    for sysno_ in sysnos:
        if sysno_:
            if i >= CFG_OAI_LOAD:           # cache or write?
                if not resumptionToken_printed: # resumptionToken?
                    arg['resumptionToken'] = oaigenresumptionToken()
                    extdate = oaigetresponsedate(CFG_OAI_EXPIRE)
                    if extdate:
                        out = "%s  <resumptionToken expirationDate=\"%s\">%s</resumptionToken>\n" % (out, extdate, arg['resumptionToken'])
                    else:
                        out = "%s  <resumptionToken>%s</resumptionToken>\n" % (out, arg['resumptionToken'])
                    resumptionToken_printed = True
                sysno.append(sysno_)
            else:
                _record_exists = record_exists(sysno_)
                # BUGFIX: the only records NOT returned are deleted ones
                # under the 'no' policy, so the page counter must increment
                # in every other case.  The original condition
                # "not _record_exists == -1 and CFG_OAI_DELETED_POLICY == 'no'"
                # bound 'not' to the comparison only, so the counter only
                # moved under the 'no' policy and the CFG_OAI_LOAD page
                # limit was never honoured for other deleted policies.
                if not (_record_exists == -1 and CFG_OAI_DELETED_POLICY == "no"):
                    i = i + 1 # Increment limit only if record is returned
                for ident in get_field(sysno_, CFG_OAI_ID_FIELD):
                    if ident != '':
                        if _record_exists == -1: #Deleted?
                            if CFG_OAI_DELETED_POLICY == "persistent" \
                                   or CFG_OAI_DELETED_POLICY == "transient":
                                out = out + "    <header status=\"deleted\">\n"
                            else:
                                # In that case, print nothing (do not go further)
                                break
                        else:
                            out = out + "    <header>\n"
                        out = "%s      <identifier>%s</identifier>\n" % (out, escape_space(ident))
                        out = "%s      <datestamp>%s</datestamp>\n" % (out, get_modification_date(oaigetsysno(ident)))
                        for setspec in get_field(sysno_, CFG_OAI_SET_FIELD):
                            if setspec:
                                # Print only if field not empty
                                out = "%s      <setSpec>%s</setSpec>\n" % (out, setspec)
                        out = out + "    </header>\n"

    if resumptionToken_printed:
        oaicacheclean() # clean cache from expired resumptionTokens
        oaicachein(arg['resumptionToken'], sysno)

    out = oai_header(args, "ListIdentifiers") + out + oai_footer("ListIdentifiers")

    return out
Exemplo n.º 46
0
            style='conclusion')
        sys.exit(1)

    to_fix_marc = intbitset()
    print "Created a complete log file into %s" % logfilename
    try:
        try:
            for id_bibdoc1, id_bibdoc2 in bibdoc_bibdoc:
                try:
                    record_does_exist = True
                    recids = get_recid_from_docid(id_bibdoc1)
                    if not recids:
                        print "Skipping %s" % id_bibdoc1
                        continue
                    for recid in recids:
                        if record_exists(recid[0]) > 0:
                            to_fix_marc.add(recid[0])
                        else:
                            record_does_exist = False
                    if not fix_bibdoc_bibdoc(id_bibdoc1, id_bibdoc2, logfile):
                        if record_does_exist:
                            raise StandardError(
                                "Error when correcting document ID %s" %
                                id_bibdoc1)
                except Exception, err:
                    print >> logfile, "ERROR: %s" % err
            print wrap_text_in_a_box("DONE", style='conclusion')
        except:
            logfile.close()
            register_exception()
            print wrap_text_in_a_box(
Exemplo n.º 47
0
def print_record(sysno, format='marcxml', record_exists_result=None):
    """Prints record 'sysno' formatted according to 'format'.

    - if record does not exist, return nothing.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'transient' or 'deleted', then return only header, with status
      'deleted'.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is 'no',
      then return nothing.

    Optional parameter 'record_exists_result' has the value of the result
    of the record_exists(sysno) function (in order not to call that function
    again if already done.)
    """

    out = ""

    # sanity check:
    # record_exists() convention: 0 = missing, 1 = exists, -1 = deleted.
    # The caller may pass its own result to avoid a second lookup.
    if record_exists_result is not None:
        _record_exists = record_exists_result
    else:
        _record_exists = record_exists(sysno)

    if not _record_exists:
        return

    # Both Dublin Core spellings are served through the 'xd' output format.
    if (format == "dc") or (format == "oai_dc"):
        format = "xd"

    # print record opening tags:

    out = out + "  <record>\n"

    if _record_exists == -1:  # Deleted?
        # Deleted records are only advertised (as a header carrying
        # status="deleted") under the 'persistent'/'transient' policies;
        # under any other policy nothing at all is returned.
        if CFG_OAI_DELETED_POLICY == "persistent" or \
               CFG_OAI_DELETED_POLICY == "transient":
            out = out + "    <header status=\"deleted\">\n"
        else:
            return
    else:
        out = out + "   <header>\n"

    # OAI header: identifier(s), datestamp and set membership.
    # NOTE(review): the loop variable 'set' shadows the built-in; kept as-is.
    for ident in get_field(sysno, CFG_OAI_ID_FIELD):
        out = "%s    <identifier>%s</identifier>\n" % (out,
                                                       escape_space(ident))
    out = "%s    <datestamp>%s</datestamp>\n" % (out,
                                                 get_modification_date(sysno))
    for set in get_field(sysno, CFG_OAI_SET_FIELD):
        if set:
            # Print only if field not empty
            out = "%s    <setSpec>%s</setSpec>\n" % (out, set)
    out = out + "   </header>\n"

    if _record_exists == -1:  # Deleted?
        # Deleted records carry no <metadata> section, only the header above.
        pass
    else:
        out = out + "   <metadata>\n"

        if format == "marcxml":
            formatted_record = get_preformatted_record(sysno, 'xm')
            if formatted_record is not None:
                ## MARCXML is already preformatted. Adapt it if needed
                # Infoscience modification :
                # Added custom validator from Swiss librarians
                # The plain MARCXML tags are rewritten into the 'marc:'
                # namespace, pointed at the SLB MARC21slim schema, and a
                # fixed leader is injected right after the opening tag.
                formatted_record = formatted_record.replace(
                    "<record>",
                    "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd\" type=\"Bibliographic\">\n     <marc:leader>00000coc  2200000uu 4500</marc:leader>"
                )
                formatted_record = formatted_record.replace(
                    "<record xmlns=\"http://www.loc.gov/MARC21/slim\">",
                    "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd\" type=\"Bibliographic\">\n     <marc:leader>00000coc  2200000uu 4500</marc:leader>"
                )
                # Prefix every remaining MARCXML tag with the 'marc:'
                # namespace (closing </record also catches </record>).
                formatted_record = formatted_record.replace(
                    "</record", "</marc:record")
                formatted_record = formatted_record.replace(
                    "<controlfield", "<marc:controlfield")
                formatted_record = formatted_record.replace(
                    "</controlfield", "</marc:controlfield")
                formatted_record = formatted_record.replace(
                    "<datafield", "<marc:datafield")
                formatted_record = formatted_record.replace(
                    "</datafield", "</marc:datafield")
                formatted_record = formatted_record.replace(
                    "<subfield", "<marc:subfield")
                formatted_record = formatted_record.replace(
                    "</subfield", "</marc:subfield")
                out += formatted_record
            else:
                ## MARCXML is not formatted in the database, so produce it.
                # Infoscience modification
                out = out + "    <marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://ead.nb.admin.ch/web/standards/slb/MARC21/MARC21slim.xsd\" type=\"Bibliographic\">"
                out = out + "     <marc:leader>00000coc  2200000uu 4500</marc:leader>"
                out = "%s     <marc:controlfield tag=\"001\">%d</marc:controlfield>\n" % (
                    out, int(sysno))

                # Walk every bibXXx/bibrec_bibXXx table pair (tags 00x-99x)
                # and rebuild the MARCXML from the raw field/subfield rows.
                for digit1 in range(0, 10):
                    for digit2 in range(0, 10):
                        bibbx = "bib%d%dx" % (digit1, digit2)
                        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                        query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                                "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\
                                "ORDER BY bb.field_number, b.tag ASC" % (bibbx, bibx)
                        res = run_sql(query,
                                      (sysno, '%d%d%%' % (digit1, digit2)))
                        # -999 is a sentinel meaning "no field element open yet".
                        field_number_old = -999
                        field_old = ""
                        for row in res:
                            field, value, field_number = row[0], row[1], row[2]
                            # field is tag+indicators+subfield code; '_' in
                            # MARC indicator position means blank.
                            ind1, ind2 = field[3], field[4]
                            if ind1 == "_":
                                ind1 = " "
                            if ind2 == "_":
                                ind2 = " "
                            # print field tag
                            # A new field element starts when the field number
                            # changes or the tag+indicators (all but the
                            # subfield code) differ from the previous row.
                            if field_number != field_number_old or field[:
                                                                         -1] != field_old[:
                                                                                          -1]:
                                if format == "marcxml":

                                    # Close the previously opened field first.
                                    if field_number_old != -999:
                                        if field_old[0:2] == "00":
                                            out = out + "     </marc:controlfield>\n"
                                        else:
                                            out = out + "     </marc:datafield>\n"

                                    # Tags starting with "00" are control
                                    # fields (no indicators/subfields).
                                    if field[0:2] == "00":
                                        out = "%s     <marc:controlfield tag=\"%s\">\n" % (
                                            out, encode_for_xml(field[0:3]))
                                    else:
                                        out = "%s     <marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">\n" % (
                                            out, encode_for_xml(field[0:3]),
                                            encode_for_xml(ind1).lower(),
                                            encode_for_xml(ind2).lower())

                                field_number_old = field_number
                                field_old = field
                            # print subfield value
                            if format == "marcxml":
                                value = encode_for_xml(value)

                                if (field[0:2] == "00"):
                                    out = "%s      %s\n" % (out, value)
                                else:
                                    out = "%s      <marc:subfield code=\"%s\">%s</marc:subfield>\n" % (
                                        out, encode_for_xml(field[-1:]), value)

                            # fetch next subfield
                        # all fields/subfields printed in this run, so close the tag:
                        if (format == "marcxml") and field_number_old != -999:
                            if field_old[0:2] == "00":
                                out = out + "     </marc:controlfield>\n"
                            else:
                                out = out + "     </marc:datafield>\n"

                out = out + "    </marc:record>\n"

        elif format == "xd":
            # Dublin Core output delegated to the 'xoaidc' record format.
            out += format_record(sysno, 'xoaidc')

    # print record closing tags:

        out = out + "   </metadata>\n"

    out = out + "  </record>\n"

    return out
Exemplo n.º 48
0
    def decorated(recid, *args, **kwargs):
        """Wrap a record view: resolve the record's collection, enforce
        access restrictions, handle deleted/merged records, and prepare the
        template context (breadcrumbs, tabs, helper callables) before
        invoking the wrapped view ``f``.

        NOTE(review): ``f`` and several helpers come from the enclosing
        decorator scope, which is not visible in this chunk.
        """
        # ensure recid to be integer
        recid = int(recid)
        # Resolve and stash the primary collection on the request globals;
        # .one() raises if the collection row is missing.
        g.collection = collection = Collection.query.filter(
            Collection.name == guess_primary_collection_of_a_record(recid)).\
            one()

        (auth_code, auth_msg) = check_user_can_view_record(current_user, recid)

        # only superadmins can use verbose parameter for obtaining debug information
        if not current_user.is_super_admin and 'verbose' in kwargs:
            kwargs['verbose'] = 0

        # Unauthorized guests are redirected to login with a mail-cookie
        # that re-authorizes access to the restricted collection afterwards.
        if auth_code and current_user.is_guest:
            cookie = mail_cookie_create_authorize_action(
                VIEWRESTRCOLL,
                {'collection': guess_primary_collection_of_a_record(recid)})
            url_args = {
                'action': cookie,
                'ln': g.ln,
                'referer': request.referrer
            }
            flash(_("Authorization failure"), 'error')
            return redirect(url_for('webaccount.login', **url_args))
        elif auth_code:
            # Logged-in but still unauthorized: hard 401.
            flash(auth_msg, 'error')
            abort(apache.HTTP_UNAUTHORIZED)

        from invenio.search_engine import record_exists, get_merged_recid
        # check if the current record has been deleted
        # and has been merged, case in which the deleted record
        # will be redirect to the new one
        record_status = record_exists(recid)
        merged_recid = get_merged_recid(recid)
        if record_status == -1 and merged_recid:
            return redirect(url_for('record.metadata', recid=merged_recid))
        elif record_status == -1:
            abort(apache.HTTP_GONE)  # The record is gone!

        g.record = record = Bibrec.query.get(recid)
        user = None
        if not current_user.is_guest:
            user = User.query.get(current_user.get_id())
        # Record title (MARC 245__a) for breadcrumbs/page title.
        title = get_fieldvalues(recid, '245__a')
        title = title[0] if len(title) > 0 else ''

        b = [(_('Home'), '')] + collection.breadcrumbs()[1:]
        b += [(title, 'record.metadata', dict(recid=recid))]
        current_app.config['breadcrumbs_map'][request.endpoint] = b
        # Build the detailed-page tab descriptors; map each tab key onto
        # the blueprint that serves it ('record' by default).
        g.record_tab_keys = []
        tabs = []
        counts = get_detailed_page_tabs_counts(recid)
        for k, v in get_detailed_page_tabs(collection.id, recid,
                                           g.ln).iteritems():
            t = {}
            b = 'record'
            if k == '':
                k = 'metadata'
            if k == 'comments' or k == 'reviews':
                b = 'webcomment'
            if k == 'linkbacks':
                b = 'weblinkback'
                k = 'index'

            t['key'] = b + '.' + k
            t['count'] = counts.get(k.capitalize(), -1)

            t.update(v)
            tabs.append(t)
            if v['visible']:
                g.record_tab_keys.append(b + '.' + k)

        if CFG_WEBLINKBACK_TRACKBACK_ENABLED:

            # Expose the trackback auto-discovery <link> tag to templates.
            @register_template_context_processor
            def trackback_context():
                from invenio.weblinkback_templates import get_trackback_auto_discovery_tag
                return dict(headerLinkbackTrackbackLink=
                            get_trackback_auto_discovery_tag(recid))

        # Make the record, user, tabs and formatting helpers available in
        # every template rendered by the wrapped view.
        @register_template_context_processor
        def record_context():
            return dict(recid=recid,
                        record=record,
                        user=user,
                        tabs=tabs,
                        title=title,
                        get_mini_reviews=lambda *args, **kwargs:
                        get_mini_reviews(*args, **kwargs).decode('utf8'),
                        collection=collection,
                        format_record=lambda recID, of='hb', ln=g.ln:
                        format_record(recID,
                                      of=of,
                                      ln=ln,
                                      verbose=0,
                                      search_pattern='',
                                      on_the_fly=False))

        return f(recid, *args, **kwargs)
Exemplo n.º 49
0
def oailistrecords(args):
    "Generates response to oailistrecords verb."

    arg = parse_args(args)

    out = ""
    resumptionToken_printed = False

    # Sysnos deferred to the cache for the follow-up request (the
    # metadataPrefix is appended as the last element before caching).
    sysno = []

    # A non-empty resumptionToken must still have its cache file on disk
    # under CFG_CACHEDIR/RTdata, otherwise it has expired.
    if arg['resumptionToken']:
        token_path = os.path.join(CFG_CACHEDIR, 'RTdata',
                                  arg['resumptionToken'])
        if os.path.exists(token_path) == 0:
            out = oai_error("badResumptionToken", "ResumptionToken expired")
            return oai_error_header(
                args, "ListRecords") + out + oai_error_footer("ListRecords")

    if arg['resumptionToken'] != "":
        # Resume a previous harvest: the cached list carries the
        # metadataPrefix as its last element.
        sysnos = oaicacheout(arg['resumptionToken'])
        arg['metadataPrefix'] = sysnos.pop()
    else:
        sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until'])

    if not sysnos:  # noRecordsMatch error
        out += oai_error("noRecordsMatch",
                         "no records correspond to the request")
        return oai_error_header(
            args, "ListRecords") + out + oai_error_footer("ListRecords")

    printed = 0
    for candidate in sysnos:
        if not candidate:
            continue
        if printed >= CFG_OAI_LOAD:  # cache or write?
            # Batch limit reached: emit the resumptionToken once, then
            # defer every remaining sysno to the cache.
            if not resumptionToken_printed:
                arg['resumptionToken'] = oaigenresumptionToken()
                extdate = oaigetresponsedate(CFG_OAI_EXPIRE)
                if extdate:
                    out = "%s <resumptionToken expirationDate=\"%s\">%s</resumptionToken>\n" % (
                        out, extdate, arg['resumptionToken'])
                else:
                    out = "%s <resumptionToken>%s</resumptionToken>\n" % (
                        out, arg['resumptionToken'])
                resumptionToken_printed = True
            sysno.append(candidate)
        else:
            _record_exists = record_exists(candidate)
            if not (_record_exists == -1
                    and CFG_OAI_DELETED_POLICY == "no"):
                #Produce output only if record exists and had to be printed
                printed += 1  # Increment limit only if record is returned
                rendered = print_record(candidate, arg['metadataPrefix'],
                                        _record_exists)
                if rendered:
                    out += rendered

    if resumptionToken_printed:
        oaicacheclean()
        sysno.append(arg['metadataPrefix'])
        oaicachein(arg['resumptionToken'], sysno)

    return oai_header(args, "ListRecords") + out + oai_footer("ListRecords")
Exemplo n.º 50
0
    def test_record_creation(self):
        """End-to-end webdeposit test: for every deposition type whose
        workflow creates a record, run the workflow with dummy form data,
        then verify that the record and its bibupload task exist and that
        the dummy values ended up in the stored MARC."""
        import os
        from wtforms import TextAreaField
        from datetime import datetime

        from invenio.search_engine import record_exists
        from invenio.cache import cache
        from invenio.config import CFG_PREFIX
        from invenio.webuser_flask import login_user
        from invenio.bibworkflow_model import Workflow
        from invenio.bibworkflow_config import CFG_WORKFLOW_STATUS
        from invenio.bibsched_model import SchTASK

        from invenio.webdeposit_utils import get_form, create_workflow, \
            set_form_status, CFG_DRAFT_STATUS
        from invenio.webdeposit_load_deposition_types import \
            deposition_metadata
        from invenio.webdeposit_workflow_utils import \
            create_record_from_marc
        from invenio.bibfield import get_record

        # All operations run as user id 1.
        login_user(1)
        for deposition_type in deposition_metadata.keys():

            deposition = create_workflow(deposition_type, 1)
            assert deposition is not None

            # Check if deposition creates a record
            # (compare the workflow steps' code objects against the
            # create_record_from_marc task; Python 2 func_code attribute).
            create_rec = create_record_from_marc()
            function_exists = False
            for workflow_function in deposition.workflow:
                if create_rec.func_code == workflow_function.func_code:
                    function_exists = True
            if not function_exists:
                # if a record is not created,
                #continue with the next deposition
                continue

            uuid = deposition.get_uuid()

            # Point the per-user cache at this deposition before running it.
            cache.delete_many("1:current_deposition_type", "1:current_uuid")
            cache.add("1:current_deposition_type", deposition_type)
            cache.add("1:current_uuid", uuid)

            # Run the workflow
            deposition.run()

            # Create form's json based on the field name
            form = get_form(1, uuid=uuid)
            webdeposit_json = {}

            # Fill the json with dummy data
            for field in form:
                if isinstance(field, TextAreaField):
                    # If the field is associated with a marc field
                    if field.has_recjson_key() or field.has_cook_function():
                        webdeposit_json[field.name] = "test " + field.name

            draft = dict(
                form_type=form.__class__.__name__,
                form_values=webdeposit_json,
                step=0,  # dummy step
                status=CFG_DRAFT_STATUS['finished'],
                timestamp=str(datetime.now()))

            # Add a draft for the first step
            Workflow.set_extra_data(user_id=1,
                                    uuid=uuid,
                                    key='drafts',
                                    value={0: draft})

            # Keep re-running the workflow (marking each form finished)
            # until it reports completion.
            workflow_status = CFG_WORKFLOW_STATUS.RUNNING
            while workflow_status != CFG_WORKFLOW_STATUS.COMPLETED:
                # Continue workflow
                deposition.run()
                set_form_status(1, uuid, CFG_DRAFT_STATUS['finished'])
                workflow_status = deposition.get_status()

            # Workflow is finished. Test if record is created
            recid = deposition.get_data('recid')
            assert recid is not None
            # Test that record id exists
            assert record_exists(recid) == 1

            # Test that the task exists
            task_id = deposition.get_data('task_id')
            assert task_id is not None

            bibtask = SchTASK.query.filter(SchTASK.id == task_id).first()
            assert bibtask is not None

            # Run bibupload, bibindex, webcoll manually
            cmd = "%s/bin/bibupload %s" % (CFG_PREFIX, task_id)
            assert not os.system(cmd)
            # Verify every dummy form value reached the stored MARC.
            rec = get_record(recid)
            marc = rec.legacy_export_as_marc()
            for field in form:
                if isinstance(field, TextAreaField):
                    # If the field is associated with a marc field
                    if field.has_recjson_key() or field.has_cook_function():
                        assert "test " + field.name in marc
Exemplo n.º 51
0
def oailistidentifiers(args):
    """Prints OAI response to the ListIdentifiers verb.

    Emits record headers only (identifier, datestamp, setSpecs).  At most
    CFG_OAI_LOAD headers are returned per response; remaining sysnos are
    cached under a freshly minted resumptionToken so the harvester can
    page through the result set.

    Deleted records are advertised with status="deleted" when
    CFG_OAI_DELETED_POLICY is 'persistent' or 'transient', and hidden
    entirely when it is 'no'.
    """

    arg = parse_args(args)

    out = ""
    resumptionToken_printed = False

    sysno = []
    sysnos = []

    # A non-empty resumptionToken must still have its cache file under
    # CFG_CACHEDIR/RTdata, otherwise it has expired.
    if arg['resumptionToken']:
        filename = os.path.join(CFG_CACHEDIR, 'RTdata', arg['resumptionToken'])
        if os.path.exists(filename) == 0:
            out = out + oai_error("badResumptionToken",
                                  "ResumptionToken expired")
            out = oai_error_header(
                args,
                "ListIdentifiers") + out + oai_error_footer("ListIdentifiers")
            return out

    if arg['resumptionToken']:
        sysnos = oaicacheout(arg['resumptionToken'])
    else:
        sysnos = oaigetsysnolist(arg['set'], arg['from'], arg['until'])

    if len(sysnos) == 0:  # noRecordsMatch error
        out = out + oai_error("noRecordsMatch",
                              "no records correspond to the request")
        out = oai_error_header(
            args,
            "ListIdentifiers") + out + oai_error_footer("ListIdentifiers")
        return out

    i = 0
    for sysno_ in sysnos:
        if sysno_:
            if i >= CFG_OAI_LOAD:  # cache or write?
                if not resumptionToken_printed:  # resumptionToken?
                    arg['resumptionToken'] = oaigenresumptionToken()
                    extdate = oaigetresponsedate(CFG_OAI_EXPIRE)
                    if extdate:
                        out = "%s  <resumptionToken expirationDate=\"%s\">%s</resumptionToken>\n" % (
                            out, extdate, arg['resumptionToken'])
                    else:
                        out = "%s  <resumptionToken>%s</resumptionToken>\n" % (
                            out, arg['resumptionToken'])
                    resumptionToken_printed = True
                sysno.append(sysno_)
            else:
                _record_exists = record_exists(sysno_)
                # BUGFIX: a header is emitted unless the record is deleted
                # AND the deleted-record policy is 'no', so the counter must
                # follow the same rule: `not (deleted and policy == 'no')`.
                # The previous form, `(not _record_exists == -1) and
                # CFG_OAI_DELETED_POLICY == "no"`, only ever incremented `i`
                # under policy 'no', so CFG_OAI_LOAD paging never kicked in
                # otherwise (cf. the identical guard in oailistrecords).
                if not (_record_exists == -1
                        and CFG_OAI_DELETED_POLICY == "no"):
                    i = i + 1  # Increment limit only if record is returned
                for ident in get_field(sysno_, CFG_OAI_ID_FIELD):
                    if ident != '':
                        if _record_exists == -1:  #Deleted?
                            if CFG_OAI_DELETED_POLICY == "persistent" \
                                   or CFG_OAI_DELETED_POLICY == "transient":
                                out = out + "    <header status=\"deleted\">\n"
                            else:
                                # In that case, print nothing (do not go further)
                                break
                        else:
                            out = out + "    <header>\n"
                        out = "%s      <identifier>%s</identifier>\n" % (
                            out, escape_space(ident))
                        out = "%s      <datestamp>%s</datestamp>\n" % (
                            out, get_modification_date(oaigetsysno(ident)))
                        for set in get_field(sysno_, CFG_OAI_SET_FIELD):
                            if set:
                                # Print only if field not empty
                                out = "%s      <setSpec>%s</setSpec>\n" % (out,
                                                                           set)
                        out = out + "    </header>\n"

    if resumptionToken_printed:
        oaicacheclean()  # clean cache from expired resumptionTokens
        # Store the deferred sysnos for the follow-up request.
        oaicachein(arg['resumptionToken'], sysno)

    out = oai_header(args,
                     "ListIdentifiers") + out + oai_footer("ListIdentifiers")

    return out
Exemplo n.º 52
0
        print wrap_text_in_a_box("Unexpected error while backing up tables. Please, do your checks: %s" % e, style='conclusion')
        sys.exit(1)

    to_fix_marc = intbitset()
    print "Created a complete log file into %s" % logfilename
    try:
        try:
            for id_bibdoc1, id_bibdoc2 in bibdoc_bibdoc:
                try:
                    record_does_exist = True
                    recids = get_recid_from_docid(id_bibdoc1)
                    if not recids:
                        print "Skipping %s" % id_bibdoc1
                        continue
                    for recid in recids:
                        if record_exists(recid[0]) > 0:
                            to_fix_marc.add(recid[0])
                        else:
                            record_does_exist = False
                    if not fix_bibdoc_bibdoc(id_bibdoc1, id_bibdoc2, logfile):
                        if record_does_exist:
                            raise StandardError("Error when correcting document ID %s" % id_bibdoc1)
                except Exception, err:
                    print >> logfile, "ERROR: %s" % err
            print wrap_text_in_a_box("DONE", style='conclusion')
        except:
            logfile.close()
            register_exception()
            print wrap_text_in_a_box(title="INTERRUPTED BECAUSE OF ERROR!", body="""Please see the log file %s for what was the status of record %s prior to the error. Contact %s in case of problems, attaching the log.""" % (logfilename, BibDoc(id_bibdoc1).get_recid(), CFG_SITE_SUPPORT_EMAIL),
            style='conclusion')
            sys.exit(1)
Exemplo n.º 53
0
def retrieve_data_from_record(recid):
    """
    Extract data from a record id in order to import it to the Author list
    interface

    Returns a dict with keys 'authors', 'affiliations', 'paper_title',
    'collaboration', 'experiment_number', 'last_modified', 'reference_ids'
    and 'paper_id', or None when the record does not exist.
    """
    if not record_exists(recid):
        return

    output = {}

    DEFAULT_AFFILIATION_TYPE = cfg.OPTIONS.AUTHOR_AFFILIATION_TYPE[0]
    DEFAULT_IDENTIFIER = cfg.OPTIONS.IDENTIFIERS_LIST[0]
    IDENTIFIERS_MAPPING = cfg.OPTIONS.IDENTIFIERS_MAPPING

    bibrecord = get_record(recid)

    try:
        paper_title = get_fieldvalues(recid, '245__a')[0]
    except IndexError:
        paper_title = ""
    # NOTE(review): unlike paper_title these two keep the full value list
    # returned by get_fieldvalues (no [0] indexing, so the IndexError guard
    # cannot fire) -- presumably downstream expects lists; behaviour kept.
    try:
        collaboration_name = get_fieldvalues(recid, '710__g')
    except IndexError:
        collaboration_name = ""
    try:
        experiment_number = get_fieldvalues(recid, '693__e')
    except IndexError:
        experiment_number = ""

    # Authors: first author (MARC 100) followed by additional authors (700).
    record_authors = bibrecord.get('100', [])
    record_authors.extend(bibrecord.get('700', []))

    author_list = []
    unique_affiliations = []

    for i, field_instance in enumerate(record_authors, 1):
        family_name = ""
        given_name = ""
        name_on_paper = ""
        status = ""
        affiliations = []
        identifiers = []
        field = field_instance[0]
        for subfield_code, subfield_value in field:
            if subfield_code == "a":
                try:
                    family_name = subfield_value.split(',')[0]
                    given_name = subfield_value.split(',')[1].lstrip()
                except IndexError:
                    # Name without a comma separator: leave family/given
                    # empty.  (Was a bare `except:` which also hid any
                    # unrelated error.)
                    pass
                name_on_paper = subfield_value
            elif subfield_code == "u":
                affiliations.append([subfield_value, DEFAULT_AFFILIATION_TYPE])
                unique_affiliations.append(subfield_value)
            elif subfield_code == "i":
                # FIXME This will currently work only with INSPIRE IDs
                id_prefix = subfield_value.split("-")[0]
                if id_prefix in IDENTIFIERS_MAPPING:
                    identifiers.append([subfield_value, IDENTIFIERS_MAPPING[id_prefix]])
        # Guarantee at least one identifier and one affiliation entry so
        # the web-interface rows stay rectangular.
        if not identifiers:
            identifiers.append(['', DEFAULT_IDENTIFIER])
        if not affiliations:
            affiliations.append([UNKNOWN_AFFILIATION, DEFAULT_AFFILIATION_TYPE])
            unique_affiliations.append(UNKNOWN_AFFILIATION)
        author_list.append([
            i,              # Row number
            '',             # Place holder for the web interface
            family_name,
            given_name,
            name_on_paper,
            status,
            affiliations,
            identifiers
        ])

    unique_affiliations = list(set(unique_affiliations))

    output.update({'authors': author_list})

    # Generate all the affiliation related information
    affiliation_list = []
    for i, affiliation in enumerate(unique_affiliations, 1):
        # Resolve the affiliation against the Institutions collection; use
        # its 110__a (+ ', ' + 110__b) as full name on an exact single match.
        institution = perform_request_search(c="Institutions", p='110__u:"' + affiliation + '"')
        full_name = affiliation
        if len(institution) == 1:
            full_name_110_a = get_fieldvalues(institution[0], '110__a')
            if full_name_110_a:
                full_name = str(full_name_110_a[0])
            full_name_110_b = get_fieldvalues(institution[0], '110__b')
            if full_name_110_b:
                full_name += ', ' + str(full_name_110_b[0])
        affiliation = [i,
                       '',
                       affiliation,
                       '',
                       full_name,
                       '',
                       True,
                       '']
        affiliation_list.append(affiliation)

    output.update({'affiliations': affiliation_list})
    output.update({'paper_title': paper_title,
                   'collaboration': collaboration_name,
                   'experiment_number': experiment_number,
                   'last_modified': int(time.time()),
                   'reference_ids': [],
                   'paper_id': '1'})

    return output
Exemplo n.º 54
0
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    Reads and sanitises the JSON batch description, optionally locates the
    video master in the record's BibDocs, runs every transcoding and frame
    extraction job, fixes the BibDoc/MARC metadata and finally sends out
    success/error notifications.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 if the process was successful, 0 if not
    @rtype: int
    """

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        ## Context manager guarantees the handle is closed even if the
        ## write fails (the old file()/close() pair leaked on error)
        with open(xml_filename, 'w') as xml_file:
            xml_file.write(marcxml)
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description', description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat', subformat)
                if (comment == m_comment and
                    description == m_description and
                    subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect of the video from the record
                    try:
                        ## Assumes pbcore metadata mapping
                        ## BUGFIX: read the aspect from this batch job's
                        ## record instead of the hard-coded test recid 124
                        batch_job['aspect'] = get_fieldvalues(batch_job['recid'], CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        ## No aspect ratio stored in the record; keep going
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                          % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    global _BATCH_STEPS
    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job, 'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first and then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                          % bibdoc_video_docname)
            master_format = compose_format(
                                    bibdoc_video_extension,
                                    getval(batch_job, 'bibdoc_master_subformat', 'master')
                                    )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                    batch_job['input'],
                    version=1,
                    description=getval(batch_job, 'bibdoc_master_description'),
                    comment=getval(batch_job, 'bibdoc_master_comment'),
                    docformat=master_format
                    )

    #-----------#
    # JOBS LOOP #
    #-----------#

    return_code = 1
    global _BATCH_STEP

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                           % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(job['bibdoc_docname']).safe_substitute({'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(
                                                 bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension
                                                 )
            _task_write_message("Transcoding %s to %s;%s" % (bibdoc_slave_video_docname,
                                bibdoc_video_extension,
                                bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                 input_file=batch_job['input'],
                 output_file=bibdoc_video_fullpath,
                 acodec=getval(job, 'audiocodec'),
                 vcodec=getval(job, 'videocodec'),
                 ## BUGFIX: the audio and video bitrates were swapped here
                 ## (abitrate received 'videobitrate' and vice versa)
                 abitrate=getval(job, 'audiobitrate'),
                 vbitrate=getval(job, 'videobitrate'),
                 resolution=getval(job, 'resolution'),
                 passes=getval(job, 'passes', 1),
                 special=getval(job, 'special'),
                 specialfirst=getval(job, 'specialfirst'),
                 specialsecond=getval(job, 'specialsecond'),
                 metadata=getval(job, 'metadata'),
                 width=getval(job, 'width'),
                 height=getval(job, 'height'),
                 aspect=getval(batch_job, 'aspect'), # Aspect for every job
                 profile=getval(job, 'profile'),
                 update_fnc=_task_update_overall_status,
                 message_fnc=_task_write_message
                 )
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                              bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(getval(job, 'bibdoc_description'),
                                                 bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            #Move this to the batch description
            bibdoc_frame_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                        + getval(profile, 'extension',
                        getval(job, 'extension', 'jpg')))
            extraction_result = extract_frames(input_file=batch_job['input'],
                           output_file=tmpfname,
                           size=getval(job, 'size'),
                           positions=getval(job, 'positions'),
                           numberof=getval(job, 'numberof'),
                           width=getval(job, 'width'),
                           height=getval(job, 'height'),
                           aspect=getval(batch_job, 'aspect'),
                           profile=getval(job, 'profile'),
                           update_fnc=_task_update_overall_status,
                           )
            return_code &= extraction_result

            ## only on success:
            if extraction_result:
                ## for every filename in the directories, create a bibdoc that contains
                ## all sizes of the frame from the two directories
                files = os.listdir(tmpdir)
                for filename in files:
                    ## The docname was altered by BibEncode extract through substitution
                    ## Retrieve it from the filename again
                    bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                    _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                    ## If the bibdoc exists, receive it
                    if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                        bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                    ## Create a new bibdoc if it does not exist
                    else:
                        bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                    ## The filename including path from tmpdir
                    fname = os.path.join(tmpdir, filename)

                    bibdoc_frame_format = compose_format(bibdoc_frame_extension, bibdoc_frame_subformat)
                    ## Same as with the master, if the format already exists,
                    ## override it, because something went wrong before
                    if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                        bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                    _task_write_message("Adding %s jpg;%s to BibDoc"
                                  % (bibdoc_frame_docname,
                                     getval(job, 'bibdoc_subformat')))
                    bibdoc_frame.add_file_new_format(
                                    fname,
                                    version=1,
                                    description=getval(job, 'bibdoc_description'),
                                    comment=getval(job, 'bibdoc_comment'),
                                    docformat=bibdoc_frame_format)
            ## Remove the temporary folders
            _task_write_message("Removing temporary directory")
            shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file = getval(batch_job, 'input'),
                                 pbcoreIdentifier = batch_job['recid'],
                                 aspect_override = getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        with open(getval(batch_job, 'marc_snippet')) as marc_snippet:
            marcxml = marc_snippet.read()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successful
        if not return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    ## Input already gone; nothing to clean up
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            ## BUGFIX: the original condition was
            ## `type(getval(...) == type(str()))`, which takes the type of a
            ## boolean and is therefore always truthy; the intended test is
            ## whether 'notify_admin' holds an explicit address string.
            if isinstance(getval(batch_job, 'notify_admin'), str):
                _notify_error_admin(batch_job,
                                    getval(batch_job, 'notify_admin'))
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
    ## NOTE(review): despite the docstring, this has always returned 1
    ## unconditionally (job failures are tracked in return_code and reported
    ## via the notifications above); kept as-is since callers may rely on it.
    return 1
Exemplo n.º 55
0
def get_bibrecord(recid):
    """Return the record identified by *recid* wrapped as a BibRecord
    structure, or None when no such record exists."""
    if not record_exists(recid):
        return None
    marcxml = print_record(recid, "xm")
    return create_record(marcxml)[0]
def print_record(sysno, format='marcxml', record_exists_result=None):
    """Prints record 'sysno' formatted according to 'format'.

    - if record does not exist, return nothing.

    - if record has been deleted and CFG_OAI_DELETED_POLICY is
      'persistent' or 'transient', then return only header, with status
      'deleted'.

    - if record has been deleted and the policy is anything else,
      then return nothing.

    Optional parameter 'record_exists_result' has the value of the result
    of the record_exists(sysno) function (in order not to call that function
    again if already done.)
    """

    out = ""

    # sanity check:
    # record_exists() convention used below: falsy (0) means absent,
    # -1 means the record exists but has been deleted.
    if record_exists_result is not None:
        _record_exists = record_exists_result
    else:
        _record_exists = record_exists(sysno)

    if not _record_exists:
        return

    # Dublin Core requests are served through the 'xd' output format.
    if (format == "dc") or (format == "oai_dc"):
        format = "xd"

    # print record opening tags:

    out = out + "  <record>\n"

    if _record_exists == -1: # Deleted?
        if CFG_OAI_DELETED_POLICY == "persistent" or \
               CFG_OAI_DELETED_POLICY == "transient":
            out = out + "    <header status=\"deleted\">\n"
        else:
            return
    else:
        out = out + "   <header>\n"

    # OAI header: identifier(s), datestamp and the set memberships.
    for ident in get_field(sysno, CFG_OAI_ID_FIELD):
        out = "%s    <identifier>%s</identifier>\n" % (out, escape_space(ident))
    out = "%s    <datestamp>%s</datestamp>\n" % (out, get_modification_date(sysno))
    for set in get_field(sysno, CFG_OAI_SET_FIELD):
        if set:
            # Print only if field not empty
            out = "%s    <setSpec>%s</setSpec>\n" % (out, set)
    out = out + "   </header>\n"

    if _record_exists == -1: # Deleted?
        # Deleted records get the header only, no <metadata> section.
        pass
    else:
        out = out + "   <metadata>\n"

        if format == "marcxml":
            formatted_record = get_preformatted_record(sysno, 'xm')
            if formatted_record is not None:
                ## MARCXML is already preformatted. Adapt it if needed
                ## by rewriting the tags into the marc: namespace expected
                ## by OAI-PMH consumers.
                formatted_record = formatted_record.replace("<record>", "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\" type=\"Bibliographic\">\n     <marc:leader>00000coc  2200000uu 4500</marc:leader>")
                formatted_record = formatted_record.replace("<record xmlns=\"http://www.loc.gov/MARC21/slim\">", "<marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\" type=\"Bibliographic\">\n     <marc:leader>00000coc  2200000uu 4500</marc:leader>")
                formatted_record = formatted_record.replace("</record", "</marc:record")
                formatted_record = formatted_record.replace("<controlfield", "<marc:controlfield")
                formatted_record = formatted_record.replace("</controlfield", "</marc:controlfield")
                formatted_record = formatted_record.replace("<datafield", "<marc:datafield")
                formatted_record = formatted_record.replace("</datafield", "</marc:datafield")
                formatted_record = formatted_record.replace("<subfield", "<marc:subfield")
                formatted_record = formatted_record.replace("</subfield", "</marc:subfield")
                out += formatted_record
            else:
                ## MARCXML is not formatted in the database, so produce it.
                out = out + "    <marc:record xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\" type=\"Bibliographic\">"
                out = out + "     <marc:leader>00000coc  2200000uu 4500</marc:leader>"
                out = "%s     <marc:controlfield tag=\"001\">%d</marc:controlfield>\n" % (out, int(sysno))

                # Walk all bibXXx/bibrec_bibXXx table pairs (one pair per
                # leading two digits of the MARC tag) and serialize every
                # field/subfield belonging to this record.
                for digit1 in range(0, 10):
                    for digit2 in range(0, 10):
                        bibbx = "bib%d%dx" % (digit1, digit2)
                        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                        query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\
                                "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s "\
                                "ORDER BY bb.field_number, b.tag ASC" % (bibbx, bibx)
                        res = run_sql(query, (sysno, '%d%d%%' % (digit1, digit2)))
                        # Sentinel meaning "no field open yet" for the
                        # open/close-tag tracking below.
                        field_number_old = -999
                        field_old = ""
                        for row in res:
                            field, value, field_number = row[0], row[1], row[2]
                            ind1, ind2 = field[3], field[4]
                            if ind1 == "_":
                                ind1 = " "
                            if ind2 == "_":
                                ind2 = " "
                            # print field tag
                            if field_number != field_number_old or field[:-1] != field_old[:-1]:
                                if format == "marcxml":

                                    # Close the previously opened field, if any.
                                    if field_number_old != -999:
                                        if field_old[0:2] == "00":
                                            out = out + "     </marc:controlfield>\n"
                                        else:
                                            out = out + "     </marc:datafield>\n"

                                    # Tags starting with "00" are control fields.
                                    if field[0:2] == "00":
                                        out = "%s     <marc:controlfield tag=\"%s\">\n" % (out, encode_for_xml(field[0:3]))
                                    else:
                                        out = "%s     <marc:datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">\n" % (out, encode_for_xml(field[0:3]), encode_for_xml(ind1).lower(), encode_for_xml(ind2).lower())


                                field_number_old = field_number
                                field_old = field
                            # print subfield value
                            if format == "marcxml":
                                value = encode_for_xml(value)

                                if(field[0:2] == "00"):
                                    out = "%s      %s\n" % (out, value)
                                else:
                                    out = "%s      <marc:subfield code=\"%s\">%s</marc:subfield>\n" % (out, encode_for_xml(field[-1:]), value)


                            # fetch next subfield
                        # all fields/subfields printed in this run, so close the tag:
                        if (format == "marcxml") and field_number_old != -999:
                            if field_old[0:2] == "00":
                                out = out + "     </marc:controlfield>\n"
                            else:
                                out = out + "     </marc:datafield>\n"

                out = out + "    </marc:record>\n"

        elif format == "xd":
            out += format_record(sysno, 'xoaidc')

    # print record closing tags:

        out = out + "   </metadata>\n"

    out = out + "  </record>\n"

    return out
Exemplo n.º 57
0
    def __call__(self, req, form):
        """Serve the detailed record page for ``self.recid``.

        Washes the request arguments, applies per-user limits (records per
        group, wildcard limit, verbosity), enforces record-view
        authorization, redirects deleted-and-merged records to their
        successor, and finally delegates to perform_request_search().

        @param req: the request object (mod_python style)
        @param form: the submitted form/query arguments
        @return: the page content, or an authorization/redirect response
        """
        argd = wash_search_urlargd(form)

        argd['recid'] = self.recid

        argd['tab'] = self.tab

        if self.format is not None:
            argd['of'] = self.format
        req.argd = argd
        uid = getUid(req)
        if uid == -1:
            # -1 signals an invalid/blocked session
            return page_not_authorized(req, "../",
                text="You are not authorized to view this record.",
                                       navmenuid='search')
        elif uid > 0:
            pref = get_user_preferences(uid)
            try:
                if not form.has_key('rg'):
                    # fetch user rg preference only if not overridden via URL
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass

        user_info = collect_user_info(req)
        (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)

        # Cap the records-per-group parameter unless the user holds the
        # 'runbibedit' privilege.
        if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and acc_authorize_action(req, 'runbibedit')[0] != 0:
            argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

        #check if the user has rights to set a high wildcard limit
        #if not, reduce the limit set by user, with the default one
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            if acc_authorize_action(req, 'runbibedit')[0] != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        # only superadmins can use verbose parameter for obtaining debug information
        if not isUserSuperAdmin(user_info):
            argd['verbose'] = 0

        if auth_code and user_info['email'] == 'guest':
            # Restricted record and anonymous user: send them to the login
            # page with a mail cookie so they return here after signing in.
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                    make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text=auth_msg, \
                navmenuid='search')

        from invenio.search_engine import record_exists, get_merged_recid
        # check if the current record has been deleted
        # and has been merged, case in which the deleted record
        # will be redirect to the new one
        record_status = record_exists(argd['recid'])
        merged_recid = get_merged_recid(argd['recid'])
        if record_status == -1 and merged_recid:
            url = CFG_SITE_URL + '/' + CFG_SITE_RECORD + '/%s?ln=%s'
            url %= (str(merged_recid), argd['ln'])
            redirect_to_url(req, url)

        # mod_python does not like to return [] in case when of=id:
        out = perform_request_search(req, **argd)
        if out == []:
            return str(out)
        else:
            return out
Exemplo n.º 58
0
    def display(self, req, form):
        """
        Show the tab 'holdings'.

        Renders an error page when the record is deleted or missing,
        enforces record-view authorization (guests on restricted records
        are redirected to login), then assembles the full detailed-record
        page with the holdings information as the body.

        @param req: the request object (mod_python style)
        @param form: the submitted form/query arguments
        @return: the rendered HTML page, or a redirect/unauthorized response
        """

        argd = wash_urlargd(
            form, {
                'do': (str, "od"),
                'ds': (str, "all"),
                'nb': (int, 100),
                'p': (int, 1),
                'voted': (int, -1),
                'reported': (int, -1),
            })
        _ = gettext_set_language(argd['ln'])

        # record_exists() returns -1 for deleted records, falsy for absent.
        record_exists_p = record_exists(self.recid)
        if record_exists_p != 1:
            if record_exists_p == -1:
                msg = _("The record has been deleted.")
            else:
                msg = _("Requested record does not seem to exist.")
            msg = '<span class="quicknote">' + msg + '</span>'
            title, description, keywords = \
                   websearch_templates.tmpl_record_page_header_content(req, self.recid, argd['ln'])
            return page(title=title,
                        show_title_p=False,
                        body=msg,
                        description=description,
                        keywords=keywords,
                        uid=getUid(req),
                        language=argd['ln'],
                        req=req,
                        navmenuid='search')

        body = perform_get_holdings_information(self.recid, req, argd['ln'])

        uid = getUid(req)

        user_info = collect_user_info(req)
        (auth_code,
         auth_msg) = check_user_can_view_record(user_info, self.recid)
        if auth_code and user_info['email'] == 'guest':
            # Restricted record and anonymous user: redirect to login with a
            # mail cookie so they come back here after authenticating.
            cookie = mail_cookie_create_authorize_action(
                VIEWRESTRCOLL, {
                    'collection': guess_primary_collection_of_a_record(
                        self.recid)
                })
            target = '/youraccount/login' + \
                make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \
                CFG_SITE_URL + user_info['uri']}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text = auth_msg)

        # Build the tab strip for the detailed record view, sorted by the
        # configured tab order, marking 'holdings' as the active tab.
        unordered_tabs = get_detailed_page_tabs(get_colID(
            guess_primary_collection_of_a_record(self.recid)),
                                                self.recid,
                                                ln=argd['ln'])
        ordered_tabs_id = [(tab_id, values['order'])
                           for (tab_id, values) in unordered_tabs.iteritems()]
        ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1]))
        link_ln = ''
        if argd['ln'] != CFG_SITE_LANG:
            link_ln = '?ln=%s' % argd['ln']
        tabs = [(unordered_tabs[tab_id]['label'], \
                 '%s/record/%s/%s%s' % (CFG_SITE_URL, self.recid, tab_id, link_ln), \
                 tab_id in ['holdings'],
                 unordered_tabs[tab_id]['enabled']) \
                for (tab_id, _order) in ordered_tabs_id
                if unordered_tabs[tab_id]['visible'] == True]
        top = webstyle_templates.detailed_record_container_top(
            self.recid, tabs, argd['ln'])
        bottom = webstyle_templates.detailed_record_container_bottom(
            self.recid, tabs, argd['ln'])

        # Page title and navigation trail for this record.
        title = websearch_templates.tmpl_record_page_header_content(
            req, self.recid, argd['ln'])[0]
        navtrail = create_navtrail_links(
            cc=guess_primary_collection_of_a_record(self.recid), ln=argd['ln'])
        navtrail += ' &gt; <a class="navtrail" href="%s/record/%s?ln=%s">' % (
            CFG_SITE_URL, self.recid, argd['ln'])
        navtrail += title
        navtrail += '</a>'

        # Assemble header + search page chrome + tabbed body + footer.
        return pageheaderonly(title=title,
                              navtrail=navtrail,
                              uid=uid,
                              verbose=1,
                              req=req,
                              metaheaderadd = "<link rel=\"stylesheet\" href=\"%s/img/jquery-ui.css\" type=\"text/css\" />" % CFG_SITE_URL,
                              language=argd['ln'],
                              navmenuid='search',
                              navtrail_append_title_p=0) + \
                              websearch_templates.tmpl_search_pagestart(argd['ln']) + \
                              top + body + bottom + \
                              websearch_templates.tmpl_search_pageend(argd['ln']) + \
                              pagefooteronly(lastupdated=__lastupdated__, language=argd['ln'], req=req)