Example #1
def compare_references(test, a, b):
    ## Let's normalize records to remove the Invenio refextract signature
    a = create_record(a)[0]
    b = create_record(b)[0]
    record_delete_field(a, '999', 'C', '6')
    a = record_xml_output(a)
    b = record_xml_output(b)
    test.assertXmlEqual(a, b)
Example #2
def compare_references(test, a, b):
    ## Let's normalize records to remove the Invenio refextract signature
    a = create_record(a)[0]
    b = create_record(b)[0]
    record_delete_field(a, '999', 'C', '6')
    a = record_xml_output(a)
    b = record_xml_output(b)
    test.assertXmlEqual(a, b)
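Every example on this page calls create_record from invenio.bibrecord, which parses a MARCXML string and returns a (record, status_code, errors) tuple; the record itself is a dict keyed by MARC tag. The following minimal sketch of that calling convention is illustrative only: the sample MARCXML is made up, and record_xml_output is assumed to live in the same module, as the examples below suggest.

from invenio.bibrecord import create_record, record_xml_output

sample_marcxml = """<record>
  <controlfield tag="001">1</controlfield>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">A sample title</subfield>
  </datafield>
</record>"""

# create_record() returns (record, status_code, errors); a status code of 1
# means the MARCXML parsed cleanly, 0 means an error occurred.
record, status_code, errors = create_record(sample_marcxml)
if status_code:
    print record_xml_output(record)  # serialize the structure back to MARCXML
else:
    print "Parsing failed:", errors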
Example #3
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None.
       If mode=='revision' then recid parameter is considered as revid."""
    record = None
    if recid == 'none':
        mode = 'none'
    if mode == 'recid':
        record_status = record_exists(recid)
        #check for errors
        if record_status == 0:
            result['resultCode'], result[
                'resultText'] = 1, 'Non-existent record: %s' % recid
        elif record_status == -1:
            result['resultCode'], result[
                'resultText'] = 1, 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'], result[
                'resultText'] = 1, 'Record %s locked by queue' % recid
        else:
            record = create_record(print_record(recid, 'xm'))[0]
            record_order_subfields(record)

    elif mode == 'tmpfile':
        file_path = '%s_%s.xml' % (_get_file_path(
            recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path):  #check if file doesn't exist
            result['resultCode'], result[
                'resultText'] = 1, 'Temporary file doesnt exist'
        else:  #open file
            tmpfile = open(file_path, 'r')
            record = create_record(tmpfile.read())[0]
            tmpfile.close()

    elif mode == 'revision':
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'], result[
                    'resultText'] = 1, 'The specified revision does not exist'
        else:
            result['resultCode'], result[
                'resultText'] = 1, 'Invalid revision id'

    elif mode == 'none':
        return {}

    else:
        result['resultCode'], result[
            'resultText'] = 1, 'Invalid record mode for record2'
    return record
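A hedged sketch of the result-dictionary contract used above: callers of _get_record_slave pass in a dict that the helper fills with an error code and message when the lookup fails. The record id and uid below are made up for illustration.

result = {'resultCode': 0, 'resultText': ''}
record2 = _get_record_slave(1234, result, mode='recid', uid=1)
if result['resultCode'] != 0:
    print result['resultText']            # e.g. 'Non-existent record: 1234'
elif record2:
    print 'Loaded record with tags:', sorted(record2.keys())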
Example #4
def get_templates(templatesDir, tmpl_name, tmpl_description, extractContent=False):
    """Return list of templates [filename, name, description, content*]
       the extractContent variable indicates whether the parsed content should
       be included"""
    template_fnames = fnmatch.filter(os.listdir(templatesDir), "*.xml")

    templates = []
    for fname in template_fnames:
        filepath = "%s%s%s" % (templatesDir, os.sep, fname)
        template_file = open(filepath, "r")
        template = template_file.read()
        template_file.close()
        fname_stripped = os.path.splitext(fname)[0]
        mo_name = tmpl_name.search(template)
        mo_description = tmpl_description.search(template)
        date_modified = time.ctime(os.path.getmtime(filepath))
        if mo_name:
            name = mo_name.group(1)
        else:
            name = fname_stripped
        if mo_description:
            description = mo_description.group(1)
        else:
            description = ""
        if extractContent:
            parsedTemplate = create_record(template)[0]
            if parsedTemplate != None:
                # If the template was correct
                templates.append([fname_stripped, name, description, parsedTemplate])
            else:
                raise "Problem when parsing the template %s" % (fname,)
        else:
            templates.append([fname_stripped, name, description, date_modified])

    return templates
Example #5
    def doilookup(self, req, form):
        """
        Returns the metadata from the crossref website based on the DOI.
        """
        args = wash_urlargd(form, {
            'doi': (str, '')})
        response = defaultdict(list)
        if args['doi']:
            doi = args['doi']
            try:
                marcxml_template = get_marcxml_for_doi(doi)
            except CrossrefError:
                # Just ignore Crossref errors
                pass
            else:
                record = create_record(marcxml_template)[0]
                if record:
                    # We need to convert this record structure to a simple dictionary
                    for key, value in record.items():  # key, value = (773, [([('0', 'PER:64142'), ...], ' ', ' ', '', 47)])
                        for val in value:  # val = ([('0', 'PER:64142'), ...], ' ', ' ', '', 47)
                            ind1 = val[1].replace(" ", "_")
                            ind2 = val[2].replace(" ", "_")
                            for (k, v) in val[0]:  # k, v = ('0', 'PER:5409')
                                response[key+ind1+ind2+k].append(v)
            # The output dictionary is something like:
            # {"100__a": ['Smith, J.'],
            #  "700__a": ['Anderson, J.', 'Someoneelse, E.'],
            #  "700__u": ['University1', 'University2']}

        # return dictionary as JSON
        return json.dumps(response)
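The inline comments above spell out the shape of a BibRecord field instance: a tuple of (subfields, ind1, ind2, controlfield_value, global_position). Below is a small illustrative walk over one hand-built instance; all values are made up.

# Shape taken from the comments above:
# (subfields, ind1, ind2, controlfield_value, global_field_position)
field_instance = ([('0', 'PER:64142'), ('p', 'Phys.Rev.')], ' ', ' ', '', 47)

subfields, ind1, ind2, _, _ = field_instance
for code, value in subfields:
    print code, value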
Example #6
def get_templates(templatesDir, tmpl_name, tmpl_description, extractContent = False):
    """Return list of templates [filename, name, description, content*]
       the extractContent variable indicates whether the parsed content should
       be included"""
    template_fnames = fnmatch.filter(os.listdir(
            templatesDir), '*.xml')

    templates = []
    for fname in template_fnames:
        template_file = open('%s%s%s' % (
                templatesDir, os.sep, fname),'r')
        template = template_file.read()
        template_file.close()
        fname_stripped = os.path.splitext(fname)[0]
        mo_name = tmpl_name.search(template)
        mo_description = tmpl_description.search(template)
        if mo_name:
            name = mo_name.group(1)
        else:
            name = fname_stripped
        if mo_description:
            description = mo_description.group(1)
        else:
            description = ''
        if (extractContent):
            parsedTemplate = create_record(template)[0]
            if parsedTemplate != None:
                # If the template was correct
                templates.append([fname_stripped, name, description, parsedTemplate])
            else:
                raise "Problem when parsing the template %s" % (fname, )
        else:
            templates.append([fname_stripped, name, description])

    return templates
Example #7
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """ Extend the record rec with the contents of the template and return it"""
    template = get_record_template(template_name)
    if not template:
        return
    template_bibrec = create_record(template)[0]
    # if the record is a holding pen record make all subfields volatile
    if is_hp_record:
        record_make_all_subfields_volatile(template_bibrec)
    for field_tag in template_bibrec:
        if not record_has_field(rec, field_tag):
            for field_instance in template_bibrec[field_tag]:
                record_add_field(rec, field_tag, field_instance[1],
                                 field_instance[2], subfields=field_instance[0])
        else:
            for template_field_instance in template_bibrec[field_tag]:
                subfield_codes_template = field_get_subfield_codes(template_field_instance)
                for field_instance in rec[field_tag]:
                    subfield_codes = field_get_subfield_codes(field_instance)
                    for code in subfield_codes_template:
                        if code not in subfield_codes:
                            field_add_subfield(field_instance, code,
                                               field_get_subfield_values(template_field_instance,
                                               code)[0])
    return rec
Example #8
def perform_request_holdingpen(request_type, recId, changeId=None):
    """
    A method performing the holdingPen ajax request. The following types of requests can be made:
       getHoldingPenUpdates - retrieving the holding pen updates pending for a given record
    """
    response = {}
    if request_type == 'getHoldingPenUpdates':
        changeSet = get_related_hp_changesets(recId)
        changes = []
        for change in changeSet:
            changes.append((str(change[0]), str(change[1])))
        response["changes"] = changes
    elif request_type == 'getHoldingPenUpdateDetails':
        # returning the list of changes related to the holding pen update
        # the format based on what the record difference xtool returns

        assert(changeId != None)
        hpContent = get_hp_update_xml(changeId)
        holdingPenRecord = create_record(hpContent[0], "xm")[0]
        databaseRecord = get_record(hpContent[1])
        response['record'] = holdingPenRecord
        response['changeset_number'] = changeId;
    elif request_type == 'deleteHoldingPenChangeset':
        assert(changeId != None)
        delete_hp_change(changeId);
    return response
Example #9
def replace_references(recid):
    """Replace references for a record

    The record itself is not updated, the marc xml of the document with updated
    references is returned

    Parameters:
    * recid: the id of the record
    """
    # Parse references
    references_xml = extract_references_from_record_xml(recid)
    references = create_record(references_xml.encode("utf-8"))
    # Record marc xml
    record = get_record(recid)

    if references[0]:
        fields_to_add = record_get_field_instances(references[0], tag="999", ind1="%", ind2="%")
        # Replace 999 fields
        record_delete_fields(record, "999")
        record_add_fields(record, "999", fields_to_add)
        # Update record references
        out_xml = record_xml_output(record)
    else:
        out_xml = None

    return out_xml
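A hedged usage sketch of the contract described in the docstring: replace_references returns the updated MARCXML, or None when no references could be extracted. The record id is made up.

new_xml = replace_references(1234)   # hypothetical record id
if new_xml is None:
    print "No references found for record 1234"
else:
    print new_xml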
Example #10
def merge_record_with_template(rec, template_name):
    """ Extend the record rec with the contents of the template and return it"""
    template = get_record_template(template_name)
    if not template:
        return
    template_bibrec = create_record(template)[0]

    for field_tag in template_bibrec:
        if not record_has_field(rec, field_tag):
            for field_instance in template_bibrec[field_tag]:
                record_add_field(rec,
                                 field_tag,
                                 field_instance[1],
                                 field_instance[2],
                                 subfields=field_instance[0])
        else:
            for template_field_instance in template_bibrec[field_tag]:
                subfield_codes_template = field_get_subfield_codes(
                    template_field_instance)
                for field_instance in rec[field_tag]:
                    subfield_codes = field_get_subfield_codes(field_instance)
                    for code in subfield_codes_template:
                        if code not in subfield_codes:
                            field_add_subfield(
                                field_instance, code,
                                field_get_subfield_values(
                                    template_field_instance, code)[0])
    return rec
Example #11
def fetch_remote_record(remote_url):
    """ Gets MARCXML from a server instance of Invenio and returns
    a single BibRecord structure.
    Raises ValueError if returned data is not MARCXML and URLError if
    there's an issue accessing the page after DOWNLOAD_ATTEMPTS times
    """
    url = "%s/export/xm" % (remote_url)
    for cnt in xrange(DOWNLOAD_ATTEMPTS):
        try:
            handle = urlopen(url)
            xml = handle.read()
            handle.close()
            record_creation = create_record(xml)
            if record_creation[1] == 0:
                print "Error: Could not parse record %s" % (url,)
                raise ValueError(str(record_creation[2]))
            return record_creation[0]
        except URLError as exc:
            if cnt < DOWNLOAD_ATTEMPTS - 1:
                print "Timeout #%d: waiting %d seconds..." % (cnt, TIMEOUT_WAIT)
                sleep(TIMEOUT_WAIT)
            else:
                print("ERROR: Could not download %s (tried %d times)" %
                      (url, DOWNLOAD_ATTEMPTS))
                raise exc
Example #12
def cli_clean_revisions(recid, dry_run=True, verbose=True):
    """Clean revisions of the given recid, by removing duplicate revisions
    that do not change the content of the record."""
    if recid == '*':
        recids = intbitset(run_sql("SELECT DISTINCT id_bibrec FROM hstRECORD"))
    else:
        try:
            recids = [int(recid)]
        except ValueError:
            print 'ERROR: record ID must be integer, not %s.' % recid
            sys.exit(1)
    for recid in recids:
        all_revisions = run_sql("SELECT marcxml, job_id, job_name, job_person, job_date FROM hstRECORD WHERE id_bibrec=%s ORDER BY job_date ASC", (recid,))
        previous_rec = {}
        deleted_revisions = 0
        for marcxml, job_id, job_name, job_person, job_date in all_revisions:
            try:
                current_rec = create_record(zlib.decompress(marcxml))[0]
            except Exception:
                print >> sys.stderr, "ERROR: corrupted revisions found. Please run %s --fix-revisions '*'" % sys.argv[0]
                sys.exit(1)
            if records_identical(current_rec, previous_rec):
                deleted_revisions += 1
                if not dry_run:
                    run_sql("DELETE FROM hstRECORD WHERE id_bibrec=%s AND job_id=%s AND job_name=%s AND job_person=%s AND job_date=%s", (recid, job_id, job_name, job_person, job_date))
            previous_rec = current_rec
        if verbose and deleted_revisions:
            print "record %s: deleted %s duplicate revisions out of %s" % (recid, deleted_revisions, len(all_revisions))
    if verbose:
        print "DONE"
Example #13
def replace_references(recid):
    """Replace references for a record

    The record itself is not updated, the marc xml of the document with updated
    references is returned

    Parameters:
    * recid: the id of the record
    """
    # Parse references
    references_xml = extract_references_from_record_xml(recid)
    references = create_record(references_xml.encode('utf-8'))
    # Record marc xml
    record = get_record(recid)

    if references[0]:
        fields_to_add = record_get_field_instances(references[0],
                                                   tag='999',
                                                   ind1='%',
                                                   ind2='%')
        # Replace 999 fields
        record_delete_fields(record, '999')
        record_add_fields(record, '999', fields_to_add)
        # Update record references
        out_xml = record_xml_output(record)
    else:
        out_xml = None

    return out_xml
Example #14
   def test_copy_245_fields_add_caption(self):
       """ Test adding a completely new caption"""
       rec_string = """<record>
 <controlfield tag="001">123456</controlfield>
 <datafield tag="245" ind1=" " ind2=" ">
   <subfield code="a">Some caption</subfield>
 </datafield>
 <datafield tag="245" ind1=" " ind2=" ">
   <subfield code="z">Some ridiculous caption</subfield>
 </datafield>
 <!--some other fields-->
 <datafield tag="520" ind1="" ind2=" ">
   <subfield code="9">HEPDATA</subfield>
 </datafield>
 <datafield tag="245" ind1="z" ind2=" ">
   <subfield code="z">Some ridiculous caption</subfield>
 </datafield>
 <datafield tag="856" ind1="4" ind2=" ">
   <subfield code="z">Some other entry not following even the semantics 2</subfield>
   <subfield code="3">ANOTHER</subfield>
 </datafield>
 </record>"""
       rec = bibrecord.create_record(rec_string)[0]
       paper = hepdatautils.Paper.create_from_record(rec)
       self.assertEqual(None, paper.get_diff_marcxml(rec), \
                           "There should not be need of a patch on the same record")
       paper.comment = "azerty"
       diff_xml = paper.get_diff_marcxml(rec)
       self.assertTrue(diff_xml.find(">Some caption") == -1, \
                       "One of existing captions not found")
       self.assertTrue(diff_xml.find(">Some ridiculous caption") == -1, \
                       "One of existing captions not found")
       self.assertTrue(diff_xml.find(">azerty") != -1, \
                       "New caption not found")
Example #15
def get_remote_record(recid):
    """ For a given remote record ID, we download the record XML and return
    the record in a BibRecord structure
    Parameter:
    (int) recid - record ID for remote record
    Returns: BibRecord
    """
    url = "%s/record/%d/export/xm?ot=001,035" % (REMOTE_URL, recid)
    tmp_file = ''
    try:
        bibrec = None
        tmp_file = download_url(url, retry_count=10, timeout=61.0)
        with open(tmp_file, 'r') as temp:
            content = temp.read()
            bibrec, code, errors = create_record(content)
            if code != 1 or errors:
                _print(
                    "Warning: There were errors creating BibRec structure " +
                    "from remote record #%d" % recid, 4)
        os.remove(tmp_file)
        return bibrec
    except (StandardError, InvenioFileDownloadError, HTTPError) as err:
        _print("Error: Could not download remote record #%d" % recid, 4)
        _print(str(err), 4)
        _print(traceback.format_exc(), 4)
Example #16
def merge_record_with_template(rec, template_name, is_hp_record=False):
    """ Extend the record rec with the contents of the template and return it"""
    template = get_record_template(template_name)
    if not template:
        return
    template_bibrec = create_record(template)[0]
    # if the record is a holding pen record make all subfields volatile
    if is_hp_record:
        record_make_all_subfields_volatile(template_bibrec)
    for field_tag in template_bibrec:
        if not record_has_field(rec, field_tag):
            for field_instance in template_bibrec[field_tag]:
                record_add_field(rec,
                                 field_tag,
                                 field_instance[1],
                                 field_instance[2],
                                 subfields=field_instance[0])
        else:
            for template_field_instance in template_bibrec[field_tag]:
                subfield_codes_template = field_get_subfield_codes(
                    template_field_instance)
                for field_instance in rec[field_tag]:
                    subfield_codes = field_get_subfield_codes(field_instance)
                    for code in subfield_codes_template:
                        if code not in subfield_codes:
                            field_add_subfield(
                                field_instance, code,
                                field_get_subfield_values(
                                    template_field_instance, code)[0])
    record_order_subfields(rec)
    return rec
Example #17
def record_collect_oai_identifiers(record_xml):
    """
    Collects all OAI identifiers from given MARCXML.

    Returns a list of found values in the tag
    CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG.

    @param record_xml: string containing MARCXML to parse

    @return list of identifiers
    """
    result = None
    (record, status_code, list_of_errors) = create_record(record_xml)
    if not status_code:
        # Error happened
        write_message("Error collecting OAI identifier from record: %s" %
                     ("\n".join(list_of_errors),))
    else:
        # All OK! We can get the IDs
        result = record_get_field_values(record,
                                         CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3],
                                         CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3],
                                         CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4],
                                         CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5])
        if not result:
            # No IDs found..
            write_message("No OAI IDs found in record")
    return result
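A hedged sketch of feeding this helper MARCXML read from disk; the file name is made up, and CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG is assumed to be configured as in the example above.

with open('/tmp/harvested_record.xml') as marc_file:
    oai_ids = record_collect_oai_identifiers(marc_file.read())
if oai_ids:
    print "Found OAI identifiers:", oai_ids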
Example #18
    def doilookup(self, req, form):
        """
        Returns the metadata from the crossref website based on the DOI.
        """
        args = wash_urlargd(form, {'doi': (str, '')})
        response = defaultdict(list)
        if args['doi']:
            doi = args['doi']
            try:
                marcxml_template = get_marcxml_for_doi(doi)
            except CrossrefError:
                # Just ignore Crossref errors
                pass
            else:
                record = create_record(marcxml_template)[0]
                if record:
                    # We need to convert this record structure to a simple dictionary
                    for key, value in record.items(
                    ):  # key, value = (773, [([('0', 'PER:64142'), ...], ' ', ' ', '', 47)])
                        for val in value:  # val = ([('0', 'PER:64142'), ...], ' ', ' ', '', 47)
                            ind1 = val[1].replace(" ", "_")
                            ind2 = val[2].replace(" ", "_")
                            for (k, v) in val[0]:  # k, v = ('0', 'PER:5409')
                                response[key + ind1 + ind2 + k].append(v)
            # The output dictionary is something like:
            # {"100__a": ['Smith, J.'],
            #  "700__a": ['Anderson, J.', 'Someoneelse, E.'],
            #  "700__u": ['University1', 'University2']}

        # return dictionary as JSON
        return json.dumps(response)
Example #19
def record_collect_oai_identifiers(record_xml):
    """
    Collects all OAI identifiers from given MARCXML.

    Returns a list of found values in the tag
    CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG.

    @param record_xml: string containing MARCXML to parse

    @return list of identifiers
    """
    result = None
    (record, status_code, list_of_errors) = create_record(record_xml)
    if not status_code:
        # Error happened
        write_message("Error collecting OAI identifier from record: %s" %
                      ("\n".join(list_of_errors), ))
    else:
        # All OK! We can get the IDs
        result = record_get_field_values(record,
                                         CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3],
                                         CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3],
                                         CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4],
                                         CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5])
        if not result:
            # No IDs found..
            write_message("No OAI IDs found in record")
    return result
Example #20
    def _prepare_blob(self, *args, **kwargs):
        #FIXME stop using recstruct!
        from invenio.bibrecord import create_record

        class SaveDict(dict):
            __getitem__ = dict.get

        def dict_extend_helper(d, key, value):
            """
            If the key is present inside the dictionary it creates a list (it not
            present) and extends it with the new value. Almost as in C{list.extend}
            """
            if key in d:
                current_value = d.get(key)
                if not isinstance(current_value, list):
                    current_value = [current_value]
                current_value.append(value)
                value = current_value
            d[key] = value

        self.rec_tree = SaveDict()
        tmp = create_record(self.blob)[0]
        for key, values in tmp.iteritems():
            if key < '010' and key.isdigit():
                self.rec_tree[key] = [value[3] for value in values]
            else:
                for value in values:
                    field = SaveDict()
                    for subfield in value[0]:
                        dict_extend_helper(field, subfield[0], subfield[1])
                    dict_extend_helper(self.rec_tree, (key + value[1] + value[2]).replace(' ', '_'), field)
Example #21
def save_xml_record(recid,
                    uid,
                    xml_record='',
                    to_upload=True,
                    to_merge=False,
                    spec_name=''):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param xml_record: give XML as string instead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use

    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_file_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            #            record_strip_empty_fields(record) # now performed for every record after removing unfilled volatile fields
            xml_record = record_xml_output(record)
            delete_cache_file(recid, uid)
            delete_disabled_changes(used_changes)
    else:
        record = create_record(xml_record)[0]

    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # order subfields alphabetically before saving the record
    #TP: we don't want this    record_order_subfields(record)

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write XML file.
    if not to_merge:
        file_path = '%s.xml' % _get_file_path(recid, uid)
    else:
        file_path = '%s_%s.xml' % (_get_file_path(
            recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
    xml_file = open(file_path, 'w')
    xml_file.write(xml_to_write)
    xml_file.close()

    user_name = get_user_info(uid)[1]
    if to_upload:
        # TP: check whether to add spec name
        if spec_name == '':
            # Pass XML file to BibUpload.
            task_low_level_submission('bibupload', 'bibedit', '-P', '5', '-r',
                                      file_path, '-u', user_name)
        else:
            task_low_level_submission('bibupload', 'bibedit', '-P', '5', '-r',
                                      file_path, '-u', user_name, '-N',
                                      spec_name)
    return True
Example #22
def get_bibrec_for_record(marcxml, opt_mode):
    '''
    A record is uploaded to the system using mainly functionality
    of the bibupload module. Then a bibrec is returned for the record.
    '''
    recs = create_record(marcxml, parser='lxml')
    _, recid, _ = bibupload(recs[0], opt_mode=opt_mode)
    return recid
Example #23
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None.
       If mode=='revision' then recid parameter is considered as revid."""
    record = None
    if recid == 'none':
        mode = 'none'
    if mode == 'recid':
        record_status = record_exists(recid)
        #check for errors
        if record_status == 0:
            result['resultCode'], result['resultText'] = 1, 'Non-existent record: %s' % recid
        elif record_status == -1:
            result['resultCode'], result['resultText'] = 1, 'Deleted record: %s' % recid
        elif record_locked_by_queue(recid):
            result['resultCode'], result['resultText'] = 1, 'Record %s locked by queue' % recid
        else:
            record = create_record( print_record(recid, 'xm') )[0]
            record_order_subfields(record)

    elif mode == 'tmpfile':
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                       CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path): #check if file doesn't exist
            result['resultCode'], result['resultText'] = 1, 'Temporary file doesnt exist'
        else: #open file
            tmpfile = open(file_path, 'r')
            record = create_record( tmpfile.read() )[0]
            tmpfile.close()

    elif mode == 'revision':
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result['resultCode'], result['resultText'] = 1, 'The specified revision does not exist'
        else:
            result['resultCode'], result['resultText'] = 1, 'Invalid revision id'

    elif mode == 'none':
        return {}

    else:
        result['resultCode'], result['resultText'] = 1, 'Invalid record mode for record2'
    return record
Example #24
def get_bibrecord(recid):
    """Return record in BibRecord wrapping."""
    if record_exists(recid):
        record_revision_ids = get_record_revision_ids(recid)
        if record_revision_ids:
            return create_record(get_marcxml_of_revision_id(max(record_revision_ids)))[0]
        else:
            return get_record(recid)
Example #25
def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False,
                    task_name="bibedit", sequence_id=None):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param xml_record: give XML as string instead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use

    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
            xml_record = record_xml_output(record)
            delete_cache(recid, uid)
            delete_disabled_changes(used_changes)
    else:
        record = create_record(xml_record)[0]

    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # order subfields alphabetically before saving the record
    record_order_subfields(record)

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write XML file.
    if not to_merge:
        fd, file_path = tempfile.mkstemp(dir=CFG_BIBEDIT_CACHEDIR,
                                         prefix="%s_" % CFG_BIBEDIT_FILENAME,
                                         suffix="_%s_%s.xml" % (recid, uid))
        f = os.fdopen(fd, 'w')
        f.write(xml_to_write)
        f.close()
    else:
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   CFG_BIBEDIT_TO_MERGE_SUFFIX)
        xml_file = open(file_path, 'w')
        xml_file.write(xml_to_write)
        xml_file.close()

    user_name = get_user_info(uid)[1]
    if to_upload:
        args = ['bibupload', user_name, '-P', '5', '-r',
                file_path, '-u', user_name]
        if task_name == "bibedit":
            args.extend(['--name', 'bibedit'])
        if sequence_id:
            args.extend(["-I", sequence_id])
        args.extend(['--email-logs-on-error'])
        task_low_level_submission(*args)
    return True
Example #26
    def get_legacy_recstruct(self):
        """
        It creates the recstruct representation using the legacy rules defined in
        the configuration file

        #CHECK: it might be a bit of overkill
        """
        from invenio.bibrecord import create_record
        return create_record(self.legacy_export_as_marc())[0]
Example #27
def rollback_record(recid):
        print 'id', recid
        for rev in get_record_revision_ids(recid):
            old_record = create_record(get_marcxml_of_revision_id(rev))
            fields_to_add = record_get_field_instances(old_record[0], tag='520')
            if fields_to_add:
                print 'reverting to', rev
                return create_our_record(recid, fields_to_add)
        print 'FAILED', recid
Example #28
def get_bibrecord(recid):
    """Return record in BibRecord wrapping."""
    if record_exists(recid):
        record_revision_ids = get_record_revision_ids(recid)
        if record_revision_ids:
            return create_record(
                get_marcxml_of_revision_id(max(record_revision_ids)))[0]
        else:
            return get_record(recid)
Example #29
    def get_legacy_recstruct(self):
        """
        It creates the recstruct representation using the legacy rules defined in
        the configuration file

        #CHECK: it might be a bit of overkill
        """
        from invenio.bibrecord import create_record
        return create_record(self.legacy_export_as_marc())[0]
Example #30
def _get_record_slave(recid, result, mode=None, uid=None):
    """Check if record exists and return it in dictionary format.
       If any kind of error occurs returns None.
       If mode=='revision' then recid parameter is considered as revid."""
    record = None
    if recid == "none":
        mode = "none"
    if mode == "recid":
        record_status = record_exists(recid)
        # check for errors
        if record_status == 0:
            result["resultCode"], result["resultText"] = 1, "Non-existent record: %s" % recid
        elif record_status == -1:
            result["resultCode"], result["resultText"] = 1, "Deleted record: %s" % recid
        elif record_locked_by_queue(recid):
            result["resultCode"], result["resultText"] = 1, "Record %s locked by queue" % recid
        else:
            record = create_record(print_record(recid, "xm"))[0]

    elif mode == "tmpfile":
        file_path = "%s_%s.xml" % (_get_file_path(recid, uid), CFG_BIBEDIT_TO_MERGE_SUFFIX)
        if not os.path.isfile(file_path):  # check if file doesn't exist
            result["resultCode"], result["resultText"] = 1, "Temporary file doesnt exist"
        else:  # open file
            tmpfile = open(file_path, "r")
            record = create_record(tmpfile.read())[0]
            tmpfile.close()

    elif mode == "revision":
        if revision_format_valid_p(recid):
            marcxml = get_marcxml_of_revision_id(recid)
            if marcxml:
                record = create_record(marcxml)[0]
            else:
                result["resultCode"], result["resultText"] = 1, "The specified revision does not exist"
        else:
            result["resultCode"], result["resultText"] = 1, "Invalid revision id"

    elif mode == "none":
        return {}

    else:
        result["resultCode"], result["resultText"] = 1, "Invalid record mode for record2"
    return record
Example #31
def get_rn(revision):
    rns = set()
    record = create_record(get_marcxml_of_revision_id(revision))[0]
    fields = record_get_field_instances(record, tag='999', ind1='C', ind2='5')
    for f in fields:
        subfields = field_get_subfield_instances(f)
        for index, s in enumerate(subfields):
            if s[0] == 'r':
                rns.add(tag_arxiv_more(s[1]))
    return rns
Example #32
def _get_record_linking_fields(recid_b, recid_a, tag, ind1, ind2):
    """
    Returns the fields (defined by tag, ind1, ind2) in record (given
    by recid_b) that do not link to another given record (recid_a).
    """
    fields = []
    rec = create_record(format_record(recid_b, "xm"))[0]
    for field_instance in record_get_field_instances(rec, tag=tag, ind1=ind1, ind2=ind2):
        if not ('w', str(recid_a)) in field_instance[0]:
            fields.append(field_instance)
    return fields
Example #33
def record_collect_recid(record_xml):
    """Return recid in MARCXML"""
    result = None
    (record, status_code, list_of_errors) = create_record(record_xml)
    if not status_code:
        # Error happened
        write_message("Error collecting OAI identifier from record: %s" %
                      ("\n".join(list_of_errors), ))
        return
    if "001" in record:
        return record['001'][0][3]
Example #34
    def test_update_the_same_record(self):
        """Tests parsing Paper from a record and diffing with the same
           hepdata entry.
           """
        rec_string = """<record>
  <controlfield tag="001">123456</controlfield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="z">Some other entry not following even the semantics 2</subfield>
    <subfield code="3">ANOTHER</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="u">http://google.com</subfield>
    <subfield code="y">1 This is the link text</subfield>
    <subfield code="3">ADDITIONAL HEPDATA</subfield>
  </datafield>
  <datafield tag="856" ind1="4" ind2=" ">
    <subfield code="u">http://invenio-software.org</subfield>
    <subfield code="y">2 This is some other completely unrelated field</subfield>
    <subfield code="3">ADDITIONAL HEPDATA</subfield>
  </datafield>
  <datafield tag="856" ind1=" " ind2=" ">
    <subfield code="u">http://invenio-software.net</subfield>
    <subfield code="y">This should not be copied</subfield>
    <subfield code="3">Different type</subfield>
  </datafield>
  <datafield tag="520" ind1=" " ind2=" ">
    <subfield code="9">HEPDATA</subfield>
  </datafield>
</record>
"""
        rec = bibrecord.create_record(rec_string)[0]
        paper = hepdatautils.Paper.create_from_record(rec)
        diff_xml = paper.get_diff_marcxml(rec)

        self.assertTrue(diff_xml is None,
                        "Expecting empty XML in the case of the same dataset. Produced XML: %s" % (diff_xml, ))

        self.assertEqual(len(paper.additional_data_links), 2,
                         "Incorrect number of recognised additional data links")

        if paper.additional_data_links[0]["description"][0] > \
                paper.additional_data_links[1]["description"][0]:
            l1 = paper.additional_data_links[1]
            l2 = paper.additional_data_links[0]
        else:
            l1 = paper.additional_data_links[0]
            l2 = paper.additional_data_links[1]

        self.assertEqual(l1["description"], "1 This is the link text", "Incorrect first parsed link")
        self.assertEqual(l1["href"], "http://google.com", "Incorrect first parsed link")
        self.assertEqual(l2["description"], "2 This is some other completely unrelated field",
                         "Incorrect second parsed link")
        self.assertEqual(l2["href"], "http://invenio-software.org",
                         "Incorrect second parsed link")
Example #35
def move_drafts_articles_to_ready(journal_name, issue):
    """
    Move draft articles to their final "collection".

    To do so we rely on the convention that an admin-chosen keyword
    must be removed from the metadata
    """
    protected_datafields = ['100', '245', '246', '520', '590', '700']
    keyword_to_remove = get_journal_draft_keyword_to_remove(journal_name)
    collections_to_refresh = {}

    categories = get_journal_categories(journal_name, issue)
    for category in categories:
        articles = get_journal_articles(journal_name, issue, category)
        for order, recids in articles.iteritems():
            for recid in recids:
                record_xml = format_record(recid, of='xm')
                if not record_xml:
                    continue
                new_record_xml_path = os.path.join(CFG_TMPDIR,
                                                   'webjournal_publish_' + \
                                                   str(recid) + '.xml')
                if os.path.exists(new_record_xml_path):
                    # Do not modify twice
                    continue
                record_struc = create_record(record_xml)
                record = record_struc[0]
                new_record = update_draft_record_metadata(record,
                                                          protected_datafields,
                                                          keyword_to_remove)
                new_record_xml = print_rec(new_record)
                if new_record_xml.find(keyword_to_remove) >= 0:
                    new_record_xml = new_record_xml.replace(keyword_to_remove, '')
                    # Write to file
                    new_record_xml_file = file(new_record_xml_path, 'w')
                    new_record_xml_file.write(new_record_xml)
                    new_record_xml_file.close()
                    # Submit
                    task_low_level_submission('bibupload',
                                              'WebJournal',
                                              '-c', new_record_xml_path)
                    task_low_level_submission('bibindex',
                                              'WebJournal',
                                              '-i', str(recid))
                    for collection in get_all_collections_of_a_record(recid):
                        collections_to_refresh[collection] = ''

    # Refresh collections
    collections_to_refresh.update([(c, '') for c in get_journal_collection_to_refresh_on_release(journal_name)])
    for collection in collections_to_refresh.keys():
        task_low_level_submission('webcoll',
                                  'WebJournal',
                                  '-f', '-p', '2','-c', collection)
Example #36
def move_drafts_articles_to_ready(journal_name, issue):
    """
    Move draft articles to their final "collection".

    To do so we rely on the convention that an admin-chosen keyword
    must be removed from the metadata
    """
    protected_datafields = ['100', '245', '246', '520', '590', '700']
    keyword_to_remove = get_journal_draft_keyword_to_remove(journal_name)
    collections_to_refresh = {}

    categories = get_journal_categories(journal_name, issue)
    for category in categories:
        articles = get_journal_articles(journal_name, issue, category)
        for order, recids in articles.iteritems():
            for recid in recids:
                record_xml = format_record(recid, of='xm')
                if not record_xml:
                    continue
                new_record_xml_path = os.path.join(CFG_TMPDIR,
                                                   'webjournal_publish_' + \
                                                   str(recid) + '.xml')
                if os.path.exists(new_record_xml_path):
                    # Do not modify twice
                    continue
                record_struc = create_record(record_xml)
                record = record_struc[0]
                new_record = update_draft_record_metadata(
                    record, protected_datafields, keyword_to_remove)
                new_record_xml = print_rec(new_record)
                if new_record_xml.find(keyword_to_remove) >= 0:
                    new_record_xml = new_record_xml.replace(
                        keyword_to_remove, '')
                    # Write to file
                    new_record_xml_file = file(new_record_xml_path, 'w')
                    new_record_xml_file.write(new_record_xml)
                    new_record_xml_file.close()
                    # Submit
                    task_low_level_submission('bibupload', 'WebJournal', '-c',
                                              new_record_xml_path)
                    task_low_level_submission('bibindex', 'WebJournal', '-i',
                                              str(recid))
                    for collection in get_all_collections_of_a_record(recid):
                        collections_to_refresh[collection] = ''

    # Refresh collections
    collections_to_refresh.update([
        (c, '')
        for c in get_journal_collection_to_refresh_on_release(journal_name)
    ])
    for collection in collections_to_refresh.keys():
        task_low_level_submission('webcoll', 'WebJournal', '-f', '-p', '2',
                                  '-c', collection)
Example #37
 def _prepare_blob(self):
     """
      Transforms the blob into rec_tree structure to use it in the standard
     translation phase inside C{JsonReader}
     """
     self.rec_tree = CoolDict()
     try:
         if self.blob_wrapper.schema.lower().startswith('file:'):
             self.blob_wrapper.blob = open(self.blob_wrapper.blob_file_name, 'r').read()
         if self.blob_wrapper.schema.lower() in ['recstruct']:
             self.__create_rectree_from_recstruct()
         elif self.blob_wrapper.schema.lower() in ['xml', 'file:xml']:
             #TODO: Implement translation directly from xml
             from invenio.bibrecord import create_record
             self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
             self.__create_rectree_from_recstruct()
     except AttributeError:
         #Assume marcxml
         from invenio.bibrecord import create_record
         self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
         self.__create_rectree_from_recstruct()
Example #38
def save_xml_record(recid, uid, xml_record='', to_upload=True, to_merge=False, spec_name=''):
    """Write XML record to file. Default behaviour is to read the record from
    a BibEdit cache file, filter out the unchanged volatile subfields,
    write it back to an XML file and then pass this file to BibUpload.

    @param xml_record: give XML as string instead of reading cache file
    @param to_upload: pass the XML file to BibUpload
    @param to_merge: prepare an XML file for BibMerge to use

    """
    if not xml_record:
        # Read record from cache file.
        cache = get_cache_file_contents(recid, uid)
        if cache:
            record = cache[2]
            used_changes = cache[4]
#            record_strip_empty_fields(record) # now performed for every record after removing unfilled volatile fields
            xml_record = record_xml_output(record)
            delete_cache_file(recid, uid)
            delete_disabled_changes(used_changes)
    else:
        record = create_record(xml_record)[0]

    # clean the record from unfilled volatile fields
    record_strip_empty_volatile_subfields(record)
    record_strip_empty_fields(record)

    # order subfields alphabetically before saving the record
#TP: we don't want this    record_order_subfields(record)

    xml_to_write = wash_for_xml(record_xml_output(record))

    # Write XML file.
    if not to_merge:
        file_path = '%s.xml' % _get_file_path(recid, uid)
    else:
        file_path = '%s_%s.xml' % (_get_file_path(recid, uid),
                                   CFG_BIBEDIT_TO_MERGE_SUFFIX)
    xml_file = open(file_path, 'w')
    xml_file.write(xml_to_write)
    xml_file.close()

    user_name = get_user_info(uid)[1]
    if to_upload:
        # TP: check whether to add spec name
        if spec_name == '':
            # Pass XML file to BibUpload.
            task_low_level_submission('bibupload', 'bibedit', '-P', '5', '-r',
                                      file_path, '-u', user_name)
        else:
            task_low_level_submission('bibupload', 'bibedit', '-P', '5', '-r',
                                      file_path, '-u', user_name, '-N', spec_name)
    return True
Example #39
def crossref_process_template(template, change=False):
    """
    Creates record from template based on xml template
    @param change: if set to True, makes changes to the record (translating the
        title, unifying author names etc.), if not - returns record without
        any changes
    @return: record
    """
    record = create_record(template)[0]
    if change:
        crossref_translate_title(record)
        crossref_normalize_name(record)
    return record
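A hedged sketch combining this helper with the DOI lookup shown in Example #5; get_marcxml_for_doi and record_xml_output are assumed to be available as in the other examples, the DOI is made up, and CrossrefError handling is omitted.

marcxml_template = get_marcxml_for_doi('10.1000/xyz123')   # hypothetical DOI
record = crossref_process_template(marcxml_template, change=True)
print record_xml_output(record)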
Example #40
def crossref_process_template(template, change=False):
    """
    Creates record from template based on xml template
    @param change: if set to True, makes changes to the record (translating the
        title, unifying author names etc.), if not - returns record without
        any changes
    @return: record
    """
    record = create_record(template)[0]
    if change:
        crossref_translate_title(record)
        crossref_normalize_name(record)
    return record
Example #41
def modify_record_timestamp(revision_xml, last_revision_ts):
    """ Modify tag 005 to add the revision passed as parameter.
    @param revision_xml: marcxml representation of the record to modify
    @type revision_xml: string
    @param last_revision_ts: timestamp to add to 005 tag
    @type last_revision_ts: string

    @return: marcxml with 005 tag modified
    """
    recstruct = create_record(revision_xml)[0]
    record_modify_controlfield(recstruct, "005", last_revision_ts,
                                field_position_local=0)
    return record_xml_output(recstruct)
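A hedged sketch of stamping a revision with a new 005 value; the MARCXML snippet and both timestamps are made up.

revision_xml = """<record>
  <controlfield tag="001">1234</controlfield>
  <controlfield tag="005">20200101000000.0</controlfield>
</record>"""
print modify_record_timestamp(revision_xml, '20240101120000.0')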
Example #42
def _get_record_linking_fields(recid_b, recid_a, tag, ind1, ind2):
    """
    Returns the fields (defined by tag, ind1, ind2) in record (given
    by recid_b) that do not link to another given record (recid_a).
    """
    fields = []
    rec = create_record(format_record(recid_b, "xm"))[0]
    for field_instance in record_get_field_instances(rec,
                                                     tag=tag,
                                                     ind1=ind1,
                                                     ind2=ind2):
        if not ('w', str(recid_a)) in field_instance[0]:
            fields.append(field_instance)
    return fields
Example #43
def rollback_record(recid, weight):
        print 'id', recid, 'weight', weight
        for rev in get_record_revision_ids(recid):
            if weight == 0:
                break
            if 'refextract' in get_info_of_revision_id(rev):
                weight -= 1
        print 'rev', rev
        old_record = create_record(get_marcxml_of_revision_id(rev))
        fields_to_add = record_get_field_instances(old_record[0],
                                                   tag='999',
                                                   ind1='%',
                                                   ind2='%')
        submit_xml(create_our_record(recid, fields_to_add))
Example #44
 def _prepare_blob(self):
     """
      Transforms the blob into rec_tree structure to use it in the standard
     translation phase inside C{JsonReader}
     """
     self.rec_tree = CoolDict()
     try:
         if self.blob_wrapper.schema.lower().startswith('file:'):
             self.blob_wrapper.blob = open(self.blob_wrapper.blob_file_name,
                                           'r').read()
         if self.blob_wrapper.schema.lower() in ['recstruct']:
             self.__create_rectree_from_recstruct()
         elif self.blob_wrapper.schema.lower() in ['xml', 'file:xml']:
             #TODO: Implement translation directly from xml
             from invenio.bibrecord import create_record
             self.blob_wrapper.blob = create_record(
                 self.blob_wrapper.blob)[0]
             self.__create_rectree_from_recstruct()
     except AttributeError:
         #Assume marcxml
         from invenio.bibrecord import create_record
         self.blob_wrapper.blob = create_record(self.blob_wrapper.blob)[0]
         self.__create_rectree_from_recstruct()
Example #45
    def _next_value(self, recid=None, xml_record=None, start_date=None):
        """
        Returns the next cnum for the given recid

        @param recid: id of the record where the cnum will be generated
        @type recid: int

        @param xml_record: record in xml format
        @type xml_record: string

        @param start_date: use given start date
        @type start_date: string

        @return: next cnum for the given recid. Format is Cyy-mm-dd.[.1n]
        @rtype: string

        @raises ConferenceNoStartDateError: No date information found in the
        given recid
        """
        bibrecord = None
        if recid is None and xml_record is not None:
            bibrecord = create_record(xml_record)[0]
        elif recid is not None:
            bibrecord = get_bibrecord(recid)

        if start_date is None and bibrecord is not None:
            start_date = record_get_field_value(bibrecord,
                                                tag="111",
                                                ind1="",
                                                ind2="",
                                                code="x")

        if not start_date:
            raise ConferenceNoStartDateError

        base_cnum = "C" + start_date[2:]

        record_cnums = self._get_record_cnums(base_cnum)
        if not record_cnums:
            new_cnum = base_cnum
        elif len(record_cnums) == 1:
            new_cnum = base_cnum + '.' + '1'
        else:
            # Get the max current revision, cnums are in format Cyy-mm-dd,
            # Cyy-mm-dd.1, Cyy-mm-dd.2
            highest_revision = max(
                [int(rev[0].split('.')[1]) for rev in record_cnums[1:]])
            new_cnum = base_cnum + '.' + str(highest_revision + 1)

        return new_cnum
Example #46
    def _next_value(self, recid=None, xml_record=None, start_date=None):
        """
        Returns the next cnum for the given recid

        @param recid: id of the record where the cnum will be generated
        @type recid: int

        @param xml_record: record in xml format
        @type xml_record: string

        @param start_date: use given start date
        @type start_date: string

        @return: next cnum for the given recid. Format is Cyy-mm-dd.[.1n]
        @rtype: string

        @raises ConferenceNoStartDateError: No date information found in the
        given recid
        """
        bibrecord = None
        if recid is None and xml_record is not None:
            bibrecord = create_record(xml_record)[0]
        elif recid is not None:
            bibrecord = get_bibrecord(recid)

        if start_date is None and bibrecord is not None:
            start_date = record_get_field_value(bibrecord,
                                                tag="111",
                                                ind1="",
                                                ind2="",
                                                code="x")

        if not start_date:
            raise ConferenceNoStartDateError

        base_cnum = "C" + start_date[2:]

        record_cnums = self._get_record_cnums(base_cnum)
        if not record_cnums:
            new_cnum = base_cnum
        elif len(record_cnums) == 1:
            new_cnum = base_cnum + '.' + '1'
        else:
            # Get the max current revision, cnums are in format Cyy-mm-dd,
            # Cyy-mm-dd.1, Cyy-mm-dd.2
            highest_revision = max([int(rev[0].split('.')[1]) for rev in record_cnums[1:]])
            new_cnum = base_cnum + '.' + str(highest_revision + 1)

        return new_cnum
Example #47
def modify_record_timestamp(revision_xml, last_revision_ts):
    """ Modify tag 005 to add the revision passed as parameter.
    @param revision_xml: marcxml representation of the record to modify
    @type revision_xml: string
    @param last_revision_ts: timestamp to add to 005 tag
    @type last_revision_ts: string

    @return: marcxml with 005 tag modified
    """
    recstruct = create_record(revision_xml)[0]
    record_modify_controlfield(recstruct,
                               "005",
                               last_revision_ts,
                               field_position_local=0)
    return record_xml_output(recstruct)
Example #48
def move_drafts_articles_to_ready(journal_name, issue):
    """
    Move draft articles to their final "collection".

    To do so we rely on the convention that an admin-chosen keyword
    must be removed from the metadata
    """
    protected_datafields = ["100", "245", "246", "520", "590", "700"]
    keyword_to_remove = get_journal_draft_keyword_to_remove(journal_name)
    collections_to_refresh = {}

    categories = get_journal_categories(journal_name, issue)
    for category in categories:
        articles = get_journal_articles(journal_name, issue, category)
        for order, recids in articles.iteritems():
            for recid in recids:
                record_xml = format_record(recid, of="xm")
                if not record_xml:
                    continue
                new_record_xml_path = os.path.join(CFG_TMPDIR, "webjournal_publish_" + str(recid) + ".xml")
                if os.path.exists(new_record_xml_path):
                    # Do not modify twice
                    continue
                record_struc = create_record(record_xml)
                record = record_struc[0]
                new_record = update_draft_record_metadata(record, protected_datafields, keyword_to_remove)
                new_record_xml = print_rec(new_record)
                if new_record_xml.find(keyword_to_remove) >= 0:
                    new_record_xml = new_record_xml.replace(keyword_to_remove, "")
                    # Write to file
                    new_record_xml_file = file(new_record_xml_path, "w")
                    new_record_xml_file.write(new_record_xml)
                    new_record_xml_file.close()
                    # Submit
                    task_low_level_submission("bibupload", "WebJournal", "-c", new_record_xml_path)
                    task_low_level_submission("bibindex", "WebJournal", "-i", str(recid))
                    for collection in get_all_collections_of_a_record(recid):
                        collections_to_refresh[collection] = ""

    # Refresh collections
    collections_to_refresh.update([(c, "") for c in get_journal_collection_to_refresh_on_release(journal_name)])
    for collection in collections_to_refresh.keys():
        task_low_level_submission("webcoll", "WebJournal", "-f", "-p", "2", "-c", collection)
Example #49
0
def create_objects(path_to_file):
    from invenio.bibworkflow_model import BibWorkflowObject

    list_of_bwo = []
    f = open(path_to_file, "r")
    records = f.read()
    f.close()

    record_xmls = REGEXP_RECORD.findall(records)
    for record_xml in record_xmls:
        rec = "<record>"
        rec += record_xml
        rec += "</record>"
        rec = create_record(rec)[0]
        # TODO: check for parsing errors, e.g. when the parsed record is empty

        bwo = BibWorkflowObject(rec, "bibrecord")
        list_of_bwo.append(bwo)
    return list_of_bwo
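REGEXP_RECORD is not shown in the snippet above; a plausible definition (an assumption, not taken from the source) that captures the body of each <record> element in a multi-record MARCXML dump would be:

import re

# Matches the inner content of every <record>...</record> block, including
# newlines, so each match can be re-wrapped in <record> tags as done above.
REGEXP_RECORD = re.compile(r"<record.*?>(.*?)</record>", re.DOTALL)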
Example #50
0
def replace_references(recid, uid=None, txt=None, url=None):
    """Replace references for a record

    The record itself is not updated; the MARCXML of the record with the
    updated references is returned.

    Parameters:
    * recid: the id of the record
    * uid: id of the user, used to access the record cache
    * txt: references in plain-text form
    * url: URL of a document to extract references from
    """
    # Parse references
    if txt is not None:
        references_xml = extract_references_from_string_xml(
            txt, is_only_references=True)
    elif url is not None:
        references_xml = extract_references_from_url_xml(url)
    else:
        references_xml = extract_references_from_record_xml(recid)
    references = create_record(references_xml.encode('utf-8'))

    dummy1, dummy2, record, dummy3, dummy4, dummy5, dummy6 = get_cache_file_contents(
        recid, uid)
    out_xml = None

    references_to_add = record_get_field_instances(references[0],
                                                   tag='999',
                                                   ind1='C',
                                                   ind2='5')
    refextract_status = record_get_field_instances(references[0],
                                                   tag='999',
                                                   ind1='C',
                                                   ind2='6')

    if references_to_add:
        # Replace 999 fields
        record_delete_fields(record, '999')
        record_add_fields(record, '999', references_to_add)
        record_add_fields(record, '999', refextract_status)
        # Update record references
        out_xml = record_xml_output(record)

    return out_xml
Example #51
0
def match_record(obj, eng):
    """
    Will try to find matches in stored records
    """
    from invenio.bibrecord import create_record
    from invenio.bibmatch_engine import match_records

    obj.db_obj.last_task_name = 'match_record'
    rec = create_record(obj.data['data'])
    matches = match_records(records=[rec], qrystrs=[("title", "[245__a]")])
    obj.db_obj.extra_data['tasks_results']['match_record'] = matches
    if matches[2] or matches[3]:
        # we have ambiguous or fuzzy results:
        # render the corresponding Holding Pen template
        eng.halt("Match resolution needed")
    elif matches[0]:
        eng.log.info("Matching: new record")
    else:
        results = matches[1][0][1]
        eng.log.info("Matching: existing record %s" % (results, ))
Example #52
0
def element_tree_collection_to_records(tree, header_subs=None):
    """ Takes an ElementTree and converts the nodes
    into BibRecord records so they can be worked with.
    This function expects a tree whose root is a <collection> element, such as:
    <collection>
        <record>
            <!-- MARCXML -->
        </record>
        <record> ... </record>
    </collection>
    """
    records = []
    collection = tree.getroot()
    for record_element in collection.getchildren():
        marcxml = ET.tostring(record_element, encoding="utf-8")
        record, status, errors = create_record(marcxml)
        if errors:
            _print(str(status))
        records.append(record)
    return records, []
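A hedged usage sketch for element_tree_collection_to_records; the MARCXML is a made-up minimal collection, and note that the function expects an ElementTree (it calls getroot()), not a bare Element:

import xml.etree.ElementTree as ET

collection_xml = """<collection>
  <record>
    <datafield tag="245" ind1=" " ind2=" ">
      <subfield code="a">A sample title</subfield>
    </datafield>
  </record>
</collection>"""

tree = ET.ElementTree(ET.fromstring(collection_xml))
records, dummy = element_tree_collection_to_records(tree)
# records[0] is now a BibRecord structure usable with record_get_field_value & co.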
Example #54
0
def merge_record_with_template(rec, template_name):
    """ Extend the record rec with the contents of the template and return it"""
    template = get_record_template(template_name)
    template_bibrec = create_record(template)[0]

    for field_tag in template_bibrec:
        if not record_has_field(rec, field_tag):
            for field_instance in template_bibrec[field_tag]:
                record_add_field(rec, field_tag, field_instance[1], field_instance[2], subfields=field_instance[0])
        else:
            for template_field_instance in template_bibrec[field_tag]:
                subfield_codes_template = field_get_subfield_codes(template_field_instance)
                for field_instance in rec[field_tag]:
                    subfield_codes = field_get_subfield_codes(field_instance)
                    for code in subfield_codes_template:
                        if code not in subfield_codes:
                            field_add_subfield(
                                field_instance, code, field_get_subfield_values(template_field_instance, code)[0]
                            )
    return rec
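A hedged usage sketch for merge_record_with_template; "article" is a hypothetical template name that would have to exist among the BibEdit record templates on disk:

from invenio.bibrecord import create_record

rec = create_record("""<record>
  <datafield tag="245" ind1=" " ind2=" ">
    <subfield code="a">Existing title</subfield>
  </datafield>
</record>""")[0]

merged = merge_record_with_template(rec, "article")
# Fields present in the template but absent from rec are copied over;
# fields already in rec only gain the subfield codes they were missing.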
Example #55
0
def bst_labssync():
    """
    Synchronizes from Labs via redis.

    """
    r = redis.StrictRedis.from_url(CFG_REDIS_HOST_LABS)
    user_agent = make_user_agent_string('labssync')
    s = requests.Session()
    s.headers['User-Agent'] = user_agent
    s.headers['Accept'] = 'application/marcxml+xml'

    tot = r.scard(CFG_REDIS_KEY)
    if tot == 0:
        write_message("Nothing to do")
        return
    else:
        write_message("At least %s records to synchronize from labs" % tot)

    errors = []
    final_total = 0
    uploader = ChunkedBibUpload(mode='r', user='******')
    while True:
        elem = r.spop(CFG_REDIS_KEY)
        if not elem:
            break
        final_total += 1
        try:
            record = s.get("https://%s/api/%s" % (CFG_LABS_HOSTNAME, elem)).text

            # Let's strip collection/XML header
            record = record_xml_output(create_record(record)[0])
            uploader.add(record)
            task_sleep_now_if_required()
        except Exception as err:
            register_exception()
            write_message("ERROR: when retrieving %s: %s" % (elem, err), stream=sys.stderr)
            errors.append(elem)

    write_message("Finally synced %s records from labs" % final_total)
    if errors:
        write_message("All those %s records had errors and might need to be resynced: %s" % (len(errors), ', '.join(errors)))
Example #56
0
    def _prepare_blob(self, *args, **kwargs):
        #FIXME stop using recstruct!
        from invenio.bibrecord import create_record

        class SaveDict(dict):
            __getitem__ = dict.get

        def dict_extend_helper(d, key, value):
            """
            If the key is already present in the dictionary, turn the existing
            value into a list (if it is not one already) and append the new
            value to it, similarly to C{list.extend}.
            """
            if key in d:
                current_value = d.get(key)
                if not isinstance(current_value, list):
                    current_value = [current_value]
                current_value.append(value)
                value = current_value
            d[key] = value

        self.rec_tree = SaveDict()
        record, status_code, errors = create_record(self.blob)
        if status_code == 0:
            if isinstance(errors, list):
                errors = "\n".join(errors)
            # There was an error
            raise ReaderException(
                "There was an error while parsing MARCXML: %s" % (errors, ))

        for key, values in record.iteritems():
            if key < '010' and key.isdigit():
                self.rec_tree[key] = [value[3] for value in values]
            else:
                for value in values:
                    field = SaveDict()
                    for subfield in value[0]:
                        dict_extend_helper(field, subfield[0], subfield[1])
                    dict_extend_helper(self.rec_tree,
                                       (key + value[1] + value[2]).replace(
                                           ' ', '_'), field)
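A made-up illustration of the rec_tree layout produced by _prepare_blob above: controlfields (tags below '010') map to lists of their values, while each datafield becomes a dict keyed by subfield code and stored under a "<tag><ind1><ind2>" key with blanks replaced by underscores; repeated fields and subfields are folded into lists by dict_extend_helper:

expected_shape = {
    '001': ['12345'],
    '245__': {'a': 'A sample title'},
    '700__': [{'a': 'First, Author'}, {'a': 'Second, Author'}],
}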
Example #57
0
def output_record(xml_file):
    """
	Function that returns a record representation from a xml file
	
	@param xml_file: the file in xml format
	
	@return: the record
	"""
    xml_to_string = ''
    list_of_words = []
    f = open(xml_file)
    try:
        for line in f:
            words = line.split()
            for word in words:
                list_of_words.append(word)
    finally:
        f.close()
    xml_to_string = ' '.join(list_of_words)
    # create_record takes a MARCXML string and returns a (record, status, errors) tuple
    (record, status_code, list_of_errors) = create_record(xml_to_string)
    return record
Example #58
0
def cli_clean_revisions(recid, dry_run=True, verbose=True):
    """Clean revisions of the given recid, by removing duplicate revisions
    that do not change the content of the record."""
    if recid == '*':
        recids = intbitset(run_sql("SELECT DISTINCT id_bibrec FROM hstRECORD"))
    else:
        try:
            recids = [int(recid)]
        except ValueError:
            print 'ERROR: record ID must be integer, not %s.' % recid
            sys.exit(1)
    for recid in recids:
        all_revisions = run_sql(
            "SELECT marcxml, job_id, job_name, job_person, job_date FROM hstRECORD WHERE id_bibrec=%s ORDER BY job_date ASC",
            (recid, ))
        previous_rec = {}
        deleted_revisions = 0
        for marcxml, job_id, job_name, job_person, job_date in all_revisions:
            try:
                current_rec = create_record(zlib.decompress(marcxml))[0]
            except Exception:
                print >> sys.stderr, "ERROR: corrupted revisions found. Please run %s --fix-revisions '*'" % sys.argv[0]
                sys.exit(1)
            if records_identical(current_rec, previous_rec):
                deleted_revisions += 1
                if not dry_run:
                    run_sql(
                        "DELETE FROM hstRECORD WHERE id_bibrec=%s AND job_id=%s AND job_name=%s AND job_person=%s AND job_date=%s",
                        (recid, job_id, job_name, job_person, job_date))
            previous_rec = current_rec
        if verbose and deleted_revisions:
            print "record %s: deleted %s duplicate revisions out of %s" % (
                recid, deleted_revisions, len(all_revisions))
    if verbose:
        print "DONE"
def salvage_deleted_record_from_history(recid):
    return create_record(
        decompress(
            run_sql(
                "SELECT marcxml FROM hstRECORD WHERE id_bibrec=%s ORDER BY job_date DESC LIMIT 1",
                (recid, ))[0][0]))[0]