示例#1
0
    def test_gnd(self):
        """Test GND identifier handling in contributor rules.

        The JSON value carries a ``gnd:`` prefix while the MARC ``700__0``
        subfield is expected to hold ``(gnd)<id>``; both conversion
        directions are checked.
        """
        from invenio.modules.records.api import Record
        r = Record.create({'contributors': [
                {'name': 'Smith, John',
                 'gnd': 'gnd:118604740',
                 'type': 'DataCurator'},
            ]}, 'json')

        # print() with a single argument behaves identically on Python 2
        # and Python 3, unlike the bare print statement.
        print(r.produce('json_for_marc'))
        # Test that "gnd:" is not added in MARC
        assert {'700__0': ['(gnd)118604740', None],
                '700__4': 'cur',
                '700__a': 'Smith, John'} \
            in r.produce('json_for_marc')

        r = Record.create(
            '<record>'
            '<datafield tag="700" ind1=" " ind2=" ">'
            '<subfield code="4">cur</subfield>'
            '<subfield code="a">Smith, John</subfield>'
            '<subfield code="0">(gnd)118604740</subfield>'
            '</datafield>'
            '</record>',
            master_format='marc'
        )

        # Test that "gnd:" is added back in JSON
        print(r['contributors'])
        assert r['contributors'] == [{
            'gnd': 'gnd:118604740',
            'name': 'Smith, John',
            'orcid': '',
            'type': 'DataCurator'
        }]
示例#2
0
    def test_jsonalchemy_toint_usage(self):
        """Exercise ``to_int`` through the ``test_toint`` model.

        The model has a field with an integer subfield; when a record is
        read from ``MARCXML`` the subfield string must become an integer.

        JSONAlchemy fills absent subfields with ``None``, so a plain
        ``int`` conversion would crash when the subfield is missing.
        ``to_int`` guards against that. The first record below omits
        ``999__a``; the second one provides it.
        """
        from invenio.modules.records.api import Record

        create_options = dict(master_format='marc',
                              model="test_toint",
                              namespace='testsuite')

        # Record without the integer subfield: no errors must be recorded.
        xml_without_int = (
            '<collection><record><datafield tag="999" ind1="" ind2= "">'
            '<subfield code="b">Value</subfield></datafield></record>'
            '</collection>'
        )
        record_without_int = Record.create(xml_without_int, **create_options)
        metadata = record_without_int.__dict__['_dict']['__meta_metadata__']
        self.assertEqual(len(metadata['__errors__']), 0)

        # Record with the integer subfield: the value is converted.
        xml_with_int = (
            '<collection><record><datafield tag="999" ind1="" ind2= "">'
            '<subfield code="a">9999</subfield>'
            '<subfield code="b">Value</subfield></datafield></record>'
            '</collection>'
        )
        record_with_int = Record.create(xml_with_int, **create_options)
        first_entry = record_with_int['with_integers'][0]
        self.assertEqual(first_entry['some_int'], 9999)
示例#3
0
    def test_subjects_gnd(self):
        """Test GND identifier handling in subject rules.

        Checks that a ``gnd:``-prefixed subject identifier is exported to
        MARC as ``(gnd)<id>`` and that the prefix is restored on import.
        """
        from invenio.modules.records.api import Record

        subject = {'term': 'Smith, John',
                   'identifier': 'gnd:118604740',
                   'scheme': 'gnd'}
        json_record = Record.create({'subjects': [subject]}, 'json')

        # "gnd:" must not appear in the MARC output
        print(json_record.produce('json_for_marc'))
        expected_marc = {'6501_a': 'Smith, John', '6501_0': '(gnd)118604740'}
        assert expected_marc in json_record.produce('json_for_marc')

        marcxml = (
            '<record>'
            '<datafield tag="650" ind1="1" ind2=" ">'
            '<subfield code="a">Smith, John</subfield>'
            '<subfield code="0">(gnd)118604740</subfield>'
            '</datafield>'
            '</record>'
        )
        marc_record = Record.create(marcxml, master_format='marc')

        # "gnd:" must be added back in JSON
        expected_subjects = [{
            'identifier': 'gnd:118604740',
            'scheme': 'gnd',
            'term': 'Smith, John',
        }]
        assert marc_record['subjects'] == expected_subjects
示例#4
0
    def test_marc_export(self):
        """Check the MARCXML export and form JSON of the reference record.

        The record built from ``test_record`` is exported to MARC,
        round-tripped through the legacy ``create_record`` /
        ``record_xml_output`` helpers and compared with ``test_marc``.
        On mismatch a unified diff is raised as the assertion message.
        Every key of ``test_form_json`` is then compared with the
        ``json_for_form`` output.
        """
        from invenio.modules.records.api import Record
        from invenio.legacy.bibrecord import create_record, record_xml_output

        rec = Record(json=test_record, master_format='marc')

        # Needed to properly set authors when generating MARC
        first = rec['authors'][0]
        additional = rec['authors'][1:]
        rec['_first_author'] = first
        rec['_additional_authors'] = additional

        output_marc = record_xml_output(
            create_record(rec.legacy_export_as_marc())[0]
        )
        if not test_marc == output_marc:
            # Report a readable diff instead of two large XML blobs.
            import difflib
            diff = "".join(difflib.unified_diff(
                test_marc.splitlines(True),
                output_marc.splitlines(True)
            ))
            raise AssertionError(diff)

        form_json = rec.produce('json_for_form')
        for key, expected in test_form_json.items():
            self.assertEqual(form_json[key], expected)
示例#5
0
    def test_types(self):
        """Test upload_type rules.

        For every configured upload type (and each of its subtypes, when it
        has any) a MARC record with a ``980`` field is created; the resulting
        ``upload_type`` must match and no ``collections`` may be present.
        """
        from invenio.modules.records.api import Record

        for upload_type in cfg['UPLOAD_TYPES']:
            if not upload_type['subtypes']:
                # Type without subtypes: only subfield "a" is provided.
                marcxml = (
                    '<record><datafield tag="980" ind1=" " ind2=" ">'
                    '<subfield code="a">{0}</subfield>'
                    '</datafield></record>'
                ).format(upload_type['type'])
                record = Record.create(marcxml, master_format='marc')
                assert record['upload_type'] == {"type": upload_type['type']}
                assert len(record.get('collections', [])) == 0
                continue

            for subtype in upload_type['subtypes']:
                # Subtype goes to subfield "b", main type to subfield "a".
                marcxml = (
                    '<record><datafield tag="980" ind1=" " ind2=" ">'
                    '<subfield code="b">{1}</subfield>'
                    '<subfield code="a">{0}</subfield>'
                    '</datafield></record>'
                ).format(upload_type['type'], subtype['type'])
                record = Record.create(marcxml, master_format='marc')
                expected = {"type": upload_type['type'],
                            "subtype": subtype['type']}
                assert record['upload_type'] == expected
                assert len(record.get('collections', [])) == 0
示例#6
0
    def test_json_for_ld(self):
        """Smoke-test that ``json_for_ld`` can be produced without raising."""
        from invenio.modules.records.api import Record

        # The result is not inspected; the call only has to succeed.
        Record.create({'title': 'Test'}, 'json')

        import copy
        reference_record = Record(json=copy.copy(test_record),
                                  master_format='marc')
        reference_record.produce('json_for_ld')
示例#7
0
    def formatter(bwo, **kwargs):
        """Nicely format the record.

        :param bwo: object exposing ``get_data()``.
        :param kwargs: optional ``formatter`` (callable applied to the data,
            its result is returned directly) and ``of`` (output format code
            handed to ``format_record``).
        :returns: ``''`` when there is no data; otherwise the formatted data,
            falling back to the unformatted data (pretty-printed with
            ``pformat`` when it is a dict) if no formatting step succeeded.
        """
        from pprint import pformat
        from invenio.modules.records.api import Record

        data = bwo.get_data()
        if not data:
            return ''

        formatter = kwargs.get("formatter", None)
        of = kwargs.get("of", None)
        if formatter:
            # A separate formatter is supplied
            return formatter(data)

        if isinstance(data, collections.Mapping):
            # Dicts are cool on their own, but maybe it is a SmartJson (record)
            try:
                data = Record(data.dumps()).legacy_export_as_marc()
            except (AttributeError, TypeError, KeyError):
                # AttributeError: a plain mapping has no ``dumps``; keep it
                # as-is so it is pretty-printed at the end.
                pass

        # Bind the name up front: the branches below may be skipped (data is
        # not a string) or may swallow an exception before assigning it,
        # which previously ended in an UnboundLocalError.
        formatted_data = None

        if isinstance(data, string_types):
            # We can try formatter!
            # If already XML, format_record does not like it.
            if of and of != 'xm':
                try:
                    from invenio.modules.formatter import format_record
                    formatted_data = format_record(
                        recID=None,
                        of=of,
                        xml_record=data
                    )
                except TypeError:
                    # Wrong kind of type
                    pass
            else:
                # So, XML then
                from xml.dom.minidom import parseString

                try:
                    unpretty_data = parseString(data)
                    formatted_data = unpretty_data.toprettyxml()
                except TypeError:
                    # Probably not proper XML string then
                    return "Data cannot be parsed: %s" % (data,)
                except Exception:
                    # Just return raw string
                    pass

        if not formatted_data:
            formatted_data = data

        if isinstance(formatted_data, dict):
            formatted_data = pformat(formatted_data)
        return formatted_data
示例#8
0
    def test_marc_export(self):
        """Compare the legacy recstruct of the record with ``test_marc``."""
        from invenio.modules.records.api import Record
        from invenio.legacy.bibrecord import create_record

        record = Record(json=test_record, master_format='marc')

        exported_recstruct = record.legacy_create_recstruct()
        expected_recstruct = create_record(test_marc)[0]
        self.assertEqual(exported_recstruct, expected_recstruct)
    def formatter(bwo, **kwargs):
        """Return a formatted version of the data.

        :param bwo: object exposing ``get_data()``.
        :param kwargs: optional ``formatter`` (callable applied to the data,
            its result is returned directly) and ``format`` (output format
            code handed to ``format_record``; ``'xm'`` pretty-prints the XML).
        :returns: ``''`` when there is no data; the formatted string when
            formatting succeeds; otherwise the data itself (sets are
            returned as lists).
        """
        from invenio.modules.formatter.engine import format_record

        data = bwo.get_data()
        if not data:
            return ''
        formatter = kwargs.get("formatter", None)
        # Local name avoids shadowing the ``format`` builtin; the keyword
        # argument name seen by callers is unchanged.
        output_format = kwargs.get("format", None)
        if formatter:
            # A separate formatter is supplied
            return formatter(data)
        from invenio.modules.records.api import Record
        if isinstance(data, collections.Mapping):
            # Dicts are cool on their own, but maybe it is a SmartJson (record)
            try:
                data = Record(data.dumps()).legacy_export_as_marc()
            except (AttributeError, TypeError, KeyError):
                # Maybe not, submission? AttributeError covers plain mappings
                # without a ``dumps`` method, which used to crash here.
                return data

        if isinstance(data, string_types):
            # It is a string type, let us try to convert
            if output_format and output_format != 'xm':
                # We can try formatter!
                # If already XML, format_record does not like it.
                try:
                    return format_record(recID=None,
                                         of=output_format,
                                         xml_record=data)
                except TypeError:
                    # Wrong kind of type
                    pass
            elif output_format:
                # So, XML then
                from xml.dom.minidom import parseString

                try:
                    pretty_data = parseString(data)
                    return pretty_data.toprettyxml()
                except TypeError:
                    # Probably not proper XML string then
                    return "Data cannot be parsed: %s" % (data,)
                except Exception:
                    # Some other parsing error
                    pass

            # Just return raw string
            return data
        if isinstance(data, set):
            return list(data)
        # Not any of the above types. How juicy!
        return data
示例#10
0
    def test_json_for_ld(self):
        """Smoke-test ``json_for_ld`` production and print the result."""
        from invenio.modules.records.api import Record

        # The result is not inspected; the call only has to succeed.
        Record.create({"title": "Test"}, "json")

        import copy

        reference_record = Record(
            json=copy.copy(test_record), master_format="marc"
        )

        print(reference_record.produce("json_for_ld"))
示例#11
0
    def test_json_for_form(self):
        """Check ``json_for_form`` output for a minimal and a full record.

        A title-only record must expose the title in the form JSON and as
        ``245__a`` in the MARC JSON. For the record built from
        ``test_record`` every key of ``test_form_json`` is compared with
        the produced form JSON.
        """
        from invenio.modules.records.api import Record
        r = Record.create({'title': 'Test'}, 'json')
        assert r.produce('json_for_form')['title'] == 'Test'
        assert {'245__a': 'Test'} in r.produce('json_for_marc')

        import copy
        r = Record(json=copy.copy(test_record), master_format='marc')

        form_json = r.produce('json_for_form')
        # Use the value yielded by items() instead of looking it up again.
        for key, expected in test_form_json.items():
            self.assertEqual(form_json[key], expected)
示例#12
0
    def test_marc_export(self):
        """Check the form JSON produced from the reference record.

        NOTE(review): despite the name, the MARC recstruct comparison below
        is commented out, so only the ``json_for_form`` output is verified.
        """
        from invenio.modules.records.api import Record
        #from invenio.legacy.bibrecord import create_record

        r = Record(json=test_record, master_format='marc')
        # self.assertEqual(
        #     r.legacy_create_recstruct(),
        #     create_record(test_marc)[0],
        # )

        form_json = r.produce('json_for_form')
        # Use the value yielded by items() instead of looking it up again.
        for key, expected in test_form_json.items():
            self.assertEqual(form_json[key], expected)
示例#13
0
    def test_json_for_form(self):
        """Check ``json_for_form`` output for a minimal and a full record.

        A title-only record must expose the title in the form JSON and as
        ``245__a`` in the MARC JSON. For the record built from
        ``test_record`` every key of ``test_form_json`` is compared with
        the produced form JSON.
        """
        from invenio.modules.records.api import Record

        r = Record.create({"title": "Test"}, "json")
        assert r.produce("json_for_form")["title"] == "Test"
        assert {"245__a": "Test"} in r.produce("json_for_marc")

        import copy

        r = Record(json=copy.copy(test_record), master_format="marc")

        form_json = r.produce("json_for_form")
        # Use the value yielded by items() instead of looking it up again.
        for key, expected in test_form_json.items():
            self.assertEqual(form_json[key], expected)
示例#14
0
    def test_lossless_marc_import_export(self):
        """Check that importing ``test_marc`` reproduces ``test_record``.

        Only the keys present in ``test_record`` are compared with the dump
        of the imported record.
        """
        from invenio.modules.records.api import Record

        r = Record.create(test_marc, master_format="marc").dumps()

        # Iterate key/value pairs directly rather than keys() plus lookup.
        for key, expected in test_record.items():
            self.assertEqual(expected, r[key])
示例#15
0
    def test_jsonalchemy_tooldvalue(self):
        """Test behaviour of ``set_default_value``.

        The ``d`` subfield given to the reader below is in a wrong format.
        ``JSONAlchemy`` is expected to skip such a value.

        With this subfield value the module crashes in
        ``set_default_value``; the error has been caught. The reason behind
        that behaviour needs further investigation.
        """
        from invenio.modules.records.api import Record

        # Record whose "d" subfield holds a malformed value.
        malformed_xml = '''<collection><record><datafield tag="100" ind1=" " ind2=" ">
              <subfield code="a">Guy, Bobby</subfield>
              <subfield code="d">I like trains</subfield>
              <subfield code="g">ACTIVE</subfield>
              <subfield code="q">Bobby Guy</subfield>
              </datafield></record></collection>'''

        record = Record.create(
            malformed_xml,
            master_format='marc',
            model="test_oldvalue",
            namespace='testsuite',
        )
        birth_date = record['dates']['birth']
        self.assertEqual(birth_date, None)
示例#16
0
def update(recid):
    """View for INSPIRE author update form.

    Without a ``recid`` the request is redirected to the "new author" view.
    Otherwise the record's MARCXML export is fetched and converted to form
    data; if the request fails, the form is pre-filled with the ``recid``
    only.
    """
    # Store referrer in session for later redirection to original page
    session["author_update_referrer"] = request.referrer

    if not recid:
        return redirect(url_for("inspire_authors.new"))

    data = {}
    try:
        export_url = os.path.join(
            cfg["AUTHORS_UPDATE_BASE_URL"], "record", str(recid),
            "export", "xm"
        )
        response = requests.get(export_url)
        author_record = Record.create(
            response.text.encode("utf-8"), 'marc', model='author'
        )
        data = author_record.produce("json_for_form")
        convert_for_form(data)
    except requests.exceptions.RequestException:
        # Best effort: fall through with whatever data is available.
        pass
    data["recid"] = recid

    form = AuthorUpdateForm(data=data)
    ctx = dict(
        action=url_for('.submitupdate'),
        name="authorUpdateForm",
        id="authorUpdateForm",
    )

    return render_template('authors/forms/update_form.html', form=form, **ctx)
示例#17
0
 def _create_marcxml_record(obj, eng):
     """Store the MARCXML export of ``obj.data`` in ``obj.extra_data``.

     The data is loaded as a JSON record with the ``author`` model and its
     legacy MARC export is saved under the ``"marcxml"`` key and logged.
     """
     from invenio.modules.records.api import Record

     obj.log.info("Creating marcxml record")
     author_record = Record.create(obj.data, 'json', model='author')
     obj.extra_data["marcxml"] = author_record.legacy_export_as_marc()

     log_message = "Produced MarcXML: \n {}".format(obj.extra_data["marcxml"])
     obj.log.info(log_message)
示例#18
0
    def marshal_deposition(cls, deposition):
        """
        Build the JSON representation of a Deposition for the REST API.

        The draft is taken from the ``_edit`` draft, rebuilt from the latest
        sealed SIP, or taken from the ``_metadata`` draft. Record and DOI
        information is added from the first sealed SIP found.
        """
        # Select the draft and the field set used to marshal its metadata
        if deposition.has_sip() and '_edit' in deposition.drafts:
            draft = deposition.get_draft('_edit')
            metadata_fields = cls.marshal_metadata_edit_fields
        elif deposition.has_sip():
            # FIXME: Not based on latest available data in record.
            latest_sip = deposition.get_latest_sip(sealed=True)
            sip_record = Record.create(latest_sip.package,
                                       master_format='marc')
            draft = record_to_draft(sip_record, post_process=process_draft)
            metadata_fields = cls.marshal_metadata_edit_fields
        else:
            draft = deposition.get_or_create_draft('_metadata')
            metadata_fields = cls.marshal_metadata_fields

        # Fix known differences in marshalling (values logged before/after)
        current_app.logger.debug(draft.values)
        draft.values = filter_empty_elements(draft.values)
        current_app.logger.debug(draft.values)

        # Drop the values of disabled fields from the output
        for field, flags in draft.flags.items():
            if 'disabled' not in flags or field not in draft.values:
                continue
            current_app.logger.debug(field)
            del draft.values[field]

        # Marshal the deposition itself, then its metadata attribute
        obj = marshal(deposition, cls.marshal_deposition_fields)
        obj['metadata'] = marshal(unicodifier(draft.values), metadata_fields)

        # Add record and DOI information from the first sealed SIP
        for sip in deposition.sips:
            if not sip.is_sealed():
                continue

            recjson = sip.metadata
            if recjson.get('recid'):
                obj['record_id'] = fields.Integer().format(
                    recjson.get('recid')
                )
                record_url = url_for(
                    'record.metadata',
                    recid=recjson.get('recid'),
                    _external=True
                )
                obj['record_url'] = fields.String().format(record_url)

            # Only DOIs under the configured DataCite prefix are exposed
            if (recjson.get('doi') and recjson.get('doi').startswith(
                    cfg['CFG_DATACITE_DOI_PREFIX'] + "/")):
                obj['doi'] = fields.String().format(recjson.get('doi'))
                obj['doi_url'] = fields.String().format(
                    "http://dx.doi.org/%s" % obj['doi']
                )
            break

        return obj
示例#19
0
 def test_pre1900_embargo_date(self):
     """Check that an embargo date before 1900 survives import and export."""
     from invenio.modules.records.api import Record

     marcxml = (
         '<record><datafield tag="942" ind1="" ind2="">'
         '<subfield code="a">0900-12-31</subfield>'
         '</datafield></record>'
     )
     record = Record.create(marcxml, master_format='marc')

     # Parsed value, dumped value and MARC export must all keep the date.
     self.assertEqual(date(900, 12, 31), record['embargo_date'])
     self.assertEqual('0900-12-31', record.dumps()['embargo_date'])
     assert '0900-12-31' in record.legacy_export_as_marc()
示例#20
0
    def marshal_deposition(cls, deposition):
        """
        Build the JSON representation of a Deposition for the REST API.

        The draft is taken from the ``_edit`` draft, rebuilt from the latest
        sealed SIP, or taken from the ``_default`` draft. Record and DOI
        information is added from the first sealed SIP found.
        """
        # Select the draft and the field set used to marshal its metadata
        if deposition.has_sip() and '_edit' in deposition.drafts:
            draft = deposition.get_draft('_edit')
            metadata_fields = cls.marshal_metadata_edit_fields
        elif deposition.has_sip():
            # FIXME: Not based on latest available data in record.
            latest_sip = deposition.get_latest_sip(sealed=True)
            sip_record = Record.create(latest_sip.package,
                                       master_format='marc')
            draft = record_to_draft(sip_record, post_process=process_draft)
            metadata_fields = cls.marshal_metadata_edit_fields
        else:
            draft = deposition.get_or_create_draft('_default')
            metadata_fields = cls.marshal_metadata_fields

        # Fix known differences in marshalling
        draft.values = filter_empty_elements(draft.values)
        if 'grants' not in draft.values:
            # Make sure the key exists, with an empty list as its value
            draft.values['grants'] = []

        # Drop the values of disabled fields from the output
        for field, flags in draft.flags.items():
            if 'disabled' not in flags or field not in draft.values:
                continue
            del draft.values[field]

        # Marshal the deposition itself, then its metadata attribute
        obj = marshal(deposition, cls.marshal_deposition_fields)
        obj['metadata'] = marshal(unicodifier(draft.values), metadata_fields)

        # Add record and DOI information from the first sealed SIP
        for sip in deposition.sips:
            if not sip.is_sealed():
                continue

            recjson = sip.metadata
            if recjson.get('recid'):
                obj['record_id'] = fields.Integer().format(
                    recjson.get('recid'))
                record_url = url_for('record.metadata',
                                     recid=recjson.get('recid'),
                                     _external=True)
                obj['record_url'] = fields.String().format(record_url)

            # Only DOIs under the configured DataCite prefix are exposed
            if (recjson.get('doi') and recjson.get('doi').startswith(
                    cfg['CFG_DATACITE_DOI_PREFIX'] + "/")):
                obj['doi'] = fields.String().format(recjson.get('doi'))
                obj['doi_url'] = fields.String().format(
                    "http://dx.doi.org/%s" % obj['doi'])
            break

        return obj
示例#21
0
    def test_pre1900_publication_date(self):
        """Check that a publication date before 1900 survives import/export."""
        from invenio.modules.records.api import Record

        marcxml = (
            '<record><datafield tag="260" ind1="" ind2="">'
            '<subfield code="c">0900-12-31</subfield>'
            "</datafield></record>"
        )
        record = Record.create(marcxml, master_format="marc")

        # Parsed value, dumped value and MARC export must all keep the date.
        self.assertEqual(date(900, 12, 31), record["publication_date"])
        self.assertEqual("0900-12-31", record.dumps()["publication_date"])
        assert "0900-12-31" in record.legacy_export_as_marc()
示例#22
0
    def get_description(bwo):
        """Get the description (identifiers and categories) from the object data.

        :param bwo: object exposing ``get_data()``.
        :returns: the rendered ``workflows/styles/harvesting_record.html``
            template, given the collected ``categories`` and ``identifiers``.
        """
        from invenio.modules.records.api import Record
        from flask import render_template, current_app

        record = bwo.get_data()
        # Must be a list: it is filled with ``extend`` below. It used to be
        # a dict, so ``extend`` always raised AttributeError and the
        # fallback branch ran (and logged an exception) on every call.
        final_identifiers = []
        try:
            identifiers = Record(record.dumps()).persistent_identifiers
            for values in identifiers.values():
                final_identifiers.extend([i.get("value") for i in values])
        except Exception:
            # Best effort: fall back to the system control number, if any.
            current_app.logger.exception("Could not get identifiers")
            if hasattr(record, "get"):
                final_identifiers = [
                    record.get("system_control_number",
                               {}).get("value", 'No ids')
                ]
            else:
                final_identifiers = []

        categories = []
        if hasattr(record, "get"):
            # Subjects may live under either of two keys.
            if 'subject' in record:
                lookup = ["subject", "term"]
            elif "subject_term" in record:
                lookup = ["subject_term", "term"]
            else:
                lookup = None
            if lookup:
                primary, secondary = lookup
                category_list = record.get(primary, [])
                if isinstance(category_list, dict):
                    # A single subject is stored as a bare dict.
                    category_list = [category_list]
                categories = [subject[secondary] for subject in category_list]

        return render_template('workflows/styles/harvesting_record.html',
                               categories=categories,
                               identifiers=final_identifiers)
示例#23
0
    def get_description(bwo):
        """Get the description (identifiers and categories) from the object data.

        :param bwo: object exposing ``get_data()``.
        :returns: the rendered ``workflows/styles/harvesting_record.html``
            template, given the collected ``categories`` and ``identifiers``.
        """
        from invenio.modules.records.api import Record
        from flask import render_template, current_app

        record = bwo.get_data()
        # Must be a list: it is filled with ``extend`` below. It used to be
        # a dict, so ``extend`` always raised AttributeError and the
        # fallback branch ran (and logged an exception) on every call.
        final_identifiers = []
        try:
            identifiers = Record(record.dumps()).persistent_identifiers
            for values in identifiers.values():
                final_identifiers.extend([i.get("value") for i in values])
        except Exception:
            # Best effort: fall back to the system control number, if any.
            current_app.logger.exception("Could not get identifiers")
            if hasattr(record, "get"):
                final_identifiers = [
                    record.get("system_control_number", {}).get("value", 'No ids')
                ]
            else:
                final_identifiers = []

        categories = []
        if hasattr(record, "get"):
            # Subjects may live under either of two keys.
            if 'subject' in record:
                lookup = ["subject", "term"]
            elif "subject_term" in record:
                lookup = ["subject_term", "term"]
            else:
                lookup = None
            if lookup:
                primary, secondary = lookup
                category_list = record.get(primary, [])
                if isinstance(category_list, dict):
                    # A single subject is stored as a bare dict.
                    category_list = [category_list]
                categories = [subject[secondary] for subject in category_list]

        return render_template('workflows/styles/harvesting_record.html',
                               categories=categories,
                               identifiers=final_identifiers)
示例#24
0
    def test_jsonalchemy_toint_usage(self):
        """Exercise ``to_int`` through the ``test_toint`` model.

        The model has a field with an integer subfield; when a record is
        read from ``MARCXML`` the subfield string must become an integer.

        JSONAlchemy fills absent subfields with ``None``, so a plain
        ``int`` conversion would crash when the subfield is missing.
        ``to_int`` guards against that. The first record below omits
        ``999__a``; the second one provides it.
        """
        from invenio.modules.records.api import Record

        create_options = dict(master_format='marc',
                              model="test_toint",
                              namespace='testsuite')

        # Record without the integer subfield: no errors must be recorded.
        xml_without_int = (
            '<collection><record><datafield tag="999" ind1="" ind2= "">'
            '<subfield code="b">Value</subfield></datafield></record>'
            '</collection>'
        )
        record_without_int = Record.create(xml_without_int, **create_options)
        metadata = record_without_int.__dict__['_dict']['__meta_metadata__']
        self.assertEqual(len(metadata['__errors__']), 0)

        # Record with the integer subfield: the value is converted.
        xml_with_int = (
            '<collection><record><datafield tag="999" ind1="" ind2= "">'
            '<subfield code="a">9999</subfield>'
            '<subfield code="b">Value</subfield></datafield></record>'
            '</collection>'
        )
        record_with_int = Record.create(xml_with_int, **create_options)
        first_entry = record_with_int['with_integers'][0]
        self.assertEqual(first_entry['some_int'], 9999)
def get_xml_and_jsonify(rep_no):
    """
    Retrieve XML data from CDS and return a jsonified temporary record.

    :param rep_no: The report number to be retrieved
    :type rep_no: String
    :returns: the record created with the ``data_analysis_cds_extract``
        model, or ``None`` when the response does not pass the check below

    Workflow - Download an XML file from CDS using a link like:
    http://cds.cern.ch/search?p=reportnumber%3A"CERN-THESIS-2013-297"&of=xm
    JSONify the xml and return it.
    """
    xml = get("""http://cds.cern.ch/search?p=reportnumber%%3A"%s"&of=xm"""
              % rep_no).content
    # NOTE(review): offsets 83-84 presumably fall on the result count in the
    # CDS response header (i.e. "exactly one hit") -- confirm against a
    # real response.
    # A slice is used instead of indexing so that a short or empty response
    # yields None rather than an IndexError.
    if xml[83:85] == '1 ':
        return Record.create(xml, 'marc', model='data_analysis_cds_extract')
    return None
def _get_arxiv_id_from_inspire(doi):
    """Search INSPIRE for ``doi`` and extract the arXiv id from the first hit.

    The MARCXML search result is split into records; the first one is loaded
    with the ``recordext`` namespace and passed to
    ``_get_arxiv_id_from_record``. Returns ``None`` when the result holds no
    record. An ``IOError`` raised while fetching propagates to the caller.
    """
    query_string = urllib.urlencode({'p': 'doi', 'doi': doi, 'of': 'xm'})
    search_url = 'https://inspirehep.net/search?' + query_string
    collectionxml = urllib.urlopen(search_url).read()

    try:
        first_record_xml = list(split_blob(collectionxml, 'marc'))[0]
    except IndexError:
        return None

    inspire_record = Record.create(
        first_record_xml, master_format='marc', namespace='recordext'
    )
    return _get_arxiv_id_from_record(inspire_record)
示例#27
0
def dumprecords():
    """Dump records.

    Every ``Bibrec`` row is loaded as a record; the clean dumps of the
    records that could be loaded are written as one JSON list to
    ``dump2.json``.
    """
    from invenio.base.factory import create_app

    app = create_app()
    with app.app_context():
        from invenio.modules.editor.models import Bibrec
        from invenio.modules.records.api import Record

        bibrecs = Bibrec.query
        dumped_records = []
        with click.progressbar(bibrecs, length=bibrecs.count()) as progress:
            for bibrec in progress:
                record = Record.get_record(bibrec.id)
                if not record:
                    # Nothing could be loaded for this id; skip it.
                    continue
                dumped_records.append(record.dumps(clean=True))

        with open('dump2.json', 'w') as output_file:
            json.dump(dumped_records, output_file)
def _get_arxiv_id_from_inspire(doi):
    """Search INSPIRE for ``doi`` and extract the arXiv id from the first hit.

    The MARCXML search result is split into records; the first one is loaded
    with the ``recordext`` namespace and passed to
    ``_get_arxiv_id_from_record``. Returns ``None`` when the result holds no
    record. An ``IOError`` raised while fetching propagates to the caller.
    """
    query_string = urllib.urlencode({'p': 'doi', 'doi': doi, 'of': 'xm'})
    search_url = 'https://inspirehep.net/search?' + query_string
    collectionxml = urllib.urlopen(search_url).read()

    try:
        first_record_xml = list(split_blob(collectionxml, 'marc'))[0]
    except IndexError:
        return None

    inspire_record = Record.create(
        first_record_xml, master_format='marc', namespace='recordext'
    )
    return _get_arxiv_id_from_record(inspire_record)
示例#29
0
def dumprecords():
    """Dump records.

    Every ``Bibrec`` row is loaded as a record; the clean dumps of the
    records that could be loaded are written as one JSON list to
    ``dump2.json``.
    """
    from invenio.base.factory import create_app

    app = create_app()
    with app.app_context():
        from invenio.modules.editor.models import Bibrec
        from invenio.modules.records.api import Record

        bibrecs = Bibrec.query
        dumped_records = []
        with click.progressbar(bibrecs, length=bibrecs.count()) as progress:
            for bibrec in progress:
                record = Record.get_record(bibrec.id)
                if not record:
                    # Nothing could be loaded for this id; skip it.
                    continue
                dumped_records.append(record.dumps(clean=True))

        with open("dump2.json", "w") as output_file:
            json.dump(dumped_records, output_file)
示例#30
0
 def get_mocked_record():
     """Return the record cached on ``RecordMock``, creating it on first use."""
     from invenio.modules.records.api import Record

     if RecordMock.record is not None:
         return RecordMock.record

     mocked_json = {
         'doi': '10.1234/invenio.1234',
         # replace with cfg['files_var_name']
         'files_to_upload': [
             ('path1.xls', 'this/is/a/long/path/to/the/file/location/path1.xls'),
             ('path2.csv', 'path2.csv'),
             ('path3.pdf', 'path3.pdf'),
         ],
         'recid': 1,
         # '_files': [  # replace with cfg['files_var_name']
         #    'path1',
         #    'path2',
         #    'path3']
     }
     RecordMock.record = Record(json=mocked_json, master_format='marc')
     return RecordMock.record
示例#31
0
 def test_formjson_for_contributors(self):
     """Test that all contributors reach the record and the form JSON.

     Three ``700`` fields are imported, the last one without a ``u``
     subfield; three contributors are expected in both representations.
     """
     from invenio.modules.records.api import Record

     marcxml = (
         '<record>'
         '<datafield tag="700" ind1=" " ind2=" ">'
         '<subfield code="u">Test</subfield>'
         '<subfield code="4">cph</subfield>'
         '<subfield code="a">Nielsen, Lars</subfield>'
         '</datafield>'
         '<datafield tag="700" ind1=" " ind2=" ">'
         '<subfield code="u">Hansen</subfield>'
         '<subfield code="4">edt</subfield>'
         '<subfield code="a">Viggo</subfield>'
         '</datafield>'
         '<datafield tag="700" ind1=" " ind2=" ">'
         '<subfield code="4">edt</subfield>'
         '<subfield code="a">Hansen</subfield>'
         '</datafield>'
         '</record>'
     )
     record = Record.create(marcxml, master_format='marc')

     assert len(record['contributors']) == 3
     form_json = record.produce('json_for_form')
     assert len(form_json['contributors']) == 3
示例#32
0
def filter_step(obj, eng):
    """Run an external filter script on the MARCXML export of the record.

    The script path is read from the ``f_filter-file`` argument of the
    repository stored in ``obj.extra_data``. The record is exported as
    MARCXML, written to ``<CFG_TMPSHAREDDIR>/<eng.uuid>/<obj.id>`` and that
    file path is handed to the script as its only argument.

    The outcome is only logged on ``obj.log``: the script's stdout on
    success, its stderr when the exit code is non-zero or stderr is not
    empty. Nothing is returned.

    :param obj: workflow object holding the record and repository settings
    :param eng: workflow engine processing the object (its ``uuid`` names
        the working directory)
    """
    from invenio.modules.records.api import Record
    from invenio.utils.shell import run_shell_command

    repository = obj.extra_data.get("repository", {})
    arguments = repository.get("arguments", {})
    script_name = arguments.get("f_filter-file")
    if not script_name:
        obj.log.error("No script file found!")
        return

    marcxml_value = Record(obj.data.dumps()).legacy_export_as_marc()
    extract_path = os.path.join(
        cfg['CFG_TMPSHAREDDIR'],
        str(eng.uuid)
    )
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    # Dump the MARCXML to disk so the script can read it. The context
    # manager closes the handle even if the write fails.
    marcxmlfile = os.path.join(extract_path, str(obj.id))
    with open(marcxmlfile, 'w') as file_fd:
        file_fd.write(marcxml_value)

    exitcode, cmd_stdout, cmd_stderr = run_shell_command(
        cmd="%s '%s'",
        args=(str(script_name),
              str(marcxmlfile)))
    # Any output on stderr is treated as a failure, even with exit code 0.
    if exitcode != 0 or cmd_stderr != "":
        obj.log.error(
            "Error while running filtering script on %s\nError:%s"
            % (marcxmlfile, cmd_stderr)
        )
    else:
        obj.log.info(cmd_stdout)
示例#33
0
def quick_match_record(obj, eng):
    """Try to resolve the record ID of the record held by ``obj``.

    The persistent identifiers of the record are read and stored under
    ``obj.extra_data["persistent_ids"]``. If one of them is named ``recid``
    it is used directly; otherwise the remaining known identifiers (system
    number, OAI id, system control number, DOI) are passed to the matching
    BibUpload lookup function until one of them returns a record ID.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    :return: True when a record ID was found (it is then stored under
        ``obj.extra_data["persistent_ids"]["recid"]``), False otherwise
    """
    from invenio.legacy.bibupload.engine import (find_record_from_recid,
                                                 find_record_from_sysno,
                                                 find_records_from_extoaiid,
                                                 find_record_from_oaiid,
                                                 find_record_from_doi)
    from invenio.modules.records.api import Record

    # Identifier name -> function resolving that identifier to a record ID.
    finders = {
        'recid': find_record_from_recid,
        'system_number': find_record_from_sysno,
        'oaiid': find_record_from_oaiid,
        'system_control_number': find_records_from_extoaiid,
        'doi': find_record_from_doi
    }

    record = Record(obj.data.dumps())
    try:
        identifiers = record.persistent_identifiers
    except Exception as e:
        # Reading the identifiers failed: log it and carry on with none.
        eng.log.error("Problem with getting identifiers: %s\n%s" %
                      (str(e), traceback.format_exc()))
        identifiers = []

    obj.extra_data["persistent_ids"] = identifiers

    # Flatten every identifier's sequence of dicts into one dict per name.
    identifier_dict = {}
    for id_name, parts in identifiers:
        merged = {}
        for part in parts:
            merged.update(part)
        identifier_dict[id_name] = merged

    # NOTE(review): "persistent_ids" is bound to the same object that was
    # unpacked as (name, value) pairs above, yet it is indexed with the
    # string key "recid" below -- confirm the type returned by
    # ``persistent_identifiers`` supports both usages.
    if "recid" in identifier_dict:
        obj.extra_data["persistent_ids"]["recid"] = identifier_dict["recid"]
        return True

    # No explicit recid: fall back to the other stable identifiers.
    recid = False
    for id_name, finder in finders.iteritems():
        if id_name not in identifier_dict:
            continue
        candidate = identifier_dict[id_name]
        if id_name in candidate:
            # Shape {"doi": {"doi": val}}
            recid = finder(candidate[id_name])
        elif "value" in candidate:
            # Shape {"doi": {"value": val}}
            recid = finder(candidate["value"])

        if recid:
            break

    if not recid:
        return False
    obj.extra_data["persistent_ids"]["recid"] = recid
    return True
示例#34
0
 def setUp(self):
     """Build the Bibtex fixtures: a good, a bad and an empty record."""
     from invenio.modules.records.api import Record

     def as_bibtex(source):
         # Wrap a JSON-created record into a Bibtex object.
         return Bibtex(Record.create(source, 'json'))

     self.record_good = as_bibtex(test_record)
     self.record_bad = as_bibtex(test_bad_record)
     self.record_empty = Bibtex({})
示例#35
0
def upload_step(obj, eng):
    """Submit the record of a workflow object as ``bibupload`` task(s).

    When the repository's ``postprocess`` setting contains ``"f"`` (a filter
    was run), the ``.insert.xml``, ``.append.xml``, ``.correct.xml`` and
    ``.holdingpen.xml`` files next to the working file are submitted, each
    with its own mode flag, if they exist. Otherwise the record is exported
    as MARCXML, written to the working file and submitted with ``-r -i``.

    Every submission also receives a random sequence id (``-I``) and, when
    set in the repository arguments, a name (``-N``) and a priority (``-P``).
    The result is only reported through ``eng.log``.

    :param obj: BibWorkflowObject to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.legacy.oaiharvest.dblayer import create_oaiharvest_log_str
    from invenio.modules.records.api import Record
    from invenio.legacy.bibsched.bibtask import task_low_level_submission

    repository = obj.extra_data.get("repository", {})
    sequence_id = random.randrange(1, 60000)

    arguments = repository.get("arguments", {})

    # Options shared by every submitted task.
    default_args = ['-I', str(sequence_id)]
    if arguments.get('u_name', ""):
        default_args.extend(['-N', arguments.get('u_name', "")])
    if arguments.get('u_priority', 5):
        default_args.extend(['-P', str(arguments.get('u_priority', 5))])

    extract_path = os.path.join(
        cfg['CFG_TMPSHAREDDIR'],
        str(eng.uuid)
    )
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    filepath = os.path.join(extract_path, str(obj.id))
    if "f" in repository.get("postprocess", []):
        # We have a filter: upload whichever of its output files exist.
        file_uploads = [
            ("{0}.insert.xml".format(filepath), ["-i"]),
            ("{0}.append.xml".format(filepath), ["-a"]),
            ("{0}.correct.xml".format(filepath), ["-c"]),
            ("{0}.holdingpen.xml".format(filepath), ["-o"]),
        ]
    else:
        # We do not, so we get the data from the record. The context
        # manager closes the handle even if the write fails.
        marcxml_value = Record(obj.data.dumps()).legacy_export_as_marc()
        with open(filepath, 'w') as file_fd:
            file_fd.write(marcxml_value)
        file_uploads = [(filepath, ["-r", "-i"])]

    task_id = None
    for location, mode in file_uploads:
        if not os.path.exists(location):
            continue
        try:
            # Submit the file that was actually checked (``location``), not
            # the base ``filepath``: with a filter they differ, and using
            # the base path would never upload the filtered files.
            args = mode + [location] + default_args
            task_id = task_low_level_submission("bibupload",
                                                "oaiharvest",
                                                *args)
            repo_id = repository.get("id")
            if repo_id:
                create_oaiharvest_log_str(
                    task_id,
                    repo_id,
                    obj.get_data()
                )
        except Exception as msg:
            # Best effort: a failed submission is logged and the remaining
            # files are still tried.
            eng.log.error(
                "An exception during submitting oaiharvest task occured : %s " % (
                    str(msg)))
    if task_id is None:
        eng.log.error("an error occurred while uploading %s from %s" %
                      (filepath, repository.get("name", "Unknown")))
    else:
        eng.log.info(
            "material harvested from source %s was successfully uploaded" %
            (repository.get("name", "Unknown"),))
    eng.log.info("end of upload")
示例#36
0
 def setUp(self):
     """Prepare Bibtex objects for a good, a bad and an empty record."""
     from invenio.modules.records.api import Record

     def make_bibtex(source):
         # Create the record from JSON, then wrap it into a Bibtex object.
         return Bibtex(Record.create(source, 'json'))

     self.record_good = make_bibtex(test_record)
     self.record_bad = make_bibtex(test_bad_record)
     self.record_empty = Bibtex({})
示例#37
0
 def test_json_for_form(self):
     """Check the form and MARC JSON produced for a title-only record."""
     from invenio.modules.records.api import Record

     record = Record.create({'title': 'Test'}, 'json')

     form_output = record.produce('json_for_form')
     assert form_output == {'title': 'Test'}

     marc_output = record.produce('json_for_marc')
     assert marc_output == [{'245__a': 'Test'}]
示例#38
0
    def get_description(bwo):
        """Get the description column part.

        Collects three pieces of information and renders them with the
        ``workflows/styles/harvesting_record.html`` template:

        * the values of the record's persistent identifiers, with a fallback
          to ``system_number_external`` (or a "No ids" placeholder) when
          they cannot be read;
        * a short HTML summary of the ``bibclassify`` task result, if any;
        * the terms of the record's subjects whose source is ``inspire``
          (compared case-insensitively).

        :param bwo: workflow object providing ``get_data()`` and
            ``get_tasks_results()``
        :return: the rendered template
        """
        record = bwo.get_data()
        from invenio.modules.records.api import Record
        try:
            identifiers = Record(record.dumps()).persistent_identifiers
            final_identifiers = [i['value'] for i in identifiers]
        except Exception:
            # Best effort: any failure while reading the identifiers falls
            # back to the external system number or a placeholder.
            if hasattr(record, "get"):
                final_identifiers = [
                    record.get("system_number_external",
                               {}).get("value", 'No ids')
                ]
            else:
                final_identifiers = [' No ids']

        task_results = bwo.get_tasks_results()
        results = []
        if 'bibclassify' in task_results:
            try:
                result = task_results['bibclassify'][0]['result']
                fast_mode = result.get('fast_mode', False)
                result = result['dict']['complete_output']
                result_string = "<strong></br>Bibclassify result:"\
                                "</br></strong>"\
                                "Number of Core keywords: \t%s</br>"\
                                "PACS: \t%s</br>"\
                                % (len(result['Core keywords']),
                                   len(result['Field codes']))
                if fast_mode:
                    result_string += "(This task run at fast mode"\
                                     " taking into consideration"\
                                     " only the title and the abstract)"
                results.append(result_string)
            except (KeyError, IndexError):
                # Incomplete bibclassify output: skip the summary.
                pass

        categories = []
        if hasattr(record, "get"):
            # Pick the field holding the subjects. The second test used to
            # be a bare string (always true), which made the ``else`` branch
            # unreachable.
            if 'subject' in record:
                lookup = ["subject", "term"]
            elif "subject_term" in record:
                lookup = ["subject_term", "term"]
            else:
                lookup = None
            if lookup:
                primary, secondary = lookup
                category_list = record.get(primary, [])
                if isinstance(category_list, dict):
                    # A single subject is stored as a bare dict.
                    category_list = [category_list]
                for subject in category_list:
                    category = subject[secondary]
                    if len(subject) == 2:
                        # Exactly one key besides the term: its value is
                        # taken as the source, whatever the key is called.
                        # Selected by name so the result does not depend on
                        # dict ordering.
                        other_key = [key for key in subject
                                     if key != secondary][0]
                        source_list = subject[other_key]
                    else:
                        try:
                            source_list = subject['source']
                        except KeyError:
                            source_list = ""
                    if source_list.lower() == 'inspire':
                        categories.append(category)

        from flask import render_template
        return render_template('workflows/styles/harvesting_record.html',
                               categories=categories,
                               identifiers=final_identifiers,
                               results=results)
示例#39
0
    def test_lossless_marc_import_export(self):
        """Check that a record created from MARC dumps the expected values."""
        from invenio.modules.records.api import Record
        # Import the MARC fixture and dump the resulting record.
        r = Record.create(test_marc, master_format='marc').dumps()

        # Every key of the ``test_record`` fixture (defined outside this
        # method) must have the same value in the dump.
        for k in test_record.keys():
            self.assertEqual(test_record[k], r[k])