def test_gnd(self):
    """Test that GND identifiers round-trip between JSON and MARC.

    The JSON side stores the identifier with a ``gnd:`` prefix, the MARC
    side stores it as ``(gnd)<number>`` in subfield ``0`` of tag ``700``.
    """
    from invenio.modules.records.api import Record

    r = Record.create({'contributors': [
        {'name': 'Smith, John', 'gnd': 'gnd:118604740',
         'type': 'DataCurator'},
    ]}, 'json')

    # Produce the MARC view once: it is printed (useful when the assertion
    # fails) and then checked. ``print(x)`` with a single argument behaves
    # the same under Python 2 and Python 3.
    marc_json = r.produce('json_for_marc')
    print(marc_json)

    # Test that "gnd:" is not added in MARC
    assert {
        '700__0': ['(gnd)118604740', None],
        '700__4': 'cur',
        '700__a': 'Smith, John',
    } in marc_json

    r = Record.create(
        '<record>'
        '<datafield tag="700" ind1=" " ind2=" ">'
        '<subfield code="4">cur</subfield>'
        '<subfield code="a">Smith, John</subfield>'
        '<subfield code="0">(gnd)118604740</subfield>'
        '</datafield>'
        '</record>',
        master_format='marc'
    )

    # Test that "gnd:" is added back in JSON
    print(r['contributors'])
    assert r['contributors'] == [{
        'gnd': 'gnd:118604740',
        'name': 'Smith, John',
        'orcid': '',
        'type': 'DataCurator'
    }]
def test_jsonalchemy_toint_usage(self):
    """Check that ``to_int`` copes with an absent integer subfield.

    The ``test_toint`` model has a field with an integer subfield that is
    converted from a string when the record comes from ``MARCXML``.
    JSONAlchemy fills absent subfields with ``None``, so a plain ``int``
    call would crash when the subfield is not provided; ``to_int`` is used
    in the field definition to prevent that. The subfield left out in the
    first record is ``999__a``.
    """
    from invenio.modules.records.api import Record

    create_options = dict(master_format='marc', model="test_toint",
                          namespace='testsuite')

    # First record: the integer subfield is missing; no error is expected.
    marcxml_without_int = (
        '<collection><record><datafield tag="999" ind1="" ind2= "">'
        '<subfield code="b">Value</subfield></datafield></record>'
        '</collection>'
    )
    record = Record.create(marcxml_without_int, **create_options)
    errors = record.__dict__['_dict']['__meta_metadata__']['__errors__']
    self.assertEqual(len(errors), 0)

    # Second record: the integer subfield is provided and must be converted.
    marcxml_with_int = (
        '<collection><record><datafield tag="999" ind1="" ind2= "">'
        '<subfield code="a">9999</subfield>'
        '<subfield code="b">Value</subfield></datafield></record>'
        '</collection>'
    )
    record = Record.create(marcxml_with_int, **create_options)
    self.assertEqual(record['with_integers'][0]['some_int'], 9999)
def test_subjects_gnd(self):
    """Test subject rules for GND identifiers in both directions."""
    from invenio.modules.records.api import Record

    json_subject = {
        'term': 'Smith, John',
        'identifier': 'gnd:118604740',
        'scheme': 'gnd',
    }
    r = Record.create({'subjects': [json_subject]}, 'json')

    # JSON -> MARC: the "gnd:" prefix must not show up in MARC.
    print(r.produce('json_for_marc'))
    expected_marc_field = {
        '6501_a': 'Smith, John',
        '6501_0': '(gnd)118604740',
    }
    assert expected_marc_field in r.produce('json_for_marc')

    marcxml = ''.join([
        '<record>',
        '<datafield tag="650" ind1="1" ind2=" ">',
        '<subfield code="a">Smith, John</subfield>',
        '<subfield code="0">(gnd)118604740</subfield>',
        '</datafield>',
        '</record>',
    ])
    r = Record.create(marcxml, master_format='marc')

    # MARC -> JSON: the "gnd:" prefix is added back.
    expected_subjects = [{
        'identifier': 'gnd:118604740',
        'scheme': 'gnd',
        'term': 'Smith, John',
    }]
    assert r['subjects'] == expected_subjects
def test_marc_export(self):
    """Compare the MARC export and the form JSON with the fixtures.

    On a MARC mismatch the assertion error carries a unified diff between
    ``test_marc`` and the generated MARCXML.
    """
    import difflib

    from invenio.modules.records.api import Record
    from invenio.legacy.bibrecord import create_record, record_xml_output

    rec = Record(json=test_record, master_format='marc')

    # Needed to properly set authors when generating MARC
    authors = rec['authors']
    rec['_first_author'] = authors[0]
    rec['_additional_authors'] = authors[1:]

    recstruct = create_record(rec.legacy_export_as_marc())[0]
    output_marc = record_xml_output(recstruct)

    try:
        self.assertEqual(test_marc, output_marc)
    except AssertionError:
        # Replace the default message with a line-by-line diff.
        diff_lines = difflib.unified_diff(
            test_marc.splitlines(True), output_marc.splitlines(True)
        )
        raise AssertionError("".join(diff_lines))

    form_json = rec.produce('json_for_form')
    for key, expected in test_form_json.items():
        self.assertEqual(form_json[key], expected)
def test_types(self):
    """Test upload_type rules.

    For every configured upload type (and every subtype it declares) a
    record is built from tag ``980`` and the resulting ``upload_type`` is
    checked; no collection may be derived from it.
    """
    from invenio.modules.records.api import Record

    with_subtype = (
        '<record><datafield tag="980" ind1=" " ind2=" ">'
        '<subfield code="b">{1}</subfield>'
        '<subfield code="a">{0}</subfield>'
        '</datafield></record>'
    )
    without_subtype = (
        '<record><datafield tag="980" ind1=" " ind2=" ">'
        '<subfield code="a">{0}</subfield>'
        '</datafield></record>'
    )

    for upload_type in cfg['UPLOAD_TYPES']:
        subtypes = upload_type['subtypes']
        if subtypes:
            # Lazy on purpose: one record is created and checked at a time.
            cases = (
                (with_subtype.format(upload_type['type'], subtype['type']),
                 {"type": upload_type['type'], "subtype": subtype['type']})
                for subtype in subtypes
            )
        else:
            cases = [(
                without_subtype.format(upload_type['type']),
                {"type": upload_type['type']},
            )]

        for marcxml, expected in cases:
            r = Record.create(marcxml, master_format='marc')
            assert r['upload_type'] == expected
            assert len(r.get('collections', [])) == 0
def test_json_for_ld(self):
    """Smoke test: producing ``json_for_ld`` must not raise."""
    import copy

    from invenio.modules.records.api import Record

    # The minimal record is created only to exercise ``Record.create``;
    # its result is not used afterwards.
    Record.create({'title': 'Test'}, 'json')

    full_record = Record(json=copy.copy(test_record), master_format='marc')
    full_record.produce('json_for_ld')
def formatter(bwo, **kwargs):
    """Nicely format the record.

    :param bwo: object whose ``get_data()`` returns the data to format
    :param kwargs: optional ``formatter`` (callable applied to the data,
        its result is returned as is) and ``of`` (output format code
        handed to ``format_record``)
    :returns: ``''`` for empty data; otherwise the formatted data, falling
        back to the raw data (pretty-printed when it is a dict) when no
        formatting could be applied
    """
    from pprint import pformat
    from invenio.modules.records.api import Record

    data = bwo.get_data()
    if not data:
        return ''

    formatter = kwargs.get("formatter", None)
    of = kwargs.get("of", None)
    if formatter:
        # A separate formatter is supplied
        return formatter(data)

    if isinstance(data, collections.Mapping):
        # Dicts are cool on its own, but maybe its SmartJson (record)
        try:
            data = Record(data.dumps()).legacy_export_as_marc()
        except (TypeError, KeyError):
            pass

    # Bound up front: several paths below (non-string data, a formatter
    # raising TypeError, an XML parsing error) assign nothing, and the
    # fallback check would otherwise raise UnboundLocalError.
    formatted_data = None

    if isinstance(data, string_types):
        # We can try formatter!
        # If already XML, format_record does not like it.
        if of and of != 'xm':
            try:
                from invenio.modules.formatter import format_record
                formatted_data = format_record(
                    recID=None,
                    of=of,
                    xml_record=data
                )
            except TypeError:
                # Wrong kind of type; fall back to the raw data below.
                pass
        else:
            # So, XML then
            from xml.dom.minidom import parseString

            try:
                formatted_data = parseString(data).toprettyxml()
            except TypeError:
                # Probably not proper XML string then
                return "Data cannot be parsed: %s" % (data,)
            except Exception:
                # Best effort: just return the raw string below.
                pass

    if not formatted_data:
        formatted_data = data

    if isinstance(formatted_data, dict):
        formatted_data = pformat(formatted_data)
    return formatted_data
def test_marc_export(self):
    """Compare the record's legacy structure with the parsed fixture MARC."""
    from invenio.modules.records.api import Record
    from invenio.legacy.bibrecord import create_record

    record = Record(json=test_record, master_format='marc')
    exported_recstruct = record.legacy_create_recstruct()
    expected_recstruct = create_record(test_marc)[0]
    self.assertEqual(exported_recstruct, expected_recstruct)
def formatter(bwo, **kwargs):
    """Return a formatted version of the data.

    :param bwo: object whose ``get_data()`` returns the data to format
    :param kwargs: optional ``formatter`` (callable applied to the data)
        and ``format`` (output format code)
    :returns: ``''`` for empty data, otherwise the formatted or raw data
    """
    from invenio.modules.formatter.engine import format_record

    data = bwo.get_data()
    if not data:
        return ''

    custom_formatter = kwargs.get("formatter", None)
    output_format = kwargs.get("format", None)
    if custom_formatter:
        # A separate formatter is supplied
        return custom_formatter(data)

    from invenio.modules.records.api import Record

    if isinstance(data, collections.Mapping):
        # Dicts are cool on their own, but maybe it is a SmartJson (record)
        try:
            data = Record(data.dumps()).legacy_export_as_marc()
        except (TypeError, KeyError):
            # Maybe not, submission?
            return data

    if not isinstance(data, string_types):
        # Sets are handed back as lists, anything else untouched.
        return list(data) if isinstance(data, set) else data

    # From here on the data is a string; try to convert it.
    if output_format and output_format != 'xm':
        # If already XML, format_record does not like it.
        try:
            return format_record(recID=None, of=output_format,
                                 xml_record=data)
        except TypeError:
            # Wrong kind of type
            pass
    elif output_format:
        # So, XML then
        from xml.dom.minidom import parseString

        try:
            return parseString(data).toprettyxml()
        except TypeError:
            # Probably not proper XML string then
            return "Data cannot be parsed: %s" % (data,)
        except Exception:
            # Some other parsing error
            pass

    # Just return raw string
    return data
def test_json_for_ld(self):
    """Produce ``json_for_ld`` for the fixture record and print it."""
    import copy

    from invenio.modules.records.api import Record

    # Created only to exercise ``Record.create``; the result is unused.
    Record.create({"title": "Test"}, "json")

    fixture_copy = copy.copy(test_record)
    record = Record(json=fixture_copy, master_format="marc")
    print(record.produce("json_for_ld"))
def test_json_for_form(self):
    """Check the form and MARC producers on a minimal and a full record."""
    import copy

    from invenio.modules.records.api import Record

    # Minimal record: the title must reach both output formats.
    minimal = Record.create({'title': 'Test'}, 'json')
    assert minimal.produce('json_for_form')['title'] == 'Test'
    assert {'245__a': 'Test'} in minimal.produce('json_for_marc')

    # Full fixture record: every expected form field must match.
    full = Record(json=copy.copy(test_record), master_format='marc')
    produced = full.produce('json_for_form')
    for field, expected in test_form_json.items():
        self.assertEqual(produced[field], expected)
def test_marc_export(self):
    """Check the form JSON produced from the fixture record.

    NOTE: despite its name this test does not compare any MARC export;
    the ``legacy_create_recstruct()`` versus ``create_record(test_marc)``
    comparison is currently disabled.
    """
    from invenio.modules.records.api import Record

    record = Record(json=test_record, master_format='marc')
    produced = record.produce('json_for_form')
    for field, expected in test_form_json.items():
        self.assertEqual(produced[field], expected)
def test_json_for_form(self):
    """Verify ``json_for_form`` / ``json_for_marc`` output of records."""
    import copy

    from invenio.modules.records.api import Record

    title_only = Record.create({"title": "Test"}, "json")
    assert title_only.produce("json_for_form")["title"] == "Test"
    assert {"245__a": "Test"} in title_only.produce("json_for_marc")

    fixture_record = Record(json=copy.copy(test_record),
                            master_format="marc")
    form_json = fixture_record.produce("json_for_form")
    for name, wanted in test_form_json.items():
        self.assertEqual(form_json[name], wanted)
def test_lossless_marc_import_export(self):
    """Importing the fixture MARC must yield every fixture JSON field."""
    from invenio.modules.records.api import Record

    imported = Record.create(test_marc, master_format="marc")
    dumped = imported.dumps()
    for field in test_record:
        self.assertEqual(test_record[field], dumped[field])
def test_jsonalchemy_tooldvalue(self):
    """Test behaviour of ``set_default_value``.

    The ``d`` subfield given to the reader is in a wrong format, and
    ``JSONAlchemy`` is expected to skip such a value. With the value used
    below the module crashed in ``set_default_value``; that error has been
    caught. The reason behind this behaviour needs further investigation.
    """
    from invenio.modules.records.api import Record

    # Check if it works when the value is provided.
    marcxml = (
        '<collection><record><datafield tag="100" ind1=" " ind2=" "> '
        '<subfield code="a">Guy, Bobby</subfield> '
        '<subfield code="d">I like trains</subfield> '
        '<subfield code="g">ACTIVE</subfield> '
        '<subfield code="q">Bobby Guy</subfield> '
        '</datafield></record></collection>'
    )
    record = Record.create(
        marcxml,
        master_format='marc',
        model="test_oldvalue",
        namespace='testsuite',
    )
    birth_date = record['dates']['birth']
    self.assertEqual(birth_date, None)
def update(recid):
    """View for INSPIRE author update form.

    :param recid: identifier of the author record to update; a falsy value
        redirects to the "new author" view
    :returns: the rendered update form, pre-filled with the record data
        when it could be fetched, or a redirect response
    """
    # URLs always use forward slashes, so join with posixpath rather than
    # os.path, which uses the separator of the host OS.
    import posixpath

    # Store referrer in session for later redirection to original page
    session["author_update_referrer"] = request.referrer

    if not recid:
        return redirect(url_for("inspire_authors.new"))

    data = {}
    try:
        url = posixpath.join(cfg["AUTHORS_UPDATE_BASE_URL"], "record",
                             str(recid), "export", "xm")
        xml = requests.get(url)
        data = Record.create(xml.text.encode("utf-8"), 'marc',
                             model='author').produce("json_for_form")
        convert_for_form(data)
    except requests.exceptions.RequestException:
        # Best effort: when the record cannot be fetched the form is
        # rendered with the recid only.
        pass
    data["recid"] = recid

    form = AuthorUpdateForm(data=data)
    ctx = {
        "action": url_for('.submitupdate'),
        "name": "authorUpdateForm",
        "id": "authorUpdateForm",
    }

    return render_template('authors/forms/update_form.html', form=form, **ctx)
def _create_marcxml_record(obj, eng):
    """Store the MARCXML export of ``obj.data`` in ``obj.extra_data``.

    :param obj: workflow object; its ``data`` is read as author JSON and
        the result is saved under ``extra_data["marcxml"]``
    :param eng: workflow engine (unused)
    """
    from invenio.modules.records.api import Record

    obj.log.info("Creating marcxml record")

    author_record = Record.create(obj.data, 'json', model='author')
    obj.extra_data["marcxml"] = author_record.legacy_export_as_marc()

    message = "Produced MarcXML: \n {}".format(obj.extra_data["marcxml"])
    obj.log.info(message)
def marshal_deposition(cls, deposition):
    """Generate a JSON representation for REST API of a Deposition.

    :param cls: class providing the ``marshal_*_fields`` definitions
    :param deposition: deposition to serialise
    :returns: the marshalled deposition with its ``metadata`` and, taken
        from the first sealed SIP found, ``record_id``/``record_url`` and
        ``doi``/``doi_url`` when available
    """
    # Select the draft and the field definitions used for its metadata.
    if not deposition.has_sip():
        draft = deposition.get_or_create_draft('_metadata')
        metadata_fields = cls.marshal_metadata_fields
    elif '_edit' in deposition.drafts:
        draft = deposition.get_draft('_edit')
        metadata_fields = cls.marshal_metadata_edit_fields
    else:
        # FIXME: Not based on latest available data in record.
        latest_sip = deposition.get_latest_sip(sealed=True)
        record = Record.create(latest_sip.package, master_format='marc')
        draft = record_to_draft(record, post_process=process_draft)
        metadata_fields = cls.marshal_metadata_edit_fields

    # Fix known differences in marshalling
    current_app.logger.debug(draft.values)
    draft.values = filter_empty_elements(draft.values)
    current_app.logger.debug(draft.values)

    # Drop disabled fields from the output.
    for field, flags in draft.flags.items():
        if 'disabled' not in flags or field not in draft.values:
            continue
        current_app.logger.debug(field)
        del draft.values[field]

    # Marshal deposition, then its metadata attribute.
    obj = marshal(deposition, cls.marshal_deposition_fields)
    obj['metadata'] = marshal(unicodifier(draft.values), metadata_fields)

    # Add record and DOI information from the first sealed SIP.
    for candidate in deposition.sips:
        if not candidate.is_sealed():
            continue

        recjson = candidate.metadata
        if recjson.get('recid'):
            obj['record_id'] = fields.Integer().format(recjson.get('recid'))
            record_url = url_for(
                'record.metadata',
                recid=recjson.get('recid'),
                _external=True
            )
            obj['record_url'] = fields.String().format(record_url)

        doi = recjson.get('doi')
        if doi and recjson.get('doi').startswith(
                cfg['CFG_DATACITE_DOI_PREFIX'] + "/"):
            obj['doi'] = fields.String().format(recjson.get('doi'))
            obj['doi_url'] = fields.String().format(
                "http://dx.doi.org/%s" % obj['doi']
            )
        break

    return obj
def test_pre1900_embargo_date(self):
    """An embargo date before 1900 must survive import, dump and export."""
    from invenio.modules.records.api import Record

    marcxml = (
        '<record><datafield tag="942" ind1="" ind2="">'
        '<subfield code="a">0900-12-31</subfield>'
        '</datafield></record>'
    )
    record = Record.create(marcxml, master_format='marc')

    # Parsed value, dumped value and MARC export are checked in turn.
    self.assertEqual(date(900, 12, 31), record['embargo_date'])
    dumped = record.dumps()
    self.assertEqual('0900-12-31', dumped['embargo_date'])
    exported = record.legacy_export_as_marc()
    assert '0900-12-31' in exported
def marshal_deposition(cls, deposition):
    """Generate a JSON representation for REST API of a Deposition.

    :param cls: class providing the ``marshal_*_fields`` definitions
    :param deposition: deposition to serialise
    :returns: the marshalled deposition including ``metadata`` and, from
        the first sealed SIP found, record and DOI information
    """
    has_sip = deposition.has_sip()

    # Choose the draft to serialise and the matching field definitions.
    if has_sip and '_edit' in deposition.drafts:
        metadata_fields = cls.marshal_metadata_edit_fields
        draft = deposition.get_draft('_edit')
    elif has_sip:
        # FIXME: Not based on latest available data in record.
        metadata_fields = cls.marshal_metadata_edit_fields
        sealed_sip = deposition.get_latest_sip(sealed=True)
        draft = record_to_draft(
            Record.create(sealed_sip.package, master_format='marc'),
            post_process=process_draft,
        )
    else:
        metadata_fields = cls.marshal_metadata_fields
        draft = deposition.get_or_create_draft('_default')

    # Fix known differences in marshalling
    draft.values = filter_empty_elements(draft.values)
    if 'grants' not in draft.values:
        draft.values['grants'] = []

    # Remove disabled fields from the output.
    for name, flags in draft.flags.items():
        if 'disabled' in flags and name in draft.values:
            del draft.values[name]

    # Marshal the deposition and its metadata attribute.
    obj = marshal(deposition, cls.marshal_deposition_fields)
    obj['metadata'] = marshal(unicodifier(draft.values), metadata_fields)

    # Add record and DOI information; only the first sealed SIP is used.
    for item in deposition.sips:
        if not item.is_sealed():
            continue

        recjson = item.metadata

        if recjson.get('recid'):
            obj['record_id'] = fields.Integer().format(
                recjson.get('recid'))
            obj['record_url'] = fields.String().format(
                url_for('record.metadata', recid=recjson.get('recid'),
                        _external=True))

        if recjson.get('doi'):
            doi_prefix = cfg['CFG_DATACITE_DOI_PREFIX'] + "/"
            if recjson.get('doi').startswith(doi_prefix):
                obj['doi'] = fields.String().format(recjson.get('doi'))
                obj['doi_url'] = fields.String().format(
                    "http://dx.doi.org/%s" % obj['doi'])
        break

    return obj
def test_pre1900_publication_date(self):
    """A publication date before 1900 must survive import and export."""
    from invenio.modules.records.api import Record

    marcxml = "".join([
        '<record><datafield tag="260" ind1="" ind2="">',
        '<subfield code="c">0900-12-31</subfield>',
        "</datafield></record>",
    ])
    rec = Record.create(marcxml, master_format="marc")

    expected_date = date(900, 12, 31)
    self.assertEqual(expected_date, rec["publication_date"])
    self.assertEqual("0900-12-31", rec.dumps()["publication_date"])
    assert "0900-12-31" in rec.legacy_export_as_marc()
def get_description(bwo):
    """Get the description (identifiers and categories) from the object data.

    :param bwo: object whose ``get_data()`` returns the record
    :returns: the rendered ``harvesting_record.html`` template
    """
    from invenio.modules.records.api import Record
    from flask import render_template, current_app

    record = bwo.get_data()

    # Must be a list: it is filled with ``extend`` below. It used to start
    # as a dict, so ``extend`` raised AttributeError and the fallback in
    # the ``except`` branch was always taken.
    final_identifiers = []
    try:
        identifiers = Record(record.dumps()).persistent_identifiers
        for values in identifiers.values():
            final_identifiers.extend([i.get("value") for i in values])
    except Exception:
        # Best effort: log the problem and fall back to the system
        # control number, when the record allows key lookup.
        current_app.logger.exception("Could not get identifiers")
        if hasattr(record, "get"):
            final_identifiers = [
                record.get("system_control_number", {}).get("value", 'No ids')
            ]
        else:
            final_identifiers = []

    categories = []
    if hasattr(record, "get"):
        # The subject terms live under one of two possible keys.
        if 'subject' in record:
            lookup = ["subject", "term"]
        elif "subject_term" in record:
            lookup = ["subject_term", "term"]
        else:
            lookup = None
        if lookup:
            primary, secondary = lookup
            category_list = record.get(primary, [])
            if isinstance(category_list, dict):
                # A single subject is stored as a bare dict.
                category_list = [category_list]
            categories = [subject[secondary] for subject in category_list]

    return render_template('workflows/styles/harvesting_record.html',
                           categories=categories,
                           identifiers=final_identifiers)
def test_jsonalchemy_toint_usage(self):
    """Test the usage of ``to_int`` in a real-life example.

    A field of the ``test_toint`` model holds an integer subfield which is
    converted from a string when the record is read from ``MARCXML``.
    Because JSONAlchemy fills every absent subfield with ``None``, using
    the built-in ``int`` would crash when the subfield is not provided;
    ``to_int`` in the field definition prevents it. The subfield that is
    not provided here is ``999__a``.
    """
    from invenio.modules.records.api import Record

    def build(marcxml):
        # Every record of this test is created with the same options.
        return Record.create(marcxml, master_format='marc',
                             model="test_toint", namespace='testsuite')

    datafield_open = (
        '<collection><record><datafield tag="999" ind1="" ind2= "">'
    )
    datafield_rest = (
        '<subfield code="b">Value</subfield></datafield></record>'
        '</collection>'
    )

    # Without the integer subfield no error may be recorded.
    incomplete = build(datafield_open + datafield_rest)
    meta = incomplete.__dict__['_dict']['__meta_metadata__']
    self.assertEqual(len(meta['__errors__']), 0)

    # Check if it works when the value is provided.
    complete = build(
        datafield_open
        + '<subfield code="a">9999</subfield>'
        + datafield_rest
    )
    self.assertEqual(complete['with_integers'][0]['some_int'], 9999)
def get_xml_and_jsonify(rep_no):
    """Retrieve XML data from CDS and return a jsonified temporary record.

    :param rep_no: The report number to be retrieved
    :type rep_no: String
    :returns: the record created from the downloaded MARCXML, or ``None``
        when the response does not pass the check below

    Workflow - Download an XML file from CDS using a link like:
    http://cds.cern.ch/search?p=reportnumber%3A"CERN-THESIS-2013-297"&of=xm
    JSONify the xml and return it.
    """
    xml = get("""http://cds.cern.ch/search?p=reportnumber%%3A"%s"&of=xm"""
              % rep_no).content

    # NOTE(review): offsets 83-84 presumably hold the number of results in
    # the header of the CDS response ("1" followed by a space) - confirm.
    # A slice is compared instead of two indexed characters so that a
    # response shorter than 85 characters yields None, not an IndexError.
    if xml[83:85] == '1 ':
        return Record.create(xml, 'marc', model='data_analysis_cds_extract')

    return None
def _get_arxiv_id_from_inspire(doi):
    """Look up the arXiv identifier for a DOI through the INSPIRE search.

    :param doi: DOI sent to the INSPIRE search
    :returns: the result of ``_get_arxiv_id_from_record`` for the first
        record of the response, or ``None`` when the response contains no
        record
    :raises IOError: propagated unchanged when the URL cannot be opened
    """
    # NOTE(review): the query sends p='doi' plus a separate 'doi'
    # parameter; confirm this is the intended INSPIRE search syntax.
    url_values = urllib.urlencode({'p': 'doi', 'doi': doi, 'of': 'xm'})
    url = 'https://inspirehep.net/search?' + url_values

    # No handler here: an ``except IOError: raise`` adds nothing, the
    # error reaches the caller either way.
    collectionxml = urllib.urlopen(url).read()

    try:
        recordxml = list(split_blob(collectionxml, 'marc'))[0]
    except IndexError:
        # Empty result set.
        return None

    inspire_record = Record.create(recordxml, master_format='marc',
                                   namespace='recordext')
    return _get_arxiv_id_from_record(inspire_record)
def dumprecords():
    """Dump records.

    Every ``Bibrec`` row is loaded as a record; the clean dumps of the
    records found are written as one JSON list to ``dump2.json``.
    """
    from invenio.base.factory import create_app

    with create_app().app_context():
        from invenio.modules.editor.models import Bibrec
        from invenio.modules.records.api import Record

        bibrecs = Bibrec.query
        dumped = []
        with click.progressbar(bibrecs, length=bibrecs.count()) as bar:
            for bibrec in bar:
                record = Record.get_record(bibrec.id)
                if not record:
                    # Nothing to dump for this identifier.
                    continue
                dumped.append(record.dumps(clean=True))

        with open('dump2.json', 'w') as out:
            json.dump(dumped, out)
def dumprecords():
    """Dump records.

    Writes the clean dump of every record that can be loaded from the
    ``Bibrec`` table to ``dump2.json`` as a JSON list.
    """
    from invenio.base.factory import create_app

    application = create_app()
    with application.app_context():
        from invenio.modules.editor.models import Bibrec
        from invenio.modules.records.api import Record

        rows = Bibrec.query
        with click.progressbar(rows, length=rows.count()) as progress:
            # The inner generator is lazy, so records are still loaded and
            # dumped one at a time while the progress bar advances.
            loaded = (Record.get_record(row.id) for row in progress)
            data = [rec.dumps(clean=True) for rec in loaded if rec]

        with open("dump2.json", "w") as handle:
            json.dump(data, handle)
def get_mocked_record():
    """Return the shared mock record, creating it on first use.

    The record is cached on ``RecordMock.record``.
    """
    from invenio.modules.records.api import Record

    if RecordMock.record is not None:
        return RecordMock.record

    # TODO: the 'files_to_upload' key should be replaced with
    # cfg['files_var_name'] (same for a future '_files' entry).
    files_to_upload = [
        ('path1.xls', 'this/is/a/long/path/to/the/file/location/path1.xls'),
        ('path2.csv', 'path2.csv'),
        ('path3.pdf', 'path3.pdf'),
    ]
    mock_json = {
        'doi': '10.1234/invenio.1234',
        'files_to_upload': files_to_upload,
        'recid': 1,
    }
    RecordMock.record = Record(json=mock_json, master_format='marc')
    return RecordMock.record
def test_formjson_for_contributors(self):
    """Test contributor rules.

    Three ``700`` datafields must give three contributors, both in the
    record and in its form JSON.
    """
    from invenio.modules.records.api import Record

    first_contributor = (
        '<datafield tag="700" ind1=" " ind2=" ">'
        '<subfield code="u">Test</subfield>'
        '<subfield code="4">cph</subfield>'
        '<subfield code="a">Nielsen, Lars</subfield>'
        '</datafield>'
    )
    second_contributor = (
        '<datafield tag="700" ind1=" " ind2=" ">'
        '<subfield code="u">Hansen</subfield>'
        '<subfield code="4">edt</subfield>'
        '<subfield code="a">Viggo</subfield>'
        '</datafield>'
    )
    third_contributor = (
        '<datafield tag="700" ind1=" " ind2=" ">'
        '<subfield code="4">edt</subfield>'
        '<subfield code="a">Hansen</subfield>'
        '</datafield>'
    )
    marcxml = (
        '<record>'
        + first_contributor
        + second_contributor
        + third_contributor
        + '</record>'
    )

    record = Record.create(marcxml, master_format='marc')

    assert len(record['contributors']) == 3
    form_json = record.produce('json_for_form')
    assert len(form_json['contributors']) == 3
def filter_step(obj, eng):
    """Run an external python script.

    The record held by ``obj`` is exported as MARCXML to a file below
    ``CFG_TMPSHAREDDIR`` and the filter script configured for the
    repository (argument ``f_filter-file``) is run on that file.

    :param obj: workflow object holding the record and its repository
        configuration in ``extra_data``
    :param eng: workflow engine; its ``uuid`` names the working directory
    """
    from invenio.modules.records.api import Record
    from invenio.utils.shell import run_shell_command

    repository = obj.extra_data.get("repository", {})
    arguments = repository.get("arguments", {})
    script_name = arguments.get("f_filter-file")
    if not script_name:
        obj.log.error("No script file found!")
        return

    marcxml_value = Record(obj.data.dumps()).legacy_export_as_marc()
    extract_path = os.path.join(
        cfg['CFG_TMPSHAREDDIR'],
        str(eng.uuid)
    )
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    # Write the MARCXML where the filter script will read it. The ``with``
    # block closes the file even when writing fails.
    marcxmlfile = extract_path + os.sep + str(obj.id)
    with open(marcxmlfile, 'w') as file_fd:
        file_fd.write(marcxml_value)

    exitcode, cmd_stdout, cmd_stderr = run_shell_command(
        cmd="%s '%s'",
        args=(str(script_name), str(marcxmlfile)))
    if exitcode != 0 or cmd_stderr != "":
        obj.log.error(
            "Error while running filtering script on %s\nError:%s"
            % (marcxmlfile, cmd_stderr)
        )
    else:
        obj.log.info(cmd_stdout)
def quick_match_record(obj, eng):
    """Retrieve the record Id from a record.

    The persistent identifiers of the record are stored in
    ``obj.extra_data["persistent_ids"]``. When they contain a recid it is
    used directly; otherwise the other known identifiers (system number,
    OAI id, external OAI id, DOI) are looked up until one yields a recid.

    NOTE(review): ``identifiers`` is iterated as (name, value) pairs but
    later indexed by key through ``extra_data["persistent_ids"]``; confirm
    the type returned by ``persistent_identifiers``.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    :returns: ``True`` when a recid was found, ``False`` otherwise
    """
    from invenio.legacy.bibupload.engine import (find_record_from_recid,
                                                 find_record_from_sysno,
                                                 find_records_from_extoaiid,
                                                 find_record_from_oaiid,
                                                 find_record_from_doi)
    from invenio.modules.records.api import Record

    finders = {
        'recid': find_record_from_recid,
        'system_number': find_record_from_sysno,
        'oaiid': find_record_from_oaiid,
        'system_control_number': find_records_from_extoaiid,
        'doi': find_record_from_doi
    }

    record = Record(obj.data.dumps())
    try:
        identifiers = record.persistent_identifiers
    except Exception as e:
        # if anything goes wrong, assume we need to get it manually.
        eng.log.error("Problem with getting identifiers: %s\n%s"
                      % (str(e), traceback.format_exc()))
        identifiers = []

    obj.extra_data["persistent_ids"] = identifiers

    # Merge the list of dicts of every identifier into a single dict.
    merged = {}
    for name, parts in identifiers:
        combined = {}
        for part in parts:
            combined.update(part)
        merged[name] = combined

    if "recid" in merged:
        # If there is a recid, we are good, right?
        obj.extra_data["persistent_ids"]["recid"] = merged["recid"]
        return True

    # So if there is no explicit recid key, then maybe we can find the record
    # using any of the other stable identifiers defined.
    found_recid = False
    for name, finder in finders.iteritems():
        if name not in merged:
            continue
        entry = merged[name]
        if name in entry:
            # To get {"doi": {"doi": val}}
            found_recid = finder(entry[name])
        elif "value" in entry:
            # To get {"doi": {"value": val}}
            found_recid = finder(entry["value"])
        if found_recid:
            break

    if not found_recid:
        return False

    obj.extra_data["persistent_ids"]["recid"] = found_recid
    return True
def setUp(self):
    """Wrap the fixture records (good, bad and empty) in ``Bibtex``."""
    from invenio.modules.records.api import Record

    def as_bibtex(source):
        # Build the record from JSON, then wrap it.
        return Bibtex(Record.create(source, 'json'))

    self.record_good = as_bibtex(test_record)
    self.record_bad = as_bibtex(test_bad_record)
    self.record_empty = Bibtex({})
def upload_step(obj, eng):
    """Perform the upload step.

    When the repository declares a filter in its post-processing, the
    files named after the object's path (``.insert.xml``, ``.append.xml``,
    ``.correct.xml``, ``.holdingpen.xml``) are each submitted with their
    own mode; otherwise the record is exported as MARCXML and submitted
    with ``-r -i``.

    :param obj: BibWorkflowObject to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.legacy.oaiharvest.dblayer import create_oaiharvest_log_str
    from invenio.modules.records.api import Record
    from invenio.legacy.bibsched.bibtask import task_low_level_submission

    repository = obj.extra_data.get("repository", {})
    sequence_id = random.randrange(1, 60000)

    arguments = repository.get("arguments", {})

    default_args = ['-I', str(sequence_id)]
    if arguments.get('u_name', ""):
        default_args.extend(['-N', arguments.get('u_name', "")])
    if arguments.get('u_priority', 5):
        default_args.extend(['-P', str(arguments.get('u_priority', 5))])

    extract_path = os.path.join(
        cfg['CFG_TMPSHAREDDIR'],
        str(eng.uuid)
    )
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    filepath = extract_path + os.sep + str(obj.id)
    if "f" in repository.get("postprocess", []):
        # We have a filter.
        file_uploads = [
            ("{0}.insert.xml".format(filepath), ["-i"]),
            ("{0}.append.xml".format(filepath), ["-a"]),
            ("{0}.correct.xml".format(filepath), ["-c"]),
            ("{0}.holdingpen.xml".format(filepath), ["-o"]),
        ]
    else:
        # We do not, so we get the data from the record
        marcxml_value = Record(obj.data.dumps()).legacy_export_as_marc()
        # ``with`` closes the file even when writing fails.
        with open(filepath, 'w') as file_fd:
            file_fd.write(marcxml_value)
        file_uploads = [(filepath, ["-r", "-i"])]

    task_id = None
    for location, mode in file_uploads:
        if not os.path.exists(location):
            continue
        try:
            # Submit the file that was found. The base ``filepath`` used
            # to be passed here, so the filtered files were checked for
            # existence but never uploaded themselves.
            args = mode + [location] + default_args
            task_id = task_low_level_submission("bibupload",
                                                "oaiharvest",
                                                *args)
            repo_id = repository.get("id")
            if repo_id:
                create_oaiharvest_log_str(
                    task_id,
                    repo_id,
                    obj.get_data()
                )
        except Exception as msg:
            # Best effort: log and go on with the remaining files.
            eng.log.error(
                "An exception during submitting oaiharvest task occured : %s " % (
                    str(msg)))

    if task_id is None:
        eng.log.error("an error occurred while uploading %s from %s" %
                      (filepath, repository.get("name", "Unknown")))
    else:
        eng.log.info(
            "material harvested from source %s was successfully uploaded" %
            (repository.get("name", "Unknown"),))
    eng.log.info("end of upload")
def test_json_for_form(self):
    """A title-only record must give exact form and MARC output."""
    from invenio.modules.records.api import Record

    record = Record.create({'title': 'Test'}, 'json')

    form_json = record.produce('json_for_form')
    assert form_json == {'title': 'Test'}

    marc_json = record.produce('json_for_marc')
    assert marc_json == [{'245__a': 'Test'}]
def get_description(bwo):
    """Get the description column part.

    Collects the identifiers of the record, a summary of the bibclassify
    task result (when present) and the subject terms whose source is
    ``inspire``, then renders them with ``harvesting_record.html``.

    :param bwo: object providing ``get_data()`` and ``get_tasks_results()``
    :returns: the rendered template
    """
    from flask import render_template
    from invenio.modules.records.api import Record

    record = bwo.get_data()

    try:
        identifiers = Record(record.dumps()).persistent_identifiers
        final_identifiers = [i['value'] for i in identifiers]
    except Exception:
        # Best effort: fall back to the external system number.
        if hasattr(record, "get"):
            final_identifiers = [
                record.get("system_number_external", {}).get("value", 'No ids')
            ]
        else:
            final_identifiers = [' No ids']

    task_results = bwo.get_tasks_results()

    results = []
    if 'bibclassify' in task_results:
        try:
            result = task_results['bibclassify'][0]['result']
            fast_mode = result.get('fast_mode', False)
            result = result['dict']['complete_output']
            result_string = (
                "<strong></br>Bibclassify result:"
                "</br></strong>"
                "Number of Core keywords: \t%s</br>"
                "PACS: \t%s</br>"
                % (len(result['Core keywords']),
                   len(result['Field codes']))
            )
            if fast_mode:
                result_string += ("(This task run at fast mode"
                                  " taking into consideration"
                                  " only the title and the abstract)")
            results.append(result_string)
        except (KeyError, IndexError):
            # Incomplete bibclassify output: show no summary.
            pass

    categories = []
    if hasattr(record, "get"):
        if 'subject' in record:
            lookup = ["subject", "term"]
        elif "subject_term" in record:
            # This used to test the bare string "subject_term", which is
            # always true, so the ``else`` branch could never run.
            lookup = ["subject_term", "term"]
        else:
            lookup = None
        if lookup:
            primary, secondary = lookup
            category_list = record.get(primary, [])
            if isinstance(category_list, dict):
                category_list = [category_list]
            for subject in category_list:
                category = subject[secondary]
                if len(subject) == 2:
                    # With two keys, the source is the value of the key
                    # that is not the term. Selecting it by name avoids
                    # indexing ``keys()``, which depends on dict order and
                    # fails on Python 3.
                    source_list = next(
                        value for key, value in subject.items()
                        if key != secondary
                    )
                else:
                    try:
                        source_list = subject['source']
                    except KeyError:
                        source_list = ""
                if source_list.lower() == 'inspire':
                    categories.append(category)

    return render_template('workflows/styles/harvesting_record.html',
                           categories=categories,
                           identifiers=final_identifiers,
                           results=results)
def test_lossless_marc_import_export(self):
    """Every fixture JSON field must be restored from the fixture MARC."""
    from invenio.modules.records.api import Record

    dumped = Record.create(test_marc, master_format='marc').dumps()
    for key, expected in test_record.items():
        self.assertEqual(expected, dumped[key])