def make_record(values, is_dump=True):
    """Build a MARC-master ``Record`` from draft values.

    :param values: mapping of field names to values taken from the draft.
    :param is_dump: when true, ``values`` is handed to the ``Record``
        constructor through its ``json`` argument; otherwise an empty
        record is created and every key/value pair is assigned to it
        one at a time.
    :return: the newly created ``Record``.
    """
    if not is_dump:
        # Start from an empty record and set each field individually.
        built = Record(master_format='marc')
        for field_name, field_value in six.iteritems(values):
            built[field_name] = field_value
        return built

    # ``values`` is passed wholesale as the record's JSON.
    return Record(json=values, master_format='marc')
def test_marc_export(self):
    """Check the MARC export and the form JSON of the test record.

    The record is built from ``test_record``, exported through
    ``legacy_export_as_marc()``, re-parsed with ``create_record`` and
    serialised with ``record_xml_output``; the result must equal
    ``test_marc``.  On mismatch a unified diff is raised as the assertion
    message.  Finally every key of ``test_form_json`` is compared with the
    ``json_for_form`` production of the record.
    """
    from invenio.modules.records.api import Record
    from invenio.legacy.bibrecord import create_record, record_xml_output

    rec = Record(json=test_record, master_format='marc')

    # Needed to properly set authors when generating MARC
    first_author, other_authors = rec['authors'][0], rec['authors'][1:]
    rec['_first_author'] = first_author
    rec['_additional_authors'] = other_authors

    parsed_record = create_record(rec.legacy_export_as_marc())[0]
    output_marc = record_xml_output(parsed_record)

    try:
        self.assertEqual(test_marc, output_marc)
    except AssertionError:
        # Print diff in case of errors.
        import difflib
        diff_lines = difflib.unified_diff(
            test_marc.splitlines(True),
            output_marc.splitlines(True),
        )
        raise AssertionError("".join(diff_lines))

    form_json = rec.produce('json_for_form')
    for key, expected in test_form_json.items():
        self.assertEqual(form_json[key], expected)
def test_json_for_ld(self):
    """Smoke-test the ``json_for_ld`` production on the MARC test record.

    There are no assertions: the test passes as long as
    ``Record.create`` and ``produce('json_for_ld')`` do not raise.
    """
    import copy
    from invenio.modules.records.api import Record

    # The created record is not used afterwards; only the call matters.
    Record.create({'title': 'Test'}, 'json')

    # Work on a shallow copy so the shared fixture is not rebound.
    marc_record = Record(json=copy.copy(test_record), master_format='marc')
    marc_record.produce('json_for_ld')
def patch_get_record(self, get_record_patch):
    """Make ``get_record_patch`` return a small JSON-master record.

    The record carries a fixed DOI, stored under the field name read from
    the ``PIDSTORE_DATACITE_RECORD_DOI_FIELD`` application setting, and
    ``recid`` 1.

    :param get_record_patch: patch object whose ``return_value`` is set.
    """
    from invenio.modules.records.api import Record

    doi_field = self.app.config['PIDSTORE_DATACITE_RECORD_DOI_FIELD']
    record_json = {
        doi_field: '10.1234/invenio.1234',
        'recid': 1,
    }
    get_record_patch.return_value = Record(
        json=record_json, master_format='json'
    )
def create_records_for_workflow(records, **kwargs):
    """Wrap the JSON part of each entry in a ``Record``, in place.

    Every entry of ``records`` is replaced by a 2-tuple made of the
    entry's first item (kept as is) and a ``Record`` built from its
    second item.

    :param records: mutable sequence of indexable entries; modified in
        place, nothing is returned.
    :param kwargs: accepted but not used.
    """
    from invenio.modules.records.api import Record

    for position, entry in enumerate(records):
        leading, payload = entry[0], entry[1]
        records[position] = (leading, Record(json=payload))
def formatter(bwo, **kwargs):
    """Return a formatted version of the data.

    :param bwo: workflow object; only its ``get_data()`` method is used.
    :param kwargs: optional ``formatter`` (a callable; when given, its
        result on the data is returned directly) and ``format`` (output
        format code passed to ``format_record``; ``'xm'`` selects
        pretty-printed XML instead).
    :return: ``''`` when there is no data; otherwise the formatted data,
        or the data itself (sets are returned as lists) when no
        conversion applies.
    """
    from invenio.modules.formatter.engine import format_record

    data = bwo.get_data()
    if not data:
        return ''

    # Local names differ from the kwargs keys so that neither the builtin
    # ``format`` nor this function's own name is shadowed.
    custom_formatter = kwargs.get("formatter", None)
    output_format = kwargs.get("format", None)
    if custom_formatter:
        # A separate formatter is supplied
        return custom_formatter(data)

    from invenio.modules.records.api import Record
    if isinstance(data, collections.Mapping):
        # Dicts are fine on their own, but maybe it is a SmartJson (record)
        try:
            data = Record(data.dumps()).legacy_export_as_marc()
        except (TypeError, KeyError, AttributeError):
            # AttributeError covers plain mappings without ``dumps()``.
            # Maybe not a record, a submission?
            return data

    if isinstance(data, string_types):
        # It is a string type, let's try to convert
        if output_format:
            # We can try formatter!
            # If already XML, format_record does not like it.
            if output_format != 'xm':
                try:
                    return format_record(recID=None,
                                         of=output_format,
                                         xml_record=data)
                except TypeError:
                    # Wrong kind of type
                    pass
            else:
                # So, XML then
                from xml.dom.minidom import parseString

                try:
                    pretty_data = parseString(data)
                    return pretty_data.toprettyxml()
                except TypeError:
                    # Probably not proper XML string then
                    return "Data cannot be parsed: %s" % (data, )
                except Exception:
                    # Some other parsing error
                    pass

        # Just return raw string
        return data

    if isinstance(data, set):
        return list(data)

    # Not any of the above types; hand it back untouched.
    return data
def formatter(bwo, **kwargs):
    """Nicely format the record.

    :param bwo: workflow object; only its ``get_data()`` method is used.
    :param kwargs: optional ``formatter`` (a callable; when given, its
        result on the data is returned directly) and ``of`` (output format
        code passed to ``format_record``; missing or ``'xm'`` selects
        pretty-printed XML).
    :return: ``''`` when there is no data; otherwise the formatted string,
        falling back to the data itself (dicts are rendered with
        ``pprint.pformat``) when formatting is not possible.
    """
    from pprint import pformat
    from invenio.modules.records.api import Record

    data = bwo.get_data()
    if not data:
        return ''

    custom_formatter = kwargs.get("formatter", None)
    of = kwargs.get("of", None)
    if custom_formatter:
        # A separate formatter is supplied
        return custom_formatter(data)

    # Always bound, so the fallback below works when the data is not a
    # string or when formatting/parsing fails.
    formatted_data = None

    if isinstance(data, collections.Mapping):
        # Dicts are fine on their own, but maybe it is a SmartJson (record)
        try:
            data = Record(data.dumps()).legacy_export_as_marc()
        except (TypeError, KeyError, AttributeError):
            # AttributeError covers plain mappings without ``dumps()``;
            # they are pretty-printed at the end instead.
            pass

    if isinstance(data, string_types):
        # We can try formatter!
        # If already XML, format_record does not like it.
        if of and of != 'xm':
            try:
                from invenio.modules.formatter import format_record
                formatted_data = format_record(recID=None,
                                               of=of,
                                               xml_record=data)
            except TypeError:
                # Wrong kind of type
                pass
        else:
            # So, XML then
            from xml.dom.minidom import parseString

            try:
                unpretty_data = parseString(data)
                formatted_data = unpretty_data.toprettyxml()
            except TypeError:
                # Probably not proper XML string then
                return "Data cannot be parsed: %s" % (data, )
            except Exception:
                # Just return raw string
                pass

    if not formatted_data:
        formatted_data = data

    if isinstance(formatted_data, dict):
        formatted_data = pformat(formatted_data)
    return formatted_data
def test_json_for_form(self):
    """Check the ``json_for_form`` production for two records.

    First a JSON-master record with only a title is created and its
    ``json_for_form`` / ``json_for_marc`` productions are checked.  Then
    the MARC test record is produced as form JSON and compared key by key
    with ``test_form_json``.
    """
    import copy
    from invenio.modules.records.api import Record

    title_record = Record.create({'title': 'Test'}, 'json')
    assert title_record.produce('json_for_form')['title'] == 'Test'
    assert {'245__a': 'Test'} in title_record.produce('json_for_marc')

    # Shallow copy so the shared fixture name is not handed over directly.
    marc_record = Record(json=copy.copy(test_record), master_format='marc')
    form_json = marc_record.produce('json_for_form')
    for key, expected in test_form_json.items():
        self.assertEqual(form_json[key], expected)
def get_mocked_record():
    """Return the shared mocked record, creating it on first use.

    The record is cached on ``RecordMock.record``; later calls return the
    same object.
    """
    from invenio.modules.records.api import Record

    if RecordMock.record is not None:
        return RecordMock.record

    mocked_json = {
        'doi': '10.1234/invenio.1234',
        # replace with cfg['files_var_name']
        'files_to_upload': [
            ('path1.xls',
             'this/is/a/long/path/to/the/file/location/path1.xls'),
            ('path2.csv', 'path2.csv'),
            ('path3.pdf', 'path3.pdf'),
        ],
        'recid': 1,
        # A '_files' entry ('path1', 'path2', 'path3') was left disabled
        # in the original fixture; it would also be replaced with
        # cfg['files_var_name'].
    }
    RecordMock.record = Record(json=mocked_json, master_format='marc')
    return RecordMock.record
def get_description(bwo):
    """Get the description (identifiers and categories) from the object data.

    :param bwo: workflow object; only its ``get_data()`` method is used.
    :return: the rendered ``workflows/styles/harvesting_record.html``
        template, given the collected ``categories`` and ``identifiers``.
    """
    from invenio.modules.records.api import Record
    from flask import render_template, current_app

    record = bwo.get_data()

    # Must be a list: values are accumulated with ``extend`` below.
    final_identifiers = []
    try:
        identifiers = Record(record.dumps()).persistent_identifiers
        for values in identifiers.values():
            final_identifiers.extend([i.get("value") for i in values])
    except Exception:
        # Best effort: log, then fall back to the system control number
        # when the data offers a ``get`` method.
        current_app.logger.exception("Could not get identifiers")
        if hasattr(record, "get"):
            final_identifiers = [
                record.get("system_control_number", {}).get("value", 'No ids')
            ]
        else:
            final_identifiers = []

    categories = []
    if hasattr(record, "get"):
        # Subjects may live under either of two field names.
        if 'subject' in record:
            lookup = ["subject", "term"]
        elif "subject_term" in record:
            lookup = ["subject_term", "term"]
        else:
            lookup = None
        if lookup:
            primary, secondary = lookup
            category_list = record.get(primary, [])
            if isinstance(category_list, dict):
                # A single subject is stored as a bare dict.
                category_list = [category_list]
            categories = [subject[secondary] for subject in category_list]

    return render_template('workflows/styles/harvesting_record.html',
                           categories=categories,
                           identifiers=final_identifiers)
def filter_step(obj, eng):
    """Run an external filter script on the object's MARCXML.

    The script path is read from the ``f_filter-file`` argument of the
    repository stored in ``obj.extra_data``.  The record is exported as
    MARC, written to ``<CFG_TMPSHAREDDIR>/<eng.uuid>/<obj.id>`` and the
    script is run with that file as its only argument.  The outcome is
    logged on ``obj.log``.

    :param obj: workflow object providing ``extra_data``, ``data``, ``id``
        and ``log``.
    :param eng: workflow engine; only its ``uuid`` is used.
    """
    from invenio.modules.records.api import Record
    from invenio.utils.shell import run_shell_command

    repository = obj.extra_data.get("repository", {})
    arguments = repository.get("arguments", {})
    script_name = arguments.get("f_filter-file")
    if not script_name:
        obj.log.error("No script file found!")
        return

    marcxml_value = Record(obj.data.dumps()).legacy_export_as_marc()
    extract_path = os.path.join(
        cfg['CFG_TMPSHAREDDIR'],
        str(eng.uuid)
    )
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    # Write the MARCXML where the filter script will read it; ``with``
    # guarantees the handle is closed even if the write fails.
    marcxmlfile = extract_path + os.sep + str(obj.id)
    with open(marcxmlfile, 'w') as file_fd:
        file_fd.write(marcxml_value)

    exitcode, cmd_stdout, cmd_stderr = run_shell_command(
        cmd="%s '%s'",
        args=(str(script_name),
              str(marcxmlfile)))
    # Any output on stderr counts as a failure, whatever the exit code.
    if exitcode != 0 or cmd_stderr != "":
        obj.log.error(
            "Error while running filtering script on %s\nError:%s"
            % (marcxmlfile, cmd_stderr)
        )
    else:
        obj.log.info(cmd_stdout)
def get_description(bwo):
    """Get the description column part.

    Collects the record identifiers, a summary of the ``bibclassify`` task
    result (when present) and the subject categories whose source is
    ``inspire``.

    :param bwo: workflow object providing ``get_data()`` and
        ``get_tasks_results()``.
    :return: the rendered ``workflows/styles/harvesting_record.html``
        template, given ``categories``, ``identifiers`` and ``results``.
    """
    record = bwo.get_data()
    from invenio.modules.records.api import Record

    try:
        identifiers = Record(record.dumps()).persistent_identifiers
        final_identifiers = []
        for i in identifiers:
            final_identifiers.append(i['value'])
    except Exception:
        # Best effort: fall back to the external system number when the
        # data offers a ``get`` method.
        if hasattr(record, "get"):
            final_identifiers = [
                record.get("system_number_external", {}).get("value", 'No ids')
            ]
        else:
            final_identifiers = [' No ids']

    task_results = bwo.get_tasks_results()
    results = []
    if 'bibclassify' in task_results:
        try:
            result = task_results['bibclassify'][0]['result']
            fast_mode = result.get('fast_mode', False)
            result = result['dict']['complete_output']
            result_string = "<strong></br>Bibclassify result:"\
                            "</br></strong>"\
                            "Number of Core keywords: \t%s</br>"\
                            "PACS: \t%s</br>"\
                            % (len(result['Core keywords']),
                               len(result['Field codes']))
            if fast_mode:
                result_string += "(This task run at fast mode"\
                                 " taking into consideration"\
                                 " only the title and the abstract)"
            results.append(result_string)
        except (KeyError, IndexError):
            # Incomplete task result: show no summary.
            pass

    categories = []
    if hasattr(record, "get"):
        # Subjects may live under either of two field names.
        if 'subject' in record:
            lookup = ["subject", "term"]
        elif "subject_term" in record:
            lookup = ["subject_term", "term"]
        else:
            lookup = None
        if lookup:
            primary, secondary = lookup
            category_list = record.get(primary, [])
            if isinstance(category_list, dict):
                # A single subject is stored as a bare dict.
                category_list = [category_list]
            for subject in category_list:
                category = subject[secondary]
                if len(subject) == 2:
                    # Two-key entry: the source sits under whichever key
                    # is not the term key.  ``list()`` keeps the indexing
                    # valid when ``keys()`` returns a view.
                    keys = list(subject.keys())
                    if keys[1] == secondary:
                        source_list = subject[keys[0]]
                    else:
                        source_list = subject[keys[1]]
                else:
                    try:
                        source_list = subject['source']
                    except KeyError:
                        source_list = ""
                if source_list.lower() == 'inspire':
                    categories.append(category)

    from flask import render_template
    return render_template('workflows/styles/harvesting_record.html',
                           categories=categories,
                           identifiers=final_identifiers,
                           results=results)
def quick_match_record(obj, eng):
    """Try to find the record id matching the object's record.

    The record's persistent identifiers are stored in
    ``obj.extra_data["persistent_ids"]``.  If one of them is named
    ``recid`` it is used directly; otherwise the lookup function
    registered for each known identifier name is tried until one returns
    a true value.  The resulting id is stored under the ``recid`` key of
    ``obj.extra_data["persistent_ids"]``.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    :return: ``True`` when a record id was found, ``False`` otherwise.
    """
    from invenio.legacy.bibupload.engine import (find_record_from_recid,
                                                 find_record_from_sysno,
                                                 find_records_from_extoaiid,
                                                 find_record_from_oaiid,
                                                 find_record_from_doi)
    from invenio.modules.records.api import Record

    # Identifier name -> function resolving that identifier.
    finders = {
        'recid': find_record_from_recid,
        'system_number': find_record_from_sysno,
        'oaiid': find_record_from_oaiid,
        'system_control_number': find_records_from_extoaiid,
        'doi': find_record_from_doi
    }

    record = Record(obj.data.dumps())
    try:
        identifiers = record.persistent_identifiers
    except Exception as err:
        # if anything goes wrong, assume we need to get it manually.
        eng.log.error("Problem with getting identifiers: %s\n%s"
                      % (str(err), traceback.format_exc()))
        identifiers = []

    obj.extra_data["persistent_ids"] = identifiers

    # Merge the dicts listed for each identifier name into a single dict.
    identifier_dict = {}
    for id_name, parts in identifiers:
        merged = {}
        for part in parts:
            merged.update(part)
        identifier_dict[id_name] = merged

    if "recid" in identifier_dict:
        # An explicit recid is taken as is.
        obj.extra_data["persistent_ids"]["recid"] = identifier_dict["recid"]
        return True

    # No explicit recid: try the other stable identifiers.
    found_recid = False
    for id_name, finder in finders.iteritems():
        if id_name not in identifier_dict:
            continue
        candidate = identifier_dict[id_name]
        if id_name in candidate:
            # To get {"doi": {"doi": val}}
            found_recid = finder(candidate[id_name])
        elif "value" in candidate:
            # To get {"doi": {"value": val}}
            found_recid = finder(candidate["value"])
        if found_recid:
            break

    if not found_recid:
        return False

    obj.extra_data["persistent_ids"]["recid"] = found_recid
    return True
def upload_step(obj, eng):
    """Perform the upload step.

    Submits one ``bibupload`` task per existing upload file.  When the
    repository's ``postprocess`` setting contains ``"f"`` (a filter was
    run), the candidate files are ``<path>.insert.xml``,
    ``<path>.append.xml``, ``<path>.correct.xml`` and
    ``<path>.holdingpen.xml``, each with its own mode flag.  Otherwise
    the record is exported as MARC to ``<path>`` and uploaded with
    ``-r -i``.  ``<path>`` is ``<CFG_TMPSHAREDDIR>/<eng.uuid>/<obj.id>``.

    :param obj: BibWorkflowObject to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.legacy.oaiharvest.dblayer import create_oaiharvest_log_str
    from invenio.modules.records.api import Record
    from invenio.legacy.bibsched.bibtask import task_low_level_submission

    repository = obj.extra_data.get("repository", {})
    sequence_id = random.randrange(1, 60000)

    arguments = repository.get("arguments", {})

    # Arguments shared by every submitted task.
    default_args = ['-I', str(sequence_id)]
    if arguments.get('u_name', ""):
        default_args.extend(['-N', arguments.get('u_name', "")])
    if arguments.get('u_priority', 5):
        default_args.extend(['-P', str(arguments.get('u_priority', 5))])

    extract_path = os.path.join(
        cfg['CFG_TMPSHAREDDIR'],
        str(eng.uuid)
    )
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    filepath = extract_path + os.sep + str(obj.id)
    if "f" in repository.get("postprocess", []):
        # We have a filter.
        file_uploads = [
            ("{0}.insert.xml".format(filepath), ["-i"]),
            ("{0}.append.xml".format(filepath), ["-a"]),
            ("{0}.correct.xml".format(filepath), ["-c"]),
            ("{0}.holdingpen.xml".format(filepath), ["-o"]),
        ]
    else:
        # We do not, so we get the data from the record
        marcxml_value = Record(obj.data.dumps()).legacy_export_as_marc()
        # ``with`` closes the handle even if the write fails.
        with open(filepath, 'w') as file_fd:
            file_fd.write(marcxml_value)
        file_uploads = [(filepath, ["-r", "-i"])]

    task_id = None
    for location, mode in file_uploads:
        if not os.path.exists(location):
            continue
        try:
            # Submit the file that was checked (``location``), so each
            # filtered file is uploaded with its own mode.
            args = mode + [location] + default_args
            task_id = task_low_level_submission("bibupload",
                                                "oaiharvest",
                                                *tuple(args))
            repo_id = repository.get("id")
            if repo_id:
                create_oaiharvest_log_str(
                    task_id,
                    repo_id,
                    obj.get_data()
                )
        except Exception as msg:
            eng.log.error(
                "An exception during submitting oaiharvest task occured : %s " % (
                    str(msg)))

    if task_id is None:
        eng.log.error("an error occurred while uploading %s from %s" %
                      (filepath, repository.get("name", "Unknown")))
    else:
        eng.log.info(
            "material harvested from source %s was successfully uploaded" %
            (repository.get("name", "Unknown"),))
    eng.log.info("end of upload")