def index_after_commit(sender, changes):
    """Index a record in ES after it was committed to the DB.

    This cannot happen in an ``after_record_commit`` receiver from
    Invenio-Records because, despite the name, at that point we are not yet
    sure whether the record has been really committed to the DB.

    Args:
        sender: the signal sender (unused).
        changes: iterable of ``(model_instance, change)`` pairs. Only
            ``RecordMetadata`` instances are processed; for those, ``change``
            must be ``"insert"``, ``"update"`` or ``"delete"``.

    Raises:
        RuntimeError: if a ``RecordMetadata`` instance is paired with any
            other operation.
    """
    for model_instance, change in changes:
        if not isinstance(model_instance, RecordMetadata):
            continue
        if change not in ("insert", "update", "delete"):
            # Interpolate the values into the message; passing them as extra
            # positional arguments would leave the placeholders unformatted.
            raise RuntimeError(
                f"Wrong operation `{change}` on record {model_instance.id!r}"
            )

        record_uuid = str(model_instance.id)
        LOGGER.debug("Record commited", change=change, uuid=record_uuid)
        pid_type = PidStoreBase.get_pid_type_from_schema(
            model_instance.json.get("$schema")
        )
        # ``changes`` holds (instance, operation) tuples, so a membership test
        # of the bare string against it can never match; look at the operation
        # of the pair currently being processed instead.
        delete = change == "delete"
        arguments = InspireRecord.get_subclasses()[pid_type]._record_index(
            model_instance.json, _id=record_uuid, force_delete=delete
        )
        arguments["record_version"] = model_instance.version_id
        LOGGER.debug("Record sending to index", uuid=record_uuid, delete=delete)
        # The actual indexing is delegated to a Celery task.
        current_celery_app.send_task(
            "inspirehep.records.indexer.tasks.index_record", kwargs=arguments
        )
def get_class_for_record(cls, data):
    """Return the record subclass registered for the schema of ``data``.

    The pid type is derived from ``data["$schema"]`` and looked up in
    ``cls.get_subclasses()``.

    Raises:
        WrongRecordSubclass: if no subclass is registered for that pid type.
    """
    type_from_schema = PidStoreBase.get_pid_type_from_schema(data["$schema"])
    record_class = cls.get_subclasses().get(type_from_schema)
    if record_class is not None:
        return record_class
    raise WrongRecordSubclass(
        f"Wrong subclass {cls} used for record of type {type_from_schema}"
    )
def test_get_pid_type_from_schema(
    mock_get_config_pid_types_to_endpointsck_get,
    mock_get_config_pid_types_to_schema,
    schema,
    expected,
):
    """Check that ``schema`` resolves to the ``expected`` pid type."""
    assert expected == PidStoreBase.get_pid_type_from_schema(schema)
def get_linked_book(self, data):
    """Build the linked-book entry from the parent record of ``data``.

    Returns the first title of the parent record extended with a ``record``
    key holding a ``$ref`` to the parent, or ``None`` when there is no parent
    or the parent lacks ``titles`` or ``control_number``.
    """
    parent = get_parent_record(data)
    if not parent or "titles" not in parent or "control_number" not in parent:
        return None

    pid_type = PidStoreBase.get_pid_type_from_schema(data["$schema"])
    endpoint = PidStoreBase.get_endpoint_from_pid_type(pid_type)
    endpoint_item = f"invenio_records_rest.{endpoint}_item"

    # Prefer the parent's own reference; build a URL only when it has none.
    ref = get_value(parent, "self.$ref")
    if not ref:
        ref = url_for(
            endpoint_item,
            pid_value=parent["control_number"],
            _external=True,
        )

    first_title = parent["titles"][0]
    return {**first_title, "record": {"$ref": ref}}
def find_record_endpoint(pid, record_hit=None, **kwargs):
    """Resolve the REST endpoint for ``pid``.

    For any pid type other than ``recid`` the endpoint is read straight from
    the default endpoint prefixes. For ``recid`` (the pid type of every search
    result) the pid type is taken from the ``$schema`` of ``record_hit`` when
    it is available, and otherwise resolved from the pid value, which costs
    an additional db query.
    """
    if pid.pid_type != "recid":
        return current_records_rest.default_endpoint_prefixes[pid.pid_type]

    if record_hit and "$schema" in record_hit.get("_source", {}):
        schema = record_hit["_source"]["$schema"]
        resolved_pid_type = PidStoreBase.get_pid_type_from_schema(schema)
    else:
        resolved_pid_type = PidStoreBase.get_pid_type_from_recid(pid.pid_value)
    return PidStoreBase.get_endpoint_from_pid_type(resolved_pid_type)
def _create_record(data, save_to_file=False):
    """Create (or update) a record from ``data``, commit it and index it.

    Args:
        data: record metadata; must contain ``control_number`` and, when
            ``save_to_file`` is set, ``$schema``.
        save_to_file: when true, also dump ``data`` as JSON to
            ``data/records/<endpoint>/<control_number>.json``.
    """
    control_number = data["control_number"]
    click.echo(f"Creating record {control_number}.")

    record = InspireRecord.create_or_update(data)
    db.session.commit()
    record.index(delay=False)

    message = (f"Record created uuid:{record.id} with "
               f"pid:{control_number} has been created.")
    click.echo(click.style(message, fg="green"))

    if not save_to_file:
        return

    pid_type = PidStoreBase.get_pid_type_from_schema(data["$schema"])
    endpoint = PidStoreBase.get_endpoint_from_pid_type(pid_type)
    file_path = f"data/records/{endpoint}/{control_number}.json"
    click.echo(click.style(f"Writing to {file_path}", fg="green"))
    # ``orjson.dumps`` returns ``bytes``, so the file has to be opened in
    # binary mode; writing bytes to a text-mode file raises ``TypeError``.
    with open(file_path, "wb") as file:
        file.write(orjson.dumps(data))
def migrate_record_from_mirror(prod_record, disable_external_push=True, disable_relations_update=True):
    """Migrate a mirrored legacy record into an Inspire record.

    The outcome is recorded on ``prod_record`` (``valid`` or ``error``), which
    is then merged into the DB session.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        disable_external_push: forwarded to ``create_or_update``.
        disable_relations_update: forwarded to ``create_or_update``.

    Returns:
        The record returned by ``create_or_update`` when the migration
        succeeds; ``None`` when the record is skipped or fails.

    Raises:
        ThreadsTimeoutError: re-raised untouched; every other exception is
            stored on ``prod_record.error`` instead of propagating.
    """
    logger = LOGGER.bind(recid=prod_record.recid)
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except NotSupportedError as exc:
        # Unsupported records are not migrated, but are still marked valid.
        logger.warning(str(exc), recid=prod_record.recid)
        prod_record.valid = True
        db.session.merge(prod_record)
        return
    except Exception as exc:
        logger.exception("Error converting from marcxml")
        prod_record.error = exc
        db.session.merge(prod_record)
        return
    if "$schema" in json_record:
        ensure_valid_schema(json_record)
        pid_type = PidStoreBase.get_pid_type_from_schema(
            json_record.get("$schema"))
        # NOTE(review): ``config.get`` returns None when the key is unset,
        # which would make this ``in`` test raise -- confirm the key is
        # always configured.
        if pid_type in current_app.config.get("MIGRATION_PID_TYPE_BLACKLIST"):
            prod_record.error = Exception(
                f"Record: {prod_record.recid} has blacklisted pid_type: {pid_type} is blacklisted"
            )
            db.session.merge(prod_record)
            return
    try:
        # Savepoint: a failure below only rolls back this record's changes.
        with db.session.begin_nested():
            cls = InspireRecord.get_class_for_record(json_record)
            original_urls = replace_afs_file_locations_with_local(json_record)
            record = cls.create_or_update(
                json_record,
                disable_external_push=disable_external_push,
                disable_relations_update=disable_relations_update,
            )
            cache_afs_file_locations(record)
    except ValidationError as exc:
        path = ".".join(exc.schema_path)
        logger.warn(
            "Migrator validator error",
            path=path,
            value=exc.instance,
            recid=prod_record.recid,
        )
        prod_record.error = exc
        db.session.merge(prod_record)
    except DownloadFileError as exc:
        # NOTE(review): ``original_urls`` is unbound if the error is raised
        # before it is assigned above -- confirm that cannot happen.
        removed_cached_files = remove_cached_afs_file_locations(original_urls)
        if not removed_cached_files:
            logger.exception("DownloadFileError while migrate from mirror")
            prod_record.error = exc
            db.session.merge(prod_record)
        else:
            # Some cached locations were removed: retry the whole migration.
            return migrate_record_from_mirror(
                prod_record=prod_record,
                disable_external_push=disable_external_push,
                disable_relations_update=disable_relations_update,
            )
    except PIDValueError as exc:
        message = f"pid_type:'{exc.pid_type}', pid_value:'{exc.pid_value}'"
        logger.exception("PIDValueError while migrate from mirror",
                         msg=message)
        # Replace the exception args so the stored error carries the pid info.
        exc.args = (message, )
        prod_record.error = exc
        db.session.merge(prod_record)
    except ThreadsTimeoutError:
        # Must come before the generic handler so timeouts are not swallowed.
        raise
    except Exception as exc:
        logger.exception("Error while migrating record into mirror")
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record
def _schema_type(self):
    """Return the pid type derived from this record's ``$schema``."""
    schema = self["$schema"]
    return PidStoreBase.get_pid_type_from_schema(schema)
def migrate_record_from_mirror(prod_record, disable_orcid_push=True, disable_citation_update=True):
    """Migrate a mirrored legacy record into an Inspire record.

    The outcome is recorded on ``prod_record`` (``valid`` or ``error``), which
    is then merged into the DB session.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        disable_orcid_push: forwarded to ``create_or_update``.
        disable_citation_update: forwarded to ``create_or_update``.

    Returns:
        The record returned by ``create_or_update`` when the migration
        succeeds; ``None`` when the record is skipped or fails.
    """
    logger = LOGGER.bind(recid=prod_record.recid)
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except Exception as exc:
        logger.exception("Error converting from marcxml")
        prod_record.error = exc
        db.session.merge(prod_record)
        return None

    if "$schema" in json_record:
        ensure_valid_schema(json_record)
        pid_type = PidStoreBase.get_pid_type_from_schema(
            json_record.get("$schema"))
        if pid_type in current_app.config.get("MIGRATION_PID_TYPE_BLACKLIST"):
            # Plain ``{...}`` placeholders: a ``$`` prefix is not part of
            # f-string syntax and would end up verbatim in the message.
            prod_record.error = Exception(
                f"Record: {prod_record.recid} has blacklisted pid_type: {pid_type} is blacklisted"
            )
            db.session.merge(prod_record)
            return None

    try:
        # Savepoint: a failure below only rolls back this record's changes.
        with db.session.begin_nested():
            cls = InspireRecord.get_class_for_record(json_record)
            # Delete the external pids of every record listed under
            # ``deleted_records`` before creating/updating this one.
            for deleted_record in cls.get_linked_records_from_dict_field(
                    json_record, "deleted_records"):
                deleted_record.pidstore_handler(
                    deleted_record.id, deleted_record).delete_external_pids()
            record = cls.create_or_update(
                json_record,
                disable_orcid_push=disable_orcid_push,
                disable_citation_update=disable_citation_update,
            )
    except ValidationError as exc:
        path = ".".join(exc.schema_path)
        logger.warn(
            "Migrator validator error",
            path=path,
            value=exc.instance,
            recid=prod_record.recid,
        )
        prod_record.error = exc
        db.session.merge(prod_record)
    except PIDValueError as exc:
        message = f"pid_type:'{exc.pid_type}', pid_value:'{exc.pid_value}'"
        logger.exception("PIDValueError while migrate from mirror",
                         msg=message)
        # Replace the exception args so the stored error carries the pid info.
        exc.args = (message, )
        prod_record.error = exc
        db.session.merge(prod_record)
    except Exception as exc:
        logger.exception("Error while migrating record into mirror")
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record
    return None
def get_endpoint_from_schema(schema):
    """Return the endpoint for the pid type that ``schema`` maps to."""
    return PidStoreBase.get_endpoint_from_pid_type(
        PidStoreBase.get_pid_type_from_schema(schema)
    )