def run(self):
    """Check every record found by the search and modify the ones that need it.

    The search results are split with ``chunker(self.search(), self.size)``.
    For each chunk the records are fetched from the database, wrapped in the
    class returned by ``InspireRecord.get_class_for_record`` and passed to
    ``self.check``. When the check is positive the record is handed to
    ``self.do`` and then updated. The session is committed after each chunk
    when ``self.commit_after_each_batch`` is truthy, and once more at the end.
    """
    total_checked = 0
    total_modified = 0
    self.logger.info("Starting search, check & do job", reason=self.__doc__)

    for hits in chunker(self.search(), self.size):
        record_ids = [hit.meta.id for hit in hits]
        self.logger.info("Received record IDs from ES", num_records=len(record_ids))
        fetched = InspireRecord.get_records(record_ids)
        self.logger.info("Fetched chunk of records from DB", num_records=len(fetched))

        for base_record in fetched:
            # A fresh state dict per record, shared between check() and do().
            state = {}
            logger = self.logger.bind(recid=base_record["control_number"])
            total_checked += 1

            record_class = InspireRecord.get_class_for_record(base_record)
            record = record_class(base_record, model=base_record.model)

            if self.check(record, logger=logger, state=state):
                total_modified += 1
                logger.info("Modifying record, check positive")
                self.do(record, logger=logger, state=state)
                record.update(dict(record))
            else:
                logger.info("Not modifying record, check negative")

        if self.commit_after_each_batch:
            db.session.commit()

    # Final commit covers the case where per-batch commits are disabled.
    db.session.commit()
    self.logger.info(
        "Search, check & do job finished successfully.",
        num_records_checked=total_checked,
        num_records_modified=total_modified,
    )
"value": "GAMBIT" }, "long_name": "GAMBIT : Global And Modular BSM Inference Tool", "inspire_classification": ["Non-experimental|Simulation tools"], "description": "GAMBIT is a global fitting code for generic Beyond the Standard Model theories, designed to allow fast and easy definition of new models, observables, likelihoods, scanners and backend physics codes.", "legacy_name": "GAMBIT", "experiment": { "value": "GAMBIT", "short_name": "GAMBIT" }, "$schema": "https://inspirebeta.net/schemas/records/experiments.json", "_collections": ["Experiments"], } cls = InspireRecord.get_class_for_record(json_record) record = cls.create_or_update(json_record, disable_external_push=True, disable_relations_update=True) pid = PersistentIdentifier.query.filter_by(pid_value="1775082").one() assert record.id assert InspireRecord.get_record_by_pid_value("1775082", "exp") assert pid.status == PIDStatus.DELETED def test_creating_record_with_id_provided_properly_mints_identifiers( inspire_app): record_data = { "$schema": "https://inspirebeta.net/schemas/records/hep.json", "control_number": 1_234_567, "arxiv_eprints": [{
def migrate_record_from_mirror(
    prod_record, disable_external_push=True, disable_relations_update=True
):
    """Migrate a mirrored legacy record into an Inspire record.

    The outcome is stored on ``prod_record`` and merged into the session:
    ``valid`` is set to ``True`` when the migration succeeds or when the
    conversion raises ``NotSupportedError``; otherwise the exception is stored
    in ``error``.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        disable_external_push(bool): forwarded to ``create_or_update``.
        disable_relations_update(bool): forwarded to ``create_or_update``.

    Returns:
        dict: the migrated record metadata, which is also inserted into the
        database, or ``None`` when the record could not be migrated.

    Raises:
        ThreadsTimeoutError: re-raised untouched when it occurs while the
            record is being created or updated.
    """
    logger = LOGGER.bind(recid=prod_record.recid)

    try:
        json_record = marcxml2record(prod_record.marcxml)
    except NotSupportedError as exc:
        # Not treated as a failure: the mirror entry is flagged valid and skipped.
        logger.warning(str(exc), recid=prod_record.recid)
        prod_record.valid = True
        db.session.merge(prod_record)
        return None
    except Exception as exc:
        logger.exception("Error converting from marcxml")
        prod_record.error = exc
        db.session.merge(prod_record)
        return None

    if "$schema" in json_record:
        ensure_valid_schema(json_record)
        pid_type = PidStoreBase.get_pid_type_from_schema(json_record.get("$schema"))
        # A missing (or None) setting means "nothing is blacklisted" instead of
        # failing with ``TypeError`` on the membership test.
        blacklist = current_app.config.get("MIGRATION_PID_TYPE_BLACKLIST") or ()
        if pid_type in blacklist:
            prod_record.error = Exception(
                f"Record: {prod_record.recid} has blacklisted pid_type: {pid_type} is blacklisted"
            )
            db.session.merge(prod_record)
            return None

    # Stays None when the failure happens before the file locations were
    # replaced, so the DownloadFileError handler never reads an unbound name.
    original_urls = None
    try:
        with db.session.begin_nested():
            cls = InspireRecord.get_class_for_record(json_record)
            original_urls = replace_afs_file_locations_with_local(json_record)
            record = cls.create_or_update(
                json_record,
                disable_external_push=disable_external_push,
                disable_relations_update=disable_relations_update,
            )
            cache_afs_file_locations(record)
    except ValidationError as exc:
        # schema_path entries can be integers (array indexes), so stringify
        # them before joining.
        path = ".".join(str(part) for part in exc.schema_path)
        logger.warning(
            "Migrator validator error",
            path=path,
            value=exc.instance,
            recid=prod_record.recid,
        )
        prod_record.error = exc
        db.session.merge(prod_record)
    except DownloadFileError as exc:
        removed_cached_files = (
            original_urls is not None
            and remove_cached_afs_file_locations(original_urls)
        )
        if not removed_cached_files:
            logger.exception("DownloadFileError while migrate from mirror")
            prod_record.error = exc
            db.session.merge(prod_record)
        else:
            # Cached locations were dropped: retry the whole migration.
            return migrate_record_from_mirror(
                prod_record=prod_record,
                disable_external_push=disable_external_push,
                disable_relations_update=disable_relations_update,
            )
    except PIDValueError as exc:
        message = f"pid_type:'{exc.pid_type}', pid_value:'{exc.pid_value}'"
        logger.exception("PIDValueError while migrate from mirror", msg=message)
        # Replace the args so the stored error carries the readable message.
        exc.args = (message,)
        prod_record.error = exc
        db.session.merge(prod_record)
    except ThreadsTimeoutError:
        raise
    except Exception as exc:
        logger.exception("Error while migrating record into mirror")
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record
    return None
def migrate_record_from_mirror(
    prod_record, disable_orcid_push=True, disable_citation_update=True
):
    """Migrate a mirrored legacy record into an Inspire record.

    The outcome is stored on ``prod_record`` and merged into the session:
    ``valid`` is set to ``True`` when the migration succeeds; otherwise the
    exception is stored in ``error``.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        disable_orcid_push(bool): forwarded to ``create_or_update``.
        disable_citation_update(bool): forwarded to ``create_or_update``.

    Returns:
        dict: the migrated record metadata, which is also inserted into the
        database, or ``None`` when the record could not be migrated.
    """
    logger = LOGGER.bind(recid=prod_record.recid)

    try:
        json_record = marcxml2record(prod_record.marcxml)
    except Exception as exc:
        logger.exception("Error converting from marcxml")
        prod_record.error = exc
        db.session.merge(prod_record)
        return None

    if "$schema" in json_record:
        ensure_valid_schema(json_record)
        pid_type = PidStoreBase.get_pid_type_from_schema(json_record.get("$schema"))
        # A missing (or None) setting means "nothing is blacklisted" instead of
        # failing with ``TypeError`` on the membership test.
        blacklist = current_app.config.get("MIGRATION_PID_TYPE_BLACKLIST") or ()
        if pid_type in blacklist:
            # Plain ``{...}`` placeholders: the previous ``${...}`` form put
            # literal dollar signs into the stored message.
            prod_record.error = Exception(
                f"Record: {prod_record.recid} has blacklisted pid_type: {pid_type} is blacklisted"
            )
            db.session.merge(prod_record)
            return None

    try:
        with db.session.begin_nested():
            cls = InspireRecord.get_class_for_record(json_record)
            # Drop the external PIDs of every record listed in
            # "deleted_records" before creating/updating this one.
            for deleted_record in cls.get_linked_records_from_dict_field(
                json_record, "deleted_records"
            ):
                deleted_record.pidstore_handler(
                    deleted_record.id, deleted_record
                ).delete_external_pids()
            record = cls.create_or_update(
                json_record,
                disable_orcid_push=disable_orcid_push,
                disable_citation_update=disable_citation_update,
            )
    except ValidationError as exc:
        # schema_path entries can be integers (array indexes), so stringify
        # them before joining.
        path = ".".join(str(part) for part in exc.schema_path)
        logger.warning(
            "Migrator validator error",
            path=path,
            value=exc.instance,
            recid=prod_record.recid,
        )
        prod_record.error = exc
        db.session.merge(prod_record)
    except PIDValueError as exc:
        message = f"pid_type:'{exc.pid_type}', pid_value:'{exc.pid_value}'"
        logger.exception("PIDValueError while migrate from mirror", msg=message)
        # Replace the args so the stored error carries the readable message.
        exc.args = (message,)
        prod_record.error = exc
        db.session.merge(prod_record)
    except Exception as exc:
        logger.exception("Error while migrating record into mirror")
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record
    return None