Exemplo n.º 1
0
def index_after_commit(sender, changes):
    """Index a record in ES after it was committed to the DB.

    This cannot happen in an ``after_record_commit`` receiver from Invenio-Records
    because, despite the name, at that point we are not yet sure whether the record
    has been really committed to the DB.

    Args:
        sender: the object that emitted the signal (not used here).
        changes: iterable of ``(model_instance, change)`` pairs; for
            ``RecordMetadata`` instances ``change`` must be one of
            ``"insert"``, ``"update"`` or ``"delete"``.

    Raises:
        RuntimeError: if a ``RecordMetadata`` instance is reported with any
            other operation.
    """
    for model_instance, change in changes:
        # Only record metadata rows are indexed; everything else is ignored.
        if not isinstance(model_instance, RecordMetadata):
            continue
        if change not in ("insert", "update", "delete"):
            # Build the message eagerly: exceptions do not interpolate
            # printf-style arguments the way logging calls do.
            raise RuntimeError(
                f"Wrong operation `{change}` on record {model_instance.id!r}")

        record_uuid = str(model_instance.id)
        LOGGER.debug("Record commited", change=change, uuid=record_uuid)
        pid_type = PidStoreBase.get_pid_type_from_schema(
            model_instance.json.get("$schema"))
        # Deletion is decided by the operation of *this* entry. ``changes``
        # yields pairs, so ``"delete" in changes`` could never match one.
        delete = change == "delete"
        record_class = InspireRecord.get_subclasses()[pid_type]
        arguments = record_class._record_index(model_instance.json,
                                               _id=record_uuid,
                                               force_delete=delete)
        arguments["record_version"] = model_instance.version_id
        LOGGER.debug(
            "Record sending to index",
            uuid=record_uuid,
            delete=delete,
        )
        current_celery_app.send_task(
            "inspirehep.records.indexer.tasks.index_record",
            kwargs=arguments)
Exemplo n.º 2
0
 def get_class_for_record(cls, data):
     """Return the record subclass registered for the ``$schema`` of ``data``.

     The pid type is derived from ``data["$schema"]`` and looked up among
     ``cls.get_subclasses()``.

     Raises:
         WrongRecordSubclass: if no subclass is registered for that pid type.
     """
     pid_type = PidStoreBase.get_pid_type_from_schema(data["$schema"])
     known_subclasses = cls.get_subclasses()
     matched_class = known_subclasses.get(pid_type)
     if matched_class is not None:
         return matched_class
     raise WrongRecordSubclass(
         f"Wrong subclass {cls} used for record of type {pid_type}"
     )
Exemplo n.º 3
0
def test_get_pid_type_from_schema(
    mock_get_config_pid_types_to_endpointsck_get,
    mock_get_config_pid_types_to_schema,
    schema,
    expected,
):
    """Check that the pid type resolved from ``schema`` equals ``expected``.

    The two ``mock_*`` arguments are not used in the body; presumably they
    are injected by patch decorators outside this view -- TODO confirm.
    """
    assert expected == PidStoreBase.get_pid_type_from_schema(schema)
Exemplo n.º 4
0
 def get_linked_book(self, data):
     """Build the linked-book entry for ``data`` from its parent record.

     Returns ``None`` unless a parent record exists and carries both
     ``titles`` and ``control_number``. Otherwise returns the parent's first
     title extended with a ``record`` key holding a ``$ref`` to the parent.
     """
     parent = get_parent_record(data)
     if (not parent or "titles" not in parent
             or "control_number" not in parent):
         return None

     pid_type = PidStoreBase.get_pid_type_from_schema(data["$schema"])
     endpoint = PidStoreBase.get_endpoint_from_pid_type(pid_type)

     # Prefer the parent's own reference; build a URL only when it has none.
     ref = get_value(parent, "self.$ref")
     if not ref:
         ref = url_for(
             f"invenio_records_rest.{endpoint}_item",
             pid_value=parent["control_number"],
             _external=True,
         )

     first_title = parent["titles"][0]
     return {**first_title, "record": {"$ref": ref}}
Exemplo n.º 5
0
def find_record_endpoint(pid, record_hit=None, **kwargs):
    """Resolve the endpoint for ``pid``.

    Resolution order:

    1. If ``pid.pid_type`` is not ``recid``, use the default endpoint prefix
       registered for that pid type.
    2. Otherwise, if ``record_hit`` is a search hit whose ``_source`` carries
       a ``$schema``, derive the pid type from that schema (all pid types
       coming from search results are ``recid``).
    3. Otherwise derive the pid type from ``pid.pid_value``, which costs an
       additional db query.
    """
    if pid.pid_type != "recid":
        return current_records_rest.default_endpoint_prefixes[pid.pid_type]

    schema_in_hit = bool(record_hit) and "$schema" in record_hit.get(
        "_source", {})
    if schema_in_hit:
        resolved_pid_type = PidStoreBase.get_pid_type_from_schema(
            record_hit["_source"]["$schema"])
    else:
        resolved_pid_type = PidStoreBase.get_pid_type_from_recid(
            pid.pid_value)
    return PidStoreBase.get_endpoint_from_pid_type(resolved_pid_type)
Exemplo n.º 6
0
def _create_record(data, save_to_file=False):
    """Create or update a record from ``data``, commit it and index it.

    Args:
        data (dict): record metadata; must contain ``control_number`` and,
            when ``save_to_file`` is set, ``$schema``.
        save_to_file (bool): when true, also dump ``data`` as JSON to
            ``data/records/<endpoint>/<control_number>.json``.
    """
    control_number = data["control_number"]

    click.echo(f"Creating record {control_number}.")

    record = InspireRecord.create_or_update(data)

    db.session.commit()
    # Index synchronously rather than delaying the indexing task.
    record.index(delay=False)
    message = (f"Record created uuid:{record.id} with "
               f"pid:{control_number} has been created.")
    click.echo(click.style(message, fg="green"))

    if save_to_file:
        pid_type = PidStoreBase.get_pid_type_from_schema(data["$schema"])
        endpoint = PidStoreBase.get_endpoint_from_pid_type(pid_type)
        file_path = f"data/records/{endpoint}/{control_number}.json"
        click.echo(click.style(f"Writing to {file_path}", fg="green"))
        # ``orjson.dumps`` returns ``bytes``, so the file must be opened in
        # binary mode; writing bytes to a text-mode file raises ``TypeError``.
        with open(file_path, "wb") as file:
            file.write(orjson.dumps(data))
Exemplo n.º 7
0
def migrate_record_from_mirror(prod_record,
                               disable_external_push=True,
                               disable_relations_update=True):
    """Migrate a mirrored legacy record into an Inspire record.

    Handled failures are stored on ``prod_record.error`` and the mirror row
    is merged into the session; they are not propagated to the caller.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        disable_external_push(bool): forwarded to ``create_or_update``.
        disable_relations_update(bool): forwarded to ``create_or_update``.
    Returns:
        dict: the migrated record metadata, which is also inserted into the
        database. ``None`` when the record was skipped or migration failed.
    Raises:
        ThreadsTimeoutError: re-raised untouched instead of being recorded
            as a record error.
    """
    logger = LOGGER.bind(recid=prod_record.recid)
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except NotSupportedError as exc:
        # Unsupported records are marked valid and skipped, not errored.
        logger.warning(str(exc), recid=prod_record.recid)
        prod_record.valid = True
        db.session.merge(prod_record)
        return None
    except Exception as exc:
        logger.exception("Error converting from marcxml")
        prod_record.error = exc
        db.session.merge(prod_record)
        return None

    if "$schema" in json_record:
        ensure_valid_schema(json_record)

        pid_type = PidStoreBase.get_pid_type_from_schema(
            json_record.get("$schema"))
        # Treat a missing/None blacklist as empty rather than crashing on
        # ``in None``.
        blacklist = current_app.config.get("MIGRATION_PID_TYPE_BLACKLIST") or ()
        if pid_type in blacklist:
            prod_record.error = Exception(
                f"Record: {prod_record.recid} has blacklisted pid_type: {pid_type} is blacklisted"
            )
            db.session.merge(prod_record)
            return None

    # Stays ``None`` if a DownloadFileError is raised before the file
    # locations were rewritten, so the handler below never touches an
    # unbound name.
    original_urls = None
    try:
        with db.session.begin_nested():
            cls = InspireRecord.get_class_for_record(json_record)
            original_urls = replace_afs_file_locations_with_local(json_record)
            record = cls.create_or_update(
                json_record,
                disable_external_push=disable_external_push,
                disable_relations_update=disable_relations_update,
            )
            cache_afs_file_locations(record)
    except ValidationError as exc:
        path = ".".join(exc.schema_path)
        logger.warning(
            "Migrator validator error",
            path=path,
            value=exc.instance,
            recid=prod_record.recid,
        )
        prod_record.error = exc
        db.session.merge(prod_record)
    except DownloadFileError as exc:
        removed_cached_files = (
            remove_cached_afs_file_locations(original_urls)
            if original_urls is not None else False
        )
        if not removed_cached_files:
            logger.exception("DownloadFileError while migrate from mirror")
            prod_record.error = exc
            db.session.merge(prod_record)
        else:
            # Cached locations were dropped: retry the whole migration.
            return migrate_record_from_mirror(
                prod_record=prod_record,
                disable_external_push=disable_external_push,
                disable_relations_update=disable_relations_update,
            )
    except PIDValueError as exc:
        message = f"pid_type:'{exc.pid_type}', pid_value:'{exc.pid_value}'"
        logger.exception("PIDValueError while migrate from mirror",
                         msg=message)
        # Replace the exception args so the stored error carries the
        # pid type and value.
        exc.args = (message, )
        prod_record.error = exc
        db.session.merge(prod_record)
    except ThreadsTimeoutError:
        # Must not be swallowed by the generic handler below.
        raise
    except Exception as exc:
        logger.exception("Error while migrating record into mirror")
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record
    return None
Exemplo n.º 8
0
 def _schema_type(self):
     """Return the pid type derived from this record's ``$schema`` value."""
     schema = self["$schema"]
     return PidStoreBase.get_pid_type_from_schema(schema)
Exemplo n.º 9
0
def migrate_record_from_mirror(prod_record,
                               disable_orcid_push=True,
                               disable_citation_update=True):
    """Migrate a mirrored legacy record into an Inspire record.

    Handled failures are stored on ``prod_record.error`` and the mirror row
    is merged into the session; they are not propagated to the caller.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        disable_orcid_push(bool): forwarded to ``create_or_update``.
        disable_citation_update(bool): forwarded to ``create_or_update``.
    Returns:
        dict: the migrated record metadata, which is also inserted into the
        database. ``None`` when the record was skipped or migration failed.
    """
    logger = LOGGER.bind(recid=prod_record.recid)
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except Exception as exc:
        logger.exception("Error converting from marcxml")
        prod_record.error = exc
        db.session.merge(prod_record)
        return None

    if "$schema" in json_record:
        ensure_valid_schema(json_record)

        pid_type = PidStoreBase.get_pid_type_from_schema(
            json_record.get("$schema"))
        # Treat a missing/None blacklist as empty rather than crashing on
        # ``in None``.
        blacklist = current_app.config.get("MIGRATION_PID_TYPE_BLACKLIST") or ()
        if pid_type in blacklist:
            # Plain ``{...}`` placeholders: a ``$`` prefix is not f-string
            # syntax and would end up verbatim in the message.
            prod_record.error = Exception(
                f"Record: {prod_record.recid} has blacklisted pid_type: {pid_type} is blacklisted"
            )
            db.session.merge(prod_record)
            return None

    try:
        with db.session.begin_nested():
            cls = InspireRecord.get_class_for_record(json_record)
            # Delete the external pids of every record listed under
            # ``deleted_records`` before creating/updating this one.
            for deleted_record in cls.get_linked_records_from_dict_field(
                    json_record, "deleted_records"):
                deleted_record.pidstore_handler(
                    deleted_record.id, deleted_record).delete_external_pids()
            record = cls.create_or_update(
                json_record,
                disable_orcid_push=disable_orcid_push,
                disable_citation_update=disable_citation_update,
            )
    except ValidationError as exc:
        path = ".".join(exc.schema_path)
        logger.warning(
            "Migrator validator error",
            path=path,
            value=exc.instance,
            recid=prod_record.recid,
        )
        prod_record.error = exc
        db.session.merge(prod_record)
    except PIDValueError as exc:
        message = f"pid_type:'{exc.pid_type}', pid_value:'{exc.pid_value}'"
        logger.exception("PIDValueError while migrate from mirror",
                         msg=message)
        # Replace the exception args so the stored error carries the
        # pid type and value.
        exc.args = (message, )
        prod_record.error = exc
        db.session.merge(prod_record)
    except Exception as exc:
        logger.exception("Error while migrating record into mirror")
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record
    return None
Exemplo n.º 10
0
def get_endpoint_from_schema(schema):
    """Return the endpoint for the pid type that ``schema`` resolves to."""
    return PidStoreBase.get_endpoint_from_pid_type(
        PidStoreBase.get_pid_type_from_schema(schema))