Exemplo n.º 1
0
 def get_linked_book(self, data):
     """Build the linked-book entry for a record from its parent record.

     The parent is obtained with ``get_parent_record(data)``. ``None`` is
     returned when there is no parent or when the parent lacks either
     ``titles`` or ``control_number``.

     Otherwise the parent's first title is copied and a ``record`` key is
     added holding a ``$ref``: the parent's own ``self.$ref`` when it is
     set, else a URL built with ``url_for`` for the REST item endpoint.
     Note that the endpoint is derived from ``data["$schema"]``.
     """
     parent = get_parent_record(data)
     if not parent or "titles" not in parent or "control_number" not in parent:
         return None

     pid_type = PidStoreBase.get_pid_type_from_schema(data["$schema"])
     endpoint = PidStoreBase.get_endpoint_from_pid_type(pid_type)

     ref = get_value(parent, "self.$ref")
     if not ref:
         # Fall back to an absolute URL for the parent record.
         ref = url_for(
             f"invenio_records_rest.{endpoint}_item",
             pid_value=parent["control_number"],
             _external=True,
         )

     linked_book = dict(parent["titles"][0])
     linked_book["record"] = {"$ref": ref}
     return linked_book
Exemplo n.º 2
0
def find_record_endpoint(pid, record_hit=None, **kwargs):
    """Resolve the endpoint name for ``pid``.

    Resolution order:

    1. When ``pid.pid_type`` is anything other than ``"recid"``, it is looked
       up in ``current_records_rest.default_endpoint_prefixes``.
    2. Otherwise (all pid types coming from search results are ``recid``),
       when ``record_hit`` carries a ``$schema`` under ``_source``, the pid
       type is derived from that schema.
    3. Otherwise the pid type is derived from ``pid.pid_value`` through
       ``PidStoreBase.get_pid_type_from_recid`` (noted by the original
       author as costing an additional db query).
    """
    if pid.pid_type != "recid":
        return current_records_rest.default_endpoint_prefixes[pid.pid_type]

    source = record_hit.get("_source", {}) if record_hit else {}
    if "$schema" in source:
        pid_type = PidStoreBase.get_pid_type_from_schema(source["$schema"])
    else:
        pid_type = PidStoreBase.get_pid_type_from_recid(pid.pid_value)
    return PidStoreBase.get_endpoint_from_pid_type(pid_type)
Exemplo n.º 3
0
    def get_facet_author_name(self, record):
        """Prepare record for ``facet_author_name`` field.

        Authors whose linked record could be fetched contribute the value of
        ``get_facet_author_name_for_author``. Every other author (no
        ``record`` key, or a ``record`` whose recid is not among the fetched
        control numbers) contributes ``NOREC_<display name>``.

        Returns:
            list: facet names, linked authors first, then unlinked ones.
        """
        linked_authors = list(
            InspireRecord.get_linked_records_from_dict_field(
                record, "authors.record"))
        known_control_numbers = {
            linked["control_number"]
            for linked in linked_authors
            if linked.get("control_number")
        }

        def lacks_linked_record(author):
            # No link at all, or a link that did not resolve to a record.
            if "record" not in author:
                return True
            ref = author["record"].get("$ref")
            recid = PidStoreBase.get_pid_from_record_uri(ref)[1]
            return int(recid) not in known_control_numbers

        unlinked_authors = [
            author for author in record.get("authors", [])
            if lacks_linked_record(author)
        ]

        facet_names = [
            get_facet_author_name_for_author(linked)
            for linked in linked_authors
        ]
        facet_names.extend(
            "NOREC_{}".format(
                get_display_name_for_author_name(author["full_name"]))
            for author in unlinked_authors)
        return facet_names
Exemplo n.º 4
0
    def _get_linked_pids_from_field(cls, data, path):
        """Collect the PIDs of all records referenced in the field at ``path``.

        ``$ref`` is appended to ``path``, the values found at that location in
        ``data`` are flattened, and each one is converted with
        ``PidStoreBase.get_pid_from_record_uri``.

        Args:
            data (dict): data from which records should be extracted
            path (str): the path of the linked records (where $ref is
                located), given without the trailing ``$ref``.
        Returns:
            list: one entry per reference, as produced by
            ``get_pid_from_record_uri`` (described by the original author as
            ``(pid_type, pid_value)`` tuples).

        Example:
            With ``path="references.record"`` and::

                data = {
                    'references': [
                        {'record': {'$ref': 'http://localhost/literature/1'}}
                    ]
                }

            the result has a single entry, the PID resolved from
            ``http://localhost/literature/1``.
        """
        ref_path = ".".join((path, "$ref"))
        refs = flatten_list(get_value(data, ref_path, []))
        return [PidStoreBase.get_pid_from_record_uri(ref) for ref in refs]
Exemplo n.º 5
0
 def redirect_pids(self, pids):
     """Redirect every PID referenced in ``pids`` when the feature is enabled.

     Nothing happens (and ``None`` is returned) unless the
     ``FEATURE_FLAG_ENABLE_REDIRECTION_OF_PIDS`` config flag is truthy.
     Otherwise each item's ``$ref`` is resolved to ``(pid_type, pid_value)``
     and handed to ``self.redirect_pid``; ``pids`` is then returned.
     """
     if not current_app.config.get("FEATURE_FLAG_ENABLE_REDIRECTION_OF_PIDS"):
         return None

     for ref in pids:
         uri = ref["$ref"]
         pid_type, pid_value = PidStoreBase.get_pid_from_record_uri(uri)
         self.redirect_pid(pid_type, pid_value)
     return pids
Exemplo n.º 6
0
def index_after_commit(sender, changes):
    """Index a record in ES after it was committed to the DB.

    This cannot happen in an ``after_record_commit`` receiver from Invenio-Records
    because, despite the name, at that point we are not yet sure whether the record
    has been really committed to the DB.

    Args:
        sender: the signal sender (unused here).
        changes: iterable of ``(model_instance, change)`` pairs, where
            ``change`` is the name of the committed operation.

    Raises:
        RuntimeError: when a ``RecordMetadata`` instance is reported with an
            operation other than ``insert``, ``update`` or ``delete``.
    """
    for model_instance, change in changes:
        if not isinstance(model_instance, RecordMetadata):
            continue

        if change not in ("insert", "update", "delete"):
            # Exceptions do not %-format their arguments, so build the
            # message explicitly instead of passing a format string + args.
            raise RuntimeError(
                f"Wrong operation `{change}` on record {model_instance.id!r}")

        record_uuid = str(model_instance.id)
        LOGGER.debug("Record commited", change=change, uuid=record_uuid)

        pid_type = PidStoreBase.get_pid_type_from_schema(
            model_instance.json.get("$schema"))
        # ``changes`` holds (instance, operation) pairs, so the former
        # ``"delete" in changes`` could never be true; look at the operation
        # of the record currently being processed instead.
        delete = change == "delete"
        record_class = InspireRecord.get_subclasses()[pid_type]
        arguments = record_class._record_index(model_instance.json,
                                               _id=record_uuid,
                                               force_delete=delete)
        arguments["record_version"] = model_instance.version_id

        LOGGER.debug(
            "Record sending to index",
            uuid=record_uuid,
            delete=delete,
        )
        current_celery_app.send_task(
            "inspirehep.records.indexer.tasks.index_record",
            kwargs=arguments)
Exemplo n.º 7
0
 def get_class_for_record(cls, data):
     """Return the record subclass registered for the schema of ``data``.

     The pid type is derived from ``data["$schema"]`` and looked up in
     ``cls.get_subclasses()``.

     Raises:
         WrongRecordSubclass: when no subclass is registered for that pid
             type.
     """
     pid_type = PidStoreBase.get_pid_type_from_schema(data["$schema"])
     subclass = cls.get_subclasses().get(pid_type)
     if subclass is not None:
         return subclass
     raise WrongRecordSubclass(
         f"Wrong subclass {cls} used for record of type {pid_type}"
     )
Exemplo n.º 8
0
def redirect_references_to_merged_record(self, uuid):
    """Repoint references from a merged record to the record replacing it.

    The record identified by ``uuid`` is loaded (deleted records included).
    Its ``new_record.$ref`` is the new target and its ``self.$ref`` the
    reference being replaced. The set of places that may hold such
    references is picked from ``get_refs_to_schemas()`` by the record's
    schema name.
    """
    merged = InspireRecord.get_record(uuid, with_deleted=True)
    target_ref = merged["new_record"]["$ref"]
    stale_ref = merged["self"]["$ref"]
    schema_name = PidStoreBase.get_schema_name_from_uri(merged["$schema"])
    candidate_refs = get_refs_to_schemas()[schema_name]
    update_references_pointing_to_merged_record(
        candidate_refs, stale_ref, target_ref)
Exemplo n.º 9
0
def test_get_pid_type_from_schema(
    mock_get_config_pid_types_to_endpointsck_get,
    mock_get_config_pid_types_to_schema,
    schema,
    expected,
):
    """Check that ``get_pid_type_from_schema(schema)`` equals ``expected``."""
    assert expected == PidStoreBase.get_pid_type_from_schema(schema)
Exemplo n.º 10
0
def redirect_record(record_path):
    """Redirect a record path to the page of the matching record.

    The recid is the part of ``record_path`` before the first ``/``. When no
    endpoint is known for it the request is aborted with 404. Records of the
    ``data`` endpoint are redirected (302) to ``LEGACY_BASE_URL``; every
    other record is redirected (301) to ``/<endpoint>/<recid>``.
    """
    recid, _, _ = record_path.partition("/")
    endpoint = PidStoreBase.get_endpoint_for_recid(recid)
    if not endpoint:
        abort(404)

    if endpoint == "data":
        target = f"{current_app.config['LEGACY_BASE_URL']}/record/{recid}"
        status = 302
    else:
        target = f"/{endpoint}/{recid}"
        status = 301
    return redirect(target, status)
Exemplo n.º 11
0
    def get_record_data_from_es(record):
        """Query Elasticsearch for this record and return its stored source.

        The search class is taken from the ``RECORDS_REST_ENDPOINTS`` entry
        of the endpoint configured for ``record.pid_type``.

        Returns:
            dict: This record in the way it is represented in Elasticsearch
            (whatever ``search_class.get_source`` returns for ``record.id``).

        """
        endpoints_by_pid_type = PidStoreBase._get_config_pid_types_to_endpoints()
        endpoint = endpoints_by_pid_type[record.pid_type]
        rest_config = current_app.config["RECORDS_REST_ENDPOINTS"][endpoint]
        search = rest_config["search_class"]()
        return search.get_source(record.id)
Exemplo n.º 12
0
def _create_record(data, save_to_file=False):
    control_number = data["control_number"]

    click.echo(f"Creating record {control_number}.")

    record = InspireRecord.create_or_update(data)

    db.session.commit()
    record.index(delay=False)
    message = (f"Record created uuid:{record.id} with "
               f"pid:{control_number} has been created.")
    click.echo(click.style(message, fg="green"))

    if save_to_file:
        pid_type = PidStoreBase.get_pid_type_from_schema(data["$schema"])
        endpoint = PidStoreBase.get_endpoint_from_pid_type(pid_type)
        file_path = os.path.join(
            f"data/records/{endpoint}/{control_number}.json")
        click.echo(click.style(f"Writing to {file_path}", fg="green"))
        with open(file_path, "w+") as file:
            file.write(orjson.dumps(data))
Exemplo n.º 13
0
 def delete_records_from_deleted_records(cls, data):
     """Delete the records listed under ``deleted_records`` in ``data``.

     Each entry's ``$ref`` is resolved to a PID and the matching record is
     fetched (with ``original_record=True``) and deleted. A PID that does
     not exist is logged as a warning and skipped.
     """
     # Hack for migrator in case new record takes pids from other records
     # which should be deleted but they are not deleted yet.
     for ref in data.get("deleted_records", []):
         pid_type, pid_value = PidStoreBase.get_pid_from_record_uri(ref["$ref"])
         try:
             stale_record = cls.get_record_by_pid_value(
                 pid_value, pid_type, original_record=True
             )
         except PIDDoesNotExistError:
             LOGGER.warning(
                 "This pid is missing while still is marked as deleted by another record.",
                 marked_by=data.get("control_number"),
                 marked_to_delete=(pid_type, pid_value),
             )
             continue
         # Kept outside the ``try`` so errors raised by ``delete`` propagate.
         stale_record.delete()
Exemplo n.º 14
0
    def resolve_conference_record_as_root(self, pub_info_item):
        """Merge the referenced conference record into ``pub_info_item``.

        Returns an empty dict when ``pub_info_item`` has no
        ``conference_record``, when the conference PID does not exist, or
        when the conference has no ``titles``. Otherwise ``pub_info_item`` is
        updated in place with the conference's fields and returned.
        """
        conference_ref = pub_info_item.get("conference_record")
        if conference_ref is None:
            return {}

        _, recid = PidStoreBase.get_pid_from_record_uri(
            conference_ref.get("$ref"))
        try:
            conference = InspireRecord.get_record_by_pid_value(
                pid_value=recid, pid_type="con")
        except PIDDoesNotExistError:
            return {}

        if not conference.get("titles"):
            return {}

        pub_info_item.update(conference)
        return pub_info_item
Exemplo n.º 15
0
def test_get_pid_type_from_endpoint(
        momock_get_config_pid_types_to_endpointsck_get, endpoint, expected):
    """Check that ``get_pid_type_from_endpoint(endpoint)`` equals ``expected``."""
    assert expected == PidStoreBase.get_pid_type_from_endpoint(endpoint)
Exemplo n.º 16
0
 def _schema_type(self):
     """Return the pid type that corresponds to this record's ``$schema``."""
     schema = self["$schema"]
     return PidStoreBase.get_pid_type_from_schema(schema)
Exemplo n.º 17
0
def test_get_endpoint_from_pid_type(mock_get_config_pid_types_to_endpoints,
                                    pid_type, expected):
    """Check that ``get_endpoint_from_pid_type(pid_type)`` equals ``expected``."""
    assert expected == PidStoreBase.get_endpoint_from_pid_type(pid_type)
Exemplo n.º 18
0
def test_get_pid_from_record_uri(url, expected):
    """Check that ``get_pid_from_record_uri(url)`` equals ``expected``."""
    assert expected == PidStoreBase.get_pid_from_record_uri(url)
Exemplo n.º 19
0
def test_get_config_for_schema(appctx):
    """The pid-type-to-schema configuration is available (not ``None``)."""
    pid_types_to_schema = PidStoreBase._get_config_pid_types_to_schema()

    assert pid_types_to_schema is not None
Exemplo n.º 20
0
def migrate_record_from_mirror(prod_record,
                               disable_orcid_push=True,
                               disable_citation_update=True):
    """Migrate a mirrored legacy record into an Inspire record.

    On every failure path the exception is stored in ``prod_record.error``
    and ``prod_record`` is merged into the session; on success
    ``prod_record.valid`` is set to ``True``.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        disable_orcid_push(bool): forwarded to ``create_or_update``.
        disable_citation_update(bool): forwarded to ``create_or_update``.
    Returns:
        The record returned by ``create_or_update`` (the migrated record,
        which is also inserted into the database), or ``None`` when the
        MARCXML conversion fails, the pid type is blacklisted, or creating
        the record raises.
    """
    logger = LOGGER.bind(recid=prod_record.recid)
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except Exception as exc:
        logger.exception("Error converting from marcxml")
        prod_record.error = exc
        db.session.merge(prod_record)
        return None

    if "$schema" in json_record:
        ensure_valid_schema(json_record)

        pid_type = PidStoreBase.get_pid_type_from_schema(
            json_record.get("$schema"))
        if pid_type in current_app.config.get("MIGRATION_PID_TYPE_BLACKLIST"):
            # Plain ``{...}`` placeholders: the former ``${...}`` left a
            # literal ``$`` in front of every interpolated value.
            prod_record.error = Exception(
                f"Record: {prod_record.recid} has blacklisted pid_type: {pid_type} is blacklisted"
            )
            db.session.merge(prod_record)
            return

    try:
        with db.session.begin_nested():
            cls = InspireRecord.get_class_for_record(json_record)
            # Drop the external PIDs of records this one replaces before
            # creating/updating it.
            for deleted_record in cls.get_linked_records_from_dict_field(
                    json_record, "deleted_records"):
                deleted_record.pidstore_handler(
                    deleted_record.id, deleted_record).delete_external_pids()
            record = cls.create_or_update(
                json_record,
                disable_orcid_push=disable_orcid_push,
                disable_citation_update=disable_citation_update,
            )
    except ValidationError as exc:
        # Path entries are not guaranteed to be strings (jsonschema uses
        # ints for list indices); ``str.join`` would raise TypeError from
        # inside this handler, so convert each entry first.
        path = ".".join(str(part) for part in exc.schema_path)
        logger.warning(
            "Migrator validator error",
            path=path,
            value=exc.instance,
            recid=prod_record.recid,
        )
        prod_record.error = exc
        db.session.merge(prod_record)
    except PIDValueError as exc:
        message = f"pid_type:'{exc.pid_type}', pid_value:'{exc.pid_value}'"
        logger.exception("PIDValueError while migrate from mirror",
                         msg=message)
        # Replace the args so the stored error carries the readable message.
        exc.args = (message, )
        prod_record.error = exc
        db.session.merge(prod_record)
    except Exception as exc:
        logger.exception("Error while migrating record into mirror")
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record
Exemplo n.º 21
0
def migrate_record_from_mirror(prod_record,
                               disable_external_push=True,
                               disable_relations_update=True):
    """Migrate a mirrored legacy record into an Inspire record.

    On failure the exception is stored in ``prod_record.error`` and
    ``prod_record`` is merged into the session. On success, and also when the
    MARCXML conversion raises ``NotSupportedError``, ``prod_record.valid`` is
    set to ``True``.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        disable_external_push(bool): forwarded to ``create_or_update``.
        disable_relations_update(bool): forwarded to ``create_or_update``.
    Returns:
        The record returned by ``create_or_update`` (the migrated record,
        which is also inserted into the database), or ``None`` when the
        record is skipped or the migration fails.
    Raises:
        ThreadsTimeoutError: re-raised untouched instead of being recorded
            on ``prod_record``.
    """
    logger = LOGGER.bind(recid=prod_record.recid)
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except NotSupportedError as exc:
        # Unsupported records are logged and marked valid, not failed.
        logger.warning(str(exc), recid=prod_record.recid)
        prod_record.valid = True
        db.session.merge(prod_record)
        return
    except Exception as exc:
        logger.exception("Error converting from marcxml")
        prod_record.error = exc
        db.session.merge(prod_record)
        return

    if "$schema" in json_record:
        ensure_valid_schema(json_record)

        pid_type = PidStoreBase.get_pid_type_from_schema(
            json_record.get("$schema"))
        if pid_type in current_app.config.get("MIGRATION_PID_TYPE_BLACKLIST"):
            prod_record.error = Exception(
                f"Record: {prod_record.recid} has blacklisted pid_type: {pid_type} is blacklisted"
            )
            db.session.merge(prod_record)
            return

    try:
        with db.session.begin_nested():
            cls = InspireRecord.get_class_for_record(json_record)
            original_urls = replace_afs_file_locations_with_local(json_record)
            record = cls.create_or_update(
                json_record,
                disable_external_push=disable_external_push,
                disable_relations_update=disable_relations_update,
            )
            cache_afs_file_locations(record)
    except ValidationError as exc:
        # Path entries are not guaranteed to be strings (jsonschema uses
        # ints for list indices); ``str.join`` would raise TypeError from
        # inside this handler, so convert each entry first.
        path = ".".join(str(part) for part in exc.schema_path)
        logger.warning(
            "Migrator validator error",
            path=path,
            value=exc.instance,
            recid=prod_record.recid,
        )
        prod_record.error = exc
        db.session.merge(prod_record)
    except DownloadFileError as exc:
        # NOTE(review): ``original_urls`` is only bound once
        # ``replace_afs_file_locations_with_local`` has returned; if that
        # call (or an earlier one) can raise DownloadFileError, this handler
        # would fail on an unbound name - confirm it cannot.
        removed_cached_files = remove_cached_afs_file_locations(original_urls)
        if not removed_cached_files:
            logger.exception("DownloadFileError while migrate from mirror")
            prod_record.error = exc
            db.session.merge(prod_record)
        else:
            # Cached locations were dropped: retry the whole migration.
            return migrate_record_from_mirror(
                prod_record=prod_record,
                disable_external_push=disable_external_push,
                disable_relations_update=disable_relations_update,
            )
    except PIDValueError as exc:
        message = f"pid_type:'{exc.pid_type}', pid_value:'{exc.pid_value}'"
        logger.exception("PIDValueError while migrate from mirror",
                         msg=message)
        # Replace the args so the stored error carries the readable message.
        exc.args = (message, )
        prod_record.error = exc
        db.session.merge(prod_record)
    except ThreadsTimeoutError:
        # Must not be swallowed by the generic handler below.
        raise
    except Exception as exc:
        logger.exception("Error while migrating record into mirror")
        prod_record.error = exc
        db.session.merge(prod_record)
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record
Exemplo n.º 22
0
def get_ref_from_pid(pid_type, pid_value):
    """Return the full ``$ref`` for the record with ``pid_type`` and ``pid_value``.

    The endpoint is looked up from ``pid_type`` and passed, together with
    ``pid_value``, to ``get_record_ref``.
    """
    endpoint = PidStoreBase.get_endpoint_from_pid_type(pid_type)
    return get_record_ref(pid_value, endpoint)
Exemplo n.º 23
0
def get_endpoint_from_schema(schema):
    """Return the endpoint for ``schema``, resolved through its pid type."""
    return PidStoreBase.get_endpoint_from_pid_type(
        PidStoreBase.get_pid_type_from_schema(schema))