def test_esdumper_without_model(testapp, db, example_data):
    """Test the Elasticsearch dumper."""
    # Dump without a model: model-backed keys cannot be resolved.
    dumped = Record(example_data).dumps(dumper=ElasticsearchDumper())
    for model_key in ('uuid', 'version_id', 'created', 'updated'):
        # Each model-derived key is set to None when no model is attached.
        assert dumped[model_key] is None

    # Load without a model defined.
    loaded = Record.loads(dumped, loader=ElasticsearchDumper())
    assert loaded.model is None  # model will not be set
    assert loaded == example_data  # data round-trips unchanged
def test_locationsdumper_with_polygon_and_shapely(app, db, minimal_record):
    pytest.importorskip('shapely')

    es_dumper = ElasticsearchDumper(extensions=[LocationsDumper()])

    # This also tests shapes with elevations (a third coordinate).
    # NOTE(review): this test writes `locations` at the record top level,
    # while sibling tests use `metadata.locations` — confirm intended.
    minimal_record['locations'] = {
        'features': [{
            'geometry': {
                'type': 'Polygon',
                'coordinates': [[
                    [100.0, 0.0, 10],
                    [101.0, 0.0, 10],
                    [101.0, 1.0, 30],
                    [100.0, 1.0, 30],
                    [100.0, 0.0, 10],
                ]]
            }
        }],
    }

    rec = RDMRecord.create(minimal_record)
    dumped = rec.dumps(dumper=es_dumper)

    # 3D geometries still lead to 2D centroids.
    assert dumped['locations']['features'][0]['centroid'] == [100.5, 0.5]
class CommonFieldsMixin:
    """Common system fields between records and drafts."""

    # Model class tracking version state across record versions.
    versions_model_cls = models.RDMVersionsState
    # API class used for the parent record.
    parent_record_cls = RDMParent

    # JSON schema constant injected into every record.
    schema = ConstantField(
        '$schema', 'local://records/record-v2.0.0.json')

    # Elasticsearch dumper with EDTF date and relations extensions.
    dumper = ElasticsearchDumper(
        extensions=[
            EDTFDumperExt('metadata.publication_date'),
            EDTFListDumperExt("metadata.dates", "date"),
            RelationDumperExt('relations'),
        ]
    )

    # Relation to the languages vocabulary (dereferences id/title).
    relations = RelationsField(
        languages=PIDListRelation(
            'metadata.languages',
            attrs=['id', 'title'],
            pid_field=Vocabulary.pid.with_type_ctx('languages')
        ),
    )

    # Bucket fields live on the model only (not dumped).
    bucket_id = ModelField(dump=False)
    bucket = ModelField(dump=False)

    # Access control field.
    access = RecordAccessField()

    # Dumped (dump=True) so the published status is indexed/searchable.
    is_published = PIDStatusCheckField(status=PIDStatus.REGISTERED, dump=True)

    # External persistent identifiers.
    pids = DictField("pids")
class BibliographicRecord(Record):
    """Bibliographic Record API."""

    # Database model for the record metadata.
    model_cls = models.RecordMetadata

    # Elasticsearch index and its search alias.
    index = IndexField('rdmrecords-records-record-v1.0.0',
                       search_alias='rdmrecords-records')

    # Dumper with EDTF publication date and relations extensions.
    dumper = ElasticsearchDumper(extensions=[
        EDTFDumperExt('metadata.publication_date'),
        RelationDumperExt('relations'),
    ])

    # Relation to the languages vocabulary.
    relations = RelationsField(languages=PIDListRelation(
        'metadata.languages', attrs=['metadata'], pid_field=Language.pid),
    )

    # Files field: bucket lifecycle is managed by the service layer.
    files = FilesField(
        store=False,
        file_cls=RecordFile,
        # Don't create
        create=False,
        # Don't delete, we'll manage in the service
        delete=False,
    )

    # Bucket fields live on the model only (not dumped).
    bucket_id = ModelField(dump=False)
    bucket = ModelField(dump=False)
def test_locationsdumper_with_no_featurecollection(app, db, minimal_record):
    # Smoke test: a record without any locations dumps without raising.
    es_dumper = ElasticsearchDumper(extensions=[LocationsDumper()])
    rec = RDMRecord.create(minimal_record)
    rec.dumps(dumper=es_dumper)
def test_locationsdumper_with_polygon_and_mock_shapely(app, db, minimal_record):
    patch_target = 'invenio_rdm_records.records.dumpers.locations.shapely'
    with unittest.mock.patch(patch_target) as shapely:
        es_dumper = ElasticsearchDumper(extensions=[LocationsDumper()])

        minimal_record['metadata']['locations'] = {
            'features': [{
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [[
                        [100.0, 0.0],
                        [101.0, 0.0],
                        [101.0, 1.0],
                        [100.0, 1.0],
                        [100.0, 0.0],
                    ]]
                }
            }],
        }

        rec = RDMRecord.create(minimal_record)

        # Fake shapely so no real geometry computation happens.
        fake_shape = unittest.mock.Mock()
        fake_shape.centroid.x, fake_shape.centroid.y = 100.5, 0.5
        shapely.geometry.shape.return_value = fake_shape

        dumped = rec.dumps(dumper=es_dumper)

        # The dumper must hand the raw GeoJSON geometry to shapely once.
        geometry = \
            minimal_record['metadata']['locations']['features'][0]['geometry']
        shapely.geometry.shape.assert_called_once_with(geometry)
        centroid = dumped['metadata']['locations']['features'][0]['centroid']
        assert centroid == [100.5, 0.5]
def test_locationsdumper_with_polygon_and_no_shapely(app, db, minimal_record):
    es_dumper = ElasticsearchDumper(extensions=[LocationsDumper()])

    minimal_record['metadata']['locations'] = {
        'features': [{
            'geometry': {
                'type': 'Polygon',
                'coordinates': [[
                    [100.0, 0.0],
                    [101.0, 0.0],
                    [101.0, 1.0],
                    [100.0, 1.0],
                    [100.0, 0.0],
                ]]
            }
        }],
    }

    rec = RDMRecord.create(minimal_record)

    # Without shapely available, dumping warns and computes no centroid
    # for the polygon geometry.
    with pytest.warns(UserWarning):
        dumped = rec.dumps(dumper=es_dumper)

    assert 'centroid' not in dumped['metadata']['locations']['features'][0]
class CommonFieldsMixin:
    """Common system fields between records and drafts."""

    # Model class tracking version state across record versions.
    versions_model_cls = models.RDMVersionsState
    # API class used for the parent record.
    parent_record_cls = RDMParent

    # JSON schema constant injected into every record.
    schema = ConstantField(
        '$schema', 'http://localhost/schemas/records/record-v2.0.0.json')

    # Elasticsearch dumper with EDTF date and relations extensions.
    dumper = ElasticsearchDumper(
        extensions=[
            EDTFDumperExt('metadata.publication_date'),
            EDTFListDumperExt("metadata.dates", "date"),
            RelationDumperExt('relations'),
        ]
    )

    # Relation to the languages vocabulary (dereferences id/title).
    relations = RelationsField(
        languages=PIDListRelation(
            'metadata.languages',
            attrs=['id', 'title'],
            pid_field=Vocabulary.pid.with_type_ctx('languages')
        ),
    )

    # Bucket fields live on the model only (not dumped).
    bucket_id = ModelField(dump=False)
    bucket = ModelField(dump=False)

    # Access control field.
    access = RecordAccessField()

    # We redefine the property as we extend the `PIDStatusCheckField` to dump
    # the property in ES in order to be available for aggregation
    is_published = IsPublishedField(status=PIDStatus.REGISTERED)
def test_eslistdumper_with_edtfext_parse_error(app, db, minimal_record):
    """Test the EDTF list dumper extension on unparsable values."""
    dumper = ElasticsearchDumper(
        extensions=[
            EDTFListDumperExt("metadata.creators", "family_name"),
        ]
    )

    # Create the record
    record = RDMRecord.create(minimal_record, parent=RDMParent.create({}))
    db.session.commit()

    # Dump it: a non-EDTF family_name must not produce a range field.
    dump = record.dumps(dumper=dumper)
    person_or_org = dump["metadata"]["creators"][0]["person_or_org"]
    assert "family_name_range" not in person_or_org
    assert "family_name" in person_or_org

    # Load it
    new_record = RDMRecord.loads(dump, loader=dumper)
    # BUGFIX: verify the *loaded* record — the original re-read `dump`
    # here, so the load path was never actually checked.
    person_or_org = new_record["metadata"]["creators"][0]["person_or_org"]
    assert 'family_name_range' not in person_or_org
    assert 'family_name' in person_or_org
    assert 'type_start' not in new_record['metadata']['resource_type']
    assert 'type_end' not in new_record['metadata']['resource_type']
    assert 'type' in new_record['metadata']['resource_type']
def test_esdumper_with_edtfext(app, db, minimal_record, location, date,
                               expected_start, expected_end):
    """Test EDTF extension implementation."""
    # Dumper with both the scalar and the list EDTF extensions.
    es_dumper = ElasticsearchDumper(extensions=[
        EDTFDumperExt("metadata.publication_date"),
        EDTFListDumperExt("metadata.dates", "date"),
    ])

    metadata = minimal_record["metadata"]
    metadata["publication_date"] = date
    metadata["dates"] = [{"date": date}]

    # Create the record.
    rec = RDMRecord.create(minimal_record, parent=RDMParent.create({}))
    db.session.commit()

    # Dump it: range fields are added alongside the original values.
    dumped = rec.dumps(dumper=es_dumper)
    pub_range = dumped["metadata"]["publication_date_range"]
    assert pub_range["gte"] == expected_start
    assert pub_range["lte"] == expected_end
    assert dumped["metadata"]["publication_date"] == date
    date_range = dumped["metadata"]["dates"][0]["date_range"]
    assert date_range["gte"] == expected_start
    assert date_range["lte"] == expected_end
    assert dumped["metadata"]["dates"][0]["date"] == date

    # Load it: the computed range fields are stripped again.
    loaded = RDMRecord.loads(dumped, loader=es_dumper)
    assert "publication_date_range" not in loaded["metadata"]
    assert "publication_date" in loaded["metadata"]
    assert "date_range" not in loaded["metadata"]["dates"][0]
    assert "date" in loaded["metadata"]["dates"][0]
class Vocabulary(RecordBase): """Example record API.""" # Configuration model_cls = VocabularyMetadata dumper = ElasticsearchDumper( extensions=[VocabularyTypeElasticsearchDumperExt()] ) # System fields schema = ConstantField( "$schema", "https://localhost/schemas/vocabularies/vocabulary-v1.0.0.json", ) index = IndexField( "vocabularies-vocabulary-v1.0.0", search_alias="vocabularies" ) # TODO: This should be changed to use something else than the recidv2 pid = PIDField("id", provider=RecordIdProviderV2) vocabulary_type_id = ModelField() vocabulary_type = VocabularyTypeField(dump=False)
def test_esdumper_with_edtfext(app, db, minimal_record, location, date,
                               expected_start, expected_end):
    """Test EDTF extension implementation."""
    es_dumper = ElasticsearchDumper(
        extensions=[EDTFDumperExt('metadata.publication_date')])
    minimal_record['metadata']['publication_date'] = date

    # Create the record.
    rec = BibliographicRecord.create(minimal_record)
    db.session.commit()

    # Dump it: start/end fields are derived from the EDTF value.
    dumped = rec.dumps(dumper=es_dumper)
    assert dumped['metadata']['publication_date_start'] == expected_start
    assert dumped['metadata']['publication_date_end'] == expected_end
    assert dumped['metadata']['publication_date'] == date

    # Load it: the derived fields are stripped again.
    loaded = BibliographicRecord.loads(dumped, loader=es_dumper)
    assert 'publication_date_start' not in loaded['metadata']
    assert 'publication_date_end' not in loaded['metadata']
    assert 'publication_date' in loaded['metadata']
def test_locationsdumper_with_point_geometry(app, db, minimal_record):
    es_dumper = ElasticsearchDumper(extensions=[LocationsDumper()])

    point_feature = {
        'geometry': {
            'type': 'Point',
            'coordinates': [6.052778, 46.234167]
        }
    }
    minimal_record['metadata']['locations'] = {'features': [point_feature]}

    rec = RDMRecord.create(minimal_record)

    # Dump it.
    dumped = rec.dumps(dumper=es_dumper)

    # Centroid has been inferred: for a point it is its own coordinates.
    dumped_feature = dumped['metadata']['locations']['features'][0]
    assert (dumped_feature['centroid'] ==
            point_feature['geometry']['coordinates'])

    # And it round-trips.
    assert (rec.loads(dumped, loader=es_dumper)['metadata']['locations'] ==
            minimal_record['metadata']['locations'])
class RecordWithRelations(Record):
    """Record API with a single and a list primary-key relation."""

    # Relations dereferencing the related record's `iso` attribute.
    relations = RelationsField(
        language=PKRelation(
            key='language', attrs=['iso'], record_cls=Record),
        languages=PKListRelation(
            key='languages', attrs=['iso'], record_cls=Record),
    )
    # Dumper that inlines the dereferenced relations at dump time.
    dumper = ElasticsearchDumper(
        extensions=[RelationDumperExt('relations')])
class BibliographicDraft(Draft):
    """Bibliographic draft API."""

    # Database model for the draft metadata.
    model_cls = DraftMetadata
    # Elasticsearch index and search alias for drafts.
    index = IndexField('rdmrecords-drafts-draft-v1.0.0',
                       search_alias='rdmrecords-drafts')
    # Dumper with the EDTF publication date extension.
    dumper = ElasticsearchDumper(
        extensions=[EDTFDumperExt('metadata.publication_date')])
class BibliographicRecord(Record):
    """Bibliographic Record API."""

    # Database model for the record metadata.
    model_cls = RecordMetadata
    # Elasticsearch index and search alias for published records.
    index = IndexField('rdmrecords-records-record-v1.0.0',
                       search_alias='rdmrecords-records')
    # Dumper with the EDTF publication date extension.
    dumper = ElasticsearchDumper(
        extensions=[EDTFDumperExt('metadata.publication_date')])
def create_record_class(self):
    """Create record class."""
    # Assemble the class-level system fields for the generated type.
    attrs = {
        "model_cls": self.model_cls,
        "schema": ConstantField("$schema", self.schema_path),
        "index": IndexField(self.index_name),
        "pid": PIDField("id", provider=RecordIdProviderV2),
        # Fall back to a plain dumper when none is configured.
        "dumper": self.record_dumper or ElasticsearchDumper(),
    }
    self.record_cls = type(self.record_type_name, (Record,), attrs)
def test_esdumper_with_model(testapp, db, example_data):
    """Test the Elasticsearch dumper."""
    # Create a record.
    rec = Record.create(example_data)
    db.session.commit()

    # Dump it.
    dumped = rec.dumps(dumper=ElasticsearchDumper())
    assert dumped['uuid'] == str(rec.id)
    # The dumped version_id is offset by one from the revision id.
    assert dumped['version_id'] == rec.revision_id + 1
    # Timestamps compared to second precision (first 19 ISO chars).
    assert dumped['created'][:19] == rec.created.isoformat()[:19]
    assert dumped['updated'][:19] == rec.updated.isoformat()[:19]

    # Load it: all model fields must round-trip.
    loaded = Record.loads(dumped, loader=ElasticsearchDumper())
    assert loaded == rec
    assert loaded.id == rec.id
    assert loaded.revision_id == rec.revision_id
    assert loaded.created == rec.created
    assert loaded.updated == rec.updated
    assert loaded.model.json == rec.model.json
class FileRecord(RecordBase, SystemFieldsMixin):
    """Base class for a record describing a file."""

    #: Disable signals - we use record extensions instead.
    send_signals = False
    #: Disable JSONRef replacement.
    enable_jsonref = False
    #: Default model class used by the record API (specify in subclass).
    model_cls = None
    #: Record API class.
    record_cls = None
    #: Default dumper (which happens to also be used for indexing).
    dumper = ElasticsearchDumper()
    #: Metadata system field.
    metadata = DictField(clear_none=True, create_if_missing=True)

    # Model-backed fields: file key, object version and owning record.
    key = ModelField()
    object_version_id = ModelField()
    object_version = ModelField(dump=False)
    record_id = ModelField()
    _record = ModelField('record', dump=False)

    @classmethod
    def get_by_key(cls, record_id, key):
        """Get a record file by record ID and filename/key."""
        with db.session.no_autoflush:
            # BUGFIX: filter on the model's columns (as list_by_record
            # does); `cls.record_id`/`cls.key` are system-field
            # descriptors, not SQLAlchemy columns, so the original
            # filter clause could never match.
            obj = cls.model_cls.query.filter(
                cls.model_cls.record_id == record_id,
                cls.model_cls.key == key).one_or_none()
            if obj:
                return cls(obj.data, model=obj)

    @classmethod
    def list_by_record(cls, record_id):
        """List all record files by record ID."""
        for obj in cls.model_cls.query.filter(
                cls.model_cls.record_id == record_id):
            yield cls(obj.data, model=obj)

    @property
    def file(self):
        """File wrapper object (None if no object version is attached)."""
        if self.object_version:
            return File(object_model=self.object_version)

    @property
    def record(self):
        """Get the file's record."""
        return self.record_cls(self._record.data, model=self._record)

    def __repr__(self):
        """Representation string for the record file."""
        # BUGFIX: close the angle bracket (trailing '>' was missing).
        return f"<{type(self).__name__}({self.key}, {self.metadata})>"
def create_record_class(self):
    """Create record class."""
    # Build the PID field from the configured class and kwargs.
    pid_field = self.pid_field_cls("id", **self.pid_field_kwargs)
    # Assemble the class-level system fields for the generated type.
    attrs = {
        "model_cls": self.model_cls,
        "schema": ConstantField("$schema", self.schema_path),
        "index": IndexField(self.index_name),
        "pid": pid_field,
        # Fall back to a plain dumper when none is configured.
        "dumper": self.record_dumper or ElasticsearchDumper(),
    }
    self.record_cls = type(self.record_type_name, (Record,), attrs)
class RDMParent(ParentRecordBase): """Example parent record.""" # Configuration model_cls = models.RDMParentMetadata dumper = ElasticsearchDumper(extensions=[ GrantTokensDumperExt("access.grant_tokens"), ]) # System fields schema = ConstantField('$schema', 'local://records/parent-v1.0.0.json') access = ParentRecordAccessField()
def test_esdumper_sa_datatypes(testapp, database):
    """Test to determine the data type of an SQLAlchemy field."""
    db = database

    class Model(db.Model, RecordMetadataBase):
        string = db.Column(db.String(255))
        text = db.Column(db.Text)
        biginteger = db.Column(db.BigInteger)
        integer = db.Column(db.Integer)
        boolean = db.Column(db.Boolean(name='boolean'))
        text_variant = db.Column(db.Text().with_variant(
            mysql.VARCHAR(255), 'mysql'))

    # Column name -> expected Python type resolved by the dumper.
    expected_types = {
        'biginteger': int,
        'boolean': bool,
        'created': datetime,
        'id': UUID,
        'integer': int,
        'json': dict,
        'text_variant': str,
        'text': str,
        'updated': datetime,
    }
    for column_name, expected in expected_types.items():
        assert ElasticsearchDumper._sa_type(Model, column_name) == expected

    # Unknown columns resolve to None.
    assert ElasticsearchDumper._sa_type(Model, 'invalid') is None
class Record(RecordBase):
    """Example bibliographic record API."""

    # Database model for the record metadata.
    model_cls = models.RecordMetadata
    # JSON schema constant injected into every record.
    schema = ConstantField('$schema', 'local://records/record-v1.0.0.json')
    # Elasticsearch index and search alias.
    index = IndexField('records-record-v1.0.0', search_alias='records')
    # Persistent identifier minted with the v2 record id provider.
    pid = PIDField('id', provider=RecordIdProviderV2)

    # Definitions of relationships from a bibliographic record to the
    # generic vocabularies.
    relations = RelationsField(languages=PIDListRelation(
        'metadata.languages',
        keys=['id', 'title'],
        pid_field=Vocabulary.pid.with_type_ctx('languages')),
    )

    # Dumper inlining the dereferenced relations at dump time.
    dumper = ElasticsearchDumper(extensions=[
        RelationDumperExt('relations'),
    ])
class Record(RecordBase, SystemFieldsMixin): """Base class for record APIs. Subclass this record, and specify as minimum the ``model_cls`` class-level attribute. """ #: Disable signals - we use record extensions instead (more precise). send_signals = False #: Disable JSONRef replacement (due to complexity of configuration). enable_jsonref = False #: Default model class used by the record API (specify in subclass). model_cls = None #: Default dumper (which happens to also be used for indexing). dumper = ElasticsearchDumper() #: Metadata system field. metadata = DictField(clear_none=True, create_if_missing=True)
def test_eslistdumper_with_edtfext_not_defined(app, db, minimal_record):
    """Test EDTF list extension pointed at a missing array field."""
    es_dumper = ElasticsearchDumper(extensions=[
        EDTFListDumperExt("metadata.non_existing_array_field", "date"),
    ])

    # Create the record.
    rec = RDMRecord.create(minimal_record, parent=RDMParent.create({}))
    db.session.commit()

    # Dumping must not invent the field nor a range for it.
    dumped = rec.dumps(dumper=es_dumper)
    assert "non_existing_array_field_range" not in dumped["metadata"]
    assert "non_existing_array_field" not in dumped["metadata"]

    # Loading must leave the record equally untouched.
    loaded = RDMRecord.loads(dumped, loader=es_dumper)
    assert "non_existing_array_field_range" not in loaded["metadata"]
    assert "non_existing_array_field" not in loaded["metadata"]
def test_esdumper_with_edtfext_parse_error(app, db, location, minimal_record):
    """Test EDTF extension on an unparsable value."""
    # NOTE: We cannot trigger this on publication_date because it is checked
    # by marshmallow on record creation. We can simply give a non date field.
    es_dumper = ElasticsearchDumper(extensions=[
        EDTFDumperExt("metadata.resource_type.type"),
    ])

    # Create the record.
    rec = RDMRecord.create(minimal_record, parent=RDMParent.create({}))
    db.session.commit()

    # Dump it: no range field is added for a value that fails parsing.
    dumped = rec.dumps(dumper=es_dumper)
    assert "type_range" not in dumped["metadata"]["resource_type"]
    assert "type" in dumped["metadata"]["resource_type"]

    # Load it.
    loaded = RDMRecord.loads(dumped, loader=es_dumper)
    assert "type_range" not in loaded["metadata"]["resource_type"]
    assert "type" in loaded["metadata"]["resource_type"]
def test_grant_tokens_dumper(app, db, minimal_record, location):
    """Test grant token dumper extension implementation."""
    dumper = ElasticsearchDumper(
        extensions=[GrantTokensDumperExt("access.grant_tokens")])

    data = {
        "access": {
            "grants": [
                {
                    "subject": "user",
                    "id": "1",
                    "level": "view"
                },
                {
                    "subject": "user",
                    "id": "2",
                    "level": "manage"
                },
            ]
        }
    }

    # Create the parent record
    parent = RDMParent.create(data)
    parent.commit()
    db.session.commit()

    grant1 = parent.access.grants[0]
    grant2 = parent.access.grants[1]

    # Dump it: one token per grant ends up in the dump.
    dump = parent.dumps(dumper=dumper)
    assert len(dump["access"]["grant_tokens"]) == 2
    assert grant1.to_token() in dump["access"]["grant_tokens"]
    assert grant2.to_token() in dump["access"]["grant_tokens"]

    # Load it: the computed tokens are stripped again.
    # BUGFIX: the assertion was duplicated verbatim; one check suffices.
    new_record = RDMParent.loads(dump, loader=dumper)
    assert "grant_tokens" not in new_record["access"]
def test_esdumper_with_edtfext_parse_error(app, db, location, minimal_record):
    """Test EDTF extension on an unparsable value."""
    # NOTE: We cannot trigger this on publication_date because it is checked
    # by marshmallow on record creation. We can simply give a non date field.
    es_dumper = ElasticsearchDumper(
        extensions=[EDTFDumperExt('metadata.resource_type.type')])

    # Create the record.
    rec = BibliographicRecord.create(minimal_record)
    db.session.commit()

    # Dump it: no start/end fields are added for a non-EDTF value.
    dumped = rec.dumps(dumper=es_dumper)
    resource_type = dumped['metadata']['resource_type']
    assert 'type_start' not in resource_type
    assert 'type_end' not in resource_type
    assert 'type' in resource_type

    # Load it.
    loaded = BibliographicRecord.loads(dumped, loader=es_dumper)
    resource_type = loaded['metadata']['resource_type']
    assert 'type_start' not in resource_type
    assert 'type_end' not in resource_type
    assert 'type' in resource_type
def test_esdumper_with_edtfext_not_defined(app, db, location, minimal_record):
    """Test EDTF extension pointed at a missing field."""
    es_dumper = ElasticsearchDumper(
        extensions=[EDTFDumperExt('metadata.non_existing_field')])

    # Create the record.
    rec = BibliographicRecord.create(minimal_record)
    db.session.commit()

    # Dumping must not invent the field nor derived start/end fields.
    dumped = rec.dumps(dumper=es_dumper)
    for field in ('non_existing_field_start', 'non_existing_field_end',
                  'non_existing_field'):
        assert field not in dumped['metadata']

    # Loading must leave the record equally untouched.
    loaded = BibliographicRecord.loads(dumped, loader=es_dumper)
    for field in ('non_existing_field_start', 'non_existing_field_end',
                  'non_existing_field'):
        assert field not in loaded['metadata']
def test_esdumper_with_extensions(testapp, db, example_data):
    """Test extensions implementation."""

    # A simple extension that adds a computed field on dump and
    # strips it again on load.
    class CountExt(ElasticsearchDumperExt):
        def dump(self, record, data):
            data['count'] = len(data['mylist'])

        def load(self, data, record_cls):
            data.pop('count')

    es_dumper = ElasticsearchDumper(extensions=[CountExt()])

    # Create the record.
    rec = Record.create({'mylist': ['a', 'b']})
    db.session.commit()

    # Dump it: the computed field is present.
    dumped = rec.dumps(dumper=es_dumper)
    assert dumped['count'] == 2

    # Load it: the computed field is stripped again.
    loaded = Record.loads(dumped, loader=es_dumper)
    assert 'count' not in loaded