def test_esdumper_without_model(testapp, db, example_data):
    """Test the Elasticsearch dumper."""
    # Dump without a model: model-backed keys cannot be resolved.
    dumped = Record(example_data).dumps(dumper=ElasticsearchDumper())
    for model_key in ('uuid', 'version_id', 'created', 'updated'):
        # Each model-derived key is set to None when no model is attached.
        assert dumped[model_key] is None

    # Load without a model defined.
    loaded = Record.loads(dumped, loader=ElasticsearchDumper())
    assert loaded.model is None  # model will not be set
    assert loaded == example_data  # data round-trips unchanged
def test_locationsdumper_with_polygon_and_shapely(app, db, minimal_record):
    pytest.importorskip('shapely')

    es_dumper = ElasticsearchDumper(extensions=[LocationsDumper()])

    # This also tests shapes with elevations (a third coordinate).
    # NOTE(review): this test writes `locations` at the record top level,
    # while sibling tests use `metadata.locations` — confirm intended.
    minimal_record['locations'] = {
        'features': [{
            'geometry': {
                'type': 'Polygon',
                'coordinates': [[
                    [100.0, 0.0, 10],
                    [101.0, 0.0, 10],
                    [101.0, 1.0, 30],
                    [100.0, 1.0, 30],
                    [100.0, 0.0, 10],
                ]]
            }
        }],
    }

    rec = RDMRecord.create(minimal_record)
    dumped = rec.dumps(dumper=es_dumper)

    # 3D geometries still lead to 2D centroids.
    assert dumped['locations']['features'][0]['centroid'] == [100.5, 0.5]
class CommonFieldsMixin:
    """Common system fields between records and drafts."""

    # Model class tracking version state across record versions.
    versions_model_cls = models.RDMVersionsState
    # API class used for the parent record.
    parent_record_cls = RDMParent

    # JSON schema constant injected into every record.
    schema = ConstantField(
        '$schema', 'local://records/record-v2.0.0.json')

    # Elasticsearch dumper with EDTF date and relations extensions.
    dumper = ElasticsearchDumper(
        extensions=[
            EDTFDumperExt('metadata.publication_date'),
            EDTFListDumperExt("metadata.dates", "date"),
            RelationDumperExt('relations'),
        ]
    )

    # Relation to the languages vocabulary (dereferences id/title).
    relations = RelationsField(
        languages=PIDListRelation(
            'metadata.languages',
            attrs=['id', 'title'],
            pid_field=Vocabulary.pid.with_type_ctx('languages')
        ),
    )

    # Bucket fields live on the model only (not dumped).
    bucket_id = ModelField(dump=False)
    bucket = ModelField(dump=False)

    # Access control field.
    access = RecordAccessField()

    # Dumped (dump=True) so the published status is indexed/searchable.
    is_published = PIDStatusCheckField(status=PIDStatus.REGISTERED, dump=True)

    # External persistent identifiers.
    pids = DictField("pids")
class BibliographicRecord(Record):
    """Bibliographic Record API."""

    # Database model for the record metadata.
    model_cls = models.RecordMetadata

    # Elasticsearch index and its search alias.
    index = IndexField('rdmrecords-records-record-v1.0.0',
                       search_alias='rdmrecords-records')

    # Dumper with EDTF publication date and relations extensions.
    dumper = ElasticsearchDumper(extensions=[
        EDTFDumperExt('metadata.publication_date'),
        RelationDumperExt('relations'),
    ])

    # Relation to the languages vocabulary.
    relations = RelationsField(languages=PIDListRelation(
        'metadata.languages', attrs=['metadata'], pid_field=Language.pid),
    )

    # Files field: bucket lifecycle is managed by the service layer.
    files = FilesField(
        store=False,
        file_cls=RecordFile,
        # Don't create
        create=False,
        # Don't delete, we'll manage in the service
        delete=False,
    )

    # Bucket fields live on the model only (not dumped).
    bucket_id = ModelField(dump=False)
    bucket = ModelField(dump=False)
def test_locationsdumper_with_no_featurecollection(app, db, minimal_record):
    # Smoke test: a record without any locations dumps without raising.
    es_dumper = ElasticsearchDumper(extensions=[LocationsDumper()])
    rec = RDMRecord.create(minimal_record)
    rec.dumps(dumper=es_dumper)
def test_locationsdumper_with_polygon_and_mock_shapely(app, db, minimal_record):
    patch_target = 'invenio_rdm_records.records.dumpers.locations.shapely'
    with unittest.mock.patch(patch_target) as shapely:
        es_dumper = ElasticsearchDumper(extensions=[LocationsDumper()])

        minimal_record['metadata']['locations'] = {
            'features': [{
                'geometry': {
                    'type': 'Polygon',
                    'coordinates': [[
                        [100.0, 0.0],
                        [101.0, 0.0],
                        [101.0, 1.0],
                        [100.0, 1.0],
                        [100.0, 0.0],
                    ]]
                }
            }],
        }

        rec = RDMRecord.create(minimal_record)

        # Fake shapely so no real geometry computation happens.
        fake_shape = unittest.mock.Mock()
        fake_shape.centroid.x, fake_shape.centroid.y = 100.5, 0.5
        shapely.geometry.shape.return_value = fake_shape

        dumped = rec.dumps(dumper=es_dumper)

        # The dumper must hand the raw GeoJSON geometry to shapely once.
        geometry = \
            minimal_record['metadata']['locations']['features'][0]['geometry']
        shapely.geometry.shape.assert_called_once_with(geometry)
        centroid = dumped['metadata']['locations']['features'][0]['centroid']
        assert centroid == [100.5, 0.5]
def test_locationsdumper_with_polygon_and_no_shapely(app, db, minimal_record):
    es_dumper = ElasticsearchDumper(extensions=[LocationsDumper()])

    minimal_record['metadata']['locations'] = {
        'features': [{
            'geometry': {
                'type': 'Polygon',
                'coordinates': [[
                    [100.0, 0.0],
                    [101.0, 0.0],
                    [101.0, 1.0],
                    [100.0, 1.0],
                    [100.0, 0.0],
                ]]
            }
        }],
    }

    rec = RDMRecord.create(minimal_record)

    # Without shapely available, dumping warns and computes no centroid
    # for the polygon geometry.
    with pytest.warns(UserWarning):
        dumped = rec.dumps(dumper=es_dumper)

    assert 'centroid' not in dumped['metadata']['locations']['features'][0]
class CommonFieldsMixin:
    """Common system fields between records and drafts."""

    # Model class tracking version state across record versions.
    versions_model_cls = models.RDMVersionsState
    # API class used for the parent record.
    parent_record_cls = RDMParent

    # JSON schema constant injected into every record.
    schema = ConstantField(
        '$schema', 'http://localhost/schemas/records/record-v2.0.0.json')

    # Elasticsearch dumper with EDTF date and relations extensions.
    dumper = ElasticsearchDumper(
        extensions=[
            EDTFDumperExt('metadata.publication_date'),
            EDTFListDumperExt("metadata.dates", "date"),
            RelationDumperExt('relations'),
        ]
    )

    # Relation to the languages vocabulary (dereferences id/title).
    relations = RelationsField(
        languages=PIDListRelation(
            'metadata.languages',
            attrs=['id', 'title'],
            pid_field=Vocabulary.pid.with_type_ctx('languages')
        ),
    )

    # Bucket fields live on the model only (not dumped).
    bucket_id = ModelField(dump=False)
    bucket = ModelField(dump=False)

    # Access control field.
    access = RecordAccessField()

    # We redefine the property as we extend the `PIDStatusCheckField` to dump
    # the property in ES in order to be available for aggregation
    is_published = IsPublishedField(status=PIDStatus.REGISTERED)
def test_eslistdumper_with_edtfext_parse_error(app, db, minimal_record):
    """Test the EDTF list dumper extension on unparsable values."""
    dumper = ElasticsearchDumper(
        extensions=[
            EDTFListDumperExt("metadata.creators", "family_name"),
        ]
    )

    # Create the record
    record = RDMRecord.create(minimal_record, parent=RDMParent.create({}))
    db.session.commit()

    # Dump it: a non-EDTF family_name must not produce a range field.
    dump = record.dumps(dumper=dumper)
    person_or_org = dump["metadata"]["creators"][0]["person_or_org"]
    assert "family_name_range" not in person_or_org
    assert "family_name" in person_or_org

    # Load it
    new_record = RDMRecord.loads(dump, loader=dumper)
    # BUGFIX: verify the *loaded* record — the original re-read `dump`
    # here, so the load path was never actually checked.
    person_or_org = new_record["metadata"]["creators"][0]["person_or_org"]
    assert 'family_name_range' not in person_or_org
    assert 'family_name' in person_or_org
    assert 'type_start' not in new_record['metadata']['resource_type']
    assert 'type_end' not in new_record['metadata']['resource_type']
    assert 'type' in new_record['metadata']['resource_type']
def test_esdumper_with_edtfext(app, db, minimal_record, location, date,
                               expected_start, expected_end):
    """Test EDTF extension implementation."""
    # Dumper with both the scalar and the list EDTF extensions.
    es_dumper = ElasticsearchDumper(extensions=[
        EDTFDumperExt("metadata.publication_date"),
        EDTFListDumperExt("metadata.dates", "date"),
    ])

    metadata = minimal_record["metadata"]
    metadata["publication_date"] = date
    metadata["dates"] = [{"date": date}]

    # Create the record.
    rec = RDMRecord.create(minimal_record, parent=RDMParent.create({}))
    db.session.commit()

    # Dump it: range fields are added alongside the original values.
    dumped = rec.dumps(dumper=es_dumper)
    pub_range = dumped["metadata"]["publication_date_range"]
    assert pub_range["gte"] == expected_start
    assert pub_range["lte"] == expected_end
    assert dumped["metadata"]["publication_date"] == date
    date_range = dumped["metadata"]["dates"][0]["date_range"]
    assert date_range["gte"] == expected_start
    assert date_range["lte"] == expected_end
    assert dumped["metadata"]["dates"][0]["date"] == date

    # Load it: the computed range fields are stripped again.
    loaded = RDMRecord.loads(dumped, loader=es_dumper)
    assert "publication_date_range" not in loaded["metadata"]
    assert "publication_date" in loaded["metadata"]
    assert "date_range" not in loaded["metadata"]["dates"][0]
    assert "date" in loaded["metadata"]["dates"][0]
class Vocabulary(RecordBase): """Example record API.""" # Configuration model_cls = VocabularyMetadata dumper = ElasticsearchDumper( extensions=[VocabularyTypeElasticsearchDumperExt()] ) # System fields schema = ConstantField( "$schema", "https://localhost/schemas/vocabularies/vocabulary-v1.0.0.json", ) index = IndexField( "vocabularies-vocabulary-v1.0.0", search_alias="vocabularies" ) # TODO: This should be changed to use something else than the recidv2 pid = PIDField("id", provider=RecordIdProviderV2) vocabulary_type_id = ModelField() vocabulary_type = VocabularyTypeField(dump=False)
def test_esdumper_with_edtfext(app, db, minimal_record, location, date,
                               expected_start, expected_end):
    """Test EDTF extension implementation."""
    es_dumper = ElasticsearchDumper(
        extensions=[EDTFDumperExt('metadata.publication_date')])
    minimal_record['metadata']['publication_date'] = date

    # Create the record.
    rec = BibliographicRecord.create(minimal_record)
    db.session.commit()

    # Dump it: start/end fields are derived from the EDTF value.
    dumped = rec.dumps(dumper=es_dumper)
    assert dumped['metadata']['publication_date_start'] == expected_start
    assert dumped['metadata']['publication_date_end'] == expected_end
    assert dumped['metadata']['publication_date'] == date

    # Load it: the derived fields are stripped again.
    loaded = BibliographicRecord.loads(dumped, loader=es_dumper)
    assert 'publication_date_start' not in loaded['metadata']
    assert 'publication_date_end' not in loaded['metadata']
    assert 'publication_date' in loaded['metadata']
def test_locationsdumper_with_point_geometry(app, db, minimal_record):
    es_dumper = ElasticsearchDumper(extensions=[LocationsDumper()])

    point_feature = {
        'geometry': {
            'type': 'Point',
            'coordinates': [6.052778, 46.234167]
        }
    }
    minimal_record['metadata']['locations'] = {'features': [point_feature]}

    rec = RDMRecord.create(minimal_record)

    # Dump it.
    dumped = rec.dumps(dumper=es_dumper)

    # Centroid has been inferred: for a point it is its own coordinates.
    dumped_feature = dumped['metadata']['locations']['features'][0]
    assert (dumped_feature['centroid'] ==
            point_feature['geometry']['coordinates'])

    # And it round-trips.
    assert (rec.loads(dumped, loader=es_dumper)['metadata']['locations'] ==
            minimal_record['metadata']['locations'])
class RecordWithRelations(Record):
    """Record API with a single and a list primary-key relation."""

    # Relations dereferencing the related record's `iso` attribute.
    relations = RelationsField(
        language=PKRelation(
            key='language', attrs=['iso'], record_cls=Record),
        languages=PKListRelation(
            key='languages', attrs=['iso'], record_cls=Record),
    )
    # Dumper that inlines the dereferenced relations at dump time.
    dumper = ElasticsearchDumper(
        extensions=[RelationDumperExt('relations')])
class BibliographicDraft(Draft):
    """Bibliographic draft API."""

    # Database model for the draft metadata.
    model_cls = DraftMetadata
    # Elasticsearch index and search alias for drafts.
    index = IndexField('rdmrecords-drafts-draft-v1.0.0',
                       search_alias='rdmrecords-drafts')
    # Dumper with the EDTF publication date extension.
    dumper = ElasticsearchDumper(
        extensions=[EDTFDumperExt('metadata.publication_date')])
class BibliographicRecord(Record):
    """Bibliographic Record API."""

    # Database model for the record metadata.
    model_cls = RecordMetadata
    # Elasticsearch index and search alias for published records.
    index = IndexField('rdmrecords-records-record-v1.0.0',
                       search_alias='rdmrecords-records')
    # Dumper with the EDTF publication date extension.
    dumper = ElasticsearchDumper(
        extensions=[EDTFDumperExt('metadata.publication_date')])
def create_record_class(self):
    """Create record class."""
    # Assemble the class-level system fields for the generated type.
    attrs = {
        "model_cls": self.model_cls,
        "schema": ConstantField("$schema", self.schema_path),
        "index": IndexField(self.index_name),
        "pid": PIDField("id", provider=RecordIdProviderV2),
        # Fall back to a plain dumper when none is configured.
        "dumper": self.record_dumper or ElasticsearchDumper(),
    }
    self.record_cls = type(self.record_type_name, (Record,), attrs)
def test_esdumper_with_model(testapp, db, example_data):
    """Test the Elasticsearch dumper."""
    # Create a record.
    rec = Record.create(example_data)
    db.session.commit()

    # Dump it.
    dumped = rec.dumps(dumper=ElasticsearchDumper())
    assert dumped['uuid'] == str(rec.id)
    # The dumped version_id is offset by one from the revision id.
    assert dumped['version_id'] == rec.revision_id + 1
    # Timestamps compared to second precision (first 19 ISO chars).
    assert dumped['created'][:19] == rec.created.isoformat()[:19]
    assert dumped['updated'][:19] == rec.updated.isoformat()[:19]

    # Load it: all model fields must round-trip.
    loaded = Record.loads(dumped, loader=ElasticsearchDumper())
    assert loaded == rec
    assert loaded.id == rec.id
    assert loaded.revision_id == rec.revision_id
    assert loaded.created == rec.created
    assert loaded.updated == rec.updated
    assert loaded.model.json == rec.model.json
class FileRecord(RecordBase, SystemFieldsMixin):
    """Base class for a record describing a file."""

    #: Disable signals - we use record extensions instead.
    send_signals = False
    #: Disable JSONRef replacement.
    enable_jsonref = False
    #: Default model class used by the record API (specify in subclass).
    model_cls = None
    #: Record API class.
    record_cls = None
    #: Default dumper (which happens to also be used for indexing).
    dumper = ElasticsearchDumper()
    #: Metadata system field.
    metadata = DictField(clear_none=True, create_if_missing=True)

    # Model-backed fields: file key, object version and owning record.
    key = ModelField()
    object_version_id = ModelField()
    object_version = ModelField(dump=False)
    record_id = ModelField()
    _record = ModelField('record', dump=False)

    @classmethod
    def get_by_key(cls, record_id, key):
        """Get a record file by record ID and filename/key."""
        with db.session.no_autoflush:
            # BUGFIX: filter on the model's columns (as list_by_record
            # does); `cls.record_id`/`cls.key` are system-field
            # descriptors, not SQLAlchemy columns, so the original
            # filter clause could never match.
            obj = cls.model_cls.query.filter(
                cls.model_cls.record_id == record_id,
                cls.model_cls.key == key).one_or_none()
            if obj:
                return cls(obj.data, model=obj)

    @classmethod
    def list_by_record(cls, record_id):
        """List all record files by record ID."""
        for obj in cls.model_cls.query.filter(
                cls.model_cls.record_id == record_id):
            yield cls(obj.data, model=obj)

    @property
    def file(self):
        """File wrapper object (None if no object version is attached)."""
        if self.object_version:
            return File(object_model=self.object_version)

    @property
    def record(self):
        """Get the file's record."""
        return self.record_cls(self._record.data, model=self._record)

    def __repr__(self):
        """Representation string for the record file."""
        # BUGFIX: close the angle bracket (trailing '>' was missing).
        return f"<{type(self).__name__}({self.key}, {self.metadata})>"
def create_record_class(self):
    """Create record class."""
    # Build the PID field from the configured class and kwargs.
    pid_field = self.pid_field_cls("id", **self.pid_field_kwargs)
    # Assemble the class-level system fields for the generated type.
    attrs = {
        "model_cls": self.model_cls,
        "schema": ConstantField("$schema", self.schema_path),
        "index": IndexField(self.index_name),
        "pid": pid_field,
        # Fall back to a plain dumper when none is configured.
        "dumper": self.record_dumper or ElasticsearchDumper(),
    }
    self.record_cls = type(self.record_type_name, (Record,), attrs)
class RDMParent(ParentRecordBase): """Example parent record.""" # Configuration model_cls = models.RDMParentMetadata dumper = ElasticsearchDumper(extensions=[ GrantTokensDumperExt("access.grant_tokens"), ]) # System fields schema = ConstantField('$schema', 'local://records/parent-v1.0.0.json') access = ParentRecordAccessField()
def test_esdumper_sa_datatypes(testapp, database):
    """Test to determine the data type of an SQLAlchemy field."""
    db = database

    class Model(db.Model, RecordMetadataBase):
        string = db.Column(db.String(255))
        text = db.Column(db.Text)
        biginteger = db.Column(db.BigInteger)
        integer = db.Column(db.Integer)
        boolean = db.Column(db.Boolean(name='boolean'))
        text_variant = db.Column(db.Text().with_variant(
            mysql.VARCHAR(255), 'mysql'))

    # Column name -> expected Python type resolved by the dumper.
    expected_types = {
        'biginteger': int,
        'boolean': bool,
        'created': datetime,
        'id': UUID,
        'integer': int,
        'json': dict,
        'text_variant': str,
        'text': str,
        'updated': datetime,
    }
    for column_name, expected in expected_types.items():
        assert ElasticsearchDumper._sa_type(Model, column_name) == expected

    # Unknown columns resolve to None.
    assert ElasticsearchDumper._sa_type(Model, 'invalid') is None
class Record(RecordBase):
    """Example bibliographic record API."""

    # Database model for the record metadata.
    model_cls = models.RecordMetadata
    # JSON schema constant injected into every record.
    schema = ConstantField('$schema', 'local://records/record-v1.0.0.json')
    # Elasticsearch index and search alias.
    index = IndexField('records-record-v1.0.0', search_alias='records')
    # Persistent identifier minted with the v2 record id provider.
    pid = PIDField('id', provider=RecordIdProviderV2)

    # Definitions of relationships from a bibliographic record to the
    # generic vocabularies.
    relations = RelationsField(languages=PIDListRelation(
        'metadata.languages',
        keys=['id', 'title'],
        pid_field=Vocabulary.pid.with_type_ctx('languages')),
    )

    # Dumper inlining the dereferenced relations at dump time.
    dumper = ElasticsearchDumper(extensions=[
        RelationDumperExt('relations'),
    ])
class Record(RecordBase, SystemFieldsMixin): """Base class for record APIs. Subclass this record, and specify as minimum the ``model_cls`` class-level attribute. """ #: Disable signals - we use record extensions instead (more precise). send_signals = False #: Disable JSONRef replacement (due to complexity of configuration). enable_jsonref = False #: Default model class used by the record API (specify in subclass). model_cls = None #: Default dumper (which happens to also be used for indexing). dumper = ElasticsearchDumper() #: Metadata system field. metadata = DictField(clear_none=True, create_if_missing=True)
def test_eslistdumper_with_edtfext_not_defined(app, db, minimal_record):
    """Test EDTF list extension pointed at a missing array field."""
    es_dumper = ElasticsearchDumper(extensions=[
        EDTFListDumperExt("metadata.non_existing_array_field", "date"),
    ])

    # Create the record.
    rec = RDMRecord.create(minimal_record, parent=RDMParent.create({}))
    db.session.commit()

    # Dumping must not invent the field nor a range for it.
    dumped = rec.dumps(dumper=es_dumper)
    assert "non_existing_array_field_range" not in dumped["metadata"]
    assert "non_existing_array_field" not in dumped["metadata"]

    # Loading must leave the record equally untouched.
    loaded = RDMRecord.loads(dumped, loader=es_dumper)
    assert "non_existing_array_field_range" not in loaded["metadata"]
    assert "non_existing_array_field" not in loaded["metadata"]
def test_esdumper_with_edtfext_parse_error(app, db, location, minimal_record):
    """Test EDTF extension on an unparsable value."""
    # NOTE: We cannot trigger this on publication_date because it is checked
    # by marshmallow on record creation. We can simply give a non date field.
    es_dumper = ElasticsearchDumper(extensions=[
        EDTFDumperExt("metadata.resource_type.type"),
    ])

    # Create the record.
    rec = RDMRecord.create(minimal_record, parent=RDMParent.create({}))
    db.session.commit()

    # Dump it: no range field is added for a value that fails parsing.
    dumped = rec.dumps(dumper=es_dumper)
    assert "type_range" not in dumped["metadata"]["resource_type"]
    assert "type" in dumped["metadata"]["resource_type"]

    # Load it.
    loaded = RDMRecord.loads(dumped, loader=es_dumper)
    assert "type_range" not in loaded["metadata"]["resource_type"]
    assert "type" in loaded["metadata"]["resource_type"]
def test_grant_tokens_dumper(app, db, minimal_record, location):
    """Test grant token dumper extension implementation."""
    dumper = ElasticsearchDumper(
        extensions=[GrantTokensDumperExt("access.grant_tokens")])

    data = {
        "access": {
            "grants": [
                {
                    "subject": "user",
                    "id": "1",
                    "level": "view"
                },
                {
                    "subject": "user",
                    "id": "2",
                    "level": "manage"
                },
            ]
        }
    }

    # Create the parent record
    parent = RDMParent.create(data)
    parent.commit()
    db.session.commit()

    grant1 = parent.access.grants[0]
    grant2 = parent.access.grants[1]

    # Dump it: one token per grant ends up in the dump.
    dump = parent.dumps(dumper=dumper)
    assert len(dump["access"]["grant_tokens"]) == 2
    assert grant1.to_token() in dump["access"]["grant_tokens"]
    assert grant2.to_token() in dump["access"]["grant_tokens"]

    # Load it: the computed tokens are stripped again.
    # BUGFIX: the assertion was duplicated verbatim; one check suffices.
    new_record = RDMParent.loads(dump, loader=dumper)
    assert "grant_tokens" not in new_record["access"]
def test_esdumper_with_edtfext_parse_error(app, db, location, minimal_record):
    """Test EDTF extension on an unparsable value."""
    # NOTE: We cannot trigger this on publication_date because it is checked
    # by marshmallow on record creation. We can simply give a non date field.
    es_dumper = ElasticsearchDumper(
        extensions=[EDTFDumperExt('metadata.resource_type.type')])

    # Create the record.
    rec = BibliographicRecord.create(minimal_record)
    db.session.commit()

    # Dump it: no start/end fields are added for a non-EDTF value.
    dumped = rec.dumps(dumper=es_dumper)
    resource_type = dumped['metadata']['resource_type']
    assert 'type_start' not in resource_type
    assert 'type_end' not in resource_type
    assert 'type' in resource_type

    # Load it.
    loaded = BibliographicRecord.loads(dumped, loader=es_dumper)
    resource_type = loaded['metadata']['resource_type']
    assert 'type_start' not in resource_type
    assert 'type_end' not in resource_type
    assert 'type' in resource_type
def test_esdumper_with_edtfext_not_defined(app, db, location, minimal_record):
    """Test EDTF extension pointed at a missing field."""
    es_dumper = ElasticsearchDumper(
        extensions=[EDTFDumperExt('metadata.non_existing_field')])

    # Create the record.
    rec = BibliographicRecord.create(minimal_record)
    db.session.commit()

    # Dumping must not invent the field nor derived start/end fields.
    dumped = rec.dumps(dumper=es_dumper)
    for field in ('non_existing_field_start', 'non_existing_field_end',
                  'non_existing_field'):
        assert field not in dumped['metadata']

    # Loading must leave the record equally untouched.
    loaded = BibliographicRecord.loads(dumped, loader=es_dumper)
    for field in ('non_existing_field_start', 'non_existing_field_end',
                  'non_existing_field'):
        assert field not in loaded['metadata']
def test_esdumper_with_extensions(testapp, db, example_data):
    """Test extensions implementation."""

    # A simple extension that adds a computed field on dump and
    # strips it again on load.
    class CountExt(ElasticsearchDumperExt):
        def dump(self, record, data):
            data['count'] = len(data['mylist'])

        def load(self, data, record_cls):
            data.pop('count')

    es_dumper = ElasticsearchDumper(extensions=[CountExt()])

    # Create the record.
    rec = Record.create({'mylist': ['a', 'b']})
    db.session.commit()

    # Dump it: the computed field is present.
    dumped = rec.dumps(dumper=es_dumper)
    assert dumped['count'] == 2

    # Load it: the computed field is stripped again.
    loaded = Record.loads(dumped, loader=es_dumper)
    assert 'count' not in loaded