def test_mongo_special_id(self, mapper):
    """Check that `immutable_id` values are coerced to BSON ObjectIds and
    that only (in)equality comparisons are accepted for that field.
    """
    from optimade.filtertransformers.mongo import MongoTransformer
    from bson import ObjectId

    class MyMapper(mapper("StructureMapper")):
        ALIASES = (("immutable_id", "_id"), )

    transformer = MongoTransformer(mapper=MyMapper())
    parser = LarkParser(version=self.version, variant=self.variant)

    def query(filter_string):
        """Parse a filter string and transform it to a Mongo query."""
        return transformer.transform(parser.parse(filter_string))

    oid = ObjectId("5cfb441f053b174410700d02")
    assert query('immutable_id = "5cfb441f053b174410700d02"') == {
        "_id": {"$eq": oid}
    }
    assert query('immutable_id != "5cfb441f053b174410700d02"') == {
        "_id": {"$ne": oid}
    }

    # Fuzzy/membership operators cannot apply to an ObjectId-valued field.
    for op in ("CONTAINS", "STARTS WITH", "ENDS WITH", "HAS"):
        with pytest.raises(
            BadRequest,
            match=r".*not supported for query on field 'immutable_id', can only test for equality.*",
        ):
            query(f'immutable_id {op} "abcdef"')
def test_aliased_length_operator(self, mapper):
    """Check that LENGTH queries honour both LENGTH_ALIASES and plain
    ALIASES: the LENGTH alias is resolved first (e.g. elements ->
    nelements) and the resulting field is then mapped through the
    backend aliases (nelements -> nelem).

    Fix: removed an exact duplicate of the
    `cartesian_site_positions LENGTH 3` assertion (copy-paste artefact).
    """
    from optimade.filtertransformers.mongo import MongoTransformer

    class MyMapper(mapper("StructureMapper")):
        ALIASES = (("elements", "my_elements"), ("nelements", "nelem"))
        LENGTH_ALIASES = (
            ("chemsys", "nelements"),
            ("cartesian_site_positions", "nsites"),
            ("elements", "nelements"),
        )
        PROVIDER_FIELDS = ("chemsys",)

    transformer = MongoTransformer(mapper=MyMapper())
    parser = LarkParser(version=self.version, variant=self.variant)

    # Fields with a LENGTH alias query the scalar length field directly.
    assert transformer.transform(
        parser.parse("cartesian_site_positions LENGTH <= 3")
    ) == {"nsites": {"$lte": 3}}
    assert transformer.transform(
        parser.parse("cartesian_site_positions LENGTH < 3")
    ) == {"nsites": {"$lt": 3}}
    assert transformer.transform(
        parser.parse("cartesian_site_positions LENGTH 3")
    ) == {"nsites": 3}
    assert transformer.transform(
        parser.parse("cartesian_site_positions LENGTH >= 10")
    ) == {"nsites": {"$gte": 10}}

    # Fields without a LENGTH alias fall back to an index-existence test.
    assert transformer.transform(
        parser.parse("structure_features LENGTH > 10")
    ) == {"structure_features.11": {"$exists": True}}
    assert transformer.transform(parser.parse("nsites LENGTH > 10")) == {
        "nsites.11": {"$exists": True}
    }

    # LENGTH alias composed with a plain alias: elements -> nelements -> nelem.
    assert transformer.transform(parser.parse("elements LENGTH 3")) == {"nelem": 3}
    # Plain aliases still apply to non-LENGTH queries on the same field.
    assert transformer.transform(parser.parse('elements HAS "Ag"')) == {
        "my_elements": {"$in": ["Ag"]}
    }
    assert transformer.transform(parser.parse("chemsys LENGTH 3")) == {"nelem": 3}
def test_list_length_aliases(self, mapper):
    """LENGTH filters on aliased list properties should be rewritten to
    query the matching scalar length field (nelements, nsites).
    """
    from optimade.filtertransformers.mongo import MongoTransformer

    transformer = MongoTransformer(mapper=mapper("StructureMapper")())
    parser = LarkParser(version=self.version, variant=self.variant)

    def query(filter_string):
        """Parse a filter string and transform it to a Mongo query."""
        return transformer.transform(parser.parse(filter_string))

    assert query("elements LENGTH 3") == {"nelements": 3}
    assert query('elements HAS "Li" AND elements LENGTH = 3') == {
        "$and": [{"elements": {"$in": ["Li"]}}, {"nelements": 3}]
    }
    assert query("elements LENGTH > 3") == {"nelements": {"$gt": 3}}
    assert query("elements LENGTH < 3") == {"nelements": {"$lt": 3}}
    assert query("elements LENGTH = 3") == {"nelements": 3}
    assert query("cartesian_site_positions LENGTH <= 3") == {
        "nsites": {"$lte": 3}
    }
    assert query("cartesian_site_positions LENGTH >= 3") == {
        "nsites": {"$gte": 3}
    }
def test_other_provider_fields(self, mapper):
    """Test that fields from other providers generate queries that treat
    the value of the field as `null`.
    """
    from optimade.filtertransformers.mongo import MongoTransformer

    t = MongoTransformer(mapper=mapper("StructureMapper"))
    p = LarkParser(version=self.version, variant=self.variant)
    # The unknown field is passed through unchanged: MongoDB itself treats
    # a missing field as null, so this comparison can never match.
    assert t.transform(p.parse("_other_provider_field > 1")) == {
        "_other_provider_field": {
            "$gt": 1
        }
    }
def __init__(self):
    """Build a v1.0.0 filter pipeline and connect to the calculations collection."""
    parser = LarkParser(version=(1, 0, 0), variant="default")
    transformer = MongoTransformer()
    # Convenience callable: filter string -> Mongo query dict.
    self.transform = lambda inp: transformer.transform(parser.parse(inp))
    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration or environment variables.
    uri = 'mongodb://{}:{}@{}:{}/?authSource={}'.format(
        "admin", "admin", "localhost", "27017", "admin")
    client = MongoClient(uri)
    self.cl = client["MaterialsDB"]["Data.Calculation.StaticCalculation"]
    self.lu = Lower2Upper()
    self.data = info1
    self.info = info2
def test_list_length_aliases(self):
    """LENGTH filters on aliased list properties should be rewritten to
    query the matching scalar length field (nelements, nsites).
    """
    from optimade.server.mappers import StructureMapper

    transformer = MongoTransformer(mapper=StructureMapper())
    parser = LarkParser(version=self.version, variant=self.variant)
    self.assertEqual(
        transformer.transform(parser.parse("elements LENGTH 3")),
        {"nelements": 3})
    # LENGTH alias must compose with other clauses under $and.
    self.assertEqual(
        transformer.transform(
            parser.parse('elements HAS "Li" AND elements LENGTH = 3')),
        {"$and": [{
            "elements": {
                "$in": ["Li"]
            }
        }, {
            "nelements": 3
        }]},
    )
    self.assertEqual(
        transformer.transform(parser.parse("elements LENGTH > 3")),
        {"nelements": {
            "$gt": 3
        }},
    )
    self.assertEqual(
        transformer.transform(parser.parse("elements LENGTH < 3")),
        {"nelements": {
            "$lt": 3
        }},
    )
    self.assertEqual(
        transformer.transform(parser.parse("elements LENGTH = 3")),
        {"nelements": 3})
    self.assertEqual(
        transformer.transform(
            parser.parse("cartesian_site_positions LENGTH <= 3")),
        {"nsites": {
            "$lte": 3
        }},
    )
    self.assertEqual(
        transformer.transform(
            parser.parse("cartesian_site_positions LENGTH >= 3")),
        {"nsites": {
            "$gte": 3
        }},
    )
def test_aliases(self, mapper):
    """Test that valid aliases are allowed, but do not affect r-values.

    Fix: the local variable previously rebound (and shadowed) the
    `mapper` fixture argument; it now uses a distinct name.
    """
    from optimade.filtertransformers.mongo import MongoTransformer

    class MyStructureMapper(mapper("StructureMapper")):
        ALIASES = (
            ("elements", "my_elements"),
            ("A", "D"),
            ("property_a", "D"),
            ("B", "E"),
            ("C", "F"),
            ("_exmpl_nested_field", "nested_field"),
        )
        PROVIDER_FIELDS = ("D", "E", "F", "nested_field")

    my_mapper = MyStructureMapper
    t = MongoTransformer(mapper=my_mapper)
    p = LarkParser(version=self.version, variant=self.variant)

    assert my_mapper.get_backend_field("elements") == "my_elements"

    # Aliases apply to l-values only; the r-value "A" stays untouched.
    test_filter = 'elements HAS "A"'
    assert t.transform(p.parse(test_filter)) == {
        "my_elements": {
            "$in": ["A"]
        }
    }
    test_filter = 'elements HAS ANY "A","B","C" AND elements HAS "D"'
    assert t.transform(p.parse(test_filter)) == {
        "$and": [
            {
                "my_elements": {
                    "$in": ["A", "B", "C"]
                }
            },
            {
                "my_elements": {
                    "$in": ["D"]
                }
            },
        ]
    }
    test_filter = 'elements = "A"'
    assert t.transform(p.parse(test_filter)) == {
        "my_elements": {
            "$eq": "A"
        }
    }
    test_filter = 'property_a HAS "B"'
    assert t.transform(p.parse(test_filter)) == {"D": {"$in": ["B"]}}
    # Dotted fields are aliased on their leading component.
    test_filter = "_exmpl_nested_field.sub_property > 1234.5"
    assert t.transform(p.parse(test_filter)) == {
        "nested_field.sub_property": {
            "$gt": 1234.5
        }
    }
    # IS UNKNOWN expands to "missing OR null".
    test_filter = "_exmpl_nested_field.sub_property.x IS UNKNOWN"
    assert t.transform(p.parse(test_filter)) == {
        "$or": [
            {
                "nested_field.sub_property.x": {
                    "$exists": False
                }
            },
            {
                "nested_field.sub_property.x": {
                    "$eq": None
                }
            },
        ]
    }
def test_suspected_timestamp_fields(self, mapper):
    """Datetime-valued fields (last_modified) should be parsed as RFC 3339
    timestamps; sub-second precision is accepted but triggers a warning.
    """
    import datetime
    import bson.tz_util
    from optimade.filtertransformers.mongo import MongoTransformer
    from optimade.server.warnings import TimestampNotRFCCompliant

    example_RFC3339_date = "2019-06-08T04:13:37Z"
    example_RFC3339_date_2 = "2019-06-08T04:13:37"
    example_non_RFC3339_date = "2019-06-08T04:13:37.123Z"
    # Both RFC 3339 variants (with and without the trailing "Z") must map
    # to the same UTC-aware datetime.
    expected_datetime = datetime.datetime(
        year=2019,
        month=6,
        day=8,
        hour=4,
        minute=13,
        second=37,
        microsecond=0,
        tzinfo=bson.tz_util.utc,
    )

    assert self.transform(f'last_modified > "{example_RFC3339_date}"') == {
        "last_modified": {
            "$gt": expected_datetime
        }
    }
    assert self.transform(
        f'last_modified > "{example_RFC3339_date_2}"') == {
            "last_modified": {
                "$gt": expected_datetime
            }
        }

    non_rfc_datetime = expected_datetime.replace(microsecond=123000)
    # Fractional seconds are parsed but flagged as non-RFC-compliant.
    with pytest.warns(TimestampNotRFCCompliant):
        assert self.transform(
            f'last_modified > "{example_non_RFC3339_date}"') == {
                "last_modified": {
                    "$gt": non_rfc_datetime
                }
            }

    # Timestamp coercion must still apply after the field is aliased.
    class MyMapper(mapper("StructureMapper")):
        ALIASES = (("last_modified", "ctime"), )

    transformer = MongoTransformer(mapper=MyMapper)
    parser = LarkParser(version=self.version, variant=self.variant)
    assert transformer.transform(
        parser.parse(f'last_modified > "{example_RFC3339_date}"')) == {
            "ctime": {
                "$gt": expected_datetime
            }
        }
    assert transformer.transform(
        parser.parse(f'last_modified > "{example_RFC3339_date_2}"')) == {
            "ctime": {
                "$gt": expected_datetime
            }
        }
def test_filtering_on_relationships(self, mapper):
    """Test the nested properties with special names like "structures",
    "references" etc. are applied to the relationships field.
    """
    from optimade.filtertransformers.mongo import MongoTransformer

    t = MongoTransformer(mapper=mapper("StructureMapper"))
    p = LarkParser(version=self.version, variant=self.variant)

    # HAS on relationship ids queries the nested data.id array.
    assert t.transform(p.parse('references.id HAS "dummy/2019"')) == {
        "relationships.references.data.id": {
            "$in": ["dummy/2019"]
        }
    }
    assert t.transform(
        p.parse('structures.id HAS ANY "dummy/2019", "dijkstra1968"')) == {
            "relationships.structures.data.id": {
                "$in": ["dummy/2019", "dijkstra1968"]
            }
        }
    assert t.transform(
        p.parse('structures.id HAS ALL "dummy/2019", "dijkstra1968"')) == {
            "relationships.structures.data.id": {
                "$all": ["dummy/2019", "dijkstra1968"]
            }
        }
    # HAS ONLY with one value: array has exactly one element and all of
    # its ids are in the allowed list.
    assert t.transform(p.parse('structures.id HAS ONLY "dummy/2019"')) == {
        "$and": [
            {
                "relationships.structures.data": {
                    "$size": 1
                }
            },
            {
                "relationships.structures.data.id": {
                    "$all": ["dummy/2019"]
                }
            },
        ]
    }
    # HAS ONLY composed with a plain HAS clause nests under an outer $and.
    assert t.transform(
        p.parse(
            'structures.id HAS ONLY "dummy/2019" AND structures.id HAS "dummy/2019"'
        )) == {
            "$and": [
                {
                    "$and": [
                        {
                            "relationships.structures.data": {
                                "$size": 1,
                            }
                        },
                        {
                            "relationships.structures.data.id": {
                                "$all": ["dummy/2019"]
                            }
                        },
                    ]
                },
                {
                    "relationships.structures.data.id": {
                        "$in": ["dummy/2019"]
                    }
                },
            ],
        }
def set_up(self):
    """Expose `self.transform`: a filter string -> Mongo query callable."""
    from optimade.filtertransformers.mongo import MongoTransformer

    parser = LarkParser(version=self.version, variant=self.variant)
    transformer = MongoTransformer()
    self.transform = lambda inp: transformer.transform(parser.parse(inp))
def setUp(self):
    """Expose `self.transform`: a filter string -> Mongo query callable."""
    parser = LarkParser(version=self.version, variant=self.variant)
    transformer = MongoTransformer()
    self.transform = lambda inp: transformer.transform(parser.parse(inp))
class MongoCollection(EntryCollection):
    """Entry collection backed by a real (pymongo) or mocked (mongomock)
    MongoDB collection, translating OPTIMADE query params into Mongo queries.
    """

    def __init__(
        self,
        collection: Union[pymongo.collection.Collection,
                          mongomock.collection.Collection],
        resource_cls: EntryResource,
        resource_mapper: BaseResourceMapper,
    ):
        super().__init__(collection, resource_cls, resource_mapper)
        self.transformer = MongoTransformer(mapper=resource_mapper)
        self.provider_prefix = CONFIG.provider.prefix
        self.provider_fields = CONFIG.provider_fields.get(
            resource_mapper.ENDPOINT, [])
        self.parser = LarkParser(
            version=(0, 10, 1), variant="default"
        )  # The MongoTransformer only supports v0.10.1 as the latest grammar

        # check aliases do not clash with mongo operators
        self._check_aliases(self.resource_mapper.all_aliases())
        self._check_aliases(self.resource_mapper.all_length_aliases())

    def __len__(self):
        # Fast estimate from collection metadata; may lag the true count.
        return self.collection.estimated_document_count()

    def __contains__(self, entry):
        return self.collection.count_documents(entry.dict()) > 0

    def count(self, **kwargs):
        """Count documents, keeping only kwargs `count_documents` accepts."""
        for k in list(kwargs.keys()):
            if k not in ("filter", "skip", "limit", "hint", "maxTimeMS"):
                del kwargs[k]
        if "filter" not in kwargs:  # "filter" is needed for count_documents()
            kwargs["filter"] = {}
        return self.collection.count_documents(**kwargs)

    def find(
        self, params: Union[EntryListingQueryParams, SingleEntryQueryParams]
    ) -> Tuple[List[EntryResource], int, bool, set]:
        """Run the query described by `params` and return
        (results, data_returned, more_data_available, fields_to_exclude).
        """
        criteria = self._parse_params(params)

        all_fields = criteria.pop("fields")
        if getattr(params, "response_fields", False):
            fields = set(params.response_fields.split(","))
            # Mandatory fields are always included, whatever the client asked for.
            fields |= self.resource_mapper.get_required_fields()
        else:
            fields = all_fields.copy()

        results = []
        for doc in self.collection.find(**criteria):
            results.append(
                self.resource_cls(**self.resource_mapper.map_back(doc)))

        nresults_now = len(results)
        if isinstance(params, EntryListingQueryParams):
            # Re-count without the page limit to learn the total match count.
            criteria_nolimit = criteria.copy()
            criteria_nolimit.pop("limit", None)
            data_returned = self.count(**criteria_nolimit)
            more_data_available = nresults_now < data_returned
        else:
            # SingleEntryQueryParams, e.g., /structures/{entry_id}
            data_returned = nresults_now
            more_data_available = False
            if nresults_now > 1:
                raise HTTPException(
                    status_code=404,
                    detail=
                    f"Instead of a single entry, {nresults_now} entries were found",
                )
            # Single-entry endpoints return one resource or None.
            results = results[0] if results else None

        return results, data_returned, more_data_available, all_fields - fields

    def _parse_params(
            self,
            params: Union[EntryListingQueryParams, SingleEntryQueryParams]) -> dict:
        """Translate OPTIMADE query params into kwargs for `collection.find`."""
        cursor_kwargs = {}
        if getattr(params, "filter", False):
            tree = self.parser.parse(params.filter)
            cursor_kwargs["filter"] = self.transformer.transform(tree)
        else:
            cursor_kwargs["filter"] = {}

        if (getattr(params, "response_format", False)
                and params.response_format != "json"):
            raise HTTPException(status_code=400,
                                detail="Only 'json' response_format supported")

        if getattr(params, "page_limit", False):
            limit = params.page_limit
            if limit > CONFIG.page_limit_max:
                raise HTTPException(
                    status_code=403,  # Forbidden
                    detail=
                    f"Max allowed page_limit is {CONFIG.page_limit_max}, you requested {limit}",
                )
            cursor_kwargs["limit"] = limit
        else:
            cursor_kwargs["limit"] = CONFIG.page_limit

        # All OPTIMADE fields
        fields = self.resource_mapper.TOP_LEVEL_NON_ATTRIBUTES_FIELDS.copy()
        fields |= self.get_attribute_fields()
        # All provider-specific fields
        fields |= {
            f"_{self.provider_prefix}_{field_name}"
            for field_name in self.provider_fields
        }
        cursor_kwargs["fields"] = fields
        # Project on the backend (aliased) field names.
        cursor_kwargs["projection"] = [
            self.resource_mapper.alias_for(f) for f in fields
        ]

        if getattr(params, "sort", False):
            sort_spec = []
            for elt in params.sort.split(","):
                field = elt
                sort_dir = 1
                # A leading "-" requests a descending sort on that field.
                if elt.startswith("-"):
                    field = field[1:]
                    sort_dir = -1
                sort_spec.append((field, sort_dir))
            cursor_kwargs["sort"] = sort_spec

        if getattr(params, "page_offset", False):
            cursor_kwargs["skip"] = params.page_offset

        return cursor_kwargs

    def _check_aliases(self, aliases):
        """Check that aliases do not clash with mongo keywords."""
        if any(alias[0].startswith("$") or alias[1].startswith("$")
               for alias in aliases):
            raise RuntimeError(
                f"Cannot define an alias starting with a '$': {aliases}")
class MongoCollection(EntryCollection):
    """Entry collection backed by a real (pymongo) or mocked (mongomock)
    MongoDB collection.
    """

    def __init__(
        self,
        collection: Union[pymongo.collection.Collection,
                          mongomock.collection.Collection],
        resource_cls: Resource
    ):
        super().__init__(collection, resource_cls)
        self.transformer = MongoTransformer()
        # NOTE(review): `_parse_params` uses `self.parser`, which is never
        # assigned here — presumably set by the EntryCollection base class;
        # confirm, otherwise filtering will raise AttributeError.

    def __len__(self):
        # Fast estimate from collection metadata; may lag the true count.
        return self.collection.estimated_document_count()

    def __contains__(self, entry):
        return self.collection.count_documents(entry.dict()) > 0

    def count(self, **kwargs):
        """Count documents, keeping only kwargs `count_documents` accepts."""
        for k in list(kwargs.keys()):
            if k not in ("filter", "skip", "limit", "hint", "maxTimeMS"):
                del kwargs[k]
        return self.collection.count_documents(**kwargs)

    def find(self, params: EntryListingQueryParams) -> Tuple[List[Resource], bool, NonnegativeInt]:
        """Run the query and return (results, more_data_available, data_available)."""
        criteria = self._parse_params(params)
        # Count both with and without the page limit to detect pagination.
        criteria_nolimit = criteria.copy()
        del criteria_nolimit['limit']
        nresults_now = self.count(**criteria)
        nresults_total = self.count(**criteria_nolimit)
        more_data_available = nresults_now < nresults_total
        data_available = nresults_total
        results = []
        for doc in self.collection.find(**criteria):
            results.append(self.resource_cls(**StructureMapper.map_back(doc)))
        return results, more_data_available, data_available

    def _parse_params(self, params: EntryListingQueryParams) -> dict:
        """Translate OPTIMADE query params into kwargs for `collection.find`."""
        cursor_kwargs = {}
        if params.filter:
            tree = self.parser.parse(params.filter)
            cursor_kwargs['filter'] = self.transformer.transform(tree)
        else:
            cursor_kwargs['filter'] = {}
        if params.response_format and params.response_format != 'jsonapi':
            raise HTTPException(status_code=400, detail="Only 'jsonapi' response_format supported")
        # response_limit takes precedence over page_limit; 0 and the
        # default both fall back to RESPONSE_LIMIT.
        limit = RESPONSE_LIMIT
        if params.response_limit != RESPONSE_LIMIT:
            limit = params.response_limit
        elif params.page_limit != RESPONSE_LIMIT:
            limit = params.page_limit
        if limit > RESPONSE_LIMIT:
            raise HTTPException(status_code=400, detail=f"Max response_limit/page[limit] is {RESPONSE_LIMIT}")
        elif limit == 0:
            limit = RESPONSE_LIMIT
        cursor_kwargs['limit'] = limit
        # Mandatory fields are always projected, plus any requested ones.
        fields = {"id", "local_id", "last_modified"}
        if params.response_fields:
            fields |= set(params.response_fields.split(","))
        cursor_kwargs['projection'] = [StructureMapper.alias_for(f) for f in fields]
        if params.sort:
            sort_spec = []
            for elt in params.sort.split(','):
                field = elt
                sort_dir = 1
                # A leading "-" requests a descending sort on that field.
                if elt.startswith('-'):
                    field = field[1:]
                    sort_dir = -1
                sort_spec.append((field, sort_dir))
            cursor_kwargs['sort'] = sort_spec
        if params.page_offset:
            cursor_kwargs['skip'] = params.page_offset
        return cursor_kwargs
def test_filtering_on_relationships(self, mapper):
    """Test the nested properties with special names like "structures",
    "references" etc. are applied to the relationships field.
    """
    from optimade.filtertransformers.mongo import MongoTransformer

    t = MongoTransformer(mapper=mapper("StructureMapper"))
    p = LarkParser(version=self.version, variant=self.variant)

    # HAS on relationship ids queries the nested data.id array.
    assert t.transform(p.parse('references.id HAS "dummy/2019"')) == {
        "relationships.references.data.id": {
            "$in": ["dummy/2019"]
        }
    }
    assert t.transform(
        p.parse('structures.id HAS ANY "dummy/2019", "dijkstra1968"')) == {
            "relationships.structures.data.id": {
                "$in": ["dummy/2019", "dijkstra1968"]
            }
        }
    assert t.transform(
        p.parse('structures.id HAS ALL "dummy/2019", "dijkstra1968"')) == {
            "relationships.structures.data.id": {
                "$all": ["dummy/2019", "dijkstra1968"]
            }
        }
    # HAS ONLY is expressed as "no element outside the allowed list"
    # ($not/$elemMatch/$nin) plus "the array is non-empty" (index-0 exists).
    assert t.transform(p.parse('structures.id HAS ONLY "dummy/2019"')) == {
        "$and": [
            {
                "relationships.structures.data": {
                    "$not": {
                        "$elemMatch": {
                            "id": {
                                "$nin": ["dummy/2019"]
                            }
                        }
                    }
                }
            },
            {
                "relationships.structures.data.0": {
                    "$exists": True
                }
            },
        ]
    }
    # HAS ONLY composed with a plain HAS clause nests under an outer $and.
    assert t.transform(
        p.parse(
            'structures.id HAS ONLY "dummy/2019" AND structures.id HAS "dummy/2019"'
        )) == {
            "$and": [
                {
                    "$and": [
                        {
                            "relationships.structures.data": {
                                "$not": {
                                    "$elemMatch": {
                                        "id": {
                                            "$nin": ["dummy/2019"]
                                        }
                                    }
                                }
                            }
                        },
                        {
                            "relationships.structures.data.0": {
                                "$exists": True
                            }
                        },
                    ]
                },
                {
                    "relationships.structures.data.id": {
                        "$in": ["dummy/2019"]
                    }
                },
            ]
        }
    # Only "id" may be filtered on relationships; any other field raises.
    with pytest.raises(
            NotImplementedError,
            match=
            'Cannot filter relationships by field "doi", only "id" is supported.',
    ):
        assert t.transform(
            p.parse(
                'references.doi HAS ONLY "10.123/12345" AND structures.id HAS "dummy/2019"'
            )) == {
                "$and": [
                    {
                        "$and": [
                            {
                                "relationships.references.data": {
                                    "$not": {
                                        "$elemMatch": {
                                            "doi": {
                                                "$nin": ["10.123/12345"]
                                            }
                                        }
                                    }
                                }
                            },
                            {
                                "relationships.references.data.0": {
                                    "$exists": True
                                }
                            },
                        ]
                    },
                    {
                        "relationships.structures.data.id": {
                            "$in": ["dummy/2019"]
                        }
                    },
                ]
            }
class MongoCollection(EntryCollection):
    """Entry collection backed by a real (pymongo) or mocked (mongomock)
    MongoDB collection, aliasing OPTIMADE fields to backend names.
    """

    def __init__(
        self,
        collection: Union[pymongo.collection.Collection,
                          mongomock.collection.Collection],
        resource_cls: EntryResource,
        resource_mapper: ResourceMapper,
    ):
        super().__init__(collection, resource_cls, resource_mapper)
        self.transformer = MongoTransformer()
        self.provider = CONFIG.provider["prefix"]
        self.provider_fields = CONFIG.provider_fields.get(
            resource_mapper.ENDPOINT, [])
        self.parser = LarkParser(
            version=(0, 10, 1), variant="default"
        )  # The MongoTransformer only supports v0.10.1 as the latest grammar

    def __len__(self):
        # Fast estimate from collection metadata; may lag the true count.
        return self.collection.estimated_document_count()

    def __contains__(self, entry):
        return self.collection.count_documents(entry.dict()) > 0

    def count(self, **kwargs):
        """Count documents, keeping only kwargs `count_documents` accepts."""
        for k in list(kwargs.keys()):
            if k not in ("filter", "skip", "limit", "hint", "maxTimeMS"):
                del kwargs[k]
        if "filter" not in kwargs:  # "filter" is needed for count_documents()
            kwargs["filter"] = {}
        return self.collection.count_documents(**kwargs)

    def find(
        self, params: Union[EntryListingQueryParams, SingleEntryQueryParams]
    ) -> Tuple[List[EntryResource], NonnegativeInt, bool, set]:
        """Run the query described by `params` and return
        (results, data_returned, more_data_available, fields_to_exclude).
        """
        criteria = self._parse_params(params)

        all_fields = criteria.pop("fields")
        if getattr(params, "response_fields", False):
            # NOTE(review): mandatory fields are not re-added when the client
            # restricts response_fields — confirm this is intended.
            fields = set(params.response_fields.split(","))
        else:
            fields = all_fields.copy()

        results = []
        for doc in self.collection.find(**criteria):
            results.append(
                self.resource_cls(**self.resource_mapper.map_back(doc)))

        nresults_now = len(results)
        if isinstance(params, EntryListingQueryParams):
            # Re-count without the page limit to learn the total match count.
            criteria_nolimit = criteria.copy()
            criteria_nolimit.pop("limit", None)
            data_returned = self.count(**criteria_nolimit)
            more_data_available = nresults_now < data_returned
        else:
            # SingleEntryQueryParams, e.g., /structures/{entry_id}
            data_returned = nresults_now
            more_data_available = False
            if nresults_now > 1:
                raise HTTPException(
                    status_code=404,
                    detail=
                    f"Instead of a single entry, {nresults_now} entries were found",
                )
            # Single-entry endpoints return one resource or None.
            results = results[0] if results else None

        return results, data_returned, more_data_available, all_fields - fields

    def _alias_filter(self, filter_: dict) -> dict:
        """Recursively replace OPTIMADE field names in a Mongo filter dict
        with their backend aliases, descending into $and/$or lists.
        """
        res = {}
        for key, value in filter_.items():
            if key in ["$and", "$or"]:
                res[key] = [self._alias_filter(item) for item in value]
            else:
                new_value = value
                if isinstance(value, dict):
                    new_value = self._alias_filter(value)
                res[self.resource_mapper.alias_for(key)] = new_value
        return res

    def _parse_params(
            self,
            params: Union[EntryListingQueryParams, SingleEntryQueryParams]) -> dict:
        """Translate OPTIMADE query params into kwargs for `collection.find`."""
        cursor_kwargs = {}
        if getattr(params, "filter", False):
            tree = self.parser.parse(params.filter)
            mongo_filter = self.transformer.transform(tree)
            # Aliasing happens after transformation in this revision.
            cursor_kwargs["filter"] = self._alias_filter(mongo_filter)
        else:
            cursor_kwargs["filter"] = {}

        if (getattr(params, "response_format", False)
                and params.response_format != "json"):
            raise HTTPException(status_code=400,
                                detail="Only 'json' response_format supported")

        if getattr(params, "page_limit", False):
            limit = params.page_limit
            if limit > CONFIG.page_limit_max:
                raise HTTPException(
                    status_code=403,  # Forbidden
                    detail=
                    f"Max allowed page_limit is {CONFIG.page_limit_max}, you requested {limit}",
                )
            cursor_kwargs["limit"] = limit
        else:
            cursor_kwargs["limit"] = CONFIG.page_limit

        # All OPTiMaDe fields
        fields = self.resource_mapper.TOP_LEVEL_NON_ATTRIBUTES_FIELDS.copy()
        fields |= self.get_attribute_fields()
        # All provider-specific fields
        fields |= {self.provider + _ for _ in self.provider_fields}
        cursor_kwargs["fields"] = fields
        # Project on the backend (aliased) field names.
        cursor_kwargs["projection"] = [
            self.resource_mapper.alias_for(f) for f in fields
        ]

        if getattr(params, "sort", False):
            sort_spec = []
            for elt in params.sort.split(","):
                field = elt
                sort_dir = 1
                # A leading "-" requests a descending sort on that field.
                if elt.startswith("-"):
                    field = field[1:]
                    sort_dir = -1
                sort_spec.append((field, sort_dir))
            cursor_kwargs["sort"] = sort_spec

        if getattr(params, "page_offset", False):
            cursor_kwargs["skip"] = params.page_offset

        return cursor_kwargs