def register_all(self, models: List[ResourceType], dry_run=False) -> List[ResourceType]:
    """
    [ALPHA] Create or update each model in models.

    This method has the same behavior as `register`, except that no models will be
    written if any one of them is invalid.

    Using this method should yield significant improvements to write speed over
    separate calls to `register`.

    Parameters
    ----------
    models: List[ResourceType]
        The objects to be written.
    dry_run: bool
        Whether to actually register the objects or run a dry run of the register
        operation. Dry run is intended to be used for validation. Default: false

    Returns
    -------
    List[ResourceType]
        Each object model as it now exists in the database. The order and number
        of models is guaranteed to be the same as originally specified.

    """
    if self.dataset_id is None:
        raise RuntimeError(
            "Must specify a dataset in order to register a data model object.")
    path = self._get_path()
    params = {'dry_run': dry_run}

    temp_scope = str(uuid4())
    scope = temp_scope if dry_run else CITRINE_SCOPE
    json = GEMDJson(scope=scope)
    [json.dumps(x) for x in models]  # This apparent no-op populates uids

    objects = [replace_objects_with_links(scrub_none(model.dump())) for model in models]

    recursive_foreach(models, lambda x: x.uids.pop(temp_scope, None))  # Strip temp uids

    response_data = self.session.put_resource(
        path + '/batch',
        json={'objects': objects},
        params=params)
    return [self.build(obj) for obj in response_data['objects']]
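# --- Hedged usage sketch (not part of the library source) -----------------------
# Illustrates how register_all above might be called from a dataset-scoped
# collection. The names `dataset` and `measurement_runs` are assumptions standing
# in for a collection whose dataset_id is set, and the import path is assumed;
# only the behavior described in the docstring above is relied upon.
from citrine.resources.measurement_run import MeasurementRun  # module path assumed

runs = [MeasurementRun(name="run 1"), MeasurementRun(name="run 2")]
checked = dataset.measurement_runs.register_all(runs, dry_run=True)  # validate only, nothing written
persisted = dataset.measurement_runs.register_all(runs)              # all-or-nothing batch write
assert len(persisted) == len(runs)  # order and count match the input, per the docstring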
def dump(self):
    """
    Convert the object to a JSON dictionary, so that every entry is serialized.

    Uses the json encoder client, so objects with uids are converted to LinkByUID
    dictionaries.

    Returns
    -------
    dict
        A dictionary representation of the object, with every entry serialized.

    """
    from gemd.json import GEMDJson
    encoder = GEMDJson()
    return json.loads(encoder.raw_dumps(self))
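# A small sketch (not library documentation) of what dump() produces: a plain
# dictionary whose nested entries are already serialized, suitable for json.dumps
# or an HTTP payload. ProcessSpec stands in for any object exposing dump().
from gemd.entity.object import ProcessSpec

spec_dict = ProcessSpec(name="annealing").dump()
assert spec_dict["type"] == "process_spec"
assert spec_dict["name"] == "annealing"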
def register(self, model: ResourceType, dry_run=False):
    """
    Create a new element of the collection or update an existing element.

    If the input model has an ID that corresponds to an existing object in the
    database, then that object will be updated. Otherwise a new object will be created.

    Only the top-level object in `model` itself is written to the database with this
    method. References to other objects are persisted as links, and the object
    returned by this method has all instances of data objects replaced by instances
    of LinkByUID. Registering an object which references other objects does NOT
    implicitly register those other objects. Rather, those other objects' values are
    ignored, and the pre-existence of objects with their IDs is asserted before
    attempting to write `model`.

    Parameters
    ----------
    model: ResourceType
        The DataConcepts object.
    dry_run: bool
        Whether to actually register the item or run a dry run of the register
        operation. Dry run is intended to be used for validation. Default: false

    Returns
    -------
    ResourceType
        A copy of the registered object as it now exists in the database.

    """
    if self.dataset_id is None:
        raise RuntimeError(
            "Must specify a dataset in order to register a data model object.")
    path = self._get_path()
    params = {'dry_run': dry_run}

    # How do we prepare a citrine-python object to be the json in a POST request?
    # Right now, that method scrubs out None values and replaces top-level objects with links.
    # Eventually, we want to replace it with the following:
    #   dumped_data = dumps(loads(dumps(model.dump())))
    # This dumps the object to a dictionary (model.dump()), and then to a string (dumps()).
    # But this string is still nested--because it's a dictionary, GEMDJson.dumps() does not
    # know how to replace the objects with link-by-uids. loads() converts this string into
    # nested gemd objects, and then the final dumps() converts that to a json-ready string
    # in which all of the object references have been replaced with link-by-uids.
    temp_scope = str(uuid4())
    scope = temp_scope if dry_run else CITRINE_SCOPE
    GEMDJson(scope=scope).dumps(model)  # This apparent no-op populates uids
    dumped_data = replace_objects_with_links(scrub_none(model.dump()))
    recursive_foreach(model, lambda x: x.uids.pop(temp_scope, None))  # Strip temp uids

    data = self.session.post_resource(path, dumped_data, params=params)
    full_model = self.build(data)
    return full_model
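# --- Illustrative sketch of the "apparent no-op" above --------------------------
# A hedged example, not library documentation: it shows the side effect that the
# "# This apparent no-op populates uids" comment relies on. Dumping with
# GEMDJson(scope=...) walks the object graph and assigns a uid in that scope to
# every reachable object, which is what later lets replace_objects_with_links swap
# nested objects for LinkByUID references. Object names here are made up.
from gemd.json import GEMDJson
from gemd.entity.object import ProcessSpec, MaterialSpec

process = ProcessSpec(name="mixing")
material = MaterialSpec(name="mixture", process=process)

assert not material.uids and not process.uids   # no ids assigned yet
GEMDJson(scope="temp-scope").dumps(material)    # side effect: uids get populated
assert "temp-scope" in material.uids
assert "temp-scope" in process.uids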
def test_thin_dumps():
    """Test that thin_dumps turns pointers into links."""
    mat = MaterialRun("The actual material")
    meas_spec = MeasurementSpec("measurement", uids={'my_scope': '324324'})
    meas = MeasurementRun("The measurement", spec=meas_spec, material=mat)

    thin_copy = MeasurementRun.build(json.loads(GEMDJson().thin_dumps(meas)))
    assert isinstance(thin_copy, MeasurementRun)
    assert isinstance(thin_copy.material, LinkByUID)
    assert isinstance(thin_copy.spec, LinkByUID)
    assert thin_copy.spec.id == meas_spec.uids['my_scope']

    # Check that LinkByUID objects are correctly converted to their JSON equivalent
    expected_json = '{"id": "my_id", "scope": "scope", "type": "link_by_uid"}'
    assert GEMDJson().thin_dumps(LinkByUID('scope', 'my_id')) == expected_json

    # Check that objects lacking .uid attributes will raise an exception when dumped
    with pytest.raises(TypeError):
        GEMDJson().thin_dumps({{'key': 'value'}})
def test_register_argument_validation():
    """Test that register_classes argument is type checked."""
    orig = GEMDJson()
    with pytest.raises(ValueError):
        orig.register_classes("foo")

    with pytest.raises(ValueError):
        orig.register_classes({"foo": orig})

    with pytest.raises(ValueError):
        orig.register_classes({ProcessSpec: ProcessSpec})
def test_deserialize():
    """Round-trip serde should leave the object unchanged."""
    condition = Condition(name="A condition", value=NominalReal(7, ''))
    parameter = Parameter(name="A parameter", value=NormalReal(mean=17, std=1, units=''))
    measurement = MeasurementRun(tags="A tag on a measurement",
                                 conditions=condition, parameters=parameter)

    copy_meas = GEMDJson().copy(measurement)
    assert(copy_meas.conditions[0].value == measurement.conditions[0].value)
    assert(copy_meas.parameters[0].value == measurement.parameters[0].value)
    assert(copy_meas.uids["auto"] == measurement.uids["auto"])
def get_json_support(cls):
    """Get a DataConcepts-compatible json serializer/deserializer."""
    if cls.json_support is None:
        DataConcepts._make_class_dict()
        cls.json_support = GEMDJson(scope=CITRINE_SCOPE)
        cls.json_support.register_classes(
            {k: v for k, v in DataConcepts.class_dict.items() if k != "link_by_uid"})
    return cls.json_support
def test_name_persistance():
    """Verify that a serialized IngredientRun doesn't lose its name."""
    from gemd.entity.object import IngredientSpec, IngredientRun
    from gemd.entity.link_by_uid import LinkByUID
    from gemd.json import GEMDJson

    je = GEMDJson()

    ms_link = LinkByUID(scope='local', id='mat_spec')
    mr_link = LinkByUID(scope='local', id='mat_run')
    ps_link = LinkByUID(scope='local', id='pro_spec')
    pr_link = LinkByUID(scope='local', id='pro_run')
    spec = IngredientSpec(name='Ingred', labels=['some', 'words'],
                          process=ps_link, material=ms_link)
    run = IngredientRun(spec=spec, process=pr_link, material=mr_link)
    assert run.name == spec.name
    assert run.labels == spec.labels

    # Try changing them and make sure they change
    spec.name = 'Frank'
    spec.labels = ['other', 'words']
    assert run.name == spec.name
    assert run.labels == spec.labels

    run.spec = LinkByUID(scope='local', id='ing_spec')
    # Name and labels are now stashed but not stored
    assert run == je.copy(run)
    assert run.name == spec.name
    assert run.labels == spec.labels

    # Test that serialization doesn't get confused after a deser and set
    spec_too = IngredientSpec(name='Jorge', labels=[], process=ps_link, material=ms_link)
    run.spec = spec_too
    assert run == je.copy(run)
    assert run.name == spec_too.name
    assert run.labels == spec_too.labels
def build(d):
    """
    Build an object from a JSON dictionary.

    This differs from `from_dict` in that the values themselves may *also* be
    dictionaries corresponding to serialized DictSerializable objects.

    Parameters
    ----------
    d: dict
        The object as a serialized dictionary.

    Returns
    -------
    DictSerializable
        The deserialized object.

    """
    from gemd.json import GEMDJson
    encoder = GEMDJson()
    return encoder.raw_loads(encoder.raw_dumps(d))
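# A minimal sketch of how build differs from from_dict, assuming ProcessSpec as the
# surrounding DictSerializable class: the input dictionary may contain serialized
# sub-objects, and the raw_dumps/raw_loads round trip rehydrates them into live
# gemd objects. This is an illustration, not the library's documented example.
from gemd.entity.object import ProcessSpec

serialized = ProcessSpec(name="oven cure").dump()   # plain nested dictionary
rebuilt = ProcessSpec.build(serialized)             # back to a ProcessSpec instance
assert isinstance(rebuilt, ProcessSpec)
assert rebuilt.name == "oven cure"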
def test_scope_control():
    """Test that the scope argument controls which uid scope gets auto-populated."""
    input_material = MaterialSpec()
    process = ProcessSpec()
    IngredientSpec(material=input_material, process=process)
    material = MaterialSpec(process=process)

    # Verify the default scope is used
    default_json = GEMDJson()
    default_text = default_json.dumps(material)
    assert "auto" in default_text
    assert "custom" not in default_text

    # Clear out ids
    input_material.uids = {}
    process.uids = {}
    process.ingredients[0].uids = {}
    material.uids = {}

    # Verify the custom scope is used instead
    custom_json = GEMDJson(scope='custom')
    custom_text = custom_json.dumps(material)
    assert "auto" not in custom_text
    assert "custom" in custom_text
def test_register_classes_override():
    """Test that register_classes overrides existing entries in the class index."""
    class MyProcessSpec(ProcessSpec):
        pass

    normal = GEMDJson()
    custom = GEMDJson()
    custom.register_classes({MyProcessSpec.typ: MyProcessSpec})

    obj = ProcessSpec(name="foo")
    assert not isinstance(normal.copy(obj), MyProcessSpec),\
        "Class registration bled across GEMDJson() objects"
    assert isinstance(custom.copy(obj), MyProcessSpec),\
        "Custom GEMDJson didn't deserialize as MyProcessSpec"
def register(self, model: ResourceType, dry_run=False):
    """
    Create a new element of the collection or update an existing element.

    If the input model has an ID that corresponds to an existing object in the
    database, then that object will be updated. Otherwise a new object will be created.

    Only the top-level object in `model` itself is written to the database with this
    method. References to other objects are persisted as links, and the object
    returned by this method has all instances of data objects replaced by instances
    of LinkByUID. Registering an object which references other objects does NOT
    implicitly register those other objects. Rather, those other objects' values are
    ignored, and the pre-existence of objects with their IDs is asserted before
    attempting to write `model`.

    Parameters
    ----------
    model: ResourceType
        The DataConcepts object.
    dry_run: bool
        Whether to actually register the item or run a dry run of the register
        operation. Dry run is intended to be used for validation. Default: false

    Returns
    -------
    ResourceType
        A copy of the registered object as it now exists in the database.

    """
    if self.dataset_id is None:
        raise RuntimeError(
            "Must specify a dataset in order to register a data model object.")
    path = self._get_path()
    params = {'dry_run': dry_run}

    temp_scope = str(uuid4())
    scope = temp_scope if dry_run else CITRINE_SCOPE
    GEMDJson(scope=scope).dumps(model)  # This apparent no-op populates uids
    dumped_data = replace_objects_with_links(scrub_none(model.dump()))
    recursive_foreach(model, lambda x: x.uids.pop(temp_scope, None))  # Strip temp uids

    data = self.session.post_resource(path, dumped_data, params=params)
    full_model = self.build(data)
    return full_model
def test_pure_subsitutions():
    """Make sure substitute methods don't mutate inputs."""
    json_str = '''
          [
            [
              {
                "uids": {
                  "id": "9118c2d3-1c38-47fe-a650-c2b92fdb6777"
                },
                "type": "material_run",
                "name": "flour"
              }
            ],
            {
              "type": "ingredient_run",
              "uids": {
                "id": "8858805f-ec02-49e4-ba3b-d784e2aea3f8"
              },
              "material": {
                "type": "link_by_uid",
                "scope": "ID",
                "id": "9118c2d3-1c38-47fe-a650-c2b92fdb6777"
              },
              "process": {
                "type": "link_by_uid",
                "scope": "ID",
                "id": "9148c2d3-2c38-47fe-b650-c2b92fdb6777"
              }
            }
          ]
       '''
    index = {}
    original = json.loads(json_str,
                          object_hook=lambda x: GEMDJson()._load_and_index(x, index))
    frozen = deepcopy(original)
    loaded = substitute_objects(original, index)
    assert original == frozen
    frozen_loaded = deepcopy(loaded)
    substitute_links(loaded)
    assert loaded == frozen_loaded
    for o in loaded:
        substitute_links(o)
    assert loaded == frozen_loaded
def validate_templates(self, model: DataObjectResourceType,
                       object_template: Optional[ObjectTemplateResourceType] = None,
                       ingredient_process_template: Optional[ProcessTemplate] = None
                       ) -> List[ValidationError]:
    """
    Validate a data object against its templates.

    Validates against provided object templates (passed in as parameters) and stored
    attribute templates linked on the data object.

    :param model: the data object to validate
    :param object_template: optional object template to validate against
    :param ingredient_process_template: optional process template to validate
        ingredient against. Ignored unless data object is an IngredientSpec or
        IngredientRun.
    :return: List[ValidationError] of validation errors encountered. Empty if successful.
    """
    path = self._get_path(ignore_dataset=True) + "/validate-templates"

    temp_scope = str(uuid4())
    GEMDJson(scope=temp_scope).dumps(model)  # This apparent no-op populates uids
    dumped_data = replace_objects_with_links(scrub_none(model.dump()))
    recursive_foreach(model, lambda x: x.uids.pop(temp_scope, None))  # Strip temp uids

    request_data = {"dataObject": dumped_data}
    if object_template is not None:
        request_data["objectTemplate"] = \
            replace_objects_with_links(scrub_none(object_template.dump()))
    if ingredient_process_template is not None:
        request_data["ingredientProcessTemplate"] = \
            replace_objects_with_links(scrub_none(ingredient_process_template.dump()))
    try:
        self.session.put_resource(path, request_data)
        return []
    except BadRequest as e:
        if e.api_error is not None and e.api_error.validation_errors:
            return e.api_error.validation_errors
        raise e
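# --- Hedged usage sketch ---------------------------------------------------------
# How validate_templates might be called; `dataset.material_runs`, `my_run`, and
# `my_template` are assumed names, not part of the source above. A conforming
# object yields an empty list; template violations come back as ValidationError
# entries extracted from the BadRequest response.
errors = dataset.material_runs.validate_templates(model=my_run, object_template=my_template)
if not errors:
    print("Object conforms to its templates.")
else:
    for err in errors:
        print(err)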
def copy(obj):
    return GEMDJson().copy(obj)
def raw_loads(json_str, **kwargs):
    return GEMDJson().raw_loads(json_str, **kwargs)
def thin_dumps(obj, **kwargs):
    return GEMDJson().thin_dumps(obj, **kwargs)
origin="specified")) cake_obj.notes = cake_obj.notes + "; Très délicieux! 😀" cake_obj.file_links = [ FileLink( filename="Photo", url='https://storcpdkenticomedia.blob.core.windows.net/media/' 'recipemanagementsystem/media/recipe-media-files/recipes/retail/x17/' '16730-beckys-butter-cake-600x600.jpg?ext=.jpg') ] return cake_obj if __name__ == "__main__": encoder = GEMDJson() cake = make_cake(seed=42) with open("example_gemd_material_history.json", "w") as f: context_list = complete_material_history(cake) f.write(json.dumps(context_list, indent=2)) with open("example_gemd_material_template.json", "w") as f: f.write(encoder.thin_dumps(cake.template, indent=2)) with open("example_gemd_process_template.json", "w") as f: f.write( encoder.thin_dumps( cake.process.ingredients[0].material.process.template, indent=2))
def test_enumeration_serde():
    """An enumeration should get serialized as a string."""
    condition = Condition(name="A condition", notes=Origin.UNKNOWN)
    copy_condition = GEMDJson().copy(condition)
    assert copy_condition.notes == Origin.get_value(condition.notes)
def async_update(self, model: ResourceType, *,
                 dry_run: bool = False,
                 wait_for_response: bool = True,
                 timeout: float = 2 * 60,
                 polling_delay: float = 1.0) -> Optional[UUID]:
    """
    [ALPHA] Update a particular element of the collection with data validation.

    Update a particular element of the collection, doing a deeper check to ensure
    that the dependent data objects are still within the (potentially) changed
    constraints of this change. This will allow you to make bounds and allowed
    names/labels changes to templates.

    Parameters
    ----------
    model: ResourceType
        The DataConcepts object.
    dry_run: bool
        Whether to actually update the item or run a dry run of the update
        operation. Dry run is intended to be used for validation. Default: false
    wait_for_response: bool
        Whether to poll for the eventual response. This changes the return type
        (see below).
    timeout: float
        How long to poll for the result before giving up. This is expressed in
        (fractional) seconds.
    polling_delay: float
        How long to delay between each polling retry attempt.

    Returns
    -------
    Optional[UUID]
        If wait_for_response is True, then this call will poll the backend, waiting
        for the eventual job result. In the case of successful validation/update, a
        return value of None is provided, which indicates success. In the case of a
        failure validating or processing the update, an exception (JobFailureError)
        is raised and an error message is logged with the underlying reason of the
        failure.

        If wait_for_response is False, a job ID (of type UUID) is returned that one
        can use to poll for the job completion and result with the
        :func:`~citrine.resources.DataConceptsCollection.poll_async_update_job`
        method.

    """
    temp_scope = str(uuid4())
    GEMDJson(scope=temp_scope).dumps(model)  # This apparent no-op populates uids
    dumped_data = replace_objects_with_links(scrub_none(model.dump()))
    recursive_foreach(model, lambda x: x.uids.pop(temp_scope, None))  # Strip temp uids

    scope = CITRINE_SCOPE
    id = dumped_data['uids']['id']
    if self.dataset_id is None:
        raise RuntimeError("Must specify a dataset in order to update "
                           "a data model object with data validation.")

    url = self._get_path() + "/" + scope + "/" + id + "/async"

    response_json = self.session.put_resource(url, dumped_data, params={'dry_run': dry_run})

    job_id = response_json["job_id"]

    if wait_for_response:
        self.poll_async_update_job(job_id, timeout=timeout, polling_delay=polling_delay)
        # That worked, nothing returned in this case
        return None
    else:
        # TODO: use JobSubmissionResponse here instead
        return job_id
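# --- Hedged usage sketch ---------------------------------------------------------
# Two ways to call async_update, assuming `dataset.process_templates` is a
# dataset-scoped template collection and `template` is a previously registered
# template object; these names are illustrative assumptions. With the default
# wait_for_response=True, a successful validation/update returns None and a
# failure raises JobFailureError.
dataset.process_templates.async_update(template, dry_run=True)   # blocks until validated

# Fire-and-forget variant: grab the job id and poll later with the method named
# in the docstring above.
job_id = dataset.process_templates.async_update(template, wait_for_response=False)
dataset.process_templates.poll_async_update_job(job_id, timeout=120, polling_delay=1.0)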
def raw_dumps(obj, **kwargs):
    return GEMDJson().raw_dumps(obj, **kwargs)