class _Submission(AttributeClass):
    """The payload which describes a single estimation request.

    It bundles the data set to estimate, the options to estimate it with,
    the force field source, and the keys of the parameters to differentiate
    with respect to, ready to be sent to the server.
    """

    dataset = Attribute(
        docstring="The set of properties to estimate.",
        type_hint=PhysicalPropertyDataSet,
    )
    options = Attribute(
        docstring="The options to use when estimating the dataset.",
        type_hint=RequestOptions,
    )
    force_field_source = Attribute(
        docstring="The force field parameters to estimate the dataset using.",
        type_hint=ForceFieldSource,
    )
    parameter_gradient_keys = Attribute(
        docstring="A list of the parameters that the physical properties "
        "should be differentiated with respect to.",
        type_hint=list,
    )

    def validate(self, attribute_type=None):
        """Validate the attributes, then check the gradient keys.

        Parameters
        ----------
        attribute_type
            Forwarded unchanged to the parent class' ``validate``.

        Raises
        ------
        AssertionError
            If any entry of ``parameter_gradient_keys`` is not a
            ``ParameterGradientKey``.
        """
        super().validate(attribute_type)

        assert not any(
            not isinstance(gradient_key, ParameterGradientKey)
            for gradient_key in self.parameter_gradient_keys
        )
class CalculationLayerResult(AttributeClass):
    """The outcome of a `CalculationLayer` attempting to estimate a property.

    Holds the estimated property (when there is one), the data which should
    be stored, and any exceptions which were raised along the way.
    """

    physical_property = Attribute(
        docstring="The estimated property (if the layer was successful).",
        type_hint=PhysicalProperty,
        optional=True,
    )
    data_to_store = Attribute(
        docstring="Paths to the data objects to store.",
        type_hint=list,
        default_value=[],
    )
    exceptions = Attribute(
        docstring="Any exceptions raised by the layer while estimating the "
        "property.",
        type_hint=list,
        default_value=[],
    )

    def validate(self, attribute_type=None):
        """Validate the attributes, then check the contents of the lists.

        Parameters
        ----------
        attribute_type
            Forwarded unchanged to the parent class' ``validate``.

        Raises
        ------
        AssertionError
            If ``data_to_store`` is not made up of two-element tuples / lists
            of strings, or if ``exceptions`` contains anything which is not
            an ``EvaluatorException``.
        """
        super().validate(attribute_type)

        # Each entry must be a pair ...
        assert all(
            isinstance(entry, (tuple, list)) for entry in self.data_to_store
        )
        assert all(len(entry) == 2 for entry in self.data_to_store)
        # ... whose two elements are both strings.
        assert all(
            isinstance(path, str)
            for entry in self.data_to_store
            for path in entry
        )

        assert all(
            isinstance(error, EvaluatorException) for error in self.exceptions
        )
class CalculationLayerSchema(AttributeClass):
    """The options a `CalculationLayer` should use for a class of properties.

    At most one of the two tolerance attributes may be set at a time.
    """

    absolute_tolerance = Attribute(
        docstring="The absolute uncertainty that the property should "
        "be estimated to within. This attribute is mutually exclusive "
        "with the `relative_tolerance` attribute.",
        type_hint=pint.Quantity,
        default_value=UNDEFINED,
        optional=True,
    )
    relative_tolerance = Attribute(
        docstring="The relative uncertainty that the property should "
        "be estimated to within, i.e `relative_tolerance * "
        "measured_property.uncertainty`. This attribute is mutually "
        "exclusive with the `absolute_tolerance` attribute.",
        type_hint=float,
        default_value=UNDEFINED,
        optional=True,
    )

    def validate(self, attribute_type=None):
        """Check the tolerances are mutually exclusive, then validate.

        Parameters
        ----------
        attribute_type
            Forwarded unchanged to the parent class' ``validate``.

        Raises
        ------
        ValueError
            If both ``absolute_tolerance`` and ``relative_tolerance`` are set.
        """
        # The exclusivity check runs before the parent validation.
        if self.absolute_tolerance != UNDEFINED:
            if self.relative_tolerance != UNDEFINED:
                raise ValueError(
                    "Only one of `absolute_tolerance` and `relative_tolerance` "
                    "can be set."
                )

        super().validate(attribute_type)
class ConnectionOptions(AttributeClass):
    """The address and port to use when connecting to an `EvaluatorServer`."""

    server_address = Attribute(
        docstring="The address of the server to connect to.",
        type_hint=str,
        default_value="localhost",
    )
    server_port = Attribute(
        docstring="The port of the server to connect to.",
        type_hint=int,
        default_value=8000,
    )

    def __init__(self, server_address=None, server_port=None):
        """
        Parameters
        ----------
        server_address: str, optional
            The address of the server to connect to. The attribute is only
            assigned when a value other than `None` is passed.
        server_port: int, optional
            The port of the server to connect to. The attribute is only
            assigned when a value other than `None` is passed.
        """
        overrides = (
            ("server_address", server_address),
            ("server_port", server_port),
        )

        for attribute_name, attribute_value in overrides:

            if attribute_value is None:
                continue

            setattr(self, attribute_name, attribute_value)
class ProtocolGroupSchema(ProtocolSchema):
    """A schema for a workflow protocol group, i.e. a protocol schema which
    additionally carries the schemas of the protocols it contains.
    """

    protocol_schemas = Attribute(
        docstring="The schemas of the protocols within this group.",
        type_hint=dict,
        read_only=True,
    )

    def __init__(
        self, unique_id=None, protocol_type=None, inputs=None, protocol_schemas=None
    ):
        """
        Parameters
        ----------
        unique_id, protocol_type, inputs
            Passed positionally to the `ProtocolSchema` constructor.
        protocol_schemas: dict, optional
            The schemas of the protocols within this group. Only stored when
            a value other than `None` is passed.
        """
        super().__init__(unique_id, protocol_type, inputs)

        if protocol_schemas is None:
            return

        # The attribute is read-only, so it is stored via `_set_value`.
        self._set_value("protocol_schemas", protocol_schemas)

    def validate(self, attribute_type=None):
        """Validate the attributes, then check the types of the child schemas.

        Raises
        ------
        AssertionError
            If a key of ``protocol_schemas`` is not a `str`, or a value is
            not a `ProtocolSchema`.
        """
        super().validate(attribute_type)

        for schema_id, child_schema in self.protocol_schemas.items():

            assert isinstance(schema_id, str)
            assert isinstance(child_schema, ProtocolSchema)
class WorkflowResult(AttributeClass):
    """The outcome of executing a `Workflow` as part of a `WorkflowGraph`.

    Holds the estimated value, its gradients, the data to store, and any
    exceptions which were raised.
    """

    workflow_id = Attribute(
        docstring="The id of the workflow associated with this result.",
        type_hint=str,
    )
    value = Attribute(
        docstring="The estimated value of the property and the uncertainty "
        "in that value.",
        type_hint=pint.Measurement,
        optional=True,
    )
    gradients = Attribute(
        docstring="The gradients of the estimated value with respect to the "
        "specified force field parameters.",
        type_hint=list,
        default_value=[],
    )
    exceptions = Attribute(
        docstring="Any exceptions raised by the layer while estimating the "
        "property.",
        type_hint=list,
        default_value=[],
    )
    data_to_store = Attribute(
        docstring="Paths to the data objects to store.",
        type_hint=list,
        default_value=[],
    )

    def validate(self, attribute_type=None):
        """Validate the attributes, then check the contents of the lists.

        Parameters
        ----------
        attribute_type
            Forwarded unchanged to the parent class' ``validate``.

        Raises
        ------
        AssertionError
            If ``gradients`` holds anything other than `ParameterGradient`
            objects, ``data_to_store`` is not made up of two-element tuples
            of strings, or ``exceptions`` holds anything other than
            `EvaluatorException` objects.
        """
        super().validate(attribute_type)

        assert all(
            isinstance(gradient, ParameterGradient) for gradient in self.gradients
        )

        # Only tuples are accepted here (lists are not).
        assert all(isinstance(entry, tuple) for entry in self.data_to_store)
        assert all(len(entry) == 2 for entry in self.data_to_store)
        assert all(
            isinstance(path, str)
            for entry in self.data_to_store
            for path in entry
        )

        assert all(
            isinstance(error, EvaluatorException) for error in self.exceptions
        )
class Condition(AttributeClass):
    """Defines a specific condition which must be met of the form
    `left_hand_value` [TYPE] `right_hand_value`, where `[TYPE]` may be
    less than or greater than.
    """

    @unique
    class Type(Enum):
        """The available condition types."""

        LessThan = "lessthan"
        GreaterThan = "greaterthan"

    left_hand_value = Attribute(
        docstring="The left-hand value to compare.",
        type_hint=typing.Union[int, float, pint.Quantity],
    )
    right_hand_value = Attribute(
        docstring="The right-hand value to compare.",
        type_hint=typing.Union[int, float, pint.Quantity],
    )
    # The docstring here previously duplicated the `right_hand_value` one.
    type = Attribute(
        docstring="The type of condition to apply.",
        type_hint=Type,
        default_value=Type.LessThan,
    )

    def __eq__(self, other):
        """Two conditions are equal when they are of exactly the same class
        and their left-hand value, right-hand value and type all compare equal.
        """
        return (
            type(self) == type(other)
            and self.left_hand_value == other.left_hand_value
            and self.right_hand_value == other.right_hand_value
            and self.type == other.type
        )

    def __ne__(self, other):
        """The negation of `__eq__`."""
        return not self.__eq__(other)

    def __str__(self):
        """Returns the condition as `left_hand_value type right_hand_value`."""
        return f"{self.left_hand_value} {self.type} {self.right_hand_value}"

    def __repr__(self):
        return f"<Condition {str(self)}>"
class MoleFraction(Amount):
    """An amount expressed as the mole fraction of a `Component` in a
    `Substance`.
    """

    value = Attribute(docstring="The value of this amount.", type_hint=float)

    @property
    def identifier(self):
        """str: The mole fraction formatted to six decimal places."""
        return f"x={self.value:.6f}"

    def to_number_of_molecules(self, total_substance_molecules, tolerance=None):
        """Converts this mole fraction into a whole number of molecules.

        Parameters
        ----------
        total_substance_molecules: int
            The total number of molecules in the whole substance.
        tolerance: float, optional
            If provided, the largest allowed absolute difference between the
            mole fraction actually obtained and ``self.value``.

        Returns
        -------
        int
            The number of molecules of this component.

        Raises
        ------
        ValueError
            If the amount translates into zero molecules, or if the obtained
            mole fraction is outside of ``tolerance``.
        """
        unrounded_count = self.value * total_substance_molecules
        remainder = unrounded_count % 1

        # A fractional part of (approximately) one half is truncated rather
        # than rounded; every other value is rounded to the nearest integer.
        at_half_way = np.isclose(remainder, 0.5)
        number_of_molecules = (
            int(unrounded_count) if at_half_way else int(round(unrounded_count))
        )

        if number_of_molecules == 0:

            raise ValueError(
                "The total number of substance molecules was not large enough, "
                "such that this non-zero amount translates into zero molecules "
                "of this component in the substance."
            )

        if tolerance is None:
            return number_of_molecules

        mole_fraction = number_of_molecules / total_substance_molecules

        if abs(mole_fraction - self.value) > tolerance:

            raise ValueError(
                f"The mole fraction ({mole_fraction}) given a total number of molecules "
                f"({total_substance_molecules}) is outside of the tolerance {tolerance} "
                f"of the target mole fraction {self.value}"
            )

        return number_of_molecules

    def validate(self, attribute_type=None):
        """Validate the attributes, then check the range of the value.

        Raises
        ------
        ValueError
            If the value is not in (0, 1], or if it is smaller than the sixth
            decimal place.
        """
        super().validate(attribute_type)

        fraction = self.value

        if fraction <= 0.0 or fraction > 1.0:

            raise ValueError(
                "A mole fraction must be greater than zero, and less than or "
                "equal to one."
            )

        # The identifier only carries six decimal places.
        if math.floor(fraction * 1e6) < 1:

            raise ValueError(
                "Mole fractions are only precise to the sixth "
                "decimal place within this class representation."
            )
class ReweightingSchema(WorkflowCalculationSchema):
    """A workflow calculation schema which additionally encodes the storage
    queries, the maximum number of data points and the temperature cutoff to
    use when estimating a class of properties by reweighting cached data.
    """

    storage_queries = Attribute(
        docstring="The queries to perform when retrieving data for each "
        "of the components in the system from the storage backend. The "
        "keys of this dictionary will correspond to the metadata keys made "
        "available to the workflow system.",
        type_hint=dict,
        default_value=default_storage_query(),
    )
    # NOTE(review): the docstring below states that zero disables the cap,
    # but `validate` asserts the value is strictly positive - confirm which
    # of the two is intended.
    maximum_data_points = Attribute(
        docstring="The maximum number of data points to include "
        "as part of the multi-state reweighting calculations. If "
        "zero, no cap will be applied.",
        type_hint=int,
        default_value=4,
    )
    temperature_cutoff = Attribute(
        docstring="The maximum difference between the target temperature "
        "and the temperature at which cached data was collected to. Data "
        "collected for temperatures outside of this cutoff will be ignored.",
        type_hint=pint.Quantity,
        default_value=5.0 * unit.kelvin,
    )

    def validate(self, attribute_type=None):
        """Validate the attributes, then check the reweighting options.

        Raises
        ------
        AssertionError
            If there are no storage queries, ``maximum_data_points`` is not
            greater than zero, or any storage query is not a
            `SimulationDataQuery`.
        """
        super().validate(attribute_type)

        assert len(self.storage_queries) > 0
        assert self.maximum_data_points > 0

        assert not any(
            not isinstance(query, SimulationDataQuery)
            for query in self.storage_queries.values()
        )
class RequestResult(AttributeClass):
    """The current results of an estimation request - these results may be
    partial if the server hasn't yet completed the request.
    """

    queued_properties = Attribute(
        docstring="The set of properties which have yet to be, or "
        "are currently being estimated.",
        type_hint=PhysicalPropertyDataSet,
        default_value=PhysicalPropertyDataSet(),
    )
    estimated_properties = Attribute(
        docstring="The set of properties which have been successfully estimated.",
        type_hint=PhysicalPropertyDataSet,
        default_value=PhysicalPropertyDataSet(),
    )
    unsuccessful_properties = Attribute(
        docstring="The set of properties which could not be successfully estimated.",
        type_hint=PhysicalPropertyDataSet,
        default_value=PhysicalPropertyDataSet(),
    )
    # The docstring here previously duplicated the `queued_properties` one.
    exceptions = Attribute(
        docstring="The set of exceptions which were raised while "
        "estimating the properties.",
        type_hint=list,
        default_value=[],
    )

    def validate(self, attribute_type=None):
        """Validate the attributes, then check the type of each exception.

        Parameters
        ----------
        attribute_type
            Forwarded unchanged to the parent class' ``validate``.

        Raises
        ------
        AssertionError
            If ``exceptions`` contains anything which is not an
            `EvaluatorException`.
        """
        super(RequestResult, self).validate(attribute_type)
        assert all(isinstance(x, EvaluatorException) for x in self.exceptions)
class ProtocolSchema(AttributeClass):
    """A schema describing a workflow protocol by its id, its type and its
    inputs.
    """

    id = Attribute(
        docstring="The unique id associated with the protocol.",
        type_hint=str,
    )
    type = Attribute(
        docstring="The type of protocol associated with this schema.",
        type_hint=str,
        read_only=True,
    )
    inputs = Attribute(
        docstring="The inputs to the protocol.", type_hint=dict, read_only=True
    )

    def __init__(self, unique_id=None, protocol_type=None, inputs=None):
        """
        Parameters
        ----------
        unique_id: str, optional
            The value to store as ``id``.
        protocol_type: str, optional
            The value to store as ``type``.
        inputs: dict, optional
            The value to store as ``inputs``.

        Notes
        -----
        Arguments which are `None` are skipped, leaving the corresponding
        attribute untouched.
        """
        initial_values = (
            ("id", unique_id),
            ("type", protocol_type),
            ("inputs", inputs),
        )

        for attribute_name, attribute_value in initial_values:

            if attribute_value is None:
                continue

            self._set_value(attribute_name, attribute_value)

    def to_protocol(self):
        """Builds a protocol object from this schema by delegating to
        `Protocol.from_schema`.

        Returns
        -------
        Protocol
            The protocol created from this schema.
        """
        # Imported at call time rather than at module level.
        from evaluator.workflow.protocols import Protocol

        protocol = Protocol.from_schema(self)
        return protocol
class WorkflowCalculationSchema(CalculationLayerSchema):
    """A calculation layer schema which additionally carries the workflow
    schema to use when estimating a class of physical properties with the
    built-in workflow framework.
    """

    workflow_schema = Attribute(
        docstring="The workflow schema to use when estimating properties.",
        type_hint=WorkflowSchema,
        default_value=UNDEFINED,
    )

    def validate(self, attribute_type=None):
        """Validate the attributes, then validate the workflow schema itself.

        Parameters
        ----------
        attribute_type
            Forwarded unchanged to the parent class' ``validate``. It is not
            passed on to the workflow schema.
        """
        super().validate(attribute_type)

        schema = self.workflow_schema
        schema.validate()
class ExactAmount(Amount):
    """An amount expressed as an exact number of instances of a `Component`
    in a `Substance`.

    The component is assumed to be infinitely dilute (such as ligands in
    binding calculations), and hence to not contribute to the total mole
    fraction of a `Substance`.
    """

    value = Attribute(docstring="The value of this amount.", type_hint=int)

    @property
    def identifier(self):
        """str: The value, rounded to an integer, prefixed with ``n=``."""
        whole_number = int(round(self.value))
        return f"n={whole_number:d}"

    def to_number_of_molecules(self, total_substance_molecules, tolerance=None):
        """Returns the stored value as is; both arguments are ignored."""
        number_of_molecules = self.value
        return number_of_molecules
class _ObjectKeyData(BaseStoredData):
    """A stored data object which records the keys of the items held in the
    storage system.
    """

    object_keys = Attribute(
        docstring="The unique keys of the objects stored in a `StorageBackend`.",
        type_hint=dict,
        default_value={},
    )

    @classmethod
    def has_ancillary_data(cls):
        """bool: Always `False` for this class."""
        return False

    def to_storage_query(self):
        """Not implemented, as this method is never expected to be called
        for this class.

        Raises
        ------
        NotImplementedError
            Always.
        """
        raise NotImplementedError()
class SubstanceQuery(AttributeClass, abc.ABC):
    """A query for data which was collected for substances with specific
    traits, e.g. which contain both a solute and solvent, or only a solvent.
    """

    components_only = Attribute(
        docstring="Only match pure data which was collected for "
        "one of the components in the query substance.",
        type_hint=bool,
        default_value=False,
    )

    # Currently disabled attribute, retained from the original source:
    #
    # component_roles = QueryAttribute(
    #     docstring="Returns data for only the subset of a substance "
    #               "which has the requested roles.",
    #     type_hint=list,
    #     optional=True,
    # )

    def validate(self, attribute_type=None):
        """Defers entirely to the parent class' ``validate``."""
        super().validate(attribute_type)
class PhysicalProperty(AttributeClass, abc.ABC):
    """Represents the value of any physical property and its uncertainty
    if provided.

    It additionally stores the thermodynamic state at which the property
    was collected, the phase it was collected in, information about
    the composition of the observed system, and metadata about how the
    property was collected.
    """

    @classmethod
    @abc.abstractmethod
    def default_unit(cls):
        """pint.Unit: The default unit (e.g. g / mol) associated with this
        class of property."""
        raise NotImplementedError()

    id = Attribute(
        docstring="A unique identifier string assigned to this property",
        type_hint=str,
        default_value=lambda: str(uuid.uuid4()).replace("-", ""),
    )

    # Previously read "measured estimated for".
    substance = Attribute(
        docstring="The substance that this property was measured / estimated for.",
        type_hint=Substance,
    )
    phase = Attribute(
        docstring="The phase / phases that this property was measured in.",
        type_hint=PropertyPhase,
    )
    # The two string fragments were previously joined without a space
    # ("propertywas").
    thermodynamic_state = Attribute(
        docstring="The thermodynamic state that this property "
        "was measured / estimated at.",
        type_hint=ThermodynamicState,
    )

    value = Attribute(
        docstring="The measured / estimated value of this property.",
        type_hint=pint.Quantity,
    )
    uncertainty = Attribute(
        docstring="The uncertainty in measured / estimated value of this property.",
        type_hint=pint.Quantity,
        optional=True,
    )

    source = Attribute(
        docstring="The original source of this physical property.",
        type_hint=Source,
        optional=True,
    )
    metadata = Attribute(
        docstring="Additional metadata associated with this property. All property "
        "metadata will be made accessible to estimation workflows.",
        type_hint=dict,
        optional=True,
    )

    gradients = Attribute(
        docstring="The gradients of this property with respect to "
        "different force field parameters.",
        type_hint=list,
        optional=True,
    )

    def __init__(
        self,
        thermodynamic_state=None,
        phase=PropertyPhase.Undefined,
        substance=None,
        value=None,
        uncertainty=None,
        source=None,
    ):
        """Constructs a new PhysicalProperty object.

        Arguments which are `None` are skipped, leaving the corresponding
        attribute untouched. ``gradients`` is always reset to an empty list.

        Parameters
        ----------
        thermodynamic_state : ThermodynamicState
            The thermodynamic state that the property was measured in.
        phase : PropertyPhase
            The phase that the property was measured in.
        substance : Substance
            The composition of the substance that was measured.
        value: pint.Quantity
            The value of the measured physical property.
        uncertainty: pint.Quantity
            The uncertainty in the measured value.
        source: Source
            The source of this property.
        """
        if thermodynamic_state is not None:
            self.thermodynamic_state = thermodynamic_state
        if phase is not None:
            self.phase = phase

        if substance is not None:
            self.substance = substance

        if value is not None:
            self.value = value
        if uncertainty is not None:
            self.uncertainty = uncertainty

        self.gradients = []

        if source is not None:
            self.source = source

    def __setstate__(self, state):
        """Restores the object from `state`, generating a fresh id when the
        state does not contain one.
        """
        if "id" not in state:
            state["id"] = str(uuid.uuid4()).replace("-", "")

        super(PhysicalProperty, self).__setstate__(state)

    def validate(self, attribute_type=None):
        """Validate the attributes, then check the value (and the uncertainty,
        when set) have the same dimensionality as `default_unit`.

        Raises
        ------
        AssertionError
            If the dimensionality of the value or uncertainty does not match
            that of the default unit.
        """
        super(PhysicalProperty, self).validate(attribute_type)

        expected_dimensionality = self.default_unit().dimensionality

        assert self.value.units.dimensionality == expected_dimensionality

        if self.uncertainty != UNDEFINED:
            assert self.uncertainty.units.dimensionality == expected_dimensionality
class Amount(AttributeClass, abc.ABC):
    """The base class for describing how much of a given component is
    present in a `Substance`.
    """

    value = Attribute(
        docstring="The value of this amount.",
        type_hint=typing.Union[float, int],
        read_only=True,
    )

    def __init__(self, value=UNDEFINED):
        """
        Parameters
        ----------
        value: float or int
            The value of this amount.
        """
        # The attribute is read-only, so it is stored via `_set_value`.
        self._set_value("value", value)

    @property
    def identifier(self):
        """A string identifier for this amount. Subclasses are expected to
        provide it; this base implementation raises `NotImplementedError`.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def to_number_of_molecules(self, total_substance_molecules, tolerance=None):
        """Converts this amount to an exact number of molecules

        Parameters
        ----------
        total_substance_molecules: int
            The total number of molecules in the whole substance. This amount
            will contribute to a portion of this total number.
        tolerance: float, optional
            The tolerance with which this amount should be in. As an example,
            when converting a mole fraction into a number of molecules, the
            total number of molecules may not be sufficiently large enough to
            reproduce this amount.

        Returns
        -------
        int
            The number of molecules which this amount represents,
            given the `total_substance_molecules`.
        """
        raise NotImplementedError()

    def __str__(self):
        text = self.identifier
        return text

    def __repr__(self):
        return f"<{type(self).__name__} {self}>"

    def __eq__(self, other):
        """Amounts are equal when they are of exactly the same class and
        their values are close according to `np.isclose`.
        """
        if type(self) != type(other):
            return False

        return np.isclose(self.value, other.value)

    def __ne__(self, other):
        are_equal = self == other
        return not are_equal

    def __hash__(self):
        """The hash is derived from the string identifier."""
        return hash(self.identifier)
class WorkflowSchema(AttributeClass):
    """The schematic for a property estimation workflow.

    The schema lists the schemas of the protocols which make up the workflow,
    any replicators which should be applied to them, and references to the
    protocol outputs which provide the final value, its gradients and the
    data to store. `validate` checks that all of these are consistent with
    one another.
    """

    protocol_schemas = Attribute(
        docstring="The schemas for the protocols which will make up the workflow.",
        type_hint=list,
        default_value=[],
    )
    protocol_replicators = Attribute(
        docstring="A set of replicators which will replicate parts of the workflow.",
        type_hint=list,
        optional=True,
    )

    final_value_source = Attribute(
        docstring="A reference to which protocol output corresponds to the estimated "
        "value of the property.",
        type_hint=ProtocolPath,
        optional=True,
    )
    gradients_sources = Attribute(
        docstring="A list of references to the protocol outputs which correspond to "
        "the gradients of the estimated property with respect to specified force "
        "field parameters.",
        type_hint=list,
        optional=True,
    )

    outputs_to_store = Attribute(
        docstring="A collection of data classes to populate ready to be stored by a "
        "`StorageBackend`.",
        type_hint=dict,
        optional=True,
    )

    def replace_protocol_types(self, protocol_replacements, protocol_group_schema=None):
        """Replaces protocols with given types with other protocols
        of specified replacements. This is useful when replacing
        the default protocols with custom ones, or swapping out base
        protocols with actual implementations

        Warnings
        --------
        This method is NOT fully implemented and is likely to fail in
        all but a few specific cases. This method should be used with
        extreme caution.

        Parameters
        ----------
        protocol_replacements: dict of str and str, None
            A dictionary with keys of the types of protocols which should be
            replaced with those protocols named by the values.
        protocol_group_schema: ProtocolGroupSchema
            The protocol group to apply the replacements to. This
            is mainly used when applying this method recursively.
        """

        if protocol_replacements is None:
            return

        if protocol_group_schema is None:
            protocol_schemas = {x.id: x for x in self.protocol_schemas}
        else:
            protocol_schemas = protocol_group_schema.protocol_schemas

        # Only existing keys are re-assigned below, so the size of the
        # dictionary does not change while it is being iterated.
        for protocol_schema_key in protocol_schemas:

            protocol_schema = protocol_schemas[protocol_schema_key]

            # NOTE(review): schemas whose type is not being replaced are
            # skipped entirely, so a group is only recursed into when the
            # group itself was replaced - confirm this is intended.
            if protocol_schema.type not in protocol_replacements:
                continue

            protocol = protocol_schema.to_protocol()

            new_protocol = registered_workflow_protocols[
                protocol_replacements[protocol_schema.type]
            ](protocol_schema.id)

            # Carry over the values of those inputs which both the old and
            # the new protocol share.
            for input_path in new_protocol.required_inputs:

                if input_path not in protocol.required_inputs:
                    continue

                value = protocol.get_value(input_path)
                new_protocol.set_value(input_path, value)

            protocol_schemas[protocol_schema_key] = new_protocol.schema

            # NOTE(review): `list.remove` raises a ValueError when the schema
            # is not in the top level list, which looks possible when a
            # `protocol_group_schema` was passed in - confirm.
            self.protocol_schemas.remove(protocol_schema)
            self.protocol_schemas.append(new_protocol.schema)

            if isinstance(protocol_schemas[protocol_schema_key], ProtocolGroupSchema):
                self.replace_protocol_types(
                    protocol_replacements, protocol_schemas[protocol_schema_key]
                )

    def _find_protocols_to_be_replicated(self, replicator, protocols=None):
        """Finds all protocols which have been flagged to be replicated
        by a specified replicator.

        A protocol is flagged when its id contains the placeholder id of
        the replicator. The schemas within protocol groups are searched
        recursively.

        Parameters
        ----------
        replicator: ProtocolReplicator
            The replicator of interest.
        protocols: dict of str and ProtocolSchema or list of ProtocolSchema, optional
            The protocols to search through. If None, then
            all protocols in this schema will be searched.

        Returns
        -------
        list of str
            The ids of the protocols to be replicated by the specified replicator
        """

        if protocols is None:
            protocols = {x.id: x for x in self.protocol_schemas}

        if isinstance(protocols, list):
            protocols = {protocol.id: protocol for protocol in protocols}

        protocols_to_replicate = []

        for protocol_id, protocol in protocols.items():

            if protocol_id.find(replicator.placeholder_id) >= 0:
                protocols_to_replicate.append(protocol_id)

            # Search through any children
            if not isinstance(protocol, ProtocolGroupSchema):
                continue

            protocols_to_replicate.extend(
                self._find_protocols_to_be_replicated(
                    replicator, protocol.protocol_schemas
                )
            )

        return protocols_to_replicate

    def _get_unreplicated_path(self, protocol_path):
        """Checks to see if the protocol pointed to by this path will only
        exist after a replicator has been applied, and if so, returns a
        path to the unreplicated protocol.

        Parameters
        ----------
        protocol_path: ProtocolPath
            The path to convert to an unreplicated path.

        Returns
        -------
        ProtocolPath
            The path which should point to only unreplicated protocols
        """

        if self.protocol_replicators == UNDEFINED:
            return protocol_path.copy()

        full_unreplicated_path = str(protocol_path.full_path)

        for replicator in self.protocol_replicators:

            # The path already refers to the unreplicated protocol.
            if replicator.placeholder_id in full_unreplicated_path:
                continue

            protocols_to_replicate = self._find_protocols_to_be_replicated(replicator)

            for protocol_id in protocols_to_replicate:

                # Build a pattern which matches the protocol id with the
                # placeholder swapped for any run of digits. The id is
                # escaped as a whole, after which the (now escaped) `\d+`
                # marker is restored.
                match_pattern = re.escape(
                    protocol_id.replace(replicator.placeholder_id, r"\d+")
                )
                match_pattern = match_pattern.replace(re.escape(r"\d+"), r"\d+")

                full_unreplicated_path = re.sub(
                    match_pattern, protocol_id, full_unreplicated_path
                )

        return ProtocolPath.from_string(full_unreplicated_path)

    @staticmethod
    def _get_unnested_protocol_path(protocol_path):
        """Returns a protocol path whose nested property name
        has been truncated to only include the top level name,
        e.g:

        `some_protocol_id.value.error` would be truncated to `some_protocol_id.value`

        and

        `some_protocol_id.value[1]` would be truncated to `some_protocol_id.value`

        Parameters
        ----------
        protocol_path: ProtocolPath
            The path to truncate.

        Returns
        -------
        ProtocolPath
            The truncated path.
        """
        # Remove any nested property names, and then any array indices,
        # from the path.
        property_name = protocol_path.property_name.split(".")[0].split("[")[0]

        return ProtocolPath(property_name, *protocol_path.protocol_ids)

    def _validate_replicators(self, schemas_by_id):
        """Validates the replicators of this schema, if any are defined.

        Parameters
        ----------
        schemas_by_id: dict of str and ProtocolSchema
            The top level protocol schemas, keyed by their ids.

        Raises
        ------
        AssertionError
            If a replicator is of the wrong type or has an empty id.
        ValueError
            If the template values of a replicator are neither a list nor a
            `ProtocolPath`, reference a protocol which does not exist, are a
            non-global `ProtocolPath`, or if the final value source comes
            from a protocol which is being replicated.
        """

        if self.protocol_replicators == UNDEFINED:
            return

        assert all(
            isinstance(x, ProtocolReplicator) for x in self.protocol_replicators
        )

        for replicator in self.protocol_replicators:

            assert replicator.id is not None and len(replicator.id) > 0

            if not isinstance(replicator.template_values, (list, ProtocolPath)):

                raise ValueError(
                    "The template values of a replicator must either be "
                    "a list of values, or a reference to a list of values."
                )

            if isinstance(replicator.template_values, list):

                for template_value in replicator.template_values:

                    # Constant values need no further checks.
                    if not isinstance(template_value, ProtocolPath):
                        continue

                    if template_value.start_protocol not in schemas_by_id:

                        raise ValueError(
                            f"The value source {template_value} does not exist."
                        )

            elif isinstance(replicator.template_values, ProtocolPath):

                if not replicator.template_values.is_global:

                    raise ValueError(
                        "Template values must either be a constant, or come from the "
                        "global scope."
                    )

            if (
                self.final_value_source != UNDEFINED
                and self.final_value_source.protocol_path.find(
                    replicator.placeholder_id
                )
                >= 0
            ):

                # A space was previously missing between the two fragments
                # of this message ("froma").
                raise ValueError(
                    "The final value source cannot come from "
                    "a protocol which is being replicated."
                )

    def _validate_final_value(self, schemas_by_id):
        """Validates that the final value source, if set, points to an
        existing protocol output of a suitable type.

        Parameters
        ----------
        schemas_by_id: dict of str and ProtocolSchema
            The top level protocol schemas, keyed by their ids.

        Raises
        ------
        ValueError
            If the referenced protocol does not exist.
        AssertionError
            If the source is not a `ProtocolPath`, or the referenced output
            is not of a suitable type.
        """

        if self.final_value_source == UNDEFINED:
            return

        assert isinstance(self.final_value_source, ProtocolPath)

        if self.final_value_source.start_protocol not in schemas_by_id:

            raise ValueError(
                f"The value source {self.final_value_source} does not exist."
            )

        protocol_schema = schemas_by_id[self.final_value_source.start_protocol]

        protocol_object = protocol_schema.to_protocol()
        # Retrieving the value is expected to fail if the output is missing.
        protocol_object.get_value(self.final_value_source)

        attribute_type = protocol_object.get_class_attribute(
            self.final_value_source
        ).type_hint

        # TODO: In Python < 3.7 the Union type will collapse pint.Quantity
        #       and pint.Measurement into pint.Quantity such that this check
        #       will fail. For now we allow Measurements or Quantities, but
        #       this should be reverted to just pint.Measurement when dropping
        #       3.6 support.
        if is_union_type(attribute_type):
            assert is_type_subclass_of_type(attribute_type, pint.Quantity)
        else:
            assert is_type_subclass_of_type(attribute_type, pint.Measurement)

    def _validate_gradients(self, schemas_by_id):
        """Validates that each gradient source, if any are set, points to an
        existing protocol output whose type is a `ParameterGradient`.

        Parameters
        ----------
        schemas_by_id: dict of str and ProtocolSchema
            The top level protocol schemas, keyed by their ids.

        Raises
        ------
        ValueError
            If a referenced protocol does not exist.
        AssertionError
            If a source is not a `ProtocolPath`, or the referenced output
            is not of a suitable type.
        """

        if self.gradients_sources == UNDEFINED:
            return

        assert all(isinstance(x, ProtocolPath) for x in self.gradients_sources)

        for gradient_source in self.gradients_sources:

            if gradient_source.start_protocol not in schemas_by_id:

                raise ValueError(
                    f"The gradient source {gradient_source} does not exist."
                )

            protocol_schema = schemas_by_id[gradient_source.start_protocol]

            protocol_object = protocol_schema.to_protocol()
            protocol_object.get_value(gradient_source)

            attribute_type = protocol_object.get_class_attribute(
                gradient_source
            ).type_hint

            assert is_type_subclass_of_type(attribute_type, ParameterGradient)

    def _validate_outputs_to_store(self, schemas_by_id):
        """Validates that the references to the outputs to store
        are valid.

        Parameters
        ----------
        schemas_by_id: dict of str and ProtocolSchema
            The top level protocol schemas, keyed by their ids.

        Raises
        ------
        ValueError
            If an output takes its value from a replicator or a protocol
            which does not exist.
        AssertionError
            If an output to store is not a `BaseStoredData`.
        """

        if self.outputs_to_store == UNDEFINED:
            return

        assert all(
            isinstance(x, BaseStoredData) for x in self.outputs_to_store.values()
        )

        # Treat undefined replicators as an empty list (as is done when
        # validating the interfaces) so that a reference to a replicator
        # yields the intended ValueError rather than a failure to iterate.
        protocol_replicators = self.protocol_replicators

        if protocol_replicators == UNDEFINED:
            protocol_replicators = []

        for output_to_store in self.outputs_to_store.values():

            output_to_store.validate()

            for attribute_name in output_to_store.get_attributes(StorageAttribute):

                attribute_value = getattr(output_to_store, attribute_name)

                if isinstance(attribute_value, ReplicatorValue):

                    matching_replicas = [
                        x
                        for x in protocol_replicators
                        if attribute_value.replicator_id == x.id
                    ]

                    if len(matching_replicas) == 0:

                        raise ValueError(
                            f"An output to store is trying to take its value from a "
                            f"replicator {attribute_value.replicator_id} which does "
                            f"not exist."
                        )

                # Only non-global protocol paths need checking further.
                if (
                    not isinstance(attribute_value, ProtocolPath)
                    or attribute_value.is_global
                ):
                    continue

                if attribute_value.start_protocol not in schemas_by_id:
                    raise ValueError(f"The {attribute_value} source does not exist.")

                protocol_schema = schemas_by_id[attribute_value.start_protocol]

                protocol_object = protocol_schema.to_protocol()
                protocol_object.get_value(attribute_value)

    def _validate_interfaces(self, schemas_by_id):
        """Validates the flow of the data between protocols, ensuring
        that inputs and outputs correctly match up.

        Parameters
        ----------
        schemas_by_id: dict of str and ProtocolSchema
            The top level protocol schemas, keyed by their ids.

        Raises
        ------
        ValueError
            If a non-optional input has not been set, an input references a
            protocol which does not exist, or the type of a referenced output
            does not match the type of the input it is fed into.
        """

        # The replicators do not change while validating, so resolve them once.
        protocol_replicators = self.protocol_replicators

        if protocol_replicators == UNDEFINED:
            protocol_replicators = []

        for protocol_schema in schemas_by_id.values():

            protocol_object = protocol_schema.to_protocol()

            # First make sure that every non-optional input has been set.
            for input_path in protocol_object.required_inputs:

                input_value = protocol_object.get_value(input_path)
                input_attribute = protocol_object.get_class_attribute(input_path)

                if not isinstance(input_attribute, InputAttribute):
                    continue

                is_optional = input_attribute.optional

                if input_value == UNDEFINED and is_optional is False:

                    raise ValueError(
                        f"The {input_path} required input of protocol "
                        f"{protocol_schema.id} was not set."
                    )

            # Then check each of the references to other protocols' outputs.
            for input_path in protocol_object.required_inputs:

                value_references = protocol_object.get_value_references(input_path)

                for source_path, value_reference in value_references.items():

                    if value_reference.is_global:
                        # We handle global input validation separately
                        continue

                    value_reference = self._get_unreplicated_path(value_reference)

                    # Make sure the other protocol whose output we are interested
                    # in actually exists.
                    if (
                        value_reference.start_protocol not in schemas_by_id
                        and value_reference.start_protocol != protocol_object.id
                    ):

                        raise ValueError(
                            f"The {protocol_object.id} protocol tries to take input "
                            f"from a non-existent protocol: {value_reference.full_path}"
                        )

                    if value_reference.start_protocol != protocol_object.id:

                        other_protocol_schema = schemas_by_id[
                            value_reference.start_protocol
                        ]
                        other_protocol_object = other_protocol_schema.to_protocol()

                    else:
                        other_protocol_object = protocol_object

                    unnested_value_reference = self._get_unnested_protocol_path(
                        value_reference
                    )
                    unnested_source_path = self._get_unnested_protocol_path(
                        source_path
                    )

                    # Make sure the other protocol has the output referenced
                    # by this input.
                    other_protocol_object.get_value(unnested_value_reference)

                    # Do a very rudimentary type check between the input and
                    # output types. This is not currently possible for nested
                    # or indexed properties, or outputs of replicated protocols.
                    if (
                        value_reference.full_path != unnested_value_reference.full_path
                        or source_path.full_path != unnested_source_path.full_path
                    ):
                        continue

                    # The reference crosses a replication boundary when the
                    # placeholder of a replicator appears in exactly one of
                    # the id of this protocol and the path of the reference.
                    is_replicated_reference = any(
                        (replicator.placeholder_id in protocol_schema.id)
                        != (replicator.placeholder_id in value_reference.protocol_path)
                        for replicator in protocol_replicators
                    )

                    if is_replicated_reference:
                        continue

                    expected_input_type = protocol_object.get_class_attribute(
                        unnested_source_path
                    ).type_hint
                    expected_output_type = other_protocol_object.get_class_attribute(
                        unnested_value_reference
                    ).type_hint

                    if expected_input_type is None or expected_output_type is None:
                        continue

                    if not is_type_subclass_of_type(
                        expected_output_type, expected_input_type
                    ):

                        raise ValueError(
                            f"The output type ({expected_output_type}) of "
                            f"{value_reference} does not match the requested "
                            f"input type ({expected_input_type}) of {source_path}."
                        )

    def validate(self, attribute_type=None):
        """Validates the attributes of this schema, and then the consistency
        of the whole workflow which it describes.

        Parameters
        ----------
        attribute_type
            Forwarded unchanged to the parent class' ``validate``.

        Raises
        ------
        AssertionError
            If there are no protocol schemas, or one is of the wrong type.
        ValueError
            If any of the more detailed checks fail.
        """

        super(WorkflowSchema, self).validate(attribute_type)

        # Do some simple type checking.
        assert len(self.protocol_schemas) > 0
        assert all(isinstance(x, ProtocolSchema) for x in self.protocol_schemas)

        schemas_by_id = {x.id: x for x in self.protocol_schemas}

        # Validate the different pieces of data to populate / draw from.
        self._validate_final_value(schemas_by_id)
        self._validate_gradients(schemas_by_id)
        self._validate_replicators(schemas_by_id)
        self._validate_outputs_to_store(schemas_by_id)

        # Validate the interfaces between protocols
        self._validate_interfaces(schemas_by_id)
class NestedAttributeObject(AttributeClass):
    """An attribute class holding another attribute object, plus an optional
    list and an optional dictionary, all with empty docstrings.
    """

    some_value = Attribute(docstring="", type_hint=AttributeObject)

    some_list = Attribute(
        docstring="", type_hint=list, default_value=UNDEFINED, optional=True
    )
    some_dict = Attribute(
        docstring="", type_hint=dict, default_value=UNDEFINED, optional=True
    )
class Substance(AttributeClass):
    """Defines the components, their amounts, and their roles in a system.

    Examples
    --------
    A neat liquid containing only a single component:

    >>> from evaluator.substances import Component, ExactAmount, MoleFraction
    >>> liquid = Substance()
    >>> liquid.add_component(Component(smiles='O'), MoleFraction(1.0))

    A binary mixture containing two components, where the mole fractions
    are explicitly stated:

    >>> binary_mixture = Substance()
    >>> binary_mixture.add_component(Component(smiles='O'), MoleFraction(0.2))
    >>> binary_mixture.add_component(Component(smiles='CO'), MoleFraction(0.8))

    The infinite dilution of one molecule within a bulk solvent or mixture
    may also be specified by defining the exact number of copies of that
    molecule, rather than a mole fraction:

    >>> benzene = Component(smiles='C1=CC=CC=C1', role=Component.Role.Solute)
    >>> water = Component(smiles='O', role=Component.Role.Solvent)
    >>>
    >>> infinite_dilution = Substance()
    >>> infinite_dilution.add_component(component=benzene, amount=ExactAmount(1)) # Infinite dilution.
    >>> infinite_dilution.add_component(component=water, amount=MoleFraction(1.0))

    In this example we explicitly flag benzene as being the solute and the
    water component the solvent. This enables workflows to easily identify
    key molecules of interest, such as the molecule which should be 'grown'
    into solution during solvation free energy calculations.
    """

    components = Attribute(
        docstring="A list of all of the components in this substance.",
        type_hint=tuple,
        default_value=tuple(),
        read_only=True,
    )
    amounts = Attribute(
        docstring="the amounts of the component in this substance",
        type_hint=dict,
        default_value=dict(),
        read_only=True,
    )

    @property
    def identifier(self):
        """str: A unique str representation of this substance, which
        encodes all components and their amounts in the substance."""
        return self._get_identifier()

    @property
    def number_of_components(self):
        """int: The number of different components in this substance."""
        return len(self.components)

    def _get_identifier(self):
        """Generates a unique string identifier for this substance, which
        encodes all components and their amounts in the substance

        Returns
        -------
        str
            The string identifier.
        """
        # Sort both the components and their amounts so that the identifier
        # does not depend on the order in which they were added.
        component_identifiers = sorted(
            component.identifier for component in self.components
        )

        identifier_split = []

        for component_identifier in component_identifiers:

            component_amounts = sorted(
                self.amounts[component_identifier], key=lambda x: type(x).__name__
            )
            amount_identifier = ",".join(
                component_amount.identifier for component_amount in component_amounts
            )

            identifier_split.append(f"{component_identifier}{{{amount_identifier}}}")

        return "|".join(identifier_split)

    @classmethod
    def from_components(cls, *components):
        """Creates a new `Substance` object from a list of components.
        This method assumes that all components should be present with
        equal mole fractions.

        Parameters
        ----------
        components: Component or str
            The components to add to the substance. These may either be full
            `Component` objects or just the smiles representation
            of the component.

        Returns
        -------
        Substance
            The substance containing the requested components in equal amounts.

        Raises
        ------
        ValueError
            If no components are provided.
        """

        if not components:
            raise ValueError("At least one component must be specified")

        mole_fraction = 1.0 / len(components)

        return_substance = cls()

        for component in components:

            if isinstance(component, str):
                component = Component(smiles=component)

            return_substance.add_component(component, MoleFraction(mole_fraction))

        return return_substance

    def add_component(self, component, amount):
        """Add a component to the Substance. If the component is already
        present in the substance with an amount of the same type, the new
        amount is added to that existing amount. Amounts of any other type
        which are already stored for the component are left untouched.

        Parameters
        ----------
        component : Component
            The component to add to the system.
        amount : Amount
            The amount of this component in the substance.

        Raises
        ------
        ValueError
            If adding a mole fraction would take the total mole fraction of
            the substance above 1.0.
        """

        assert isinstance(component, Component)
        assert isinstance(amount, Amount)

        component.validate()
        amount.validate()

        if isinstance(amount, MoleFraction):

            total_mole_fraction = amount.value

            for existing_amounts in self.amounts.values():

                total_mole_fraction += sum(
                    [
                        existing_amount.value
                        for existing_amount in existing_amounts
                        if isinstance(existing_amount, MoleFraction)
                    ]
                )

            # Snap totals which only differ from one by floating point noise.
            if np.isclose(total_mole_fraction, 1.0):
                total_mole_fraction = 1.0

            if total_mole_fraction > 1.0:

                raise ValueError(
                    f"The total mole fraction of this substance {total_mole_fraction} exceeds 1.0"
                )

        if component.identifier not in self.amounts:

            components = (*self.components, component)
            self._set_value("components", components)

        all_amounts = (
            []
            if component.identifier not in self.amounts
            else self.amounts[component.identifier]
        )

        remaining_amounts = []

        # Fold any existing amount of the same type into the new amount and
        # carry every amount of a different type over unchanged. The loop
        # must visit *all* existing amounts: stopping at the first match
        # would silently discard any differently typed amounts stored
        # after it.
        for existing_amount in all_amounts:

            if type(existing_amount) is type(amount):
                amount = type(amount)(existing_amount.value + amount.value)
            else:
                remaining_amounts.append(existing_amount)

        remaining_amounts.append(amount)

        # Replace the dictionary rather than mutating the stored one in place.
        amounts = dict(self.amounts)
        amounts[component.identifier] = tuple(remaining_amounts)

        self._set_value("amounts", amounts)

    def get_amounts(self, component):
        """Returns the amounts of the component in this substance.

        Parameters
        ----------
        component: str or Component
            The component (or its identifier) to retrieve the amount of.

        Returns
        -------
        tuple of Amount
            The amounts of the component in this substance.
        """
        assert isinstance(component, (str, Component))

        identifier = component if isinstance(component, str) else component.identifier
        return self.amounts[identifier]

    def get_molecules_per_component(self, maximum_molecules, tolerance=None):
        """Returns the number of molecules for each component in this
        substance, given a maximum total number of molecules.

        Parameters
        ----------
        maximum_molecules: int
            The maximum number of molecules.
        tolerance: float, optional
            The tolerance within which this amount should be represented.
            As an example, when converting a mole fraction into a number
            of molecules, the total number of molecules may not be
            sufficiently large enough to reproduce this amount.

        Returns
        -------
        dict of str and int
            A dictionary of molecule counts per component, where each key
            is a component identifier.

        Raises
        ------
        ValueError
            If the exact amounts alone require more than
            `maximum_molecules` molecules.
        """

        number_of_molecules = {}
        remaining_molecule_slots = maximum_molecules

        # Exact amounts are subtracted up front; every amount is then
        # converted against whatever number of slots is left over.
        for component in self.components:

            for amount in self.amounts[component.identifier]:

                if not isinstance(amount, ExactAmount):
                    continue

                remaining_molecule_slots -= amount.value

        if remaining_molecule_slots < 0:

            raise ValueError(
                f"The required number of molecules {maximum_molecules - remaining_molecule_slots} "
                f"exceeds the provided maximum number ({maximum_molecules})."
            )

        for component in self.components:

            number_of_molecules[component.identifier] = 0

            for amount in self.amounts[component.identifier]:

                number_of_molecules[
                    component.identifier
                ] += amount.to_number_of_molecules(remaining_molecule_slots, tolerance)

        return number_of_molecules

    @staticmethod
    def calculate_aqueous_ionic_mole_fraction(ionic_strength):
        """Determines what mole fraction of ions is needed to yield
        an aqueous system of a given ionic strength.

        Parameters
        ----------
        ionic_strength: pint.Quantity
            The ionic strength in units of molar.

        Returns
        -------
        float
            The mole fraction of ions.
        """
        # NOTE(review): the value returned is the result of dividing two
        # quantities, so it may be a dimensionless quantity rather than a
        # plain float -- confirm.

        # Taken from YANK:
        # https://github.com/choderalab/yank/blob/4dfcc8e127c51c20180fe6caeb49fcb1f21730c6/Yank/pipeline.py#L1869
        water_molarity = (998.23 * unit.gram / unit.litre) / (
            18.01528 * unit.gram / unit.mole
        )

        ionic_mole_fraction = ionic_strength / (ionic_strength + water_molarity)
        return ionic_mole_fraction

    def __str__(self):
        return self.identifier

    def __repr__(self):
        return f"<Substance {str(self)}>"

    def __hash__(self):
        return hash(self.identifier)

    def __eq__(self, other):
        # Compare the identifiers themselves rather than their hashes, so
        # that a hash collision can never make two substances appear equal.
        return type(self) == type(other) and self.identifier == other.identifier

    def __ne__(self, other):
        return not (self == other)

    def __setstate__(self, state):
        # Handle the list -> tuple conversion manually.
        assert "amounts" in state

        for key in state["amounts"]:

            assert isinstance(state["amounts"][key], (list, tuple))
            state["amounts"][key] = tuple(state["amounts"][key])

        super(Substance, self).__setstate__(state)

    def __len__(self):
        return len(self.components)

    def __iter__(self):
        return iter(self.components)

    def validate(self, attribute_type=None):
        """Validates the components and amounts stored in this substance.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent class, and to the
            `validate` methods of each component and amount.

        Raises
        ------
        AssertionError
            If a component or amount is of the wrong type, or a component
            has no amount associated with it.
        ValueError
            If the substance contains mole fractions which do not sum to 1.0.
        """
        super(Substance, self).validate(attribute_type)

        # Validate all of the components.
        assert all(isinstance(x, Component) for x in self.components)
        assert all(x.identifier in self.amounts for x in self.components)

        # Validate the amounts
        assert all(isinstance(x, tuple) for x in self.amounts.values())
        assert all(len(x) > 0 for x in self.amounts.values())

        for component in self.components:

            component.validate(attribute_type)
            amounts = self.amounts[component.identifier]

            assert all(isinstance(x, Amount) for x in amounts)

            for amount in amounts:
                amount.validate(attribute_type)

        contains_mole_fraction = any(
            isinstance(x, MoleFraction) for y in self.amounts.values() for x in y
        )

        if contains_mole_fraction:

            total_mole_fraction = 0.0

            for component_amounts in self.amounts.values():

                total_mole_fraction += sum(
                    [
                        amount.value
                        for amount in component_amounts
                        if isinstance(amount, MoleFraction)
                    ]
                )

            if not np.isclose(total_mole_fraction, 1.0):

                raise ValueError(
                    f"The total mole fraction of this substance "
                    f"({total_mole_fraction}) must equal 1.0"
                )
class Component(AttributeClass):
    """Defines a single component in a chemical system, as well as its
    role within the system (if any).
    """

    class Role(Enum):
        """An enum which describes the role of a component in the system,
        such as whether the component is a solvent, a solute, a receptor etc.

        These roles are mainly used by workflow to identify the correct
        species in a system, such as when doing docking or performing
        solvation free energy calculations.
        """

        Solvent = "solv"
        Solute = "sol"
        Ligand = "lig"
        Receptor = "rec"

    smiles = Attribute(
        docstring="The SMILES pattern which describes this component.",
        type_hint=str,
        read_only=True,
    )
    role = Attribute(
        docstring="The role of this component in the system.",
        type_hint=Role,
        default_value=Role.Solvent,
        read_only=True,
    )

    @property
    def identifier(self):
        """str: A unique identifier for this component."""
        return f"{self.smiles}{{{self.role.value}}}"

    def __init__(self, smiles=UNDEFINED, role=Role.Solvent):
        """Constructs a new Component object from a SMILES pattern and
        a role.

        Parameters
        ----------
        smiles: str
            A SMILES descriptor of the component. Any value other than
            `UNDEFINED` is standardized before being stored.
        role: Component.Role
            The role of this component in the system.
        """
        # An undefined pattern is stored untouched; there is nothing to
        # standardize in that case.
        stored_smiles = (
            self._standardize_smiles(smiles) if smiles != UNDEFINED else smiles
        )

        self._set_value("smiles", stored_smiles)
        self._set_value("role", role)

    @staticmethod
    def _standardize_smiles(smiles):
        """Standardizes a SMILES pattern to be canonical (but not
        necessarily isomeric) using the `cmiles` library.

        Parameters
        ----------
        smiles: str
            The SMILES pattern to standardize.

        Returns
        -------
        The standardized SMILES pattern.
        """
        from cmiles.utils import load_molecule, mol_to_smiles

        molecule = load_molecule(smiles, toolkit="rdkit")

        # Options shared by the isomeric attempt and the fall-back.
        common_options = {"explicit_hydrogen": False, "mapped": False}

        try:
            # Prefer an isomeric pattern where one can be generated.
            return mol_to_smiles(molecule, isomeric=True, **common_options)
        except ValueError:
            # Otherwise settle for the non-isomeric pattern.
            return mol_to_smiles(molecule, isomeric=False, **common_options)

    def __str__(self):
        return self.identifier

    def __repr__(self):
        class_name = self.__class__.__name__
        return f"<{class_name} {str(self)}>"

    def __hash__(self):
        return hash(self.identifier)

    def __eq__(self, other):
        if not type(self) == type(other):
            return False

        return self.identifier == other.identifier

    def __ne__(self, other):
        return not (self == other)

    def __setstate__(self, state):
        # Re-standardize the stored pattern before handing the state (which
        # is updated in place) to the parent class.
        state["smiles"] = Component._standardize_smiles(state["smiles"])
        super(Component, self).__setstate__(state)
class Batch(AttributeClass):
    """Represents a batch of physical properties which are being estimated by
    the server for a given set of force field parameters.

    The expectation is that this object will be passed between calculation
    layers, whereby each layer will attempt to estimate each of the
    `queued_properties`. Those properties which can be estimated will be moved
    to the `estimated_properties` set, while those that couldn't will remain
    in the `queued_properties` set ready for the next layer.
    """

    id = Attribute(
        docstring="The unique id of this batch.",
        type_hint=str,
        # `UUID.hex` is the 32 character hexadecimal form of the id, i.e.
        # the usual string representation with the dashes removed.
        default_value=lambda: uuid.uuid4().hex,
    )

    force_field_id = Attribute(
        docstring="The id of the force field being used to estimate "
        "this batch of properties.",
        type_hint=str,
    )
    options = Attribute(
        docstring="The options being used to estimate this batch.",
        type_hint=RequestOptions,
    )
    parameter_gradient_keys = Attribute(
        docstring="The parameters that this batch of physical properties "
        "should be differentiated with respect to.",
        type_hint=list,
    )

    queued_properties = Attribute(
        docstring="The set of properties which have yet to be estimated.",
        type_hint=list,
        default_value=[],
    )
    estimated_properties = Attribute(
        docstring="The set of properties which have been successfully estimated.",
        type_hint=list,
        default_value=[],
    )
    unsuccessful_properties = Attribute(
        docstring="The set of properties which could not be estimated.",
        type_hint=list,
        default_value=[],
    )

    exceptions = Attribute(
        docstring="Any exceptions raised while estimating this batch of "
        "properties.",
        type_hint=list,
        default_value=[],
    )

    def validate(self, attribute_type=None):
        """Checks the type of every element stored in the list attributes
        of this batch, after running the parent class validation.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent class `validate`.

        Raises
        ------
        AssertionError
            If any property is not a `PhysicalProperty`, any exception is
            not an `EvaluatorException`, or any gradient key is not a
            `ParameterGradientKey`.
        """
        super(Batch, self).validate(attribute_type)

        assert all(isinstance(x, PhysicalProperty) for x in self.queued_properties)
        assert all(isinstance(x, PhysicalProperty) for x in self.estimated_properties)
        assert all(
            isinstance(x, PhysicalProperty) for x in self.unsuccessful_properties
        )

        assert all(isinstance(x, EvaluatorException) for x in self.exceptions)

        assert all(
            isinstance(x, ParameterGradientKey) for x in self.parameter_gradient_keys
        )
class ThermodynamicState(AttributeClass):
    """Data specifying a physical thermodynamic state obeying
    Boltzmann statistics.

    Notes
    -----
    Equality of two thermodynamic states is determined by comparing
    the temperature in kelvin to within 3 decimal places, and comparing
    the pressure (if defined) in pascals to within 3 decimal places.

    Examples
    --------
    Specify an NPT state at 298 K and 1 atm pressure.

    >>> state = ThermodynamicState(temperature=298.0*unit.kelvin, pressure=1.0*unit.atmospheres)

    Note that the pressure is only relevant for periodic systems.
    """

    temperature = Attribute(
        docstring="The external temperature.", type_hint=pint.Quantity
    )
    pressure = Attribute(
        docstring="The external pressure.", type_hint=pint.Quantity, optional=True
    )

    @property
    def inverse_beta(self):
        """Returns the temperature multiplied by the molar gas constant"""
        return (self.temperature * unit.molar_gas_constant).to(
            unit.kilojoule / unit.mole
        )

    @property
    def beta(self):
        """Returns one divided by the temperature multiplied by the molar
        gas constant"""
        return 1.0 / self.inverse_beta

    def __init__(self, temperature=None, pressure=None):
        """Constructs a new ThermodynamicState object.

        Parameters
        ----------
        temperature : pint.Quantity
            The external temperature
        pressure : pint.Quantity
            The external pressure
        """
        # Only assign the values which were actually provided, leaving the
        # other attributes untouched.
        if temperature is not None:
            self.temperature = temperature
        if pressure is not None:
            self.pressure = pressure

    def validate(self, attribute_type=None):
        """Checks that the temperature, and the pressure if it is defined,
        are strictly positive.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent class `validate`.

        Raises
        ------
        AssertionError
            If the temperature or pressure is not greater than zero.
        """
        super(ThermodynamicState, self).validate(attribute_type)

        # The converted values are discarded; presumably the conversions are
        # only here so that a quantity with incompatible units is rejected
        # -- confirm against the unit library.
        if self.pressure != UNDEFINED:

            self.pressure.to(unit.pascals)
            assert self.pressure > 0.0 * unit.pascals

        self.temperature.to(unit.kelvin)
        assert self.temperature > 0.0 * unit.kelvin

    def __repr__(self):

        return_value = f"ThermodynamicState T={self.temperature:~}"

        if self.pressure != UNDEFINED:
            return_value += f" P={self.pressure:~}"

        return return_value

    def __str__(self):
        # This must wrap `repr(self)`: formatting `str(self)` from inside
        # `__str__` would call this method again and recurse without end.
        return f"<{repr(self)}>"

    def __hash__(self):
        # Hash the values rounded to three decimal places, in fixed units, so
        # that states which agree to that precision share a hash.
        temperature = self.temperature.to(unit.kelvin).magnitude
        pressure = (
            None
            if self.pressure == UNDEFINED
            else self.pressure.to(unit.pascal).magnitude
        )

        return hash(
            (f"{temperature:.3f}", None if pressure is None else f"{pressure:.3f}")
        )

    def __eq__(self, other):

        if not isinstance(other, ThermodynamicState):
            return False

        return hash(self) == hash(other)

    def __ne__(self, other):
        return not (self == other)
class RequestOptions(AttributeClass):
    """The options to use when requesting a set of physical
    properties be estimated by the server.
    """

    calculation_layers = Attribute(
        docstring="The calculation layers which may be used to "
        "estimate the set of physical properties. The order in which "
        "the layers appears in this list determines the order in which "
        "the layers will attempt to estimate the data set.",
        type_hint=list,
        default_value=["ReweightingLayer", "SimulationLayer"],
    )
    calculation_schemas = Attribute(
        docstring="The schemas that each calculation layer should "
        "use when estimating the set of physical properties. The "
        "dictionary should be of the form [property_type][layer_type].",
        type_hint=dict,
        optional=True,
    )

    batch_mode = Attribute(
        docstring="The way in which the server should batch together "
        "properties to estimate. Properties will only be marked as finished "
        "when all properties in a single batch are completed.",
        type_hint=BatchMode,
        default_value=BatchMode.SharedComponents,
        optional=True,
    )

    def add_schema(self, layer_type, property_type, schema):
        """A convenience function for adding a calculation schema
        to the schema dictionary.

        Parameters
        ----------
        layer_type: str or type of CalculationLayer
            The layer to associate the schema with. A class is replaced
            by its name.
        property_type: str or type of PhysicalProperty
            The class of property to associate the schema with. A class is
            replaced by its name.
        schema: CalculationLayerSchema
            The schema to add.

        Raises
        ------
        AssertionError
            If the layer has not been registered, or the schema is not an
            instance of the schema type which the layer requires.
        """

        # Validate the schema.
        schema.validate()

        # The registry and the schema dictionary are both keyed by name, so
        # classes are reduced to their names before being used as keys.
        if isinstance(layer_type, type):
            layer_type = layer_type.__name__
        if isinstance(property_type, type):
            property_type = property_type.__name__

        # Make sure the schema is compatible with the layer. This mirrors
        # the `isinstance` check which `validate` applies to stored schemas.
        assert layer_type in registered_calculation_layers
        calculation_layer = registered_calculation_layers[layer_type]

        assert isinstance(schema, calculation_layer.required_schema_type())

        if self.calculation_schemas == UNDEFINED:
            self.calculation_schemas = {}

        if property_type not in self.calculation_schemas:
            self.calculation_schemas[property_type] = {}

        self.calculation_schemas[property_type][layer_type] = schema

    def validate(self, attribute_type=None):
        """Checks that every requested layer is registered, and that every
        stored schema belongs to a requested layer and is of the schema type
        which that layer requires.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent class `validate`.

        Raises
        ------
        AssertionError
            If any of the checks described above fail.
        """
        super(RequestOptions, self).validate(attribute_type)

        assert all(isinstance(x, str) for x in self.calculation_layers)
        assert all(x in registered_calculation_layers for x in self.calculation_layers)

        if self.calculation_schemas != UNDEFINED:

            for property_type in self.calculation_schemas:

                assert isinstance(self.calculation_schemas[property_type], dict)

                for layer_type in self.calculation_schemas[property_type]:

                    assert layer_type in self.calculation_layers
                    calculation_layer = registered_calculation_layers[layer_type]

                    schema = self.calculation_schemas[property_type][layer_type]
                    required_type = calculation_layer.required_schema_type()

                    assert isinstance(schema, required_type)
class Request(AttributeClass):
    """An estimation request which has been sent to a `EvaluatorServer`
    instance. This object can be used to query and retrieve the results of
    the request when finished, or be stored to retrieve the request at some
    point in the future."""

    id = Attribute(
        docstring="The unique id assigned to this request by the server.",
        type_hint=str,
    )
    connection_options = Attribute(
        docstring="The options used to connect to the server handling the request.",
        type_hint=ConnectionOptions,
    )

    def __init__(self, client=None):
        """
        Parameters
        ----------
        client: EvaluatorClient, optional
            The client which submitted this request.
        """

        if client is not None:

            # Record how to reach the server so that a new client can be
            # built from these options later on.
            self.connection_options = ConnectionOptions()
            self.connection_options.server_address = client.server_address
            self.connection_options.server_port = client.server_port

        self._client = client

    def results(self, synchronous=False, polling_interval=5):
        """Attempt to retrieve the results of the request from the
        server.

        If the method is run synchronously it will block the main
        thread until either all of the requested properties have been
        estimated, or an exception is returned.

        Parameters
        ----------
        synchronous: bool
            If `True`, this method will block the main thread until
            the server either returns a result or an error.
        polling_interval: float
            If running synchronously, this is the time interval (seconds)
            between checking if the calculation has finished. This will
            be ignored if running asynchronously.

        Returns
        -------
        RequestResult, optional
            Returns the current results of the request. This may
            be `None` if any unexpected exceptions occurred while
            retrieving the estimate.
        EvaluatorException, optional
            The exception raised while trying to retrieve the result
            if any.
        """
        # `_client` is a plain instance attribute which is only assigned in
        # `__init__`, so tolerate instances which were created without
        # running it.
        client = getattr(self, "_client", None)

        # Build a fresh client when there is none yet, or when the cached
        # one points at a different server than `connection_options`.
        if (
            client is None
            or client.server_address != self.connection_options.server_address
            or client.server_port != self.connection_options.server_port
        ):

            self.validate()
            self._client = EvaluatorClient(self.connection_options)

        return self._client.retrieve_results(self.id, synchronous, polling_interval)

    def __str__(self):
        return f"Request id={self.id}"

    def __repr__(self):
        return f"<{str(self)}>"