class CrossValidationAnalysisConfiguration(
        Serializable['CrossValidationAnalysisConfiguration']):
    """[DEPRECATED] Configuration settings for running cross-validation in a performance workflow.

    Constructing an instance emits a warning that names ``CrossValidationEvaluator``
    as the replacement.

    Parameters
    ----------
    name : str
        Name of the analysis configuration
    description: str
        Description of the analysis configuration
    n_folds: int
        Number of folds
    n_trials: int
        Number of cross-validation trials to run, each with ``n_folds`` folds
    max_rows: int
        Maximum number of training candidates to use during cross-validation
    seed: int, optional
        Seed used to generate random test/train splits.
        If not provided, a random seed is used.
    group_by_keys: List[str], optional
        Set of keys used to group candidates.
        If present, candidates are grouped by the hash of ``(key, value)`` pairs
        computed on the given keys.
        If not provided, candidates are not grouped.
    responses: List[str], optional
        Set of descriptor keys to cross-validate.
        All requested responses must be present as an output of the predictor being analyzed.
        If not provided cross-validation metrics will be computed for all predictor responses.
        These cross-validated responses are removed from the data during the analysis,
        so which responses are requested can affect the performance metrics if the predictor
        contains latent variables.
        For example, if only the final output (leaf) responses are requested, latent variables
        are not removed during cross-validation.
        In this case the actual (and not predicted) values for latent variables are fed into
        the models used to compute leaf responses.
        Often this will manifest as a lower model error for the final response,
        compared to the model error computed when latent variables are requested
        and hence removed from the data.
        Note, if no responses are specified all leaf and latent variables are removed from
        the data during cross-validation.

    """

    name = properties.String('name')
    description = properties.String('description')
    n_folds = properties.Integer('n_folds')
    n_trials = properties.Integer('n_trials')
    seed = properties.Optional(properties.Integer, 'seed')
    group_by_keys = properties.Optional(properties.List(properties.String), 'group_by_keys')
    responses = properties.Optional(properties.List(properties.String), 'responses')
    max_rows = properties.Integer('max_rows')
    typ = properties.String('type', default='CrossValidationAnalysis', deserializable=False)

    def __init__(
            self,
            name: str,
            description: str,
            n_folds: int,
            n_trials: int,
            max_rows: int,
            seed: Optional[int] = None,
            group_by_keys: Optional[List[str]] = None,
            responses: Optional[List[str]] = None,
    ):
        # Use ``__name__`` for the class's own name: ``self.__class__.name`` would
        # resolve the ``name`` property declared on this class instead.
        warn("{this_class} is deprecated. Please use {replacement} instead".format(
            this_class=self.__class__.__name__,
            replacement=CrossValidationEvaluator.__name__))
        self.name = name
        self.description = description
        self.n_folds = n_folds
        self.n_trials = n_trials
        self.seed = seed
        self.group_by_keys = group_by_keys
        self.max_rows = max_rows
        self.responses = responses
class ChemicalFormulaFeaturizer(Resource['ChemicalFormulaFeaturizer'], Predictor,
                                AIResourceMetadata):
    """A featurizer for chemical formulae. Inspired by Magpie.

    A configurable set of features is computed on chemical formula data. Each feature
    is a function of element-level properties, inspired by
    `Magpie <https://bitbucket.org/wolverton/magpie/src/master/>`_.
    Which features are computed is controlled by the ``features`` and ``excludes``
    arguments; both accept feature names as well as predefined aliases.
    Many of the features are stoichiometrically weighted generalized means of
    element-level properties, and the ``powers`` argument configures how those means
    are computed.

    The default is the "standard" alias, corresponding to features that are intuitive
    and often correlate with properties of interest. Other aliases are "physical,"
    "electronic," and "periodicTable."

    The following features are weighted means of simple elemental properties.

    - "Pauling electronegativity": standard, electronic
    - "Number of d valence electrons": standard, electronic
    - "Number of unfilled f valence electrons": standard, electronic
    - "Number of f valence electrons": standard, electronic
    - "Number of unfilled p valence electrons": standard, electronic
    - "Number of p valence electrons": standard, electronic
    - "Number of unfilled s valence electrons": standard, electronic
    - "Number of s valence electrons": standard, electronic
    - "Total number of unfilled valence electrons": standard, electronic
    - "Total number of valence electrons": standard, electronic
    - "Elemental work function": standard, electronic
    - "Elemental polarizability": standard, electronic
    - "Radius of d orbitals": standard, electronic
    - "Radius of s orbitals": standard, electronic
    - "Radius of p orbitals": standard, electronic
    - "Elemental magnetic moment": standard, electronic
    - "Elemental atomic volume": standard, electronic, physical
    - "Elemental electron density": standard, electronic
    - "Mendeleev number": standard, periodicTable
    - "Row in periodic table": standard, periodicTable
    - "Elemental bulk modulus": standard, physical
    - "Elemental density": standard, physical
    - "Elemental melting temperature": standard, physical
    - "Elemental crystal structure (space group)": standard, electronic, physical
    - "AtomicVolume": electronic, physical
    - "Number": periodicTable
    - "CovalentRadius": electronic, physical
    - "DipolePolarizability": electronic
    - "ElectronAffinity": electronic
    - "FirstIonizationEnergy": electronic
    - "GSbandgap": electronic
    - "GSenergy_pa": electronic
    - "GSestBCClatcnt": electronic, physical
    - "GSvolume_pa": electronic, physical
    - "MiracleRadius": electronic, physical
    - "NdUnfilled": electronic
    - "ZungerPP-r_pi": electronic
    - "AtomicWeight": physical, periodicTable
    - "Column in periodic table": periodicTable
    - "IsAlkali": periodicTable
    - "IsDBlock": periodicTable
    - "IsFBlock": periodicTable
    - "IsMetal": periodicTable
    - "IsNonmetal": periodicTable
    - "BoilingT": physical
    - "FusionEnthalpy": physical
    - "HeatCapacityMass": physical
    - "HeatCapacityMolar": physical
    - "HeatFusion": physical
    - "ShearModulus": physical
    - "ValenceZeff": electronic, physical

    The following features are weighted means of more complex elemental properties.

    - "Packing density": standard, physical
    - "Liquid range": standard, physical
    - "Non-dimensional liquid range": standard, physical
    - "Liquid ratio": standard, physical
    - "Elastic Poisson Ratio": standard, physical
    - "DFT energy density": standard, electronic, physical
    - "Interatomic distance": standard, physical
    - "Ionization Affinity Ratio": standard, electronic
    - "Ratio of Electron Affinity to Electronegativity": standard, electronic
    - "Trouton's Ratio": standard, physical
    - "Miracle Ratio": standard, electronic
    - "DFT volume ratio": standard, physical
    - "Mulliken electronegativity": standard, electronic
    - "Modulii sum": standard, physical
    - "Zunger Pseudopotential radius ratio": standard, electronic
    - "BCC Efficiency": standard, physical
    - "Non-dimensional heat of fusion": standard, physical
    - "Non-dimensional band gap": standard, electronic
    - "Conduction ionization energy": standard, electronic
    - "Valence electron density": standard, electronic
    - "Non-dimensional work function": standard, electronic
    - "Shear Modulus Melting Temp Product": standard, physical

    The following features are not weighted means. Their values do not depend on ``powers``.

    - "Maximum electronegativity difference": standard, electronic
    - "Maximum radius difference": standard, electronic, physical
    - "Maximum radius ratio": standard, electronic, physical
    - "Min atomic radius plus max electronegativity difference":
      standard, electronic, physical
    - "Number of elements"
    - "Minimum atomic fraction"
    - "Maximum atomic fraction"
    - "Minimum weight fraction": standard, periodicTable
    - "Maximum weight fraction": standard, periodicTable
    - "Formula weight": standard, physical

    Parameters
    ----------
    name: str
        name stored on the featurizer
    description: str
        description stored on the featurizer
    input_descriptor: ChemicalFormulaDescriptor
        the descriptor to featurize
    features: Optional[List[str]]
        The list of features to compute, either by name or by group alias.
        Default is "standard."
    excludes: Optional[List[str]]
        The list of features to exclude, either by name or by group alias. Default is none.
        The final set of features generated by the predictor is
        set(features) - set(excludes).
    powers: Optional[List[int]]
        The list of powers to use when computing generalized weighted means of element
        properties. p=1 corresponds to the ordinary mean, p=2 is the root mean square, etc.
        When omitted, ``[1]`` is used.

    """

    _resource_type = ResourceTypeEnum.MODULE

    input_descriptor = _properties.Object(Descriptor, 'config.input')
    features = _properties.List(_properties.String, 'config.features')
    excludes = _properties.List(_properties.String, 'config.excludes')
    powers = _properties.List(_properties.Integer, 'config.powers')
    typ = _properties.String('config.type', default='ChemicalFormulaFeaturizer',
                             deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 input_descriptor: ChemicalFormulaDescriptor,
                 features: Optional[List[str]] = None,
                 excludes: Optional[List[str]] = None,
                 powers: Optional[List[int]] = None):
        self.name = name
        self.description = description
        self.input_descriptor = input_descriptor

        # Substitute a fresh default list for every argument that was left as None.
        if features is None:
            features = ["standard"]
        self.features = features

        if excludes is None:
            excludes = []
        self.excludes = excludes

        if powers is None:
            powers = [1]
        self.powers = powers

    def _post_dump(self, data: dict) -> dict:
        """Copy ``data['config']['name']`` to ``data['display_name']`` and return ``data``."""
        config_name = data['config']['name']
        data['display_name'] = config_name
        return data

    def __str__(self):
        label = '<ChemicalFormulaFeaturizer {!r}>'
        return label.format(self.name)
def test_list_property_serde(sub_prop, sub_value, sub_serialized):
    """Check that a List property (de)serializes a five-element list element-wise."""
    n_items = 5
    list_prop = properties.List(sub_prop)
    expected_values = [sub_value] * n_items
    expected_payload = [sub_serialized] * n_items
    assert list_prop.deserialize(expected_payload) == expected_values
    assert list_prop.serialize(expected_values) == expected_payload
class IngredientLabelsSetInOutput(Serializable['IngredientLabelsSetInOutput'], Variable):
    """[ALPHA] The set of labels on an ingredient in the trunk of a material history tree.

    The ingredient search begins at the terminal of the material history tree and
    continues until any of the given process templates are reached. Those templates
    stop the search from going further, but they are inclusive: a match is extracted
    if an ingredient with the specified ingredient name is found at or before a cutoff.

    With this variable definition a set of labels can be extracted when an ingredient
    is used in multiple processes. As an example, consider a paint formed by mixing
    red and yellow pigments, where the red pigment is formed by mixing yellow and
    magenta. This variable could be used to represent the labels applied to yellow in
    both mixing processes (red and the final paint) in a single column, provided the
    process templates that mixed red and the final paint are included as cutoffs.

    In general, this variable should be preferred over an
    :class:`~citrine.gemtables.variables.IngredientLabelSetByProcessTemplateAndName`
    when mixtures are hierarchical (i.e., blends of blends). It allows an ingredient
    with a single name to be used in multiple processes without defining additional
    variables that manifest as additional columns in your GEM table, and it must be
    used in place of the former if the same process template is used to represent
    mixing at multiple levels in the material history hierarchy.

    Going back to the previous example, this variable must be used in place of an
    :class:`~citrine.gemtables.variables.IngredientLabelSetByProcessTemplateAndName`
    if the same process template was used to represent the process that mixed red and
    the final paint. Using
    :class:`~citrine.gemtables.variables.IngredientLabelSetByProcessTemplateAndName`
    would result in an ambiguous match because yellow would be found twice in the
    material history, once when mixing red and again when mixing the final paint.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    process_templates: list[LinkByUID]
        process templates that should not be traversed through when searching for a
        matching attribute. The attribute may be present in these processes but not
        their ingredients.
    ingredient_name: str
        name of ingredient

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    process_templates = properties.List(properties.Object(LinkByUID), 'process_templates')
    ingredient_name = properties.String('ingredient_name')
    typ = properties.String('type', default="ing_label_set_in_output", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes that describe this variable."""
        attr_names = ["name", "headers", "process_templates", "ingredient_name", "typ"]
        return attr_names

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 process_templates: List[LinkByUID],
                 ingredient_name: str):
        self.name = name
        self.headers = headers
        self.process_templates = process_templates
        self.ingredient_name = ingredient_name
class IngredientQuantityInOutput(Serializable['IngredientQuantityInOutput'], Variable):
    """[ALPHA] Ingredient quantity in the trunk of a material history tree.

    The search for an ingredient starts at the terminal of the material history tree
    and proceeds until any of the given process templates are reached. Those templates
    block the search from continuing but are inclusive: a match is extracted if an
    ingredient with the specified ingredient name is found at or before a cutoff.

    This variable definition allows a quantity to be extracted when an ingredient is
    used in multiple processes. As an example, consider a paint formed by mixing red
    and yellow pigments, where the red pigment is formed by mixing yellow and magenta.
    This variable could be used to represent the quantity of yellow in both mixing
    processes (red and the final paint) in a single column provided the process
    templates that mixed red and the final paint are included as cutoffs.

    In general, this variable should be preferred over an
    :class:`~citrine.gemtables.variables.IngredientQuantityByProcessTemplateAndName`
    when mixtures are hierarchical (i.e., blends of blends). It allows an ingredient
    with a single name to be used in multiple processes without defining additional
    variables that manifest as additional columns in your table, and must be used in
    place of the former if the same process template is used to represent mixing at
    multiple levels in the material history hierarchy.

    Going back to the previous example, this variable must be used in place of an
    :class:`~citrine.gemtables.variables.IngredientQuantityByProcessTemplateAndName`
    if the same process template was used to represent the process that mixed red and
    the final paint. Using
    :class:`~citrine.gemtables.variables.IngredientQuantityByProcessTemplateAndName`
    would result in an ambiguous match because yellow would be found twice in the
    material history, once when mixing red and again when mixing the final paint.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    ingredient_name: str
        Name of the ingredient to search for
    quantity_dimension: IngredientQuantityDimension
        Dimension of the ingredient quantity: absolute quantity, number, mass, or
        volume fraction. Valid options are defined by
        :class:`~citrine.gemtables.variables.IngredientQuantityDimension`.
        A value that is not already an ``IngredientQuantityDimension`` is converted
        with ``IngredientQuantityDimension.get_enum``.
    process_templates: list[LinkByUID]
        Process templates halt the search for a matching ingredient name.
        These process templates are inclusive.
        The ingredient may be present in these processes but not before.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching,
        defaults to PREFER_RUN
    unit: str
        an optional unit: only ingredient quantities that are convertible to this unit
        will be matched. note that this parameter is mandatory when quantity_dimension
        is IngredientQuantityDimension.ABSOLUTE.

    Raises
    ------
    ValueError
        If ``quantity_dimension`` is ABSOLUTE and ``unit`` is None, or if
        ``quantity_dimension`` is not ABSOLUTE and a non-empty ``unit`` is given.

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    ingredient_name = properties.String('ingredient_name')
    quantity_dimension = properties.Enumeration(IngredientQuantityDimension,
                                                'quantity_dimension')
    process_templates = properties.List(properties.Object(LinkByUID), 'process_templates')
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    unit = properties.Optional(properties.String, "unit")
    typ = properties.String('type', default="ing_quantity_in_output", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes that describe this variable.

        Every declared property is listed, including ``quantity_dimension``, so that
        two variables differing only in quantity dimension are not treated as having
        identical attributes.
        NOTE(review): presumably consumed by the ``Variable`` base class (e.g. for
        comparisons) -- confirm against its implementation.
        """
        return ["name", "headers", "ingredient_name", "quantity_dimension",
                "process_templates", "type_selector", "unit", "typ"]

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 ingredient_name: str,
                 quantity_dimension: IngredientQuantityDimension,
                 process_templates: List[LinkByUID],
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN,
                 unit: Optional[str] = None):
        self.name = name
        self.headers = headers
        self.ingredient_name = ingredient_name
        self.process_templates = process_templates
        self.type_selector = type_selector

        # Cast to make sure the string is valid
        if not isinstance(quantity_dimension, IngredientQuantityDimension):
            quantity_dimension = IngredientQuantityDimension.get_enum(quantity_dimension)
        self.quantity_dimension = quantity_dimension

        # Absolute quantities need a unit; every other dimension must not carry one
        # (an empty string is tolerated as "no unit").
        if quantity_dimension == IngredientQuantityDimension.ABSOLUTE:
            if unit is None:
                raise ValueError("Absolute Quantity variables require that 'unit' is set")
        else:
            if unit is not None and unit != "":
                raise ValueError("Fractional variables cannot take a 'unit'")
        self.unit = unit
class IngredientQuantityByProcessAndName(
        Serializable['IngredientQuantityByProcessAndName'], Variable):
    """[ALPHA] The quantity of an ingredient associated with a process template and a name.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    process_template: LinkByUID
        process template associated with this ingredient identifier
    ingredient_name: str
        name of ingredient
    quantity_dimension: IngredientQuantityDimension
        Dimension of the ingredient quantity: absolute quantity, number, mass, or
        volume fraction. Valid options are defined by
        :class:`~citrine.gemtables.variables.IngredientQuantityDimension`.
        A value that is not already an ``IngredientQuantityDimension`` is converted
        with ``IngredientQuantityDimension.get_enum``.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching,
        defaults to PREFER_RUN
    unit: str
        An optional unit: only ingredient quantities that are convertible to this unit
        will be matched. Note that this parameter is mandatory when quantity_dimension
        is IngredientQuantityDimension.ABSOLUTE.

    Raises
    ------
    ValueError
        If ``quantity_dimension`` is ABSOLUTE and ``unit`` is None, or if
        ``quantity_dimension`` is not ABSOLUTE and a non-empty ``unit`` is given.

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    process_template = properties.Object(LinkByUID, 'process_template')
    ingredient_name = properties.String('ingredient_name')
    quantity_dimension = properties.Enumeration(IngredientQuantityDimension,
                                                'quantity_dimension')
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String('type', default="ing_quantity_by_process_and_name",
                            deserializable=False)
    unit = properties.Optional(properties.String, "unit")

    def _attrs(self) -> List[str]:
        """Return the names of the attributes that describe this variable.

        Every declared property is listed, including ``unit``, so that two variables
        differing only in unit are not treated as having identical attributes.
        NOTE(review): presumably consumed by the ``Variable`` base class (e.g. for
        comparisons) -- confirm against its implementation.
        """
        return ["name", "headers", "process_template", "ingredient_name",
                "quantity_dimension", "type_selector", "unit", "typ"]

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 process_template: LinkByUID,
                 ingredient_name: str,
                 quantity_dimension: IngredientQuantityDimension,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN,
                 unit: Optional[str] = None):
        self.name = name
        self.headers = headers
        self.process_template = process_template
        self.ingredient_name = ingredient_name
        self.type_selector = type_selector

        # Cast to make sure the string is valid
        if not isinstance(quantity_dimension, IngredientQuantityDimension):
            quantity_dimension = IngredientQuantityDimension.get_enum(quantity_dimension)
        self.quantity_dimension = quantity_dimension

        # Absolute quantities need a unit; every other dimension must not carry one
        # (an empty string is tolerated as "no unit").
        if quantity_dimension == IngredientQuantityDimension.ABSOLUTE:
            if unit is None:
                raise ValueError("Absolute Quantity variables require that 'unit' is set")
        else:
            if unit is not None and unit != "":
                raise ValueError("Fractional variables cannot take a 'unit'")
        self.unit = unit
class AttributeInOutput(Serializable['AttributeInOutput'], Variable):
    """[ALPHA] Attribute marked by an attribute template in the trunk of the history tree.

    Starting at the terminal of the material history tree, the search looks for an
    attribute that marks the given attribute template and proceeds until any of the
    given process templates are reached. Those templates block the search from
    continuing into their ingredients but do not halt the search entirely. This
    variable definition allows attributes that are present both in output and the
    inputs of a process to be distinguished.

    For example, a material "paint" might be produced by mixing and then resting
    "pigments" and a "base". The color of the pigments and base could be measured and
    recorded as attributes in addition to the color of the resulting paint. To define
    a variable as the color of the resulting paint, AttributeInOutput can be used with
    the mixing process included in the list of process templates. Then, when the
    platform looks for the color of a paint, it will find it but *won't* traverse
    through the mixing process and also find the colors of the pigments and base,
    which would result in an ambiguous variable match.

    Unlike "AttributeByTemplateAfterProcess", AttributeInOutput will also match on the
    color attribute of the pigments in the rows that correspond to those pigments.
    This way, all the colors can be assigned to the same variable and rendered into
    the same columns in the GEM table.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    attribute_template: LinkByUID
        attribute template that identifies the attribute to assign to the variable
    process_templates: list[LinkByUID]
        process templates that should not be traversed through when searching for a
        matching attribute. The attribute may be present in these processes but not
        their ingredients.
    attribute_constraints: Optional[list[list[LinkByUID, Bounds]]]
        constraints on object attributes in the target object that must be satisfied.
        Constraints are expressed as Bounds. Attributes are expressed with links.
        The attribute that the variable is being set to may be the target of a
        constraint as well.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching,
        defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    attribute_template = properties.Object(LinkByUID, 'attribute_template')
    process_templates = properties.List(properties.Object(LinkByUID), 'process_templates')
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)]
            )
        ),
        'attribute_constraints')
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String('type', default="attribute_in_trunk", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes that describe this variable."""
        attr_names = [
            "name",
            "headers",
            "attribute_template",
            "process_templates",
            "attribute_constraints",
            "type_selector",
            "typ",
        ]
        return attr_names

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 attribute_template: LinkByUID,
                 process_templates: List[LinkByUID],
                 attribute_constraints: Optional[
                     List[List[Union[LinkByUID, BaseBounds]]]] = None,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.attribute_template = attribute_template
        self.process_templates = process_templates
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
class AttributeByTemplateAndObjectTemplate(
        Serializable['AttributeByTemplateAndObjectTemplate'], Variable):
    """[ALPHA] Attribute marked by an attribute template and an object template.

    For example, one property may be measured by two different measurement techniques.
    In this case, that property would have the same attribute template. Filtering by
    measurement templates, which identify the measurement techniques, disambiguates
    the technique used to measure that otherwise ambiguous property.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    attribute_template: LinkByUID
        attribute template that identifies the attribute to assign to the variable
    object_template: LinkByUID
        template that identifies the associated object
    attribute_constraints: Optional[list[list[LinkByUID, Bounds]]]
        constraints on object attributes in the target object that must be satisfied.
        Constraints are expressed as Bounds. Attributes are expressed with links.
        The attribute that the variable is being set to may be the target of a
        constraint as well. Defaults to None.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching,
        defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    attribute_template = properties.Object(LinkByUID, 'attribute_template')
    object_template = properties.Object(LinkByUID, 'object_template')
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)]
            )
        ),
        'attribute_constraints')
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String('type', default="attribute_by_object", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes that describe this variable."""
        return ["name", "headers", "attribute_template", "object_template",
                "attribute_constraints", "type_selector", "typ"]

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 attribute_template: LinkByUID,
                 object_template: LinkByUID,
                 # Annotated Optional because the default is None and the backing
                 # property is declared with properties.Optional.
                 attribute_constraints: Optional[
                     List[List[Union[LinkByUID, BaseBounds]]]] = None,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.attribute_template = attribute_template
        self.object_template = object_template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
class AttributeByTemplateAfterProcessTemplate(
        Serializable['AttributeByTemplateAfterProcessTemplate'], Variable):
    """[ALPHA] Attribute of an object marked by an attribute template and a parent process template.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    attribute_template: LinkByUID
        attribute template that identifies the attribute to assign to the variable
    process_template: LinkByUID
        process template that identifies the originating process
    attribute_constraints: list[list[LinkByUID, Bounds]]
        constraints on object attributes in the target object that must be satisfied.
        Constraints are expressed as Bounds. Attributes are expressed with links.
        The attribute that the variable is being set to may be the target of a
        constraint as well.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching,
        defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    attribute_template = properties.Object(LinkByUID, 'attribute_template')
    process_template = properties.Object(LinkByUID, 'process_template')
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)]
            )
        ),
        'attribute_constraints')
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String('type', default="attribute_after_process", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the names of the attributes that describe this variable."""
        attr_names = [
            "name",
            "headers",
            "attribute_template",
            "process_template",
            "attribute_constraints",
            "type_selector",
            "typ",
        ]
        return attr_names

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 attribute_template: LinkByUID,
                 process_template: LinkByUID,
                 attribute_constraints: Optional[
                     List[List[Union[LinkByUID, BaseBounds]]]] = None,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.attribute_template = attribute_template
        self.process_template = process_template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
class DesignExecution(Resource['DesignExecution'], Pageable, AsynchronousObject):
    """The execution of a DesignWorkflow.

    An execution's status is one of INPROGRESS, SUCCEEDED, or FAILED.
    More information is available in the ``status_description`` field.

    Parameters
    ----------
    project_id: str
        Unique identifier of the project that contains the workflow execution

    """

    _paginator: Paginator = Paginator()
    _collection_key = 'response'

    uid: UUID = properties.UUID('id', serializable=False)
    """:UUID: Unique identifier of the workflow execution"""
    workflow_id = properties.UUID('workflow_id', serializable=False)
    """:UUID: Unique identifier of the workflow that was executed"""
    version_number = properties.Integer("version_number", serializable=False)
    """:int: Integer identifier that increases each time the workflow is executed.
    The first execution has version_number = 1."""

    status = properties.Optional(properties.String(), 'status', serializable=False)
    """:Optional[str]: short description of the execution's status"""
    status_description = properties.Optional(
        properties.String(), 'status_description', serializable=False)
    """:Optional[str]: more detailed description of the execution's status"""
    status_info = properties.Optional(
        properties.List(properties.String()), 'status_info', serializable=False)
    """:Optional[List[str]]: human-readable explanations of the status"""
    experimental = properties.Boolean("experimental", serializable=False, default=True)
    """:bool: whether the execution is experimental (newer, less well-tested functionality)"""
    experimental_reasons = properties.Optional(
        properties.List(properties.String()), 'experimental_reasons', serializable=False)
    """:Optional[List[str]]: human-readable reasons why the execution is experimental"""
    created_by = properties.Optional(properties.UUID, 'created_by', serializable=False)
    """:Optional[UUID]: id of the user who created the resource"""
    updated_by = properties.Optional(properties.UUID, 'updated_by', serializable=False)
    """:Optional[UUID]: id of the user who most recently updated the resource,
    if it has been updated"""
    create_time = properties.Optional(properties.Datetime, 'create_time', serializable=False)
    """:Optional[datetime]: date and time at which the resource was created"""
    update_time = properties.Optional(properties.Datetime, 'update_time', serializable=False)
    """:Optional[datetime]: date and time at which the resource was most recently updated,
    if it has been updated"""

    score = properties.Object(Score, 'score')
    """:Score: score by which this execution was evaluated"""
    descriptors = properties.List(properties.Object(Descriptor), 'descriptors')
    """:List[Descriptor]: all of the descriptors in the candidates generated by this execution"""

    def __init__(self):
        """This shouldn't be called, but it defines members that are set elsewhere."""
        self.project_id: Optional[UUID] = None  # pragma: no cover
        self.session: Optional[Session] = None  # pragma: no cover

    def __str__(self):
        label = '<DesignExecution {!r}>'
        return label.format(str(self.uid))

    def _path(self):
        """Build the URL path of this execution from its project, workflow and own ids."""
        template = \
            '/projects/{project_id}/design-workflows/{workflow_id}/executions/{execution_id}'
        return template.format(
            project_id=self.project_id,
            workflow_id=self.workflow_id,
            execution_id=self.uid,
        )

    def in_progress(self) -> bool:
        """Whether design execution is in progress. Does not query state."""
        return self.status == "INPROGRESS"

    def succeeded(self) -> bool:
        """Whether design execution has completed successfully. Does not query state."""
        return self.status == "SUCCEEDED"

    def failed(self) -> bool:
        """Whether design execution has completed unsuccessfully. Does not query state."""
        return self.status == "FAILED"

    @classmethod
    def _build_candidates(cls, subset_collection: Iterable[dict]) -> Iterable[DesignCandidate]:
        """Lazily turn each raw candidate dict into a DesignCandidate."""
        yield from (DesignCandidate.build(raw) for raw in subset_collection)

    def candidates(self,
                   page: Optional[int] = None,
                   per_page: int = 100,
                   ) -> Iterable[DesignCandidate]:
        """Fetch the Design Candidates for the particular execution, paginated."""
        candidates_path = self._path() + '/candidates'
        # Bind the candidates path so the paginator only has to supply paging arguments.
        page_fetcher = partial(self._fetch_page, path=candidates_path)
        return self._paginator.paginate(
            page_fetcher=page_fetcher,
            collection_builder=self._build_candidates,
            page=page,
            per_page=per_page,
        )
class TableConfig(Resource["TableConfig"]):
    """
    [ALPHA] The Table Configuration used to build GEM Tables.

    Parameters
    ----------
    name: str
        Name of the Table Configuration
    description: str
        Description of the Table Configuration
    datasets: list[UUID]
        Datasets that are in scope for the table, as a list of dataset uuids
    variables: list[Variable]
        Variable definitions, which define data from the material histories to use in the columns
    rows: list[Row]
        List of row definitions that define the rows of the table
    columns: list[Column]
        Column definitions, which describe how the variables are shaped into the table
    version_uid: Optional[UUID]
        Serialized under the key ``id``
    version_number: Optional[int]
        Serialized under the key ``version_number``
    config_uid: Optional[UUID]
        Serialized under the key ``definition_id``
    definition_uid: Optional[UUID]
        Deprecated alias of ``config_uid``; supply one or the other, not both

    """

    # FIXME (DML): rename this (this is dependent on the server side)
    _response_key = "ara_definition"

    @staticmethod
    def _get_dups(lst: List) -> List:
        """Return every element of ``lst`` that occurs more than once.

        Each occurrence of a repeated element is kept, in its original order,
        so ``['a', 'b', 'a']`` yields ``['a', 'a']``.
        """
        from collections import Counter

        try:
            # One pass over the list when the elements are hashable (e.g. names).
            counts = Counter(lst)
        except TypeError:
            # Unhashable elements (e.g. lists of headers) cannot be counted in a dict;
            # fall back to the equality-based, quadratic count.
            return [x for x in lst if lst.count(x) > 1]
        return [x for x in lst if counts[x] > 1]

    config_uid = properties.Optional(properties.UUID(), 'definition_id')
    version_uid = properties.Optional(properties.UUID(), 'id')
    version_number = properties.Optional(properties.Integer, 'version_number')
    name = properties.String("name")
    description = properties.String("description")
    datasets = properties.List(properties.UUID, "datasets")
    variables = properties.List(properties.Object(Variable), "variables")
    rows = properties.List(properties.Object(Row), "rows")
    columns = properties.List(properties.Object(Column), "columns")

    # Provide some backwards compatible support for definition_uid, redirecting to config_uid
    @property
    def definition_uid(self):
        """[[DEPRECATED]] This is a deprecated alias to config_uid. Please use that instead."""
        from warnings import warn
        # stacklevel=2 attributes the warning to the code that touched the alias.
        warn("definition_uid is deprecated and will soon be removed. "
             "Please use config_uid instead", DeprecationWarning, stacklevel=2)
        return self.config_uid

    @definition_uid.setter
    def definition_uid(self, value):  # pragma: no cover
        """[[DEPRECATED]] This is a deprecated alias to config_uid. Please use that instead."""
        from warnings import warn
        # stacklevel=2 attributes the warning to the code that touched the alias.
        warn("definition_uid is deprecated and will soon be removed. "
             "Please use config_uid instead", DeprecationWarning, stacklevel=2)
        self.config_uid = value

    def __init__(self, *, name: str, description: str, datasets: List[UUID],
                 variables: List[Variable], rows: List[Row], columns: List[Column],
                 version_uid: Optional[UUID] = None, version_number: Optional[int] = None,
                 definition_uid: Optional[UUID] = None, config_uid: Optional[UUID] = None):
        """Store the configuration and validate its variables and columns.

        Raises
        ------
        AssertionError
            If both ``config_uid`` and ``definition_uid`` are supplied.
        ValueError
            If two variables share a name or a set of headers, or if a column's
            ``data_source`` does not match any variable name.

        """
        self.name = name
        self.description = description
        self.datasets = datasets
        self.rows = rows
        self.variables = variables
        self.columns = columns
        self.version_uid = version_uid
        self.version_number = version_number
        if config_uid is not None:
            assert definition_uid is None, "Please supply config_uid " \
                                           "instead of definition_uid, and not both"
            self.config_uid = config_uid
        else:
            self.config_uid = definition_uid

        # Note that these validations only apply at construction time. The current intended usage
        # is for this object to be created holistically; if changed, then these will need
        # to move into setters.
        names = [x.name for x in variables]
        dup_names = self._get_dups(names)
        if dup_names:
            raise ValueError("Multiple variables defined these names,"
                             " which must be unique: {}".format(dup_names))

        headers = [x.headers for x in variables]
        dup_headers = self._get_dups(headers)
        if dup_headers:
            raise ValueError("Multiple variables defined these headers,"
                             " which must be unique: {}".format(dup_headers))

        missing_variables = [x.data_source for x in columns if x.data_source not in names]
        if missing_variables:
            raise ValueError("The data_source of the columns must match one of the variable names,"
                             " but {} were missing".format(missing_variables))

    def add_columns(self, *,
                    variable: Variable,
                    columns: List[Column],
                    name: Optional[str] = None,
                    description: Optional[str] = None) -> 'TableConfig':
        """[ALPHA] Add a variable and one or more columns to this TableConfig (out-of-place).

        This method checks that the variable name is not already in use and that the columns
        only reference that variable.  It is *not* able to check if the columns and the variable
        are compatible (yet, at least).

        Parameters
        ----------
        variable: Variable
            Variable to add and use in the added columns
        columns: list[Column]
            Columns to add, which must only reference the added variable
        name: Optional[str]
            Optional renaming of the table
        description: Optional[str]
            Optional re-description of the table

        Returns
        -------
        TableConfig
            A new configuration; this one is left unmodified

        Raises
        ------
        ValueError
            If the variable name is already used, or if any column references a
            different data source

        """
        if variable.name in [x.name for x in self.variables]:
            raise ValueError("The variable name {} is already used".format(variable.name))

        mismatched_data_source = [x for x in columns if x.data_source != variable.name]
        if mismatched_data_source:
            raise ValueError("Column.data_source must be {} but found {}"
                             .format(variable.name, mismatched_data_source))

        return TableConfig(
            name=name or self.name,
            description=description or self.description,
            datasets=copy(self.datasets),
            rows=copy(self.rows),
            variables=copy(self.variables) + [variable],
            columns=copy(self.columns) + columns,
            config_uid=copy(self.config_uid)
        )

    def add_all_ingredients(self, *,
                            process_template: LinkByUID,
                            project,
                            quantity_dimension: IngredientQuantityDimension,
                            scope: str = CITRINE_SCOPE) -> 'TableConfig':
        """[ALPHA] Add variables and columns for all of the possible ingredients in a process.

        For each allowed ingredient name in the process template there is a column for the id of
        the ingredient and a column for the quantity of the ingredient. If the quantities are
        given in absolute amounts then there is also a column for units.

        Parameters
        ----------
        process_template: LinkByUID
            scope and id of a registered process template
        project: Project
            a project that has access to the process template
        quantity_dimension: IngredientQuantityDimension
            the dimension in which to report ingredient quantities
        scope: Optional[str]
            the scope for which to get ingredient ids (default is Citrine scope, 'id')

        Returns
        -------
        TableConfig
            A new configuration; this one is left unmodified

        Raises
        ------
        RuntimeError
            If the process template has no ``allowed_names``

        """
        dimension_display = {
            IngredientQuantityDimension.ABSOLUTE: "absolute quantity",
            IngredientQuantityDimension.MASS: "mass fraction",
            IngredientQuantityDimension.VOLUME: "volume fraction",
            IngredientQuantityDimension.NUMBER: "number fraction"
        }
        process: ProcessTemplate = project.process_templates.get(
            uid=process_template.id, scope=process_template.scope)
        if not process.allowed_names:
            raise RuntimeError(
                "Cannot add ingredients for process template \'{}\' because it has no defined "
                "ingredients (allowed_names is not defined).".format(process.name))

        # Both values are the same for every ingredient, so compute them once.
        dimension_label = dimension_display[quantity_dimension]
        existing_names = [var.name for var in self.variables]

        new_variables = []
        new_columns = []
        for name in process.allowed_names:
            # NOTE(review): the built-in hash() of a str is salted per interpreter process
            # (see PYTHONHASHSEED), so these generated names are not reproducible across
            # sessions and the "already defined" check below can only match names generated
            # in the same process. Left as is because changing it alters the generated names.
            identifier_variable = IngredientIdentifierByProcessTemplateAndName(
                name='_'.join([process.name, name, str(hash(process_template.id + name + scope))]),
                headers=[process.name, name, scope],
                process_template=process_template,
                ingredient_name=name,
                scope=scope
            )
            quantity_variable = IngredientQuantityByProcessAndName(
                name='_'.join([process.name, name,
                               str(hash(process_template.id + name + dimension_label))]),
                headers=[process.name, name, dimension_label],
                process_template=process_template,
                ingredient_name=name,
                quantity_dimension=quantity_dimension
            )

            # The identifier is only added once; the quantity is always added.
            if identifier_variable.name not in existing_names:
                new_variables.append(identifier_variable)
                new_columns.append(IdentityColumn(data_source=identifier_variable.name))
            new_variables.append(quantity_variable)
            new_columns.append(MeanColumn(data_source=quantity_variable.name))
            if quantity_dimension == IngredientQuantityDimension.ABSOLUTE:
                new_columns.append(OriginalUnitsColumn(data_source=quantity_variable.name))

        return TableConfig(
            name=self.name,
            description=self.description,
            datasets=copy(self.datasets),
            rows=copy(self.rows),
            variables=copy(self.variables) + new_variables,
            columns=copy(self.columns) + new_columns,
            config_uid=copy(self.config_uid)
        )
class AutoMLPredictor(Resource['AutoMLPredictor'], Predictor, AIResourceMetadata):
    """[ALPHA] A predictor interface that trains exactly one machine learning model.

    The model maps the given inputs onto a single output; only one output value
    is currently supported.

    Parameters
    ----------
    name: str
        name of the configuration
    description: str
        the description of the predictor
    inputs: list[Descriptor]
        Descriptors that represent inputs to the model
    output: Descriptor
        A single Descriptor that represents the output of the model
    training_data: Optional[List[DataSource]]
        Sources of training data. Each can be either a CSV or a GEM Table. Candidates from
        multiple data sources will be combined into a flattened list and de-duplicated by uid
        and identifiers. De-duplication is performed if a uid or identifier is shared between
        two or more rows. The content of a de-duplicated row will contain the union of data
        across all rows that share the same uid or at least 1 identifier. Training data is
        unnecessary if the predictor is part of a graph that includes all training data
        required by this predictor.
    archived: bool
        stored on the instance as ``archived`` (defaults to False)

    """

    _resource_type = ResourceTypeEnum.MODULE

    inputs = _properties.List(_properties.Object(Descriptor), 'config.inputs')
    output = _properties.Object(Descriptor, 'output')
    training_data = _properties.List(_properties.Object(DataSource), 'config.training_data')

    typ = _properties.String('config.type', default='AutoML', deserializable=False)
    module_type = _properties.String('module_type', default='PREDICTOR')

    def __init__(self,
                 name: str,
                 description: str,
                 output: Descriptor,
                 inputs: List[Descriptor],
                 training_data: Optional[List[DataSource]] = None,
                 archived: bool = False):
        self.name = name
        self.description = description
        self.inputs = inputs
        self.output = output
        # The raw argument is passed through the _wrap_training_data helper before storing.
        self.training_data = self._wrap_training_data(training_data)
        self.archived = archived

    def _post_dump(self, data: dict) -> dict:
        """Add ``display_name`` and write the output under both config keys."""
        config = data['config']
        data['display_name'] = config['name']
        # The single output is written under 'outputs' and 'responses' alike,
        # each as its own one-element list.
        for key in ('outputs', 'responses'):
            config[key] = [data['output']]
        return data

    @classmethod
    def _pre_build(cls, data: dict) -> dict:
        """Set ``output`` from the first of ``config.outputs``, else ``config.responses``."""
        config = data['config']
        for key in ('outputs', 'responses'):
            if key in config:
                data['output'] = config[key][0]
                break
        return data

    def __str__(self):
        return f'<AutoMLPredictor {self.name!r}>'