class DesignSpace(Module):
    """Describe the set of materials that can be made on the Citrine Platform.

    This is an abstract type: :meth:`get_type` picks the concrete design space
    class that matches a serialized dict.

    """

    _project_id: Optional[UUID] = None
    _session: Optional[Session] = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    """:Optional[UUID]: Citrine Platform unique identifier"""
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')

    @classmethod
    def get_type(cls, data) -> Type[Serializable]:
        """Select the concrete design space class for a serialized dict.

        Parameters
        ----------
        data: dict
            serialized design space; ``data['config']['type']`` names the subtype

        Returns
        -------
        Type[Serializable]
            the class registered under that type name

        Raises
        ------
        KeyError
            if the type entry is missing or does not name a known subtype

        """
        # The subtypes are imported inside the method rather than at module level.
        from .data_source_design_space import DataSourceDesignSpace
        from .enumerated_design_space import EnumeratedDesignSpace
        from .formulation_design_space import FormulationDesignSpace
        from .product_design_space import ProductDesignSpace

        # Both 'Univariate' and 'ProductDesignSpace' resolve to ProductDesignSpace.
        subtypes_by_name = {
            'Univariate': ProductDesignSpace,
            'ProductDesignSpace': ProductDesignSpace,
            'EnumeratedDesignSpace': EnumeratedDesignSpace,
            'FormulationDesignSpace': FormulationDesignSpace,
            'DataSourceDesignSpace': DataSourceDesignSpace
        }
        type_name = data['config']['type']
        return subtypes_by_name[type_name]
class ScalarMinObjective(Serializable['ScalarMinObjective'], Objective):
    """Minimize a single response, with optional bounds on the objective space.

    Parameters
    ----------
    descriptor_key: str
        the key from which to pull the values
    lower_bound: float
        the lower bound on the space, e.g. 0 for a non-negative property
    upper_bound: float
        the upper bound on the space, e.g. 0 for a non-positive property

    """

    descriptor_key = properties.String('descriptor_key')
    lower_bound = properties.Optional(properties.Float, 'lower_bound')
    upper_bound = properties.Optional(properties.Float, 'upper_bound')
    typ = properties.String('type', default='ScalarMin')

    def __init__(self,
                 descriptor_key: str,
                 lower_bound: Optional[float] = None,
                 upper_bound: Optional[float] = None,
                 session: Optional[Session] = None):
        # The session is stored as a plain attribute; the other three are declared properties.
        self.session: Optional[Session] = session
        self.descriptor_key = descriptor_key
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound

    def __str__(self):
        return f'<ScalarMinObjective {self.descriptor_key!r}>'
class EnumeratedDesignSpace(Resource['EnumeratedDesignSpace'], DesignSpace):
    """Design space composed of an explicit enumeration of candidate materials to score.

    Note that every candidate must have exactly the descriptors in the list populated
    (no more, no less) to be included.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    descriptors: list[Descriptor]
        the list of descriptors included in the candidates of the design space
    data: list[dict]
        list of dicts of the shape `{<descriptor_key>: <descriptor_value>}`
        where each dict corresponds to a candidate in the design space
    session: Optional[Session]
        session to attach to the design space; when omitted, a new ``Session()``
        is created for this instance

    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    descriptors = properties.List(properties.Object(Descriptor), 'config.descriptors')
    data = properties.List(
        properties.Mapping(properties.String, properties.Raw), 'config.data')
    typ = properties.String('config.type', default='EnumeratedDesignSpace',
                            deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info', serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID(
        'schema_id', default=UUID('f3907a58-aa46-462c-8837-a5aa9605e79e'))

    def __init__(self,
                 name: str,
                 description: str,
                 descriptors: List[Descriptor],
                 data: List[Mapping[str, Any]],
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.descriptors: List[Descriptor] = descriptors
        self.data: List[Mapping[str, Any]] = data
        # A `Session()` default in the signature is evaluated once at definition time
        # and would be shared by every instance, so build a fresh one here instead.
        self.session: Session = session if session is not None else Session()

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` entry."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<EnumeratedDesignSpace {!r}>'.format(self.name)
class DesignWorkflow(Resource['DesignWorkflow'], Workflow):
    """Object that generates scored materials that may approach higher values of the score.

    Parameters
    ----------
    name: str
        the name of the workflow
    design_space_id: UUID
        the UUID corresponding to the design space to use
    processor_id: UUID
        the UUID corresponding to the processor to use
    predictor_id: UUID
        the UUID corresponding to the predictor to use
    project_id: UUID
        the UUID corresponding to the project to use
    session: Optional[Session]
        session to attach to the workflow; when omitted, a new ``Session()``
        is created for this instance

    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('display_name')
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info', serializable=False)

    # TODO: Figure out how to make these fields richer/use actual objects
    design_space_id = properties.UUID('modules.design_space_id')
    processor_id = properties.UUID('modules.processor_id')
    predictor_id = properties.UUID('modules.predictor_id')

    # The project_id is used to keep a reference to the project under which the workflow was
    # created. It is currently unclear if this is the best way to do this. Another option might
    # be to have all objects have a context object, but that also seems to have downsides.

    def __init__(self,
                 name: str,
                 design_space_id: UUID,
                 processor_id: UUID,
                 predictor_id: UUID,
                 project_id: Optional[UUID] = None,
                 session: Optional[Session] = None):
        self.name = name
        self.design_space_id = design_space_id
        self.processor_id = processor_id
        self.predictor_id = predictor_id
        self.project_id = project_id
        # A `Session()` default in the signature is evaluated once at definition time
        # and would be shared by every instance, so build a fresh one here instead.
        self.session = session if session is not None else Session()

    def __str__(self):
        return '<DesignWorkflow {!r}>'.format(self.name)

    @property
    def executions(self) -> WorkflowExecutionCollection:
        """Return a resource representing all visible executions of this workflow.

        Raises
        ------
        AttributeError
            if ``project_id`` is unset or ``None``

        """
        # getattr covers instances on which project_id was never assigned at all.
        if getattr(self, 'project_id', None) is None:
            raise AttributeError(
                'Cannot initialize execution without project reference!')
        return WorkflowExecutionCollection(self.project_id, self.uid, self.session)
class MonteCarloProcessor(Serializable['MonteCarloProcessor'], Processor):
    """[ALPHA] Using a Monte Carlo optimizer to search for the best candidate.

    The moves that the MonteCarlo optimizer makes are inferred from the descriptors
    in the design space.

    Parameters
    ----------
    name: str
        name of the processor
    description: str
        description of the processor
    session: Optional[Session]
        session to attach to the processor; stored as given

    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    typ = properties.String('config.type', default='ContinuousSearch', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(properties.List(properties.String()),
                                      'status_info', serializable=False)
    experimental = properties.Boolean("experimental", serializable=False, default=True)
    experimental_reasons = properties.Optional(
        properties.List(properties.String()), 'experimental_reasons', serializable=False)

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID(
        'schema_id', default=UUID('d8ddfe73-10f7-4456-9de9-9a1638bae403'))

    def _attrs(self) -> List[str]:
        """Return the attribute names: name, description and typ."""
        return ["name", "description", "typ"]

    def __init__(self,
                 name: str,
                 description: str,
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.session: Optional[Session] = session

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` entry."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<MonteCarloProcessor {!r}>'.format(self.name)
class ProductDesignSpace(Resource['ProductDesignSpace'], DesignSpace):
    """[ALPHA] An outer product of univariate dimensions, either continuous or enumerated.

    Parameters
    ----------
    name: str
        the name of the design space
    description: str
        the description of the design space
    dimensions: list[Dimension]
        univariate dimensions that are factors of the design space; can be enumerated
        or continuous
    session: Optional[Session]
        session to attach to the design space; when omitted, a new ``Session()``
        is created for this instance

    """

    _response_key = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    dimensions = properties.List(properties.Object(Dimension), 'config.dimensions')
    typ = properties.String('config.type', default='Univariate', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False
    )
    archived = properties.Boolean('archived', default=False)
    experimental = properties.Boolean("experimental", serializable=False, default=True)
    experimental_reasons = properties.Optional(
        properties.List(properties.String()),
        'experimental_reasons',
        serializable=False
    )

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='DESIGN_SPACE')
    schema_id = properties.UUID('schema_id', default=UUID('6c16d694-d015-42a7-b462-8ef299473c9a'))

    def __init__(self,
                 name: str,
                 description: str,
                 dimensions: List[Dimension],
                 session: Optional[Session] = None):
        self.name: str = name
        self.description: str = description
        self.dimensions: List[Dimension] = dimensions
        # A `Session()` default in the signature is evaluated once at definition time
        # and would be shared by every instance, so build a fresh one here instead.
        self.session: Session = session if session is not None else Session()

    def _post_dump(self, data: dict) -> dict:
        """Copy the configured name into the top-level ``display_name`` entry."""
        data['display_name'] = data['config']['name']
        return data

    def __str__(self):
        return '<ProductDesignSpace {!r}>'.format(self.name)
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of information about a single model in a predictor.

    ModelSummary objects are constructed from saved models and should not be
    user-instantiated.

    """

    name = properties.String('name')
    """:str: the name of the model"""
    type_ = properties.String('type')
    """:str: the type of the model (e.g., "ML Model", "Featurizer", etc.)"""
    inputs = properties.List(
        properties.Union([properties.Object(Descriptor), properties.String()]),
        'inputs'
    )
    """:List[Descriptor]: list of input descriptors"""
    outputs = properties.List(
        properties.Union([properties.Object(Descriptor), properties.String()]),
        'outputs'
    )
    """:List[Descriptor]: list of output descriptors"""
    model_settings = properties.Raw('model_settings')
    """:dict: model settings, as a dictionary (keys depend on the model type)"""
    feature_importances = properties.List(
        properties.Object(FeatureImportanceReport),
        'feature_importances'
    )
    """:List[FeatureImportanceReport]: feature importance reports for each output"""
    predictor_name = properties.String('predictor_configuration_name', default='')
    """:str: the name of the predictor that created this model"""
    predictor_uid = properties.Optional(properties.UUID(), 'predictor_configuration_uid')
    """:Optional[UUID]: the unique Citrine id of the predictor that created this model"""
    training_data_count = properties.Optional(properties.Integer, "training_data_count")
    """:int: Number of rows in the training data for the model, if applicable."""

    def __init__(self,
                 name: str,
                 type_: str,
                 inputs: List[Descriptor],
                 outputs: List[Descriptor],
                 model_settings: Dict[str, Any],
                 feature_importances: List[FeatureImportanceReport],
                 predictor_name: str,
                 predictor_uid: Optional[UUID] = None):
        # training_data_count has no constructor argument and is not assigned here.
        self.name, self.type_ = name, type_
        self.inputs, self.outputs = inputs, outputs
        self.model_settings = model_settings
        self.feature_importances = feature_importances
        self.predictor_name, self.predictor_uid = predictor_name, predictor_uid

    def __str__(self):
        return f'<ModelSummary {self.name!r}>'
class GridProcessor(Serializable['GridProcessor'], Processor):
    """Scan over a finite set of materials drawn from the design space's domain.

    To create a finite set of materials from continuous dimensions, a uniform grid is
    created between the bounds of the descriptor. The number of points is specified
    by `grid_sizes`.

    Parameters
    ----------
    name: str
        name of the processor
    description: str
        description of the processor
    grid_sizes: dict[str, int]
        the number of points to select along each dimension of the grid,
        by dimension name

    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    typ = properties.String('config.type', default='Grid', deserializable=False)
    grid_sizes = properties.Mapping(
        properties.String,
        properties.Integer,
        'config.grid_dimensions'
    )
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False
    )

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID(
        'schema_id',
        default=UUID('272791a5-5468-4344-ac9f-2811d9266a4d')
    )

    def __init__(self,
                 name: str,
                 description: str,
                 grid_sizes: Mapping[str, int],
                 session: Optional[Session] = None):
        self.session: Optional[Session] = session
        self.name: str = name
        self.description: str = description
        self.grid_sizes: Mapping[str, int] = grid_sizes

    def _post_dump(self, data: dict) -> dict:
        """Mirror the configured name into the top-level ``display_name`` entry."""
        config_name = data['config']['name']
        data['display_name'] = config_name
        return data

    def __str__(self):
        return f'<GridProcessor {self.name!r}>'
class AuditInfo(Serializable, DictSerializable):
    """
    Model that holds audit metadata. AuditInfo objects should not be created by the user.

    Parameters
    ----------
    created_by: Optional[UUID]
        ID of the user who created the object
    created_at: Optional[Datetime]
        Time, in ms since epoch, at which the object was created
    updated_by: Optional[UUID]
        ID of the user who most recently updated the object
    updated_at: Optional[Datetime]
        Time, in ms since epoch, at which the object was most recently updated

    """

    created_by = properties.Optional(properties.UUID, 'created_by')
    created_at = properties.Optional(properties.Datetime, 'created_at')
    updated_by = properties.Optional(properties.UUID, 'updated_by')
    updated_at = properties.Optional(properties.Datetime, 'updated_at')

    def __init__(self,
                 created_by: Optional[UUID],
                 created_at: Optional[datetime],
                 updated_by: Optional[UUID] = None,
                 updated_at: Optional[datetime] = None):
        self.created_by, self.created_at = created_by, created_at
        self.updated_by, self.updated_at = updated_by, updated_at

    def __repr__(self):
        return (
            f'Created by: {self.created_by!r}\n'
            f'Created at: {self.created_at!r}\n'
            f'Updated by: {self.updated_by!r}\n'
            f'Updated at: {self.updated_at!r}'
        )

    def __str__(self):
        create_str = f'Created by user {self.created_by} at time {self.created_at}'
        # The update line is only appended when at least one update field is set.
        if self.updated_by is None and self.updated_at is None:
            return create_str
        update_str = f'\nUpdated by user {self.updated_by} at time {self.updated_at}'
        return create_str + update_str

    def __eq__(self, other):
        # Equality is defined purely by comparing the two objects' repr output.
        own_repr = self.__repr__()
        other_repr = other.__repr__()
        return own_repr == other_repr

    def as_dict(self):
        """Return the object as a dictionary."""
        return self.dump()
class EnumeratedProcessor(Serializable['EnumeratedProcessor'], Processor):
    """Enumerate up to `max_size` materials from the domain and process each independently.

    Parameters
    ----------
    name: str
        name of the processor
    description: str
        description of the processor
    max_size: int
        maximum number of samples that can be enumerated over

    """

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')
    max_size = properties.Integer('config.max_size')
    typ = properties.String('config.type', default='Enumerated', deserializable=False)
    status = properties.String('status', serializable=False)
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False
    )

    # NOTE: These could go here or in _post_dump - it's unclear which is better right now
    module_type = properties.String('module_type', default='PROCESSOR')
    schema_id = properties.UUID(
        'schema_id',
        default=UUID('307b88a2-fd50-4d27-ae91-b8d6282f68f7')
    )

    def __init__(self,
                 name: str,
                 description: str,
                 max_size: Optional[int] = None,
                 session: Optional[Session] = None):
        largest_int32 = 2**31 - 1  # = 2147483647 (max 32-bit integer)
        self.name: str = name
        self.description: str = description
        # Any falsy max_size (None, but also 0) falls back to the 32-bit maximum.
        self.max_size: int = max_size or largest_int32
        self.session: Optional[Session] = session

    def _post_dump(self, data: dict) -> dict:
        """Mirror the configured name into the top-level ``display_name`` entry."""
        config_name = data['config']['name']
        data['display_name'] = config_name
        return data

    def __str__(self):
        return f'<EnumeratedProcessor {self.name!r}>'
class LabelFractionConstraint(Serializable['LabelFractionConstraint'], Constraint):
    """Constrain the total amount of ingredients that carry a given label.

    Parameters
    ----------
    formulation_descriptor: FormulationDescriptor
        descriptor to constrain
    label: str
        ingredient label to constrain
    min: float
        minimum value
    max: float
        maximum value
    is_required: bool, optional
        whether this ingredient is required.
        If ``True``, the label must be present and its value must be within the
        specified range. if ``False``, the label must be within the specified range only
        if it's present in the formulation, i.e., the value can be 0 or on the range
        ``[min, max]``.

    """

    formulation_descriptor = properties.Object(
        FormulationDescriptor,
        'formulation_descriptor'
    )
    label = properties.String('label')
    min = properties.Optional(properties.Float, 'min')
    max = properties.Optional(properties.Float, 'max')
    is_required = properties.Boolean('is_required')
    typ = properties.String('type', default='LabelFractionConstraint')

    def __init__(self, *,
                 formulation_descriptor: FormulationDescriptor,
                 label: str,
                 min: float,
                 max: float,
                 is_required: bool = True,
                 session: Optional[Session] = None):
        self.session: Optional[Session] = session
        self.formulation_descriptor: FormulationDescriptor = formulation_descriptor
        self.label: str = label
        self.min: float = min
        self.max: float = max
        self.is_required: bool = is_required

    def __str__(self):
        descriptor_key = self.formulation_descriptor.key
        return f'<LabelFractionConstraint {descriptor_key!r}::{self.label!r}>'
class AIResourceMetadata:
    """Abstract class for representing common metadata for Resources."""

    # -- creation / update bookkeeping --
    created_by = properties.Optional(
        properties.UUID, 'created_by', serializable=False)
    """:Optional[UUID]: id of the user who created the resource"""
    create_time = properties.Optional(
        properties.Datetime, 'create_time', serializable=False)
    """:Optional[datetime]: date and time at which the resource was created"""

    updated_by = properties.Optional(
        properties.UUID, 'updated_by', serializable=False)
    """:Optional[UUID]: id of the user who most recently updated the resource,
    if it has been updated"""
    update_time = properties.Optional(
        properties.Datetime, 'update_time', serializable=False)
    """:Optional[datetime]: date and time at which the resource was most recently updated,
    if it has been updated"""

    # -- archival --
    archived = properties.Boolean('archived', default=False)
    """:bool: whether the resource is archived (hidden but not deleted)"""
    archived_by = properties.Optional(
        properties.UUID, 'archived_by', serializable=False)
    """:Optional[UUID]: id of the user who archived the resource, if it has been archived"""
    archive_time = properties.Optional(
        properties.Datetime, 'archive_time', serializable=False)
    """:Optional[datetime]: date and time at which the resource was archived,
    if it has been archived"""

    # -- experimental flags --
    experimental = properties.Boolean("experimental", serializable=False, default=True)
    """:bool: whether the resource is experimental (newer, less well-tested functionality)"""
    experimental_reasons = properties.Optional(
        properties.List(properties.String()),
        'experimental_reasons',
        serializable=False
    )
    """:Optional[List[str]]: human-readable reasons why the resource is experimental"""

    # -- status --
    status = properties.Optional(properties.String(), 'status', serializable=False)
    """:Optional[str]: short description of the resource's status"""
    status_info = properties.Optional(
        properties.List(properties.String()),
        'status_info',
        serializable=False
    )
    """:Optional[List[str]]: human-readable explanations of the status"""
class JobStatusResponse(Resource['JobStatusResponse']):
    """[ALPHA] a response to a job status check.

    The JobStatusResponse summarizes the status for the entire job.

    Parameters
    ----------
    job_type: str
        the type of job for this status report
    status: str
        the actual status of the job.
        One of "Running", "Success", or "Failure".
    tasks: List[TaskNode]
        all of the constituent task required to complete this job
    output: Optional[Map[String,String]]
        job output properties and results

    """

    job_type = properties.String("job_type")
    status = properties.String("status")
    tasks = properties.List(Object(TaskNode), "tasks")
    output = properties.Optional(properties.Mapping(String, String), 'output')

    def __init__(self,
                 job_type: str,
                 status: str,
                 tasks: List[TaskNode],
                 output: Optional[Dict[str, str]]):
        self.job_type, self.status = job_type, status
        self.tasks, self.output = tasks, output
class CSVDataSource(Serializable['CSVDataSource'], DataSource):
    """A data source based on a CSV file stored on the data platform.

    Parameters
    ----------
    file_link: FileLink
        link to the CSV file to read the data from
    column_definitions: Mapping[str, Descriptor]
        Map the column headers to the descriptors that will be used to interpret
        the cell contents
    identifiers: Optional[List[str]]
        List of one or more column headers whose values uniquely identify a row.
        These may overlap with ``column_definitions`` if a column should be used as
        data and as an identifier, but this is not necessary. Identifiers must be
        unique within a dataset. No two rows can contain the same value.

    """

    typ = properties.String('type', default='csv_data_source', deserializable=False)
    file_link = properties.Object(FileLink, "file_link")
    column_definitions = properties.Mapping(
        properties.String,
        properties.Object(Descriptor),
        "column_definitions"
    )
    identifiers = properties.Optional(properties.List(properties.String), "identifiers")

    def _attrs(self) -> List[str]:
        """Return the attribute names: the three constructor fields followed by typ."""
        attr_names = ["file_link", "column_definitions", "identifiers", "typ"]
        return attr_names

    def __init__(self,
                 file_link: FileLink,
                 column_definitions: Mapping[str, Descriptor],
                 identifiers: Optional[List[str]] = None):
        self.file_link = file_link
        self.column_definitions = column_definitions
        self.identifiers = identifiers
class MeanColumn(Serializable['MeanColumn'], Column):
    """[ALPHA] Column containing the mean of a real-valued variable.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    target_units: Optional[str]
        units to convert the real variable into

    """

    data_source = properties.String('data_source')
    target_units = properties.Optional(properties.String, "target_units")
    typ = properties.String('type', default="mean_column", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the attribute names: data_source, target_units and typ."""
        attr_names = ["data_source", "target_units", "typ"]
        return attr_names

    def __init__(self, *, data_source: str, target_units: Optional[str] = None):
        self.data_source, self.target_units = data_source, target_units
class GemTableDataSource(Serializable['GemTableDataSource'], DataSource):
    """[ALPHA] A data source based on a GEM Table hosted on the data platform.

    Parameters
    ----------
    table_id: UUID
        Unique identifier for the GEM Table
    table_version: Union[str,int]
        Version number for the GEM Table, which starts at 1 rather than 0.
        Strings are cast to ints.
    formulation_descriptor: Optional[FormulationDescriptor]
        Optional descriptor used to store formulations emitted by the data source.

    """

    typ = properties.String('type', default='hosted_table_data_source', deserializable=False)
    table_id = properties.UUID("table_id")
    table_version = properties.Integer("table_version")
    formulation_descriptor = properties.Optional(
        properties.Object(FormulationDescriptor),
        "formulation_descriptor"
    )

    def _attrs(self) -> List[str]:
        """Return the attribute names: table_id, table_version and typ."""
        # NOTE(review): formulation_descriptor is a declared field but is not listed
        # here - confirm that omission is intended.
        attr_names = ["table_id", "table_version", "typ"]
        return attr_names

    def __init__(self,
                 table_id: UUID,
                 table_version: Union[int, str],
                 formulation_descriptor: Optional[FormulationDescriptor] = None):
        self.table_id: UUID = table_id
        self.table_version: Union[int, str] = table_version
        self.formulation_descriptor: Optional[FormulationDescriptor] = formulation_descriptor
class NthBiggestComponentQuantityColumn(Serializable["NthBiggestComponentQuantityColumn"], Column):
    """[ALPHA] Quantity of the Nth biggest component.

    If there are not N components in the composition, then this column will be empty.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    n: int
        index of the component quantity to extract, starting with 1 for the biggest
    normalize: Optional[bool]
        whether to normalize the quantity by the sum of all component amounts.
        Default is false

    """

    data_source = properties.String('data_source')
    n = properties.Integer("n")
    normalize = properties.Optional(properties.Boolean, "normalize")
    typ = properties.String(
        'type',
        default="biggest_component_quantity_column",
        deserializable=False
    )

    def _attrs(self) -> List[str]:
        """Return the attribute names: data_source, n, normalize and typ."""
        attr_names = ["data_source", "n", "normalize", "typ"]
        return attr_names

    def __init__(self, *,
                 data_source: str,
                 n: int,
                 normalize: Optional[bool] = False):
        self.data_source, self.n = data_source, n
        self.normalize = normalize
class EnumeratedDimension(Serializable['EnumeratedDimension'], Dimension):
    """A finite, enumerated dimension.

    Parameters
    ----------
    descriptor: Descriptor
        a descriptor of the single dimension
    template_id: UUID
        UUID that corresponds to the template in DC
    values: list[str]
        list of values that can be parsed by the descriptor

    """

    descriptor = properties.Object(Descriptor, 'descriptor')
    values = properties.List(properties.String(), 'list')
    typ = properties.String('type', default='EnumeratedDimension', deserializable=False)
    # NOTE(review): uuid4() runs once, when the class body is executed, so this default
    # is one fixed UUID for the lifetime of the process - confirm that is intended.
    template_id = properties.Optional(properties.UUID, 'template_id', default=uuid4())

    def __init__(self,
                 descriptor: Descriptor,
                 values: List[str],
                 template_id: Optional[UUID] = None):
        # template_id is assigned as given, including None when it is omitted.
        self.template_id: Optional[UUID] = template_id
        self.descriptor: Descriptor = descriptor
        self.values: List[str] = values
class ComponentQuantityColumn(Serializable["ComponentQuantityColumn"], Column):
    """[ALPHA] Column that extracts the quantity of a given component.

    If the component is not present in the composition, then the value in the column
    will be 0.0.

    Parameters
    ----------
    data_source: str
        name of the variable to use when populating the column
    component_name: str
        name of the component from which to extract the quantity
    normalize: Optional[bool]
        whether to normalize the quantity by the sum of all component amounts.
        Default is false

    """

    data_source = properties.String('data_source')
    component_name = properties.String("component_name")
    normalize = properties.Optional(properties.Boolean, "normalize")
    typ = properties.String(
        'type',
        default="component_quantity_column",
        deserializable=False
    )

    def _attrs(self) -> List[str]:
        """Return the attribute names: data_source, component_name, normalize and typ."""
        attr_names = ["data_source", "component_name", "normalize", "typ"]
        return attr_names

    def __init__(self, *,
                 data_source: str,
                 component_name: str,
                 normalize: Optional[bool] = False):
        self.data_source, self.component_name = data_source, component_name
        self.normalize = normalize
class AttributeByTemplateAndObjectTemplate(
        Serializable['AttributeByTemplateAndObjectTemplate'], Variable):
    """[ALPHA] Attribute marked by an attribute template and an object template.

    For example, one property may be measured by two different measurement techniques.
    In this case, that property would have the same attribute template. Filtering by
    measurement templates, which identify the measurement techniques, disambiguates the
    technique used to measure that otherwise ambiguous property.

    Parameters
    ----------
    name: str
        a short human-readable name to use when referencing the variable
    headers: list[str]
        sequence of column headers
    attribute_template: LinkByUID
        attribute template that identifies the attribute to assign to the variable
    object_template: LinkByUID
        template that identifies the associated object
    attribute_constraints: Optional[list[list[LinkByUID, Bounds]]]
        constraints on object attributes in the target object that must be satisfied.
        Constraints are expressed as Bounds. Attributes are expressed with links.
        The attribute that the variable is being set to may be the target of a
        constraint as well. Defaults to None.
    type_selector: DataObjectTypeSelector
        strategy for selecting data object types to consider when matching,
        defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    attribute_template = properties.Object(LinkByUID, 'attribute_template')
    object_template = properties.Object(LinkByUID, 'object_template')
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)]
            )
        ), 'attribute_constraints')
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String('type', default="attribute_by_object", deserializable=False)

    def _attrs(self) -> List[str]:
        """Return the attribute names: the six constructor fields followed by typ."""
        return ["name", "headers", "attribute_template", "object_template",
                "attribute_constraints", "type_selector", "typ"]

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 attribute_template: LinkByUID,
                 object_template: LinkByUID,
                 attribute_constraints: Optional[List[List[Union[LinkByUID, BaseBounds]]]] = None,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        self.name = name
        self.headers = headers
        self.attribute_template = attribute_template
        self.object_template = object_template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector
class GemTable(Resource['GemTable']):
    """A 2-dimensional projection of data.

    GEM Tables are the basic unit used to flatten and manipulate data objects.
    While data objects can represent complex materials data, the format
    is NOT conducive to analysis and machine learning. GEM Tables, however,
    can be used to 'flatten' data objects into useful projections.

    Attributes
    ----------
    uid: Optional[UUID]
        Unique uuid4 identifier of this GEM Table.
    version: Optional[int]
        Version number of the GEM Table
    download_url: Optional[str]
        Url pointing to the location of the GEM Table's contents.
        This is an expiring download link and is not unique.

    """

    _response_key = 'table'

    uid = properties.Optional(properties.UUID(), 'id')
    version = properties.Optional(properties.Integer, 'version')
    download_url = properties.Optional(properties.String, 'signed_download_url')

    def __init__(self):
        self.uid = None
        self.version = None
        self.download_url = None

    def __str__(self):
        return '<GEM Table {!r}, version {}>'.format(self.uid, self.version)

    def resource_type(self) -> str:
        """Get the access control resource type of this resource."""
        return 'TABLE'

    @deprecation.deprecated(deprecated_in="0.16.0",
                            details="Use TableCollection.read() instead")
    def read(self, local_path):
        """[DEPRECATED] Use TableCollection.read() instead.

        Downloads the contents at ``download_url`` and writes them to ``local_path``.

        Parameters
        ----------
        local_path
            destination handed to ``write_file_locally``

        Raises
        ------
        requests.HTTPError
            if the download responds with an HTTP error status

        """  # noqa: D402
        data_location = self.download_url
        data_location = rewrite_s3_links_locally(data_location)
        response = requests.get(data_location)
        # Fail loudly rather than saving an HTTP error body as if it were table data.
        response.raise_for_status()
        write_file_locally(response.content, local_path)
class DummyDescriptor:
    """Holder class that declares one property of each of several container kinds."""

    # Mapping keyed by a Float instance, with the String class as the value type.
    dummy_map = properties.Mapping(properties.Float(), properties.String)
    dummy_list = properties.List(properties.Float, properties.String)
    # type(...) of an instance hands the Float class itself to Set.
    dummy_set = properties.Set(type(properties.Float()))
    link_or_else = properties.LinkOrElse()
    map_collection_key = properties.Mapping(
        properties.Optional(properties.String),
        properties.Integer
    )
    specified_mixed_list = properties.SpecifiedMixedList(
        [properties.Integer(default=100)]
    )
class ModelSummary(Serializable['ModelSummary']):
    """[ALPHA] Summary of information about a single model in a predictor.

    ModelSummary objects are constructed from saved models and should not be
    user-instantiated.

    Parameters
    ----------
    name: str
        the name of the model
    type_: str
        the type of the model (e.g., "ML Model", "Featurizer", etc.)
    inputs: List[Descriptor]
        list of input descriptors
    outputs: List[Descriptor]
        list of output descriptors
    model_settings: dict
        settings of the model, as a dictionary (details depend on model type)
    feature_importances: List[FeatureImportanceReport]
        list of feature importance reports, one for each output
    predictor_name: str
        the name of the predictor that created this model
    predictor_uid: Optional[uuid]
        the uid of the predictor that created this model

    """

    name = properties.String('name')
    type_ = properties.String('type')
    inputs = properties.List(properties.String(), 'inputs')
    outputs = properties.List(properties.String(), 'outputs')
    model_settings = properties.Raw('model_settings')
    feature_importances = properties.List(
        properties.Object(FeatureImportanceReport),
        'feature_importances'
    )
    predictor_name = properties.String('predictor_configuration_name', default='')
    predictor_uid = properties.Optional(properties.UUID(), 'predictor_configuration_uid')

    def __init__(self,
                 name: str,
                 type_: str,
                 inputs: List[Descriptor],
                 outputs: List[Descriptor],
                 model_settings: Dict[str, Any],
                 feature_importances: List[FeatureImportanceReport],
                 predictor_name: str,
                 predictor_uid: Optional[UUID] = None):
        self.name, self.type_ = name, type_
        self.inputs, self.outputs = inputs, outputs
        self.model_settings = model_settings
        self.feature_importances = feature_importances
        self.predictor_name, self.predictor_uid = predictor_name, predictor_uid

    def __str__(self):
        return f'<ModelSummary {self.name!r}>'
class RealDescriptor(Serializable['RealDescriptor'], Descriptor):
    """A descriptor to hold real-valued numbers.

    Parameters
    ----------
    key: str
        the key corresponding to a descriptor
    lower_bound: float
        inclusive lower bound for valid real values
    upper_bound: float
        inclusive upper bound for valid real values
    units: Optional[str]
        units of the values; pass an empty string explicitly for a dimensionless
        quantity. Leaving it as ``None`` is deprecated: a ``DeprecationWarning`` is
        emitted and an empty string is stored.

    """

    lower_bound = properties.Float('lower_bound')
    upper_bound = properties.Float('upper_bound')
    units = properties.Optional(properties.String, 'units', default='')
    typ = properties.String('type', default='Real', deserializable=False)

    def __eq__(self, other):
        """Compare key, bounds, units and type; objects lacking any of them are unequal."""
        try:
            attrs = ["key", "lower_bound", "upper_bound", "units", "typ"]
            return all([
                self.__getattribute__(key) == other.__getattribute__(key) for key in attrs
            ])
        except AttributeError:
            # `other` is not descriptor-like (or an attribute is missing).
            return False

    def __init__(self,
                 key: str,
                 lower_bound: float,
                 upper_bound: float,
                 units: Optional[str] = None):
        self.key: str = key
        self.lower_bound: float = lower_bound
        self.upper_bound: float = upper_bound
        if units is None:
            # Implicit string concatenation keeps the message free of the stray
            # backslash/indentation that a line continuation inside the literal embeds.
            msg = ("Default of dimensionless is deprecated; "
                   "please specify an empty string explicitly.")
            # stacklevel=2 attributes the warning to the code constructing the
            # descriptor rather than to this constructor.
            warnings.warn(msg, category=DeprecationWarning, stacklevel=2)
            self.units = ""
        else:
            self.units = units

    def __str__(self):
        """Return a short tag containing the repr of the descriptor key."""
        return "<RealDescriptor {!r}>".format(self.key)

    def __repr__(self):
        """Return a constructor-like rendering of key, bounds and units."""
        return "RealDescriptor({}, {}, {}, {})".format(
            self.key, self.lower_bound, self.upper_bound, self.units)
class AttributeByTemplate(Serializable['AttributeByTemplate'], Variable):
    """[ALPHA] Variable bound to an attribute that is identified by its template.

    Parameters
    ----------
    name: str
        short, human-readable name used to refer to the variable
    headers: list[str]
        sequence of column headers
    template: LinkByUID
        attribute template identifying the attribute assigned to the variable
    attribute_constraints: list[list[LinkByUID, Bounds]]
        constraints that attributes of the target object must satisfy. Each
        constraint pairs a link to an attribute with the Bounds it must meet; the
        attribute assigned to this variable may itself be constrained.
    type_selector: DataObjectTypeSelector
        strategy for choosing which data object types to consider when matching;
        defaults to PREFER_RUN

    """

    name = properties.String('name')
    headers = properties.List(properties.String, 'headers')
    template = properties.Object(LinkByUID, 'template')
    # Optional list of [link, bounds] pairs.
    attribute_constraints = properties.Optional(
        properties.List(
            properties.SpecifiedMixedList(
                [properties.Object(LinkByUID), properties.Object(BaseBounds)]
            )
        ),
        'attribute_constraints',
    )
    type_selector = properties.Enumeration(DataObjectTypeSelector, "type_selector")
    typ = properties.String('type', default="attribute_by_template", deserializable=False)

    def __init__(self, *,
                 name: str,
                 headers: List[str],
                 template: LinkByUID,
                 attribute_constraints: Optional[
                     List[List[Union[LinkByUID, BaseBounds]]]] = None,
                 type_selector: DataObjectTypeSelector = DataObjectTypeSelector.PREFER_RUN):
        # All arguments are keyword-only and stored unchanged.
        self.name = name
        self.headers = headers
        self.template = template
        self.attribute_constraints = attribute_constraints
        self.type_selector = type_selector

    def _attrs(self) -> List[str]:
        """Return the names of the attributes that characterize this variable."""
        return [
            "name",
            "headers",
            "template",
            "attribute_constraints",
            "type_selector",
            "typ",
        ]
class Table(Resource['Table']):
    """A 2-dimensional projection of data.

    Tables are the basic unit used to flatten and manipulate data objects.
    While data objects can represent complex materials data, the format
    is NOT conducive to analysis and machine learning. Tables, however,
    can be used to 'flatten' data objects into useful projections.

    Attributes
    ----------
    uid: Optional[UUID]
        Unique identifier of this Table.
    version: Optional[int]
        Version number of the Table
    download_url: Optional[str]
        Url pointing to the location of the Table's contents

    """

    _response_key = 'table'

    uid = properties.Optional(properties.UUID(), 'id')
    version = properties.Optional(properties.Integer, 'version')
    download_url = properties.Optional(properties.String, 'signed_download_url')

    def __init__(self):
        # A new Table starts empty; every field is None until set.
        self.uid = None
        self.version = None
        self.download_url = None

    def __str__(self):
        # TODO: Change this to name once that's added to the table model
        return '<Table {!r}>'.format(self.uid)

    def read(self, local_path):
        """Read the Table file from S3 and write its contents to ``local_path``.

        Parameters
        ----------
        local_path
            destination handed to ``write_file_locally``

        Raises
        ------
        requests.HTTPError
            if the download request comes back with an HTTP error status

        """
        data_location = rewrite_s3_links_locally(self.download_url)
        response = requests.get(data_location)
        # Fail loudly instead of saving an HTTP error body as if it were the table.
        response.raise_for_status()
        write_file_locally(response.content, local_path)
class ScalarRangeConstraint(Serializable['ScalarRangeConstraint'], Constraint):
    """[ALPHA] Inequality constraint on a scalar-valued material attribute.

    Parameters
    ----------
    descriptor_key: str
        key of the descriptor being constrained
    max: float
        largest value in the range (optional)
    min: float
        smallest value in the range (optional)
    min_inclusive: bool
        when True (the default), the min value itself is part of the range
    max_inclusive: bool
        when True (the default), the max value itself is part of the range
    session: Optional[Session]
        session stored on the instance

    """

    descriptor_key = properties.String('descriptor_key')
    min = properties.Optional(properties.Float, 'min')
    max = properties.Optional(properties.Float, 'max')
    min_inclusive = properties.Boolean('min_inclusive')
    max_inclusive = properties.Boolean('max_inclusive')
    typ = properties.String('type', default='ScalarRange')

    def __str__(self):
        """Return a short tag containing the repr of the descriptor key."""
        return '<ScalarRangeConstraint {!r}>'.format(self.descriptor_key)

    def __init__(self,
                 descriptor_key: str,
                 max: Optional[float] = None,
                 min: Optional[float] = None,
                 min_inclusive: Optional[bool] = True,
                 max_inclusive: Optional[bool] = True,
                 session: Optional[Session] = None):
        self.descriptor_key = descriptor_key
        # Range end points; either may be left as None.
        self.max = max
        self.min = min
        # Whether each end point belongs to the range.
        self.min_inclusive = min_inclusive
        self.max_inclusive = max_inclusive
        self.session: Optional[Session] = session
class GemTable(Resource['Table']):
    """A 2-dimensional projection of data.

    GEM Tables are the basic unit used to flatten and manipulate data objects.
    While data objects can represent complex materials data, the format
    is NOT conducive to analysis and machine learning. GEM Tables, however,
    can be used to 'flatten' data objects into useful projections.
    """

    _response_key = 'table'
    _resource_type = ResourceTypeEnum.TABLE

    uid = properties.Optional(properties.UUID(), 'id')
    """:Optional[UUID]: unique Citrine id of this GEM Table"""
    version = properties.Optional(properties.Integer, 'version')
    """:Optional[int]: Version number of the GEM Table.
    The first table built from a given config is version 1."""
    download_url = properties.Optional(properties.String, 'signed_download_url')
    """:Optional[str]: Url pointing to the location of the GEM Table's contents.
    This is an expiring download link and is not unique."""

    def __init__(self):
        # A new GemTable starts empty; every field is None until set.
        self.uid = None
        self.version = None
        self.download_url = None

    def __str__(self):
        return '<GEM Table {!r}, version {}>'.format(self.uid, self.version)

    @deprecation.deprecated(deprecated_in="0.16.0", details="Use TableCollection.read() instead")
    def read(self, local_path):
        """[DEPRECATED] Use TableCollection.read() instead.

        Downloads the contents found at ``download_url`` and writes them to
        ``local_path``.

        Raises
        ------
        requests.HTTPError
            if the download request comes back with an HTTP error status

        """  # noqa: D402
        data_location = rewrite_s3_links_locally(self.download_url)
        response = requests.get(data_location)
        # Fail loudly instead of saving an HTTP error body as if it were the table.
        response.raise_for_status()
        write_file_locally(response.content, local_path)
class Processor(Module):
    """A Citrine Processor describes how a design space is searched.

    Abstract type that returns the proper type given a serialized dict.

    """

    _project_id: Optional[UUID] = None
    _session: Optional[Session] = None

    uid = properties.Optional(properties.UUID, 'id', serializable=False)
    """:Optional[UUID]: Citrine Platform unique identifier"""
    name = properties.String('config.name')
    description = properties.Optional(properties.String(), 'config.description')

    @classmethod
    def get_type(cls, data) -> Type['Processor']:
        """Return the subtype selected by ``data['config']['type']``.

        Raises
        ------
        KeyError
            if ``data`` lacks ``config``/``type`` or the type is not one of
            'Grid', 'Enumerated' or 'ContinuousSearch'

        """
        return {
            'Grid': GridProcessor,
            'Enumerated': EnumeratedProcessor,
            'ContinuousSearch': MonteCarloProcessor
        }[data['config']['type']]

    def _attrs(self) -> List[str]:
        """Return the attribute names that ``__eq__`` compares."""
        # The identifier is exposed as the attribute `uid`; 'id' is only its serialized
        # key, and listing it made __eq__ hit AttributeError and report inequality.
        return ["name", "description", "uid"]  # pragma: no cover

    def __eq__(self, other):
        """Compare every attribute named by ``_attrs``; objects lacking one are unequal."""
        try:
            return all([
                self.__getattribute__(key) == other.__getattribute__(key)
                for key in self._attrs()
            ])
        except AttributeError:
            return False
class RealDescriptor(Serializable['RealDescriptor'], Descriptor):
    """[ALPHA] Descriptor for real-valued numbers.

    Parameters
    ----------
    key: str
        key identifying the descriptor
    lower_bound: float
        inclusive lower bound for valid real values
    upper_bound: float
        inclusive upper bound for valid real values
    units: str
        units string; defaults to the empty string

    """

    key = properties.String('descriptor_key')
    lower_bound = properties.Float('lower_bound')
    upper_bound = properties.Float('upper_bound')
    units = properties.Optional(properties.String, 'units', default='')
    typ = properties.String('type', default='Real', deserializable=False)

    def __init__(self, key: str, lower_bound: float, upper_bound: float, units: str = ''):
        self.key: str = key
        self.lower_bound: float = lower_bound
        self.upper_bound: float = upper_bound
        self.units: Optional[str] = units

    def __eq__(self, other):
        """Compare key, bounds, units and type; objects lacking any of them are unequal."""
        compared = ["key", "lower_bound", "upper_bound", "units", "typ"]
        try:
            # Evaluate every comparison before reducing.
            outcomes = [
                self.__getattribute__(field) == other.__getattribute__(field)
                for field in compared
            ]
            return all(outcomes)
        except AttributeError:
            return False

    def __repr__(self):
        """Return a constructor-like rendering of key, bounds and units."""
        parts = (self.key, self.lower_bound, self.upper_bound, self.units)
        return "RealDescriptor({}, {}, {}, {})".format(*parts)

    def __str__(self):
        """Return a short tag containing the repr of the descriptor key."""
        return "<RealDescriptor {!r}>".format(self.key)