def __init__(
    self,
    name: str,
    model: dict,
    dataset: Union[GordoBaseDataset, dict],
    project_name: str,
    evaluation: Optional[dict] = None,
    metadata: Optional[Union[dict, Metadata]] = None,
    runtime=None,
):
    """
    Build a machine description.

    Parameters
    ----------
    name: str
        Unique name of this machine.
    model: dict
        Config describing the model to build.
    dataset: Union[GordoBaseDataset, dict]
        Dataset instance, or a dict it can be constructed from.
    project_name: str
        Project this machine belongs to.
    evaluation: Optional[dict]
        Evaluation settings; defaults to ``{"cv_mode": "full_build"}``.
    metadata: Optional[Union[dict, Metadata]]
        Metadata instance, or a dict it can be constructed from.
    runtime:
        Runtime settings; defaults to an empty dict.
    """
    # Avoid mutable default arguments: materialize per-instance defaults here.
    if runtime is None:
        runtime = {}
    if evaluation is None:
        evaluation = {"cv_mode": "full_build"}
    if metadata is None:
        metadata = {}

    self.name = name
    self.model = model

    # Coerce dict configs into their concrete project types when needed.
    if isinstance(dataset, GordoBaseDataset):
        self.dataset = dataset
    else:
        self.dataset = GordoBaseDataset.from_dict(dataset)

    self.runtime = runtime
    self.evaluation = evaluation

    if isinstance(metadata, Metadata):
        self.metadata = metadata
    else:
        self.metadata = Metadata.from_dict(metadata)  # type: ignore

    self.project_name = project_name
    self.host = f"gordoserver-{self.project_name}-{self.name}"
def _get_dataset(self, machine: Machine, start: datetime, end: datetime) -> GordoBaseDataset:
    """
    Apply client settings to a machine's dataset config and build the dataset.

    Parameters
    ----------
    machine: Machine
        Named tuple representing the machine info from controller
    start: datetime
    end: datetime

    Returns
    -------
    GordoBaseDataset
    """
    # We want to adjust for any model offset. If the model outputs less than it got in,
    # it requires extra data than what we're being asked to get predictions for.
    # The +5 intervals just gives us some buffer zone.
    resolution = machine.dataset["resolution"]
    n_intervals = machine.metadata.build_metadata.model.model_offset + 5
    start = self._adjust_for_offset(
        dt=start, resolution=resolution, n_intervals=n_intervals
    )

    # Re-create the machine's dataset, updating it to use the client's
    # data provider and the dates of the data we want.
    # BUGFIX: copy the dict first — the original aliased machine.dataset and
    # mutated the caller's Machine in place via update().
    config = dict(machine.dataset)
    config.update(
        {
            "data_provider": self.data_provider,
            "train_start_date": start,
            "train_end_date": end,
        }
    )

    # Apply any kwargs the client is configured to force for this dataset type.
    parsed_type = parse_module_path(config["type"])
    if parsed_type in self.enforced_dataset_kwargs:
        config.update(self.enforced_dataset_kwargs[parsed_type])

    return GordoBaseDataset.from_dict(config)
def from_config(  # type: ignore
    cls, config: Dict[str, Any], project_name: str, config_globals=None
):
    """
    Construct an instance from a block of YAML config file which represents
    a single Machine; loaded as a ``dict``.

    Parameters
    ----------
    config: dict
        The loaded block of config which represents a 'Machine' in YAML
    project_name: str
        Name of the project this Machine belongs to.
    config_globals:
        The block of config within the YAML file within `globals`

    Returns
    -------
    :class:`~Machine`
    """
    if config_globals is None:
        config_globals = dict()

    name = config["name"]
    # Machine-level model wins; fall back to the global default model.
    model = config.get("model") or config_globals.get("model")

    # Layer machine runtime settings on top of the global runtime defaults.
    runtime = patch_dict(
        config_globals.get("runtime", dict()), config.get("runtime", dict())
    )

    dataset = GordoBaseDataset.from_dict(
        patch_dict(
            config.get("dataset", dict()), config_globals.get("dataset", dict())
        )
    )

    evaluation = patch_dict(
        config_globals.get("evaluation", dict()), config.get("evaluation", dict())
    )

    # Keep global and machine metadata separated under user_defined.
    metadata = Metadata(
        user_defined={
            "global-metadata": config_globals.get("metadata", dict()),
            "machine-metadata": config.get("metadata", dict()),
        }
    )

    return cls(
        name,
        model,
        dataset,
        metadata=metadata,
        runtime=runtime,
        project_name=project_name,
        evaluation=evaluation,
    )
def test_from_dict_with_empty_type():
    """When no "type" key is supplied, from_dict defaults to TimeSeriesDataset."""
    start = datetime(2020, 1, 1, tzinfo=tzutc())
    end = datetime(2020, 3, 1, tzinfo=tzutc())
    tags = [SensorTag("tag1", "asset"), SensorTag("tag2", "asset")]

    dataset = GordoBaseDataset.from_dict(
        {
            "train_start_date": start,
            "train_end_date": end,
            "tag_list": tags,
        }
    )

    # Default concrete type plus faithful round-trip of all supplied fields.
    assert type(dataset) is TimeSeriesDataset
    assert dataset.train_start_date == start
    assert dataset.train_end_date == end
    assert dataset.tag_list == tags