def test_metalearner_nearest_neighbor(self):
  """Runs the executor with the nearest-neighbor algorithm on mock data.

  For each mock dataset one HyperParameters artifact and one MetaFeatures
  artifact are registered as inputs, `MetaLearnerExecutor.Do` is invoked,
  and both the hyperparameter outputs and the model export are verified.
  """
  # Input keys use the 0-based position; the directory names use the mock
  # dataset id.
  input_dict = {}
  for position, mock_id in enumerate([1, 2, 3]):
    best_hparams = standard_artifacts.HyperParameters()
    best_hparams.uri = os.path.join(
        self._input_data_dir,
        f'Tuner.train_mockdata_{mock_id}',
        'best_hyperparameters')
    input_dict[f'hparams_train_{position}'] = [best_hparams]

    features = artifacts.MetaFeatures()
    features.uri = os.path.join(
        self._input_data_dir,
        f'MetaFeatureGen.train_mockdata_{mock_id}',
        'metafeatures')
    input_dict[f'meta_train_features_{position}'] = [features]

  output_dict = {
      executor.OUTPUT_HYPERPARAMS: [self._hparams_out],
      executor.OUTPUT_MODEL: [self._model_out],
  }

  # Work on a copy so the shared exec properties are left untouched.
  exec_properties = self._exec_properties.copy()
  exec_properties['algorithm'] = executor.NEAREST_NEIGHBOR

  executor.MetaLearnerExecutor().Do(input_dict, output_dict, exec_properties)

  self._verify_hparams_outputs(executor.NEAREST_NEIGHBOR)
  self._verify_model_export()
def test_metalearner_majority_voting(self):
  """Runs the executor with the majority-voting algorithm on mock data.

  Only HyperParameters artifacts are supplied as inputs, and only the
  hyperparameter outputs are verified afterwards.
  """
  # Input keys use the 0-based position; the directory names use the mock
  # dataset id.
  input_dict = {}
  for position, mock_id in enumerate([1, 2, 3]):
    best_hparams = standard_artifacts.HyperParameters()
    best_hparams.uri = os.path.join(
        self._input_data_dir,
        f'Tuner.train_mockdata_{mock_id}',
        'best_hyperparameters')
    input_dict[f'hparams_train_{position}'] = [best_hparams]

  output_dict = {
      executor.OUTPUT_HYPERPARAMS: [self._hparams_out],
      executor.OUTPUT_MODEL: [self._model_out],
  }

  # Work on a copy so the shared exec properties are left untouched.
  exec_properties = self._exec_properties.copy()
  exec_properties['algorithm'] = executor.MAJORITY_VOTING

  executor.MetaLearnerExecutor().Do(input_dict, output_dict, exec_properties)

  self._verify_hparams_outputs(executor.MAJORITY_VOTING)
def __init__(self,
             examples: types.Channel = None,
             schema: Optional[types.Channel] = None,
             transform_graph: Optional[types.Channel] = None,
             module_file: Optional[Text] = None,
             tuner_fn: Optional[Text] = None,
             train_args: trainer_pb2.TrainArgs = None,
             eval_args: trainer_pb2.EvalArgs = None,
             tune_args: Optional[tuner_pb2.TuneArgs] = None,
             best_hyperparameters: Optional[types.Channel] = None,
             instance_name: Optional[Text] = None):
  """Construct a Tuner component.

  Args:
    examples: A Channel of type `standard_artifacts.Examples`; the source of
      the examples used in tuning (required).
    schema: An optional Channel of type `standard_artifacts.Schema`
      describing the training and eval data. Used when raw examples are
      provided.
    transform_graph: An optional Channel of type
      `standard_artifacts.TransformGraph` serving as the input transform
      graph, if present. Used when transformed examples are provided.
    module_file: A path to a python module file containing the UDF tuner
      definition. The module must implement a top-level function named
      `tuner_fn` with the signature:
        def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
      Exactly one of 'module_file' or 'tuner_fn' must be supplied.
    tuner_fn: A python path to the UDF model definition function. See
      'module_file' for the required signature. Exactly one of
      'module_file' or 'tuner_fn' must be supplied.
    train_args: A trainer_pb2.TrainArgs instance with the args used for
      training. Currently only num_steps is available.
    eval_args: A trainer_pb2.EvalArgs instance with the args used for eval.
      Currently only num_steps is available.
    tune_args: A tuner_pb2.TuneArgs instance with the args used for tuning.
      Currently only num_parallel_trials is available.
    best_hyperparameters: Optional Channel of type
      `standard_artifacts.HyperParameters` for the result of the best
      hparams. When not given, a Channel holding one new HyperParameters
      artifact is created.
    instance_name: Optional unique instance name. Necessary if multiple
      Tuner components are declared in the same pipeline.

  Raises:
    ValueError: If both or neither of 'module_file' and 'tuner_fn' are
      supplied.
  """
  has_module_file = bool(module_file)
  has_tuner_fn = bool(tuner_fn)
  if has_module_file == has_tuner_fn:
    raise ValueError(
        "Exactly one of 'module_file' or 'tuner_fn' must be supplied")

  # Fall back to a freshly created output channel when none was passed in.
  if not best_hyperparameters:
    best_hyperparameters = types.Channel(
        type=standard_artifacts.HyperParameters,
        artifacts=[standard_artifacts.HyperParameters()])

  spec_kwargs = dict(
      examples=examples,
      schema=schema,
      transform_graph=transform_graph,
      module_file=module_file,
      tuner_fn=tuner_fn,
      train_args=train_args,
      eval_args=eval_args,
      tune_args=tune_args,
      best_hyperparameters=best_hyperparameters,
  )
  super(Tuner, self).__init__(
      spec=TunerSpec(**spec_kwargs), instance_name=instance_name)
def setUp(self):
  """Creates the input channels and train/eval args used by the tests."""
  super(ComponentTest, self).setUp()

  def _single_artifact_channel(artifact):
    # Wraps one artifact in a channel.
    return channel_utils.as_channel([artifact])

  self.examples = _single_artifact_channel(standard_artifacts.Examples())
  self.transform_output = _single_artifact_channel(
      standard_artifacts.TransformGraph())
  self.schema = _single_artifact_channel(standard_artifacts.Schema())
  self.hyperparameters = _single_artifact_channel(
      standard_artifacts.HyperParameters())

  self.train_args = trainer_pb2.TrainArgs(num_steps=100)
  self.eval_args = trainer_pb2.EvalArgs(num_steps=50)
def setUp(self):
  """Creates the input channels and split-aware train/eval args."""
  super().setUp()

  # Each attribute is a channel wrapping one freshly created artifact.
  artifact_by_attr = (
      ('examples', standard_artifacts.Examples),
      ('transform_graph', standard_artifacts.TransformGraph),
      ('schema', standard_artifacts.Schema),
      ('hyperparameters', standard_artifacts.HyperParameters),
  )
  for attr_name, artifact_cls in artifact_by_attr:
    setattr(self, attr_name, channel_utils.as_channel([artifact_cls()]))

  self.train_args = trainer_pb2.TrainArgs(splits=['train'], num_steps=100)
  self.eval_args = trainer_pb2.EvalArgs(splits=['eval'], num_steps=50)
def setUp(self):
  """Creates meta-training input channels and a sample custom config.

  `self.meta_train_data` receives, for each of five indices, one
  HyperParameters channel and one MetaFeatures channel.
  """
  super(ComponentTest, self).setUp()

  train_inputs = {}
  for index in range(5):
    hparams_channel = channel_utils.as_channel(
        [standard_artifacts.HyperParameters()])
    features_channel = channel_utils.as_channel([artifacts.MetaFeatures()])
    train_inputs[f'hparams_train_{index}'] = hparams_channel
    train_inputs[f'meta_train_features_{index}'] = features_channel
  self.meta_train_data = train_inputs

  self.custom_config = {'some': 'thing', 'some other': 1, 'thing': 2}
def __init__(self,
             examples: types.Channel = None,
             schema: types.Channel = None,
             module_file: Optional[Text] = None,
             tuner_fn: Optional[Text] = None,
             model: Optional[types.Channel] = None,
             best_hyperparameters: Optional[types.Channel] = None,
             instance_name: Optional[Text] = None):
  """Construct a Tuner component.

  Args:
    examples: A Channel of type `standard_artifacts.Examples`; the source of
      the examples used in tuning (required). Transformed examples are not
      yet supported.
    schema: A Channel of type `standard_artifacts.Schema` describing the
      training and eval data.
    module_file: A path to a python module file containing the UDF
      KerasTuner definition. Exactly one of 'module_file' or 'tuner_fn' must
      be supplied. The module must implement a top-level function named
      `tuner_fn`. That function takes the working dir path, train data path,
      eval data path and tensorflow_metadata.proto.v0.schema_pb2.Schema, and
      generates a namedtuple TunerFnResult which contains:
        - 'tuner': A KerasTuner that will be used for tuning.
        - 'train_dataset': A tf.data.Dataset of training data.
        - 'eval_dataset': A tf.data.Dataset of eval data.
    tuner_fn: A python path to the UDF model definition function. See
      'module_file' for the required signature. Exactly one of
      'module_file' or 'tuner_fn' must be supplied.
    model: Optional Channel of type `standard_artifacts.Model` for the
      result of the best model. When not given, a Channel holding one new
      Model artifact is created.
    best_hyperparameters: Optional Channel of type
      `standard_artifacts.HyperParameters` for the result of the best
      hparams. When not given, a Channel holding one new HyperParameters
      artifact is created.
    instance_name: Optional unique instance name. Necessary if multiple
      Tuner components are declared in the same pipeline.

  Raises:
    ValueError: If both or neither of 'module_file' and 'tuner_fn' are
      supplied.
  """
  module_file_given = bool(module_file)
  tuner_fn_given = bool(tuner_fn)
  if module_file_given == tuner_fn_given:
    raise ValueError(
        "Exactly one of 'module_file' or 'tuner_fn' must be supplied")

  # Create default output channels for any output the caller did not pass.
  if not model:
    model = types.Channel(
        type=standard_artifacts.Model,
        artifacts=[standard_artifacts.Model()])
  if not best_hyperparameters:
    best_hyperparameters = types.Channel(
        type=standard_artifacts.HyperParameters,
        artifacts=[standard_artifacts.HyperParameters()])

  super(Tuner, self).__init__(
      spec=TunerSpec(
          examples=examples,
          schema=schema,
          module_file=module_file,
          tuner_fn=tuner_fn,
          # The spec names the model output channel `model_export_path`.
          model_export_path=model,
          best_hyperparameters=best_hyperparameters),
      instance_name=instance_name)
def testDoWithHyperParameters(self):
  """Runs the trainer executor with a HyperParameters input artifact.

  Writes a JSON file of hyperparameters under the artifact's uri, adds the
  artifact to the input dict, runs the executor with the module file, and
  verifies the model exports.
  """
  hp_artifact = standard_artifacts.HyperParameters()
  hp_artifact.uri = os.path.join(self._output_data_dir, 'hyperparameters/')

  # TODO(jyzhao): use real kerastuner.HyperParameters instead of dict.
  serialized_hparams = json.dumps({
      'first_dnn_layer_size': 100,
      'num_dnn_layers': 4,
      'dnn_decay_factor': 0.7,
  })
  hparams_file = os.path.join(hp_artifact.uri, 'hyperparameters.txt')
  io_utils.write_string_file(hparams_file, serialized_hparams)

  self._input_dict[executor.HYPERPARAMETERS_KEY] = [hp_artifact]
  self._exec_properties['module_file'] = self._module_file

  self._do(self._trainer_executor)
  self._verify_model_exports()
def __init__(self,
             examples: types.Channel = None,
             schema: Optional[types.Channel] = None,
             transform_graph: Optional[types.Channel] = None,
             module_file: Optional[str] = None,
             tuner_fn: Optional[str] = None,
             train_args: trainer_pb2.TrainArgs = None,
             eval_args: trainer_pb2.EvalArgs = None,
             tune_args: Optional[tuner_pb2.TuneArgs] = None,
             custom_config: Optional[Dict[str, Any]] = None,
             metalearning_algorithm: Optional[str] = None,
             warmup_hyperparameters: Optional[types.Channel] = None,
             metamodel: Optional[types.Channel] = None,
             metafeature: Optional[types.Channel] = None,
             best_hyperparameters: Optional[types.Channel] = None,
             instance_name: Optional[str] = None):
  """Constructs custom Tuner component that stores trial learning curve.

  Adapted from the following code:
  https://github.com/tensorflow/tfx/blob/master/tfx/components/tuner/component.py

  Args:
    examples: A Channel of type `standard_artifacts.Examples`; the source of
      the examples used in tuning (required).
    schema: An optional Channel of type `standard_artifacts.Schema`
      describing the training and eval data. Used when raw examples are
      provided.
    transform_graph: An optional Channel of type
      `standard_artifacts.TransformGraph` serving as the input transform
      graph, if present. Used when transformed examples are provided.
    module_file: A path to a python module file containing the UDF tuner
      definition. The module must implement a top-level function named
      `tuner_fn` with the signature:
        def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
      Exactly one of 'module_file' or 'tuner_fn' must be supplied.
    tuner_fn: A python path to the UDF model definition function. See
      'module_file' for the required signature. Exactly one of
      'module_file' or 'tuner_fn' must be supplied.
    train_args: A trainer_pb2.TrainArgs instance with the args used for
      training. Currently only splits and num_steps are available. Default
      behavior (when splits is empty) is to train on the `train` split.
    eval_args: A trainer_pb2.EvalArgs instance with the args used for eval.
      Currently only splits and num_steps are available. Default behavior
      (when splits is empty) is to evaluate on the `eval` split.
    tune_args: A tuner_pb2.TuneArgs instance with the args used for tuning.
      Currently only num_parallel_trials is available.
    custom_config: A dict containing additional training job parameters
      that will be passed into the user module. It is serialized with
      `json_utils.dumps` before being stored in the spec.
    metalearning_algorithm: Optional str naming the type of
      metalearning_algorithm.
    warmup_hyperparameters: Optional Channel of type
      `artifacts.KCandidateHyperParameters` holding a list of recommended
      search spaces for warm-starting the tuner (generally the output of a
      metalearning component or subpipeline).
    metamodel: Optional Channel of type `standard_artifacts.Model` for the
      trained meta model.
    metafeature: Optional Channel of `artifacts.MetaFeatures` of the dataset
      to be tuned. Used as an input to the meta model to predict the search
      space.
    best_hyperparameters: Optional Channel of type
      `standard_artifacts.HyperParameters` for the result of the best
      hparams. When not given, a Channel holding one new HyperParameters
      artifact is created.
    instance_name: Optional unique instance name. Necessary if multiple
      Tuner components are declared in the same pipeline.

  Raises:
    ValueError: If both or neither of 'module_file' and 'tuner_fn' are
      supplied.
  """
  module_file_given = bool(module_file)
  tuner_fn_given = bool(tuner_fn)
  if module_file_given == tuner_fn_given:
    raise ValueError(
        "Exactly one of 'module_file' or 'tuner_fn' must be supplied")

  # Fall back to a freshly created output channel when none was passed in.
  if not best_hyperparameters:
    best_hyperparameters = types.Channel(
        type=standard_artifacts.HyperParameters,
        artifacts=[standard_artifacts.HyperParameters()])

  # This output channel is always created here; callers cannot supply it.
  trial_summary_plot = types.Channel(type=TunerData, artifacts=[TunerData()])

  serialized_custom_config = json_utils.dumps(custom_config)

  augmented_spec = AugmentedTunerSpec(
      examples=examples,
      schema=schema,
      transform_graph=transform_graph,
      module_file=module_file,
      tuner_fn=tuner_fn,
      train_args=train_args,
      eval_args=eval_args,
      tune_args=tune_args,
      metalearning_algorithm=metalearning_algorithm,
      warmup_hyperparameters=warmup_hyperparameters,
      metamodel=metamodel,
      metafeature=metafeature,
      best_hyperparameters=best_hyperparameters,
      trial_summary_plot=trial_summary_plot,
      custom_config=serialized_custom_config,
  )
  super(AugmentedTuner, self).__init__(
      spec=augmented_spec, instance_name=instance_name)