class PredictIrisType(PipelineTask):
    """Pipeline: download the Iris datasets, train a model, and validate it."""

    # Training hyper-parameters.
    batch_size = parameter.default(100)[int]
    train_steps = parameter.default(1000)[int]

    # Dataset locations, resolved from dbnd configuration.
    train_set_url = parameter(
        config_path=ConfigPath("dbnd_examples", "iris_train_url"))[str]
    test_set_url = parameter(
        config_path=ConfigPath("dbnd_examples", "iris_test_url"))[str]

    # Validation report produced by the last stage of the pipeline.
    report = output

    def band(self):
        """Wire the pipeline stages: download -> prepare -> train -> validate."""
        raw_train = DownloadKeras(url=self.train_set_url)
        raw_test = DownloadKeras(url=self.test_set_url)

        prepared = PrepareTestAndValidationData(raw_data=raw_test.downloaded)

        trained = TrainIrisModel(
            train_set=raw_train.downloaded,
            test_set=prepared.test,
            batch_size=self.batch_size,
            train_steps=self.train_steps,
        )

        validated = ValidateIrisModel(
            model=trained.model, validation_set=prepared.validation
        )
        self.report = validated.report
def test_prepare_data_parameter_decorator(self):
    """Doc example: declare the argument type of a @task via `parameter`.

    The DOC START/END markers delimit the snippet extracted into the
    documentation — keep them intact.
    """
    #### DOC START
    @task(data=parameter.default(None)[DataFrame])
    def prepare_data(data) -> DataFrame:
        return data

    #### DOC END
    # Execute the decorated task through dbnd with a sample dataset.
    prepare_data.dbnd_run(data=data_repo.wines)
class DataSplit(PythonTask):
    """Task that writes each of its data splits to its own CSV output target."""

    # How many splits to produce; the output factory reads this value.
    splits_count = parameter.default(2)
    # One CSV target per split, created by `_get_all_splits`.
    splits = output.csv.folder(output_factory=_get_all_splits)

    def run(self):
        # `self.splits` maps split key -> output target.
        # dict.items() behaves identically on Py2/Py3; six.iteritems is unneeded.
        for key, split in self.splits.items():
            # Include the key so each write is identifiable in the logs
            # (lazy %-style args avoid formatting when the level is disabled).
            logging.info("writing split %s", key)
            split.write("split_%s" % key)
class TrainModel(MXNetTask):
    """MXNet task that trains an MLP digit classifier and saves its parameters."""

    # Hyper-parameters.
    learning_rate = parameter.default(0.1)[float]
    optimizer = parameter.default("sgd")
    epoch_num = parameter[int].default(10)

    # Input datasets (features and labels for train/test).
    training_data = data
    training_labels = data
    test_data = data
    test_labels = data

    # Trained model parameters are persisted to this output's path.
    model = output

    def main(self, ctx):
        """Build data iterators, fit the module on `ctx`, and save the weights."""
        train_iter = self.to_ndarray_iterator(self.training_data, self.training_labels, shuffle=True)
        val_iter = self.to_ndarray_iterator(self.test_data, self.test_labels)
        # create a trainable module on compute context
        mlp_model = mx.mod.Module(symbol=create_mlp_digit_classifier(), context=ctx)
        mlp_model.fit(
            train_iter,  # train data
            eval_data=val_iter,  # validation data
            optimizer=self.optimizer,  # use SGD to train
            optimizer_params={"learning_rate": self.learning_rate},  # use fixed learning rate
            eval_metric="acc",  # report accuracy during training
            # NOTE(review): `self.batch_size` is not declared on this class —
            # presumably MXNetTask defines it; otherwise this raises
            # AttributeError at run time. Confirm, or declare e.g.
            # `batch_size = parameter.default(100)[int]` as in PredictIrisType.
            batch_end_callback=mx.callback.Speedometer(self.batch_size),  # output progress for each 100 data batches
            num_epoch=self.epoch_num,
        )  # train for at most `epoch_num` dataset passes (default 10)
        # mlp_model.save_checkpoint("mnist", self.epoch_num)
        mlp_model.save_params(self.model.path)
class TData(Task):
    """Minimal task exercising data/parameter/output declarations."""

    # Data input with a default location/value.
    t_param = data(default="foo")
    # NOTE(review): two declaration styles are mixed below
    # (`parameter.default(1)[int]` vs `parameter(default=False)[bool]`);
    # presumably equivalent in the dbnd DSL — confirm before unifying.
    t_param2 = parameter.default(1)[int]
    t_param3 = parameter(default=False)[bool]
    # Task output target.
    t_output = output
def prepare_data(data=parameter.default(None)[DataFrame]) -> DataFrame:
    """Return the input DataFrame unchanged.

    The default value is a dbnd `parameter` descriptor that declares the
    argument's type as DataFrame (with no default dataset).
    """
    return data