class PredictWineQualityParameterSearch(PipelineTask):
    """Grid-search pipeline: runs PredictWineQuality for every (alpha, l1_ratio) pair.

    The grid is the Cartesian product of two `np.arange(0, 1, step)` ranges,
    with the step sizes exposed as pipeline parameters.
    """

    data = data(default=test_data_csv).target
    alpha_step = parameter.value(0.3)
    l1_ratio_step = parameter.value(0.4)
    results = output

    def band(self):
        # Build the search grid from the two configured step sizes.
        alpha_grid = np.arange(0, 1, self.alpha_step)
        l1_ratio_grid = np.arange(0, 1, self.l1_ratio_step)
        variants = list(itertools.product(alpha_grid, l1_ratio_grid))
        logger.info("All Variants: %s", variants)

        experiments = {}
        for alpha_value, l1_ratio in variants:
            task = PredictWineQuality(
                data=self.data, alpha=alpha_value, l1_ratio=l1_ratio
            )
            # Key each experiment by its parameter pair.
            key = "%f_%f" % (alpha_value, l1_ratio)
            experiments[key] = (task.model, task.validation)
        self.results = experiments
class BPipeline(PipelineTask):
    """Pipeline that wires a single A task, named after its `x` parameter."""

    tt = data(scope=ParameterScope.children)
    x = parameter(scope=ParameterScope.children)[str]
    some_a = output

    def band(self):
        # The task name embeds the pipeline's `x` value.
        child_name = "A_%s" % self.x
        self.some_a = A(task_name=child_name)
class TPipeline(PipelineTask):
    """Pipeline whose single output is produced by `simple_task`."""

    tdata = data(scope=ParameterScope.children)
    tstr = parameter(scope=ParameterScope.children)[str]
    some_a = output

    def band(self):
        produced = simple_task()
        self.some_a = produced
class CPipeline(PipelineTask):
    """Pipeline composing two BPipeline children with distinct `x` values."""

    tt = data(scope=ParameterScope.children)
    task_p1 = output
    some_a = output

    def band(self):
        b_x10 = BPipeline(task_name="B_x10", x=10)
        b_x20 = BPipeline(task_name="B_x20", x=20)
        # First output is the whole child pipeline; second is one of its outputs.
        self.task_p1 = b_x10
        self.some_a = b_x20.some_a
class PrepareSaladAtSpark(PipelineTask):
    """Two-step Spark pipeline: cut the vegetables, then add the dressing."""

    vegetables = data(default=data_repo.vegetables)
    dressing = parameter.value("oil")
    salad = output.data

    def band(self):
        chopped = CutAtSpark(vegetables=self.vegetables).chopped_vegetables
        self.salad = AddDressingAtSpark(
            chopped_vegetables=chopped,
            dressing=self.dressing,
        )
class PrepareSalad(PipelineTask):
    """Two-step pipeline: cut the vegetables, then add the dressing."""

    vegetables = data(default=data_repo.vegetables).target
    dressing = parameter.value(default="oil", description="dressing for the salad")
    salad = output

    def band(self):
        cut_step = Cut(vegetables=self.vegetables)
        dressed = AddDressing(
            chopped_vegetables=cut_step.chopped_vegetables,
            dressing=self.dressing,
        )
        self.salad = dressed.salad
class TDataSource(DataSourceTask):
    """Data source resolving a date-partitioned folder under the local data dir."""

    # Root of the partitioned data, relative to this module's location.
    partitioned_data_path = path.join(path.dirname(__file__), "data", "partitioned_data")
    root_location = data(default=partitioned_data_path)
    task_target_date = parameter()[datetime.date]
    logs = output

    def band(self):
        # Partitions are laid out as YYYY-MM-DD/ subfolders of the root.
        partition = "%s/" % self.task_target_date.strftime("%Y-%m-%d")
        self.logs = target(
            self.root_location,
            partition,
            config=folder.without_flag(),
        )
class Experiment(PipelineTask):
    """Pipeline computing data statistics and training a TF model on TFRecords."""

    data = data(data_repo.wines)
    model = output
    validation_stats = output

    block1 = parameter.value(default="a")
    block2 = parameter.value(default="b")
    block3 = parameter.value(default="c")
    precision = parameter[int].default(32)

    def band(self):
        # Validation statistics come straight from the raw data.
        self.validation_stats = DataStats(raw_data=self.data).statistics

        # Convert the raw data before training.
        tf_records_task = TransformToTfRecords(raw_data=self.data)
        trainer = TrainTfModel(
            data=tf_records_task.tf_records,
            block1=self.block1,
            block2=self.block2,
            block3=self.block3,
            precision=self.precision,
        )
        self.model = trainer.model
class TData(Task):
    """Declarative task with one data input, two parameters, and one output.

    NOTE(review): no run/band body is visible here — presumably defined on the
    base `Task` or elsewhere; confirm before relying on execution semantics.
    """

    # Data input; defaults to the literal string "foo".
    t_param = data(default="foo")
    # Int parameter, default 1.
    t_param2 = parameter.default(1)[int]
    # Bool parameter, default False.
    t_param3 = parameter(default=False)[bool]
    # Declared task output (populated at runtime by the framework).
    t_output = output