# imports assume the fate_client "pipeline" package layout
from pipeline.backend.pipeline import PipeLine
from pipeline.component import (DataIO, DataStatistics, Evaluation, FeatureScale,
                                HeteroFeatureBinning, HeteroFeatureSelection,
                                HeteroLR, Intersection, Reader)
from pipeline.interface import Data, Model
from pipeline.utils.tools import load_job_config


# NOTE: this first example uses the older fate_client API: algorithm_param()
# and fit(backend=..., work_mode=...); later releases renamed algorithm_param
# to component_param and dropped the fit() arguments (cf. the examples below).
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    guest_test_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_train_data = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}
    host_test_data = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    reader_1.get_party_instance(role='guest', party_id=guest).algorithm_param(table=guest_test_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).algorithm_param(table=host_train_data)
    reader_1.get_party_instance(role='host', party_id=host).algorithm_param(table=host_test_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")  # start component numbering at 1

    param = {
        "with_label": True,
        "label_name": "y",
        "label_type": "int",
        "output_format": "dense",
        "missing_fill": True,
        "missing_fill_method": "mean",
        "outlier_replace": False,
        "outlier_replace_method": "designated",
        "outlier_replace_value": 0.66,
        "outlier_impute": "-9999"
    }
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.algorithm_param(**param)
    # configure DataIO party instance of guest for dataio_1
    dataio_1.get_party_instance(role='guest', party_id=guest).algorithm_param(**param)

    param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense"
    }
    # get and configure DataIO party instances of host
    dataio_0.get_party_instance(role='host', party_id=host).algorithm_param(**param)
    dataio_1.get_party_instance(role='host', party_id=host).algorithm_param(**param)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0", intersect_method="raw")
    intersection_1 = Intersection(name="intersection_1", intersect_method="raw")

    param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv",
            "init_bucket_method": "quantile"
        },
        "bin_indexes": -1
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**param)

    statistic_0 = DataStatistics(name='statistic_0')

    param = {
        "name": 'hetero_feature_selection_0',
        "filter_methods": ["manually", "unique_value", "iv_filter", "statistic_filter"],
        "manually_param": {
            "filter_out_indexes": [1, 2],
            "filter_out_names": ["x3", "x4"]
        },
        "unique_param": {
            "eps": 1e-6
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.1]
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, False],
            "threshold": [0.001, -0.01]
        },
        "select_col_indexes": -1
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**param)
    hetero_feature_selection_1 = HeteroFeatureSelection(name='hetero_feature_selection_1')

    param = {"name": "hetero_scale_0", "method": "standard_scale"}
    hetero_scale_0 = FeatureScale(**param)
    hetero_scale_1 = FeatureScale(name='hetero_scale_1')

    param = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 1e-4,
        "alpha": 0.01,
        "max_iter": 5,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "zeros"
        },
        "validation_freqs": None,
        "early_stopping_rounds": None
    }
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **param)
    evaluation_0 = Evaluation(name='evaluation_0')

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=dataio_1.output.data))
    # feature engineering on the intersected train data
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(statistic_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[
            hetero_feature_binning_0.output.model, statistic_0.output.model
        ]))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_selection_0.output.model))
    pipeline.add_component(hetero_scale_0,
                           data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(hetero_scale_1,
                           data=Data(data=hetero_feature_selection_1.output.data),
                           model=Model(hetero_scale_0.output.model))
    # set train & validate data of hetero_lr_0 component
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=hetero_scale_0.output.data,
                                     validate_data=hetero_scale_1.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=[hetero_lr_0.output.data]))

    # compile pipeline once finished adding modules; this step forms the conf and dsl files for the job
    pipeline.compile()
    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)
    # query component summary
    print(pipeline.get_component("hetero_lr_0").get_summary())
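# A minimal sketch of how such an example entry point is typically invoked from
# the command line; the parser label and flag name are illustrative, not part of
# the pipeline API. The same pattern applies to the other examples below.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("-config", type=str, default="../../config.yaml",
                        help="path to the job config file")
    args = parser.parse_args()
    main(args.config)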
# assumes the same pipeline imports as the first example, plus HeteroSecureBoost;
# this example uses the newer component_param() API
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    guest_test_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_train_data = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}
    host_test_data = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_test_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_test_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")  # start component numbering at 1

    param = {
        "with_label": True,
        "label_name": "y",
        "label_type": "int",
        "output_format": "dense",
        "missing_fill": True,
        "missing_fill_method": "mean",
        "outlier_replace": False,
        "outlier_replace_method": "designated",
        "outlier_replace_value": 0.66,
        "outlier_impute": "-9999"
    }
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(**param)
    # configure DataIO party instance of guest for dataio_1
    dataio_1.get_party_instance(role='guest', party_id=guest).component_param(**param)

    param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense"
    }
    # get and configure DataIO party instances of host
    dataio_0.get_party_instance(role='host', party_id=host).component_param(**param)
    dataio_1.get_party_instance(role='host', party_id=host).component_param(**param)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0", intersect_method="raw")
    intersection_1 = Intersection(name="intersection_1", intersect_method="raw")

    param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv",
            "init_bucket_method": "quantile"
        },
        "bin_indexes": -1
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**param)

    statistic_0 = DataStatistics(name='statistic_0')

    param = {
        "name": 'hetero_feature_selection_0',
        "filter_methods": ["manually", "unique_value", "iv_filter", "statistic_filter"],
        "manually_param": {
            "filter_out_indexes": [1, 2],
            "filter_out_names": ["x2", "x3"]
        },
        "unique_param": {
            "eps": 1e-6
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.1]
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, False],
            "threshold": [0.001, -0.01]
        },
        "select_col_indexes": -1
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**param)
    hetero_feature_selection_1 = HeteroFeatureSelection(name='hetero_feature_selection_1')

    param = {
        "task_type": "classification",
        "learning_rate": 0.1,
        "num_trees": 10,
        "subsample_feature_rate": 0.5,
        "n_iter_no_change": False,
        "tol": 0.0002,
        "bin_num": 50,
        "objective_param": {
            "objective": "cross_entropy"
        },
        "encrypt_param": {
            "method": "paillier"
        },
        "predict_param": {
            "threshold": 0.5
        },
        "tree_param": {
            "max_depth": 2
        },
        "cv_param": {
            "n_splits": 5,
            "shuffle": False,
            "random_seed": 103,
            "need_cv": False
        },
        "validation_freqs": 2,
        "early_stopping_rounds": 5,
        "metrics": ["auc", "ks"]
    }
    hetero_secureboost_0 = HeteroSecureBoost(name='hetero_secureboost_0', **param)
    evaluation_0 = Evaluation(name='evaluation_0')

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=dataio_1.output.data))
    # feature engineering on the intersected train data
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(statistic_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[
            hetero_feature_binning_0.output.model, statistic_0.output.model
        ]))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_selection_0.output.model))
    # set train & validate data of hetero_secureboost_0 component
    pipeline.add_component(
        hetero_secureboost_0,
        data=Data(train_data=hetero_feature_selection_0.output.data,
                  validate_data=hetero_feature_selection_1.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_secureboost_0.output.data))

    # compile pipeline once finished adding modules; this step forms the conf and dsl files for the job
    pipeline.compile()
    # fit model
    pipeline.fit()
    # query component summary
    print(pipeline.get_component("hetero_secureboost_0").get_summary())
# assumes the same pipeline imports as the first example, plus Union
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data_0 = {"name": "breast_hetero_guest", "namespace": "experiment"}
    guest_train_data_1 = {"name": "breast_hetero_guest", "namespace": "experiment"}
    guest_test_data_0 = {"name": "breast_hetero_guest", "namespace": "experiment"}
    guest_test_data_1 = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_train_data_0 = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}
    host_train_data_1 = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}
    host_test_data_0 = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}
    host_test_data_1 = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_2 = Reader(name="reader_2")
    reader_3 = Reader(name="reader_3")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data_0)
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data_1)
    reader_2.get_party_instance(role='guest', party_id=guest).component_param(table=guest_test_data_0)
    reader_3.get_party_instance(role='guest', party_id=guest).component_param(table=guest_test_data_1)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data_0)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_train_data_1)
    reader_2.get_party_instance(role='host', party_id=host).component_param(table=host_test_data_0)
    reader_3.get_party_instance(role='host', party_id=host).component_param(table=host_test_data_1)

    # define Union components to concatenate the split train and test tables
    param = {
        "name": "union_0",
        "keep_duplicate": True
    }
    union_0 = Union(**param)
    param = {
        "name": "union_1",
        "keep_duplicate": True
    }
    union_1 = Union(**param)

    param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense"
    }
    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")  # start component numbering at 1

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    # get and configure DataIO party instances of host and of guest for dataio_1
    dataio_0.get_party_instance(role='host', party_id=host).component_param(**param)
    dataio_1.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    dataio_1.get_party_instance(role='host', party_id=host).component_param(**param)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv"
        },
        "bin_indexes": -1
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**param)

    statistic_0 = DataStatistics(name='statistic_0')

    param = {
        "name": 'hetero_feature_selection_0',
        "filter_methods": ["manually", "iv_filter", "statistic_filter"],
        "manually_param": {
            "filter_out_indexes": [1, 2],
            "filter_out_names": ["x2", "x3"]
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.01]
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, True],
            "threshold": [0.001, -0.01]
        },
        "select_col_indexes": -1
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**param)
    hetero_feature_selection_1 = HeteroFeatureSelection(name='hetero_feature_selection_1')

    param = {
        "name": "hetero_scale_0",
        "method": "standard_scale"
    }
    hetero_scale_0 = FeatureScale(**param)
    hetero_scale_1 = FeatureScale(name='hetero_scale_1')

    param = {
        "penalty": "L2",
        "validation_freqs": None,
        "early_stopping_rounds": None,
        "max_iter": 5
    }
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **param)
    evaluation_0 = Evaluation(name='evaluation_0')

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(reader_2)
    pipeline.add_component(reader_3)
    pipeline.add_component(union_0, data=Data(data=[reader_0.output.data, reader_1.output.data]))
    pipeline.add_component(union_1, data=Data(data=[reader_2.output.data, reader_3.output.data]))
    pipeline.add_component(dataio_0, data=Data(data=union_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=union_1.output.data),
                           model=Model(dataio_0.output.model))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=dataio_1.output.data))
    # feature engineering on the intersected train data
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(statistic_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_feature_selection_0,
                           data=Data(data=intersection_0.output.data),
                           model=Model(isometric_model=[hetero_feature_binning_0.output.model,
                                                        statistic_0.output.model]))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_selection_0.output.model))
    pipeline.add_component(hetero_scale_0, data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(hetero_scale_1,
                           data=Data(data=hetero_feature_selection_1.output.data),
                           model=Model(hetero_scale_0.output.model))
    # set train & validate data of hetero_lr_0 component
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=hetero_scale_0.output.data,
                                     validate_data=hetero_scale_1.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=[hetero_lr_0.output.data]))

    # compile pipeline once finished adding modules; this step forms the conf and dsl files for the job
    pipeline.compile()
    # fit model
    pipeline.fit()
    # query component summary
    print(pipeline.get_component("hetero_lr_0").get_summary())
# assumes the pipeline imports from the first example, plus DataTransform,
# HeteroPearson and OneHotEncoder
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment_sid{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment_sid{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest). \
        set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0", with_match_id=True)
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    feature_scale_0 = FeatureScale(name='feature_scale_0',
                                   method="standard_scale",
                                   need_run=True)

    binning_param = {
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "adjustment_factor": 0.5,
        "local_only": False,
        "need_run": True,
        "transform_param": {
            "transform_cols": -1,
            "transform_type": "bin_num"
        }
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(name='hetero_feature_binning_0',
                                                    **binning_param)

    statistic_0 = DataStatistics(name='statistic_0', statistics=["95%"])
    pearson_0 = HeteroPearson(name='pearson_0', column_indexes=-1)
    onehot_0 = OneHotEncoder(name='onehot_0')

    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": [
            "manually", "unique_value", "iv_filter",
            "coefficient_of_variation_value_thres", "outlier_cols"
        ],
        "manually_param": {
            "filter_out_indexes": [0, 1, 2],
            "filter_out_names": ["x3"]
        },
        "unique_param": {
            "eps": 1e-06
        },
        "iv_param": {
            "metrics": ["iv", "iv", "iv"],
            "filter_type": ["threshold", "top_k", "top_percentile"],
            "threshold": [0.001, 100, 0.99]
        },
        "variance_coe_param": {
            "value_threshold": 0.3
        },
        "outlier_param": {
            "percentile": 0.95,
            "upper_threshold": 2.0
        }
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)

    lr_param = {
        "name": "hetero_lr_0",
        "penalty": "L2",
        "optimizer": "rmsprop",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": 320,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "zeros"
        },
        "sqn_param": {
            "update_interval_L": 3,
            "memory_M": 5,
            "sample_size": 5000,
            "random_seed": None
        },
        "cv_param": {
            "n_splits": 5,
            "shuffle": False,
            "random_seed": 103,
            "need_cv": False
        }
    }
    hetero_lr_0 = HeteroLR(**lr_param)
    evaluation_0 = Evaluation(name='evaluation_0')

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(feature_scale_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=feature_scale_0.output.data))
    pipeline.add_component(statistic_0, data=Data(data=feature_scale_0.output.data))
    pipeline.add_component(pearson_0, data=Data(data=feature_scale_0.output.data))
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=hetero_feature_binning_0.output.data),
        model=Model(isometric_model=[
            hetero_feature_binning_0.output.model, statistic_0.output.model
        ]))
    pipeline.add_component(onehot_0, data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=onehot_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    pipeline.compile()
    pipeline.fit()
# assumes the pipeline imports from the first example, plus DataTransform;
# prettify() is a pretty-printing helper (see the sketch after this function)
def main(config="../../config.yaml", namespace=""):
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]

    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}
    # guest_train_data = {"name": "default_credit_hetero_guest", "namespace": f"experiment{namespace}"}
    # host_train_data = {"name": "default_credit_hetero_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=hosts)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0", output_format='dense')
    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(
        role='guest', party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(with_label=True)
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))

    statistic_param = {
        "name": "statistic_0",
        "statistics": ["95%", "coefficient_of_variance", "stddev"],
        "column_indexes": -1,
        "column_names": []
    }
    statistic_0 = DataStatistics(**statistic_param)
    pipeline.add_component(statistic_0, data=Data(data=intersection_0.output.data))

    pipeline.compile()
    # fit model
    pipeline.fit()
    # query component summary
    prettify(pipeline.get_component("statistic_0").get_summary())
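# The example above calls prettify() without defining it. Here is a minimal
# assumed implementation that dumps the summary dict as indented JSON; the
# name matches the call above, but the body is a sketch, not FATE API.
import json


def prettify(response, verbose=True):
    # pretty-print the component summary for readability
    if verbose:
        print(json.dumps(response, indent=4, ensure_ascii=False))
        print()
    return response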
# assumes the pipeline imports from the first example, plus DataTransform, PSI,
# HeteroSecureBoost and HeteroFastSecureBoost
def make_normal_dsl(config, namespace, selection_param,
                    is_multi_host=False,
                    host_dense_output=True,  # kept for interface compatibility; unused below
                    **kwargs):
    parties = config.parties
    guest = parties.guest[0]
    if is_multi_host:
        hosts = parties.host
    else:
        hosts = parties.host[0]

    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}
    guest_eval_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_eval_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=hosts)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    # define DataTransform components
    data_transform_0 = DataTransform(name="data_transform_0")  # start component numbering at 0
    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(
        role='guest', party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))

    last_cpn = intersection_0
    selection_include_model = []
    if 'binning_param' in kwargs:
        hetero_feature_binning_0 = HeteroFeatureBinning(**kwargs['binning_param'])
        pipeline.add_component(hetero_feature_binning_0, data=Data(data=last_cpn.output.data))
        selection_include_model.append(hetero_feature_binning_0)
        # last_cpn = hetero_feature_binning_0

    if 'statistic_param' in kwargs:
        statistic_0 = DataStatistics(**kwargs['statistic_param'])
        pipeline.add_component(statistic_0, data=Data(data=last_cpn.output.data))
        selection_include_model.append(statistic_0)
        # last_cpn = statistic_0

    if 'psi_param' in kwargs:
        # PSI compares the train and eval distributions, so read in a second dataset
        reader_1 = Reader(name="reader_1")
        reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_eval_data)
        reader_1.get_party_instance(role='host', party_id=hosts).component_param(table=host_eval_data)
        data_transform_1 = DataTransform(name="data_transform_1")
        intersection_1 = Intersection(name="intersection_1")
        pipeline.add_component(reader_1)
        pipeline.add_component(data_transform_1,
                               data=Data(data=reader_1.output.data),
                               model=Model(data_transform_0.output.model))
        pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))
        psi_0 = PSI(**kwargs['psi_param'])
        pipeline.add_component(psi_0,
                               data=Data(train_data=intersection_0.output.data,
                                         validate_data=intersection_1.output.data))
        selection_include_model.append(psi_0)

    if 'sbt_param' in kwargs:
        secureboost_0 = HeteroSecureBoost(**kwargs['sbt_param'])
        pipeline.add_component(secureboost_0, data=Data(train_data=intersection_0.output.data))
        selection_include_model.append(secureboost_0)

    if "fast_sbt_param" in kwargs:
        fast_sbt_0 = HeteroFastSecureBoost(**kwargs['fast_sbt_param'])
        pipeline.add_component(fast_sbt_0, data=Data(train_data=intersection_0.output.data))
        selection_include_model.append(fast_sbt_0)

    # feature selection consumes every isometric model produced above
    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[x.output.model for x in selection_include_model]))

    # compile pipeline once finished adding modules; this step forms the conf and dsl files for the job
    pipeline.compile()
    return pipeline
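# A hedged usage sketch for make_normal_dsl: build a selection job whose
# iv_filter consumes an upstream binning model. The function name below and
# all parameter values are illustrative; only the keys mirror the selection
# and binning params used in the examples above.
def run_selection_demo(config="../../config.yaml", namespace=""):
    if isinstance(config, str):
        config = load_job_config(config)
    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "filter_methods": ["iv_filter"],
        "iv_param": {
            "metrics": ["iv"],
            "filter_type": ["threshold"],
            "take_high": [True],
            "threshold": [0.1]
        }
    }
    binning_param = {
        "name": "hetero_feature_binning_0",
        "method": "quantile",
        "bin_indexes": -1
    }
    pipeline = make_normal_dsl(config, namespace, selection_param,
                               binning_param=binning_param)
    pipeline.fit()
    print(pipeline.get_component("hetero_feature_selection_0").get_summary())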