def main(config="../../config.yaml", namespace=""):
    """Fit a reader -> DataIO -> HomoFeatureBinning (x2) pipeline.

    ``config`` is either a path handed to ``load_job_config`` or an already
    loaded config object. ``namespace`` is appended to the ``experiment``
    table namespace. The second binning component receives the model output
    of the first one.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest_id, host_id, arbiter_id = parties.guest[0], parties.host[0], parties.arbiter[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "breast_homo_guest", "namespace": table_namespace}
    host_table = {"name": "breast_homo_host", "namespace": table_namespace}

    # Job is initiated by the guest; guest, host and arbiter all take part.
    pipeline = PipeLine().set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id, arbiter=arbiter_id)

    # Each party reads its own table.
    reader = Reader(name="reader_0")
    for role, party_id, table in (('guest', guest_id, guest_table),
                                  ('host', host_id, host_table)):
        reader.get_party_instance(role=role, party_id=party_id).component_param(table=table)

    dataio = DataIO(name="dataio_0", with_label=True, output_format="dense")
    binning_fit = HomoFeatureBinning(name='homo_binning_0', sample_bins=1000)
    binning_reuse = HomoFeatureBinning(name='homo_binning_1', sample_bins=1000)

    # Components are added in execution order.
    pipeline.add_component(reader)
    pipeline.add_component(dataio, data=Data(data=reader.output.data))
    pipeline.add_component(binning_fit, data=Data(data=dataio.output.data))
    pipeline.add_component(
        binning_reuse,
        data=Data(data=dataio.output.data),
        model=Model(model=binning_fit.output.model),
    )

    pipeline.compile()
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
def main(config="../../config.yaml", namespace=""):
    """Fit a reader -> DataTransform pipeline with a tag-format host table.

    The guest side is configured with labels. The host side is configured
    with ``input_format="tag"`` and ``tag_with_value=True`` and no label.
    ``config`` may be a path (loaded via ``load_job_config``) or a loaded
    config object.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest_id, host_id = parties.guest[0], parties.host[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_table = {"name": "tag_value_1000_140", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id)

    reader = Reader(name="reader_0")
    reader.get_party_instance(role='guest', party_id=guest_id).component_param(table=guest_table)
    reader.get_party_instance(role='host', party_id=host_id).component_param(table=host_table)

    transform = DataTransform(name="data_transform_0")
    guest_transform = transform.get_party_instance(role='guest', party_id=guest_id)
    guest_transform.component_param(with_label=True)
    host_transform = transform.get_party_instance(role='host', party_id=host_id)
    host_transform.component_param(with_label=False, input_format="tag", tag_with_value=True)

    pipeline.add_component(reader)
    pipeline.add_component(transform, data=Data(data=reader.output.data))
    pipeline.compile()

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
def main(config="../../config.yaml", namespace=""):
    """Fit a reader -> DataIO -> HomoSecureBoost pipeline with ``need_cv`` on.

    ``config`` may be a path (loaded via ``load_job_config``) or a loaded
    config object; ``namespace`` is appended to the ``experiment`` table
    namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest_id, host_id, arbiter_id = parties.guest[0], parties.host[0], parties.arbiter[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "vehicle_scale_homo_guest", "namespace": table_namespace}
    host_table = {"name": "vehicle_scale_homo_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id, arbiter=arbiter_id)

    dataio = DataIO(name="dataio_0")
    reader = Reader(name="reader_0")

    role_tables = (('guest', guest_id, guest_table), ('host', host_id, host_table))
    for role, party_id, table in role_tables:
        reader.get_party_instance(role=role, party_id=party_id).component_param(table=table)
    # Both parties carry labels and emit dense output.
    for role, party_id, _ in role_tables:
        dataio.get_party_instance(role=role, party_id=party_id).component_param(
            with_label=True, output_format="dense")

    cv_settings = {"need_cv": True, "shuffle": False, "n_splits": 5}
    booster = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=3,
        task_type='classification',
        objective_param={"objective": "cross_entropy"},
        tree_param={"max_depth": 3},
        cv_param=cv_settings,
    )

    pipeline.add_component(reader)
    pipeline.add_component(dataio, data=Data(data=reader.output.data))
    pipeline.add_component(booster, data=Data(train_data=dataio.output.data))
    pipeline.compile()

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
def main(config="../../config.yaml", namespace=""):
    """Fit the ``common_tools.make_normal_dsl`` pipeline with a manual filter.

    The selection component uses only the ``"manually"`` filter method.
    After fitting, the summary of ``hetero_feature_selection_0`` is passed
    to ``common_tools.prettify``.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    manual_filter = {
        "filter_out_indexes": None,
        "filter_out_names": None,
        "left_col_indexes": [0, 1, 2],
        "left_col_names": ["x3"],
    }
    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["manually"],
        "manually_param": manual_filter,
    }

    pipeline = common_tools.make_normal_dsl(config, namespace, selection_param)
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    selection_summary = pipeline.get_component("hetero_feature_selection_0").get_summary()
    common_tools.prettify(selection_summary)
def main(config="../../config.yaml", namespace=""):
    """Fit the ``common_tools.make_add_one_hot_dsl`` pipeline.

    The pipeline is built with quantile-binning parameters for
    ``hetero_feature_binning_0``. ``config`` may be a path (loaded via
    ``load_job_config``) or a loaded config object.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    transform_settings = {
        "transform_cols": -1,
        "transform_names": None,
        "transform_type": "bin_num",
    }
    param = {
        "name": "hetero_feature_binning_0",
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": transform_settings,
    }

    pipeline = common_tools.make_add_one_hot_dsl(config, namespace, param)
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
def main(config="../../config.yaml", namespace=""):
    """Fit the ``make_normal_dsl`` pipeline for ``hetero_lr_0`` with ``is_ovr=True``.

    After fitting, the summary of ``hetero_lr_0`` is passed to
    ``common_tools.prettify``.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    lr_param = {
        "name": "hetero_lr_0",
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 1e-05,
        "alpha": 0.0001,
        "max_iter": 10,
        "early_stop": "diff",
        "multi_class": "ovr",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {"init_method": "zeros"},
    }

    pipeline = common_tools.make_normal_dsl(config, namespace, lr_param, is_ovr=True)

    # Train, then report the LR component's summary.
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
    lr_summary = pipeline.get_component("hetero_lr_0").get_summary()
    common_tools.prettify(lr_summary)
def main(config="../../config.yaml", namespace=""):
    """Fit the ``common_tools.make_single_predict_pipeline`` pipeline.

    The pipeline is built from the binning and selection parameter dicts
    below. After fitting, the summary of ``hetero_feature_selection_0`` is
    passed to ``common_tools.prettify``.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    binning_param = {
        "name": 'hetero_feature_binning_0',
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": {
            "transform_cols": -1,
            "transform_names": None,
            "transform_type": "bin_num",
        },
    }

    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["manually", "iv_value_thres", "iv_percentile"],
        "manually_param": {"filter_out_indexes": [], "filter_out_names": []},
        "unique_param": {"eps": 1e-06},
        "iv_value_param": {"value_threshold": 0.1},
        "iv_percentile_param": {"percentile_threshold": 0.9},
        "variance_coe_param": {"value_threshold": 0.3},
        "outlier_param": {"percentile": 0.95, "upper_threshold": 2.0},
    }

    pipeline = common_tools.make_single_predict_pipeline(
        config, namespace, selection_param, binning_param=binning_param)
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    selection_summary = pipeline.get_component("hetero_feature_selection_0").get_summary()
    common_tools.prettify(selection_summary)
def main(config="../../config.yaml", namespace=""):
    """Fit a reader -> DataIO -> Intersection pipeline.

    The Intersection component is configured with ``intersect_method="raw"``
    and the encode settings below. Neither party's DataIO is configured
    with labels. ``config`` may be a path (loaded via ``load_job_config``)
    or a loaded config object.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest_id, host_id = parties.guest[0], parties.host[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_table = {"name": "breast_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id)

    role_tables = (('guest', guest_id, guest_table), ('host', host_id, host_table))

    reader = Reader(name="reader_0")
    for role, party_id, table in role_tables:
        reader.get_party_instance(role=role, party_id=party_id).component_param(table=table)

    dataio = DataIO(name="dataio_0")
    for role, party_id, _ in role_tables:
        dataio.get_party_instance(role=role, party_id=party_id).component_param(
            with_label=False, output_format="dense")

    intersect = Intersection(
        name="intersect_0",
        intersect_method="raw",
        sync_intersect_ids=True,
        join_role="host",
        with_encode=True,
        only_output_key=True,
        encode_params={"encode_method": "sm3", "salt": "12345", "base64": False},
    )

    pipeline.add_component(reader)
    pipeline.add_component(dataio, data=Data(data=reader.output.data))
    pipeline.add_component(intersect, data=Data(data=dataio.output.data))
    pipeline.compile()

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
def main(config="../../config.yaml", namespace=""):
    """Fit reader -> DataIO -> Intersection -> HeteroSecureBoost with ``need_cv`` on.

    After fitting, the summary of ``hetero_secure_boost_0`` is printed.
    ``config`` may be a path (loaded via ``load_job_config``) or a loaded
    config object.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest_id, host_id = parties.guest[0], parties.host[0]
    backend, work_mode = config.backend, config.work_mode

    # Input tables.
    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "vehicle_scale_hetero_guest", "namespace": table_namespace}
    host_table = {"name": "vehicle_scale_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id)

    # Reader and DataIO: only the guest carries labels.
    reader = Reader(name="reader_0")
    reader.get_party_instance(role="guest", party_id=guest_id).component_param(table=guest_table)
    reader.get_party_instance(role="host", party_id=host_id).component_param(table=host_table)

    dataio = DataIO(name="dataio_0")
    dataio.get_party_instance(role="guest", party_id=guest_id).component_param(
        with_label=True, output_format="dense")
    dataio.get_party_instance(role="host", party_id=host_id).component_param(with_label=False)

    intersect = Intersection(name="intersection_0")

    cv_settings = {
        "need_cv": True,
        "n_splits": 5,
        "shuffle": False,
        "random_seed": 103,
    }
    booster = HeteroSecureBoost(
        name="hetero_secure_boost_0",
        num_trees=3,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "iterativeAffine"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        cv_param=cv_settings,
    )

    pipeline.add_component(reader)
    pipeline.add_component(dataio, data=Data(data=reader.output.data))
    pipeline.add_component(intersect, data=Data(data=dataio.output.data))
    pipeline.add_component(booster, data=Data(train_data=intersect.output.data))
    pipeline.compile()

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    print("fitting hetero secureboost done, result:")
    print(pipeline.get_component("hetero_secure_boost_0").get_summary())
def run_homo_nn_pipeline(config, namespace, data: dict, nn_component, num_host):
    """Train ``nn_component`` on a reader -> DataIO pipeline, then run predict.

    Args:
        config: path passed to ``load_job_config``, or an already loaded
            config object.
        namespace: suffix appended to each table's existing ``namespace``.
        data: mapping with a ``"guest"`` table dict and a ``"host"`` list of
            table dicts. The dicts are copied, never modified, so the same
            ``data`` can be reused across calls without the suffix piling up.
        nn_component: component added after DataIO and fed its output as
            ``train_data``; its summary is printed after fitting.
        num_host: number of host parties (and host tables) to use.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    def _with_namespace(table):
        # Return a copy so the caller's table dict keeps its original namespace.
        return {**table, "namespace": f"{table['namespace']}{namespace}"}

    guest_train_data = _with_namespace(data["guest"])
    host_train_data = [_with_namespace(table) for table in data["host"][:num_host]]

    guest = config.parties.guest[0]
    hosts = config.parties.host[:num_host]
    pipeline = (
        PipeLine()
        .set_initiator(role="guest", party_id=guest)
        .set_roles(guest=guest, host=hosts, arbiter=config.parties.arbiter)
    )

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(
        table=guest_train_data)
    # Indexing (rather than zip) keeps the IndexError when fewer than
    # ``num_host`` hosts or host tables are available.
    for i in range(num_host):
        reader_0.get_party_instance(role="host", party_id=hosts[i]).component_param(
            table=host_train_data[i])

    dataio_0 = DataIO(name="dataio_0", with_label=True)
    dataio_0.get_party_instance(role="guest", party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role="host", party_id=hosts).component_param(with_label=True)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(nn_component, data=Data(train_data=dataio_0.output.data))
    pipeline.compile()

    job_parameters = JobParameters(backend=config.backend, work_mode=config.work_mode)
    pipeline.fit(job_parameters)
    # Look the component up by its own name instead of assuming "homo_nn_0".
    print(pipeline.get_component(nn_component.name).get_summary())

    pipeline.deploy_component([dataio_0, nn_component])

    # Predict: feed the reader output into the deployed training pipeline.
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.dataio_0.input.data: reader_0.output.data}),
    )
    predict_pipeline.predict(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Fit a ten-component hetero LR pipeline and print the evaluation summary.

    Execution order: reader, DataIO, Intersection, FederatedSample,
    FeatureScale, HeteroFeatureBinning, HeteroFeatureSelection,
    OneHotEncoder, HeteroLR, Evaluation. ``config`` may be a path (loaded
    via ``load_job_config``) or a loaded config object.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest_id, host_id, arbiter_id = parties.guest[0], parties.host[0], parties.arbiter[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_table = {"name": "breast_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id, arbiter=arbiter_id)

    reader = Reader(name="reader_0")
    reader.get_party_instance(role='guest', party_id=guest_id).component_param(table=guest_table)
    reader.get_party_instance(role='host', party_id=host_id).component_param(table=host_table)

    # Only the guest carries labels; both sides fill missing values and replace outliers.
    dataio = DataIO(name="dataio_0")
    dataio.get_party_instance(role='guest', party_id=guest_id).component_param(
        with_label=True, missing_fill=True, outlier_replace=True)
    dataio.get_party_instance(role='host', party_id=host_id).component_param(
        with_label=False, missing_fill=True, outlier_replace=True)

    intersection = Intersection(name="intersection_0")
    sampler = FederatedSample(
        name="federated_sample_0",
        mode="stratified",
        method="upsample",
        fractions=[[0, 1.5], [1, 2.0]],
    )
    scaler = FeatureScale(name="feature_scale_0", method="min_max_scale", mode="normal")
    scaler.get_party_instance(role='guest', party_id=guest_id).component_param(
        feat_upper=[1, 2, 1, 1, 0.5, 1, 2, 2, 1, 1])
    binning = HeteroFeatureBinning(name="hetero_feature_binning_0")
    selection = HeteroFeatureSelection(name="hetero_feature_selection_0")
    one_hot = OneHotEncoder(name="one_hot_0")
    hetero_lr = HeteroLR(
        name="hetero_lr_0",
        penalty="L2",
        optimizer="rmsprop",
        tol=1e-5,
        init_param={"init_method": "random_uniform"},
        alpha=0.01,
        max_iter=10,
        early_stop="diff",
        batch_size=320,
        learning_rate=0.15,
    )
    evaluation = Evaluation(name="evaluation_0")

    pipeline.add_component(reader)
    # Each of these components consumes the ``data`` output of its predecessor.
    upstream = reader
    for component in (dataio, intersection, sampler, scaler, binning, selection, one_hot):
        pipeline.add_component(component, data=Data(data=upstream.output.data))
        upstream = component
    pipeline.add_component(hetero_lr, data=Data(train_data=one_hot.output.data))
    pipeline.add_component(evaluation, data=Data(data=hetero_lr.output.data))
    pipeline.compile()

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    print(pipeline.get_component("evaluation_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    """Fit a reader -> DataIO -> HomoDataSplit pipeline and print the split summary.

    ``config`` may be a path (loaded via ``load_job_config``) or a loaded
    config object; ``namespace`` is appended to the ``experiment`` table
    namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest_id, host_id = parties.guest[0], parties.host[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "breast_homo_guest", "namespace": table_namespace}
    host_table = {"name": "breast_homo_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id)

    reader = Reader(name="reader_0")
    reader.get_party_instance(role='guest', party_id=guest_id).component_param(table=guest_table)
    reader.get_party_instance(role='host', party_id=host_id).component_param(table=host_table)

    dataio = DataIO(name="dataio_0")
    guest_dataio = dataio.get_party_instance(role='guest', party_id=guest_id)
    guest_dataio.component_param(
        with_label=True, output_format="dense", label_name="y", label_type="int")
    host_dataio = dataio.get_party_instance(role='host', party_id=host_id)
    host_dataio.component_param(with_label=True)

    splitter = HomoDataSplit(
        name="homo_data_split_0",
        stratified=True,
        test_size=0.3,
        validate_size=0.2,
    )

    pipeline.add_component(reader)
    pipeline.add_component(dataio, data=Data(data=reader.output.data))
    pipeline.add_component(splitter, data=Data(data=dataio.output.data))
    pipeline.compile()

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    print(pipeline.get_component("homo_data_split_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    """Fit a guest-only reader -> ColumnExpand -> DataIO pipeline.

    ColumnExpand appends four manually specified columns with fixed fill
    values before DataIO runs. ``config`` may be a path (loaded via
    ``load_job_config``) or a loaded config object.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    guest_id = config.parties.guest[0]
    backend, work_mode = config.backend, config.work_mode

    guest_table = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}

    # The guest is both the initiator and the only participant.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest_id)
    pipeline.set_roles(guest=guest_id)

    reader = Reader(name="reader_0")
    reader.get_party_instance(role="guest", party_id=guest_id).component_param(table=guest_table)

    expander = ColumnExpand(name="column_expand_0")
    guest_expander = expander.get_party_instance(role="guest", party_id=guest_id)
    guest_expander.component_param(
        need_run=True,
        method="manual",
        append_header=["x_0", "x_1", "x_2", "x_3"],
        fill_value=[0, 0.2, 0.5, 1],
    )

    dataio = DataIO(name="dataio_0")
    dataio.get_party_instance(role="guest", party_id=guest_id).component_param(
        with_label=True, output_format="dense")

    # Components are added in execution order.
    pipeline.add_component(reader)
    pipeline.add_component(expander, data=Data(data=reader.output.data))
    pipeline.add_component(dataio, data=Data(data=expander.output.data))

    pipeline.compile()
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
def main(config="../../config.yaml", namespace=""):
    """Fit the pipeline from ``make_normal_dsl`` and pretty-print two summaries.

    After fitting, the summaries of ``hetero_feature_selection_0`` and
    ``evaluation_0`` are passed to ``common_tools.prettify`` in that order.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    pipeline = make_normal_dsl(config, namespace)
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    for component_name in ("hetero_feature_selection_0", "evaluation_0"):
        common_tools.prettify(pipeline.get_component(component_name).get_summary())
def main(config="../../config.yaml", namespace=""):
    """Fit a reader -> DataTransform pipeline with per-party missing-value fill.

    The guest fills missing values with ``"mean"`` and replaces outliers.
    The host fills with the designated value 0 and leaves outliers alone.
    ``config`` may be a path (loaded via ``load_job_config``) or a loaded
    config object.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest_id, host_id = parties.guest[0], parties.host[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "ionosphere_scale_hetero_guest", "namespace": table_namespace}
    host_table = {"name": "ionosphere_scale_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id)

    reader = Reader(name="reader_0")
    reader.get_party_instance(role='guest', party_id=guest_id).component_param(table=guest_table)
    reader.get_party_instance(role='host', party_id=host_id).component_param(table=host_table)

    transform = DataTransform(name="data_transform_0")
    guest_transform = transform.get_party_instance(role='guest', party_id=guest_id)
    guest_transform.component_param(
        with_label=True,
        label_name="LABEL",
        missing_fill=True,
        missing_fill_method="mean",
        outlier_replace=True,
    )
    host_transform = transform.get_party_instance(role='host', party_id=host_id)
    host_transform.component_param(
        with_label=False,
        missing_fill=True,
        missing_fill_method="designated",
        default_value=0,
        outlier_replace=False,
    )

    pipeline.add_component(reader)
    pipeline.add_component(transform, data=Data(data=reader.output.data))
    pipeline.compile()

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
def main(config="../../config.yaml", namespace=""):
    """Fit reader -> DataIO -> Intersection -> SampleWeight -> HeteroLR -> Evaluation.

    Every host listed in the config takes part. SampleWeight runs on the
    guest with ``class_weight="balanced"`` and is switched off
    (``need_run=False``) for the hosts. ``config`` may be a path (loaded
    via ``load_job_config``) or a loaded config object.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest_id = parties.guest[0]
    host_ids = parties.host
    arbiter_id = parties.arbiter[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_table = {"name": "breast_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_ids, arbiter=arbiter_id)

    reader = Reader(name="reader_0")
    reader.get_party_instance(role='guest', party_id=guest_id).component_param(table=guest_table)
    reader.get_party_instance(role='host', party_id=host_ids).component_param(table=host_table)

    dataio = DataIO(name="dataio_0")
    dataio.get_party_instance(role='guest', party_id=guest_id).component_param(
        with_label=True, label_name="y", label_type="int", output_format="dense")
    dataio.get_party_instance(role='host', party_id=host_ids).component_param(with_label=False)

    intersection = Intersection(name="intersection_0")

    sample_weight = SampleWeight(name="sample_weight_0")
    sample_weight.get_party_instance(role='guest', party_id=guest_id).component_param(
        need_run=True, class_weight="balanced")
    sample_weight.get_party_instance(role='host', party_id=host_ids).component_param(
        need_run=False)

    hetero_lr = HeteroLR(
        name="hetero_lr_0",
        optimizer="nesterov_momentum_sgd",
        tol=0.001,
        alpha=0.01,
        max_iter=20,
        early_stop="weight_diff",
        batch_size=-1,
        learning_rate=0.15,
        init_param={"init_method": "zeros"},
    )

    evaluation = Evaluation(name="evaluation_0", eval_type="binary", pos_label=1)

    pipeline.add_component(reader)
    pipeline.add_component(dataio, data=Data(data=reader.output.data))
    pipeline.add_component(intersection, data=Data(data=dataio.output.data))
    pipeline.add_component(sample_weight, data=Data(data=intersection.output.data))
    pipeline.add_component(hetero_lr, data=Data(train_data=sample_weight.output.data))
    pipeline.add_component(evaluation, data=Data(data=hetero_lr.output.data))
    pipeline.compile()

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
def main(config="../../config.yaml", namespace=""):
    """Fit the ``make_normal_dsl`` pipeline with a fast-SBT feature filter.

    The selection component uses the ``"hetero_fast_sbt_filter"`` method,
    and the fast secureboost parameters are passed as ``fast_sbt_param``.
    After fitting, the summary of ``hetero_feature_selection_0`` is passed
    to ``common_tools.prettify``.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    fast_sbt_param = {
        "name": "fast_secureboost_0",
        "task_type": "classification",
        "learning_rate": 0.1,
        "num_trees": 4,
        "subsample_feature_rate": 1,
        "n_iter_no_change": False,
        "work_mode": "layered",
        "guest_depth": 2,
        "host_depth": 3,
        "tol": 0.0001,
        "bin_num": 50,
        "metrics": ["Recall", "ks", "auc", "roc"],
        "objective_param": {"objective": "cross_entropy"},
        "encrypt_param": {"method": "iterativeAffine"},
        "predict_param": {"threshold": 0.5},
        "validation_freqs": 1,
    }

    sbt_filter = {
        "metrics": "feature_importance",
        "filter_type": "threshold",
        "take_high": True,
        "threshold": 0.03,
    }
    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["hetero_fast_sbt_filter"],
        "sbt_param": sbt_filter,
    }

    pipeline = common_tools.make_normal_dsl(
        config, namespace, selection_param, fast_sbt_param=fast_sbt_param)
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    selection_summary = pipeline.get_component("hetero_feature_selection_0").get_summary()
    common_tools.prettify(selection_summary)
def main(config="../../config.yaml", namespace=""):
    """Fit a guest-only pipeline that unions two readers and feeds DataIO.

    Both readers point at the same guest table. Union is configured with
    ``keep_duplicate=True``. ``config`` may be a path (loaded via
    ``load_job_config``) or a loaded config object.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    guest_id = config.parties.guest[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    first_table = {"name": "breast_hetero_guest", "namespace": table_namespace}
    second_table = {"name": "breast_hetero_guest", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id)

    first_reader = Reader(name="reader_0")
    first_reader.get_party_instance(role='guest', party_id=guest_id).component_param(
        table=first_table)
    second_reader = Reader(name="reader_1")
    second_reader.get_party_instance(role='guest', party_id=guest_id).component_param(
        table=second_table)

    union = Union(name="union_0", allow_missing=False, keep_duplicate=True)

    dataio = DataIO(
        name="dataio_0",
        with_label=True,
        output_format="dense",
        label_name="y",
        missing_fill=False,
        outlier_replace=False,
    )

    pipeline.add_component(first_reader)
    pipeline.add_component(second_reader)
    union_inputs = [first_reader.output.data, second_reader.output.data]
    pipeline.add_component(union, data=Data(data=union_inputs))
    pipeline.add_component(dataio, data=Data(data=union.output.data))
    pipeline.compile()

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
def main(config="../../config.yaml", namespace=""):
    """Fit the ``make_normal_dsl`` pipeline with IV-based selection filters.

    The binning and selection parameter dicts below are both handed to
    ``common_tools.make_normal_dsl``. After fitting, the summary of
    ``hetero_feature_selection_0`` is passed to ``common_tools.prettify``.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    backend, work_mode = config.backend, config.work_mode

    binning_param = {
        "name": 'hetero_feature_binning_0',
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": {
            "transform_cols": -1,
            "transform_names": None,
            "transform_type": "bin_num",
        },
    }

    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["iv_value_thres", "iv_filter"],
        "iv_value_param": {"value_threshold": 1, "local_only": True},
        "iv_top_k_param": {"k": 7, "local_only": False},
    }

    pipeline = common_tools.make_normal_dsl(
        config, namespace, selection_param, binning_param=binning_param)
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    selection_summary = pipeline.get_component("hetero_feature_selection_0").get_summary()
    common_tools.prettify(selection_summary)
def main(config="../../config.yaml", namespace=""):
    """Fit a reader -> DataIO -> HeteroFTL -> Evaluation pipeline.

    HeteroFTL is given a single Dense layer (32 units, sigmoid activation)
    and compiled with an Adam optimizer. ``config`` may be a path (loaded
    via ``load_job_config``) or a loaded config object; ``namespace`` is
    appended to the ``experiment`` table namespace.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "nus_wide_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "nus_wide_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # Only the guest carries labels.
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    hetero_ftl_0 = HeteroFTL(
        name='hetero_ftl_0',
        epochs=10,
        alpha=1,
        batch_size=-1,
        mode='plain',
        communication_efficient=True,
        local_round=5,
    )
    hetero_ftl_0.add_nn_layer(
        Dense(
            units=32,
            activation='sigmoid',
            kernel_initializer=initializers.RandomNormal(stddev=1.0, dtype="float32"),
            bias_initializer=initializers.Zeros(),
        )
    )
    hetero_ftl_0.compile(optimizer=optimizers.Adam(lr=0.01))

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(hetero_ftl_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_ftl_0.output.data))
    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
    # The stray, unterminated triple quote that followed this call was
    # removed: it opened a string literal that was never closed.
def main(config="../../config.yaml", namespace=""):
    """Fit reader -> DataIO -> Intersection -> HeteroDataSplit -> two HeteroLinR.

    The first HeteroLinR trains on the split's train and validate outputs.
    The second is fed the test output together with the first one's model.
    ``config`` may be a path (loaded via ``load_job_config``) or a loaded
    config object.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest_id, host_id, arbiter_id = parties.guest[0], parties.host[0], parties.arbiter[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "motor_hetero_guest", "namespace": table_namespace}
    host_table = {"name": "motor_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id, arbiter=arbiter_id)

    reader = Reader(name="reader_0")
    reader.get_party_instance(role='guest', party_id=guest_id).component_param(table=guest_table)
    reader.get_party_instance(role='host', party_id=host_id).component_param(table=host_table)

    # Only the guest carries the (float) label column.
    dataio = DataIO(name="dataio_0")
    dataio.get_party_instance(role='guest', party_id=guest_id).component_param(
        with_label=True, label_name="motor_speed", label_type="float", output_format="dense")
    dataio.get_party_instance(role='host', party_id=host_id).component_param(with_label=False)

    intersection = Intersection(name="intersection_0")
    splitter = HeteroDataSplit(
        name="hetero_data_split_0",
        stratified=True,
        test_size=0.3,
        validate_size=0.2,
        split_points=[0.0, 0.2],
    )
    linr_train = HeteroLinR(
        name="hetero_linr_0",
        penalty="L2",
        optimizer="sgd",
        tol=0.001,
        alpha=0.01,
        max_iter=10,
        early_stop="weight_diff",
        batch_size=-1,
        learning_rate=0.15,
        decay=0.0,
        decay_sqrt=False,
        init_param={"init_method": "zeros"},
        encrypted_mode_calculator_param={"mode": "fast"},
    )
    linr_test = HeteroLinR()

    pipeline.add_component(reader)
    pipeline.add_component(dataio, data=Data(data=reader.output.data))
    pipeline.add_component(intersection, data=Data(data=dataio.output.data))
    pipeline.add_component(splitter, data=Data(data=intersection.output.data))

    split_output = splitter.output.data
    pipeline.add_component(
        linr_train,
        data=Data(train_data=split_output.train_data,
                  validate_data=split_output.validate_data),
    )
    pipeline.add_component(
        linr_test,
        data=Data(test_data=split_output.test_data),
        model=Model(model=linr_train.output.model),
    )
    pipeline.compile()

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
def main(config="../../config.yaml", namespace=""):
    """Fit the pipeline built by ``common_tools.make_normal_dsl`` with ``has_validate=True``.

    After fitting, the summary of component ``hetero_lr_0`` is printed through
    ``common_tools.prettify``.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object.
        namespace: Forwarded unchanged to ``common_tools.make_normal_dsl``.
    """
    config = load_job_config(config) if isinstance(config, str) else config
    backend, work_mode = config.backend, config.work_mode

    init_param = {"init_method": "zeros"}
    sqn_param = {
        "update_interval_L": 3,
        "memory_M": 5,
        "sample_size": 5000,
        "random_seed": None,
    }
    cv_param = {
        "n_splits": 5,
        "shuffle": False,
        "random_seed": 103,
        "need_cv": False,
    }
    lr_param = {
        "name": "hetero_lr_0",
        "penalty": "L2",
        "optimizer": "rmsprop",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "validation_freqs": 1,
        "early_stopping_rounds": 3,
        "metrics": [],
        "use_first_metric_only": False,
        "init_param": init_param,
        "sqn_param": sqn_param,
        "cv_param": cv_param,
    }

    pipeline = common_tools.make_normal_dsl(config, namespace, lr_param, has_validate=True)

    # Fit the model.
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    # Query and print the component summary.
    lr_summary = pipeline.get_component("hetero_lr_0").get_summary()
    common_tools.prettify(lr_summary)
def main(config="../../config.yaml", namespace=""):
    """Fit the pipeline built by ``common_tools.make_normal_dsl`` with mini-batch hetero LR parameters.

    After fitting, the summaries of components ``hetero_lr_0`` and
    ``evaluation_0`` are printed through ``common_tools.prettify``.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object.
        namespace: Forwarded unchanged to ``common_tools.make_normal_dsl``.
    """
    config = load_job_config(config) if isinstance(config, str) else config
    backend, work_mode = config.backend, config.work_mode

    init_param = {"init_method": "zeros"}
    sqn_param = {
        "update_interval_L": 3,
        "memory_M": 5,
        "sample_size": 5000,
        "random_seed": None,
    }
    cv_param = {
        "n_splits": 5,
        "shuffle": False,
        "random_seed": 103,
        "need_cv": False,
    }
    lr_param = {
        "name": "hetero_lr_0",
        "penalty": "L2",
        "optimizer": "rmsprop",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": 320,
        "learning_rate": 0.15,
        "init_param": init_param,
        "sqn_param": sqn_param,
        "cv_param": cv_param,
    }

    pipeline = common_tools.make_normal_dsl(config, namespace, lr_param)

    # Fit the model.
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    # Query and print component summaries, model first, then evaluation.
    for component_name in ("hetero_lr_0", "evaluation_0"):
        common_tools.prettify(pipeline.get_component(component_name).get_summary())
def main(config="../../config.yaml", namespace=""):
    """Train HomoLR and a LocalBaseline on the same DataIO output and evaluate both.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    config = load_job_config(config) if isinstance(config, str) else config

    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_homo_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_homo_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    for role, party_id, table in (
        ("guest", guest, guest_train_data),
        ("host", host, host_train_data),
    ):
        reader_0.get_party_instance(role=role, party_id=party_id).component_param(table=table)

    dataio_0 = DataIO(
        name="dataio_0",
        with_label=True,
        output_format="dense",
        label_type="int",
        label_name="y",
    )

    homo_lr_options = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 30,
        "batch_size": -1,
        "early_stop": "weight_diff",
        "learning_rate": 0.15,
        "init_param": {"init_method": "zeros"},
    }
    homo_lr_0 = HomoLR(name="homo_lr_0", **homo_lr_options)

    # LocalBaseline and Evaluation are switched on for the guest and off for the host.
    run_flags = (("guest", guest, True), ("host", host, False))

    baseline_model_opts = {
        "penalty": "l2",
        "tol": 0.0001,
        "C": 1.0,
        "fit_intercept": True,
        "solver": "saga",
        "max_iter": 2,
    }
    local_baseline_0 = LocalBaseline(
        name="local_baseline_0",
        model_name="LogisticRegression",
        model_opts=baseline_model_opts,
    )
    for role, party_id, need_run in run_flags:
        local_baseline_0.get_party_instance(role=role, party_id=party_id).component_param(
            need_run=need_run
        )

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary", pos_label=1)
    for role, party_id, need_run in run_flags:
        evaluation_0.get_party_instance(role=role, party_id=party_id).component_param(
            need_run=need_run
        )

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_lr_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(local_baseline_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_0.output.data, local_baseline_0.output.data]),
    )

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Run a PSI component over the outputs of two reader/DataIO pairs.

    ``dataio_1`` is added with the model output of ``dataio_0``; ``psi_0``
    receives ``dataio_0`` output as train data and ``dataio_1`` output as
    validate data.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    config = load_job_config(config) if isinstance(config, str) else config

    parties = config.parties
    guest, host = parties.guest[0], parties.host[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "expect", "namespace": table_namespace}
    host_train_data = {"name": "actual", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Both readers are configured with the same guest/host tables.
    readers = []
    for reader_name in ("reader_0", "reader_1"):
        reader = Reader(name=reader_name)
        reader.get_party_instance(role="guest", party_id=guest).component_param(
            table=guest_train_data
        )
        reader.get_party_instance(role="host", party_id=host).component_param(
            table=host_train_data
        )
        readers.append(reader)
    reader_0, reader_1 = readers

    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")
    # Identical settings on every party of both DataIO components.
    for role, party_id in (("guest", guest), ("host", host)):
        for dataio in (dataio_0, dataio_1):
            dataio.get_party_instance(role=role, party_id=party_id).component_param(
                with_label=False, output_format="dense"
            )

    psi_0 = PSI(name="psi_0", max_bin_num=20)

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(
        dataio_1,
        data=Data(data=reader_1.output.data),
        model=Model(dataio_0.output.model),
    )
    pipeline.add_component(
        psi_0,
        data=Data(train_data=dataio_0.output.data, validate_data=dataio_1.output.data),
    )

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Fit the pipeline built by ``common_tools.make_asymmetric_dsl`` with per-party binning methods.

    Guest and host each get a deep copy of the shared parameter dict; the
    guest copy uses method ``quantile`` and the host copy uses ``optimal``.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object.
        namespace: Forwarded unchanged to ``common_tools.make_asymmetric_dsl``.
    """
    config = load_job_config(config) if isinstance(config, str) else config
    backend, work_mode = config.backend, config.work_mode

    optimal_binning_param = {
        "metric_method": "gini",
        "min_bin_pct": 0.05,
        "max_bin_pct": 0.8,
        "init_bucket_method": "quantile",
        "init_bin_nums": 100,
        "mixture": True,
    }
    transform_param = {
        "transform_cols": -1,
        "transform_names": None,
        "transform_type": "bin_num",
    }
    param = {
        "method": "quantile",
        "optimal_binning_param": optimal_binning_param,
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": transform_param,
    }

    # Independent copies so the per-party override does not leak between parties.
    guest_param = copy.deepcopy(param)
    guest_param.update(method="quantile")
    host_param = copy.deepcopy(param)
    host_param.update(method="optimal")

    pipeline = common_tools.make_asymmetric_dsl(
        config,
        namespace,
        guest_param=guest_param,
        host_param=host_param,
    )

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
def run_pearson_pipeline(config, namespace, data, common_param=None, guest_only_param=None, host_only_param=None):
    """Build, compile and fit a reader -> dataio -> intersection -> hetero-pearson pipeline.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``) or
            an already-loaded config object exposing ``parties``, ``backend``
            and ``work_mode``.
        namespace: Suffix appended to the ``"namespace"`` entry of both tables.
        data: Mapping with a ``"guest"`` table dict and a ``"host"`` sequence
            of table dicts; only the first host table is used. Each table dict
            must contain a ``"namespace"`` key. The mapping is not modified.
        common_param: Optional keyword arguments passed to ``HeteroPearson``.
        guest_only_param: Optional keyword arguments applied to the guest
            party instance of the pearson component only.
        host_only_param: Optional keyword arguments applied to the host
            party instance of the pearson component only.

    Returns:
        The pipeline object after ``fit`` has been called on it.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    guest = config.parties.guest[0]
    host = config.parties.host[0]

    # Work on shallow copies: writing the suffixed namespace into the caller's
    # dicts would make a reused ``data`` accumulate the suffix on every call.
    guest_data = dict(data["guest"])
    host_data = dict(data["host"][0])
    guest_data["namespace"] = f"{guest_data['namespace']}{namespace}"
    host_data["namespace"] = f"{host_data['namespace']}{namespace}"

    pipeline = PipeLine() \
        .set_initiator(role='guest', party_id=guest) \
        .set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_data)

    # Only the guest side is configured with a label column.
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest) \
        .component_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersect_0 = Intersection(name="intersection_0")

    if common_param is None:
        common_param = {}
    hetero_pearson_component = HeteroPearson(name="hetero_pearson_0", **common_param)

    # Party-specific overrides are applied only when provided and non-empty.
    if guest_only_param:
        hetero_pearson_component.get_party_instance("guest", guest).component_param(**guest_only_param)
    if host_only_param:
        hetero_pearson_component.get_party_instance("host", host).component_param(**host_only_param)

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_pearson_component, data=Data(train_data=intersect_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=config.backend, work_mode=config.work_mode)
    pipeline.fit(job_parameters)
    return pipeline
def main(config="../../config.yaml", namespace=""):
    """Fit the pipeline built by ``common_tools.make_normal_dsl`` for optimal (IV) binning.

    The builder is called with ``is_multi_host=True`` and
    ``host_dense_output=False``.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object.
        namespace: Forwarded unchanged to ``common_tools.make_normal_dsl``.
    """
    config = load_job_config(config) if isinstance(config, str) else config
    backend, work_mode = config.backend, config.work_mode

    optimal_binning_param = {
        "metric_method": "iv",
        "min_bin_pct": 0.05,
        "max_bin_pct": 0.8,
        "init_bucket_method": "quantile",
        "init_bin_nums": 100,
        "mixture": False,
    }
    transform_param = {
        "transform_cols": -1,
        "transform_names": None,
        "transform_type": "bin_num",
    }
    param = {
        "name": "hetero_feature_binning_0",
        "method": "optimal",
        "optimal_binning_param": optimal_binning_param,
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": transform_param,
    }

    pipeline = common_tools.make_normal_dsl(
        config,
        namespace,
        is_multi_host=True,
        bin_param=param,
        host_dense_output=False,
    )

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
def main(config="../../config.yaml", namespace=""):
    """Fit the pipeline built by ``common_tools.make_normal_dsl`` with ``need_cv`` enabled.

    The builder is called with ``is_dense=False`` and
    ``need_evaluation=False``; afterwards the summary of ``hetero_lr_0`` is
    printed through ``common_tools.prettify``.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object.
        namespace: Forwarded unchanged to ``common_tools.make_normal_dsl``.
    """
    config = load_job_config(config) if isinstance(config, str) else config
    backend, work_mode = config.backend, config.work_mode

    init_param = {"init_method": "random_uniform"}
    cv_param = {
        "n_splits": 3,
        "shuffle": False,
        "random_seed": 103,
        "need_cv": True,
    }
    lr_param = {
        "name": "hetero_lr_0",
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 10,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": init_param,
        "cv_param": cv_param,
    }

    pipeline = common_tools.make_normal_dsl(
        config,
        namespace,
        lr_param,
        is_dense=False,
        need_evaluation=False,
    )

    # Fit the model.
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    # Query and print the component summary.
    lr_summary = pipeline.get_component("hetero_lr_0").get_summary()
    common_tools.prettify(lr_summary)
def main(config="../../config.yaml", namespace=""):
    """Scale features, train HomoLR and evaluate it, passing ``parties.host`` through unindexed.

    After fitting, the summaries of ``homo_lr_0`` and ``evaluation_0`` are
    printed as indented JSON.

    Args:
        config: Path to a job config file (loaded with ``load_job_config``)
            or an already-loaded config object.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    config = load_job_config(config) if isinstance(config, str) else config

    parties = config.parties
    # ``host`` is the whole ``parties.host`` value here, not its first element.
    guest, host, arbiter = parties.guest[0], parties.host, parties.arbiter[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_homo_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_homo_host", "namespace": table_namespace}

    # Initialise the pipeline, its initiator and the participating roles.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: each side reads its own table.
    reader_0 = Reader(name="reader_0")
    for role, party_id, table in (
        ("guest", guest, guest_train_data),
        ("host", host, host_train_data),
    ):
        reader_0.get_party_instance(role=role, party_id=party_id).component_param(table=table)

    dataio_0 = DataIO(name="dataio_0", with_label=True, output_format="dense")
    scale_0 = FeatureScale(name="scale_0")

    homo_lr_0 = HomoLR(
        name="homo_lr_0",
        penalty="L2",
        optimizer="sgd",
        tol=1e-05,
        alpha=0.01,
        max_iter=30,
        early_stop="diff",
        batch_size=-1,
        learning_rate=0.15,
        decay=1,
        decay_sqrt=True,
        init_param={"init_method": "zeros"},
        encrypt_param={"method": None},
        cv_param={
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False,
        },
    )

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_lr_0, data=Data(train_data=scale_0.output.data))

    # Evaluation is switched off for the host side.
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    host_evaluation = evaluation_0.get_party_instance(role="host", party_id=host)
    host_evaluation.component_param(need_run=False)
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    pipeline.compile()

    # Fit the model.
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # Query and print component summaries, model first, then evaluation.
    for component_name in ("homo_lr_0", "evaluation_0"):
        summary = pipeline.get_component(component_name).get_summary()
        print(json.dumps(summary, indent=4, ensure_ascii=False))