def main(config="../../config.yaml", namespace=""):
    """Fit HeteroLR and a LocalBaseline on the vehicle-scale tables, then predict.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # A string config is treated as a path and loaded first.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "vehicle_scale_hetero_guest", "namespace": table_namespace}
    host_train_data = {"name": "vehicle_scale_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: each party reads its own table.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role='host', party_id=host)
    host_reader.component_param(table=host_train_data)

    # DataTransform: the guest side is configured with the label column "y".
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role='guest', party_id=guest)
    guest_transform.component_param(
        with_label=True,
        output_format="dense",
        label_type="int",
        label_name="y",
    )
    host_transform = data_transform_0.get_party_instance(role='host', party_id=host)
    host_transform.component_param(with_label=False)

    intersection_0 = Intersection(
        name="intersection_0",
        intersect_method="rsa",
        sync_intersect_ids=True,
        only_output_key=False,
    )

    lr_settings = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 0.0001,
        "alpha": 0.0001,
        "max_iter": 30,
        "batch_size": -1,
        "early_stop": "diff",
        "learning_rate": 0.15,
        "init_param": {"init_method": "zeros"},
    }
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", **lr_settings)

    baseline_opts = {
        "penalty": "l2",
        "tol": 0.0001,
        "C": 1.0,
        "fit_intercept": True,
        "solver": "lbfgs",
        "max_iter": 5,
        "multi_class": "ovr",
    }
    local_baseline_0 = LocalBaseline(
        name="local_baseline_0",
        model_name="LogisticRegression",
        model_opts=baseline_opts,
    )
    # need_run is switched on for the guest and off for the host.
    local_baseline_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="multi", pos_label=1)
    evaluation_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    # Wire the training pipeline.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    intersected = intersection_0.output.data
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersected))
    pipeline.add_component(local_baseline_0, data=Data(train_data=intersection_0.output.data))
    model_outputs = [hetero_lr_0.output.data, local_baseline_0.output.data]
    pipeline.add_component(evaluation_0, data=Data(data=model_outputs))

    pipeline.compile()
    pipeline.fit()

    # Predict: deploy the trained components and feed them from reader_0.
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_lr_0, local_baseline_0])

    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_source = {pipeline.data_transform_0.input.data: reader_0.output.data}
    predict_pipeline.add_component(pipeline, data=Data(predict_input=predict_source))
    predict_pipeline.add_component(
        evaluation_0,
        data=Data(data=[hetero_lr_0.output.data, local_baseline_0.output.data]),
    )
    predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""):
    """Fit HeteroPoisson on the dvisits tables and run prediction with the result.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties``, ``backend`` and ``work_mode``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # A string config is treated as a path and loaded first.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "dvisits_hetero_guest", "namespace": table_namespace}
    host_train_data = {"name": "dvisits_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: each party reads its own table.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest)
    guest_reader.algorithm_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role='host', party_id=host)
    host_reader.algorithm_param(table=host_train_data)

    # DataIO: the guest side is configured with the float label "doctorco".
    dataio_0 = DataIO(name="dataio_0")
    guest_dataio = dataio_0.get_party_instance(role='guest', party_id=guest)
    guest_dataio.algorithm_param(
        with_label=True,
        label_name="doctorco",
        label_type="float",
        output_format="dense",
    )
    host_dataio = dataio_0.get_party_instance(role='host', party_id=host)
    host_dataio.algorithm_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    poisson_settings = {
        "early_stop": "weight_diff",
        "max_iter": 10,
        "alpha": 100.0,
        "batch_size": -1,
        "learning_rate": 0.01,
        "optimizer": "rmsprop",
        "exposure_colname": "exposure",
        "decay_sqrt": False,
        "tol": 0.001,
        "init_param": {"init_method": "zeros"},
        "penalty": "L2",
        "encrypted_mode_calculator_param": {"mode": "fast"},
    }
    hetero_poisson_0 = HeteroPoisson(name="hetero_poisson_0", **poisson_settings)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="regression", pos_label=1)
    host_evaluation = evaluation_0.get_party_instance(role='host', party_id=host)
    host_evaluation.algorithm_param(need_run=False)

    # Wire the training pipeline.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_poisson_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_poisson_0.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    # Predict: deploy the required components from the training pipeline.
    pipeline.deploy_component([dataio_0, intersection_0, hetero_poisson_0])

    predict_pipeline = PipeLine()
    # The predict pipeline reuses reader_0 as its data source.
    predict_pipeline.add_component(reader_0)
    # Attach the deployed training pipeline and map its dataio input to the reader output.
    predict_source = {pipeline.dataio_0.input.data: reader_0.output.data}
    predict_pipeline.add_component(pipeline, data=Data(predict_input=predict_source))
    # Run prediction with the same backend and work mode as training.
    predict_pipeline.predict(backend=backend, work_mode=work_mode)
def main(config="../../config.yaml", namespace=""):
    """Fit HomoLR and a LocalBaseline on the breast_homo tables and evaluate both.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties``, ``backend`` and ``work_mode``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # A string config is treated as a path and loaded first.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_homo_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_homo_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: each party reads its own table.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role='host', party_id=host)
    host_reader.component_param(table=host_train_data)

    # DataIO is configured once on the component, not per party.
    dataio_0 = DataIO(
        name="dataio_0",
        with_label=True,
        output_format="dense",
        label_type="int",
        label_name="y",
    )

    lr_settings = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 30,
        "batch_size": -1,
        "early_stop": "weight_diff",
        "learning_rate": 0.15,
        "init_param": {"init_method": "zeros"},
    }
    homo_lr_0 = HomoLR(name="homo_lr_0", **lr_settings)

    baseline_opts = {
        "penalty": "l2",
        "tol": 0.0001,
        "C": 1.0,
        "fit_intercept": True,
        "solver": "saga",
        "max_iter": 2,
    }
    local_baseline_0 = LocalBaseline(
        name="local_baseline_0",
        model_name="LogisticRegression",
        model_opts=baseline_opts,
    )
    # need_run is switched on for the guest and off for the host.
    local_baseline_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary", pos_label=1)
    evaluation_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    # Wire the pipeline: both models train on the same DataIO output.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_lr_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(local_baseline_0, data=Data(train_data=dataio_0.output.data))
    model_outputs = [homo_lr_0.output.data, local_baseline_0.output.data]
    pipeline.add_component(evaluation_0, data=Data(data=model_outputs))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Fit HeteroPoisson on the dvisits tables using sparse DataTransform output.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # A string config is treated as a path and loaded first.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "dvisits_hetero_guest", "namespace": table_namespace}
    host_train_data = {"name": "dvisits_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: each party reads its own table.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role='host', party_id=host)
    host_reader.component_param(table=host_train_data)

    # DataTransform: sparse output for everyone; the label is set on the guest only.
    data_transform_0 = DataTransform(name="data_transform_0", output_format="sparse")
    guest_transform = data_transform_0.get_party_instance(role='guest', party_id=guest)
    guest_transform.component_param(
        with_label=True,
        label_name="doctorco",
        label_type="float",
    )
    host_transform = data_transform_0.get_party_instance(role='host', party_id=host)
    host_transform.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    poisson_settings = {
        "early_stop": "weight_diff",
        "max_iter": 2,
        "alpha": 100.0,
        "batch_size": -1,
        "learning_rate": 0.01,
        "exposure_colname": "exposure",
        "optimizer": "rmsprop",
        "penalty": "L2",
        "decay_sqrt": False,
        "tol": 0.001,
        "init_param": {"init_method": "zeros"},
    }
    hetero_poisson_0 = HeteroPoisson(name="hetero_poisson_0", **poisson_settings)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="regression", pos_label=1)
    host_evaluation = evaluation_0.get_party_instance(role='host', party_id=host)
    host_evaluation.component_param(need_run=False)

    # Wire the pipeline in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_poisson_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_poisson_0.output.data))

    pipeline.compile()
    pipeline.fit()
def main():
    """Fit HeteroLR on the breast_hetero tables, print its summary, then predict and evaluate.

    Party ids and table names are hard-coded in the function body.
    """
    # Party ids used by this demo.
    guest = 9999
    host = 10000
    arbiter = 10000

    # Table name and namespace of the uploaded data; eval reuses the train tables.
    guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}
    guest_eval_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_eval_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    # Build the pipeline, then set initiator and participants.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: each party reads its own training table.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role="guest", party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role="host", party_id=host)
    host_reader.component_param(table=host_train_data)

    # DataTransform: guest data has a label, host data does not.
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role="guest", party_id=guest)
    guest_transform.component_param(with_label=True, output_format="dense")
    host_transform = data_transform_0.get_party_instance(role="host", party_id=host)
    host_transform.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    lr_settings = {
        "early_stop": "diff",
        "learning_rate": 0.15,
        "optimizer": "rmsprop",
        "max_iter": 10,
    }
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", **lr_settings)

    # Components are added in order of task execution.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))

    # compile() forms the conf and dsl used to run the job; fit() trains the model.
    pipeline.compile()
    pipeline.fit()

    # Print the summary of the trained HeteroLR component.
    import json
    lr_summary = pipeline.get_component("hetero_lr_0").get_summary()
    print(json.dumps(lr_summary, indent=4))

    # Predict: deploy the components the predict pipeline needs.
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_lr_0])

    predict_pipeline = PipeLine()

    # A second reader supplies the evaluation tables.
    reader_1 = Reader(name="reader_1")
    guest_eval_reader = reader_1.get_party_instance(role="guest", party_id=guest)
    guest_eval_reader.component_param(table=guest_eval_data)
    host_eval_reader = reader_1.get_party_instance(role="host", party_id=host)
    host_eval_reader.component_param(table=host_eval_data)

    # Evaluation runs on the guest only.
    evaluation_0 = Evaluation(name="evaluation_0")
    guest_evaluation = evaluation_0.get_party_instance(role="guest", party_id=guest)
    guest_evaluation.component_param(need_run=True, eval_type="binary")
    host_evaluation = evaluation_0.get_party_instance(role="host", party_id=host)
    host_evaluation.component_param(need_run=False)

    predict_pipeline.add_component(reader_1)
    # Attach the deployed training pipeline and map its data_transform input to reader_1.
    predict_source = {pipeline.data_transform_0.input.data: reader_1.output.data}
    predict_pipeline.add_component(pipeline, data=Data(predict_input=predict_source))
    # Evaluate the HeteroLR output inside the predict pipeline.
    predict_pipeline.add_component(
        evaluation_0,
        data=Data(data=pipeline.hetero_lr_0.output.data),
    )
    predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""):
    """Fit HomoLR with validation data on the ``experiment_sid`` breast_homo tables.

    After fitting, the summaries of ``homo_lr_0`` and ``evaluation_0`` are
    printed as JSON.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties``.
        namespace: Suffix appended to the ``experiment_sid`` table namespace.
    """
    # A string config is treated as a path and loaded first.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]

    def _table(name):
        # Fresh table descriptor in the sid namespace.
        return {"name": name, "namespace": f"experiment_sid{namespace}"}

    guest_train_data = _table("breast_homo_guest")
    host_train_data = _table("breast_homo_host")
    guest_eval_data = _table("breast_homo_guest")
    host_eval_data = _table("breast_homo_host")

    # Build the pipeline, then set initiator and participants.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # reader_0 supplies the training tables, reader_1 the evaluation tables.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_eval_data)

    # DataTransform components; numbering starts at 0.
    data_transform_0 = DataTransform(
        name="data_transform_0",
        with_match_id=True,
        with_label=True,
        output_format="dense",
    )
    data_transform_1 = DataTransform(name="data_transform_1")

    scale_0 = FeatureScale(name='scale_0')
    scale_1 = FeatureScale(name='scale_1')

    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 3,
        "early_stop": "diff",
        "batch_size": 320,
        "learning_rate": 0.15,
        "validation_freqs": 1,
        "init_param": {"init_method": "zeros"},
        "encrypt_param": {"method": None},
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False,
        },
    }
    homo_lr_0 = HomoLR(name='homo_lr_0', **param)

    # Components are added in order of task execution.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    # data_transform_1 takes the model output of data_transform_0.
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    # The scale components read the transformed data; scale_1 takes scale_0's model.
    pipeline.add_component(scale_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(
        scale_1,
        data=Data(data=data_transform_1.output.data),
        model=Model(scale_0.output.model),
    )
    lr_inputs = Data(train_data=scale_0.output.data, validate_data=scale_1.output.data)
    pipeline.add_component(homo_lr_0, data=lr_inputs)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    host_evaluation = evaluation_0.get_party_instance(role='host', party_id=host)
    host_evaluation.component_param(need_run=False)
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile() forms the conf and dsl used to run the job; fit() trains the model.
    pipeline.compile()
    pipeline.fit()

    # Print component summaries.
    for component_name in ("homo_lr_0", "evaluation_0"):
        summary = pipeline.get_component(component_name).get_summary()
        print(json.dumps(summary, indent=4, ensure_ascii=False))
def main(config="../../config.yaml", namespace=""):
    """Fit HeteroLR on the Union of two intersection outputs of the breast_hetero tables.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # A string config is treated as a path and loaded first.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Both readers point at the same train tables.
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")

    # Both transforms get identical per-party settings: labelled dense data on the guest.
    guest_transform_0 = data_transform_0.get_party_instance(role='guest', party_id=guest)
    guest_transform_0.component_param(with_label=True, output_format="dense")
    host_transform_0 = data_transform_0.get_party_instance(role='host', party_id=host)
    host_transform_0.component_param(with_label=False)

    guest_transform_1 = data_transform_1.get_party_instance(role='guest', party_id=guest)
    guest_transform_1.component_param(with_label=True, output_format="dense")
    host_transform_1 = data_transform_1.get_party_instance(role='host', party_id=host)
    host_transform_1.component_param(with_label=False)

    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    union_0 = Union(name="union_0")

    lr_settings = {
        "max_iter": 3,
        "early_stop": "weight_diff",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 1E-4,
        "alpha": 0.01,
        "learning_rate": 0.15,
        "init_param": {"init_method": "random_uniform"},
    }
    hetero_lr_0 = HeteroLR(name="hetero_lr_0", **lr_settings)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary", pos_label=1)
    host_evaluation = evaluation_0.get_party_instance(role='host', party_id=host)
    host_evaluation.component_param(need_run=False)

    # Wire the pipeline in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    # data_transform_1 takes the model output of data_transform_0.
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    pipeline.add_component(intersect_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=data_transform_1.output.data))
    # Union takes both intersection outputs; HeteroLR trains on the union output.
    intersected = [intersect_0.output.data, intersect_1.output.data]
    pipeline.add_component(union_0, data=Data(data=intersected))
    pipeline.add_component(hetero_lr_0, data=Data(train_data=union_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data))

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", param="./breast_lr_config.yaml", namespace=""):
    """Run the HomoLR benchmark job and return its data and result summaries.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties``, ``backend`` and ``work_mode``.
        param: Path to a parameter file, or a dict of parameters. ``data_guest``,
            ``penalty``, ``max_iter``, ``alpha`` and ``learning_rate`` are
            required; ``optimizer``, ``batch_size`` and ``init_method`` fall
            back to ``"sgd"``, ``-1`` and ``'random_uniform'``.
        namespace: Suffix appended to the ``experiment`` table namespace.

    Returns:
        A ``(data_summary, result_summary)`` tuple. ``data_summary`` names the
        guest/host tables used for train and test; ``result_summary`` is the
        parsed evaluation summary with ``"distribution_metrics"`` added.

    Raises:
        ValueError: If the file name at the end of ``param["data_guest"]`` is
            not one of the recognised data sets.
    """
    # A string config is treated as a path and loaded first.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    assert isinstance(param, dict)
    # Example values:
    #   guest = 9999
    #   host = 10000
    #   arbiter = 9999
    #   backend = 0
    #   work_mode = 1
    #   param = {"penalty": "L2", "max_iter": 5}

    # Map the guest csv file name to the (guest, host) table names.
    known_tables = {
        "default_credit_homo_guest.csv": ('default_credit_guest', 'default_credit_host1'),
        'breast_homo_guest.csv': ('breast_homo_guest', 'breast_homo_host'),
        'give_credit_homo_guest.csv': ('give_credit_homo_guest', 'give_credit_homo_host'),
        'epsilon_5k_homo_guest.csv': ('epsilon_5k_homo_guest', 'epsilon_5k_homo_host'),
    }
    data_set = param.get("data_guest").split('/')[-1]
    if data_set not in known_tables:
        raise ValueError(f"Cannot recognized data_set: {data_set}")
    guest_data_table, host_data_table = known_tables[data_set]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": guest_data_table, "namespace": table_namespace}
    host_train_data = {"name": host_data_table, "namespace": table_namespace}

    # Build the pipeline, then set initiator and participants.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: each party reads its own table.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role='host', party_id=host)
    host_reader.component_param(table=host_train_data)

    # DataIO: both parties are configured with with_label=True.
    dataio_0 = DataIO(name="dataio_0")
    guest_dataio = dataio_0.get_party_instance(role='guest', party_id=guest)
    guest_dataio.component_param(with_label=True, output_format="dense")
    host_dataio = dataio_0.get_party_instance(role='host', party_id=host)
    host_dataio.component_param(with_label=True)

    lr_param = {
        "penalty": param["penalty"],
        "max_iter": param["max_iter"],
        "alpha": param["alpha"],
        "learning_rate": param["learning_rate"],
        "optimizer": param.get("optimizer", "sgd"),
        "batch_size": param.get("batch_size", -1),
        "init_param": {
            "init_method": param.get("init_method", 'random_uniform'),
        },
        "encrypt_param": {
            "method": None,
        },
    }
    print(f"lr_param: {lr_param}, data_set: {data_set}")

    homo_lr_0 = HomoLR(name='homo_lr_0', **lr_param)
    homo_lr_1 = HomoLR(name='homo_lr_1')

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")
    host_evaluation = evaluation_0.get_party_instance(role='host', party_id=host)
    host_evaluation.component_param(need_run=False)

    # Components are added in order of task execution.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_lr_0, data=Data(train_data=dataio_0.output.data))
    # homo_lr_1 takes homo_lr_0's model and the same DataIO output as test data.
    pipeline.add_component(
        homo_lr_1,
        data=Data(test_data=dataio_0.output.data),
        model=Model(homo_lr_0.output.model),
    )
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile() forms the conf and dsl used to run the job.
    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # Train and test both refer to the same tables.
    guest_name = guest_train_data["name"]
    host_name = host_train_data["name"]
    data_summary = {
        "train": {"guest": guest_name, "host": host_name},
        "test": {"guest": guest_name, "host": host_name},
    }

    evaluation_summary = pipeline.get_component("evaluation_0").get_summary()
    result_summary = parse_summary_result(evaluation_summary)

    lr_0_data = pipeline.get_component("homo_lr_0").get_output_data().get("data")
    lr_1_data = pipeline.get_component("homo_lr_1").get_output_data().get("data")

    lr_0_score = extract_data(lr_0_data, "predict_result")
    lr_0_label = extract_data(lr_0_data, "label")
    lr_1_score = extract_data(lr_1_data, "predict_result")
    lr_1_label = extract_data(lr_1_data, "label")
    lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True)
    lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True)

    # Metrics computed from the outputs of homo_lr_0 and homo_lr_1.
    score_diversity_ratio = classification_metric.Distribution.compute(
        lr_0_score_label, lr_1_score_label)
    ks_2samp = classification_metric.KSTest.compute(lr_0_score, lr_1_score)
    map_d_value = classification_metric.AveragePrecisionScore().compute(
        lr_0_score, lr_1_score, lr_0_label, lr_1_label)
    metric_lr = {
        "score_diversity_ratio": score_diversity_ratio,
        "ks_2samp": ks_2samp,
        "mAP_D_value": map_d_value,
    }
    result_summary["distribution_metrics"] = {"homo_lr": metric_lr}

    return data_summary, result_summary
def main(config="../../config.yaml", namespace=""):
    """Fit SecureBoost, transform features with SBTTransformer, and fit two LocalBaselines.

    ``local_baseline_0`` reads the transformer output and ``local_baseline_1``
    reads the plain intersection output; each feeds its own Evaluation.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # A string config is treated as a path and loaded first.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host = parties.guest[0], parties.host[0]

    def _table(name):
        # Fresh table descriptor in the experiment namespace.
        return {"name": name, "namespace": f"experiment{namespace}"}

    # Train and validate sets point at the same tables.
    guest_train_data = _table("breast_hetero_guest")
    host_train_data = _table("breast_hetero_host")
    guest_validate_data = _table("breast_hetero_guest")
    host_validate_data = _table("breast_hetero_host")

    # This pipeline has guest and host roles only (no arbiter).
    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Readers: reader_0 for train data, reader_1 for validate data.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)
    reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(role="host", party_id=host).component_param(table=host_validate_data)

    # DataTransform: labelled dense data on the guest, unlabelled on the host.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")

    guest_transform_0 = data_transform_0.get_party_instance(role="guest", party_id=guest)
    guest_transform_0.component_param(with_label=True, output_format="dense")
    host_transform_0 = data_transform_0.get_party_instance(role="host", party_id=host)
    host_transform_0.component_param(with_label=False)

    guest_transform_1 = data_transform_1.get_party_instance(role="guest", party_id=guest)
    guest_transform_1.component_param(with_label=True, output_format="dense")
    host_transform_1 = data_transform_1.get_party_instance(role="host", party_id=host)
    host_transform_1.component_param(with_label=False)

    # Intersection components.
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # SecureBoost component.
    boost_settings = {
        "num_trees": 3,
        "task_type": "classification",
        "objective_param": {"objective": "cross_entropy"},
        "encrypt_param": {"method": "paillier"},
        "tree_param": {"max_depth": 3},
        "validation_freqs": 1,
    }
    hetero_secure_boost_0 = HeteroSecureBoost(name="hetero_secure_boost_0", **boost_settings)

    # Evaluation components.
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_1 = Evaluation(name="evaluation_1", eval_type="binary")

    # SBT feature transformer.
    transformer_0 = SBTTransformer(name='sbt_transformer_0', dense_format=True)

    def get_local_baseline(idx):
        # Each call builds a new LocalBaseline with its own options dict.
        logistic_opts = {
            "penalty": "l2",
            "tol": 0.0001,
            "C": 1.0,
            "fit_intercept": True,
            "solver": "lbfgs",
            "max_iter": 50,
        }
        return LocalBaseline(
            name=f"local_baseline_{idx}",
            model_name="LogisticRegression",
            model_opts=logistic_opts,
        )

    # need_run is switched on for the guest and off for the host on both baselines.
    local_baseline_0 = get_local_baseline(0)
    local_baseline_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    local_baseline_1 = get_local_baseline(1)
    local_baseline_1.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_1.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    # NOTE(review): only evaluation_1 is disabled on the host; evaluation_0 gets
    # no per-party setting here - confirm that this is intentional.
    evaluation_1.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    # Wire the pipeline in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    # data_transform_1 takes the model output of data_transform_0.
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    pipeline.add_component(intersect_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=data_transform_1.output.data))
    boost_inputs = Data(
        train_data=intersect_0.output.data,
        validate_data=intersect_1.output.data,
    )
    pipeline.add_component(hetero_secure_boost_0, data=boost_inputs)
    # The transformer receives the SecureBoost model as its isometric model.
    pipeline.add_component(
        transformer_0,
        data=Data(data=intersect_0.output.data),
        model=Model(isometric_model=hetero_secure_boost_0.output.model),
    )
    pipeline.add_component(local_baseline_0, data=Data(data=transformer_0.output.data))
    pipeline.add_component(local_baseline_1, data=Data(data=intersect_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=local_baseline_0.output.data))
    pipeline.add_component(evaluation_1, data=Data(data=local_baseline_1.output.data))

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    """Fit HomoLR on the breast_homo tables, predict, and dump the predict DSL and conf.

    After training, the DataTransform, FeatureScale and HomoLR components are
    deployed into a predict pipeline that reads from the same reader. The
    predict DSL and conf are written as JSON files in the current directory,
    and the summaries of ``homo_lr_0`` and ``evaluation_0`` are printed.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # A string config is treated as a path and loaded first.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # define DataTransform components; component numbering starts at 0
    data_transform_0 = DataTransform(
        name="data_transform_0",
        with_label=True,
        output_format="dense",
    )

    scale_0 = FeatureScale(name='scale_0')

    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }

    homo_lr_0 = HomoLR(name='homo_lr_0', **param)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    # scale_0 reads the DataTransform output; HomoLR trains on the scaled data
    pipeline.add_component(scale_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(homo_lr_0, data=Data(train_data=scale_0.output.data))

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()

    deploy_components = [data_transform_0, scale_0, homo_lr_0]
    pipeline.deploy_component(components=deploy_components)

    predict_pipeline = PipeLine()
    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_0)
    # add selected components from train pipeline onto predict pipeline
    # specify data source
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_0.output.data}),
    )
    predict_pipeline.compile()
    predict_pipeline.predict()

    dsl_json = predict_pipeline.get_predict_dsl()
    conf_json = predict_pipeline.get_predict_conf()

    # Write through context managers so each file is flushed and closed
    # deterministically instead of leaking the handle returned by open().
    # NOTE(review): the '**' in these file names looks like a masked "homo" -
    # confirm the intended names; the paths are kept unchanged here.
    with open('./h**o-lr-normal-predict-dsl.json', 'w') as dsl_file:
        json.dump(dsl_json, dsl_file, indent=4)
    with open('./h**o-lr-normal-predict-conf.json', 'w') as conf_file:
        json.dump(conf_json, conf_file, indent=4)

    # query component summary
    print(json.dumps(pipeline.get_component("homo_lr_0").get_summary(), indent=4, ensure_ascii=False))
    print(json.dumps(pipeline.get_component("evaluation_0").get_summary(), indent=4, ensure_ascii=False))
def main(config="../../config.yaml", namespace=""):
    """Fit a homogeneous LR pipeline with separate train and eval branches.

    Two readers feed two parallel chains (DataTransform -> HomoOneHotEncoder
    -> FeatureScale -> HomoLR). The ``*_0`` components are trained on the
    train tables; each ``*_1`` component receives the model output of its
    ``*_0`` counterpart and is applied to the eval tables. A binary
    Evaluation component consumes both LR outputs. After fitting, the
    ``homo_lr_0`` and ``evaluation_0`` summaries are printed as JSON.

    Args:
        config: Path to a job config file, or an already loaded config object
            exposing ``parties``. A string is loaded with ``load_job_config``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]

    def _table(name):
        # Table descriptor as expected by Reader.component_param(table=...).
        return {"name": name, "namespace": f"experiment{namespace}"}

    guest_train_data = _table("mock_string")
    host_train_data = _table("mock_string")
    guest_eval_data = _table("mock_string")
    host_eval_data = _table("mock_string")

    # Pipeline with guest as initiator and all three roles registered.
    pipeline = PipeLine().set_initiator(
        role='guest', party_id=guest,
    ).set_roles(guest=guest, host=host, arbiter=arbiter)

    def _make_reader(name, guest_table, host_table):
        # Create a Reader and point each party at its own table.
        reader = Reader(name=name)
        reader.get_party_instance(
            role='guest', party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(
            role='host', party_id=host).component_param(table=host_table)
        return reader

    reader_0 = _make_reader("reader_0", guest_train_data, host_train_data)
    reader_1 = _make_reader("reader_1", guest_eval_data, host_eval_data)

    # Only the train-side components carry explicit parameters; the
    # eval-side ones are created with their names alone.
    data_transform_0 = DataTransform(
        name="data_transform_0",
        with_label=True,
        output_format="dense",
        label_name='y',
        data_type="str",
    )
    data_transform_1 = DataTransform(name="data_transform_1")

    homo_onehot_0 = HomoOneHotEncoder(
        name='homo_onehot_0',
        transform_col_indexes=-1,
        transform_col_names=[],
        need_alignment=True,
    )
    homo_onehot_1 = HomoOneHotEncoder(name='homo_onehot_1')

    scale_0 = FeatureScale(name='scale_0', method="standard_scale")
    scale_1 = FeatureScale(name='scale_1')

    homo_lr_0 = HomoLR(
        name='homo_lr_0',
        penalty="L2",
        optimizer="sgd",
        tol=1e-05,
        alpha=0.01,
        max_iter=3,
        early_stop="diff",
        batch_size=500,
        learning_rate=0.15,
        decay=1,
        decay_sqrt=True,
        init_param={"init_method": "zeros"},
        encrypt_param={"method": "Paillier"},
        cv_param={
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False,
        },
    )
    homo_lr_1 = HomoLR(name='homo_lr_1')

    # Wire components in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)

    pipeline.add_component(
        data_transform_0,
        data=Data(data=reader_0.output.data),
    )
    # data_transform_1 replicates the model from data_transform_0
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )

    pipeline.add_component(
        homo_onehot_0,
        data=Data(data=data_transform_0.output.data),
    )
    pipeline.add_component(
        homo_onehot_1,
        data=Data(data=data_transform_1.output.data),
        model=Model(homo_onehot_0.output.model),
    )

    pipeline.add_component(
        scale_0,
        data=Data(data=homo_onehot_0.output.data),
    )
    pipeline.add_component(
        scale_1,
        data=Data(data=homo_onehot_1.output.data),
        model=Model(scale_0.output.model),
    )

    pipeline.add_component(
        homo_lr_0,
        data=Data(train_data=scale_0.output.data),
    )
    pipeline.add_component(
        homo_lr_1,
        data=Data(test_data=scale_1.output.data),
        model=Model(homo_lr_0.output.model),
    )

    # Evaluation covers both LR outputs and is switched off for the host.
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)
    pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_0.output.data, homo_lr_1.output.data]),
    )

    pipeline.compile()
    pipeline.fit()

    # Print the summaries of the trained model and of the evaluation.
    for component_name in ("homo_lr_0", "evaluation_0"):
        summary = pipeline.get_component(component_name).get_summary()
        print(json.dumps(summary, indent=4, ensure_ascii=False))