def test_regression(self):
    print('testing regression metric')
    regression_metric.R2Score().compute(self.reg_score, self.reg_label)
    regression_metric.MSE().compute(self.reg_score, self.reg_label)
    regression_metric.RMSE().compute(self.reg_score, self.reg_label)
    regression_metric.ExplainedVariance().compute(self.reg_score, self.reg_label)
    regression_metric.Describe().compute(self.reg_score)
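
# The test above smoke-tests the regression metric classes (assumed to be
# federatedml's evaluation metrics): each compute() call takes a 1-D array of
# continuous scores and, where applicable, a matching array of labels. A
# hypothetical setUp providing those fixtures (names and values are
# illustrative, not from the source):

def setUp(self):
    import numpy as np
    np.random.seed(42)
    self.reg_score = np.random.random(100)  # continuous predictions
    self.reg_label = np.random.random(100)  # continuous ground truth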
def main(config="../../config.yaml", param="./xgb_config_binary.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) if isinstance(param, str): param = JobConfig.load_from_file(param) parties = config.parties guest = parties.guest[0] host = parties.host[0] backend = config.backend work_mode = config.work_mode # data sets guest_train_data = {"name": param['data_guest_train'], "namespace": f"experiment{namespace}"} host_train_data = {"name": param['data_host_train'], "namespace": f"experiment{namespace}"} guest_validate_data = {"name": param['data_guest_val'], "namespace": f"experiment{namespace}"} host_validate_data = {"name": param['data_host_val'], "namespace": f"experiment{namespace}"} # init pipeline pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest, host=host,) # set data reader and data-io reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1") reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data) reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_validate_data) reader_1.get_party_instance(role="host", party_id=host).component_param(table=host_validate_data) dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1") dataio_0.get_party_instance(role="guest", party_id=guest).component_param(with_label=True, output_format="dense") dataio_0.get_party_instance(role="host", party_id=host).component_param(with_label=False) dataio_1.get_party_instance(role="guest", party_id=guest).component_param(with_label=True, output_format="dense") dataio_1.get_party_instance(role="host", party_id=host).component_param(with_label=False) # data intersect component intersect_0 = Intersection(name="intersection_0") intersect_1 = Intersection(name="intersection_1") # secure boost component hetero_fast_sbt_0 = HeteroFastSecureBoost(name="hetero_fast_sbt_0", num_trees=param['tree_num'], task_type=param['task_type'], objective_param={"objective": param['loss_func']}, encrypt_param={"method": "iterativeAffine"}, tree_param={"max_depth": param['tree_depth']}, validation_freqs=1, subsample_feature_rate=1, learning_rate=param['learning_rate'], guest_depth=param['guest_depth'], host_depth=param['host_depth'], tree_num_per_party=param['tree_num_per_party'], work_mode=param['work_mode'] ) hetero_fast_sbt_1 = HeteroFastSecureBoost(name="hetero_fast_sbt_1") # evaluation component evaluation_0 = Evaluation(name="evaluation_0", eval_type=param['eval_type']) pipeline.add_component(reader_0) pipeline.add_component(reader_1) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model)) pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data)) pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data)) pipeline.add_component(hetero_fast_sbt_0, data=Data(train_data=intersect_0.output.data, validate_data=intersect_1.output.data)) pipeline.add_component(hetero_fast_sbt_1, data=Data(test_data=intersect_1.output.data), model=Model(hetero_fast_sbt_0.output.model)) pipeline.add_component(evaluation_0, data=Data(data=hetero_fast_sbt_0.output.data)) pipeline.compile() job_parameters = JobParameters(backend=backend, work_mode=work_mode) pipeline.fit(job_parameters) sbt_0_data = 
pipeline.get_component("hetero_fast_sbt_0").get_output_data().get("data") sbt_1_data = pipeline.get_component("hetero_fast_sbt_1").get_output_data().get("data") sbt_0_score = extract_data(sbt_0_data, "predict_result") sbt_0_label = extract_data(sbt_0_data, "label") sbt_1_score = extract_data(sbt_1_data, "predict_result") sbt_1_label = extract_data(sbt_1_data, "label") sbt_0_score_label = extract_data(sbt_0_data, "predict_result", keep_id=True) sbt_1_score_label = extract_data(sbt_1_data, "predict_result", keep_id=True) metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary()) if param['eval_type'] == "regression": desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score) desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score) metric_summary["script_metrics"] = {"hetero_fast_sbt_train": desc_sbt_0, "hetero_fast_sbt_validate": desc_sbt_1} elif param['eval_type'] == "binary": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label), "ks_2samp": classification_metric.KSTest.compute(sbt_0_score, sbt_1_score), "mAP_D_value": classification_metric.AveragePrecisionScore().compute(sbt_0_score, sbt_1_score, sbt_0_label, sbt_1_label)} metric_summary["distribution_metrics"] = {"hetero_fast_sbt": metric_sbt} elif param['eval_type'] == "multi": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label)} metric_summary["distribution_metrics"] = {"hetero_fast_sbt": metric_sbt} data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]} } return data_summary, metric_summary
def main(config="../../config.yaml", param="./sshe_linr_config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] if isinstance(param, str): param = JobConfig.load_from_file(param) guest_train_data = { "name": "motor_hetero_guest", "namespace": f"experiment{namespace}" } host_train_data = { "name": "motor_hetero_host", "namespace": f"experiment{namespace}" } # initialize pipeline pipeline = PipeLine() # set job initiator pipeline.set_initiator(role='guest', party_id=guest) # set participants information pipeline.set_roles(guest=guest, host=host, arbiter=arbiter) # define Reader components to read in data reader_0 = Reader(name="reader_0") # configure Reader for guest reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) # configure Reader for host reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) # define DataTransform components data_transform_0 = DataTransform( name="data_transform_0") # start component numbering at 0 # get DataTransform party instance of guest data_transform_0_guest_party_instance = data_transform_0.get_party_instance( role='guest', party_id=guest) # configure DataTransform for guest data_transform_0_guest_party_instance.component_param( with_label=True, output_format="dense", label_name=param["label_name"], label_type="float") # get and configure DataTransform party instance of host data_transform_0.get_party_instance( role='host', party_id=host).component_param(with_label=False) # define Intersection component intersection_0 = Intersection(name="intersection_0") param = { "penalty": param["penalty"], "max_iter": param["max_iter"], "optimizer": param["optimizer"], "learning_rate": param["learning_rate"], "init_param": param["init_param"], "batch_size": param["batch_size"], "alpha": param["alpha"], "early_stop": param["early_stop"], "reveal_strategy": param["reveal_strategy"], "tol": 1e-6, "reveal_every_iter": True } hetero_sshe_linr_0 = HeteroSSHELinR(name='hetero_sshe_linr_0', **param) hetero_sshe_linr_1 = HeteroSSHELinR(name='hetero_sshe_linr_1') evaluation_0 = Evaluation(name='evaluation_0', eval_type="regression", metrics=[ "r2_score", "mean_squared_error", "root_mean_squared_error", "explained_variance" ]) # add components to pipeline, in order of task execution pipeline.add_component(reader_0) pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data)) pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data)) pipeline.add_component(hetero_sshe_linr_0, data=Data(train_data=intersection_0.output.data)) pipeline.add_component(hetero_sshe_linr_1, data=Data(test_data=intersection_0.output.data), model=Model(hetero_sshe_linr_0.output.model)) pipeline.add_component(evaluation_0, data=Data(data=hetero_sshe_linr_0.output.data)) # compile pipeline once finished adding modules, this step will form conf and dsl files for running job pipeline.compile() # fit model pipeline.fit() metric_summary = parse_summary_result( pipeline.get_component("evaluation_0").get_summary()) data_linr_0 = extract_data( pipeline.get_component("hetero_sshe_linr_0").get_output_data().get( "data"), "predict_result") data_linr_1 = extract_data( pipeline.get_component("hetero_sshe_linr_1").get_output_data().get( "data"), "predict_result") desc_linr_0 = regression_metric.Describe().compute(data_linr_0) desc_linr_1 = 
regression_metric.Describe().compute(data_linr_1) metric_summary["script_metrics"] = { "linr_train": desc_linr_0, "linr_validate": desc_linr_1 } data_summary = { "train": { "guest": guest_train_data["name"], "host": host_train_data["name"] }, "test": { "guest": guest_train_data["name"], "host": host_train_data["name"] } } return data_summary, metric_summary
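
# extract_data comes from fate_test.utils in these scripts. Its observable
# contract here: pull one column out of a component's output DataFrame, either
# as a bare value array or, with keep_id=True, paired with the sample id so two
# runs can be aligned row by row. A minimal pandas-based sketch of that
# contract (an assumption for illustration, not the real implementation):
import numpy as np
import pandas as pd

def extract_data_sketch(df: pd.DataFrame, col_name: str, keep_id: bool = False) -> np.ndarray:
    if keep_id:
        # (id, value) pairs, so scores from different components can be joined on id
        return df[["id", col_name]].to_numpy()
    return df[col_name].to_numpy()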
def main(config="../../config.yaml", param='./xgb_config_binary.yaml', namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) if isinstance(param, str): param = JobConfig.load_from_file(param) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] guest_train_data = { "name": param['data_guest_train'], "namespace": f"experiment{namespace}" } guest_validate_data = { "name": param['data_guest_val'], "namespace": f"experiment{namespace}" } host_train_data = { "name": param['data_host_train'], "namespace": f"experiment{namespace}" } host_validate_data = { "name": param['data_host_val'], "namespace": f"experiment{namespace}" } pipeline = PipeLine().set_initiator( role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter) dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1') reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1') reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) dataio_0.get_party_instance(role='guest', party_id=guest).component_param( with_label=True, output_format="dense") dataio_0.get_party_instance(role='host', party_id=host).component_param( with_label=True, output_format="dense") reader_1.get_party_instance( role='guest', party_id=guest).component_param(table=guest_validate_data) reader_1.get_party_instance( role='host', party_id=host).component_param(table=host_validate_data) dataio_1.get_party_instance(role='guest', party_id=guest).component_param( with_label=True, output_format="dense") dataio_1.get_party_instance(role='host', party_id=host).component_param( with_label=True, output_format="dense") homo_secureboost_0 = HomoSecureBoost( name="homo_secureboost_0", num_trees=param['tree_num'], task_type=param['task_type'], objective_param={"objective": param['loss_func']}, tree_param={"max_depth": param['tree_depth']}, validation_freqs=1, subsample_feature_rate=1, learning_rate=param['learning_rate'], bin_num=50) homo_secureboost_1 = HomoSecureBoost(name="homo_secureboost_1") evaluation_0 = Evaluation(name='evaluation_0', eval_type=param['eval_type']) pipeline.add_component(reader_0) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(reader_1) pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model)) pipeline.add_component(homo_secureboost_0, data=Data(train_data=dataio_0.output.data, validate_data=dataio_1.output.data)) pipeline.add_component(homo_secureboost_1, data=Data(test_data=dataio_1.output.data), model=Model(homo_secureboost_0.output.model)) pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data)) pipeline.compile() pipeline.fit() sbt_0_data = pipeline.get_component( "homo_secureboost_0").get_output_data().get("data") sbt_1_data = pipeline.get_component( "homo_secureboost_1").get_output_data().get("data") sbt_0_score = extract_data(sbt_0_data, "predict_result") sbt_0_label = extract_data(sbt_0_data, "label") sbt_1_score = extract_data(sbt_1_data, "predict_result") sbt_1_label = extract_data(sbt_1_data, "label") sbt_0_score_label = extract_data(sbt_0_data, "predict_result", keep_id=True) sbt_1_score_label = extract_data(sbt_1_data, "predict_result", keep_id=True) metric_summary = parse_summary_result( pipeline.get_component("evaluation_0").get_summary()) if param['eval_type'] 
== "regression": desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score) desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score) metric_summary["script_metrics"] = { "sbt_train": desc_sbt_0, "sbt_validate": desc_sbt_1 } elif param['eval_type'] == "binary": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label), "ks_2samp": classification_metric.KSTest.compute(sbt_0_score, sbt_1_score), "mAP_D_value": classification_metric.AveragePrecisionScore().compute( sbt_0_score, sbt_1_score, sbt_0_label, sbt_1_label) } metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt} elif param['eval_type'] == "multi": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label) } metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt} data_summary = { "train": { "guest": guest_train_data["name"], "host": host_train_data["name"] }, "test": { "guest": guest_validate_data["name"], "host": host_validate_data["name"] } } return data_summary, metric_summary