def main(config="../../config.yaml", param="./lr_config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] if isinstance(param, str): param = JobConfig.load_from_file(param) assert isinstance(param, dict) data_set = param.get("data_guest").split('/')[-1] if data_set == "default_credit_hetero_guest.csv": guest_data_table = 'default_credit_hetero_guest' host_data_table = 'default_credit_hetero_host' elif data_set == 'breast_hetero_guest.csv': guest_data_table = 'breast_hetero_guest' host_data_table = 'breast_hetero_host' elif data_set == 'give_credit_hetero_guest.csv': guest_data_table = 'give_credit_hetero_guest' host_data_table = 'give_credit_hetero_host' elif data_set == 'epsilon_5k_hetero_guest.csv': guest_data_table = 'epsilon_5k_hetero_guest' host_data_table = 'epsilon_5k_hetero_host' else: raise ValueError(f"Cannot recognized data_set: {data_set}") guest_train_data = {"name": guest_data_table, "namespace": f"experiment{namespace}"} host_train_data = {"name": host_data_table, "namespace": f"experiment{namespace}"} # initialize pipeline pipeline = PipeLine() # set job initiator pipeline.set_initiator(role='guest', party_id=guest) # set participants information pipeline.set_roles(guest=guest, host=host, arbiter=arbiter) # define Reader components to read in data reader_0 = Reader(name="reader_0") # configure Reader for guest reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data) # configure Reader for host reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data) # define DataTransform components data_transform_0 = DataTransform(name="data_transform_0") # start component numbering at 0 # get DataTransform party instance of guest data_transform_0_guest_party_instance = data_transform_0.get_party_instance(role='guest', party_id=guest) # configure DataTransform for guest data_transform_0_guest_party_instance.component_param(with_label=True, output_format="dense") # get and configure DataTransform party instance of host data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False) # define Intersection component intersection_0 = Intersection(name="intersection_0") lr_param = { } config_param = { "penalty": param["penalty"], "max_iter": param["max_iter"], "alpha": param["alpha"], "learning_rate": param["learning_rate"], "optimizer": param["optimizer"], "batch_size": param["batch_size"], "shuffle": False, "masked_rate": 0, "early_stop": "diff", "tol": 1e-5, "floating_point_precision": param.get("floating_point_precision"), "init_param": { "init_method": param.get("init_method", 'random_uniform'), "random_seed": param.get("random_seed", 103) } } lr_param.update(config_param) print(f"lr_param: {lr_param}, data_set: {data_set}") hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param) hetero_lr_1 = HeteroLR(name='hetero_lr_1') evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary") # add components to pipeline, in order of task execution pipeline.add_component(reader_0) pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data)) pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data)) pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data)) pipeline.add_component(hetero_lr_1, data=Data(test_data=intersection_0.output.data), model=Model(hetero_lr_0.output.model)) pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data)) # compile pipeline once finished adding modules, this step will form conf and dsl files for running job pipeline.compile() # fit model job_parameters = JobParameters() pipeline.fit(job_parameters) lr_0_data = pipeline.get_component("hetero_lr_0").get_output_data().get("data") lr_1_data = pipeline.get_component("hetero_lr_1").get_output_data().get("data") lr_0_score = extract_data(lr_0_data, "predict_result") lr_0_label = extract_data(lr_0_data, "label") lr_1_score = extract_data(lr_1_data, "predict_result") lr_1_label = extract_data(lr_1_data, "label") lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True) lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True) result_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary()) metric_lr = { "score_diversity_ratio": classification_metric.Distribution.compute(lr_0_score_label, lr_1_score_label), "ks_2samp": classification_metric.KSTest.compute(lr_0_score, lr_1_score), "mAP_D_value": classification_metric.AveragePrecisionScore().compute(lr_0_score, lr_1_score, lr_0_label, lr_1_label)} result_summary["distribution_metrics"] = {"hetero_lr": metric_lr} data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]} } return data_summary, result_summary
def main(config="../../config.yaml", param="./vechile_config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] backend = config.backend work_mode = config.work_mode if isinstance(param, str): param = JobConfig.load_from_file(param) assert isinstance(param, dict) """ guest = 9999 host = 10000 arbiter = 9999 backend = 0 work_mode = 1 param = {"penalty": "L2", "max_iter": 5} """ data_set = param.get("data_guest").split('/')[-1] if data_set == "vehicle_scale_hetero_guest.csv": guest_data_table = 'vehicle_scale_hetero_guest' host_data_table = 'vehicle_scale_hetero_host' else: raise ValueError(f"Cannot recognized data_set: {data_set}") guest_train_data = { "name": guest_data_table, "namespace": f"experiment{namespace}" } host_train_data = { "name": host_data_table, "namespace": f"experiment{namespace}" } # initialize pipeline pipeline = PipeLine() # set job initiator pipeline.set_initiator(role='guest', party_id=guest) # set participants information pipeline.set_roles(guest=guest, host=host, arbiter=arbiter) # define Reader components to read in data reader_0 = Reader(name="reader_0") # configure Reader for guest reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) # configure Reader for host reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) # define DataIO components dataio_0 = DataIO(name="dataio_0") # start component numbering at 0 # get DataIO party instance of guest dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest) # configure DataIO for guest dataio_0_guest_party_instance.component_param(with_label=True, output_format="dense") # get and configure DataIO party instance of host dataio_0.get_party_instance( role='host', party_id=host).component_param(with_label=False) # define Intersection component intersection_0 = Intersection(name="intersection_0") lr_param = { "validation_freqs": None, "early_stopping_rounds": None, } config_param = { "penalty": param["penalty"], "max_iter": param["max_iter"], "alpha": param["alpha"], "learning_rate": param["learning_rate"], "optimizer": param["optimizer"], "batch_size": param["batch_size"], "early_stop": "diff", "init_param": { "init_method": param.get("init_method", 'random_uniform'), "random_seed": param.get("random_seed", 103) } } lr_param.update(config_param) print(f"lr_param: {lr_param}, data_set: {data_set}") hetero_lr_0 = HeteroLR(name='hetero_lr_0', **lr_param) hetero_lr_1 = HeteroLR(name='hetero_lr_1') evaluation_0 = Evaluation(name='evaluation_0', eval_type="multi") # add components to pipeline, in order of task execution pipeline.add_component(reader_0) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data)) pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data)) pipeline.add_component(hetero_lr_1, data=Data(test_data=intersection_0.output.data), model=Model(hetero_lr_0.output.model)) pipeline.add_component(evaluation_0, data=Data(data=hetero_lr_0.output.data)) # compile pipeline once finished adding modules, this step will form conf and dsl files for running job pipeline.compile() # fit model job_parameters = JobParameters(backend=backend, work_mode=work_mode) pipeline.fit(job_parameters) # query component summary result_summary = parse_summary_result( pipeline.get_component("evaluation_0").get_summary()) lr_0_data = pipeline.get_component("hetero_lr_0").get_output_data().get( "data") lr_1_data = pipeline.get_component("hetero_lr_1").get_output_data().get( "data") lr_0_score_label = extract_data(lr_0_data, "predict_result", keep_id=True) lr_1_score_label = extract_data(lr_1_data, "predict_result", keep_id=True) metric_lr = { "score_diversity_ratio": classification_metric.Distribution.compute(lr_0_score_label, lr_1_score_label) } result_summary["distribution_metrics"] = {"hetero_lr": metric_lr} data_summary = { "train": { "guest": guest_train_data["name"], "host": host_train_data["name"] }, "test": { "guest": guest_train_data["name"], "host": host_train_data["name"] } } return data_summary, result_summary
def main(config="../../config.yaml", param="./xgb_config_binary.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) if isinstance(param, str): param = JobConfig.load_from_file(param) parties = config.parties guest = parties.guest[0] host = parties.host[0] backend = config.backend work_mode = config.work_mode # data sets guest_train_data = {"name": param['data_guest_train'], "namespace": f"experiment{namespace}"} host_train_data = {"name": param['data_host_train'], "namespace": f"experiment{namespace}"} guest_validate_data = {"name": param['data_guest_val'], "namespace": f"experiment{namespace}"} host_validate_data = {"name": param['data_host_val'], "namespace": f"experiment{namespace}"} # init pipeline pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest, host=host,) # set data reader and data-io reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1") reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data) reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_validate_data) reader_1.get_party_instance(role="host", party_id=host).component_param(table=host_validate_data) dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1") dataio_0.get_party_instance(role="guest", party_id=guest).component_param(with_label=True, output_format="dense") dataio_0.get_party_instance(role="host", party_id=host).component_param(with_label=False) dataio_1.get_party_instance(role="guest", party_id=guest).component_param(with_label=True, output_format="dense") dataio_1.get_party_instance(role="host", party_id=host).component_param(with_label=False) # data intersect component intersect_0 = Intersection(name="intersection_0") intersect_1 = Intersection(name="intersection_1") # secure boost component hetero_fast_sbt_0 = HeteroFastSecureBoost(name="hetero_fast_sbt_0", num_trees=param['tree_num'], task_type=param['task_type'], objective_param={"objective": param['loss_func']}, encrypt_param={"method": "iterativeAffine"}, tree_param={"max_depth": param['tree_depth']}, validation_freqs=1, subsample_feature_rate=1, learning_rate=param['learning_rate'], guest_depth=param['guest_depth'], host_depth=param['host_depth'], tree_num_per_party=param['tree_num_per_party'], work_mode=param['work_mode'] ) hetero_fast_sbt_1 = HeteroFastSecureBoost(name="hetero_fast_sbt_1") # evaluation component evaluation_0 = Evaluation(name="evaluation_0", eval_type=param['eval_type']) pipeline.add_component(reader_0) pipeline.add_component(reader_1) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model)) pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data)) pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data)) pipeline.add_component(hetero_fast_sbt_0, data=Data(train_data=intersect_0.output.data, validate_data=intersect_1.output.data)) pipeline.add_component(hetero_fast_sbt_1, data=Data(test_data=intersect_1.output.data), model=Model(hetero_fast_sbt_0.output.model)) pipeline.add_component(evaluation_0, data=Data(data=hetero_fast_sbt_0.output.data)) pipeline.compile() job_parameters = JobParameters(backend=backend, work_mode=work_mode) pipeline.fit(job_parameters) sbt_0_data = pipeline.get_component("hetero_fast_sbt_0").get_output_data().get("data") sbt_1_data = pipeline.get_component("hetero_fast_sbt_1").get_output_data().get("data") sbt_0_score = extract_data(sbt_0_data, "predict_result") sbt_0_label = extract_data(sbt_0_data, "label") sbt_1_score = extract_data(sbt_1_data, "predict_result") sbt_1_label = extract_data(sbt_1_data, "label") sbt_0_score_label = extract_data(sbt_0_data, "predict_result", keep_id=True) sbt_1_score_label = extract_data(sbt_1_data, "predict_result", keep_id=True) metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary()) if param['eval_type'] == "regression": desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score) desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score) metric_summary["script_metrics"] = {"hetero_fast_sbt_train": desc_sbt_0, "hetero_fast_sbt_validate": desc_sbt_1} elif param['eval_type'] == "binary": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label), "ks_2samp": classification_metric.KSTest.compute(sbt_0_score, sbt_1_score), "mAP_D_value": classification_metric.AveragePrecisionScore().compute(sbt_0_score, sbt_1_score, sbt_0_label, sbt_1_label)} metric_summary["distribution_metrics"] = {"hetero_fast_sbt": metric_sbt} elif param['eval_type'] == "multi": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label)} metric_summary["distribution_metrics"] = {"hetero_fast_sbt": metric_sbt} data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]} } return data_summary, metric_summary
def main(config="../../config.yaml", param="./sshe_linr_config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] if isinstance(param, str): param = JobConfig.load_from_file(param) guest_train_data = { "name": "motor_hetero_guest", "namespace": f"experiment{namespace}" } host_train_data = { "name": "motor_hetero_host", "namespace": f"experiment{namespace}" } # initialize pipeline pipeline = PipeLine() # set job initiator pipeline.set_initiator(role='guest', party_id=guest) # set participants information pipeline.set_roles(guest=guest, host=host, arbiter=arbiter) # define Reader components to read in data reader_0 = Reader(name="reader_0") # configure Reader for guest reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) # configure Reader for host reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) # define DataTransform components data_transform_0 = DataTransform( name="data_transform_0") # start component numbering at 0 # get DataTransform party instance of guest data_transform_0_guest_party_instance = data_transform_0.get_party_instance( role='guest', party_id=guest) # configure DataTransform for guest data_transform_0_guest_party_instance.component_param( with_label=True, output_format="dense", label_name=param["label_name"], label_type="float") # get and configure DataTransform party instance of host data_transform_0.get_party_instance( role='host', party_id=host).component_param(with_label=False) # define Intersection component intersection_0 = Intersection(name="intersection_0") param = { "penalty": param["penalty"], "max_iter": param["max_iter"], "optimizer": param["optimizer"], "learning_rate": param["learning_rate"], "init_param": param["init_param"], "batch_size": param["batch_size"], "alpha": param["alpha"], "early_stop": param["early_stop"], "reveal_strategy": param["reveal_strategy"], "tol": 1e-6, "reveal_every_iter": True } hetero_sshe_linr_0 = HeteroSSHELinR(name='hetero_sshe_linr_0', **param) hetero_sshe_linr_1 = HeteroSSHELinR(name='hetero_sshe_linr_1') evaluation_0 = Evaluation(name='evaluation_0', eval_type="regression", metrics=[ "r2_score", "mean_squared_error", "root_mean_squared_error", "explained_variance" ]) # add components to pipeline, in order of task execution pipeline.add_component(reader_0) pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data)) pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data)) pipeline.add_component(hetero_sshe_linr_0, data=Data(train_data=intersection_0.output.data)) pipeline.add_component(hetero_sshe_linr_1, data=Data(test_data=intersection_0.output.data), model=Model(hetero_sshe_linr_0.output.model)) pipeline.add_component(evaluation_0, data=Data(data=hetero_sshe_linr_0.output.data)) # compile pipeline once finished adding modules, this step will form conf and dsl files for running job pipeline.compile() # fit model pipeline.fit() metric_summary = parse_summary_result( pipeline.get_component("evaluation_0").get_summary()) data_linr_0 = extract_data( pipeline.get_component("hetero_sshe_linr_0").get_output_data().get( "data"), "predict_result") data_linr_1 = extract_data( pipeline.get_component("hetero_sshe_linr_1").get_output_data().get( "data"), "predict_result") desc_linr_0 = regression_metric.Describe().compute(data_linr_0) desc_linr_1 = regression_metric.Describe().compute(data_linr_1) metric_summary["script_metrics"] = { "linr_train": desc_linr_0, "linr_validate": desc_linr_1 } data_summary = { "train": { "guest": guest_train_data["name"], "host": host_train_data["name"] }, "test": { "guest": guest_train_data["name"], "host": host_train_data["name"] } } return data_summary, metric_summary
def main(config="../../config.yaml", param="./hetero_nn_breast_config.yaml", namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) if isinstance(param, str): param = JobConfig.load_from_file(param) parties = config.parties guest = parties.guest[0] host = parties.host[0] guest_train_data = {"name": param["guest_table_name"], "namespace": f"experiment{namespace}"} host_train_data = {"name": param["host_table_name"], "namespace": f"experiment{namespace}"} pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host) reader_0 = Reader(name="reader_0") reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data) dataio_0 = DataIO(name="dataio_0") dataio_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True) dataio_0.get_party_instance(role='host', party_id=host).component_param(with_label=False) intersection_0 = Intersection(name="intersection_0") hetero_nn_0 = HeteroNN(name="hetero_nn_0", epochs=param["epochs"], interactive_layer_lr=param["learning_rate"], batch_size=param["batch_size"], early_stop="diff") hetero_nn_0.add_bottom_model(Dense(units=param["bottom_layer_units"], input_shape=(10,), activation="tanh", kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123))) hetero_nn_0.set_interactve_layer( Dense(units=param["interactive_layer_units"], input_shape=(param["bottom_layer_units"],), activation="relu", kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123))) hetero_nn_0.add_top_model( Dense(units=param["top_layer_units"], input_shape=(param["interactive_layer_units"],), activation=param["top_act"], kernel_initializer=initializers.RandomUniform(minval=-1, maxval=1, seed=123))) opt = getattr(optimizers, param["opt"])(lr=param["learning_rate"]) hetero_nn_0.compile(optimizer=opt, metrics=param["metrics"], loss=param["loss"]) hetero_nn_1 = HeteroNN(name="hetero_nn_1") if param["loss"] == "categorical_crossentropy": eval_type = "multi" else: eval_type = "binary" evaluation_0 = Evaluation(name="evaluation_0", eval_type=eval_type) pipeline.add_component(reader_0) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data)) pipeline.add_component(hetero_nn_0, data=Data(train_data=intersection_0.output.data)) pipeline.add_component(hetero_nn_1, data=Data(test_data=intersection_0.output.data), model=Model(hetero_nn_0.output.model)) pipeline.add_component(evaluation_0, data=Data(data=hetero_nn_0.output.data)) pipeline.compile() pipeline.fit() nn_0_data = pipeline.get_component("hetero_nn_0").get_output_data().get("data") nn_1_data = pipeline.get_component("hetero_nn_1").get_output_data().get("data") nn_0_score = extract_data(nn_0_data, "predict_result") nn_0_label = extract_data(nn_0_data, "label") nn_1_score = extract_data(nn_1_data, "predict_result") nn_1_label = extract_data(nn_1_data, "label") nn_0_score_label = extract_data(nn_0_data, "predict_result", keep_id=True) nn_1_score_label = extract_data(nn_1_data, "predict_result", keep_id=True) metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary()) if eval_type == "binary": metric_nn = { "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label), "ks_2samp": classification_metric.KSTest.compute(nn_0_score, nn_1_score), "mAP_D_value": classification_metric.AveragePrecisionScore().compute(nn_0_score, nn_1_score, nn_0_label, nn_1_label)} metric_summary["distribution_metrics"] = {"hetero_nn": metric_nn} elif eval_type == "multi": metric_nn = { "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label)} metric_summary["distribution_metrics"] = {"hetero_nn": metric_nn} data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]}, "test": {"guest": guest_train_data["name"], "host": host_train_data["name"]} } return data_summary, metric_summary
def main(config="../../config.yaml", param="param_conf.yaml", namespace=""): num_host = 1 if isinstance(config, str): config = load_job_config(config) if isinstance(param, str): param = JobConfig.load_from_file(param) epoch = param["epoch"] lr = param["lr"] batch_size = param.get("batch_size", -1) optimizer_name = param.get("optimizer", "Adam") encode_label = param.get("encode_label", True) loss = param.get("loss", "categorical_crossentropy") metrics = param.get("metrics", ["accuracy"]) layers = param["layers"] data = getattr(dataset, param.get("dataset", "vehicle")) guest_train_data = data["guest"] host_train_data = data["host"][:num_host] for d in [guest_train_data, *host_train_data]: d["namespace"] = f"{d['namespace']}{namespace}" hosts = config.parties.host[:num_host] pipeline = PipeLine() \ .set_initiator(role='guest', party_id=config.parties.guest[0]) \ .set_roles(guest=config.parties.guest[0], host=hosts, arbiter=config.parties.arbiter) reader_0 = Reader(name="reader_0") reader_0.get_party_instance(role='guest', party_id=config.parties.guest[0]).component_param(table=guest_train_data) for i in range(num_host): reader_0.get_party_instance(role='host', party_id=hosts[i]) \ .component_param(table=host_train_data[i]) dataio_0 = DataIO(name="dataio_0", with_label=True) dataio_0.get_party_instance(role='guest', party_id=config.parties.guest[0]) \ .component_param(with_label=True, output_format="dense") dataio_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=True) homo_nn_0 = HomoNN(name="homo_nn_0", encode_label=encode_label, max_iter=epoch, batch_size=batch_size, early_stop={"early_stop": "diff", "eps": 0.0}) for layer_config in layers: layer = getattr(tensorflow.keras.layers, layer_config["name"]) layer_params = layer_config["params"] homo_nn_0.add(layer(**layer_params)) homo_nn_0.compile(optimizer=getattr(optimizers, optimizer_name)(learning_rate=lr), metrics=metrics, loss=loss) homo_nn_1 = HomoNN(name="homo_nn_1") if param["loss"] == "categorical_crossentropy": eval_type = "multi" else: eval_type = "binary" evaluation_0 = Evaluation(name='evaluation_0', eval_type="multi", metrics=["accuracy", "precision", "recall"]) pipeline.add_component(reader_0) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(homo_nn_0, data=Data(train_data=dataio_0.output.data)) pipeline.add_component(homo_nn_1, data=Data(test_data=dataio_0.output.data), model=Model(homo_nn_0.output.model)) pipeline.add_component(evaluation_0, data=Data(data=homo_nn_0.output.data)) pipeline.compile() job_parameters = JobParameters(backend=config.backend, work_mode=config.work_mode) pipeline.fit(job_parameters) metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary()) nn_0_data = pipeline.get_component("homo_nn_0").get_output_data().get("data") nn_1_data = pipeline.get_component("homo_nn_1").get_output_data().get("data") nn_0_score = extract_data(nn_0_data, "predict_result") nn_0_label = extract_data(nn_0_data, "label") nn_1_score = extract_data(nn_1_data, "predict_result") nn_1_label = extract_data(nn_1_data, "label") nn_0_score_label = extract_data(nn_0_data, "predict_result", keep_id=True) nn_1_score_label = extract_data(nn_1_data, "predict_result", keep_id=True) if eval_type == "binary": metric_nn = { "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label), "ks_2samp": classification_metric.KSTest.compute(nn_0_score, nn_1_score), "mAP_D_value": classification_metric.AveragePrecisionScore().compute(nn_0_score, nn_1_score, nn_0_label, nn_1_label)} metric_summary["distribution_metrics"] = {"homo_nn": metric_nn} elif eval_type == "multi": metric_nn = { "score_diversity_ratio": classification_metric.Distribution.compute(nn_0_score_label, nn_1_score_label)} metric_summary["distribution_metrics"] = {"homo_nn": metric_nn} data_summary = dict( train={"guest": guest_train_data["name"], **{f"host_{i}": host_train_data[i]["name"] for i in range(num_host)}}, test={"guest": guest_train_data["name"], **{f"host_{i}": host_train_data[i]["name"] for i in range(num_host)}} ) return data_summary, metric_summary
def main(config="../../config.yaml", param='./xgb_config_binary.yaml', namespace=""): # obtain config if isinstance(config, str): config = load_job_config(config) if isinstance(param, str): param = JobConfig.load_from_file(param) parties = config.parties guest = parties.guest[0] host = parties.host[0] arbiter = parties.arbiter[0] guest_train_data = { "name": param['data_guest_train'], "namespace": f"experiment{namespace}" } guest_validate_data = { "name": param['data_guest_val'], "namespace": f"experiment{namespace}" } host_train_data = { "name": param['data_host_train'], "namespace": f"experiment{namespace}" } host_validate_data = { "name": param['data_host_val'], "namespace": f"experiment{namespace}" } pipeline = PipeLine().set_initiator( role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter) dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1') reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1') reader_0.get_party_instance( role='guest', party_id=guest).component_param(table=guest_train_data) reader_0.get_party_instance( role='host', party_id=host).component_param(table=host_train_data) dataio_0.get_party_instance(role='guest', party_id=guest).component_param( with_label=True, output_format="dense") dataio_0.get_party_instance(role='host', party_id=host).component_param( with_label=True, output_format="dense") reader_1.get_party_instance( role='guest', party_id=guest).component_param(table=guest_validate_data) reader_1.get_party_instance( role='host', party_id=host).component_param(table=host_validate_data) dataio_1.get_party_instance(role='guest', party_id=guest).component_param( with_label=True, output_format="dense") dataio_1.get_party_instance(role='host', party_id=host).component_param( with_label=True, output_format="dense") homo_secureboost_0 = HomoSecureBoost( name="homo_secureboost_0", num_trees=param['tree_num'], task_type=param['task_type'], objective_param={"objective": param['loss_func']}, tree_param={"max_depth": param['tree_depth']}, validation_freqs=1, subsample_feature_rate=1, learning_rate=param['learning_rate'], bin_num=50) homo_secureboost_1 = HomoSecureBoost(name="homo_secureboost_1") evaluation_0 = Evaluation(name='evaluation_0', eval_type=param['eval_type']) pipeline.add_component(reader_0) pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data)) pipeline.add_component(reader_1) pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model)) pipeline.add_component(homo_secureboost_0, data=Data(train_data=dataio_0.output.data, validate_data=dataio_1.output.data)) pipeline.add_component(homo_secureboost_1, data=Data(test_data=dataio_1.output.data), model=Model(homo_secureboost_0.output.model)) pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data)) pipeline.compile() pipeline.fit() sbt_0_data = pipeline.get_component( "homo_secureboost_0").get_output_data().get("data") sbt_1_data = pipeline.get_component( "homo_secureboost_1").get_output_data().get("data") sbt_0_score = extract_data(sbt_0_data, "predict_result") sbt_0_label = extract_data(sbt_0_data, "label") sbt_1_score = extract_data(sbt_1_data, "predict_result") sbt_1_label = extract_data(sbt_1_data, "label") sbt_0_score_label = extract_data(sbt_0_data, "predict_result", keep_id=True) sbt_1_score_label = extract_data(sbt_1_data, "predict_result", keep_id=True) metric_summary = parse_summary_result( pipeline.get_component("evaluation_0").get_summary()) if param['eval_type'] == "regression": desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score) desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score) metric_summary["script_metrics"] = { "sbt_train": desc_sbt_0, "sbt_validate": desc_sbt_1 } elif param['eval_type'] == "binary": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label), "ks_2samp": classification_metric.KSTest.compute(sbt_0_score, sbt_1_score), "mAP_D_value": classification_metric.AveragePrecisionScore().compute( sbt_0_score, sbt_1_score, sbt_0_label, sbt_1_label) } metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt} elif param['eval_type'] == "multi": metric_sbt = { "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label) } metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt} data_summary = { "train": { "guest": guest_train_data["name"], "host": host_train_data["name"] }, "test": { "guest": guest_validate_data["name"], "host": host_validate_data["name"] } } return data_summary, metric_summary