def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "tag_value_1000_140", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(
        with_label=False, input_format="tag", tag_with_value=True)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
def main():
    # parties config
    guest = 9999
    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    tag_data = {"name": "tag_value_1", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # csv file name from python path & file name
    pipeline_upload.add_upload_data(
        file=os.path.join(DATA_BASE, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],      # table name
        namespace=dense_data["namespace"],  # namespace
        head=1, partition=partition,        # data info
        id_delimiter=",")                   # id delimiter, needed for spark
    pipeline_upload.add_upload_data(
        file=os.path.join(DATA_BASE, "examples/data/tag_value_1000_140.csv"),
        table_name=tag_data["name"],
        namespace=tag_data["namespace"],
        head=0, partition=partition,
        id_delimiter=",")

    # upload all data
    pipeline_upload.upload(drop=1)
    import json
    print(json.dumps(pipeline_upload._upload_conf(), indent=4))
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "ionosphere_scale_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "ionosphere_scale_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True,
        label_name="LABEL",
        missing_fill=True,
        missing_fill_method="mean",
        outlier_replace=True)
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(
        with_label=False,
        missing_fill=True,
        missing_fill_method="designated",
        default_value=0,
        outlier_replace=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
def run_homo_nn_pipeline(config, namespace, data: dict, nn_component, num_host):
    if isinstance(config, str):
        config = load_job_config(config)

    guest_train_data = data["guest"]
    host_train_data = data["host"][:num_host]
    for d in [guest_train_data, *host_train_data]:
        d["namespace"] = f"{d['namespace']}{namespace}"

    hosts = config.parties.host[:num_host]
    pipeline = (PipeLine()
                .set_initiator(role="guest", party_id=config.parties.guest[0])
                .set_roles(guest=config.parties.guest[0], host=hosts, arbiter=config.parties.arbiter))

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=config.parties.guest[0]).component_param(
        table=guest_train_data)
    for i in range(num_host):
        reader_0.get_party_instance(role="host", party_id=hosts[i]).component_param(
            table=host_train_data[i])

    dataio_0 = DataIO(name="dataio_0", with_label=True)
    dataio_0.get_party_instance(role="guest", party_id=config.parties.guest[0]).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role="host", party_id=hosts).component_param(with_label=True)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(nn_component, data=Data(train_data=dataio_0.output.data))
    pipeline.compile()

    job_parameters = JobParameters(backend=config.backend, work_mode=config.work_mode)
    pipeline.fit(job_parameters)
    print(pipeline.get_component("homo_nn_0").get_summary())

    pipeline.deploy_component([dataio_0, nn_component])

    # predict
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.dataio_0.input.data: reader_0.output.data}))
    # run predict model
    predict_pipeline.predict(job_parameters)
def main():
    # parties config
    guest = 9999
    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    tag_data = {"name": "tag_value_1", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # original csv file path
    pipeline_upload.add_upload_data(
        file=os.path.join(DATA_BASE, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],      # table name
        namespace=dense_data["namespace"],  # namespace
        head=1, partition=partition)        # data info
    pipeline_upload.add_upload_data(
        file=os.path.join(DATA_BASE, "examples/data/tag_value_1000_140.csv"),
        table_name=tag_data["name"],
        namespace=tag_data["namespace"],
        head=0, partition=partition)

    # upload all data
    pipeline_upload.upload(drop=1)
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    data_base = config.data_base_dir

    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    tag_data = {"name": "tag_value_1", "namespace": f"experiment{namespace}"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # path to csv file(s) to be uploaded
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],      # table name
        namespace=dense_data["namespace"],  # namespace
        head=1, partition=partition,        # data info
        id_delimiter=",",
        extend_sid=True)
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/tag_value_1000_140.csv"),
        table_name=tag_data["name"],
        namespace=tag_data["namespace"],
        head=0, partition=partition,
        id_delimiter=",",
        extend_sid=True)

    # upload both data tables
    pipeline_upload.upload(drop=1)
def main(data_base=DATA_BASE):
    # parties config
    guest = 9999
    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    tag_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # path to csv file(s) to be uploaded; modify to upload designated data
    # This is an example for the standalone version. For the cluster version, upload data
    # on each party separately.
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],      # table name
        namespace=dense_data["namespace"],  # namespace
        head=1, partition=partition)        # data info
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_host.csv"),
        table_name=tag_data["name"],
        namespace=tag_data["namespace"],
        head=1, partition=partition)

    # upload data
    pipeline_upload.upload(drop=1)
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    backend = config.backend
    work_mode = config.work_mode
    data_base = config.data_base

    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    tag_data = {"name": "tag_value_1", "namespace": f"experiment{namespace}"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # csv file name from python path & file name
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],      # table name
        namespace=dense_data["namespace"],  # namespace
        head=1, partition=partition,        # data info
        id_delimiter=",")                   # needed for spark backend
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/tag_value_1000_140.csv"),
        table_name=tag_data["name"],
        namespace=tag_data["namespace"],
        head=0, partition=partition,
        id_delimiter=",")

    # upload all data
    pipeline_upload.upload(work_mode=work_mode, backend=backend, drop=1)
def test_upload(self):
    upload_pipeline = PipeLine()
    upload_pipeline.add_upload_data(file=self.file,
                                    table_name=self.table_name,
                                    namespace=self.job_id)
    upload_pipeline.upload()
    upload_count = session.get_data_table(self.table_name, self.job_id).count()
    return upload_count == self.data_count
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment_sid{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment_sid{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0", with_match_id=True, with_label=True)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.compile()
    pipeline.fit()
def main(data_base=DATA_BASE):
    # parties config
    guest = 9999
    # 0 for eggroll, 1 for spark
    backend = Backend.EGGROLL
    # 0 for standalone, 1 for cluster
    work_mode = WorkMode.STANDALONE
    # use the work mode below for cluster deployment
    # work_mode = WorkMode.CLUSTER
    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    tag_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # path to csv file(s) to be uploaded, modify to upload designated data
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],      # table name
        namespace=dense_data["namespace"],  # namespace
        head=1, partition=partition)        # data info
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_host.csv"),
        table_name=tag_data["name"],
        namespace=tag_data["namespace"],
        head=1, partition=partition)

    # upload data
    pipeline_upload.upload(work_mode=work_mode, backend=backend, drop=1)
def main():
    # parties config
    guest = 9999
    # 0 for eggroll, 1 for spark
    backend = Backend.EGGROLL
    # 0 for standalone, 1 for cluster
    work_mode = WorkMode.STANDALONE
    # use the work mode below for cluster deployment
    # work_mode = WorkMode.CLUSTER
    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    tag_data = {"name": "tag_value_1", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # original csv file path
    pipeline_upload.add_upload_data(
        file=os.path.join(DATA_BASE, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],      # table name
        namespace=dense_data["namespace"],  # namespace
        head=1, partition=partition)        # data info
    pipeline_upload.add_upload_data(
        file=os.path.join(DATA_BASE, "examples/data/tag_value_1000_140.csv"),
        table_name=tag_data["name"],
        namespace=tag_data["namespace"],
        head=0, partition=partition)

    # upload all data
    pipeline_upload.upload(work_mode=work_mode, backend=backend, drop=1)
def main():
    # parties config
    guest = 9999
    host = 10000
    arbiter = 10000

    # specify input data name & namespace in database
    guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}
    guest_eval_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_eval_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role="guest", party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)

    # define DataTransform component
    data_transform_0 = DataTransform(name="data_transform_0")
    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(role="guest", party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")

    # define HeteroLR component
    hetero_lr_0 = HeteroLR(name="hetero_lr_0",
                           early_stop="diff",
                           learning_rate=0.15,
                           optimizer="rmsprop",
                           max_iter=10)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    # set train data of hetero_lr_0 component
    pipeline.add_component(hetero_lr_0, data=Data(train_data=intersection_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    import json
    print(json.dumps(pipeline.get_component("hetero_lr_0").get_summary(), indent=4))

    # predict
    # deploy required components
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_lr_0])

    # initiate predict pipeline
    predict_pipeline = PipeLine()

    # define new data reader
    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(role="host", party_id=host).component_param(table=host_eval_data)

    # define evaluation component
    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_0.get_party_instance(role="guest", party_id=guest).component_param(
        need_run=True, eval_type="binary")
    evaluation_0.get_party_instance(role="host", party_id=host).component_param(need_run=False)

    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_1)
    # add selected components from train pipeline onto predict pipeline
    # specify data source
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_1.output.data}))
    # add evaluation component to predict pipeline
    predict_pipeline.add_component(evaluation_0, data=Data(data=pipeline.hetero_lr_0.output.data))
    # run predict model
    predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]

    guest_train_data = [
        {"name": "tag_value_1", "namespace": f"experiment{namespace}"},
        {"name": "tag_value_2", "namespace": f"experiment{namespace}"},
        {"name": "tag_value_3", "namespace": f"experiment{namespace}"},
    ]

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data[0])
    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data[1])
    reader_2 = Reader(name="reader_2")
    reader_2.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data[2])

    union_0 = Union(name="union_0", allow_missing=False, keep_duplicate=True, need_run=True)

    data_transform_0 = DataTransform(name="data_transform_0",
                                     input_format="tag",
                                     with_label=False,
                                     tag_with_value=True,
                                     delimitor=",",
                                     output_format="dense")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(reader_2)
    pipeline.add_component(
        union_0,
        data=Data(data=[reader_0.output.data, reader_1.output.data, reader_2.output.data]))
    pipeline.add_component(data_transform_0, data=Data(data=union_0.output.data))

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)

    # define ColumnExpand components
    column_expand_0 = ColumnExpand(name="column_expand_0")
    column_expand_0.get_party_instance(role="guest", party_id=guest).component_param(
        need_run=True,
        method="manual",
        append_header=["x_0", "x_1", "x_2", "x_3"],
        fill_value=[0, 0.2, 0.5, 1])

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role="guest", party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True, output_format="dense")

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(column_expand_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_0, data=Data(data=column_expand_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    dataio_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_nn_0 = HeteroNN(name="hetero_nn_0",
                           epochs=100,
                           interactive_layer_lr=0.15,
                           batch_size=-1,
                           early_stop="diff",
                           selector_param={"method": "relative"})
    guest_nn_0 = hetero_nn_0.get_party_instance(role='guest', party_id=guest)
    guest_nn_0.add_bottom_model(
        Dense(units=3, input_shape=(10,), activation="relu",
              kernel_initializer=initializers.Constant(value=1)))
    guest_nn_0.set_interactve_layer(
        Dense(units=2, input_shape=(2,),
              kernel_initializer=initializers.Constant(value=1)))
    guest_nn_0.add_top_model(
        Dense(units=1, input_shape=(2,), activation="sigmoid",
              kernel_initializer=initializers.Constant(value=1)))
    host_nn_0 = hetero_nn_0.get_party_instance(role='host', party_id=host)
    host_nn_0.add_bottom_model(
        Dense(units=3, input_shape=(20,), activation="relu",
              kernel_initializer=initializers.Constant(value=1)))
    host_nn_0.set_interactve_layer(
        Dense(units=2, input_shape=(2,),
              kernel_initializer=initializers.Constant(value=1)))
    hetero_nn_0.compile(optimizer=optimizers.SGD(lr=0.15),
                        metrics=["AUC"],
                        loss="binary_crossentropy")

    hetero_nn_1 = HeteroNN(name="hetero_nn_1")

    evaluation_0 = Evaluation(name="evaluation_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_nn_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(hetero_nn_1,
                           data=Data(test_data=intersection_0.output.data),
                           model=Model(model=hetero_nn_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_nn_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    print(hetero_nn_0.get_config(roles={"guest": [guest], "host": [host]}))
    print(pipeline.get_component("hetero_nn_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(
        guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0",
                                     with_label=True,
                                     output_format="dense",
                                     label_type="int",
                                     label_name="y")

    homo_lr_0 = HomoLR(name="homo_lr_0",
                       penalty="L2",
                       optimizer="sgd",
                       tol=0.0001,
                       alpha=0.01,
                       max_iter=30,
                       batch_size=-1,
                       early_stop="weight_diff",
                       learning_rate=0.15,
                       init_param={"init_method": "zeros"})

    local_baseline_0 = LocalBaseline(name="local_baseline_0",
                                     model_name="LogisticRegression",
                                     model_opts={"penalty": "l2",
                                                 "tol": 0.0001,
                                                 "C": 1.0,
                                                 "fit_intercept": True,
                                                 "solver": "saga",
                                                 "max_iter": 2})
    local_baseline_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary", pos_label=1)
    evaluation_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(homo_lr_0, data=Data(train_data=data_transform_0.output.data))
    pipeline.add_component(local_baseline_0, data=Data(train_data=data_transform_0.output.data))
    pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_0.output.data, local_baseline_0.output.data]))

    pipeline.compile()
    pipeline.fit()

    # predict
    pipeline.deploy_component([data_transform_0, homo_lr_0, local_baseline_0])
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={pipeline.data_transform_0.input.data: reader_0.output.data}))
    predict_pipeline.add_component(
        evaluation_0,
        data=Data(data=[homo_lr_0.output.data, local_baseline_0.output.data]))
    predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    guest_test_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_train_data = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}
    host_test_data = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_test_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_test_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")  # start component numbering at 1

    param = {
        "with_label": True,
        "label_name": "y",
        "label_type": "int",
        "output_format": "dense",
        "missing_fill": True,
        "missing_fill_method": "mean",
        "outlier_replace": False,
        "outlier_replace_method": "designated",
        "outlier_replace_value": 0.66,
        "outlier_impute": "-9999"
    }
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(**param)
    # configure dataio_1 for guest with the same parameters
    dataio_1.get_party_instance(role='guest', party_id=guest).component_param(**param)

    param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense"
    }
    dataio_0.get_party_instance(role='host', party_id=host).component_param(**param)
    dataio_1.get_party_instance(role='host', party_id=host).component_param(**param)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0", intersect_method="raw")
    intersection_1 = Intersection(name="intersection_1", intersect_method="raw")

    param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv",
            "init_bucket_method": "quantile"
        },
        "bin_indexes": -1
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**param)

    statistic_0 = DataStatistics(name='statistic_0')

    param = {
        "name": 'hetero_feature_selection_0',
        "filter_methods": ["manually", "unique_value", "iv_filter", "statistic_filter"],
        "manually_param": {
            "filter_out_indexes": [1, 2],
            "filter_out_names": ["x2", "x3"]
        },
        "unique_param": {
            "eps": 1e-6
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.1]
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, False],
            "threshold": [0.001, -0.01]
        },
        "select_col_indexes": -1
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**param)
    hetero_feature_selection_1 = HeteroFeatureSelection(name='hetero_feature_selection_1')

    param = {
        "task_type": "classification",
        "learning_rate": 0.1,
        "num_trees": 10,
        "subsample_feature_rate": 0.5,
        "n_iter_no_change": False,
        "tol": 0.0002,
        "bin_num": 50,
        "objective_param": {
            "objective": "cross_entropy"
        },
        "encrypt_param": {
            "method": "paillier"
        },
        "predict_param": {
            "threshold": 0.5
        },
        "tree_param": {
            "max_depth": 2
        },
        "cv_param": {
            "n_splits": 5,
            "shuffle": False,
            "random_seed": 103,
            "need_cv": False
        },
        "validation_freqs": 2,
        "early_stopping_rounds": 5,
        "metrics": ["auc", "ks"]
    }
    hetero_secureboost_0 = HeteroSecureBoost(name='hetero_secureboost_0', **param)

    evaluation_0 = Evaluation(name='evaluation_0')

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(statistic_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[hetero_feature_binning_0.output.model,
                                     statistic_0.output.model]))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_selection_0.output.model))
    # set train & validate data of hetero_secureboost_0 component
    pipeline.add_component(
        hetero_secureboost_0,
        data=Data(train_data=hetero_feature_selection_0.output.data,
                  validate_data=hetero_feature_selection_1.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_secureboost_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    print(pipeline.get_component("hetero_secureboost_0").get_summary())
from pipeline.backend.config import Backend, WorkMode
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataTransform, Intersection, HeteroSecureBoost, Evaluation
from pipeline.interface import Data
from pipeline.runtime.entity import JobParameters

# table name & namespace in data storage
# data should be uploaded before running the modeling task
guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}

# initialize pipeline
# Party ids indicate the parties involved in federated learning. In standalone mode,
# arbitrary integers can be used as party ids.
pipeline = PipeLine().set_initiator(role="guest", party_id=9999).set_roles(guest=9999, host=10000)

# define components
# Reader is a component that obtains the uploaded data. It is needed in nearly every job.
reader_0 = Reader(name="reader_0")
# In the following way, you can set different parameters for different parties.
reader_0.get_party_instance(role="guest", party_id=9999).component_param(table=guest_train_data)
reader_0.get_party_instance(role="host", party_id=10000).component_param(table=host_train_data)

# DataTransform preprocesses the raw data: extracting labels, converting data format,
# filling missing values, and so on. Refer to the algorithm list doc for more details.
data_transform_0 = DataTransform(name="data_transform_0", with_label=True)
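# The snippet above stops after defining data_transform_0. What follows is a minimal,
# hedged sketch of how such a job is typically wired up and run, modeled on the other
# examples in this section; the secureboost parameters (num_trees=5, eval_type="binary")
# are illustrative assumptions, not values taken from the original snippet.
intersection_0 = Intersection(name="intersection_0")
hetero_secureboost_0 = HeteroSecureBoost(name="hetero_secureboost_0", num_trees=5)
evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

# wire components in order of task execution
pipeline.add_component(reader_0)
pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
pipeline.add_component(hetero_secureboost_0, data=Data(train_data=intersection_0.output.data))
pipeline.add_component(evaluation_0, data=Data(data=hetero_secureboost_0.output.data))

pipeline.compile()
# this variant imports Backend/WorkMode/JobParameters, so fit presumably takes job parameters
job_parameters = JobParameters(backend=Backend.EGGROLL, work_mode=WorkMode.STANDALONE)
pipeline.fit(job_parameters)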
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    guest_test_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_train_data = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}
    host_test_data = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    reader_1.get_party_instance(role='guest', party_id=guest).algorithm_param(table=guest_test_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).algorithm_param(table=host_train_data)
    reader_1.get_party_instance(role='host', party_id=host).algorithm_param(table=host_test_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")  # start component numbering at 1

    param = {
        "with_label": True,
        "label_name": "y",
        "label_type": "int",
        "output_format": "dense",
        "missing_fill": True,
        "missing_fill_method": "mean",
        "outlier_replace": False,
        "outlier_replace_method": "designated",
        "outlier_replace_value": 0.66,
        "outlier_impute": "-9999"
    }
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.algorithm_param(**param)
    # configure dataio_1 for guest with the same parameters
    dataio_1.get_party_instance(role='guest', party_id=guest).algorithm_param(**param)

    param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense"
    }
    dataio_0.get_party_instance(role='host', party_id=host).algorithm_param(**param)
    dataio_1.get_party_instance(role='host', party_id=host).algorithm_param(**param)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0", intersect_method="raw")
    intersection_1 = Intersection(name="intersection_1", intersect_method="raw")

    param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv",
            "init_bucket_method": "quantile"
        },
        "bin_indexes": -1
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**param)

    statistic_0 = DataStatistics(name='statistic_0')

    param = {
        "name": 'hetero_feature_selection_0',
        "filter_methods": ["manually", "unique_value", "iv_filter", "statistic_filter"],
        "manually_param": {
            "filter_out_indexes": [1, 2],
            "filter_out_names": ["x3", "x4"]
        },
        "unique_param": {
            "eps": 1e-6
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.1]
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, False],
            "threshold": [0.001, -0.01]
        },
        "select_col_indexes": -1
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**param)
    hetero_feature_selection_1 = HeteroFeatureSelection(name='hetero_feature_selection_1')

    param = {"name": "hetero_scale_0", "method": "standard_scale"}
    hetero_scale_0 = FeatureScale(**param)
    hetero_scale_1 = FeatureScale(name='hetero_scale_1')

    param = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 1e-4,
        "alpha": 0.01,
        "max_iter": 5,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "zeros"
        },
        "validation_freqs": None,
        "early_stopping_rounds": None
    }
    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **param)

    evaluation_0 = Evaluation(name='evaluation_0')

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(statistic_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[hetero_feature_binning_0.output.model,
                                     statistic_0.output.model]))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_selection_0.output.model))
    pipeline.add_component(hetero_scale_0, data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(hetero_scale_1,
                           data=Data(data=hetero_feature_selection_1.output.data),
                           model=Model(hetero_scale_0.output.model))
    # set train & validate data of hetero_lr_0 component
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=hetero_scale_0.output.data,
                                     validate_data=hetero_scale_1.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=[hetero_lr_0.output.data]))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)
    # query component summary
    print(pipeline.get_component("hetero_lr_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # define DataTransform components
    data_transform_0 = DataTransform(name="data_transform_0",
                                     with_label=True,
                                     output_format="dense")  # start component numbering at 0

    homo_binning_0 = HomoFeatureBinning(name='homo_binning_0', sample_bins=1000, method="recursive_query")
    homo_binning_1 = HomoFeatureBinning(name='homo_binning_1', sample_bins=1000)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    # set data input sources of binning components
    pipeline.add_component(homo_binning_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(homo_binning_1,
                           data=Data(data=data_transform_0.output.data),
                           model=Model(model=homo_binning_0.output.model))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0", with_label=True, output_format="dense")  # start component numbering at 0

    scale_0 = FeatureScale(name='scale_0')

    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }
    homo_lr_0 = HomoLR(name='homo_lr_0', **param)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_lr_0, data=Data(train_data=scale_0.output.data))

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # query component summary
    print(json.dumps(pipeline.get_component("homo_lr_0").get_summary(),
                     indent=4, ensure_ascii=False))
    print(json.dumps(pipeline.get_component("evaluation_0").get_summary(),
                     indent=4, ensure_ascii=False))
import json

from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, DataTransform, Intersection, HeteroSecureBoost, Evaluation
from pipeline.interface import Data

# table name & namespace in data storage
# data should be uploaded before running the modeling task
guest_train_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
host_train_data = {"name": "breast_hetero_host", "namespace": "experiment"}

# initialize pipeline
# Party ids indicate the parties involved in federated learning. In standalone mode,
# arbitrary integers can be used as party ids.
pipeline = PipeLine().set_initiator(role="guest", party_id=9999).set_roles(guest=9999, host=10000)

# define components
# Reader is a component that obtains the uploaded data. It is needed in nearly every job.
reader_0 = Reader(name="reader_0")
# In the following way, you can set different parameters for different parties.
reader_0.get_party_instance(role="guest", party_id=9999).component_param(table=guest_train_data)
reader_0.get_party_instance(role="host", party_id=10000).component_param(table=host_train_data)

# DataTransform preprocesses the raw data: extracting labels, converting data format,
# filling missing values, and so on. Refer to the algorithm list doc for more details.
data_transform_0 = DataTransform(name="data_transform_0", with_label=True)
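# As above, this variant also stops after defining data_transform_0. A minimal sketch
# of the continuation under the same assumptions (num_trees=5 and eval_type="binary"
# are illustrative), this time using the argument-free fit() that this variant's
# imports suggest:
intersection_0 = Intersection(name="intersection_0")
hetero_secureboost_0 = HeteroSecureBoost(name="hetero_secureboost_0", num_trees=5)
evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

pipeline.add_component(reader_0)
pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
pipeline.add_component(hetero_secureboost_0, data=Data(train_data=intersection_0.output.data))
pipeline.add_component(evaluation_0, data=Data(data=hetero_secureboost_0.output.data))

pipeline.compile()
pipeline.fit()
print(json.dumps(pipeline.get_component("hetero_secureboost_0").get_summary(), indent=4))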
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "nus_wide_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "nus_wide_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    hetero_ftl_0 = HeteroFTL(name='hetero_ftl_0',
                             epochs=10,
                             alpha=1,
                             batch_size=-1,
                             mode='plain')
    hetero_ftl_0.add_nn_layer(
        Dense(units=32,
              activation='sigmoid',
              kernel_initializer=initializers.RandomNormal(stddev=1.0, dtype="float32"),
              bias_initializer=initializers.Zeros()))
    hetero_ftl_0.compile(optimizer=optimizers.Adam(lr=0.01))

    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(hetero_ftl_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_ftl_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "dvisits_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "dvisits_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(
        guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0", output_format="sparse")
    data_transform_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, label_name="doctorco", label_type="float")
    data_transform_0.get_party_instance(role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_poisson_0 = HeteroPoisson(
        name="hetero_poisson_0",
        early_stop="weight_diff",
        max_iter=2,
        alpha=100.0,
        batch_size=-1,
        learning_rate=0.01,
        exposure_colname="exposure",
        optimizer="rmsprop",
        penalty="L2",
        decay_sqrt=False,
        tol=0.001,
        init_param={"init_method": "zeros"})

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="regression", pos_label=1)
    evaluation_0.get_party_instance(role='host', party_id=host).component_param(need_run=False)

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_poisson_0, data=Data(train_data=intersection_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_poisson_0.output.data))

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]

    guest_train_data = {"name": "ionosphere_scale_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "ionosphere_scale_hetero_host", "namespace": f"experiment{namespace}"}
    # guest_train_data = {"name": "default_credit_hetero_guest", "namespace": f"experiment{namespace}"}
    # host_train_data = {"name": "default_credit_hetero_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=hosts)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0", output_format='dense', missing_fill=False)
    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(role='guest', party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(with_label=True, label_name="label")
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))

    statistic_param = {
        "name": "statistic_0",
        "statistics": ["95%", "coefficient_of_variance", "stddev"],
        "column_indexes": [1, 2],
        "column_names": ["x3"]
    }
    statistic_0 = DataStatistics(**statistic_param)
    pipeline.add_component(statistic_0, data=Data(data=intersection_0.output.data))

    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    prettify(pipeline.get_component("statistic_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    # data sets
    guest_train_data = {"name": "student_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "student_hetero_host", "namespace": f"experiment{namespace}"}

    # init pipeline
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest, host=host)

    # set data reader and data-io
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)

    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role="guest", party_id=guest).component_param(
        with_label=True, output_format="dense", label_type="float")
    dataio_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")

    # secure boost component
    hetero_secure_boost_0 = HeteroSecureBoost(
        name="hetero_secure_boost_0",
        num_trees=3,
        task_type="regression",
        objective_param={"objective": "lse"},
        encrypt_param={"method": "iterativeAffine"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        cv_param={
            "need_cv": True,
            "n_splits": 5,
            "shuffle": False,
            "random_seed": 103
        })

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_secure_boost_0, data=Data(train_data=intersect_0.output.data))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    print("fitting hetero secureboost done, result:")
    print(pipeline.get_component("hetero_secure_boost_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    backend = config.backend
    work_mode = config.work_mode

    # specify input data name & namespace in database
    guest_train_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).algorithm_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.algorithm_param(with_label=True, output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(role='host', party_id=host).algorithm_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")
    intersection_0.get_party_instance(role="guest", party_id=guest).algorithm_param(
        intersect_method="rsa",
        sync_intersect_ids=True,
        only_output_key=True)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # run job
    pipeline.fit(backend=backend, work_mode=work_mode)

    # query component summary; this job contains no hetero_lr component,
    # so query the intersection component instead
    print(pipeline.get_component("intersection_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(
        guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)
    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0", with_label=True)
    data_transform_1 = DataTransform(name="data_transform_1")

    federated_sample_0 = FederatedSample(name="federated_sample_0",
                                         mode="stratified",
                                         method="downsample",
                                         fractions=[[0, 1.0], [1, 1.0]],
                                         task_type="homo")

    homo_binning_0 = HomoFeatureBinning(name='homo_binning_0', sample_bins=10, method="recursive_query")
    homo_binning_1 = HomoFeatureBinning(name='homo_binning_1')

    homo_onehot_0 = HomoOneHotEncoder(name='homo_onehot_0', need_alignment=True)
    homo_onehot_1 = HomoOneHotEncoder(name='homo_onehot_1')

    homo_lr_0 = HomoLR(name="homo_lr_0", penalty="L2", tol=0.0001, alpha=1.0,
                       optimizer="rmsprop", max_iter=5)
    homo_lr_1 = HomoLR(name="homo_lr_1")

    local_baseline_0 = LocalBaseline(name="local_baseline_0",
                                     model_name="LogisticRegression",
                                     model_opts={"penalty": "l2", "tol": 0.0001, "C": 1.0,
                                                 "fit_intercept": True, "solver": "lbfgs",
                                                 "max_iter": 5, "multi_class": "ovr"})
    local_baseline_0.get_party_instance(role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(role='host', party_id=host).component_param(need_run=True)
    local_baseline_1 = LocalBaseline(name="local_baseline_1")

    homo_secureboost_0 = HomoSecureBoost(name="homo_secureboost_0", num_trees=3)
    homo_secureboost_1 = HomoSecureBoost(name="homo_secureboost_1", num_trees=3)

    evaluation_0 = Evaluation(name="evaluation_0")
    evaluation_1 = Evaluation(name="evaluation_1")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(model=data_transform_0.output.model))
    pipeline.add_component(federated_sample_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(homo_binning_0, data=Data(data=federated_sample_0.output.data))
    pipeline.add_component(homo_binning_1,
                           data=Data(data=data_transform_1.output.data),
                           model=Model(model=homo_binning_0.output.model))
    pipeline.add_component(homo_onehot_0, data=Data(data=homo_binning_0.output.data))
    pipeline.add_component(homo_onehot_1,
                           data=Data(data=homo_binning_1.output.data),
                           model=Model(model=homo_onehot_0.output.model))
    pipeline.add_component(homo_lr_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(homo_lr_1,
                           data=Data(data=homo_onehot_1.output.data),
                           model=Model(model=homo_lr_0.output.model))
    pipeline.add_component(local_baseline_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(local_baseline_1,
                           data=Data(data=homo_onehot_1.output.data),
                           model=Model(model=local_baseline_0.output.model))
    pipeline.add_component(homo_secureboost_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(homo_secureboost_1,
                           data=Data(data=homo_onehot_1.output.data),
                           model=Model(model=homo_secureboost_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=[homo_lr_0.output.data,
                                                         homo_lr_1.output.data,
                                                         local_baseline_0.output.data,
                                                         local_baseline_1.output.data]))
    pipeline.add_component(evaluation_1, data=Data(data=[homo_secureboost_0.output.data,
                                                         homo_secureboost_1.output.data]))

    pipeline.compile()
    pipeline.fit()

    print(pipeline.get_component("evaluation_0").get_summary())
    print(pipeline.get_component("evaluation_1").get_summary())
def make_normal_dsl(config, namespace):
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=hosts).component_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(role='host', party_id=hosts).component_param(with_label=True)

    scale_0 = FeatureScale(name='scale_0')

    homo_sbt_0 = HomoSecureBoost(name="homo_secureboost_0",
                                 num_trees=3,
                                 task_type='classification',
                                 objective_param={"objective": "cross_entropy"},
                                 tree_param={"max_depth": 3},
                                 validation_freqs=1)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_sbt_0, data=Data(train_data=scale_0.output.data))

    selection_param = {
        "name": "hetero_feature_selection_0",
        "select_col_indexes": -1,
        "select_names": [],
        "filter_methods": ["homo_sbt_filter"],
        "sbt_param": {
            "metrics": "feature_importance",
            "filter_type": "threshold",
            "take_high": True,
            "threshold": 0.03
        }
    }
    feature_selection_0 = HeteroFeatureSelection(**selection_param)

    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }
    homo_lr_0 = HomoLR(name='homo_lr_0', **param)

    pipeline.add_component(feature_selection_0,
                           data=Data(data=scale_0.output.data),
                           model=Model(isometric_model=homo_sbt_0.output.model))
    pipeline.add_component(homo_lr_0, data=Data(train_data=feature_selection_0.output.data))

    evaluation_0 = Evaluation(name='evaluation_0')
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()
    return pipeline