Exemplo n.º 1
0
def main(config="../../config.yaml", namespace=""):
    """Fit hetero LR next to a guest-only local baseline, then run prediction.

    The training pipeline is reader -> data transform -> intersection ->
    (hetero LR, local baseline) -> evaluation. After fitting, the trained
    components are deployed and reused inside a separate predict pipeline.

    Args:
        config: Job configuration object, or a path string that is handed to
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
    """
    # A string is treated as a path to a config file; anything else is used as is.
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest_id, host_id, arbiter_id = parties.guest[0], parties.host[0], parties.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "vehicle_scale_hetero_guest", "namespace": table_namespace}
    host_table = {"name": "vehicle_scale_hetero_host", "namespace": table_namespace}

    def configure_guest_only(component):
        # Enable the component on the guest and switch it off on the host.
        component.get_party_instance(role='guest', party_id=guest_id).component_param(need_run=True)
        component.get_party_instance(role='host', party_id=host_id).component_param(need_run=False)

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline = pipeline.set_roles(guest=guest_id, host=host_id, arbiter=arbiter_id)

    reader = Reader(name="reader_0")
    reader.get_party_instance(role='guest', party_id=guest_id).component_param(table=guest_table)
    reader.get_party_instance(role='host', party_id=host_id).component_param(table=host_table)

    transform = DataTransform(name="data_transform_0")
    # Only the guest table is configured with a label column ("y").
    guest_transform = transform.get_party_instance(role='guest', party_id=guest_id)
    guest_transform.component_param(with_label=True, output_format="dense",
                                    label_type="int", label_name="y")
    host_transform = transform.get_party_instance(role='host', party_id=host_id)
    host_transform.component_param(with_label=False)

    intersection = Intersection(name="intersection_0", intersect_method="rsa",
                                sync_intersect_ids=True, only_output_key=False)

    lr_settings = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 0.0001,
        "alpha": 0.0001,
        "max_iter": 30,
        "batch_size": -1,
        "early_stop": "diff",
        "learning_rate": 0.15,
        "init_param": {"init_method": "zeros"},
    }
    hetero_lr = HeteroLR(name="hetero_lr_0", **lr_settings)

    baseline_opts = {"penalty": "l2", "tol": 0.0001, "C": 1.0, "fit_intercept": True,
                     "solver": "lbfgs", "max_iter": 5, "multi_class": "ovr"}
    local_baseline = LocalBaseline(name="local_baseline_0", model_name="LogisticRegression",
                                   model_opts=baseline_opts)
    configure_guest_only(local_baseline)

    evaluation = Evaluation(name="evaluation_0", eval_type="multi", pos_label=1)
    configure_guest_only(evaluation)

    # Wire the training graph; both models consume the intersected data.
    pipeline.add_component(reader)
    pipeline.add_component(transform, data=Data(data=reader.output.data))
    pipeline.add_component(intersection, data=Data(data=transform.output.data))
    intersected = intersection.output.data
    pipeline.add_component(hetero_lr, data=Data(train_data=intersected))
    pipeline.add_component(local_baseline, data=Data(train_data=intersection.output.data))
    pipeline.add_component(
        evaluation,
        data=Data(data=[hetero_lr.output.data, local_baseline.output.data]))

    pipeline.compile()
    pipeline.fit()

    # predict: deploy the fitted components and feed them from the same reader
    pipeline.deploy_component([transform, intersection, hetero_lr, local_baseline])

    predictor = PipeLine()
    predictor.add_component(reader)
    predict_source = {pipeline.data_transform_0.input.data: reader.output.data}
    predictor.add_component(pipeline, data=Data(predict_input=predict_source))
    predictor.add_component(
        evaluation,
        data=Data(data=[hetero_lr.output.data, local_baseline.output.data]))
    predictor.predict()
Exemplo n.º 2
0
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero linear regression with one guest and two hosts, then predict.

    The training pipeline is reader -> data transform -> intersection ->
    hetero LinR -> evaluation. The fitted transform, intersection and model
    are then deployed into a predict pipeline fed by the same reader.

    Args:
        config: Job configuration object, or a path string that is handed to
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
    """
    # A string is treated as a path to a config file; anything else is used as is.
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest_id = parties.guest[0]
    host_ids = parties.host
    arbiter_id = parties.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "motor_hetero_guest", "namespace": table_namespace}
    # Both hosts read a table with the same name; each gets its own dict.
    host_tables = [{"name": "motor_hetero_host", "namespace": table_namespace}
                   for _ in range(2)]

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline = pipeline.set_roles(guest=guest_id, host=host_ids, arbiter=arbiter_id)

    reader = Reader(name="reader_0")
    reader.get_party_instance(role='guest', party_id=guest_id).component_param(table=guest_table)
    for position in (0, 1):
        host_reader = reader.get_party_instance(role='host', party_id=host_ids[position])
        host_reader.component_param(table=host_tables[position])

    transform = DataTransform(name="data_transform_0")
    guest_transform = transform.get_party_instance(role='guest', party_id=guest_id)
    guest_transform.component_param(with_label=True, label_name="motor_speed",
                                    label_type="float", output_format="dense")
    # The whole host list is passed as party_id, so one call configures all hosts.
    transform.get_party_instance(role='host', party_id=host_ids).component_param(with_label=False)

    intersection = Intersection(name="intersection_0")

    linr_settings = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 0.001,
        "alpha": 0.01,
        "max_iter": 20,
        "early_stop": "weight_diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 0.0,
        "decay_sqrt": False,
        "init_param": {"init_method": "zeros"},
        "encrypted_mode_calculator_param": {"mode": "fast"},
    }
    hetero_linr = HeteroLinR(name="hetero_linr_0", **linr_settings)

    # No per-host need_run override is applied to the evaluation component.
    evaluation = Evaluation(name="evaluation_0", eval_type="regression", pos_label=1)

    pipeline.add_component(reader)
    pipeline.add_component(transform, data=Data(data=reader.output.data))
    pipeline.add_component(intersection, data=Data(data=transform.output.data))
    pipeline.add_component(hetero_linr, data=Data(train_data=intersection.output.data))
    pipeline.add_component(evaluation, data=Data(data=hetero_linr.output.data))

    pipeline.compile()
    pipeline.fit()

    # predict: deploy the components needed at inference time
    pipeline.deploy_component([transform, intersection, hetero_linr])

    predictor = PipeLine()
    # the reader provides the data for the predict pipeline
    predictor.add_component(reader)
    # plug the reader output into the deployed data transform input
    predict_source = {pipeline.data_transform_0.input.data: reader.output.data}
    predictor.add_component(pipeline, data=Data(predict_input=predict_source))
    # run predict model
    predictor.predict()
Exemplo n.º 3
0
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero Poisson regression on the dvisits tables, then predict.

    The training pipeline is reader -> DataIO -> intersection -> hetero
    Poisson -> evaluation. The fitted DataIO, intersection and model are then
    deployed into a predict pipeline fed by the same reader.

    Args:
        config: Job configuration object, or a path string that is handed to
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
    """
    # A string is treated as a path to a config file; anything else is used as is.
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest_id, host_id, arbiter_id = parties.guest[0], parties.host[0], parties.arbiter[0]
    # backend / work_mode are forwarded unchanged to fit() and predict()
    run_options = {"backend": job_config.backend, "work_mode": job_config.work_mode}

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "dvisits_hetero_guest", "namespace": table_namespace}
    host_table = {"name": "dvisits_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline = pipeline.set_roles(guest=guest_id, host=host_id, arbiter=arbiter_id)

    reader = Reader(name="reader_0")
    reader.get_party_instance(role='guest', party_id=guest_id).algorithm_param(table=guest_table)
    reader.get_party_instance(role='host', party_id=host_id).algorithm_param(table=host_table)

    dataio = DataIO(name="dataio_0")
    guest_dataio = dataio.get_party_instance(role='guest', party_id=guest_id)
    guest_dataio.algorithm_param(with_label=True, label_name="doctorco",
                                 label_type="float", output_format="dense")
    dataio.get_party_instance(role='host', party_id=host_id).algorithm_param(with_label=False)

    intersection = Intersection(name="intersection_0")

    poisson_settings = {
        "early_stop": "weight_diff",
        "max_iter": 10,
        "alpha": 100.0,
        "batch_size": -1,
        "learning_rate": 0.01,
        "optimizer": "rmsprop",
        "exposure_colname": "exposure",
        "decay_sqrt": False,
        "tol": 0.001,
        "init_param": {"init_method": "zeros"},
        "penalty": "L2",
        "encrypted_mode_calculator_param": {"mode": "fast"},
    }
    hetero_poisson = HeteroPoisson(name="hetero_poisson_0", **poisson_settings)

    evaluation = Evaluation(name="evaluation_0", eval_type="regression", pos_label=1)
    # evaluation is switched off on the host side
    evaluation.get_party_instance(role='host', party_id=host_id).algorithm_param(need_run=False)

    pipeline.add_component(reader)
    pipeline.add_component(dataio, data=Data(data=reader.output.data))
    pipeline.add_component(intersection, data=Data(data=dataio.output.data))
    pipeline.add_component(hetero_poisson, data=Data(train_data=intersection.output.data))
    pipeline.add_component(evaluation, data=Data(data=hetero_poisson.output.data))

    pipeline.compile()
    pipeline.fit(**run_options)

    # predict: deploy the components needed at inference time
    pipeline.deploy_component([dataio, intersection, hetero_poisson])

    predictor = PipeLine()
    # the reader provides the data for the predict pipeline
    predictor.add_component(reader)
    # plug the reader output into the deployed DataIO input
    predict_source = {pipeline.dataio_0.input.data: reader.output.data}
    predictor.add_component(pipeline, data=Data(predict_input=predict_source))
    # run predict model
    predictor.predict(**run_options)
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero FTL model on the nus_wide tables, then predict.

    The training pipeline is reader -> DataIO -> hetero FTL -> evaluation,
    with a guest and a host only. The fitted DataIO and FTL components are
    then deployed into a predict pipeline fed by the same reader.

    Args:
        config: Job configuration object, or a path string that is handed to
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
    """
    # A string is treated as a path to a config file; anything else is used as is.
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest_id, host_id = parties.guest[0], parties.host[0]
    # backend / work_mode are forwarded unchanged to fit() and predict()
    run_options = {"backend": job_config.backend, "work_mode": job_config.work_mode}

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "nus_wide_guest", "namespace": table_namespace}
    host_table = {"name": "nus_wide_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline = pipeline.set_roles(guest=guest_id, host=host_id)

    reader = Reader(name="reader_0")
    reader.get_party_instance(role='guest', party_id=guest_id).algorithm_param(table=guest_table)
    reader.get_party_instance(role='host', party_id=host_id).algorithm_param(table=host_table)

    dataio = DataIO(name="dataio_0")
    guest_dataio = dataio.get_party_instance(role='guest', party_id=guest_id)
    guest_dataio.algorithm_param(with_label=True, output_format="dense")
    dataio.get_party_instance(role='host', party_id=host_id).algorithm_param(with_label=False)

    hetero_ftl = HeteroFTL(name='hetero_ftl_0',
                           epochs=10, alpha=1, batch_size=-1, mode='plain')

    # A single dense layer is registered on the FTL component.
    kernel_init = initializers.RandomNormal(stddev=1.0, dtype="float32")
    bias_init = initializers.Zeros()
    dense_layer = Dense(units=32, activation='sigmoid',
                        kernel_initializer=kernel_init,
                        bias_initializer=bias_init)
    hetero_ftl.add_nn_layer(dense_layer)

    hetero_ftl.compile(optimizer=optimizers.Adam(lr=0.01))
    evaluation = Evaluation(name='evaluation_0', eval_type="binary")

    pipeline.add_component(reader)
    pipeline.add_component(dataio, data=Data(data=reader.output.data))
    pipeline.add_component(hetero_ftl, data=Data(train_data=dataio.output.data))
    pipeline.add_component(evaluation, data=Data(data=hetero_ftl.output.data))

    pipeline.compile()
    pipeline.fit(**run_options)

    # predict: deploy the components needed at inference time
    pipeline.deploy_component([dataio, hetero_ftl])

    predictor = PipeLine()
    # the reader provides the data for the predict pipeline
    predictor.add_component(reader)
    # plug the reader output into the deployed DataIO input
    predict_source = {pipeline.dataio_0.input.data: reader.output.data}
    predictor.add_component(pipeline, data=Data(predict_input=predict_source))
    # run predict model
    predictor.predict(**run_options)
Exemplo n.º 5
0
def run_homo_nn_pipeline(config, namespace, data: dict, nn_component,
                         num_host):
    """Train the given homo NN component on guest/host tables, then predict.

    The training pipeline is reader -> DataIO -> ``nn_component``. After
    fitting, the summary of the component registered as ``"homo_nn_0"`` is
    printed, DataIO and the NN component are deployed, and a predict pipeline
    is run on the same reader.

    Args:
        config: Job configuration object, or a path string that is handed to
            ``load_job_config``.
        namespace: Suffix appended to each table's existing ``"namespace"``.
        data: Mapping with a ``"guest"`` table descriptor (dict) and a
            ``"host"`` list of table descriptors. It is not modified.
        nn_component: Component added to the pipeline after DataIO.
        num_host: Number of hosts (and host tables) to use, taken from the
            front of the configured host list.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    def with_namespace_suffix(table: dict) -> dict:
        # Build a new descriptor instead of editing the caller's dict, so
        # reusing the same `data` for several runs does not stack suffixes.
        return {**table, "namespace": f"{table['namespace']}{namespace}"}

    guest_train_data = with_namespace_suffix(data["guest"])
    host_train_data = [with_namespace_suffix(d) for d in data["host"][:num_host]]

    guest = config.parties.guest[0]
    hosts = config.parties.host[:num_host]
    pipeline = PipeLine() \
        .set_initiator(role='guest', party_id=guest) \
        .set_roles(guest=guest, host=hosts, arbiter=config.parties.arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(
            table=guest_train_data)
    # Indexed on purpose: a shortfall of hosts or host tables raises
    # IndexError here instead of being silently skipped.
    for i in range(num_host):
        reader_0.get_party_instance(role='host', party_id=hosts[i]) \
            .component_param(table=host_train_data[i])

    dataio_0 = DataIO(name="dataio_0", with_label=True)
    dataio_0.get_party_instance(role='guest', party_id=guest) \
        .component_param(with_label=True, output_format="dense")
    # The host list is passed as party_id, so one call configures all hosts.
    dataio_0.get_party_instance(
        role='host', party_id=hosts).component_param(with_label=True)

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(nn_component,
                           data=Data(train_data=dataio_0.output.data))
    pipeline.compile()
    job_parameters = JobParameters(backend=config.backend,
                                   work_mode=config.work_mode)
    pipeline.fit(job_parameters)
    # NOTE(review): the lookup name is fixed, so nn_component is presumably
    # expected to be named "homo_nn_0" - confirm against callers.
    print(pipeline.get_component("homo_nn_0").get_summary())
    pipeline.deploy_component([dataio_0, nn_component])

    # predict
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_pipeline.add_component(
        pipeline,
        data=Data(
            predict_input={pipeline.dataio_0.input.data: reader_0.output.data
                           }))
    # run predict model
    predict_pipeline.predict(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Run feature imputation on the dvisits tables, then predict.

    The training pipeline is reader -> data transform -> intersection ->
    feature imputation, with a guest and a host only. The fitted components
    are then deployed into a predict pipeline fed by the same reader.

    Args:
        config: Job configuration object, or a path string that is handed to
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
    """
    # A string is treated as a path to a config file; anything else is used as is.
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest_id, host_id = parties.guest[0], parties.host[0]

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "dvisits_hetero_guest", "namespace": table_namespace}
    host_table = {"name": "dvisits_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline = pipeline.set_roles(guest=guest_id, host=host_id)

    reader = Reader(name="reader_0")
    reader.get_party_instance(role='guest', party_id=guest_id).component_param(table=guest_table)
    reader.get_party_instance(role='host', party_id=host_id).component_param(table=host_table)

    transform = DataTransform(name="data_transform_0", with_label=False)

    intersection = Intersection(name="intersection_0")
    imputation = FeatureImputation(name="feature_imputation_0",
                                   default_value=42,
                                   missing_impute=[0])
    # Per-column fill methods are set for the guest only.
    guest_fill_methods = {"doctorco": "min", "hscore": "designated"}
    guest_imputation = imputation.get_party_instance(role='guest', party_id=guest_id)
    guest_imputation.component_param(col_missing_fill_method=guest_fill_methods)

    pipeline.add_component(reader)
    pipeline.add_component(transform, data=Data(data=reader.output.data))
    pipeline.add_component(intersection, data=Data(data=transform.output.data))
    pipeline.add_component(imputation, data=Data(data=intersection.output.data))
    pipeline.compile()

    pipeline.fit()

    # predict: deploy the components needed at inference time
    pipeline.deploy_component([transform, intersection, imputation])

    predictor = PipeLine()
    # the reader provides the data for the predict pipeline
    predictor.add_component(reader)
    # plug the reader output into the deployed data transform input
    predict_source = {pipeline.data_transform_0.input.data: reader.output.data}
    predictor.add_component(pipeline, data=Data(predict_input=predict_source))
    # run predict model
    predictor.predict()
Exemplo n.º 7
0
def main(config="../../config.yaml", namespace=""):
    """Fit a homo LR model on the breast tables, predict, and dump the predict job.

    The training pipeline is reader -> DataIO -> feature scale -> homo LR ->
    evaluation. After fitting, DataIO, the scaler and the model are deployed
    into a predict pipeline; its DSL and conf are written to JSON files in the
    current directory and the LR / evaluation summaries are printed.

    Args:
        config: Job configuration object, or a path string that is handed to
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
    """
    # Imported locally so the function does not depend on a module-level
    # `import json` being present.
    import json

    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {"name": "breast_homo_guest", "namespace": f"experiment{namespace}"}
    host_train_data = {"name": "breast_homo_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).algorithm_param(table=host_train_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0", with_label=True, output_format="dense")  # start component numbering at 0

    scale_0 = FeatureScale(name='scale_0')
    param = {
        "penalty": "L2",
        "optimizer": "sgd",
        "tol": 1e-05,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "decay": 1,
        "decay_sqrt": True,
        "init_param": {
            "init_method": "zeros"
        },
        "encrypt_param": {
            "method": None
        },
        "cv_param": {
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False
        }
    }

    homo_lr_0 = HomoLR(name='homo_lr_0', **param)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # the scaler consumes the DataIO output and feeds the LR model
    pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(homo_lr_0, data=Data(train_data=scale_0.output.data))
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    # evaluation is switched off on the host side
    evaluation_0.get_party_instance(role='host', party_id=host).algorithm_param(need_run=False)
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)

    deploy_components = [dataio_0, scale_0, homo_lr_0]
    pipeline.deploy_component(components=deploy_components)

    predict_pipeline = PipeLine()
    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_0)
    # add selected components from train pipeline onto predict pipeline
    # specify data source
    predict_pipeline.add_component(pipeline,
                                   data=Data(predict_input={pipeline.dataio_0.input.data: reader_0.output.data}))
    predict_pipeline.compile()
    predict_pipeline.predict(backend=backend, work_mode=work_mode)

    dsl_json = predict_pipeline.get_predict_dsl()
    conf_json = predict_pipeline.get_predict_conf()
    # Use context managers so both output files are flushed and closed.
    # NOTE(review): the "h**o" in the file names looks like a mangled "homo";
    # kept byte-identical here - confirm the intended names.
    with open('./h**o-lr-normal-predict-dsl.json', 'w') as dsl_file:
        json.dump(dsl_json, dsl_file, indent=4)
    with open('./h**o-lr-normal-predict-conf.json', 'w') as conf_file:
        json.dump(conf_json, conf_file, indent=4)

    # query component summary
    print(json.dumps(pipeline.get_component("homo_lr_0").get_summary(), indent=4, ensure_ascii=False))
    print(json.dumps(pipeline.get_component("evaluation_0").get_summary(), indent=4, ensure_ascii=False))
Exemplo n.º 8
0
def main(config="../../config.yaml", namespace=""):
    """Fit hetero feature binning on train data, apply it to validate data, then predict.

    Two reader -> data transform -> intersection -> binning branches are
    built. The second data transform and the second binning component receive
    the model output of their first-branch counterparts. After fitting, the
    first-branch components are deployed into a predict pipeline fed by
    ``reader_1``.

    Args:
        config: Job configuration object, or a path string that is handed to
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
    """
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest = parties.guest[0]
    # The full host list is used; it is passed as-is wherever a host id is needed.
    host = parties.host

    guest_train_data = {"name": "vehicle_scale_hetero_guest", "namespace": f"experiment{namespace}"}
    guest_validate_data = {"name": "vehicle_scale_hetero_guest", "namespace": f"experiment{namespace}"}

    host_train_data = {"name": "vehicle_scale_hetero_host", "namespace": f"experiment{namespace}"}
    host_validate_data = {"name": "vehicle_scale_hetero_host", "namespace": f"experiment{namespace}"}

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host)

    data_transform_0, data_transform_1 = DataTransform(name="data_transform_0"), DataTransform(name='data_transform_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data)
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(
        with_label=False, output_format="dense")

    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_validate_data)
    data_transform_1.get_party_instance(
        role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    # The host validate table is the same table that data_transform_0 parses
    # without a label, so it must be declared label-free here as well.
    data_transform_1.get_party_instance(
        role='host', party_id=host).component_param(
        with_label=False, output_format="dense")

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    param = {
        "method": "quantile",
        "optimal_binning_param": {
            "metric_method": "gini",
            "min_bin_pct": 0.05,
            "max_bin_pct": 0.8,
            "init_bucket_method": "quantile",
            "init_bin_nums": 100,
            "mixture": True
        },
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": [0, 1, 2],
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": {
            "transform_cols": -1,
            "transform_names": None,
            "transform_type": "bin_num"
        }
    }

    hetero_feature_binning_0 = HeteroFeatureBinning(name="hetero_feature_binning_0", **param)
    # The second binning component gets no parameters of its own; it is wired
    # to the model output of hetero_feature_binning_0 below.
    hetero_feature_binning_1 = HeteroFeatureBinning(name='hetero_feature_binning_1')

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    pipeline.add_component(
        data_transform_1, data=Data(
            data=reader_1.output.data), model=Model(
            data_transform_0.output.model))
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_feature_binning_1, data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_binning_0.output.model))

    pipeline.compile()
    pipeline.fit()

    # predict
    # deploy required components
    pipeline.deploy_component([data_transform_0, intersection_0, hetero_feature_binning_0])

    predict_pipeline = PipeLine()
    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_1)
    # add selected components from train pipeline onto predict pipeline
    # specify data source
    predict_pipeline.add_component(
        pipeline, data=Data(
            predict_input={
                pipeline.data_transform_0.input.data: reader_1.output.data}))
    # run predict model
    predict_pipeline.predict()
Exemplo n.º 9
0
def main(config="../../config.yaml", namespace=""):
    """Expand guest columns, fit a hetero LR model on the breast tables, then predict.

    The training pipeline is reader -> column expand -> DataIO ->
    intersection -> hetero LR. After fitting, the LR summary is printed and
    the fitted components are deployed into a predict pipeline fed by the
    same reader.

    Args:
        config: Job configuration object, or a path string that is handed to
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
    """
    # A string is treated as a path to a config file; anything else is used as is.
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest_id, host_id, arbiter_id = parties.guest[0], parties.host[0], parties.arbiter[0]
    # backend / work_mode are forwarded unchanged to fit() and predict()
    run_options = {"backend": job_config.backend, "work_mode": job_config.work_mode}

    table_namespace = f"experiment{namespace}"
    guest_table = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_table = {"name": "breast_hetero_host", "namespace": table_namespace}

    # pipeline with its initiator and participants
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id, arbiter=arbiter_id)

    # reader: one input table per party
    reader = Reader(name="reader_0")
    reader.get_party_instance(role="guest", party_id=guest_id).algorithm_param(table=guest_table)
    reader.get_party_instance(role="host", party_id=host_id).algorithm_param(table=host_table)

    # column expand: four manually filled columns on the guest, disabled on the host
    column_expand = ColumnExpand(name="column_expand_0")
    guest_expand = column_expand.get_party_instance(role="guest", party_id=guest_id)
    guest_expand.algorithm_param(need_run=True,
                                 method="manual",
                                 append_header=["x_0", "x_1", "x_2", "x_3"],
                                 fill_value=[0, 0.2, 0.5, 1])
    host_expand = column_expand.get_party_instance(role="host", party_id=host_id)
    host_expand.algorithm_param(need_run=False)

    # DataIO: labelled dense output on the guest, no label on the host
    dataio = DataIO(name="dataio_0")
    guest_dataio = dataio.get_party_instance(role="guest", party_id=guest_id)
    guest_dataio.algorithm_param(with_label=True, output_format="dense")
    host_dataio = dataio.get_party_instance(role="host", party_id=host_id)
    host_dataio.algorithm_param(with_label=False)

    # intersection
    intersection = Intersection(name="intersection_0", intersect_method="rsa",
                                sync_intersect_ids=True, only_output_key=False)

    lr_settings = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 20,
        "early_stop": "weight_diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {"init_method": "random_uniform"},
        "sqn_param": {
            "update_interval_L": 3,
            "memory_M": 5,
            "sample_size": 5000,
            "random_seed": None,
        },
    }
    hetero_lr = HeteroLR(name="hetero_lr_0", **lr_settings)

    # register the components in execution order, each fed by its predecessor
    pipeline.add_component(reader)
    pipeline.add_component(column_expand, data=Data(data=reader.output.data))
    pipeline.add_component(dataio, data=Data(data=column_expand.output.data))
    pipeline.add_component(intersection, data=Data(data=dataio.output.data))
    pipeline.add_component(hetero_lr, data=Data(train_data=intersection.output.data))

    # compiling forms the conf and dsl files for running the job
    pipeline.compile()

    # fit model, then report the LR component summary
    pipeline.fit(**run_options)
    lr_summary = pipeline.get_component("hetero_lr_0").get_summary()
    print(lr_summary)

    # predict: deploy the components needed at inference time
    pipeline.deploy_component([column_expand, dataio, intersection, hetero_lr])

    predictor = PipeLine()
    # the reader provides the data for the predict pipeline
    predictor.add_component(reader)
    # plug the reader output into the deployed column expand input
    predict_source = {pipeline.column_expand_0.input.data: reader.output.data}
    predictor.add_component(pipeline, data=Data(predict_input=predict_source))
    # run predict model
    predictor.predict(**run_options)
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero fast SecureBoost model with validation data, then predict.

    Two reader -> DataIO -> intersection branches provide train and validate
    data (the second DataIO receives the model output of the first). Both
    feed the boosting component, which is followed by an evaluation. After
    fitting, the model summary is printed and the train-branch components are
    deployed into a predict pipeline fed by ``reader_0``.

    Args:
        config: Job configuration object, or a path string that is handed to
            ``load_job_config``.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
    """
    # A string is treated as a path to a config file; anything else is used as is.
    job_config = load_job_config(config) if isinstance(config, str) else config
    parties = job_config.parties
    guest_id, host_id = parties.guest[0], parties.host[0]

    backend = job_config.backend
    work_mode = job_config.work_mode

    # data sets: train and validate point at tables with the same names
    table_namespace = f"experiment{namespace}"
    guest_train_table = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_train_table = {"name": "breast_hetero_host", "namespace": table_namespace}
    guest_validate_table = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_validate_table = {"name": "breast_hetero_host", "namespace": table_namespace}

    # init pipeline
    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role="guest", party_id=guest_id)
    pipeline = pipeline.set_roles(guest=guest_id, host=host_id)

    # readers: reader_0 for training data, reader_1 for validation data
    train_reader = Reader(name="reader_0")
    validate_reader = Reader(name="reader_1")
    train_reader.get_party_instance(role="guest", party_id=guest_id).component_param(table=guest_train_table)
    train_reader.get_party_instance(role="host", party_id=host_id).component_param(table=host_train_table)
    validate_reader.get_party_instance(role="guest", party_id=guest_id).component_param(table=guest_validate_table)
    validate_reader.get_party_instance(role="host", party_id=host_id).component_param(table=host_validate_table)

    # data-io: labelled dense output on the guest, no label on the host
    train_dataio = DataIO(name="dataio_0")
    validate_dataio = DataIO(name="dataio_1")
    for dataio in (train_dataio, validate_dataio):
        guest_dataio = dataio.get_party_instance(role="guest", party_id=guest_id)
        guest_dataio.component_param(with_label=True, output_format="dense")
        host_dataio = dataio.get_party_instance(role="host", party_id=host_id)
        host_dataio.component_param(with_label=False)

    # data intersect components
    train_intersect = Intersection(name="intersection_0")
    validate_intersect = Intersection(name="intersection_1")

    # secure boost component
    boost_settings = {
        "num_trees": 4,
        "tree_num_per_party": 1,
        "task_type": 'classification',
        "objective_param": {"objective": "cross_entropy"},
        "encrypt_param": {"method": "iterativeAffine"},
        "tree_param": {"max_depth": 3},
        "validation_freqs": 1,
        "work_mode": 'mix',
    }
    fast_boost = HeteroFastSecureBoost(name="hetero_fast_secure_boost_0", **boost_settings)

    # evaluation component
    evaluation = Evaluation(name="evaluation_0", eval_type="binary")

    pipeline.add_component(train_reader)
    pipeline.add_component(validate_reader)
    pipeline.add_component(train_dataio, data=Data(data=train_reader.output.data))
    # the validate DataIO is given the model output of the train DataIO
    pipeline.add_component(validate_dataio,
                           data=Data(data=validate_reader.output.data),
                           model=Model(train_dataio.output.model))
    pipeline.add_component(train_intersect, data=Data(data=train_dataio.output.data))
    pipeline.add_component(validate_intersect, data=Data(data=validate_dataio.output.data))
    boost_inputs = Data(train_data=train_intersect.output.data,
                        validate_data=validate_intersect.output.data)
    pipeline.add_component(fast_boost, data=boost_inputs)
    pipeline.add_component(evaluation, data=Data(data=fast_boost.output.data))

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    print("fitting hetero fast secureboost done, result:")
    boost_summary = pipeline.get_component("hetero_fast_secure_boost_0").get_summary()
    print(boost_summary)

    # predict: deploy the train-branch components needed at inference time
    pipeline.deploy_component([train_dataio, train_intersect, fast_boost])

    predictor = PipeLine()
    # the training reader provides the data for the predict pipeline
    predictor.add_component(train_reader)
    # plug the reader output into the deployed DataIO input
    predict_source = {pipeline.dataio_0.input.data: train_reader.output.data}
    predictor.add_component(pipeline, data=Data(predict_input=predict_source))
    # run predict model
    predictor.predict(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero SecureBoost model with EINI inference enabled, then predict.

    The pipeline reads the breast_hetero guest/host tables twice (the same
    tables are used as training and as validation input), transforms and
    intersects them, trains ``hetero_secure_boost_0`` and evaluates it.
    Afterwards the trained components are deployed into a predict pipeline
    that is run on ``reader_0`` and the first ten prediction rows are printed.

    Args:
        config: Path of a job config file, or an already loaded config object
            that exposes ``parties``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # a string is interpreted as the path of the config file to load
    if isinstance(config, str):
        config = load_job_config(config)
    party_conf = config.parties
    guest = party_conf.guest[0]
    host = party_conf.host[0]

    # input tables; the validation tables are the same as the training tables
    table_namespace = f"experiment{namespace}"
    guest_train_data = dict(name="breast_hetero_guest",
                            namespace=table_namespace)
    host_train_data = dict(name="breast_hetero_host",
                           namespace=table_namespace)
    guest_validate_data = dict(name="breast_hetero_guest",
                               namespace=table_namespace)
    host_validate_data = dict(name="breast_hetero_host",
                              namespace=table_namespace)

    # pipeline with guest as initiator and a single host
    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # readers: reader_0 feeds training, reader_1 feeds validation
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_tables = (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    )
    for reader, guest_table, host_table in reader_tables:
        guest_reader = reader.get_party_instance(role="guest", party_id=guest)
        guest_reader.component_param(table=guest_table)
        host_reader = reader.get_party_instance(role="host", party_id=host)
        host_reader.component_param(table=host_table)

    # data transforms: only the guest side carries labels
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    for transform in (data_transform_0, data_transform_1):
        guest_transform = transform.get_party_instance(role="guest",
                                                       party_id=guest)
        guest_transform.component_param(with_label=True,
                                        output_format="dense")
        host_transform = transform.get_party_instance(role="host",
                                                      party_id=host)
        host_transform.component_param(with_label=False)

    # intersection components for the training and validation branches
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component with EINI inference switched on
    boost_settings = dict(
        name="hetero_secure_boost_0",
        num_trees=3,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "Paillier"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        EINI_inference=True,
        EINI_random_mask=True,
    )
    hetero_secure_boost_0 = HeteroSecureBoost(**boost_settings)

    # binary evaluation of the boosting output
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

    # wire the training pipeline
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    train_raw = Data(data=reader_0.output.data)
    pipeline.add_component(data_transform_0, data=train_raw)
    validate_raw = Data(data=reader_1.output.data)
    pipeline.add_component(data_transform_1,
                           data=validate_raw,
                           model=Model(data_transform_0.output.model))
    train_transformed = Data(data=data_transform_0.output.data)
    pipeline.add_component(intersect_0, data=train_transformed)
    validate_transformed = Data(data=data_transform_1.output.data)
    pipeline.add_component(intersect_1, data=validate_transformed)
    boost_input = Data(train_data=intersect_0.output.data,
                       validate_data=intersect_1.output.data)
    pipeline.add_component(hetero_secure_boost_0, data=boost_input)
    boost_output = Data(data=hetero_secure_boost_0.output.data)
    pipeline.add_component(evaluation_0, data=boost_output)

    pipeline.compile()
    pipeline.fit()

    print("fitting hetero secureboost done, result:")
    trained_boost = pipeline.get_component("hetero_secure_boost_0")
    print(trained_boost.get_summary())

    print("start to predict")

    # deploy the components that are needed at predict time
    deployed = [data_transform_0, intersect_0, hetero_secure_boost_0,
                evaluation_0]
    pipeline.deploy_component(deployed)

    # predict pipeline: reader_0 feeds the deployed data_transform_0 input
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_input = {
        pipeline.data_transform_0.input.data: reader_0.output.data
    }
    predict_pipeline.add_component(pipeline,
                                   data=Data(predict_input=predict_input))

    # run predict model and show a sample of the result
    predict_pipeline.predict()
    predicted_boost = predict_pipeline.get_component("hetero_secure_boost_0")
    predict_result = predicted_boost.get_output_data()
    print("Showing 10 data of predict result")
    for row in predict_result["data"][:10]:
        print(row)
Exemplo n.º 12
0
def main(config="../../config.yaml", namespace=""):
    """Train a hetero SSHE logistic regression, print summaries and predict.

    Args:
        config: Path of a job config file, or an already loaded config object
            that exposes ``parties``.
        namespace: Suffix appended to the ``experiment`` table namespace.

    Returns:
        The fitted training pipeline.
    """
    # a string is interpreted as the path of the config file to load
    if isinstance(config, str):
        config = load_job_config(config)
    party_conf = config.parties
    guest = party_conf.guest[0]
    host = party_conf.host[0]

    # training tables (default_credit_hetero_guest / default_credit_hetero_host
    # were kept as a commented-out alternative in the original script)
    table_namespace = f"experiment{namespace}"
    guest_train_data = dict(name="breast_hetero_guest",
                            namespace=table_namespace)
    host_train_data = dict(name="breast_hetero_host",
                           namespace=table_namespace)

    # pipeline with guest as initiator and a single host
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host)

    # reader: each party reads its own table
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role="guest", party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role="host", party_id=host)
    host_reader.component_param(table=host_train_data)

    # dense data transform; only the guest side carries labels
    data_transform_0 = DataTransform(name="data_transform_0",
                                     output_format="dense")
    guest_transform = data_transform_0.get_party_instance(role="guest",
                                                          party_id=guest)
    guest_transform.component_param(with_label=True)
    host_transform = data_transform_0.get_party_instance(role="host",
                                                         party_id=host)
    host_transform.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    # reader -> data transform -> intersection
    pipeline.add_component(reader_0)
    raw_data = Data(data=reader_0.output.data)
    pipeline.add_component(data_transform_0, data=raw_data)
    transformed_data = Data(data=data_transform_0.output.data)
    pipeline.add_component(intersection_0, data=transformed_data)

    # SSHE logistic regression trained on the intersected data
    hetero_sshe_lr_0 = HeteroSSHELR(
        name="hetero_sshe_lr_0",
        penalty="L2",
        tol=0.0001,
        alpha=10,
        max_iter=30,
        early_stop="weight_diff",
        batch_size=-1,
        learning_rate=0.3,
        decay=0.5,
        init_param={
            "init_method": "const",
            "init_const": 200,
            "fit_intercept": False,
        },
        encrypt_param={"key_length": 1024},
    )
    intersected_data = Data(train_data=intersection_0.output.data)
    pipeline.add_component(hetero_sshe_lr_0, data=intersected_data)

    # binary evaluation of the LR output
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    lr_output = Data(data=hetero_sshe_lr_0.output.data)
    pipeline.add_component(evaluation_0, data=lr_output)

    pipeline.compile()

    # fit the model, then show the summaries of the model and the evaluation
    pipeline.fit()
    lr_summary = pipeline.get_component("hetero_sshe_lr_0").get_summary()
    prettify(lr_summary)
    evaluation_summary = pipeline.get_component("evaluation_0").get_summary()
    prettify(evaluation_summary)

    # deploy the components that are needed at predict time
    deployed = [data_transform_0, intersection_0, hetero_sshe_lr_0]
    pipeline.deploy_component(deployed)

    # predict pipeline: reader_0 feeds the deployed data_transform_0 input
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_input = {
        pipeline.data_transform_0.input.data: reader_0.output.data
    }
    predict_pipeline.add_component(pipeline,
                                   data=Data(predict_input=predict_input))
    predict_pipeline.predict()

    return pipeline
Exemplo n.º 13
0
def main(config="../../config.yaml", namespace=""):
    """Train homo LR next to a local sklearn-style baseline and compare them.

    Both models are trained on the output of ``dataio_0`` and fed together
    into one binary evaluation. The local baseline and the evaluation only
    run on the guest side (``need_run=False`` for the host). After fitting,
    the trained components are deployed and a predict job is run.

    Args:
        config: Path of a job config file, or an already loaded config object
            that exposes ``parties``, ``backend`` and ``work_mode``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # a string is interpreted as the path of the config file to load
    if isinstance(config, str):
        config = load_job_config(config)
    party_conf = config.parties
    guest = party_conf.guest[0]
    host = party_conf.host[0]
    arbiter = party_conf.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = dict(name="breast_homo_guest",
                            namespace=table_namespace)
    host_train_data = dict(name="breast_homo_host",
                           namespace=table_namespace)

    # pipeline with guest as initiator, one host and one arbiter
    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # reader: each party reads its own table
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role="guest", party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role="host", party_id=host)
    host_reader.component_param(table=host_train_data)

    # in the homo setting every party has labels, so DataIO is configured once
    dataio_settings = dict(
        name="dataio_0",
        with_label=True,
        output_format="dense",
        label_type="int",
        label_name="y",
    )
    dataio_0 = DataIO(**dataio_settings)

    homo_lr_settings = dict(
        name="homo_lr_0",
        penalty="L2",
        optimizer="sgd",
        tol=0.0001,
        alpha=0.01,
        max_iter=30,
        batch_size=-1,
        early_stop="weight_diff",
        learning_rate=0.15,
        init_param={"init_method": "zeros"},
    )
    homo_lr_0 = HomoLR(**homo_lr_settings)

    # per-role switch: guest runs the component, host skips it
    run_switches = (("guest", guest, True), ("host", host, False))

    baseline_model_opts = {
        "penalty": "l2",
        "tol": 0.0001,
        "C": 1.0,
        "fit_intercept": True,
        "solver": "saga",
        "max_iter": 2,
    }
    local_baseline_0 = LocalBaseline(name="local_baseline_0",
                                     model_name="LogisticRegression",
                                     model_opts=baseline_model_opts)
    for role, party_id, need_run in run_switches:
        baseline_party = local_baseline_0.get_party_instance(
            role=role, party_id=party_id)
        baseline_party.component_param(need_run=need_run)

    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type="binary",
                              pos_label=1)
    for role, party_id, need_run in run_switches:
        evaluation_party = evaluation_0.get_party_instance(
            role=role, party_id=party_id)
        evaluation_party.component_param(need_run=need_run)

    # wire the training pipeline
    pipeline.add_component(reader_0)
    raw_data = Data(data=reader_0.output.data)
    pipeline.add_component(dataio_0, data=raw_data)
    lr_input = Data(train_data=dataio_0.output.data)
    pipeline.add_component(homo_lr_0, data=lr_input)
    baseline_input = Data(train_data=dataio_0.output.data)
    pipeline.add_component(local_baseline_0, data=baseline_input)
    model_outputs = [homo_lr_0.output.data, local_baseline_0.output.data]
    pipeline.add_component(evaluation_0, data=Data(data=model_outputs))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # deploy the components that are needed at predict time
    deployed = [dataio_0, homo_lr_0, local_baseline_0]
    pipeline.deploy_component(deployed)

    # predict pipeline: reader_0 feeds the deployed dataio_0 input, and the
    # evaluation is attached again on top of both model outputs
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_input = {pipeline.dataio_0.input.data: reader_0.output.data}
    predict_pipeline.add_component(pipeline,
                                   data=Data(predict_input=predict_input))
    predict_outputs = [homo_lr_0.output.data, local_baseline_0.output.data]
    predict_pipeline.add_component(evaluation_0,
                                   data=Data(data=predict_outputs))
    predict_pipeline.predict(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Train a homo SecureBoost classifier with validation data, then predict.

    ``reader_0``/``data_transform_0`` provide the training data and
    ``reader_1``/``data_transform_1`` the validation data (both parties use
    the ``breast_homo_test`` table for validation). After fitting, the
    transform and the model are deployed and prediction runs on ``reader_1``.

    Args:
        config: Path of a job config file, or an already loaded config object
            that exposes ``parties``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # a string is interpreted as the path of the config file to load
    if isinstance(config, str):
        config = load_job_config(config)

    party_conf = config.parties
    guest = party_conf.guest[0]
    host = party_conf.host[0]
    arbiter = party_conf.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = dict(name="breast_homo_guest",
                            namespace=table_namespace)
    guest_validate_data = dict(name="breast_homo_test",
                               namespace=table_namespace)
    host_train_data = dict(name="breast_homo_host",
                           namespace=table_namespace)
    host_validate_data = dict(name="breast_homo_test",
                              namespace=table_namespace)

    # pipeline with guest as initiator, one host and one arbiter
    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")

    # configure the training branch first, then the validation branch; in the
    # homo setting both guest and host carry labels
    branches = (
        (reader_0, data_transform_0, guest_train_data, host_train_data),
        (reader_1, data_transform_1, guest_validate_data, host_validate_data),
    )
    for reader, transform, guest_table, host_table in branches:
        guest_reader = reader.get_party_instance(role="guest", party_id=guest)
        guest_reader.component_param(table=guest_table)
        host_reader = reader.get_party_instance(role="host", party_id=host)
        host_reader.component_param(table=host_table)

        guest_transform = transform.get_party_instance(role="guest",
                                                       party_id=guest)
        guest_transform.component_param(with_label=True,
                                        output_format="dense")
        host_transform = transform.get_party_instance(role="host",
                                                      party_id=host)
        host_transform.component_param(with_label=True,
                                       output_format="dense")

    boost_settings = dict(
        name="homo_secureboost_0",
        num_trees=3,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
    )
    homo_secureboost_0 = HomoSecureBoost(**boost_settings)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

    # wire the training pipeline
    pipeline.add_component(reader_0)
    train_raw = Data(data=reader_0.output.data)
    pipeline.add_component(data_transform_0, data=train_raw)
    pipeline.add_component(reader_1)
    validate_raw = Data(data=reader_1.output.data)
    pipeline.add_component(data_transform_1,
                           data=validate_raw,
                           model=Model(data_transform_0.output.model))
    boost_input = Data(train_data=data_transform_0.output.data,
                       validate_data=data_transform_1.output.data)
    pipeline.add_component(homo_secureboost_0, data=boost_input)
    boost_output = Data(homo_secureboost_0.output.data)
    pipeline.add_component(evaluation_0, data=boost_output)

    pipeline.compile()
    pipeline.fit()

    # deploy the components that are needed at predict time
    deployed = [data_transform_0, homo_secureboost_0]
    pipeline.deploy_component(deployed)

    # predict pipeline: reader_1 feeds the deployed data_transform_0 input
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_1)
    predict_input = {
        pipeline.data_transform_0.input.data: reader_1.output.data
    }
    predict_pipeline.add_component(pipeline,
                                   data=Data(predict_input=predict_input))
    # run predict model
    predict_pipeline.predict()
Exemplo n.º 15
0
def main(config="../../config.yaml", namespace=""):
    """Train a hetero SSHE linear regression on the motor tables and predict.

    Args:
        config: Path of a job config file, or an already loaded config object
            that exposes ``parties``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # a string is interpreted as the path of the config file to load
    if isinstance(config, str):
        config = load_job_config(config)
    party_conf = config.parties
    guest = party_conf.guest[0]
    host = party_conf.host[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = dict(name="motor_hetero_guest",
                            namespace=table_namespace)
    host_train_data = dict(name="motor_hetero_host",
                           namespace=table_namespace)

    # pipeline with guest as initiator and a single host
    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # reader: each party reads its own table
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role="guest", party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role="host", party_id=host)
    host_reader.component_param(table=host_train_data)

    # the guest holds the float label column "motor_speed"
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role="guest",
                                                          party_id=guest)
    guest_transform.component_param(with_label=True,
                                    label_name="motor_speed",
                                    label_type="float",
                                    output_format="dense")
    host_transform = data_transform_0.get_party_instance(role="host",
                                                         party_id=host)
    host_transform.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    # cross validation settings are supplied but switched off via need_cv
    cv_settings = {
        "n_splits": 5,
        "shuffle": False,
        "random_seed": 42,
        "need_cv": False,
    }
    linr_settings = dict(
        name="hetero_linr_0",
        penalty="None",
        optimizer="sgd",
        tol=0.001,
        alpha=0.01,
        max_iter=20,
        early_stop="weight_diff",
        batch_size=-1,
        learning_rate=0.15,
        decay=0.0,
        decay_sqrt=False,
        init_param={"init_method": "zeros"},
        cv_param=cv_settings,
        reveal_strategy="encrypted_reveal_in_host",
        reveal_every_iter=False,
    )
    hetero_linr_0 = HeteroSSHELinR(**linr_settings)

    # reader -> data transform -> intersection -> linear regression
    pipeline.add_component(reader_0)
    raw_data = Data(data=reader_0.output.data)
    pipeline.add_component(data_transform_0, data=raw_data)
    transformed_data = Data(data=data_transform_0.output.data)
    pipeline.add_component(intersection_0, data=transformed_data)
    intersected_data = Data(train_data=intersection_0.output.data)
    pipeline.add_component(hetero_linr_0, data=intersected_data)

    pipeline.compile()

    pipeline.fit()

    # deploy the components that are needed at predict time
    deployed = [data_transform_0, intersection_0, hetero_linr_0]
    pipeline.deploy_component(deployed)

    # predict pipeline: reader_0 feeds the deployed data_transform_0 input
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_input = {
        pipeline.data_transform_0.input.data: reader_0.output.data
    }
    predict_pipeline.add_component(pipeline,
                                   data=Data(predict_input=predict_input))
    # run predict model
    predict_pipeline.predict()
def main(config="../../config.yaml", namespace=""):
    """Run hetero feature selection driven by Pearson and binning models.

    The intersected data goes into a binning component, a Pearson component
    and the feature selection component; the selection component receives the
    Pearson and binning models as isometric models and applies the
    ``vif_filter`` and ``correlation_filter`` methods. After fitting, the
    deployed components are used in a predict job.

    Args:
        config: Path of a job config file, or an already loaded config object
            that exposes ``parties``, ``backend`` and ``work_mode``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # a string is interpreted as the path of the config file to load
    if isinstance(config, str):
        config = load_job_config(config)
    party_conf = config.parties
    guest = party_conf.guest[0]
    host = party_conf.host[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = dict(name="breast_hetero_guest",
                            namespace=table_namespace)
    host_train_data = dict(name="breast_hetero_host",
                           namespace=table_namespace)

    # pipeline with guest as initiator and a single host
    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # reader: each party reads its own table
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role="guest", party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role="host", party_id=host)
    host_reader.component_param(table=host_train_data)

    # only the guest side carries labels
    dataio_0 = DataIO(name="dataio_0")
    guest_dataio = dataio_0.get_party_instance(role="guest", party_id=guest)
    guest_dataio.component_param(with_label=True)
    host_dataio = dataio_0.get_party_instance(role="host", party_id=host)
    host_dataio.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_pearson_0 = HeteroPearson(name="hetero_pearson_0",
                                     column_indexes=-1)
    hetero_binning_0 = HeteroFeatureBinning(name="hetero_binning_0")
    hetero_feature_selection_0 = HeteroFeatureSelection(
        name="hetero_feature_selection_0",
        select_col_indexes=-1,
        select_names=[],
        filter_methods=["vif_filter", "correlation_filter"],
        vif_param={"threshold": 5},
        correlation_param={"threshold": 0.5, "select_federated": False},
    )

    # wire the training pipeline; binning, pearson and selection all consume
    # the intersection output
    pipeline.add_component(reader_0)
    raw_data = Data(data=reader_0.output.data)
    pipeline.add_component(dataio_0, data=raw_data)
    io_output = Data(data=dataio_0.output.data)
    pipeline.add_component(intersection_0, data=io_output)
    binning_input = Data(data=intersection_0.output.data)
    pipeline.add_component(hetero_binning_0, data=binning_input)
    pearson_input = Data(data=intersection_0.output.data)
    pipeline.add_component(hetero_pearson_0, data=pearson_input)
    selection_input = Data(data=intersection_0.output.data)
    isometric_models = [
        hetero_pearson_0.output.model,
        hetero_binning_0.output.model,
    ]
    pipeline.add_component(hetero_feature_selection_0,
                           data=selection_input,
                           model=Model(isometric_model=isometric_models))

    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # deploy the components that are needed at predict time
    deployed = [
        dataio_0,
        intersection_0,
        hetero_binning_0,
        hetero_feature_selection_0,
    ]
    pipeline.deploy_component(deployed)

    # predict pipeline: reader_0 feeds the deployed dataio_0 input
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_input = {pipeline.dataio_0.input.data: reader_0.output.data}
    predict_pipeline.add_component(pipeline,
                                   data=Data(predict_input=predict_input))
    # run predict model
    predict_pipeline.predict(job_parameters)
def main(config="../../config.yaml", namespace=""):
    """Fit two hetero feature binning branches on the same data, then predict.

    Branch 0 runs ``data_transform_0`` (output format left at its default) and
    branch 1 runs ``data_transform_1`` with ``output_format="sparse"``. Each
    branch has its own intersection and quantile binning component. For both
    binning components the host side overrides ``transform_param`` so that
    ``transform_type`` is ``None``. Only branch 0 is deployed for prediction.

    Args:
        config: Path of a job config file, or an already loaded config object
            that exposes ``parties``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # a string is interpreted as the path of the config file to load
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {
        "name": "ionosphere_scale_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "ionosphere_scale_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    pipeline = PipeLine().set_initiator(role='guest',
                                        party_id=guest).set_roles(guest=guest,
                                                                  host=host)

    # reader: each party reads its own table
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    # branch 0 transform; only the guest side carries labels
    data_transform_0 = DataTransform(name="data_transform_0",
                                     label_name="label")
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    # branch 1 transform: same input, sparse output format
    data_transform_1 = DataTransform(name="data_transform_1",
                                     output_format="sparse",
                                     label_name="label")
    data_transform_1.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True)
    data_transform_1.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    # binning parameters shared by both binning components
    param = {
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "bin_names": None,
        "category_indexes": None,
        "category_names": None,
        "adjustment_factor": 0.5,
        "local_only": False,
        "transform_param": {
            "transform_cols": [0, 1, 2],
            "transform_names": None,
            "transform_type": "bin_num"
        }
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(
        name="hetero_feature_binning_0", **param)
    hetero_feature_binning_0.get_party_instance(
        role="host", party_id=host).component_param(
            transform_param={"transform_type": None})

    hetero_feature_binning_1 = HeteroFeatureBinning(
        name="hetero_feature_binning_1", **param)
    # the host override must target binning_1 itself; applying it to
    # binning_0 a second time would leave binning_1's host unconfigured
    hetero_feature_binning_1.get_party_instance(
        role="host", party_id=host).component_param(
            transform_param={"transform_type": None})

    # branch 0: reader -> data_transform_0 -> intersection_0 -> binning_0
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))
    # branch 1: reader -> data_transform_1 -> intersection_1 -> binning_1
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=data_transform_1.output.data))
    pipeline.add_component(hetero_feature_binning_1,
                           data=Data(data=intersection_1.output.data))

    pipeline.compile()

    pipeline.fit()

    # deploy the branch 0 components for prediction
    pipeline.deploy_component(
        [data_transform_0, intersection_0, hetero_feature_binning_0])

    predict_pipeline = PipeLine()
    # add data reader onto predict pipeline
    predict_pipeline.add_component(reader_0)
    # add selected components from train pipeline onto predict pipeline
    # specify data source
    predict_pipeline.add_component(
        pipeline,
        data=Data(predict_input={
            pipeline.data_transform_0.input.data: reader_0.output.data
        }))
    # run predict model
    predict_pipeline.predict()
Exemplo n.º 18
0
def main(config="../../config.yaml", namespace=""):
    """Train hetero LR on sample-weighted data, evaluate it and predict.

    ``sample_weight_0`` is configured on the guest with
    ``class_weight="balanced"`` and is switched off on the host.
    ``sample_weight_1`` consumes the same intersected data together with the
    model of ``sample_weight_0``, and its output is the training data of
    ``hetero_lr_0``.

    Args:
        config: Path of a job config file, or an already loaded config object
            that exposes ``parties``.
        namespace: Suffix appended to the ``experiment`` table namespace.
    """
    # a string is interpreted as the path of the config file to load
    if isinstance(config, str):
        config = load_job_config(config)
    party_conf = config.parties
    guest = party_conf.guest[0]
    host = party_conf.host[0]
    arbiter = party_conf.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = dict(name="breast_hetero_guest",
                            namespace=table_namespace)
    host_train_data = dict(name="breast_hetero_host",
                           namespace=table_namespace)

    # pipeline with guest as initiator, one host and one arbiter
    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # reader: each party reads its own table
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role="guest", party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role="host", party_id=host)
    host_reader.component_param(table=host_train_data)

    # the guest holds the int label column "y"
    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role="guest",
                                                          party_id=guest)
    guest_transform.component_param(with_label=True,
                                    label_name="y",
                                    label_type="int",
                                    output_format="dense")
    host_transform = data_transform_0.get_party_instance(role="host",
                                                         party_id=host)
    host_transform.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    # balanced class weights on the guest; the host does not run this step
    sample_weight_0 = SampleWeight(name="sample_weight_0")
    guest_weight = sample_weight_0.get_party_instance(role="guest",
                                                      party_id=guest)
    guest_weight.component_param(need_run=True, class_weight="balanced")
    host_weight = sample_weight_0.get_party_instance(role="host",
                                                     party_id=host)
    host_weight.component_param(need_run=False)

    sample_weight_1 = SampleWeight(name="sample_weight_1")

    lr_settings = dict(
        name="hetero_lr_0",
        optimizer="nesterov_momentum_sgd",
        tol=0.001,
        alpha=0.01,
        max_iter=20,
        early_stop="weight_diff",
        batch_size=-1,
        learning_rate=0.15,
        init_param={"init_method": "zeros"},
    )
    hetero_lr_0 = HeteroLR(**lr_settings)

    # evaluation runs with default party settings; the original script kept a
    # commented-out host-side need_run=False override here
    evaluation_0 = Evaluation(name="evaluation_0",
                              eval_type="binary",
                              pos_label=1)

    # wire the training pipeline
    pipeline.add_component(reader_0)
    raw_data = Data(data=reader_0.output.data)
    pipeline.add_component(data_transform_0, data=raw_data)
    transformed_data = Data(data=data_transform_0.output.data)
    pipeline.add_component(intersection_0, data=transformed_data)
    weight_0_input = Data(data=intersection_0.output.data)
    pipeline.add_component(sample_weight_0, data=weight_0_input)
    weight_1_input = Data(data=intersection_0.output.data)
    pipeline.add_component(sample_weight_1,
                           data=weight_1_input,
                           model=Model(model=sample_weight_0.output.model))
    lr_input = Data(train_data=sample_weight_1.output.data)
    pipeline.add_component(hetero_lr_0, data=lr_input)
    lr_output = Data(data=hetero_lr_0.output.data)
    pipeline.add_component(evaluation_0, data=lr_output)

    pipeline.compile()

    pipeline.fit()

    # deploy the components that are needed at predict time
    deployed = [data_transform_0, intersection_0, sample_weight_0,
                hetero_lr_0]
    pipeline.deploy_component(deployed)

    # predict pipeline: reader_0 feeds the deployed data_transform_0 input
    predict_pipeline = PipeLine()
    predict_pipeline.add_component(reader_0)
    predict_input = {
        pipeline.data_transform_0.input.data: reader_0.output.data
    }
    predict_pipeline.add_component(pipeline,
                                   data=Data(predict_input=predict_input))
    # run predict model
    predict_pipeline.predict()
Exemplo n.º 19
0
def main():
    """Run a hetero-LR training job, then a predict-and-evaluate job.

    The training pipeline chains reader -> data transform -> intersection ->
    HeteroLR for one guest, one host and one arbiter. After fitting, the
    HeteroLR component summary is printed as indented JSON. The trained
    data transform, intersection and HeteroLR components are then deployed
    and reused inside a second pipeline that is fed by a new reader and
    followed by an Evaluation component.
    """
    # party ids of the participants
    guest_id = 9999
    host_id = 10000
    arbiter_id = 10000

    # table name / namespace pairs for the training and evaluation inputs
    train_table_guest = {"name": "breast_hetero_guest", "namespace": "experiment"}
    train_table_host = {"name": "breast_hetero_host", "namespace": "experiment"}
    eval_table_guest = {"name": "breast_hetero_guest", "namespace": "experiment"}
    eval_table_host = {"name": "breast_hetero_host", "namespace": "experiment"}

    # training pipeline: the guest initiates, all three roles participate
    train_pipeline = PipeLine()
    train_pipeline.set_initiator(role="guest", party_id=guest_id)
    train_pipeline.set_roles(guest=guest_id, host=host_id, arbiter=arbiter_id)

    # reader for the training tables, configured per party (guest first)
    train_reader = Reader(name="reader_0")
    for role, party_id, table in (("guest", guest_id, train_table_guest),
                                  ("host", host_id, train_table_host)):
        party_reader = train_reader.get_party_instance(role=role,
                                                       party_id=party_id)
        party_reader.component_param(table=table)

    # data transform: the guest is configured with a label, the host without
    transform = DataTransform(name="data_transform_0")
    guest_transform = transform.get_party_instance(role="guest",
                                                   party_id=guest_id)
    guest_transform.component_param(with_label=True, output_format="dense")
    host_transform = transform.get_party_instance(role="host",
                                                  party_id=host_id)
    host_transform.component_param(with_label=False)

    # intersection component with default parameters
    intersect = Intersection(name="intersection_0")

    # HeteroLR component and its training settings
    lr_settings = {
        "early_stop": "diff",
        "learning_rate": 0.15,
        "optimizer": "rmsprop",
        "max_iter": 10,
    }
    hetero_lr = HeteroLR(name="hetero_lr_0", **lr_settings)

    # register components in execution order, wiring each to its upstream
    train_pipeline.add_component(train_reader)
    transform_input = Data(data=train_reader.output.data)
    train_pipeline.add_component(transform, data=transform_input)
    intersect_input = Data(data=transform.output.data)
    train_pipeline.add_component(intersect, data=intersect_input)
    lr_input = Data(train_data=intersect.output.data)
    train_pipeline.add_component(hetero_lr, data=lr_input)

    # compile once every component has been added, then fit
    train_pipeline.compile()
    train_pipeline.fit()

    # print the summary of the fitted HeteroLR component
    import json
    lr_summary = train_pipeline.get_component("hetero_lr_0").get_summary()
    print(json.dumps(lr_summary, indent=4))

    # deploy the components needed for prediction
    train_pipeline.deploy_component([transform, intersect, hetero_lr])

    # prediction pipeline
    predict_pipeline = PipeLine()

    # reader for the evaluation tables, configured per party (guest first)
    eval_reader = Reader(name="reader_1")
    for role, party_id, table in (("guest", guest_id, eval_table_guest),
                                  ("host", host_id, eval_table_host)):
        party_reader = eval_reader.get_party_instance(role=role,
                                                      party_id=party_id)
        party_reader.component_param(table=table)

    # evaluation is enabled (binary) on the guest and switched off on the host
    evaluator = Evaluation(name="evaluation_0")
    guest_evaluator = evaluator.get_party_instance(role="guest",
                                                   party_id=guest_id)
    guest_evaluator.component_param(need_run=True, eval_type="binary")
    host_evaluator = evaluator.get_party_instance(role="host",
                                                  party_id=host_id)
    host_evaluator.component_param(need_run=False)

    # the new reader feeds the deployed training pipeline's data transform input
    predict_pipeline.add_component(eval_reader)
    deployed_input = Data(predict_input={
        train_pipeline.data_transform_0.input.data: eval_reader.output.data,
    })
    predict_pipeline.add_component(train_pipeline, data=deployed_input)

    # evaluation consumes the HeteroLR output of the deployed pipeline
    evaluation_input = Data(data=train_pipeline.hetero_lr_0.output.data)
    predict_pipeline.add_component(evaluator, data=evaluation_input)

    # run the prediction job
    predict_pipeline.predict()
Exemplo n.º 20
0
def main():
    """Run a hetero-LR training job with validation data, then predict.

    The training pipeline uses two readers (train and validation tables),
    two DataIO components (the second one receives the first one's model),
    two intersection components and a HeteroLR component that takes both
    train and validate data. After fitting, the HeteroLR component summary
    is printed as indented JSON. The first DataIO, the first intersection
    and the HeteroLR component are then deployed and reused inside a
    prediction pipeline fed by a third reader.
    """
    # party ids of the participants
    guest_id = 9999
    host_id = 10000
    arbiter_id = 10000

    # backend and work mode handed to both fit() and predict();
    # use WorkMode.CLUSTER instead for a cluster deployment
    run_backend = Backend.EGGROLL
    run_mode = WorkMode.STANDALONE

    # table name / namespace pairs for the training and evaluation inputs
    train_table_guest = {"name": "breast_hetero_guest", "namespace": "experiment"}
    train_table_host = {"name": "breast_hetero_host", "namespace": "experiment"}
    eval_table_guest = {"name": "breast_hetero_guest", "namespace": "experiment"}
    eval_table_host = {"name": "breast_hetero_host", "namespace": "experiment"}

    # training pipeline: the guest initiates, all three roles participate
    train_pipeline = PipeLine()
    train_pipeline.set_initiator(role="guest", party_id=guest_id)
    train_pipeline.set_roles(guest=guest_id, host=host_id, arbiter=arbiter_id)

    # reader for the training tables, configured per party (guest first)
    train_reader = Reader(name="reader_0")
    for role, party_id, table in (("guest", guest_id, train_table_guest),
                                  ("host", host_id, train_table_host)):
        party_reader = train_reader.get_party_instance(role=role,
                                                       party_id=party_id)
        party_reader.algorithm_param(table=table)

    # reader for the validation tables, configured the same way
    validate_reader = Reader(name="reader_1")
    for role, party_id, table in (("guest", guest_id, eval_table_guest),
                                  ("host", host_id, eval_table_host)):
        party_reader = validate_reader.get_party_instance(role=role,
                                                          party_id=party_id)
        party_reader.algorithm_param(table=table)

    # DataIO components; only the first one is configured per party here
    train_dataio = DataIO(name="dataio_0")
    validate_dataio = DataIO(name="dataio_1")

    # the guest is configured with a label, the host without
    guest_dataio = train_dataio.get_party_instance(role="guest",
                                                   party_id=guest_id)
    guest_dataio.algorithm_param(with_label=True, output_format="dense")
    host_dataio = train_dataio.get_party_instance(role="host",
                                                  party_id=host_id)
    host_dataio.algorithm_param(with_label=False)

    # intersection components with default parameters
    train_intersect = Intersection(name="intersection_0")
    validate_intersect = Intersection(name="intersection_1")

    # HeteroLR component and its training / early-stopping settings
    lr_settings = {
        "early_stop": "weight_diff",
        "learning_rate": 0.15,
        "optimizer": "rmsprop",
        "max_iter": 10,
        "early_stopping_rounds": 2,
        "validation_freqs": 1,
    }
    hetero_lr = HeteroLR(name="hetero_lr_0", **lr_settings)

    # register components in execution order, wiring each to its upstream
    train_pipeline.add_component(train_reader)
    train_pipeline.add_component(validate_reader)
    train_dataio_input = Data(data=train_reader.output.data)
    train_pipeline.add_component(train_dataio, data=train_dataio_input)
    # the second DataIO takes the model produced by the first one
    train_pipeline.add_component(
        validate_dataio,
        data=Data(data=validate_reader.output.data),
        model=Model(train_dataio.output.model),
    )
    # each intersection consumes the output of its own DataIO
    train_intersect_input = Data(data=train_dataio.output.data)
    train_pipeline.add_component(train_intersect, data=train_intersect_input)
    validate_intersect_input = Data(data=validate_dataio.output.data)
    train_pipeline.add_component(validate_intersect,
                                 data=validate_intersect_input)
    # HeteroLR trains on the first intersection, validates on the second
    lr_input = Data(train_data=train_intersect.output.data,
                    validate_data=validate_intersect.output.data)
    train_pipeline.add_component(hetero_lr, data=lr_input)

    # compile once every component has been added, then fit
    train_pipeline.compile()
    train_pipeline.fit(backend=run_backend, work_mode=run_mode)

    # print the summary of the fitted HeteroLR component
    import json
    lr_summary = train_pipeline.get_component("hetero_lr_0").get_summary()
    print(json.dumps(lr_summary, indent=4))

    # deploy the components needed for prediction
    train_pipeline.deploy_component([train_dataio, train_intersect, hetero_lr])

    # prediction pipeline
    predict_pipeline = PipeLine()

    # reader for the prediction input, configured per party (guest first)
    predict_reader = Reader(name="reader_2")
    for role, party_id, table in (("guest", guest_id, eval_table_guest),
                                  ("host", host_id, eval_table_host)):
        party_reader = predict_reader.get_party_instance(role=role,
                                                         party_id=party_id)
        party_reader.algorithm_param(table=table)

    # the new reader feeds the deployed training pipeline's dataio_0 input
    predict_pipeline.add_component(predict_reader)
    deployed_input = Data(predict_input={
        train_pipeline.dataio_0.input.data: predict_reader.output.data,
    })
    predict_pipeline.add_component(train_pipeline, data=deployed_input)

    # run the prediction job with the same backend / work mode as training
    predict_pipeline.predict(backend=run_backend, work_mode=run_mode)