Exemplo n.º 1
0
def main(config="../../config.yaml", namespace=""):
    """Assemble and fit a homo SecureBoost regression pipeline.

    Training and validation tables are read for guest and host, passed
    through ``DataTransform`` (the validation transform reuses the model of
    the training transform), fed to ``HomoSecureBoost`` and then evaluated.

    Args:
        config: A path string, which is loaded with ``load_job_config``, or
            an already loaded config object exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
    """
    # A string config is a path that still has to be loaded.
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = dict(name="student_homo_guest", namespace=table_namespace)
    guest_validate_data = dict(name="student_homo_test", namespace=table_namespace)
    host_train_data = dict(name="student_homo_host", namespace=table_namespace)
    host_validate_data = dict(name="student_homo_test", namespace=table_namespace)

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name='data_transform_1')
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name='reader_1')

    # Guest and host share the same transform settings; only the tables differ.
    role_ids = (("guest", guest), ("host", host))
    transform_settings = {
        "with_label": True,
        "output_format": "dense",
        "label_type": "float",
    }
    stages = (
        (reader_0, data_transform_0, (guest_train_data, host_train_data)),
        (reader_1, data_transform_1, (guest_validate_data, host_validate_data)),
    )
    for reader, transform, tables in stages:
        for (role, party_id), table in zip(role_ids, tables):
            reader_instance = reader.get_party_instance(role=role, party_id=party_id)
            reader_instance.component_param(table=table)
        for role, party_id in role_ids:
            transform_instance = transform.get_party_instance(role=role, party_id=party_id)
            transform_instance.component_param(**transform_settings)

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=3,
        task_type='regression',
        objective_param={"objective": "lse"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
    )

    evaluation_0 = Evaluation(name='evaluation_0', eval_type='regression')

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    boost_input = Data(
        train_data=data_transform_0.output.data,
        validate_data=data_transform_1.output.data,
    )
    pipeline.add_component(homo_secureboost_0, data=boost_input)
    pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    pipeline.fit()
def main(config="../../config.yaml", namespace=""):
    """Assemble and fit a HeteroFTL pipeline on the nus_wide tables.

    The pipeline chains a reader, a ``DataTransform`` (labels on the guest
    side only), a ``HeteroFTL`` component with one dense layer, and a binary
    evaluation.

    Args:
        config: A path string, which is loaded with ``load_job_config``, or
            an already loaded config object exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
    """
    # A string config is a path that still has to be loaded.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host = parties.guest[0], parties.host[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = dict(name="nus_wide_guest", namespace=table_namespace)
    host_train_data = dict(name="nus_wide_host", namespace=table_namespace)

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    for role, party_id, table in (("guest", guest, guest_train_data),
                                  ("host", host, host_train_data)):
        reader_instance = reader_0.get_party_instance(role=role, party_id=party_id)
        reader_instance.component_param(table=table)

    data_transform_0 = DataTransform(name="data_transform_0")
    guest_transform = data_transform_0.get_party_instance(role='guest', party_id=guest)
    guest_transform.component_param(with_label=True, output_format="dense")
    host_transform = data_transform_0.get_party_instance(role='host', party_id=host)
    host_transform.component_param(with_label=False)

    ftl_settings = {
        "epochs": 10,
        "alpha": 1,
        "batch_size": -1,
        "mode": 'plain',
        "communication_efficient": True,
        "local_round": 5,
    }
    hetero_ftl_0 = HeteroFTL(name='hetero_ftl_0', **ftl_settings)

    # Single dense layer used by the FTL component.
    kernel_init = initializers.RandomNormal(stddev=1.0)
    bias_init = initializers.Zeros()
    hidden_layer = Dense(units=32,
                         activation='sigmoid',
                         kernel_initializer=kernel_init,
                         bias_initializer=bias_init)
    hetero_ftl_0.add_nn_layer(hidden_layer)
    hetero_ftl_0.compile(optimizer=optimizers.Adam(lr=0.01))
    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    transform_input = Data(data=reader_0.output.data)
    pipeline.add_component(data_transform_0, data=transform_input)
    ftl_input = Data(train_data=data_transform_0.output.data)
    pipeline.add_component(hetero_ftl_0, data=ftl_input)
    evaluation_input = Data(data=hetero_ftl_0.output.data)
    pipeline.add_component(evaluation_0, data=evaluation_input)

    pipeline.compile()

    pipeline.fit()
Exemplo n.º 3
0
def main(config="../../config.yaml", namespace=""):
    """Assemble and fit a homo SecureBoost multi-class pipeline.

    Training and validation tables of the vehicle_scale data set are read for
    guest and host, passed through ``DataIO`` (the validation ``DataIO``
    reuses the model of the training one), fed to ``HomoSecureBoost`` and
    then evaluated with ``eval_type='multi'``.

    Args:
        config: A path string, which is loaded with ``load_job_config``, or
            an already loaded config object exposing ``parties``,
            ``backend`` and ``work_mode``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
    """
    # A string config is a path that still has to be loaded.
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = dict(name="vehicle_scale_homo_guest", namespace=table_namespace)
    guest_validate_data = dict(name="vehicle_scale_homo_test", namespace=table_namespace)
    host_train_data = dict(name="vehicle_scale_homo_host", namespace=table_namespace)
    host_validate_data = dict(name="vehicle_scale_homo_test", namespace=table_namespace)

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name='dataio_1')
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name='reader_1')

    # Guest and host share the same DataIO settings; only the tables differ.
    role_ids = (("guest", guest), ("host", host))
    stages = (
        (reader_0, dataio_0, (guest_train_data, host_train_data)),
        (reader_1, dataio_1, (guest_validate_data, host_validate_data)),
    )
    for reader, dataio, tables in stages:
        for (role, party_id), table in zip(role_ids, tables):
            reader_instance = reader.get_party_instance(role=role, party_id=party_id)
            reader_instance.algorithm_param(table=table)
        for role, party_id in role_ids:
            dataio_instance = dataio.get_party_instance(role=role, party_id=party_id)
            dataio_instance.algorithm_param(with_label=True, output_format="dense")

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=3,
        task_type='classification',
        objective_param={"objective": "cross_entropy"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
    )

    evaluation_0 = Evaluation(name='evaluation_0', eval_type='multi')

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    pipeline.add_component(
        dataio_1,
        data=Data(data=reader_1.output.data),
        model=Model(dataio_0.output.model),
    )
    boost_input = Data(
        train_data=dataio_0.output.data,
        validate_data=dataio_1.output.data,
    )
    pipeline.add_component(homo_secureboost_0, data=boost_input)
    pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)
def main(config="../../config.yaml", namespace=""):
    """Train a HeteroFTL pipeline, then run prediction with the deployed parts.

    The training pipeline chains reader, ``DataIO``, ``HeteroFTL`` and a
    binary evaluation. After fitting, ``dataio_0`` and ``hetero_ftl_0`` are
    deployed and a second pipeline feeds the same reader into them for
    prediction.

    Args:
        config: A path string, which is loaded with ``load_job_config``, or
            an already loaded config object exposing ``parties``,
            ``backend`` and ``work_mode``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
    """
    # A string config is a path that still has to be loaded.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host = parties.guest[0], parties.host[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = dict(name="nus_wide_guest", namespace=table_namespace)
    host_train_data = dict(name="nus_wide_host", namespace=table_namespace)

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    reader_0 = Reader(name="reader_0")
    for role, party_id, table in (("guest", guest, guest_train_data),
                                  ("host", host, host_train_data)):
        reader_instance = reader_0.get_party_instance(role=role, party_id=party_id)
        reader_instance.algorithm_param(table=table)

    dataio_0 = DataIO(name="dataio_0")
    guest_dataio = dataio_0.get_party_instance(role='guest', party_id=guest)
    guest_dataio.algorithm_param(with_label=True, output_format="dense")
    host_dataio = dataio_0.get_party_instance(role='host', party_id=host)
    host_dataio.algorithm_param(with_label=False)

    hetero_ftl_0 = HeteroFTL(
        name='hetero_ftl_0',
        epochs=10,
        alpha=1,
        batch_size=-1,
        mode='plain',
    )

    # Single dense layer used by the FTL component.
    kernel_init = initializers.RandomNormal(stddev=1.0, dtype="float32")
    bias_init = initializers.Zeros()
    hidden_layer = Dense(
        units=32,
        activation='sigmoid',
        kernel_initializer=kernel_init,
        bias_initializer=bias_init,
    )
    hetero_ftl_0.add_nn_layer(hidden_layer)

    hetero_ftl_0.compile(optimizer=optimizers.Adam(lr=0.01))
    evaluation_0 = Evaluation(name='evaluation_0', eval_type="binary")

    # Training pipeline, components added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(hetero_ftl_0, data=Data(train_data=dataio_0.output.data))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_ftl_0.output.data))

    pipeline.compile()

    pipeline.fit(backend=backend, work_mode=work_mode)

    # Prediction: deploy the components that the predict pipeline reuses.
    deployed_components = [dataio_0, hetero_ftl_0]
    pipeline.deploy_component(deployed_components)

    predict_pipeline = PipeLine()
    # The reader supplies the data for prediction.
    predict_pipeline.add_component(reader_0)
    # Route the reader output into the input of the deployed dataio_0.
    predict_routing = {pipeline.dataio_0.input.data: reader_0.output.data}
    predict_pipeline.add_component(pipeline, data=Data(predict_input=predict_routing))
    # Run prediction.
    predict_pipeline.predict(backend=backend, work_mode=work_mode)
def main(config="../../config.yaml", namespace=""):
    """Assemble and fit a hetero k-means pipeline on the breast tables.

    The pipeline chains reader, ``DataIO``, intersection, feature binning,
    feature selection (using the binning model as isometric model),
    ``HeteroKmeans`` and a clustering evaluation. After fitting, the summary
    of ``hetero_kmeans_0`` is printed.

    Args:
        config: A path string, which is loaded with ``load_job_config``, or
            an already loaded config object exposing ``parties``,
            ``backend`` and ``work_mode``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
    """
    # A string config is a path that still has to be loaded.
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest, host, arbiter = parties.guest[0], parties.host[0], parties.arbiter[0]
    backend, work_mode = config.backend, config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = dict(name="breast_hetero_guest", namespace=table_namespace)
    host_train_data = dict(name="breast_hetero_host", namespace=table_namespace)

    # Pipeline with the guest as initiator and all three roles registered.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: one table per party.
    reader_0 = Reader(name="reader_0")
    for role, party_id, table in (("guest", guest, guest_train_data),
                                  ("host", host, host_train_data)):
        reader_instance = reader_0.get_party_instance(role=role, party_id=party_id)
        reader_instance.component_param(table=table)

    # DataIO: only the guest table carries labels.
    dataio_0 = DataIO(name="dataio_0")
    guest_dataio = dataio_0.get_party_instance(role='guest', party_id=guest)
    guest_dataio.component_param(with_label=True, output_format="dense")
    host_dataio = dataio_0.get_party_instance(role='host', party_id=host)
    host_dataio.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_feature_binning_0 = HeteroFeatureBinning(
        name='hetero_feature_binning_0',
        method='optimal',
        optimal_binning_param={"metric_method": "iv"},
        bin_indexes=-1,
    )

    hetero_feature_selection_0 = HeteroFeatureSelection(
        name='hetero_feature_selection_0',
        filter_methods=["manually", "iv_filter"],
        manually_param={"filter_out_indexes": [1]},
        iv_param={
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.001],
        },
        select_col_indexes=-1,
    )

    hetero_kmeans_0 = HeteroKmeans(name='hetero_kmeans_0', k=3, max_iter=10)
    evaluation_0 = Evaluation(name='evaluation_0', eval_type='clustering')

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    # Binning and selection both consume the intersection output; selection
    # additionally takes the binning model as its isometric model.
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))
    selection_input = Data(data=intersection_0.output.data)
    selection_model = Model(isometric_model=hetero_feature_binning_0.output.model)
    pipeline.add_component(hetero_feature_selection_0,
                           data=selection_input,
                           model=selection_model)
    kmeans_input = Data(train_data=hetero_feature_selection_0.output.data)
    pipeline.add_component(hetero_kmeans_0, data=kmeans_input)
    print(f"data: {hetero_kmeans_0.output.data.data[0]}")
    # Only the first entry of the k-means data output is evaluated.
    evaluation_input = Data(data=hetero_kmeans_0.output.data.data[0])
    pipeline.add_component(evaluation_0, data=evaluation_input)

    # Compile once every component has been added.
    pipeline.compile()

    # Fit with the backend and work mode taken from the config.
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)
    # Print the component summary of the k-means step.
    kmeans_component = pipeline.get_component("hetero_kmeans_0")
    print(kmeans_component.get_summary())
Exemplo n.º 6
0
def main(config="../../config.yaml", param="./xgb_config_binary.yaml", namespace=""):
    """Run the hetero fast SecureBoost benchmark job and collect its metrics.

    ``hetero_fast_sbt_0`` is trained on the training tables (validated on the
    validation tables) and ``hetero_fast_sbt_1`` reuses its model to predict
    on the validation tables. After fitting, the outputs of both components
    are compared with metrics chosen by ``param['eval_type']``.

    Args:
        config: A path string, which is loaded with ``load_job_config``, or
            an already loaded config object exposing ``parties``,
            ``backend`` and ``work_mode``.
        param: A path string, which is loaded with
            ``JobConfig.load_from_file``, or a mapping. The keys read are
            ``data_guest_train``, ``data_host_train``, ``data_guest_val``,
            ``data_host_val``, ``tree_num``, ``task_type``, ``loss_func``,
            ``tree_depth``, ``learning_rate``, ``guest_depth``,
            ``host_depth``, ``tree_num_per_party``, ``work_mode`` and
            ``eval_type``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.

    Returns:
        A ``(data_summary, metric_summary)`` tuple. ``data_summary`` names
        the guest/host tables used for training and for testing;
        ``metric_summary`` is the parsed summary of ``evaluation_0`` extended
        with the script-side metrics.
    """
    # String arguments are paths that still have to be loaded.
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    backend = config.backend
    work_mode = config.work_mode

    # data sets
    guest_train_data = {"name": param['data_guest_train'], "namespace": f"experiment{namespace}"}
    host_train_data = {"name": param['data_host_train'], "namespace": f"experiment{namespace}"}
    guest_validate_data = {"name": param['data_guest_val'], "namespace": f"experiment{namespace}"}
    host_validate_data = {"name": param['data_host_val'], "namespace": f"experiment{namespace}"}

    # init pipeline
    pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest, host=host,)

    # set data reader and data-io: reader_0/dataio_0 handle the training
    # tables, reader_1/dataio_1 the validation tables

    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    reader_0.get_party_instance(role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(role="host", party_id=host).component_param(table=host_train_data)
    reader_1.get_party_instance(role="guest", party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(role="host", party_id=host).component_param(table=host_validate_data)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name="dataio_1")

    # only the guest tables carry labels
    dataio_0.get_party_instance(role="guest", party_id=guest).component_param(with_label=True, output_format="dense")
    dataio_0.get_party_instance(role="host", party_id=host).component_param(with_label=False)
    dataio_1.get_party_instance(role="guest", party_id=guest).component_param(with_label=True, output_format="dense")
    dataio_1.get_party_instance(role="host", party_id=host).component_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component; note that this work_mode comes from param, not
    # from the job config
    hetero_fast_sbt_0 = HeteroFastSecureBoost(name="hetero_fast_sbt_0",
                                              num_trees=param['tree_num'],
                                              task_type=param['task_type'],
                                              objective_param={"objective": param['loss_func']},
                                              encrypt_param={"method": "iterativeAffine"},
                                              tree_param={"max_depth": param['tree_depth']},
                                              validation_freqs=1,
                                              subsample_feature_rate=1,
                                              learning_rate=param['learning_rate'],
                                              guest_depth=param['guest_depth'],
                                              host_depth=param['host_depth'],
                                              tree_num_per_party=param['tree_num_per_party'],
                                              work_mode=param['work_mode']
                                              )
    # second instance only predicts, using the model of hetero_fast_sbt_0
    hetero_fast_sbt_1 = HeteroFastSecureBoost(name="hetero_fast_sbt_1")
    # evaluation component
    evaluation_0 = Evaluation(name="evaluation_0", eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1, data=Data(data=reader_1.output.data), model=Model(dataio_0.output.model))
    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))
    pipeline.add_component(hetero_fast_sbt_0, data=Data(train_data=intersect_0.output.data,
                                                        validate_data=intersect_1.output.data))
    pipeline.add_component(hetero_fast_sbt_1, data=Data(test_data=intersect_1.output.data),
                           model=Model(hetero_fast_sbt_0.output.model))
    pipeline.add_component(evaluation_0, data=Data(data=hetero_fast_sbt_0.output.data))

    pipeline.compile()
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # pull scores and labels out of both components' output data
    sbt_0_data = pipeline.get_component("hetero_fast_sbt_0").get_output_data().get("data")
    sbt_1_data = pipeline.get_component("hetero_fast_sbt_1").get_output_data().get("data")
    sbt_0_score = extract_data(sbt_0_data, "predict_result")
    sbt_0_label = extract_data(sbt_0_data, "label")
    sbt_1_score = extract_data(sbt_1_data, "predict_result")
    sbt_1_label = extract_data(sbt_1_data, "label")
    sbt_0_score_label = extract_data(sbt_0_data, "predict_result", keep_id=True)
    sbt_1_score_label = extract_data(sbt_1_data, "predict_result", keep_id=True)
    metric_summary = parse_summary_result(pipeline.get_component("evaluation_0").get_summary())
    # script-side metrics depend on the evaluation type; any other eval_type
    # leaves metric_summary as parsed
    if param['eval_type'] == "regression":
        desc_sbt_0 = regression_metric.Describe().compute(sbt_0_score)
        desc_sbt_1 = regression_metric.Describe().compute(sbt_1_score)
        metric_summary["script_metrics"] = {"hetero_fast_sbt_train": desc_sbt_0,
                                            "hetero_fast_sbt_validate": desc_sbt_1}
    elif param['eval_type'] == "binary":
        metric_sbt = {
            "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label),
            "ks_2samp": classification_metric.KSTest.compute(sbt_0_score, sbt_1_score),
            "mAP_D_value": classification_metric.AveragePrecisionScore().compute(sbt_0_score, sbt_1_score, sbt_0_label,
                                                                                 sbt_1_label)}
        metric_summary["distribution_metrics"] = {"hetero_fast_sbt": metric_sbt}
    elif param['eval_type'] == "multi":
        metric_sbt = {
            "score_diversity_ratio": classification_metric.Distribution.compute(sbt_0_score_label, sbt_1_score_label)}
        metric_summary["distribution_metrics"] = {"hetero_fast_sbt": metric_sbt}

    # "test" reports the validation tables, which are what hetero_fast_sbt_1
    # predicts on; the training tables were reported here by mistake before.
    data_summary = {"train": {"guest": guest_train_data["name"], "host": host_train_data["name"]},
                    "test": {"guest": guest_validate_data["name"], "host": host_validate_data["name"]}
                    }

    return data_summary, metric_summary
Exemplo n.º 7
0
def make_normal_dsl(config,
                    namespace,
                    lr_param,
                    is_multi_host=False,
                    has_validate=False,
                    is_cv=False,
                    is_ovr=False,
                    is_dense=True,
                    need_evaluation=True):
    """Build and compile a reader -> DataIO -> intersection -> HeteroLR pipeline.

    Args:
        config: Loaded job config exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
        lr_param: Keyword arguments forwarded to ``HeteroLR`` for
            ``hetero_lr_0``.
        is_multi_host: Use the whole host list instead of only the first
            host.
        has_validate: Add a second reader and a ``*_1`` copy of each
            preprocessing component, and pass the result to the LR as
            validation data.
        is_cv: Compile and return right after adding ``hetero_lr_0``; no
            prediction or evaluation component is added.
        is_ovr: Use the vehicle_scale tables instead of the breast tables.
        is_dense: Select ``'dense'`` rather than ``'sparse'`` as the
            ``DataIO`` output format.
        need_evaluation: Append a binary ``Evaluation`` component.

    Returns:
        The compiled pipeline.
    """
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host if is_multi_host else parties.host[0]
    arbiter = parties.arbiter[0]

    # Train and eval tables have the same names; they differ only by data set.
    dataset = "vehicle_scale_hetero" if is_ovr else "breast_hetero"
    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": f"{dataset}_guest", "namespace": table_namespace}
    host_train_data = {"name": f"{dataset}_host", "namespace": table_namespace}
    guest_eval_data = {"name": f"{dataset}_guest", "namespace": table_namespace}
    host_eval_data = {"name": f"{dataset}_host", "namespace": table_namespace}

    # Pipeline with the guest as initiator and all roles registered.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # Reader: one training table per party.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role='host', party_id=hosts)
    host_reader.component_param(table=host_train_data)

    # DataIO: only the guest table carries labels.
    dataio_0 = DataIO(name="dataio_0",
                      output_format='dense' if is_dense else 'sparse')
    guest_dataio = dataio_0.get_party_instance(role='guest', party_id=guest)
    guest_dataio.component_param(with_label=True)
    host_dataio = dataio_0.get_party_instance(role='host', party_id=hosts)
    host_dataio.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))

    # Preprocessing components that get a "_1" twin on the validation branch.
    train_line = [dataio_0, intersection_0]

    last_cpn = None
    if has_validate:
        reader_1 = Reader(name="reader_1")
        guest_eval_reader = reader_1.get_party_instance(role='guest', party_id=guest)
        guest_eval_reader.component_param(table=guest_eval_data)
        host_eval_reader = reader_1.get_party_instance(role='host', party_id=hosts)
        host_eval_reader.component_param(table=host_eval_data)
        pipeline.add_component(reader_1)
        last_cpn = reader_1
        for train_cpn in train_line:
            # Same name as the training component, last segment set to "1".
            name_parts = train_cpn.name.split('_')
            name_parts[-1] = '1'
            validate_cpn = type(train_cpn)(name="_".join(name_parts))
            # Reuse the training component's model when it exposes one.
            reuses_model = hasattr(train_cpn.output, "model")
            links = {"data": Data(data=last_cpn.output.data)}
            if reuses_model:
                links["model"] = Model(train_cpn.output.model)
            pipeline.add_component(validate_cpn, **links)
            last_cpn = validate_cpn

    hetero_lr_0 = HeteroLR(**lr_param)
    if has_validate:
        lr_input = Data(train_data=intersection_0.output.data,
                        validate_data=last_cpn.output.data)
    else:
        lr_input = Data(train_data=intersection_0.output.data)
    pipeline.add_component(hetero_lr_0, data=lr_input)

    # Cross validation stops at the LR; everything else adds prediction
    # and evaluation.
    if not is_cv:
        evaluation_data = [hetero_lr_0.output.data]
        if has_validate:
            hetero_lr_1 = HeteroLR(name='hetero_lr_1')
            pipeline.add_component(hetero_lr_1,
                                   data=Data(test_data=last_cpn.output.data),
                                   model=Model(hetero_lr_0.output.model))
            evaluation_data.append(hetero_lr_1.output.data)

        if need_evaluation:
            evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
            pipeline.add_component(evaluation_0, data=Data(data=evaluation_data))

    pipeline.compile()
    return pipeline
Exemplo n.º 8
0
def make_feature_engineering_dsl(config,
                                 namespace,
                                 lr_param,
                                 is_multi_host=False,
                                 has_validate=False,
                                 is_cv=False,
                                 is_ovr=False):
    """Build and compile a HeteroLR pipeline with a feature-engineering chain.

    The training branch is reader -> DataIO -> intersection -> feature scale
    -> feature binning -> feature selection -> one-hot encoder -> HeteroLR.

    Args:
        config: Loaded job config exposing ``parties``.
        namespace: Suffix appended to ``"experiment"`` to build the table
            namespace.
        lr_param: Keyword arguments forwarded to ``HeteroLR`` for
            ``hetero_lr_0``.
        is_multi_host: Use the whole host list instead of only the first
            host.
        has_validate: Add a second reader and a ``*_1`` copy of each
            preprocessing component, and pass the result to the LR as
            validation data.
        is_cv: Compile and return right after adding ``hetero_lr_0``; no
            prediction or evaluation component is added.
        is_ovr: Use the vehicle_scale tables instead of the breast tables.

    Returns:
        The compiled pipeline.
    """
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host if is_multi_host else parties.host[0]
    arbiter = parties.arbiter[0]

    # Train and eval tables have the same names; they differ only by data set.
    dataset = "vehicle_scale_hetero" if is_ovr else "breast_hetero"
    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": f"{dataset}_guest", "namespace": table_namespace}
    host_train_data = {"name": f"{dataset}_host", "namespace": table_namespace}
    guest_eval_data = {"name": f"{dataset}_guest", "namespace": table_namespace}
    host_eval_data = {"name": f"{dataset}_host", "namespace": table_namespace}

    # Pipeline with the guest as initiator and all roles registered.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # Reader: one training table per party.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest)
    guest_reader.component_param(table=guest_train_data)
    host_reader = reader_0.get_party_instance(role='host', party_id=hosts)
    host_reader.component_param(table=host_train_data)

    # DataIO: only the guest table carries labels.
    dataio_0 = DataIO(name="dataio_0")
    guest_dataio = dataio_0.get_party_instance(role='guest', party_id=guest)
    guest_dataio.component_param(with_label=True, output_format="dense")
    host_dataio = dataio_0.get_party_instance(role='host', party_id=hosts)
    host_dataio.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))

    feature_scale_0 = FeatureScale(name='feature_scale_0',
                                   method="standard_scale",
                                   need_run=True)
    pipeline.add_component(feature_scale_0,
                           data=Data(data=intersection_0.output.data))

    hetero_feature_binning_0 = HeteroFeatureBinning(
        name='hetero_feature_binning_0',
        method="quantile",
        compress_thres=10000,
        head_size=10000,
        error=0.001,
        bin_num=10,
        bin_indexes=-1,
        adjustment_factor=0.5,
        local_only=False,
        need_run=True,
        transform_param={
            "transform_cols": -1,
            "transform_type": "bin_num",
        },
    )
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=feature_scale_0.output.data))

    hetero_feature_selection_0 = HeteroFeatureSelection(
        name='hetero_feature_selection_0',
        select_col_indexes=-1,
        filter_methods=["manually", "iv_value_thres", "iv_percentile"],
        manually_param={"filter_out_indexes": None},
        iv_value_param={"value_threshold": 1.0},
        iv_percentile_param={"percentile_threshold": 0.9},
        need_run=True,
    )
    # Selection takes the binning model as its isometric model.
    selection_input = Data(data=hetero_feature_binning_0.output.data)
    selection_model = Model(isometric_model=[hetero_feature_binning_0.output.model])
    pipeline.add_component(hetero_feature_selection_0,
                           data=selection_input,
                           model=selection_model)

    one_hot_encoder_0 = OneHotEncoder(name='one_hot_encoder_0',
                                      transform_col_indexes=-1,
                                      transform_col_names=None,
                                      need_run=True)
    pipeline.add_component(
        one_hot_encoder_0,
        data=Data(data=hetero_feature_selection_0.output.data))

    # Preprocessing components that get a "_1" twin on the validation branch.
    train_line = [
        dataio_0,
        intersection_0,
        feature_scale_0,
        hetero_feature_binning_0,
        hetero_feature_selection_0,
        one_hot_encoder_0,
    ]

    last_cpn = None
    if has_validate:
        reader_1 = Reader(name="reader_1")
        guest_eval_reader = reader_1.get_party_instance(role='guest', party_id=guest)
        guest_eval_reader.component_param(table=guest_eval_data)
        host_eval_reader = reader_1.get_party_instance(role='host', party_id=hosts)
        host_eval_reader.component_param(table=host_eval_data)
        pipeline.add_component(reader_1)
        last_cpn = reader_1
        for train_cpn in train_line:
            # Same name as the training component, last segment set to "1".
            name_parts = train_cpn.name.split('_')
            name_parts[-1] = '1'
            validate_cpn = type(train_cpn)(name="_".join(name_parts))
            # Reuse the training component's model when it exposes one.
            reuses_model = hasattr(train_cpn.output, "model")
            links = {"data": Data(data=last_cpn.output.data)}
            if reuses_model:
                links["model"] = Model(train_cpn.output.model)
            pipeline.add_component(validate_cpn, **links)
            last_cpn = validate_cpn

    hetero_lr_0 = HeteroLR(**lr_param)
    if has_validate:
        lr_input = Data(train_data=one_hot_encoder_0.output.data,
                        validate_data=last_cpn.output.data)
    else:
        lr_input = Data(train_data=one_hot_encoder_0.output.data)
    pipeline.add_component(hetero_lr_0, data=lr_input)

    # Cross validation stops at the LR; everything else adds prediction
    # and evaluation.
    if not is_cv:
        evaluation_data = [hetero_lr_0.output.data]
        if has_validate:
            hetero_lr_1 = HeteroLR(name='hetero_lr_1')
            pipeline.add_component(hetero_lr_1,
                                   data=Data(test_data=last_cpn.output.data),
                                   model=Model(hetero_lr_0.output.model))
            evaluation_data.append(hetero_lr_1.output.data)

        evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
        pipeline.add_component(evaluation_0, data=Data(data=evaluation_data))

    pipeline.compile()
    return pipeline
def make_normal_dsl(config, namespace):
    """Build and compile a pipeline that selects features using a homo SBT model.

    The compiled pipeline chains reader -> DataIO -> feature scale ->
    homo SecureBoost. A feature selection step then consumes the scaled data
    together with the SecureBoost model (as an isometric model), a homo LR is
    trained on the selected features, and its output is evaluated.

    Args:
        config: object exposing ``parties`` with ``guest``, ``host`` and
            ``arbiter`` sequences; only the first entry of each is used.
        namespace: suffix appended to ``"experiment"`` to form the namespace
            of the input tables.

    Returns:
        The compiled pipeline; ``fit`` is not called here.
    """
    party_cfg = config.parties
    guest_id = party_cfg.guest[0]
    host_id = party_cfg.host[0]
    arbiter_id = party_cfg.arbiter[0]

    def _table(table_name):
        # Every input table lives under the same experiment namespace.
        return {"name": table_name, "namespace": f"experiment{namespace}"}

    guest_table = _table("breast_homo_guest")
    host_table = _table("breast_homo_host")

    # The guest initiates the job; guest, host and arbiter all take part.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id, arbiter=arbiter_id)

    # Each party reads its own training table.
    reader_0 = Reader(name="reader_0")
    for role, party_id, table in (('guest', guest_id, guest_table),
                                  ('host', host_id, host_table)):
        reader_0.get_party_instance(role=role, party_id=party_id).component_param(table=table)

    # Both sides are configured with labels; only the guest sets output_format.
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest_id).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host_id).component_param(
        with_label=True)

    scale_0 = FeatureScale(name='scale_0')

    sbt_kwargs = {
        "name": "homo_secureboost_0",
        "num_trees": 3,
        "task_type": 'classification',
        "objective_param": {"objective": "cross_entropy"},
        "tree_param": {"max_depth": 3},
        "validation_freqs": 1,
    }
    homo_sbt_0 = HomoSecureBoost(**sbt_kwargs)

    # Wire the first stage: read -> transform -> scale -> boost.
    pipeline.add_component(reader_0)
    reader_output = Data(data=reader_0.output.data)
    pipeline.add_component(dataio_0, data=reader_output)
    dataio_output = Data(data=dataio_0.output.data)
    pipeline.add_component(scale_0, data=dataio_output)
    pipeline.add_component(homo_sbt_0, data=Data(train_data=scale_0.output.data))

    # Feature selection filtered by the SecureBoost feature importance.
    feature_selection_0 = HeteroFeatureSelection(
        name="hetero_feature_selection_0",
        select_col_indexes=-1,
        select_names=[],
        filter_methods=["homo_sbt_filter"],
        sbt_param={
            "metrics": "feature_importance",
            "filter_type": "threshold",
            "take_high": True,
            "threshold": 0.03,
        },
    )

    homo_lr_0 = HomoLR(
        name='homo_lr_0',
        penalty="L2",
        optimizer="sgd",
        tol=1e-05,
        alpha=0.01,
        max_iter=30,
        early_stop="diff",
        batch_size=-1,
        learning_rate=0.15,
        decay=1,
        decay_sqrt=True,
        init_param={"init_method": "zeros"},
        encrypt_param={"method": None},
        cv_param={
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False,
        },
    )

    # Selection reads the scaled data and the boosting model side by side.
    pipeline.add_component(
        feature_selection_0,
        data=Data(data=scale_0.output.data),
        model=Model(isometric_model=homo_sbt_0.output.model),
    )
    pipeline.add_component(homo_lr_0, data=Data(train_data=feature_selection_0.output.data))

    evaluation_0 = Evaluation(name='evaluation_0')
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # Compiling turns the assembled components into the job conf and dsl.
    pipeline.compile()
    return pipeline
Exemplo n.º 10
0
def main(config="../../config.yaml", namespace=""):
    """Run a hetero K-means job with one guest, two hosts and an arbiter.

    The pipeline is reader -> DataIO -> intersection -> hetero K-means ->
    clustering evaluation. After fitting, the K-means component summary is
    printed.

    Args:
        config: path to a job config file, or an already loaded config object
            exposing ``parties``, ``backend`` and ``work_mode``.
        namespace: suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest_id = party_cfg.guest[0]
    host_ids = party_cfg.host
    arbiter_id = party_cfg.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_table = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}",
    }
    # Both hosts are pointed at the same host table name.
    host_tables = [
        {"name": "breast_hetero_host", "namespace": f"experiment{namespace}"}
        for _ in range(2)
    ]

    # The guest initiates the job; every configured host is registered.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_ids, arbiter=arbiter_id)

    # Reader: one table for the guest, one for each of the first two hosts.
    reader_0 = Reader(name="reader_0")
    guest_reader = reader_0.get_party_instance(role='guest', party_id=guest_id)
    guest_reader.algorithm_param(table=guest_table)
    for idx in range(2):
        host_reader = reader_0.get_party_instance(role='host',
                                                  party_id=host_ids[idx])
        host_reader.algorithm_param(table=host_tables[idx])

    # DataIO: only the guest side carries labels.
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest_id).algorithm_param(
        with_label=True, output_format="dense")
    for idx in range(2):
        host_dataio = dataio_0.get_party_instance(role='host',
                                                  party_id=host_ids[idx])
        host_dataio.algorithm_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    hetero_kmeans_0 = HeteroKmeans(name='hetero_kmeans_0', k=3, max_iter=10)
    evaluation_0 = Evaluation(name='evaluation_0', eval_type='clustering')

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(
        hetero_kmeans_0, data=Data(train_data=intersection_0.output.data))
    # Evaluation consumes the first entry of the K-means data output.
    kmeans_first_output = hetero_kmeans_0.output.data.data[0]
    pipeline.add_component(evaluation_0, data=Data(data=kmeans_first_output))

    # Compiling turns the assembled components into the job conf and dsl.
    pipeline.compile()

    pipeline.fit(backend=backend, work_mode=work_mode)
    kmeans_summary = pipeline.get_component("hetero_kmeans_0").get_summary()
    print(kmeans_summary)
def main(config="../../config.yaml", namespace=""):
    """Train and evaluate a hetero SSHE logistic regression with validation data.

    Two reader -> DataTransform -> intersection branches are built (train and
    eval); the eval DataTransform reuses the model of the train one. The LR
    is trained with the eval branch as validation data, a second LR component
    predicts on the eval branch using the trained model, and both outputs are
    evaluated together.

    Args:
        config: path to a job config file, or an already loaded config object
            exposing ``parties``.
        namespace: suffix appended to ``"experiment"`` to form the namespace
            of the input tables.

    Returns:
        The fitted pipeline.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest_id = party_cfg.guest[0]
    host_id = party_cfg.host[0]

    def _table(table_name):
        # Every input table lives under the same experiment namespace.
        return {"name": table_name, "namespace": f"experiment{namespace}"}

    # Train and eval point at the same table names on each side.
    guest_train_table = _table("breast_hetero_guest")
    host_train_table = _table("breast_hetero_host")
    guest_eval_table = _table("breast_hetero_guest")
    host_eval_table = _table("breast_hetero_host")

    # The guest initiates the job; no arbiter is registered for this job.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id)

    # One reader per branch, each configured for guest first, then host.
    readers = []
    for reader_name, guest_table, host_table in (
            ("reader_0", guest_train_table, host_train_table),
            ("reader_1", guest_eval_table, host_eval_table)):
        reader = Reader(name=reader_name)
        reader.get_party_instance(
            role='guest', party_id=guest_id).component_param(table=guest_table)
        reader.get_party_instance(
            role='host', party_id=host_id).component_param(table=host_table)
        readers.append(reader)
    reader_0, reader_1 = readers

    data_transform_0 = DataTransform(name="data_transform_0", output_format='dense')
    data_transform_1 = DataTransform(name="data_transform_1", output_format='dense')

    # Only the train-side transform gets per-party label settings here.
    for role, party_id, has_label in (('guest', guest_id, True),
                                      ('host', host_id, False)):
        data_transform_0.get_party_instance(
            role=role, party_id=party_id).component_param(with_label=has_label)

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)

    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    # The eval transform is driven by the model of the train transform.
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    pipeline.add_component(intersection_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=data_transform_1.output.data))

    hetero_sshe_lr_0 = HeteroSSHELR(
        name="hetero_sshe_lr_0",
        penalty="L2",
        optimizer="rmsprop",
        tol=0.0001,
        alpha=0.01,
        max_iter=30,
        early_stop="diff",
        batch_size=-1,
        callback_param={
            "callbacks": ["EarlyStopping", "PerformanceEvaluate"],
            "validation_freqs": 1,
            "early_stopping_rounds": 3,
        },
        learning_rate=0.15,
        init_param={"init_method": "zeros"},
        reveal_strategy="respectively",
        reveal_every_iter=True,
    )
    training_inputs = Data(train_data=intersection_0.output.data,
                           validate_data=intersection_1.output.data)
    pipeline.add_component(hetero_sshe_lr_0, data=training_inputs)

    # Second LR component: predict on the eval branch with the trained model.
    hetero_sshe_lr_1 = HeteroSSHELR(name='hetero_sshe_lr_1')
    pipeline.add_component(
        hetero_sshe_lr_1,
        data=Data(test_data=intersection_1.output.data),
        model=Model(hetero_sshe_lr_0.output.model),
    )
    evaluation_data = [hetero_sshe_lr_0.output.data, hetero_sshe_lr_1.output.data]

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    pipeline.add_component(evaluation_0, data=Data(data=evaluation_data))

    pipeline.compile()

    pipeline.fit()
    # Show the summaries of the trained model and of the evaluation.
    for component_name in ("hetero_sshe_lr_0", "evaluation_0"):
        prettify(pipeline.get_component(component_name).get_summary())
    return pipeline
Exemplo n.º 12
0
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero fast SecureBoost model with one guest and two hosts.

    Train and validate branches (reader -> DataTransform -> intersection) are
    built in parallel; the validate DataTransform reuses the model of the
    train one. The boosting component trains with the validate branch as
    validation data, its output is evaluated, and its summary is printed.

    Args:
        config: path to a job config file, or an already loaded config object
            exposing ``parties``.
        namespace: suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest_id = party_cfg.guest[0]
    host_ids = [party_cfg.host[0], party_cfg.host[1]]

    def _table(table_name):
        # Every input table lives under the same experiment namespace.
        return {"name": table_name, "namespace": f"experiment{namespace}"}

    # Train and validate use the same table names; both hosts share one name.
    guest_train_table = _table("breast_hetero_guest")
    host_train_tables = [_table("breast_hetero_host"), _table("breast_hetero_host")]
    guest_validate_table = _table("breast_hetero_guest")
    host_validate_tables = [_table("breast_hetero_host"), _table("breast_hetero_host")]

    pipeline = PipeLine().set_initiator(role="guest", party_id=guest_id).set_roles(
        guest=guest_id, host=host_ids)

    # Readers: guest first, then each host, for the train and validate sides.
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    for reader, guest_table, host_tables in (
            (reader_0, guest_train_table, host_train_tables),
            (reader_1, guest_validate_table, host_validate_tables)):
        reader.get_party_instance(
            role="guest", party_id=guest_id).component_param(table=guest_table)
        for host_id, host_table in zip(host_ids, host_tables):
            reader.get_party_instance(
                role="host", party_id=host_id).component_param(table=host_table)

    # Data transforms: only the guest carries labels.
    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    for transform in (data_transform_0, data_transform_1):
        transform.get_party_instance(role="guest", party_id=guest_id).component_param(
            with_label=True, output_format="dense")
        for host_id in host_ids:
            transform.get_party_instance(
                role="host", party_id=host_id).component_param(with_label=False)

    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    boost_kwargs = {
        "name": "hetero_fast_secure_boost_0",
        "num_trees": 3,
        "tree_num_per_party": 1,
        "task_type": 'classification',
        "objective_param": {"objective": "cross_entropy"},
        "encrypt_param": {"method": "Paillier"},
        "tree_param": {"max_depth": 3},
        "validation_freqs": 1,
        "work_mode": 'mix',
    }
    hetero_fast_secure_boost_0 = HeteroFastSecureBoost(**boost_kwargs)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0, data=Data(data=reader_0.output.data))
    # The validate transform is driven by the model of the train transform.
    pipeline.add_component(
        data_transform_1,
        data=Data(data=reader_1.output.data),
        model=Model(data_transform_0.output.model),
    )
    pipeline.add_component(intersect_0, data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=data_transform_1.output.data))
    boosting_inputs = Data(train_data=intersect_0.output.data,
                           validate_data=intersect_1.output.data)
    pipeline.add_component(hetero_fast_secure_boost_0, data=boosting_inputs)
    pipeline.add_component(
        evaluation_0, data=Data(data=hetero_fast_secure_boost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    print("fitting hetero secureboost done, result:")
    boost_summary = pipeline.get_component("hetero_fast_secure_boost_0").get_summary()
    print(boost_summary)
Exemplo n.º 13
0
def main(config="../../config.yaml",
         param='./xgb_config_binary.yaml',
         namespace=""):
    """Fit a homo SecureBoost model described by ``param`` and report its evaluation.

    Args:
        config: path to a job config file, or an already loaded config object
            exposing ``parties``, ``backend`` and ``work_mode``.
        param: path to a parameter file, or an already loaded mapping. The
            keys read here are ``data_guest_train``, ``data_guest_val``,
            ``data_host_train``, ``data_host_val``, ``tree_num``,
            ``task_type``, ``loss_func``, ``tree_depth``, ``learning_rate``
            and ``eval_type``.
        namespace: suffix appended to ``"experiment"`` to form the namespace
            of the input tables.

    Returns:
        A ``(data_summary, evaluation_summary)`` tuple: the table names per
        role and the summary of the ``evaluation_0`` component.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    party_cfg = config.parties
    guest_id = party_cfg.guest[0]
    host_id = party_cfg.host[0]
    arbiter_id = party_cfg.arbiter[0]

    backend = config.backend
    work_mode = config.work_mode

    def _table(param_key):
        # Table names come from the parameter file; the namespace is shared.
        return {"name": param[param_key], "namespace": f"experiment{namespace}"}

    guest_train_data = _table('data_guest_train')
    guest_validate_data = _table('data_guest_val')
    host_train_data = _table('data_host_train')
    host_validate_data = _table('data_host_val')

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest_id).set_roles(
        guest=guest_id, host=host_id, arbiter=arbiter_id)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    # Configure the train branch first, then the validate branch; within a
    # branch the reader tables are set before the DataIO options.
    branches = (
        (reader_0, dataio_0, guest_train_data, host_train_data),
        (reader_1, dataio_1, guest_validate_data, host_validate_data),
    )
    for reader, dataio, guest_table, host_table in branches:
        reader.get_party_instance(
            role='guest', party_id=guest_id).component_param(table=guest_table)
        reader.get_party_instance(
            role='host', party_id=host_id).component_param(table=host_table)
        for role, party_id in (('guest', guest_id), ('host', host_id)):
            dataio.get_party_instance(role=role, party_id=party_id).component_param(
                with_label=True, output_format="dense")

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        subsample_feature_rate=1,
        learning_rate=param['learning_rate'],
        bin_num=50,
    )

    evaluation_0 = Evaluation(name='evaluation_0', eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    # The validate DataIO is driven by the model of the train DataIO.
    pipeline.add_component(
        dataio_1,
        data=Data(data=reader_1.output.data),
        model=Model(dataio_0.output.model),
    )
    boosting_inputs = Data(train_data=dataio_0.output.data,
                           validate_data=dataio_1.output.data)
    pipeline.add_component(homo_secureboost_0, data=boosting_inputs)
    pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))

    # NOTE(review): "test" repeats the train table names even though validate
    # tables are defined above; kept as-is, confirm whether this is intended.
    train_names = {
        "guest": guest_train_data["name"],
        "host": host_train_data["name"],
    }
    data_summary = {"train": train_names, "test": dict(train_names)}

    evaluation_summary = pipeline.get_component('evaluation_0').get_summary()
    return data_summary, evaluation_summary
Exemplo n.º 14
0
def main(config="../../config.yaml", namespace=""):
    """Train a homo LR on one-hot encoded, scaled data and evaluate train and test output.

    The train branch is reader -> DataIO -> homo one-hot -> scale -> homo LR.
    The eval branch mirrors it, with every step reusing the model of its
    train-side counterpart, and ends in a second LR component that predicts
    with the trained model. Both LR outputs feed one binary evaluation, which
    is switched off on the host. The LR and evaluation summaries are printed
    as JSON.

    Args:
        config: path to a job config file, or an already loaded config object
            exposing ``parties``, ``backend`` and ``work_mode``.
        namespace: suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
    """
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest_id = party_cfg.guest[0]
    host_id = party_cfg.host[0]
    arbiter_id = party_cfg.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    def _table(table_name):
        # Every input table lives under the same experiment namespace.
        return {"name": table_name, "namespace": f"experiment{namespace}"}

    guest_train_table = _table("heart_nonscaled_hetero_guest")
    host_train_table = _table("heart_nonscaled_hetero_host")
    # Guest and host evaluate against the same test table.
    guest_eval_table = _table("heart_nonscaled_hetero_test")
    host_eval_table = _table("heart_nonscaled_hetero_test")

    # The guest initiates the job; guest, host and arbiter all take part.
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest_id)
    pipeline.set_roles(guest=guest_id, host=host_id, arbiter=arbiter_id)

    # One reader per branch, each configured for guest first, then host.
    readers = []
    for reader_name, guest_table, host_table in (
            ("reader_0", guest_train_table, host_train_table),
            ("reader_1", guest_eval_table, host_eval_table)):
        reader = Reader(name=reader_name)
        reader.get_party_instance(
            role='guest', party_id=guest_id).algorithm_param(table=guest_table)
        reader.get_party_instance(
            role='host', party_id=host_id).algorithm_param(table=host_table)
        readers.append(reader)
    reader_0, reader_1 = readers

    # Train-side components carry the parameters; the "_1" twins are created
    # bare and receive the train-side model when added to the pipeline.
    dataio_0 = DataIO(name="dataio_0",
                      with_label=True,
                      output_format="dense",
                      label_name='target')
    dataio_1 = DataIO(name="dataio_1")

    homo_onehot_0 = HomoOneHotEncoder(
        name='homo_onehot_0',
        transform_col_indexes=[1, 2, 5, 6, 8, 10, 11, 12],
        transform_col_names=[],
        need_alignment=True,
    )
    homo_onehot_1 = HomoOneHotEncoder(name='homo_onehot_1')

    scale_0 = FeatureScale(name='scale_0', method="standard_scale")
    scale_1 = FeatureScale(name='scale_1')

    homo_lr_0 = HomoLR(
        name='homo_lr_0',
        penalty="L2",
        optimizer="sgd",
        tol=1e-05,
        alpha=0.01,
        max_iter=3,
        early_stop="diff",
        batch_size=500,
        learning_rate=0.15,
        decay=1,
        decay_sqrt=True,
        init_param={"init_method": "zeros"},
        encrypt_param={"method": "Paillier"},
        cv_param={
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False,
        },
    )
    homo_lr_1 = HomoLR(name='homo_lr_1')

    # Components are added in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(
        dataio_1,
        data=Data(data=reader_1.output.data),
        model=Model(dataio_0.output.model),
    )

    pipeline.add_component(homo_onehot_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(
        homo_onehot_1,
        data=Data(data=dataio_1.output.data),
        model=Model(homo_onehot_0.output.model),
    )
    pipeline.add_component(scale_0, data=Data(data=homo_onehot_0.output.data))
    pipeline.add_component(
        scale_1,
        data=Data(data=homo_onehot_1.output.data),
        model=Model(scale_0.output.model),
    )
    pipeline.add_component(homo_lr_0, data=Data(train_data=scale_0.output.data))
    pipeline.add_component(
        homo_lr_1,
        data=Data(test_data=scale_1.output.data),
        model=Model(homo_lr_0.output.model),
    )

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    # The host side does not run the evaluation.
    host_evaluation = evaluation_0.get_party_instance(role='host', party_id=host_id)
    host_evaluation.algorithm_param(need_run=False)
    lr_outputs = [homo_lr_0.output.data, homo_lr_1.output.data]
    pipeline.add_component(evaluation_0, data=Data(data=lr_outputs))
    pipeline.compile()

    pipeline.fit(backend=backend, work_mode=work_mode)
    # Dump the summaries of the trained model and of the evaluation.
    for component_name in ("homo_lr_0", "evaluation_0"):
        summary = pipeline.get_component(component_name).get_summary()
        print(json.dumps(summary, indent=4, ensure_ascii=False))
Exemplo n.º 15
0
def main(config="../../config.yaml",
         param='./xgb_config_binary.yaml',
         namespace=""):
    """Fit a homo SecureBoost model and compare its train and validate predictions.

    A first boosting component trains with validation data; a second one
    predicts on the validate branch using the trained model. The output data
    of both is pulled back and, depending on ``param['eval_type']``, extra
    script-side metrics are attached to the parsed evaluation summary.

    Args:
        config: path to a job config file, or an already loaded config object
            exposing ``parties``.
        param: path to a parameter file, or an already loaded mapping. The
            keys read here are ``data_guest_train``, ``data_guest_val``,
            ``data_host_train``, ``data_host_val``, ``tree_num``,
            ``task_type``, ``loss_func``, ``tree_depth``, ``learning_rate``
            and ``eval_type``.
        namespace: suffix appended to ``"experiment"`` to form the namespace
            of the input tables.

    Returns:
        A ``(data_summary, metric_summary)`` tuple: the table names per role
        and the parsed evaluation summary, possibly extended with
        ``"script_metrics"`` or ``"distribution_metrics"``.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    party_cfg = config.parties
    guest_id = party_cfg.guest[0]
    host_id = party_cfg.host[0]
    arbiter_id = party_cfg.arbiter[0]

    def _table(param_key):
        # Table names come from the parameter file; the namespace is shared.
        return {"name": param[param_key], "namespace": f"experiment{namespace}"}

    guest_train_data = _table('data_guest_train')
    guest_validate_data = _table('data_guest_val')
    host_train_data = _table('data_host_train')
    host_validate_data = _table('data_host_val')

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest_id).set_roles(
        guest=guest_id, host=host_id, arbiter=arbiter_id)

    dataio_0, dataio_1 = DataIO(name="dataio_0"), DataIO(name='dataio_1')
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name='reader_1')

    # Configure the train branch first, then the validate branch; within a
    # branch the reader tables are set before the DataIO options.
    branches = (
        (reader_0, dataio_0, guest_train_data, host_train_data),
        (reader_1, dataio_1, guest_validate_data, host_validate_data),
    )
    for reader, dataio, guest_table, host_table in branches:
        reader.get_party_instance(
            role='guest', party_id=guest_id).component_param(table=guest_table)
        reader.get_party_instance(
            role='host', party_id=host_id).component_param(table=host_table)
        for role, party_id in (('guest', guest_id), ('host', host_id)):
            dataio.get_party_instance(role=role, party_id=party_id).component_param(
                with_label=True, output_format="dense")

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=param['tree_num'],
        task_type=param['task_type'],
        objective_param={"objective": param['loss_func']},
        tree_param={"max_depth": param['tree_depth']},
        validation_freqs=1,
        subsample_feature_rate=1,
        learning_rate=param['learning_rate'],
        bin_num=50,
    )
    homo_secureboost_1 = HomoSecureBoost(name="homo_secureboost_1")

    evaluation_0 = Evaluation(name='evaluation_0', eval_type=param['eval_type'])

    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(reader_1)
    # The validate DataIO is driven by the model of the train DataIO.
    pipeline.add_component(
        dataio_1,
        data=Data(data=reader_1.output.data),
        model=Model(dataio_0.output.model),
    )
    boosting_inputs = Data(train_data=dataio_0.output.data,
                           validate_data=dataio_1.output.data)
    pipeline.add_component(homo_secureboost_0, data=boosting_inputs)
    # Second boosting component: predict on the validate branch only.
    pipeline.add_component(
        homo_secureboost_1,
        data=Data(test_data=dataio_1.output.data),
        model=Model(homo_secureboost_0.output.model),
    )
    pipeline.add_component(evaluation_0, data=Data(homo_secureboost_0.output.data))

    pipeline.compile()
    pipeline.fit()

    # Pull the output data of both boosting components back for local metrics.
    train_output = pipeline.get_component(
        "homo_secureboost_0").get_output_data().get("data")
    validate_output = pipeline.get_component(
        "homo_secureboost_1").get_output_data().get("data")
    train_score = extract_data(train_output, "predict_result")
    train_label = extract_data(train_output, "label")
    validate_score = extract_data(validate_output, "predict_result")
    validate_label = extract_data(validate_output, "label")
    train_score_with_id = extract_data(train_output, "predict_result", keep_id=True)
    validate_score_with_id = extract_data(validate_output, "predict_result",
                                          keep_id=True)
    metric_summary = parse_summary_result(
        pipeline.get_component("evaluation_0").get_summary())

    eval_type = param['eval_type']
    if eval_type == "regression":
        train_description = regression_metric.Describe().compute(train_score)
        validate_description = regression_metric.Describe().compute(validate_score)
        metric_summary["script_metrics"] = {
            "sbt_train": train_description,
            "sbt_validate": validate_description,
        }
    elif eval_type in ("binary", "multi"):
        # Both classification flavours report the score distribution ratio;
        # binary additionally gets the KS test and average precision entries.
        metric_sbt = {
            "score_diversity_ratio": classification_metric.Distribution.compute(
                train_score_with_id, validate_score_with_id),
        }
        if eval_type == "binary":
            metric_sbt["ks_2samp"] = classification_metric.KSTest.compute(
                train_score, validate_score)
            metric_sbt["mAP_D_value"] = (
                classification_metric.AveragePrecisionScore().compute(
                    train_score, validate_score, train_label, validate_label))
        metric_summary["distribution_metrics"] = {"homo_sbt": metric_sbt}

    data_summary = {
        role_group: {"guest": guest_table["name"], "host": host_table["name"]}
        for role_group, guest_table, host_table in (
            ("train", guest_train_data, host_train_data),
            ("test", guest_validate_data, host_validate_data),
        )
    }

    return data_summary, metric_summary
def main(config="../../config.yaml", namespace=""):
    """Fit a homo SecureBoost classifier on the breast tables, then predict.

    The training pipeline is reader -> data transform -> HomoSecureBoost ->
    binary evaluation, with a second reader/transform pair supplying the
    validation data. After fitting, the transform and the booster are
    deployed and a prediction pipeline is run on the validation reader.

    Args:
        config: Either an already-loaded job config or, when a string, a path
            that is handed to ``load_job_config``.
        namespace: Text appended to ``"experiment"`` to build the namespace
            of every table the readers point at.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    # Only the first party listed under each role takes part.
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_homo_guest", "namespace": table_namespace}
    # Guest and host validate against the same test table.
    guest_validate_data = {"name": "breast_homo_test", "namespace": table_namespace}
    host_train_data = {"name": "breast_homo_host", "namespace": table_namespace}
    host_validate_data = {"name": "breast_homo_test", "namespace": table_namespace}

    pipeline = PipeLine().set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    data_transform_0 = DataTransform(name="data_transform_0")
    data_transform_1 = DataTransform(name="data_transform_1")
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")

    # Each stage bundles (reader, transform, guest table, host table). The
    # train stage is configured completely before the validate stage.
    stages = (
        (reader_0, data_transform_0, guest_train_data, host_train_data),
        (reader_1, data_transform_1, guest_validate_data, host_validate_data),
    )
    for reader, transform, guest_table, host_table in stages:
        role_tables = (("guest", guest, guest_table), ("host", host, host_table))
        for role, party_id, table in role_tables:
            reader.get_party_instance(
                role=role, party_id=party_id).component_param(table=table)
        # Both roles hold labelled data and use the same transform settings.
        for role, party_id, _ in role_tables:
            transform.get_party_instance(
                role=role, party_id=party_id).component_param(
                    with_label=True, output_format="dense")

    homo_secureboost_0 = HomoSecureBoost(
        name="homo_secureboost_0",
        num_trees=3,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
    )
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

    # Register the components in execution order.
    pipeline.add_component(reader_0)
    train_source = Data(data=reader_0.output.data)
    pipeline.add_component(data_transform_0, data=train_source)

    pipeline.add_component(reader_1)
    validate_source = Data(data=reader_1.output.data)
    # The validation transform is fed the model output of the training one.
    fitted_transform = Model(data_transform_0.output.model)
    pipeline.add_component(data_transform_1,
                           data=validate_source,
                           model=fitted_transform)

    boost_input = Data(train_data=data_transform_0.output.data,
                       validate_data=data_transform_1.output.data)
    pipeline.add_component(homo_secureboost_0, data=boost_input)

    boost_output = Data(homo_secureboost_0.output.data)
    pipeline.add_component(evaluation_0, data=boost_output)

    pipeline.compile()
    pipeline.fit()

    # Prediction: deploy the components that the predict pipeline reuses.
    pipeline.deploy_component([data_transform_0, homo_secureboost_0])

    predict_pipeline = PipeLine()
    # The validation reader supplies the data to predict on.
    predict_pipeline.add_component(reader_1)
    # Wire that reader's output into the deployed transform's input.
    predict_input = {
        pipeline.data_transform_0.input.data: reader_1.output.data,
    }
    predict_pipeline.add_component(pipeline,
                                   data=Data(predict_input=predict_input))
    predict_pipeline.predict()
Exemplo n.º 17
0
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero SecureBoost regression model on the student tables.

    Builds reader -> DataIO -> intersection for a train and a validate
    branch, trains ``HeteroSecureBoost`` on them, evaluates the result as a
    regression task and prints the booster's summary.

    Args:
        config: Either an already-loaded job config or, when a string, a path
            that is handed to ``load_job_config``.
        namespace: Text appended to ``"experiment"`` to build the namespace
            of every table the readers point at.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    # Only the first guest and the first host take part.
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "student_hetero_guest", "namespace": table_namespace}
    host_train_data = {"name": "student_hetero_host", "namespace": table_namespace}
    # The validate tables carry the same names as the train tables.
    guest_validate_data = {"name": "student_hetero_guest", "namespace": table_namespace}
    host_validate_data = {"name": "student_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine().set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Readers: one for training data, one for validation data.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_tables = (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    )
    for reader, guest_table, host_table in reader_tables:
        reader.get_party_instance(
            role="guest", party_id=guest).algorithm_param(table=guest_table)
        reader.get_party_instance(
            role="host", party_id=host).algorithm_param(table=host_table)

    # DataIO: the guest is configured with labels, the host without.
    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")
    for dataio in (dataio_0, dataio_1):
        dataio.get_party_instance(
            role="guest", party_id=guest).algorithm_param(
                with_label=True, output_format="dense")
        dataio.get_party_instance(
            role="host", party_id=host).algorithm_param(with_label=False)

    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    hetero_secure_boost_0 = HeteroSecureBoost(
        name="hetero_secure_boost_0",
        num_trees=5,
        task_type="regression",
        objective_param={"objective": "lse"},
        encrypt_param={"method": "iterativeAffine"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        early_stopping_rounds=1,
    )

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="regression")

    # Register the components in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)

    train_source = Data(data=reader_0.output.data)
    pipeline.add_component(dataio_0, data=train_source)

    validate_source = Data(data=reader_1.output.data)
    # The validation DataIO is fed the model output of the training one.
    fitted_dataio = Model(dataio_0.output.model)
    pipeline.add_component(dataio_1, data=validate_source, model=fitted_dataio)

    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))

    boost_input = Data(train_data=intersect_0.output.data,
                       validate_data=intersect_1.output.data)
    pipeline.add_component(hetero_secure_boost_0, data=boost_input)

    boost_output = Data(data=hetero_secure_boost_0.output.data)
    pipeline.add_component(evaluation_0, data=boost_output)

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    print("fitting hetero secureboost done, result:")
    booster_summary = pipeline.get_component(
        "hetero_secure_boost_0").get_summary()
    print(booster_summary)
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero fast SecureBoost classifier on breast data, then predict.

    Builds reader -> DataIO -> intersection for a train and a validate
    branch, trains ``HeteroFastSecureBoost`` in ``'mix'`` work mode,
    evaluates it as a binary task and prints the booster's summary. The
    DataIO, intersection and booster are then deployed and a prediction
    pipeline is run on the training reader.

    Args:
        config: Either an already-loaded job config or, when a string, a path
            that is handed to ``load_job_config``.
        namespace: Text appended to ``"experiment"`` to build the namespace
            of every table the readers point at.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    # Only the first guest and the first host take part.
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_hetero_host", "namespace": table_namespace}
    # The validate tables carry the same names as the train tables.
    guest_validate_data = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_validate_data = {"name": "breast_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine().set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Readers: one for training data, one for validation data.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_tables = (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    )
    for reader, guest_table, host_table in reader_tables:
        reader.get_party_instance(
            role="guest", party_id=guest).component_param(table=guest_table)
        reader.get_party_instance(
            role="host", party_id=host).component_param(table=host_table)

    # DataIO: the guest is configured with labels, the host without.
    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")
    for dataio in (dataio_0, dataio_1):
        dataio.get_party_instance(
            role="guest", party_id=guest).component_param(
                with_label=True, output_format="dense")
        dataio.get_party_instance(
            role="host", party_id=host).component_param(with_label=False)

    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    hetero_fast_secure_boost_0 = HeteroFastSecureBoost(
        name="hetero_fast_secure_boost_0",
        num_trees=4,
        tree_num_per_party=1,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "iterativeAffine"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        work_mode="mix",
    )

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

    # Register the components in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)

    train_source = Data(data=reader_0.output.data)
    pipeline.add_component(dataio_0, data=train_source)

    validate_source = Data(data=reader_1.output.data)
    # The validation DataIO is fed the model output of the training one.
    fitted_dataio = Model(dataio_0.output.model)
    pipeline.add_component(dataio_1, data=validate_source, model=fitted_dataio)

    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))

    boost_input = Data(train_data=intersect_0.output.data,
                       validate_data=intersect_1.output.data)
    pipeline.add_component(hetero_fast_secure_boost_0, data=boost_input)

    boost_output = Data(data=hetero_fast_secure_boost_0.output.data)
    pipeline.add_component(evaluation_0, data=boost_output)

    pipeline.compile()
    # The same job parameters are used for fitting and for prediction.
    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    print("fitting hetero fast secureboost done, result:")
    booster_summary = pipeline.get_component(
        "hetero_fast_secure_boost_0").get_summary()
    print(booster_summary)

    # Prediction: deploy the components that the predict pipeline reuses.
    pipeline.deploy_component(
        [dataio_0, intersect_0, hetero_fast_secure_boost_0])

    predict_pipeline = PipeLine()
    # The training reader supplies the data to predict on.
    predict_pipeline.add_component(reader_0)
    # Wire that reader's output into the deployed DataIO's input.
    predict_input = {pipeline.dataio_0.input.data: reader_0.output.data}
    predict_pipeline.add_component(pipeline,
                                   data=Data(predict_input=predict_input))
    predict_pipeline.predict(job_parameters)
Exemplo n.º 19
0
def main(config="../../config.yaml",
         param="./xgb_config_binary.yaml",
         namespace=""):
    """Fit a hetero fast SecureBoost classifier and return its evaluation.

    Builds reader -> DataIO -> intersection for a train and a validate
    branch, trains ``HeteroFastSecureBoost`` in ``'mix'`` work mode on the
    breast tables and evaluates it as a binary task.

    Args:
        config: Either an already-loaded job config or, when a string, a path
            that is handed to ``load_job_config``.
        param: Accepted for interface compatibility; this function body does
            not read it, and the booster settings below are hard-coded.
        namespace: Text appended to ``"experiment"`` to build the namespace
            of every table the readers point at.

    Returns:
        A 2-tuple of an empty dict and the summary of ``evaluation_0``.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    # Only the first guest and the first host take part.
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_hetero_host", "namespace": table_namespace}
    # The validate tables carry the same names as the train tables.
    guest_validate_data = {"name": "breast_hetero_guest", "namespace": table_namespace}
    host_validate_data = {"name": "breast_hetero_host", "namespace": table_namespace}

    pipeline = PipeLine().set_initiator(role="guest", party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Readers: one for training data, one for validation data.
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_tables = (
        (reader_0, guest_train_data, host_train_data),
        (reader_1, guest_validate_data, host_validate_data),
    )
    for reader, guest_table, host_table in reader_tables:
        reader.get_party_instance(
            role="guest", party_id=guest).algorithm_param(table=guest_table)
        reader.get_party_instance(
            role="host", party_id=host).algorithm_param(table=host_table)

    # DataIO: the guest is configured with labels, the host without.
    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")
    for dataio in (dataio_0, dataio_1):
        dataio.get_party_instance(
            role="guest", party_id=guest).algorithm_param(
                with_label=True, output_format="dense")
        dataio.get_party_instance(
            role="host", party_id=host).algorithm_param(with_label=False)

    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    hetero_fast_sbt = HeteroFastSecureBoost(
        name="hetero_fast_sbt_0",
        num_trees=10,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "iterativeAffine"},
        tree_param={"max_depth": 3},
        validation_freqs=1,
        learning_rate=0.1,
        tree_num_per_party=1,
        work_mode="mix",
    )

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")

    # Register the components in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)

    train_source = Data(data=reader_0.output.data)
    pipeline.add_component(dataio_0, data=train_source)

    validate_source = Data(data=reader_1.output.data)
    # The validation DataIO is fed the model output of the training one.
    fitted_dataio = Model(dataio_0.output.model)
    pipeline.add_component(dataio_1, data=validate_source, model=fitted_dataio)

    pipeline.add_component(intersect_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersect_1, data=Data(data=dataio_1.output.data))

    boost_input = Data(train_data=intersect_0.output.data,
                       validate_data=intersect_1.output.data)
    pipeline.add_component(hetero_fast_sbt, data=boost_input)

    boost_output = Data(data=hetero_fast_sbt.output.data)
    pipeline.add_component(evaluation_0, data=boost_output)

    pipeline.compile()
    pipeline.fit(backend=backend, work_mode=work_mode)

    evaluation_summary = pipeline.get_component("evaluation_0").get_summary()
    return {}, evaluation_summary
Exemplo n.º 20
0
#
#  Copyright 2019 The FATE Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

from pipeline.component.evaluation import Evaluation

# Create a stand-alone Evaluation component; no pipeline is involved here.
a = Evaluation(name="evaluation_0")

# Print the component's `output.data` attribute.
print(a.output.data)
Exemplo n.º 21
0
def main(config="../../config.yaml", namespace=""):
    """Train a homo logistic regression on scaled breast data.

    The pipeline is reader -> DataIO -> feature scale -> HomoLR -> binary
    evaluation. After fitting, the summaries of ``homo_lr_0`` and
    ``evaluation_0`` are printed as indented JSON.

    Args:
        config: Either an already-loaded job config or, when a string, a path
            that is handed to ``load_job_config``.
        namespace: Text appended to ``"experiment"`` to build the namespace
            of the tables the reader points at.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    parties = config.parties
    guest = parties.guest[0]
    # The host entry is used un-indexed here (presumably the whole host
    # list), unlike guest and arbiter, which take the first party only.
    host = parties.host
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"
    guest_train_data = {"name": "breast_homo_guest", "namespace": table_namespace}
    host_train_data = {"name": "breast_homo_host", "namespace": table_namespace}

    # Create the pipeline, then declare the initiator and the participants.
    pipeline = PipeLine()
    pipeline.set_initiator(role="guest", party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # One reader, configured per role with that role's training table.
    reader_0 = Reader(name="reader_0")
    role_tables = (
        ("guest", guest, guest_train_data),
        ("host", host, host_train_data),
    )
    for role, party_id, table in role_tables:
        reader_0.get_party_instance(
            role=role, party_id=party_id).component_param(table=table)

    # Component numbering starts at 0.
    dataio_0 = DataIO(name="dataio_0", with_label=True, output_format="dense")

    scale_0 = FeatureScale(name="scale_0")

    homo_lr_0 = HomoLR(
        name="homo_lr_0",
        penalty="L2",
        optimizer="sgd",
        tol=1e-05,
        alpha=0.01,
        max_iter=30,
        early_stop="diff",
        batch_size=-1,
        learning_rate=0.15,
        decay=1,
        decay_sqrt=True,
        init_param={"init_method": "zeros"},
        encrypt_param={"method": None},
        cv_param={
            "n_splits": 4,
            "shuffle": True,
            "random_seed": 33,
            "need_cv": False,
        },
    )

    # Register the components in execution order.
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # The scaler consumes the DataIO output and feeds the LR trainer.
    pipeline.add_component(scale_0, data=Data(data=dataio_0.output.data))
    lr_input = Data(train_data=scale_0.output.data)
    pipeline.add_component(homo_lr_0, data=lr_input)

    evaluation_0 = Evaluation(name="evaluation_0", eval_type="binary")
    # Evaluation is switched off on the host side.
    evaluation_0.get_party_instance(
        role="host", party_id=host).component_param(need_run=False)
    pipeline.add_component(evaluation_0, data=Data(data=homo_lr_0.output.data))

    # Compile once every component has been added, before fitting.
    pipeline.compile()

    job_parameters = JobParameters(backend=backend, work_mode=work_mode)
    pipeline.fit(job_parameters)

    # Dump the model summary first, then the evaluation summary.
    for component_name in ("homo_lr_0", "evaluation_0"):
        summary = pipeline.get_component(component_name).get_summary()
        print(json.dumps(summary, indent=4, ensure_ascii=False))
Exemplo n.º 22
0
def main(config="../../config.yaml", namespace=""):
    """Fit a hetero SecureBoost classifier plus an SBT transformer.

    Builds reader -> data transform -> intersection for a train and a
    validate branch on the vehicle_scale tables, trains
    ``HeteroSecureBoost`` on them and feeds the trained model to an
    ``SBTTransformer`` applied to the training intersection output.

    Args:
        config: Either an already-loaded job config or, when a string, a path
            that is handed to ``load_job_config``.
        namespace: Text appended to ``"experiment"`` to build the namespace
            of every table the readers point at.
    """
    # obtain config: a string is treated as a path and loaded
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    # only the first guest and the first host take part
    guest = parties.guest[0]
    host = parties.host[0]

    # data sets
    guest_train_data = {
        "name": "vehicle_scale_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "vehicle_scale_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # the validate tables carry the same names as the train tables
    guest_validate_data = {
        "name": "vehicle_scale_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_validate_data = {
        "name": "vehicle_scale_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # init pipeline
    pipeline = PipeLine().set_initiator(role="guest",
                                        party_id=guest).set_roles(
                                            guest=guest,
                                            host=host,
                                        )

    # set data readers (reader_0: train tables, reader_1: validate tables)
    reader_0, reader_1 = Reader(name="reader_0"), Reader(name="reader_1")
    reader_0.get_party_instance(
        role="guest", party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role="host", party_id=host).component_param(table=host_train_data)
    reader_1.get_party_instance(
        role="guest",
        party_id=guest).component_param(table=guest_validate_data)
    reader_1.get_party_instance(
        role="host", party_id=host).component_param(table=host_validate_data)

    data_transform_0, data_transform_1 = DataTransform(
        name="data_transform_0"), DataTransform(name="data_transform_1")

    # the guest is configured with labels, the host without
    data_transform_0.get_party_instance(
        role="guest", party_id=guest).component_param(with_label=True,
                                                      output_format="dense")
    data_transform_0.get_party_instance(
        role="host", party_id=host).component_param(with_label=False)
    data_transform_1.get_party_instance(
        role="guest", party_id=guest).component_param(with_label=True,
                                                      output_format="dense")
    data_transform_1.get_party_instance(
        role="host", party_id=host).component_param(with_label=False)

    # data intersect component
    intersect_0 = Intersection(name="intersection_0")
    intersect_1 = Intersection(name="intersection_1")

    # secure boost component
    hetero_secure_boost_0 = HeteroSecureBoost(
        name="hetero_secure_boost_0",
        num_trees=3,
        task_type="classification",
        objective_param={"objective": "cross_entropy"},
        encrypt_param={"method": "paillier"},
        tree_param={"max_depth": 3},
        validation_freqs=1)

    # evaluation component
    evaluation_0 = Evaluation(name="evaluation_0", eval_type="multi")
    evaluation_1 = Evaluation(name="evaluation_1", eval_type="multi")

    # transformer
    transformer_0 = SBTTransformer(name='sbt_transformer_0', dense_format=True)

    # local baseline
    def get_local_baseline(idx):
        """Return a LogisticRegression LocalBaseline named ``local_baseline_<idx>``."""
        return LocalBaseline(name="local_baseline_{}".format(idx),
                             model_name="LogisticRegression",
                             model_opts={
                                 "penalty": "l2",
                                 "tol": 0.0001,
                                 "C": 1.0,
                                 "fit_intercept": True,
                                 "solver": "lbfgs",
                                 "max_iter": 50
                             })

    # both baselines are enabled on the guest and disabled on the host
    local_baseline_0 = get_local_baseline(0)
    local_baseline_0.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_0.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    local_baseline_1 = get_local_baseline(1)
    local_baseline_1.get_party_instance(
        role='guest', party_id=guest).component_param(need_run=True)
    local_baseline_1.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    # evaluation_1 is disabled on the host
    evaluation_1.get_party_instance(
        role='host', party_id=host).component_param(need_run=False)

    # register components in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    # the validation transform is fed the model output of the training one
    pipeline.add_component(data_transform_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(data_transform_0.output.model))
    pipeline.add_component(intersect_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(intersect_1,
                           data=Data(data=data_transform_1.output.data))
    pipeline.add_component(hetero_secure_boost_0,
                           data=Data(train_data=intersect_0.output.data,
                                     validate_data=intersect_1.output.data))
    # the transformer takes the trained booster as its isometric model
    pipeline.add_component(
        transformer_0,
        data=Data(data=intersect_0.output.data),
        model=Model(isometric_model=hetero_secure_boost_0.output.model))
    # NOTE(review): evaluation_0/1 and local_baseline_0/1 are configured
    # above, but no add_component call for them appears before compile() --
    # confirm whether that is intended.
    pipeline.compile()
    pipeline.fit()