Пример #1
0
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]
    backend = config.backend
    work_mode = config.work_mode

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    guest_test_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    host_train_data = {
        "name": "breast_hetero_host_tag_value",
        "namespace": "experiment"
    }
    host_test_data = {
        "name": "breast_hetero_host_tag_value",
        "namespace": "experiment"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).algorithm_param(table=guest_train_data)
    reader_1.get_party_instance(
        role='guest', party_id=guest).algorithm_param(table=guest_test_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).algorithm_param(table=host_train_data)
    reader_1.get_party_instance(
        role='host', party_id=host).algorithm_param(table=host_test_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")  # start component numbering at 1

    param = {
        "with_label": True,
        "label_name": "y",
        "label_type": "int",
        "output_format": "dense",
        "missing_fill": True,
        "missing_fill_method": "mean",
        "outlier_replace": False,
        "outlier_replace_method": "designated",
        "outlier_replace_value": 0.66,
        "outlier_impute": "-9999"
    }
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest',
                                                                party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.algorithm_param(**param)
    # get and configure DataIO party instance of host
    dataio_1.get_party_instance(role='guest',
                                party_id=guest).algorithm_param(**param)

    param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense"
    }
    dataio_0.get_party_instance(role='host',
                                party_id=host).algorithm_param(**param)
    dataio_1.get_party_instance(role='host',
                                party_id=host).algorithm_param(**param)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0",
                                  intersect_method="raw")
    intersection_1 = Intersection(name="intersection_1",
                                  intersect_method="raw")

    param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv",
            "init_bucket_method": "quantile"
        },
        "bin_indexes": -1
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**param)
    statistic_0 = DataStatistics(name='statistic_0')
    param = {
        "name":
        'hetero_feature_selection_0',
        "filter_methods":
        ["manually", "unique_value", "iv_filter", "statistic_filter"],
        "manually_param": {
            "filter_out_indexes": [1, 2],
            "filter_out_names": ["x3", "x4"]
        },
        "unique_param": {
            "eps": 1e-6
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.1]
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, False],
            "threshold": [0.001, -0.01]
        },
        "select_col_indexes":
        -1
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**param)
    hetero_feature_selection_1 = HeteroFeatureSelection(
        name='hetero_feature_selection_1')
    param = {"name": "hetero_scale_0", "method": "standard_scale"}
    hetero_scale_0 = FeatureScale(**param)
    hetero_scale_1 = FeatureScale(name='hetero_scale_1')
    param = {
        "penalty": "L2",
        "optimizer": "nesterov_momentum_sgd",
        "tol": 1e-4,
        "alpha": 0.01,
        "max_iter": 5,
        "early_stop": "diff",
        "batch_size": -1,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "zeros"
        },
        "validation_freqs": None,
        "early_stopping_rounds": None
    }

    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **param)
    evaluation_0 = Evaluation(name='evaluation_0')
    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))

    # set data input sources of intersection components
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=dataio_1.output.data))

    # set train & validate data of hetero_lr_0 component
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))

    pipeline.add_component(statistic_0,
                           data=Data(data=intersection_0.output.data))

    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[
            hetero_feature_binning_0.output.model, statistic_0.output.model
        ]))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(
                               hetero_feature_selection_0.output.model))

    pipeline.add_component(
        hetero_scale_0, data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(
        hetero_scale_1,
        data=Data(data=hetero_feature_selection_1.output.data),
        model=Model(hetero_scale_0.output.model))

    # set train & validate data of hetero_lr_0 component
    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=hetero_scale_0.output.data,
                                     validate_data=hetero_scale_1.output.data))

    pipeline.add_component(evaluation_0,
                           data=Data(data=[hetero_lr_0.output.data]))
    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit(backend=backend, work_mode=work_mode)
    # query component summary
    print(pipeline.get_component("hetero_lr_0").get_summary())
Пример #2
0
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    guest_test_data = {
        "name": "breast_hetero_guest",
        "namespace": "experiment"
    }
    host_train_data = {
        "name": "breast_hetero_host_tag_value",
        "namespace": "experiment"
    }
    host_test_data = {
        "name": "breast_hetero_host_tag_value",
        "namespace": "experiment"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_1.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_test_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)
    reader_1.get_party_instance(
        role='host', party_id=host).component_param(table=host_test_data)

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")  # start component numbering at 1

    param = {
        "with_label": True,
        "label_name": "y",
        "label_type": "int",
        "output_format": "dense",
        "missing_fill": True,
        "missing_fill_method": "mean",
        "outlier_replace": False,
        "outlier_replace_method": "designated",
        "outlier_replace_value": 0.66,
        "outlier_impute": "-9999"
    }
    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest',
                                                                party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(**param)
    # get and configure DataIO party instance of host
    dataio_1.get_party_instance(role='guest',
                                party_id=guest).component_param(**param)

    param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense"
    }
    dataio_0.get_party_instance(role='host',
                                party_id=host).component_param(**param)
    dataio_1.get_party_instance(role='host',
                                party_id=host).component_param(**param)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0",
                                  intersect_method="raw")
    intersection_1 = Intersection(name="intersection_1",
                                  intersect_method="raw")

    param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv",
            "init_bucket_method": "quantile"
        },
        "bin_indexes": -1
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**param)
    statistic_0 = DataStatistics(name='statistic_0')
    param = {
        "name":
        'hetero_feature_selection_0',
        "filter_methods":
        ["manually", "unique_value", "iv_filter", "statistic_filter"],
        "manually_param": {
            "filter_out_indexes": [1, 2],
            "filter_out_names": ["x2", "x3"]
        },
        "unique_param": {
            "eps": 1e-6
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.1]
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, False],
            "threshold": [0.001, -0.01]
        },
        "select_col_indexes":
        -1
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**param)
    hetero_feature_selection_1 = HeteroFeatureSelection(
        name='hetero_feature_selection_1')

    param = {
        "task_type": "classification",
        "learning_rate": 0.1,
        "num_trees": 10,
        "subsample_feature_rate": 0.5,
        "n_iter_no_change": False,
        "tol": 0.0002,
        "bin_num": 50,
        "objective_param": {
            "objective": "cross_entropy"
        },
        "encrypt_param": {
            "method": "paillier"
        },
        "predict_param": {
            "threshold": 0.5
        },
        "tree_param": {
            "max_depth": 2
        },
        "cv_param": {
            "n_splits": 5,
            "shuffle": False,
            "random_seed": 103,
            "need_cv": False
        },
        "validation_freqs": 2,
        "early_stopping_rounds": 5,
        "metrics": ["auc", "ks"]
    }

    hetero_secureboost_0 = HeteroSecureBoost(name='hetero_secureboost_0',
                                             **param)
    evaluation_0 = Evaluation(name='evaluation_0')
    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))

    # set data input sources of intersection components
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=dataio_1.output.data))

    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))

    pipeline.add_component(statistic_0,
                           data=Data(data=intersection_0.output.data))

    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=[
            hetero_feature_binning_0.output.model, statistic_0.output.model
        ]))
    pipeline.add_component(hetero_feature_selection_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(
                               hetero_feature_selection_0.output.model))

    # set train & validate data of hetero_secureboost_0 component
    pipeline.add_component(
        hetero_secureboost_0,
        data=Data(train_data=hetero_feature_selection_0.output.data,
                  validate_data=hetero_feature_selection_1.output.data))

    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_secureboost_0.output.data))
    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    print(pipeline.get_component("hetero_secureboost_0").get_summary())
Пример #3
0
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data_0 = {"name": "breast_hetero_guest", "namespace": "experiment"}
    guest_train_data_1 = {"name": "breast_hetero_guest", "namespace": "experiment"}
    guest_test_data_0 = {"name": "breast_hetero_guest", "namespace": "experiment"}
    guest_test_data_1 = {"name": "breast_hetero_guest", "namespace": "experiment"}
    host_train_data_0 = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}
    host_train_data_1 = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}
    host_test_data_0 = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}
    host_test_data_1 = {"name": "breast_hetero_host_tag_value", "namespace": "experiment"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    reader_1 = Reader(name="reader_1")
    reader_2 = Reader(name="reader_2")
    reader_3 = Reader(name="reader_3")
    # configure Reader for guest
    reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data_0)
    reader_1.get_party_instance(role='guest', party_id=guest).component_param(table=guest_train_data_1)
    reader_2.get_party_instance(role='guest', party_id=guest).component_param(table=guest_test_data_0)
    reader_3.get_party_instance(role='guest', party_id=guest).component_param(table=guest_test_data_1)
    # configure Reader for host
    reader_0.get_party_instance(role='host', party_id=host).component_param(table=host_train_data_0)
    reader_1.get_party_instance(role='host', party_id=host).component_param(table=host_train_data_1)
    reader_2.get_party_instance(role='host', party_id=host).component_param(table=host_test_data_0)
    reader_3.get_party_instance(role='host', party_id=host).component_param(table=host_test_data_1)

    param = {
        "name": "union_0",
        "keep_duplicate": True
    }
    union_0 = Union(**param)
    param = {
        "name": "union_1",
        "keep_duplicate": True
    }
    union_1 = Union(**param)

    param = {
        "input_format": "tag",
        "with_label": False,
        "tag_with_value": True,
        "delimitor": ";",
        "output_format": "dense"
    }

    # define DataIO components
    dataio_0 = DataIO(name="dataio_0")  # start component numbering at 0
    dataio_1 = DataIO(name="dataio_1")  # start component numbering at 1

    # get DataIO party instance of guest
    dataio_0_guest_party_instance = dataio_0.get_party_instance(role='guest', party_id=guest)
    # configure DataIO for guest
    dataio_0_guest_party_instance.component_param(with_label=True, output_format="dense")
    # get and configure DataIO party instance of host
    dataio_0.get_party_instance(role='host', party_id=host).component_param(**param)
    dataio_1.get_party_instance(role='guest', party_id=guest).component_param(with_label=True)
    dataio_1.get_party_instance(role='host', party_id=host).component_param(**param)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    param = {
        "name": 'hetero_feature_binning_0',
        "method": 'optimal',
        "optimal_binning_param": {
            "metric_method": "iv"
        },
        "bin_indexes": -1
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(**param)
    statistic_0 = DataStatistics(name='statistic_0')
    param = {
        "name": 'hetero_feature_selection_0',
        "filter_methods": ["manually", "iv_filter", "statistic_filter"],
        "manually_param": {
            "filter_out_indexes": [1, 2],
            "filter_out_names": ["x2", "x3"]
        },
        "iv_param": {
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.01]
        },
        "statistic_param": {
            "metrics": ["coefficient_of_variance", "skewness"],
            "filter_type": ["threshold", "threshold"],
            "take_high": [True, True],
            "threshold": [0.001, -0.01]
        },
        "select_col_indexes": -1
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**param)
    hetero_feature_selection_1 = HeteroFeatureSelection(name='hetero_feature_selection_1')
    param = {
        "name": "hetero_scale_0",
        "method": "standard_scale"
    }
    hetero_scale_0 = FeatureScale(**param)
    hetero_scale_1 = FeatureScale(name='hetero_scale_1')
    param = {
        "penalty": "L2",
        "validation_freqs": None,
        "early_stopping_rounds": None,
        "max_iter": 5
    }

    hetero_lr_0 = HeteroLR(name='hetero_lr_0', **param)
    evaluation_0 = Evaluation(name='evaluation_0')
    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(reader_2)
    pipeline.add_component(reader_3)
    pipeline.add_component(union_0, data=Data(data=[reader_0.output.data, reader_1.output.data]))
    pipeline.add_component(union_1, data=Data(data=[reader_2.output.data, reader_3.output.data]))

    pipeline.add_component(dataio_0, data=Data(data=union_0.output.data))
    pipeline.add_component(dataio_1, data=Data(data=union_1.output.data), model=Model(dataio_0.output.model))
    # set data input sources of intersection components
    pipeline.add_component(intersection_0, data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1, data=Data(data=dataio_1.output.data))
    # set train & validate data of hetero_lr_0 component
    pipeline.add_component(hetero_feature_binning_0, data=Data(data=intersection_0.output.data))

    pipeline.add_component(statistic_0, data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_feature_selection_0, data=Data(data=intersection_0.output.data),
                           model=Model(isometric_model=[hetero_feature_binning_0.output.model,
                                                        statistic_0.output.model]))
    pipeline.add_component(hetero_feature_selection_1, data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_selection_0.output.model))

    pipeline.add_component(hetero_scale_0, data=Data(data=hetero_feature_selection_0.output.data))
    pipeline.add_component(hetero_scale_1, data=Data(data=hetero_feature_selection_1.output.data),
                           model=Model(hetero_scale_0.output.model))

    # set train & validate data of hetero_lr_0 component

    pipeline.add_component(hetero_lr_0, data=Data(train_data=hetero_scale_0.output.data,
                                                  validate_data=hetero_scale_1.output.data))

    pipeline.add_component(evaluation_0, data=Data(data=[hetero_lr_0.output.data]))

    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    print(pipeline.get_component("hetero_lr_0").get_summary())
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    host = parties.host[0]
    arbiter = parties.arbiter[0]

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment_sid{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment_sid{namespace}"
    }

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest).\
        set_roles(guest=guest, host=host, arbiter=arbiter)

    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0",
                                     with_match_id=True)
    data_transform_0.get_party_instance(
        role='guest', party_id=guest).component_param(with_label=True)
    data_transform_0.get_party_instance(
        role='host', party_id=host).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    feature_scale_0 = FeatureScale(name='feature_scale_0',
                                   method="standard_scale",
                                   need_run=True)

    binning_param = {
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "adjustment_factor": 0.5,
        "local_only": False,
        "need_run": True,
        "transform_param": {
            "transform_cols": -1,
            "transform_type": "bin_num"
        }
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(
        name='hetero_feature_binning_0', **binning_param)

    statistic_0 = DataStatistics(name='statistic_0', statistics=["95%"])
    pearson_0 = HeteroPearson(name='pearson_0', column_indexes=-1)
    onehot_0 = OneHotEncoder(name='onehot_0')
    selection_param = {
        "name":
        "hetero_feature_selection_0",
        "select_col_indexes":
        -1,
        "select_names": [],
        "filter_methods": [
            "manually", "unique_value", "iv_filter",
            "coefficient_of_variation_value_thres", "outlier_cols"
        ],
        "manually_param": {
            "filter_out_indexes": [0, 1, 2],
            "filter_out_names": ["x3"]
        },
        "unique_param": {
            "eps": 1e-06
        },
        "iv_param": {
            "metrics": ["iv", "iv", "iv"],
            "filter_type": ["threshold", "top_k", "top_percentile"],
            "threshold": [0.001, 100, 0.99]
        },
        "variance_coe_param": {
            "value_threshold": 0.3
        },
        "outlier_param": {
            "percentile": 0.95,
            "upper_threshold": 2.0
        }
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)

    lr_param = {
        "name": "hetero_lr_0",
        "penalty": "L2",
        "optimizer": "rmsprop",
        "tol": 0.0001,
        "alpha": 0.01,
        "max_iter": 30,
        "early_stop": "diff",
        "batch_size": 320,
        "learning_rate": 0.15,
        "init_param": {
            "init_method": "zeros"
        },
        "sqn_param": {
            "update_interval_L": 3,
            "memory_M": 5,
            "sample_size": 5000,
            "random_seed": None
        },
        "cv_param": {
            "n_splits": 5,
            "shuffle": False,
            "random_seed": 103,
            "need_cv": False
        }
    }

    hetero_lr_0 = HeteroLR(**lr_param)
    evaluation_0 = Evaluation(name='evaluation_0')

    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))
    pipeline.add_component(feature_scale_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=feature_scale_0.output.data))
    pipeline.add_component(statistic_0,
                           data=Data(data=feature_scale_0.output.data))
    pipeline.add_component(pearson_0,
                           data=Data(data=feature_scale_0.output.data))

    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=hetero_feature_binning_0.output.data),
        model=Model(isometric_model=[
            hetero_feature_binning_0.output.model, statistic_0.output.model
        ]))
    pipeline.add_component(
        onehot_0, data=Data(data=hetero_feature_selection_0.output.data))

    pipeline.add_component(hetero_lr_0,
                           data=Data(train_data=onehot_0.output.data))
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_lr_0.output.data))

    pipeline.compile()

    pipeline.fit()
Пример #5
0
def main(config="../../config.yaml", namespace=""):
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host[0]

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }
    # guest_train_data = {"name": "default_credit_hetero_guest", "namespace": f"experiment{namespace}"}
    # host_train_data = {"name": "default_credit_hetero_host", "namespace": f"experiment{namespace}"}

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=hosts)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=hosts).component_param(table=host_train_data)

    data_transform_0 = DataTransform(name="data_transform_0",
                                     output_format='dense')

    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(
        role='guest', party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(with_label=True)
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(
        role='host', party_id=hosts).component_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")

    pipeline.add_component(reader_0)

    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))

    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))

    statistic_param = {
        "name": "statistic_0",
        "statistics": ["95%", "coefficient_of_variance", "stddev"],
        "column_indexes": -1,
        "column_names": []
    }
    statistic_0 = DataStatistics(**statistic_param)
    pipeline.add_component(statistic_0,
                           data=Data(data=intersection_0.output.data))

    pipeline.compile()

    # fit model
    pipeline.fit()
    # query component summary
    prettify(pipeline.get_component("statistic_0").get_summary())
Пример #6
0
def make_normal_dsl(config,
                    namespace,
                    selection_param,
                    is_multi_host=False,
                    host_dense_output=True,
                    **kwargs):
    parties = config.parties
    guest = parties.guest[0]
    if is_multi_host:
        hosts = parties.host
    else:
        hosts = parties.host[0]

    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    guest_eval_data = {
        "name": "breast_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_eval_data = {
        "name": "breast_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # initialize pipeline
    pipeline = PipeLine()
    # set job initiator
    pipeline.set_initiator(role='guest', party_id=guest)
    # set participants information
    pipeline.set_roles(guest=guest, host=hosts)

    # define Reader components to read in data
    reader_0 = Reader(name="reader_0")
    # configure Reader for guest
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # configure Reader for host
    reader_0.get_party_instance(
        role='host', party_id=hosts).component_param(table=host_train_data)

    # define DataTransform components
    data_transform_0 = DataTransform(
        name="data_transform_0")  # start component numbering at 0

    # get DataTransform party instance of guest
    data_transform_0_guest_party_instance = data_transform_0.get_party_instance(
        role='guest', party_id=guest)
    # configure DataTransform for guest
    data_transform_0_guest_party_instance.component_param(
        with_label=True, output_format="dense")
    # get and configure DataTransform party instance of host
    data_transform_0.get_party_instance(
        role='host', party_id=hosts).component_param(with_label=False)

    # define Intersection components
    intersection_0 = Intersection(name="intersection_0")
    pipeline.add_component(reader_0)
    pipeline.add_component(data_transform_0,
                           data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=data_transform_0.output.data))

    last_cpn = intersection_0
    selection_include_model = []
    if 'binning_param' in kwargs:
        hetero_feature_binning_0 = HeteroFeatureBinning(
            **kwargs['binning_param'])
        pipeline.add_component(hetero_feature_binning_0,
                               data=Data(data=last_cpn.output.data))
        selection_include_model.append(hetero_feature_binning_0)
        # last_cpn = hetero_feature_binning_0

    if 'statistic_param' in kwargs:
        # print(f"param: {kwargs['statistic_param']}, kwargs: {kwargs}")
        statistic_0 = DataStatistics(**kwargs['statistic_param'])
        pipeline.add_component(statistic_0,
                               data=Data(data=last_cpn.output.data))
        # last_cpn = statistic_0
        selection_include_model.append(statistic_0)

    if 'psi_param' in kwargs:
        reader_1 = Reader(name="reader_1")
        reader_1.get_party_instance(
            role='guest',
            party_id=guest).component_param(table=guest_eval_data)
        reader_1.get_party_instance(
            role='host', party_id=hosts).component_param(table=host_eval_data)
        data_transform_1 = DataTransform(name="data_transform_1")
        intersection_1 = Intersection(name="intersection_1")
        pipeline.add_component(reader_1)
        pipeline.add_component(data_transform_1,
                               data=Data(data=reader_1.output.data),
                               model=Model(data_transform_0.output.model))
        pipeline.add_component(intersection_1,
                               data=Data(data=data_transform_1.output.data))

        psi_0 = PSI(**kwargs['psi_param'])
        pipeline.add_component(psi_0,
                               data=Data(
                                   train_data=intersection_0.output.data,
                                   validate_data=intersection_1.output.data))
        # last_cpn = statistic_0
        selection_include_model.append(psi_0)

    if 'sbt_param' in kwargs:
        secureboost_0 = HeteroSecureBoost(**kwargs['sbt_param'])

        pipeline.add_component(
            secureboost_0, data=Data(train_data=intersection_0.output.data))
        selection_include_model.append(secureboost_0)

    if "fast_sbt_param" in kwargs:
        fast_sbt_0 = HeteroFastSecureBoost(**kwargs['fast_sbt_param'])
        pipeline.add_component(
            fast_sbt_0, data=Data(train_data=intersection_0.output.data))
        selection_include_model.append(fast_sbt_0)

    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)

    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(
            isometric_model=[x.output.model for x in selection_include_model]))
    # compile pipeline once finished adding modules, this step will form conf and dsl files for running job
    pipeline.compile()
    return pipeline