示例#1
0
def make_normal_dsl(config,
                    namespace,
                    selection_param,
                    is_multi_host=False,
                    host_dense_output=True,
                    **kwargs):
    """Assemble and compile a hetero feature-selection pipeline.

    The pipeline always contains ``reader_0 -> dataio_0 -> intersection_0``
    and ends with a ``HeteroFeatureSelection`` component fed by
    ``intersection_0``. Optional components are added when their parameter
    dict is present in ``kwargs``; the model output of every optional
    component is passed to the selection component as ``isometric_model``.

    Args:
        config: Job configuration exposing ``parties.guest`` and
            ``parties.host`` (indexable collections of party ids).
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.
        selection_param: Keyword arguments for ``HeteroFeatureSelection``.
        is_multi_host: When true, ``config.parties.host`` is used as a whole;
            otherwise only its first entry is used.
        host_dense_output: When false, the host side of ``dataio_0`` is
            configured with ``output_format="sparse"``. When true (default)
            no output format is passed for the host, so the component's own
            default applies.
        **kwargs: Optional parameter dicts, each expanded into the
            constructor of one component: ``binning_param``
            (``HeteroFeatureBinning``), ``statistic_param``
            (``DataStatistics``), ``psi_param`` (``PSI``), ``sbt_param``
            (``HeteroSecureBoost``) and ``fast_sbt_param``
            (``HeteroFastSecureBoost``).

    Returns:
        The compiled ``PipeLine``. The pipeline is not fitted here.
    """
    parties = config.parties
    guest = parties.guest[0]
    # all hosts when multi-host, otherwise only the first one
    hosts = parties.host if is_multi_host else parties.host[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": table_namespace
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": table_namespace
    }

    # initialize pipeline, then set the job initiator and the participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts)

    # Reader: one input table per role
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=hosts).component_param(table=host_train_data)

    # DataIO: only the guest side is configured with labels
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    # Honour host_dense_output; the default call stays `with_label=False` only.
    host_dataio_param = {"with_label": False}
    if not host_dense_output:
        host_dataio_param["output_format"] = "sparse"
    dataio_0.get_party_instance(
        role='host', party_id=hosts).component_param(**host_dataio_param)

    intersection_0 = Intersection(name="intersection_0")
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))

    # Components whose model output is handed to the selection component.
    selection_include_model = []

    if 'binning_param' in kwargs:
        hetero_feature_binning_0 = HeteroFeatureBinning(
            **kwargs['binning_param'])
        pipeline.add_component(hetero_feature_binning_0,
                               data=Data(data=intersection_0.output.data))
        selection_include_model.append(hetero_feature_binning_0)

    if 'statistic_param' in kwargs:
        statistic_0 = DataStatistics(**kwargs['statistic_param'])
        pipeline.add_component(statistic_0,
                               data=Data(data=intersection_0.output.data))
        selection_include_model.append(statistic_0)

    if 'psi_param' in kwargs:
        # PSI needs a second data line (reader_1 -> dataio_1 ->
        # intersection_1) that serves as its validate_data input.
        guest_eval_data = {
            "name": "breast_hetero_guest",
            "namespace": table_namespace
        }
        host_eval_data = {
            "name": "breast_hetero_host",
            "namespace": table_namespace
        }
        reader_1 = Reader(name="reader_1")
        reader_1.get_party_instance(
            role='guest',
            party_id=guest).component_param(table=guest_eval_data)
        reader_1.get_party_instance(
            role='host', party_id=hosts).component_param(table=host_eval_data)
        dataio_1 = DataIO(name="dataio_1")
        intersection_1 = Intersection(name="intersection_1")
        pipeline.add_component(reader_1)
        # dataio_1 takes the model output of dataio_0 as its model input
        pipeline.add_component(dataio_1,
                               data=Data(data=reader_1.output.data),
                               model=Model(dataio_0.output.model))
        pipeline.add_component(intersection_1,
                               data=Data(data=dataio_1.output.data))

        psi_0 = PSI(**kwargs['psi_param'])
        pipeline.add_component(psi_0,
                               data=Data(
                                   train_data=intersection_0.output.data,
                                   validate_data=intersection_1.output.data))
        selection_include_model.append(psi_0)

    if 'sbt_param' in kwargs:
        secureboost_0 = HeteroSecureBoost(**kwargs['sbt_param'])
        pipeline.add_component(
            secureboost_0, data=Data(train_data=intersection_0.output.data))
        selection_include_model.append(secureboost_0)

    if "fast_sbt_param" in kwargs:
        fast_sbt_0 = HeteroFastSecureBoost(**kwargs['fast_sbt_param'])
        pipeline.add_component(
            fast_sbt_0, data=Data(train_data=intersection_0.output.data))
        selection_include_model.append(fast_sbt_0)

    hetero_feature_selection_0 = HeteroFeatureSelection(**selection_param)
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(
            isometric_model=[x.output.model for x in selection_include_model]))

    # compile once all components are added; the pipeline is returned unfitted
    pipeline.compile()
    return pipeline
def main(config="../../config.yaml", namespace=""):
    """Build, compile and fit the hetero k-means example pipeline.

    Component chain: ``reader_0 -> dataio_0 -> intersection_0 ->
    hetero_feature_binning_0 -> hetero_feature_selection_0 ->
    hetero_kmeans_0 -> evaluation_0``. After fitting, the summary of
    ``hetero_kmeans_0`` is printed.

    Args:
        config: Either a path (``str``) handed to ``load_job_config`` or an
            already loaded job configuration exposing ``parties``,
            ``backend`` and ``work_mode``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.
    """
    # a string is treated as the location of the job config file
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest, host, arbiter = (party_cfg.guest[0], party_cfg.host[0],
                            party_cfg.arbiter[0])
    backend, work_mode = config.backend, config.work_mode

    experiment_ns = f"experiment{namespace}"
    party_of = {"guest": guest, "host": host}
    train_tables = {
        "guest": {"name": "breast_hetero_guest", "namespace": experiment_ns},
        "host": {"name": "breast_hetero_host", "namespace": experiment_ns},
    }

    # pipeline with initiator and participating roles
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host, arbiter=arbiter)

    # Reader: attach the training table of each role
    reader_0 = Reader(name="reader_0")
    for role in ("guest", "host"):
        reader_0.get_party_instance(
            role=role,
            party_id=party_of[role]).component_param(table=train_tables[role])

    # DataIO: guest side with labels and dense output, host side without labels
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(role='host', party_id=host).component_param(
        with_label=False)

    intersection_0 = Intersection(name="intersection_0")

    hetero_feature_binning_0 = HeteroFeatureBinning(
        name='hetero_feature_binning_0',
        method='optimal',
        optimal_binning_param={"metric_method": "iv"},
        bin_indexes=-1)

    hetero_feature_selection_0 = HeteroFeatureSelection(
        name='hetero_feature_selection_0',
        filter_methods=["manually", "iv_filter"],
        manually_param={"filter_out_indexes": [1]},
        iv_param={
            "metrics": ["iv", "iv"],
            "filter_type": ["top_k", "threshold"],
            "take_high": [True, True],
            "threshold": [10, 0.001]
        },
        select_col_indexes=-1)

    hetero_kmeans_0 = HeteroKmeans(name='hetero_kmeans_0', k=3, max_iter=10)
    evaluation_0 = Evaluation(name='evaluation_0', eval_type='clustering')

    # register components in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    intersected = intersection_0.output.data
    # binning and selection both read the intersected data; selection also
    # receives the binning model as isometric model
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersected))
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=intersection_0.output.data),
        model=Model(isometric_model=hetero_feature_binning_0.output.model))
    pipeline.add_component(
        hetero_kmeans_0,
        data=Data(train_data=hetero_feature_selection_0.output.data))
    # evaluation consumes the first entry of the k-means data outputs
    print(f"data: {hetero_kmeans_0.output.data.data[0]}")
    pipeline.add_component(evaluation_0,
                           data=Data(data=hetero_kmeans_0.output.data.data[0]))

    # compile after the last component has been added
    pipeline.compile()

    # fit, then print the k-means component summary
    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))
    print(pipeline.get_component("hetero_kmeans_0").get_summary())
示例#3
0
def make_single_predict_pipeline(config,
                                 namespace,
                                 selection_param,
                                 is_multi_host=False,
                                 **kwargs):
    """Assemble and compile a pipeline with a train line and a second data line.

    Line 0: ``reader_0 -> dataio_0 -> intersection_0 -> sample_0 ->
    hetero_feature_binning_0 -> hetero_feature_selection_0 -> scale_0``.
    Line 1: ``reader_1 -> dataio_1 -> intersection_1 ->
    hetero_feature_binning_1 -> hetero_feature_selection_1 -> scale_1``,
    where each line-1 component that has a line-0 counterpart with a model
    output receives that model as input.

    Args:
        config: Job configuration exposing ``parties.guest`` and
            ``parties.host``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.
        selection_param: Keyword arguments for ``hetero_feature_selection_0``.
        is_multi_host: When true, ``config.parties.host`` is used as a whole;
            otherwise only its first entry is used.
        **kwargs: Must contain ``binning_param``, the keyword arguments for
            ``hetero_feature_binning_0``.

    Returns:
        The compiled ``PipeLine``.

    Raises:
        ValueError: If ``binning_param`` is missing from ``kwargs``.
    """

    def _table(table_name):
        # a fresh dict per call, so no table description is shared
        return {"name": table_name, "namespace": f"experiment{namespace}"}

    party_cfg = config.parties
    guest = party_cfg.guest[0]
    hosts = party_cfg.host if is_multi_host else party_cfg.host[0]
    role_parties = (("guest", guest), ("host", hosts))

    train_tables = {
        "guest": _table("breast_hetero_guest"),
        "host": _table("breast_hetero_host"),
    }
    eval_tables = {
        "guest": _table("breast_hetero_guest"),
        "host": _table("breast_hetero_host"),
    }

    # pipeline with initiator and participating roles
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts)

    # ---- line 0: reader / dataio / intersection ----
    reader_0 = Reader(name="reader_0")
    for role, party_id in role_parties:
        reader_0.get_party_instance(
            role=role,
            party_id=party_id).component_param(table=train_tables[role])

    dataio_0 = DataIO(name="dataio_0")
    guest_dataio = dataio_0.get_party_instance(role='guest', party_id=guest)
    guest_dataio.component_param(with_label=True, output_format="dense")
    host_dataio = dataio_0.get_party_instance(role='host', party_id=hosts)
    host_dataio.component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))

    # ---- line 1: reader / dataio / intersection ----
    reader_1 = Reader(name="reader_1")
    for role, party_id in role_parties:
        reader_1.get_party_instance(
            role=role,
            party_id=party_id).component_param(table=eval_tables[role])
    dataio_1 = DataIO(name="dataio_1")
    intersection_1 = Intersection(name="intersection_1")

    pipeline.add_component(reader_1)
    # dataio_1 takes the model output of dataio_0 as its model input
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(intersection_1,
                           data=Data(data=dataio_1.output.data))

    # sampling is applied to line 0 only
    sample_0 = FederatedSample(name='sample_0', fractions=0.9)
    pipeline.add_component(sample_0,
                           data=Data(data=intersection_0.output.data))

    if "binning_param" not in kwargs:
        raise ValueError("Binning_param is needed")

    # ---- binning ----
    binning_0 = HeteroFeatureBinning(**kwargs['binning_param'])
    pipeline.add_component(binning_0, data=Data(data=sample_0.output.data))

    binning_1 = HeteroFeatureBinning(name='hetero_feature_binning_1')
    pipeline.add_component(binning_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(binning_0.output.model))

    # ---- selection ----
    selection_0 = HeteroFeatureSelection(**selection_param)
    pipeline.add_component(
        selection_0,
        data=Data(data=binning_0.output.data),
        model=Model(isometric_model=[binning_0.output.model]))

    selection_1 = HeteroFeatureSelection(name='hetero_feature_selection_1')
    pipeline.add_component(selection_1,
                           data=Data(data=binning_1.output.data),
                           model=Model(selection_0.output.model))

    # ---- scaling ----
    scale_0 = FeatureScale(name='scale_0')
    scale_1 = FeatureScale(name='scale_1')
    pipeline.add_component(scale_0, data=Data(data=selection_0.output.data))
    pipeline.add_component(scale_1,
                           data=Data(data=selection_1.output.data),
                           model=Model(scale_0.output.model))

    pipeline.compile()
    return pipeline
示例#4
0
def make_feature_engineering_dsl(config,
                                 namespace,
                                 lr_param,
                                 is_multi_host=False,
                                 has_validate=False,
                                 is_cv=False,
                                 is_ovr=False):
    """Assemble and compile a feature-engineering + HeteroLR pipeline.

    Train line: ``reader_0 -> dataio_0 -> intersection_0 -> feature_scale_0
    -> hetero_feature_binning_0 -> hetero_feature_selection_0 ->
    one_hot_encoder_0 -> hetero_lr_0``. With ``has_validate`` a second line
    (``reader_1`` plus a ``*_1`` copy of every train-line component) is
    built and used as validate data for ``hetero_lr_0`` and as test data for
    ``hetero_lr_1``. Unless ``is_cv`` is set, an ``Evaluation`` component is
    appended.

    Args:
        config: Job configuration exposing ``parties.guest``,
            ``parties.host`` and ``parties.arbiter``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.
        lr_param: Keyword arguments for ``hetero_lr_0``.
        is_multi_host: When true, ``config.parties.host`` is used as a whole;
            otherwise only its first entry is used.
        has_validate: Build the second data line described above.
        is_cv: Compile and return right after ``hetero_lr_0`` is added,
            without ``hetero_lr_1`` and without evaluation.
        is_ovr: Use the ``vehicle_scale_hetero_*`` tables instead of
            ``breast_hetero_*`` and evaluate with ``eval_type="multi"``
            instead of ``"binary"``.

    Returns:
        The compiled ``PipeLine``.
    """
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host if is_multi_host else parties.host[0]
    arbiter = parties.arbiter[0]

    # Train and eval tables share the same names; only the data set differs.
    table_prefix = "vehicle_scale_hetero" if is_ovr else "breast_hetero"
    table_namespace = f"experiment{namespace}"
    guest_train_data = {
        "name": f"{table_prefix}_guest",
        "namespace": table_namespace
    }
    host_train_data = {
        "name": f"{table_prefix}_host",
        "namespace": table_namespace
    }
    guest_eval_data = {
        "name": f"{table_prefix}_guest",
        "namespace": table_namespace
    }
    host_eval_data = {
        "name": f"{table_prefix}_host",
        "namespace": table_namespace
    }

    # Train-line components (reader excluded), in order; replayed below to
    # build the validate line.
    train_line = []

    # initialize pipeline, then set the job initiator and the participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts, arbiter=arbiter)

    # Reader: one input table per role
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=hosts).component_param(table=host_train_data)

    # DataIO: only the guest side is configured with labels
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(
        role='host', party_id=hosts).component_param(with_label=False)
    train_line.append(dataio_0)

    intersection_0 = Intersection(name="intersection_0")
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    train_line.append(intersection_0)

    feature_scale_0 = FeatureScale(name='feature_scale_0',
                                   method="standard_scale",
                                   need_run=True)
    pipeline.add_component(feature_scale_0,
                           data=Data(data=intersection_0.output.data))
    train_line.append(feature_scale_0)

    binning_param = {
        "method": "quantile",
        "compress_thres": 10000,
        "head_size": 10000,
        "error": 0.001,
        "bin_num": 10,
        "bin_indexes": -1,
        "adjustment_factor": 0.5,
        "local_only": False,
        "need_run": True,
        "transform_param": {
            "transform_cols": -1,
            "transform_type": "bin_num"
        }
    }
    hetero_feature_binning_0 = HeteroFeatureBinning(
        name='hetero_feature_binning_0', **binning_param)
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=feature_scale_0.output.data))
    train_line.append(hetero_feature_binning_0)

    selection_param = {
        "select_col_indexes": -1,
        "filter_methods": ["manually", "iv_value_thres", "iv_percentile"],
        "manually_param": {
            "filter_out_indexes": None
        },
        "iv_value_param": {
            "value_threshold": 1.0
        },
        "iv_percentile_param": {
            "percentile_threshold": 0.9
        },
        "need_run": True
    }
    hetero_feature_selection_0 = HeteroFeatureSelection(
        name='hetero_feature_selection_0', **selection_param)
    pipeline.add_component(
        hetero_feature_selection_0,
        data=Data(data=hetero_feature_binning_0.output.data),
        model=Model(isometric_model=[hetero_feature_binning_0.output.model]))
    train_line.append(hetero_feature_selection_0)

    onehot_param = {
        "transform_col_indexes": -1,
        "transform_col_names": None,
        "need_run": True
    }
    one_hot_encoder_0 = OneHotEncoder(name='one_hot_encoder_0', **onehot_param)
    pipeline.add_component(
        one_hot_encoder_0,
        data=Data(data=hetero_feature_selection_0.output.data))
    train_line.append(one_hot_encoder_0)

    # Last component of the validate line; stays None without validation.
    last_cpn = None
    if has_validate:
        reader_1 = Reader(name="reader_1")
        reader_1.get_party_instance(
            role='guest',
            party_id=guest).component_param(table=guest_eval_data)
        reader_1.get_party_instance(
            role='host', party_id=hosts).component_param(table=host_eval_data)
        pipeline.add_component(reader_1)
        last_cpn = reader_1
        for cpn in train_line:
            # "<base>_0" -> "<base>_1": swap the trailing index of the name
            new_name = "_".join(cpn.name.split('_')[:-1] + ['1'])
            validate_cpn = type(cpn)(name=new_name)
            if hasattr(cpn.output, "model"):
                # the copy takes the train component's model as model input
                pipeline.add_component(validate_cpn,
                                       data=Data(data=last_cpn.output.data),
                                       model=Model(cpn.output.model))
            else:
                pipeline.add_component(validate_cpn,
                                       data=Data(data=last_cpn.output.data))
            last_cpn = validate_cpn

    hetero_lr_0 = HeteroLR(**lr_param)
    if has_validate:
        pipeline.add_component(hetero_lr_0,
                               data=Data(
                                   train_data=one_hot_encoder_0.output.data,
                                   validate_data=last_cpn.output.data))
    else:
        pipeline.add_component(
            hetero_lr_0, data=Data(train_data=one_hot_encoder_0.output.data))

    if is_cv:
        # no prediction / evaluation components are added in this mode
        pipeline.compile()
        return pipeline

    evaluation_data = [hetero_lr_0.output.data]
    if has_validate:
        hetero_lr_1 = HeteroLR(name='hetero_lr_1')
        pipeline.add_component(hetero_lr_1,
                               data=Data(test_data=last_cpn.output.data),
                               model=Model(hetero_lr_0.output.model))
        evaluation_data.append(hetero_lr_1.output.data)

    # One-vs-rest implies more than two classes, so binary evaluation does
    # not fit that case.
    # NOTE(review): assumes the vehicle_scale tables are multi-class - confirm.
    eval_type = "multi" if is_ovr else "binary"
    evaluation_0 = Evaluation(name="evaluation_0", eval_type=eval_type)
    pipeline.add_component(evaluation_0, data=Data(data=evaluation_data))

    pipeline.compile()
    return pipeline
示例#5
0
def make_add_one_hot_dsl(config, namespace, bin_param, is_multi_host=False):
    """Assemble and compile a binning pipeline followed by one-hot encoding.

    Line 0: ``reader_0 -> dataio_0 -> intersection_0 ->
    hetero_feature_binning_0 -> one_hot_encoder_0``.
    Line 1: ``reader_1 -> dataio_1 -> intersection_1 ->
    hetero_feature_binning_1``, where ``dataio_1`` and
    ``hetero_feature_binning_1`` take the model output of their line-0
    counterpart as model input.

    Args:
        config: Job configuration exposing ``parties.guest`` and
            ``parties.host``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.
        bin_param: Keyword arguments for ``hetero_feature_binning_0``.
        is_multi_host: When true, every entry of ``config.parties.host`` is
            registered and configured; otherwise only the first one.

    Returns:
        The compiled ``PipeLine``. The pipeline is not fitted here.
    """
    parties = config.parties
    guest = parties.guest[0]
    # The same selector is used for set_roles and for every host party
    # instance, so all registered hosts get configured regardless of how
    # many there are.
    hosts = parties.host if is_multi_host else parties.host[0]

    table_namespace = f"experiment{namespace}"
    guest_train_data = {
        "name": "breast_hetero_guest",
        "namespace": table_namespace
    }
    host_train_data = {
        "name": "breast_hetero_host",
        "namespace": table_namespace
    }
    guest_eval_data = {
        "name": "breast_hetero_guest",
        "namespace": table_namespace
    }
    host_eval_data = {
        "name": "breast_hetero_host",
        "namespace": table_namespace
    }

    # initialize pipeline, then set the job initiator and the participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=hosts)

    # Readers: one input table per role and data line
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=hosts).component_param(table=host_train_data)

    reader_1 = Reader(name="reader_1")
    reader_1.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_eval_data)
    reader_1.get_party_instance(
        role='host', party_id=hosts).component_param(table=host_eval_data)

    # DataIO: only dataio_0 is configured explicitly
    dataio_0 = DataIO(name="dataio_0")
    dataio_1 = DataIO(name="dataio_1")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    dataio_0.get_party_instance(
        role='host', party_id=hosts).component_param(with_label=False)

    intersection_0 = Intersection(name="intersection_0")
    intersection_1 = Intersection(name="intersection_1")

    hetero_feature_binning_0 = HeteroFeatureBinning(**bin_param)
    hetero_feature_binning_1 = HeteroFeatureBinning(
        name='hetero_feature_binning_1')

    one_hot_encoder_0 = OneHotEncoder(name='one_hot_encoder_0',
                                      transform_col_indexes=-1,
                                      transform_col_names=None,
                                      need_run=True)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(reader_1)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    # dataio_1 takes the model output of dataio_0 as its model input
    pipeline.add_component(dataio_1,
                           data=Data(data=reader_1.output.data),
                           model=Model(dataio_0.output.model))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(intersection_1,
                           data=Data(data=dataio_1.output.data))
    # binning_1 takes the model output of binning_0 as its model input
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))
    pipeline.add_component(hetero_feature_binning_1,
                           data=Data(data=intersection_1.output.data),
                           model=Model(hetero_feature_binning_0.output.model))

    pipeline.add_component(
        one_hot_encoder_0,
        data=Data(data=hetero_feature_binning_0.output.data))

    # compile once all components are added; the pipeline is returned unfitted
    pipeline.compile()
    return pipeline
示例#6
0
def make_asymmetric_dsl(config,
                        namespace,
                        guest_param,
                        host_param,
                        dataset='breast',
                        is_multi_host=False,
                        host_dense_output=True):
    """Assemble and compile a binning pipeline with per-role parameters.

    Component chain: ``reader_0 -> dataio_0 -> intersection_0 ->
    hetero_feature_binning_0``. The binning component is configured
    separately for guest (``guest_param``) and host (``host_param``).

    Args:
        config: Job configuration exposing ``parties.guest`` and
            ``parties.host``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.
        guest_param: Keyword arguments for the guest party instance of
            ``hetero_feature_binning_0``.
        host_param: Keyword arguments for the host party instance of
            ``hetero_feature_binning_0``.
        dataset: ``'breast'`` or ``'default_credit'``; selects the table
            names.
        is_multi_host: When true, every entry of ``config.parties.host`` is
            registered and configured; otherwise only the first one.
        host_dense_output: Host ``dataio_0`` output format: ``'dense'`` when
            true, ``'sparse'`` otherwise.

    Returns:
        The compiled ``PipeLine``. The pipeline is not fitted here.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host

    if dataset == 'breast':
        guest_table_name = 'breast_hetero_guest'
        host_table_name = 'breast_hetero_host'
    elif dataset == 'default_credit':
        guest_table_name = 'default_credit_hetero_guest'
        host_table_name = 'default_credit_hetero_host'
    else:
        raise ValueError(f"dataset: {dataset} cannot be recognized")

    table_namespace = f"experiment{namespace}"
    guest_train_data = {
        "name": guest_table_name,
        "namespace": table_namespace
    }
    host_train_data = {
        "name": host_table_name,
        "namespace": table_namespace
    }

    # One selector for set_roles and for every host party instance, so the
    # reader, dataio and binning components all cover the same hosts.
    host_ids = hosts if is_multi_host else hosts[0]

    # initialize pipeline, then set the job initiator and the participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host_ids)

    # Reader: one input table per role
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    reader_0.get_party_instance(
        role='host', party_id=host_ids).component_param(table=host_train_data)

    # DataIO: guest with labels and dense output; host format is selectable
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest).component_param(
        with_label=True, output_format="dense")
    output_format = 'dense' if host_dense_output else 'sparse'
    dataio_0.get_party_instance(role='host', party_id=host_ids).component_param(
        with_label=False, output_format=output_format)

    intersection_0 = Intersection(name="intersection_0")

    # Binning: guest and host receive different parameter sets
    hetero_feature_binning_0 = HeteroFeatureBinning(
        name="hetero_feature_binning_0")
    hetero_feature_binning_0.get_party_instance(
        role='guest', party_id=guest).component_param(**guest_param)
    hetero_feature_binning_0.get_party_instance(
        role='host', party_id=host_ids).component_param(**host_param)

    # add components to pipeline, in order of task execution
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))

    # compile once all components are added; the pipeline is returned unfitted
    pipeline.compile()
    return pipeline
def main(config="../../config.yaml", namespace=""):
    """Build, compile and fit the hetero feature-binning example pipeline.

    Component chain: ``reader_0 -> dataio_0 -> intersection_0 ->
    hetero_feature_binning_0``. The binning component is created with a
    ``"woe"`` transform type, and its host party instance is then configured
    with ``transform_type`` set to ``None``.

    Args:
        config: Either a path (``str``) handed to ``load_job_config`` or an
            already loaded job configuration exposing ``parties``,
            ``backend`` and ``work_mode``.
        namespace: Suffix appended to ``"experiment"`` to form the table
            namespace.
    """
    # a string is treated as the location of the job config file
    if isinstance(config, str):
        config = load_job_config(config)
    party_cfg = config.parties
    guest, host = party_cfg.guest[0], party_cfg.host[0]
    backend, work_mode = config.backend, config.work_mode

    experiment_ns = f"experiment{namespace}"
    party_of = {"guest": guest, "host": host}
    train_tables = {
        "guest": {"name": "breast_hetero_guest", "namespace": experiment_ns},
        "host": {"name": "breast_hetero_host", "namespace": experiment_ns},
    }
    with_label = {"guest": True, "host": False}

    # each setter's return value is what the next call operates on
    pipeline = PipeLine()
    pipeline = pipeline.set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Reader: attach the training table of each role
    reader_0 = Reader(name="reader_0")
    for role in ("guest", "host"):
        reader_0.get_party_instance(
            role=role,
            party_id=party_of[role]).component_param(table=train_tables[role])

    # DataIO: only the guest side is configured with labels
    dataio_0 = DataIO(name="dataio_0")
    for role in ("guest", "host"):
        dataio_0.get_party_instance(
            role=role,
            party_id=party_of[role]).component_param(
                with_label=with_label[role])

    intersection_0 = Intersection(name="intersection_0")

    hetero_feature_binning_0 = HeteroFeatureBinning(
        name="hetero_feature_binning_0",
        method="quantile",
        compress_thres=10000,
        head_size=10000,
        error=0.001,
        bin_num=10,
        bin_indexes=-1,
        bin_names=None,
        category_indexes=None,
        category_names=None,
        adjustment_factor=0.5,
        local_only=False,
        transform_param={
            "transform_cols": [0, 1, 2],
            "transform_names": None,
            "transform_type": "woe"
        })
    # host-specific override of the transform parameters
    host_binning = hetero_feature_binning_0.get_party_instance(role="host",
                                                               party_id=host)
    host_binning.component_param(transform_param={"transform_type": None})

    # register components in execution order
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))

    pipeline.compile()

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))