Пример #1
0
def make_asymmetric_dsl(config,
                        namespace,
                        guest_param,
                        host_param,
                        dataset='breast',
                        is_multi_host=False,
                        host_dense_output=True):
    """Build and compile a hetero feature binning pipeline.

    The pipeline chains four components: ``reader_0`` -> ``dataio_0`` ->
    ``intersection_0`` -> ``hetero_feature_binning_0``. It is compiled but
    not fitted; the caller decides when to run it.

    Args:
        config: Job config object exposing ``parties.guest`` and
            ``parties.host`` as indexable sequences of party ids.
        namespace: Suffix appended to ``"experiment"`` to form the namespace
            of the input tables.
        guest_param: Keyword arguments forwarded to the guest instance of
            ``hetero_feature_binning_0`` via ``component_param``.
        host_param: Keyword arguments forwarded to the host instance(s) of
            ``hetero_feature_binning_0`` via ``component_param``.
        dataset: Either ``'breast'`` or ``'default_credit'``; selects the
            guest/host table names.
        is_multi_host: When true, every id in ``parties.host`` takes part;
            otherwise only the first host does.
        host_dense_output: When true the host DataIO output format is
            ``'dense'``, otherwise ``'sparse'``.

    Returns:
        The compiled pipeline object.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    parties = config.parties
    guest = parties.guest[0]
    hosts = parties.host

    if dataset not in ('breast', 'default_credit'):
        raise ValueError(f"dataset: {dataset} cannot be recognized")

    # Both supported datasets follow the "<dataset>_hetero_<role>" table
    # naming scheme.
    guest_train_data = {
        "name": f"{dataset}_hetero_guest",
        "namespace": f"experiment{namespace}"
    }
    host_train_data = {
        "name": f"{dataset}_hetero_host",
        "namespace": f"experiment{namespace}"
    }

    # Party id(s) handed to set_roles / get_party_instance for the host
    # role: the whole list in multi-host mode, a single id otherwise.
    host_party_ids = hosts if is_multi_host else hosts[0]
    # The Reader is configured one host at a time, so keep an iterable of
    # every participating host as well.
    reader_host_ids = hosts if is_multi_host else [hosts[0]]

    # initialize pipeline, set the job initiator and the participants
    pipeline = PipeLine()
    pipeline.set_initiator(role='guest', party_id=guest)
    pipeline.set_roles(guest=guest, host=host_party_ids)

    # define Reader component and point each party at its input table
    reader_0 = Reader(name="reader_0")
    reader_0.get_party_instance(
        role='guest', party_id=guest).component_param(table=guest_train_data)
    # Every participating host reads the same host table. Iterating (rather
    # than indexing hosts[0] and hosts[1]) supports any number of hosts.
    for host_id in reader_host_ids:
        reader_0.get_party_instance(
            role='host',
            party_id=host_id).component_param(table=host_train_data)

    # define DataIO component: guest carries the label, hosts do not
    dataio_0 = DataIO(name="dataio_0")
    dataio_0.get_party_instance(role='guest', party_id=guest). \
        component_param(with_label=True, output_format="dense")
    output_format = 'dense' if host_dense_output else 'sparse'
    dataio_0.get_party_instance(role='host', party_id=host_party_ids). \
        component_param(with_label=False, output_format=output_format)

    # define Intersection component
    intersection_0 = Intersection(name="intersection_0")

    # define HeteroFeatureBinning component with per-role parameters
    hetero_feature_binning_0 = HeteroFeatureBinning(
        name="hetero_feature_binning_0")
    hetero_feature_binning_0.get_party_instance(
        role='guest', party_id=guest).component_param(**guest_param)
    hetero_feature_binning_0.get_party_instance(
        role='host', party_id=host_party_ids).component_param(**host_param)

    # add components to pipeline, in order of task execution; each component
    # consumes the data output of the previous one
    pipeline.add_component(reader_0)
    pipeline.add_component(dataio_0, data=Data(data=reader_0.output.data))
    pipeline.add_component(intersection_0,
                           data=Data(data=dataio_0.output.data))
    pipeline.add_component(hetero_feature_binning_0,
                           data=Data(data=intersection_0.output.data))

    # compile pipeline once finished adding modules, this step will form conf
    # and dsl files for running job
    pipeline.compile()

    # fitting is left to the caller
    return pipeline
def main(config="../../config.yaml", namespace=""):
    """Assemble the breast-dataset feature binning pipeline and fit it.

    ``config`` is either a path string, which is handed to
    ``load_job_config``, or an already loaded config object exposing
    ``parties``, ``backend`` and ``work_mode``. ``namespace`` is appended to
    ``"experiment"`` to form the namespace of both input tables.
    """
    if isinstance(config, str):
        config = load_job_config(config)

    # Only the first guest and the first host take part in this job.
    party_info = config.parties
    guest = party_info.guest[0]
    host = party_info.host[0]
    backend = config.backend
    work_mode = config.work_mode

    table_namespace = f"experiment{namespace}"

    pipeline = PipeLine().set_initiator(role='guest', party_id=guest)
    pipeline = pipeline.set_roles(guest=guest, host=host)

    # Reader: each party reads its own table from the shared namespace.
    reader_0 = Reader(name="reader_0")
    reader_tables = (
        ('guest', guest, "breast_hetero_guest"),
        ('host', host, "breast_hetero_host"),
    )
    for role, party_id, table_name in reader_tables:
        reader_0.get_party_instance(role=role, party_id=party_id) \
            .component_param(table={"name": table_name,
                                    "namespace": table_namespace})

    # DataIO: the guest side carries the label, the host side does not.
    dataio_0 = DataIO(name="dataio_0")
    for role, party_id, has_label in (('guest', guest, True),
                                      ('host', host, False)):
        dataio_0.get_party_instance(role=role, party_id=party_id) \
            .component_param(with_label=has_label)

    intersection_0 = Intersection(name="intersection_0")

    # Component-level binning settings; the host overrides transform_param
    # below with a transform_type of None.
    hetero_feature_binning_0 = HeteroFeatureBinning(
        name="hetero_feature_binning_0",
        method="quantile",
        compress_thres=10000,
        head_size=10000,
        error=0.001,
        bin_num=10,
        bin_indexes=-1,
        bin_names=None,
        category_indexes=None,
        category_names=None,
        adjustment_factor=0.5,
        local_only=False,
        transform_param={
            "transform_cols": [0, 1, 2],
            "transform_names": None,
            "transform_type": "woe",
        },
    )
    host_binning = hetero_feature_binning_0.get_party_instance(
        role="host", party_id=host)
    host_binning.component_param(transform_param={"transform_type": None})

    # Chain the components: each one consumes the previous one's data output.
    pipeline.add_component(reader_0)
    upstream = reader_0
    for component in (dataio_0, intersection_0, hetero_feature_binning_0):
        pipeline.add_component(component,
                               data=Data(data=upstream.output.data))
        upstream = component

    pipeline.compile()

    pipeline.fit(JobParameters(backend=backend, work_mode=work_mode))