Example #1
def main():
    # parties config
    guest = 9999

    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment"}

    tag_data = {"name": "tag_value_1", "namespace": f"experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)
    # add upload data info
    # original csv file path
    pipeline_upload.add_upload_data(file=os.path.join(DATA_BASE, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],             # table name
                                    namespace=dense_data["namespace"],         # namespace
                                    head=1, partition=partition)               # data info

    pipeline_upload.add_upload_data(file=os.path.join(DATA_BASE, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0, partition=partition)

    # upload all data
    pipeline_upload.upload(drop=1)
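
These standalone snippets rely on a few imports and constants defined elsewhere in the original example files. A minimal setup sketch, assuming the fate_client pipeline package is installed and that DATA_BASE points at the root of a local FATE checkout containing examples/data/ (both the import path and the DATA_BASE value below are assumptions, not part of the examples):

import os

# assumption: PipeLine is provided by the fate_client pipeline package;
# adjust the import to match the installed version
from pipeline.backend.pipeline import PipeLine

# assumption: root of a local FATE checkout that contains examples/data/
DATA_BASE = "/data/projects/fate"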
Example #2
def main():
    # parties config
    guest = 9999

    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}

    tag_data = {"name": "tag_value_1", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(
        role="guest", party_id=guest).set_roles(guest=guest)
    # add upload data info
    # path to csv file to be uploaded, joined from the data base directory and the file name
    pipeline_upload.add_upload_data(
        file=os.path.join(DATA_BASE, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],  # table name
        namespace=dense_data["namespace"],  # namespace
        head=1,
        partition=partition,  # data info
        id_delimiter=",")  # id delimiter, needed for spark

    pipeline_upload.add_upload_data(
        file=os.path.join(DATA_BASE, "examples/data/tag_value_1000_140.csv"),
        table_name=tag_data["name"],
        namespace=tag_data["namespace"],
        head=0,
        partition=partition,
        id_delimiter=",")
    # upload all data
    pipeline_upload.upload(drop=1)
    # print the generated upload configuration for inspection
    import json
    print(json.dumps(pipeline_upload._upload_conf(), indent=4))
Example #3
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    data_base = config.data_base_dir

    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}
    tag_data = {"name": "tag_value_1", "namespace": f"experiment{namespace}"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # path to csv file(s) to be uploaded
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],             # table name
                                    namespace=dense_data["namespace"],         # namespace
                                    head=1, partition=partition,               # data info
                                    id_delimiter=",",
                                    extend_sid=True)

    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0, partition=partition,
                                    id_delimiter=",",
                                    extend_sid=True)
    # upload both datasets
    pipeline_upload.upload(drop=1)
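
Example #3 reads the guest party ID and the data base directory from a job config file through load_job_config. A hedged sketch of the argparse entry point such config-driven examples are usually run with; the import path for load_job_config and the parser description are assumptions:

import argparse

# assumption: load_job_config comes from the fate_client pipeline utilities
from pipeline.utils.tools import load_job_config


if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE UPLOAD DEMO")
    parser.add_argument("-config", type=str, help="path to config.yaml")
    args = parser.parse_args()
    if args.config is not None:
        main(args.config)  # use the main() defined in Example #3 above
    else:
        main()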
Example #4
def main(data_base=DATA_BASE):
    # parties config
    guest = 9999

    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment"}
    tag_data = {"name": "breast_hetero_host", "namespace": f"experiment"}

    pipeline_upload = PipeLine().set_initiator(
        role="guest", party_id=guest).set_roles(guest=guest)
    # add upload data info
    # path to csv file(s) to be uploaded; modify to upload your own data
    # This example targets the standalone deployment; for a cluster deployment,
    # data must be uploaded on each party separately.
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],  # table name
        namespace=dense_data["namespace"],  # namespace
        head=1,
        partition=partition)  # data info

    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_host.csv"),
        table_name=tag_data["name"],
        namespace=tag_data["namespace"],
        head=1,
        partition=partition)

    # upload data
    pipeline_upload.upload(drop=1)
Example #5
def main(config="../../config.yaml", namespace=""):
    # obtain config
    if isinstance(config, str):
        config = load_job_config(config)
    parties = config.parties
    guest = parties.guest[0]
    backend = config.backend
    work_mode = config.work_mode
    data_base = config.data_base

    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment{namespace}"}

    tag_data = {"name": "tag_value_1", "namespace": f"experiment{namespace}"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)
    # add upload data info
    # path to csv file to be uploaded, joined from the data base directory and the file name
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],             # table name
                                    namespace=dense_data["namespace"],         # namespace
                                    head=1, partition=partition,               # data info
                                    id_delimiter=",")                          # needed for spark backend

    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/tag_value_1000_140.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=0, partition=partition,
                                    id_delimiter=",")
    # upload all data
    pipeline_upload.upload(work_mode=work_mode, backend=backend, drop=1)
Example #6
    def test_upload(self):
        upload_pipeline = PipeLine()
        upload_pipeline.add_upload_data(file=self.file,
                                        table_name=self.table_name, namespace=self.job_id)
        upload_pipeline.upload()

        upload_count = session.get_data_table(self.table_name, self.job_id).count()
        return upload_count == self.data_count
Example #7
def main():
    # parties config
    guest = 9999
    # 0 for eggroll, 1 for spark
    backend = Backend.EGGROLL
    # 0 for standalone, 1 for cluster
    work_mode = WorkMode.STANDALONE
    # use the work mode below for cluster deployment
    # work_mode = WorkMode.CLUSTER

    # partition for data storage
    partition = 4

    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment"}

    tag_data = {"name": "tag_value_1", "namespace": f"experiment"}

    pipeline_upload = PipeLine().set_initiator(
        role="guest", party_id=guest).set_roles(guest=guest)
    # add upload data info
    # original csv file path
    pipeline_upload.add_upload_data(
        file=os.path.join(DATA_BASE, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],  # table name
        namespace=dense_data["namespace"],  # namespace
        head=1,
        partition=partition)  # data info

    pipeline_upload.add_upload_data(
        file=os.path.join(DATA_BASE, "examples/data/tag_value_1000_140.csv"),
        table_name=tag_data["name"],
        namespace=tag_data["namespace"],
        head=0,
        partition=partition)

    # upload all data
    pipeline_upload.upload(work_mode=work_mode, backend=backend, drop=1)
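
Examples #7 and #8 use an older pipeline API in which work_mode and backend are passed to upload() explicitly. A sketch of the enum imports these snippets assume; the module path reflects older fate_client releases (around 1.5) and is an assumption here, since newer releases drop both parameters:

# assumption: Backend and WorkMode enums are defined in pipeline.backend.config
# in older fate_client releases
from pipeline.backend.config import Backend, WorkMode

backend = Backend.EGGROLL        # eggroll or spark
work_mode = WorkMode.STANDALONE  # standalone or cluster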
Example #8
def main(data_base=DATA_BASE):
    # parties config
    guest = 9999
    # 0 for eggroll, 1 for spark
    backend = Backend.EGGROLL
    # 0 for standalone, 1 for cluster
    work_mode = WorkMode.STANDALONE
    # use the work mode below for cluster deployment
    # work_mode = WorkMode.CLUSTER

    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": f"experiment"}
    tag_data = {"name": "breast_hetero_host", "namespace": f"experiment"}

    pipeline_upload = PipeLine().set_initiator(
        role="guest", party_id=guest).set_roles(guest=guest)
    # add upload data info
    # path to csv file(s) to be uploaded; modify to upload your own data
    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
        table_name=dense_data["name"],  # table name
        namespace=dense_data["namespace"],  # namespace
        head=1,
        partition=partition)  # data info

    pipeline_upload.add_upload_data(
        file=os.path.join(data_base, "examples/data/breast_hetero_host.csv"),
        table_name=tag_data["name"],
        namespace=tag_data["namespace"],
        head=1,
        partition=partition)

    # upload data
    pipeline_upload.upload(work_mode=work_mode, backend=backend, drop=1)