Example #1
    def _init_argument(self):
        parser = argparse.ArgumentParser()
        parser.add_argument('-c',
                            '--config',
                            required=True,
                            type=str,
                            help="Specify a config json file path")
        parser.add_argument('-j',
                            '--job_id',
                            type=str,
                            required=True,
                            help="Specify the job id")
        # parser.add_argument('-p', '--party_id', type=str, required=True, help="Specify the party id")
        # parser.add_argument('-l', '--LOGGER_path', type=str, required=True, help="Specify the LOGGER path")
        args = parser.parse_args()
        if not args.config:
            LOGGER.error("Config file should be provided")
            exit(-100)
        config_path = args.config
        self.config_path = config_path
        self.job_id = args.job_id

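        # validate every parameter in the job config against the built-in validation schema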
        home_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
        param_validation_path = os.path.join(home_dir, "conf", "param_validation.json")
        all_checker = AllChecker(config_path, param_validation_path)
        all_checker.check_all()
        LOGGER.debug("Finish all parameter checkers")
        self._initialize(config_path)
        with open(config_path) as conf_f:
            runtime_json = json.load(conf_f)
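        # initialize the computing session and the federation layer for this job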
        session.init(self.job_id, self.workflow_param.work_mode)
        LOGGER.debug("The job id is {}".format(self.job_id))
        federation.init(self.job_id, runtime_json)
        LOGGER.debug("Finish eggroll and federation init")
        self._init_pipeline()
Example #2
 def setUp(self):
     session.init("test_cross_entropy")
     self.sigmoid_loss = SigmoidBinaryCrossEntropyLoss()
     self.y_list = [i % 2 for i in range(100)]
     self.predict_list = [random.random() for i in range(100)]
     self.y = session.parallelize(self.y_list, include_key=False, partition=16)
     self.predict = session.parallelize(self.predict_list, include_key=False, partition=16)
Example #3
 def run(self, component_parameters=None, args=None):
     self.parameters = component_parameters["DownloadParam"]
     self.parameters["role"] = component_parameters["role"]
     self.parameters["local"] = component_parameters["local"]
     table_name, namespace = dtable_utils.get_table_info(config=self.parameters,
                                                         create=False)
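     # the job id is the first segment of the task id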
     job_id = self.taskid.split("_")[0]
     session.init(job_id, self.parameters["work_mode"])
     with open(os.path.abspath(self.parameters["output_path"]), "w") as fout:
         data_table = session.get_data_table(name=table_name, namespace=namespace)
         count = data_table.count()
         LOGGER.info('===== begin to export data =====')
         lines = 0
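         # write one line per record: key, delimiter, value; keys with empty values are written alone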
         for key, value in data_table.collect():
             if not value:
                 fout.write(key + "\n")
             else:
                 fout.write(key + self.parameters.get("delimitor", ",") + str(value) + "\n")
             lines += 1
             if lines % 2000 == 0:
                 LOGGER.info("===== export {} lines =====".format(lines))
             if lines % 10000 == 0:
                 job_info = {'f_progress': lines/count*100//1}
                 self.update_job_status(self.parameters["local"]['role'], self.parameters["local"]['party_id'],
                                        job_info)
         self.update_job_status(self.parameters["local"]['role'],
                                self.parameters["local"]['party_id'], {'f_progress': 100})
         self.callback_metric(metric_name='data_access',
                              metric_namespace='download',
                              metric_data=[Metric("count", data_table.count())])
         LOGGER.info("===== export {} lines totally =====".format(lines))
         LOGGER.info('===== export data finish =====')
         LOGGER.info('===== export data file path:{} ====='.format(os.path.abspath(self.parameters["output_path"])))
Example #4
    def setUp(self):
        session.init("test_instance")

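        # 100 dense instances, each a 20-dimensional constant feature vector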
        dense_inst = []
        headers = ['x' + str(i) for i in range(20)]
        for i in range(100):
            inst = Instance(features=(i % 16 * np.ones(20)))
            dense_inst.append((i, inst))
        self.dense_table = session.parallelize(dense_inst, include_key=True, partition=2)
        self.dense_table.schema = {'header': headers}

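        # 100 sparse instances with up to 20 non-zero entries in a 30-dimensional space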
        self.sparse_inst = []
        for i in range(100):
            seen = {}
            indices = []
            data = []
            for j in range(20):
                idx = random.randint(0, 29)
                if idx in seen:
                    continue
                seen[idx] = 1
                val = random.random()
                indices.append(idx)
                data.append(val)

            sparse_vec = SparseVector(indices, data, 30)
            self.sparse_inst.append((i, Instance(features=sparse_vec)))

        self.sparse_table = session.parallelize(self.sparse_inst, include_key=True)
        self.sparse_table.schema = {"header": ["fid" + str(i) for i in range(30)]}
Example #5
 def setUp(self):
     session.init("test_random_sampler")
     self.data = [(i * 10 + 5, i * i) for i in range(100)]
     self.table = session.parallelize(self.data, include_key=True)
     self.data_to_trans = [(i * 10 + 5, i * i * i) for i in range(100)]
     self.table_trans = session.parallelize(self.data_to_trans,
                                            include_key=True)
Example #6
    def run(self, component_parameters=None, args=None):
        self.parameters = component_parameters["UploadParam"]
        self.parameters["role"] = component_parameters["role"]
        self.parameters["local"] = component_parameters["local"]
        job_id = self.taskid.split("_")[0]
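        # resolve a relative upload path against the project base directory and make sure it exists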
        if not os.path.isabs(self.parameters.get("file", "")):
            self.parameters["file"] = os.path.join(file_utils.get_project_base_directory(), self.parameters["file"])
        if not os.path.exists(self.parameters["file"]):
            raise Exception("%s is not exist, please check the configure" % (self.parameters["file"]))
        table_name, namespace = dtable_utils.get_table_info(config=self.parameters,
                                                            create=True)
        _namespace, _table_name = self.generate_table_name(self.parameters["file"])
        if namespace is None:
            namespace = _namespace
        if table_name is None:
            table_name = _table_name
        read_head = self.parameters['head']
        if read_head == 0:
            head = False
        elif read_head == 1:
            head = True
        else:
            raise Exception("'head' in conf.json should be 0 or 1")
        partition = self.parameters["partition"]
        if partition <= 0 or partition >= self.MAX_PARTITION_NUM:
            raise Exception("Error number of partition, it should between %d and %d" % (0, self.MAX_PARTITION_NUM))

        session.init(mode=self.parameters['work_mode'])
        data_table_count = self.save_data_table(table_name, namespace, head, self.parameters.get('in_version', False))
        LOGGER.info("------------load data finish!-----------------")
        LOGGER.info("file: {}".format(self.parameters["file"]))
        LOGGER.info("total data_count: {}".format(data_table_count))
        LOGGER.info("table name: {}, table namespace: {}".format(table_name, namespace))
Example #7
 def run():
     parser = argparse.ArgumentParser()
     parser.add_argument('-j',
                         '--job_id',
                         required=True,
                         type=str,
                         help="job id")
     parser.add_argument('-w',
                         '--work_mode',
                         required=True,
                         type=str,
                         help="work mode")
     parser.add_argument('-b',
                         '--backend',
                         required=True,
                         type=str,
                         help="backend")
     args = parser.parse_args()
     job_id = args.job_id
     work_mode = int(args.work_mode)
     backend = int(args.backend)
     session.init(job_id=job_id,
                  mode=work_mode,
                  backend=backend,
                  set_log_dir=False)
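     # stop the session for this job; any failure while stopping is ignored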
     try:
         schedule_logger(job_id.split('_')[0]).info(
             'start stop session {}'.format(session.get_session_id()))
         session.stop()
         schedule_logger(job_id.split('_')[0]).info(
             'stop session {} success'.format(session.get_session_id()))
     except Exception:
         pass
Example #8
    def init_table_manager_and_federation(cls,
                                          job_id,
                                          role,
                                          num_hosts,
                                          host_ind=0):
        from arch.api import session
        from arch.api import federation

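        # party ids: hosts are numbered from 10000, guest and arbiter share 9999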
        role_id = {
            "host": [10000 + i for i in range(num_hosts)],
            "guest": [9999],
            "arbiter": [9999]
        }
        session.init(job_id)
        federation.init(job_id, {
            "local": {
                "role": role,
                "party_id": role_id[role][0] if role != "host" else role_id[role][host_ind]
            },
            "role": role_id
        })
Example #9
 def run(self, component_parameters=None, args=None):
     self.parameters = component_parameters["DownloadParam"]
     self.parameters["role"] = component_parameters["role"]
     self.parameters["local"] = component_parameters["local"]
     table_name, namespace = dtable_utils.get_table_info(
         config=self.parameters, create=False)
     job_id = "_".join(self.taskid.split("_")[:2])
     session.init(job_id, self.parameters["work_mode"])
     with open(os.path.abspath(self.parameters["output_path"]),
               "w") as fout:
         data_table = session.get_data_table(name=table_name,
                                             namespace=namespace)
         LOGGER.info('===== begin to export data =====')
         lines = 0
         for key, value in data_table.collect():
             if not value:
                 fout.write(key + "\n")
             else:
                 fout.write(key + self.parameters.get("delimitor", ",") +
                            str(value) + "\n")
             lines += 1
             if lines % 2000 == 0:
                 LOGGER.info("===== export {} lines =====".format(lines))
         LOGGER.info("===== export {} lines totally =====".format(lines))
         LOGGER.info('===== export data finish =====')
         LOGGER.info('===== export data file path:{} ====='.format(
             os.path.abspath(self.parameters["output_path"])))
Example #10
def do_export_file(job_id, _data):
    try:
        work_mode = _data.get("work_mode")
        name = _data.get("table_name")
        namespace = _data.get("namespace")
        delimitor = _data.get("delimitor", ",")
        output_path = _data.get("output_path")

        # todo: use eggroll as default storage backend
        session.init(job_id=job_id, mode=work_mode, backend=Backend.EGGROLL)

        with open(os.path.abspath(output_path), "w") as fout:
            data_table = session.get_data_table(name=name, namespace=namespace)

            print('===== begin to export data =====')
            lines = 0

            for key, value in data_table.collect():
                if not value:
                    fout.write(key + "\n")
                else:
                    fout.write(key + delimitor + str(value) + "\n")

                lines += 1
                if lines % 2000 == 0:
                    print("===== export {} lines =====".format(lines))

            print("===== export {} lines totally =====".format(lines))
            print('===== export data finish =====')
    except Exception as e:
        raise ValueError("cannot export data, please check json file") from e
Example #11
def test_plain_lr():
    from sklearn.datasets import make_moons
    import functools
    # change the flow_id, otherwise the in-memory table may be overwritten
    session.init(mode=0)
    ns = str(uuid.uuid1())

    X = session.table('testX7', ns, partition=2)
    Y = session.table('testY7', ns, partition=2)

    b = np.array([0])
    eta = 1.2
    max_iter = 10

    total_num = 500

    _x, _y = make_moons(total_num, noise=0.25, random_state=12345)
    for i in range(np.shape(_y)[0]):
        X.put(i, _x[i])
        Y.put(i, _y[i])

    print(len([y for y in Y.collect()]))

    current_milli_time = lambda: int(round(time.time() * 1000))

    start = current_milli_time()
    #shape_w = [1, np.shape(_x)[1]]
    shape_w = [np.shape(_x)[1]]
    w = np.ones(shape_w)

    print(w)
    X = TensorInEgg(None, None, X)
    Y = TensorInEgg(None, None, Y)
    w = TensorInPy(None, None, w)
    b = TensorInPy(None, None, b)

    # lr = LR(shape_w)
    # lr.train(X, Y)
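    # batch gradient descent on the sigmoid loss: predictions H, residuals R, then update w and b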
    itr = 0
    while itr < max_iter:
        H = 1.0 / (1 + ((X @ w + b) * -1).map(np.exp))
        R = H - Y

        gradient_w = (R * X).sum() / total_num
        gradient_b = R.sum() / total_num
        w = w - eta * gradient_w
        b = b - eta * gradient_b
        print("iter {}: w={}, b={}".format(itr, w, b))
        # self.plot(itr)
        itr += 1

    print("train total time: {}".format(current_milli_time() - start))
    _x_test, _y_test = make_moons(50, random_state=12345)
    _x_test = TensorInPy(None, None, _x_test)
    y_pred = 1.0 / (1 + ((_x_test @ w + b) * -1).map(np.exp))
    from sklearn import metrics

    auc = metrics.roc_auc_score(_y_test, y_pred.store.reshape(50))
    print("auc: {}".format(auc))
Example #12
 def setUp(self):
     session.init("test_fair_loss")
     self.log_cosh_loss = LogCoshLoss()
     self.y_list = [i % 2 for i in range(100)]
     self.predict_list = [random.random() for i in range(100)]
     self.y = session.parallelize(self.y_list, include_key=False)
     self.predict = session.parallelize(self.predict_list, include_key=False)
Example #13
 def setUp(self):
     session.init("test_least_abs_error_loss")
     self.lae_loss = LeastAbsoluteErrorLoss()
     self.y_list = [i % 2 for i in range(100)]
     self.predict_list = [random.random() for i in range(100)]
     self.y = session.parallelize(self.y_list, include_key=False)
     self.predict = session.parallelize(self.predict_list, include_key=False)
Example #14
    def setUp(self):
        self.feature_histogram = FeatureHistogram()
        session.init("test_feature_histogram")
        data_insts = []
        for i in range(1000):
            indices = []
            data = []
            for j in range(10):
                x = random.randint(0, 5)
                if x != 0:
                    data.append(x)
                    indices.append(j)
            sparse_vec = SparseVector(indices, data, shape=10)
            data_insts.append((Instance(features=sparse_vec), (1, random.randint(0, 3))))
        self.node_map = {0: 0, 1: 1, 2: 2, 3: 3}
        self.data_insts = data_insts
        self.data_bin = session.parallelize(data_insts, include_key=False, partition=16)

        self.grad_and_hess_list = [(random.random(), random.random()) for i in range(1000)]
        self.grad_and_hess = session.parallelize(self.grad_and_hess_list, include_key=False, partition=16)

        bin_split_points = []
        for i in range(10):
            bin_split_points.append(np.array([i for i in range(5)]))
        self.bin_split_points = np.array(bin_split_points)
        self.bin_sparse = [0 for i in range(10)]
Example #15
    def setUp(self):
        session.init("test_encrypt_mode_calculator")

        self.list_data = []
        self.tuple_data = []
        self.numpy_data = []

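        # the same 30 rows of 20 integers, stored as lists, tuples, and numpy arrays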
        for i in range(30):
            list_value = [100 * i + j for j in range(20)]
            tuple_value = tuple(list_value)
            numpy_value = np.array(list_value, dtype="int")

            self.list_data.append(list_value)
            self.tuple_data.append(tuple_value)
            self.numpy_data.append(numpy_value)

        self.data_list = session.parallelize(self.list_data,
                                             include_key=False,
                                             partition=10)
        self.data_tuple = session.parallelize(self.tuple_data,
                                              include_key=False,
                                              partition=10)
        self.data_numpy = session.parallelize(self.numpy_data,
                                              include_key=False,
                                              partition=10)
Example #16
 def setUp(self):
     session.init("test_cross_entropy")
     self.softmax_loss = SoftmaxCrossEntropyLoss()
     self.y_list = [i % 5 for i in range(100)]
     self.predict_list = [np.array([random.random() for i in range(5)]) for j in range(100)]
     self.y = session.parallelize(self.y_list, include_key=False, partition=16)
     self.predict = session.parallelize(self.predict_list, include_key=False, partition=16)
Example #17
 def setUp(self):
     session.init("test_huber_loss")
     self.delta = 1
     self.huber_loss = HuberLoss(self.delta)
     self.y_list = [i % 2 for i in range(100)]
     self.predict_list = [random.random() for i in range(100)]
     self.y = session.parallelize(self.y_list, include_key=False)
     self.predict = session.parallelize(self.predict_list, include_key=False)
Example #18
 def setUp(self):
     session.init("test_fair_loss")
     self.rho = 0.5
     self.tweedie_loss = TweedieLoss(self.rho)
     self.y_list = [i % 2 for i in range(100)]
     self.predict_list = [random.random() for i in range(100)]
     self.y = session.parallelize(self.y_list, include_key=False)
     self.predict = session.parallelize(self.predict_list, include_key=False)
Example #19
    def setUp(self):
        self.jobid = str(uuid.uuid1())
        session.init(self.jobid)

        from federatedml.statistic.intersect.intersect_host import RsaIntersectionHost
        from federatedml.statistic.intersect.intersect_host import RawIntersectionHost
        intersect_param = IntersectParam()
        self.rsa_operator = RsaIntersectionHost(intersect_param)
        self.raw_operator = RawIntersectionHost(intersect_param)
Example #20
    def setUp(self):
        session.init("test_label_checker")

        self.small_label_set = [Instance(label=i % 5) for i in range(100)]
        self.classify_inst = session.parallelize(self.small_label_set, include_key=False)
        self.regression_label = [Instance(label=random.random()) for i in range(100)]
        self.regression_inst = session.parallelize(self.regression_label)
        self.classify_checker = ClassifyLabelChecker()
        self.regression_checker = RegressionLabelChecker()
Example #21
    def _init_argument(self):
        with open(config_path) as conf_f:
            runtime_json = json.load(conf_f)
        self._initialize(runtime_json)

        LOGGER.debug("The Arbiter job id is {}".format(job_id))
        LOGGER.debug("The Arbiter work mode id is {}".format(
            self.workflow_param.work_mode))
        session.init(job_id, self.workflow_param.work_mode)
        federation.init(job_id, runtime_json)
        LOGGER.debug("Finish eggroll and federation init")
Example #22
 def clean_tables(self):
     from arch.api import session
     session.init(job_id=self.job_id)
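     # drop all tables under this job's namespace, ignoring EnvironmentError if there is nothing to clean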
     try:
         session.cleanup("*", self.job_id, True)
     except EnvironmentError:
         pass
     try:
         session.cleanup("*", self.job_id, False)
     except EnvironmentError:
         pass
Example #23
    def setUp(self):
        session.init("test_stratified_sampler")
        self.data = []
        self.data_to_trans = []
        for i in range(1000):
            self.data.append((i, Instance(label=i % 4, features=i * i)))
            self.data_to_trans.append((i, Instance(features=i**3)))

        self.table = session.parallelize(self.data, include_key=True)
        self.table_trans = session.parallelize(self.data_to_trans,
                                               include_key=True)
Example #24
    def init_session_and_federation(job_id, role, partyid, partyid_map):
        from arch.api import session
        from arch.api import federation

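        # runtime_conf declares the local party and the party ids playing each role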
        session.init(job_id)
        federation.init(job_id=job_id,
                        runtime_conf={
                            "local": {
                                "role": role,
                                "party_id": partyid
                            },
                            "role": partyid_map
                        })
Example #25
 def setUp(self):
     session.init("123")
     self.data_num = 1000
     self.feature_num = 200
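     # 1000 instances with 200 constant-valued features, keyed by string ids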
     final_result = []
     for i in range(self.data_num):
         tmp = i * np.ones(self.feature_num)
         inst = Instance(inst_id=i, features=tmp, label=0)
         tmp = (str(i), inst)
         final_result.append(tmp)
     table = session.parallelize(final_result,
                                 include_key=True,
                                 partition=3)
     self.table = table
Example #26
 def setUp(self):
     self.job_id = str(uuid.uuid1())
     session.init(self.job_id)
     data_num = 100
     feature_num = 8
     self.prepare_data(data_num, feature_num)
     local_baseline_obj = LocalBaseline()
     local_baseline_obj.need_run = True
     local_baseline_obj.header = [
         "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8"
     ]
     local_baseline_obj.model_name = "LogisticRegression"
     local_baseline_obj.model_opts = {}
     self.local_baseline_obj = local_baseline_obj
Example #27
def init_session_for_flow_server():
    # Options are used with different backend on demand
    session.init(job_id="session_used_by_fate_flow_server_{}".format(fate_uuid()),
                 mode=RuntimeConfig.WORK_MODE,
                 backend=RuntimeConfig.BACKEND,
                 options={"eggroll.session.processors.per.node": 1})
    # init session detect table
    detect_table = session.table(namespace=DETECT_TABLE[0], name=DETECT_TABLE[1], partition=DETECT_TABLE[2])
    detect_table.destroy()
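    # recreate the detect table and seed it with DETECT_TABLE[2] key/value pairs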
    detect_table = session.table(namespace=DETECT_TABLE[0], name=DETECT_TABLE[1], partition=DETECT_TABLE[2])
    detect_table.put_all(enumerate(range(DETECT_TABLE[2])))
    stat_logger.info("init detect table {} {} for session {}".format(detect_table.get_namespace(),
                                                                     detect_table.get_name(),
                                                                     session.get_session_id()))
    stat_logger.info("init session {} for fate flow server successfully".format(session.get_session_id()))
Example #28
def session_init(job_id, idx):
    from arch.api import session
    from arch.api import federation

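    # index 0 is the guest (party 9999); higher indices map to hosts starting at 10000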
    role = "guest" if idx < 1 else "host"
    party_id = 9999 + idx if idx < 1 else 10000 + (idx - 1)
    role_parties = {
        "host": [10000 + i for i in range(NUM_HOSTS)],
        "guest": [9999 + i for i in range(1)]
    }
    session.init(job_id)
    federation.init(
        job_id,
        dict(local=dict(role=role, party_id=party_id), role=role_parties))
    return federation.local_party(), federation.all_parties()
Example #29
    def setUp(self):
        self.job_id = str(uuid.uuid1())
        session.init(self.job_id)
        model = HeteroStepwise()
        model.__setattr__('role', consts.GUEST)
        model.__setattr__('fit_intercept', True)

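        # five features with mask "10110", i.e. x1, x3 and x4 are selected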
        self.model = model
        data_num = 100
        feature_num = 5
        bool_list = [True, False, True, True, False]
        self.str_mask = "10110"
        self.header = ["x1", "x2", "x3", "x4", "x5"]
        self.mask = self.prepare_mask(bool_list)
        self.table = self.prepare_data(data_num, feature_num, self.header,
                                       "id", "y")
Example #30
def init(job_id,
         runtime_conf,
         mode,
         server_conf_path="arch/conf/server_conf.json"):
    if mode is None:
        raise EnvironmentError(
            "eggroll should be initialized before fate_script")
    session.init(job_id, mode)
    print("runtime_conf:{}".format(runtime_conf))
    all_checker = AllChecker(runtime_conf)
    all_checker.check_all()
    with open(runtime_conf) as conf_p:
        runtime_json = json.load(conf_p)

    if mode == WorkMode.STANDALONE:
        RuntimeInstance.FEDERATION = standalone_fate_script.init(
            job_id=job_id, runtime_conf=runtime_json)
    else:
        RuntimeInstance.FEDERATION = cluster_fate_script.init(
            job_id=job_id,
            runtime_conf=runtime_json,
            server_conf_path=server_conf_path)