def test_batch(self):
    """Verify aby3.batch output shapes, with and without dropping the last batch."""
    # Default batching keeps the trailing partial batch: 4 samples at
    # batch_size=3 yield one full batch and one remainder batch.
    reader_default = aby3.batch(reader=self.dummy_reader, batch_size=3)
    expected_shapes = [(2, 3, 2, 2), (2, 1, 2, 2)]
    for sample, expected in zip(reader_default(), expected_shapes):
        self.assertEqual(sample.shape, expected)

    # With drop_last=True every emitted batch is a full batch.
    reader_dropping = aby3.batch(
        reader=self.dummy_reader, batch_size=3, drop_last=True)
    for sample in reader_dropping():
        self.assertEqual(sample.shape, (2, 3, 2, 2))
def get_mpc_test_dataloader(feature_file, feature_shape, role, batch_size):
    """Build a batched reader of encrypted feature shares for prediction.

    Args:
        feature_file: Path to the file holding this party's aby3 feature shares.
        feature_shape: Shape of one feature sample.
        role: Party id selecting which share to load.
        batch_size: Samples per batch; a trailing partial batch is dropped.

    Returns:
        A callable batched reader over the feature shares.
    """
    share_reader = aby3.load_aby3_shares(
        feature_file, id=role, shape=feature_shape)
    return aby3.batch(share_reader, batch_size, drop_last=True)
def get_mpc_dataloader(feature_file, label_file, feature_shape, label_shape,
                       feature_name, label_name, role, batch_size):
    """Build an async DataLoader over encrypted feature/label shares for training.

    Args:
        feature_file: Path to this party's aby3 feature-share file.
        label_file: Path to this party's aby3 label-share file.
        feature_shape: Shape of one feature sample.
        label_shape: Shape of one label sample.
        feature_name: Name of the feature variable in the main program.
        label_name: Name of the label variable in the main program.
        role: Party id selecting which shares to load.
        batch_size: Samples per batch; trailing partial batches are dropped.

    Returns:
        A fluid DataLoader feeding (feature, label) batches on CPU.
    """
    # Look up the feed variables once from the default main program.
    main_block = fluid.default_main_program().global_block()
    x = main_block.var(feature_name)
    y = main_block.var(label_name)

    batch_feature = aby3.batch(
        aby3.load_aby3_shares(feature_file, id=role, shape=feature_shape),
        batch_size,
        drop_last=True)
    batch_label = aby3.batch(
        aby3.load_aby3_shares(label_file, id=role, shape=label_shape),
        batch_size,
        drop_last=True)

    # async data loader
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[x, y], capacity=batch_size)
    combined = paddle.reader.compose(batch_feature, batch_label)
    loader.set_batch_generator(combined, places=fluid.CPUPlace())
    return loader
# Minimize the encrypted average loss with plain SGD.
# NOTE(review): `avg_loss`, `role`, and `BATCH_SIZE` are defined earlier in
# this script (outside this span).
optimizer = pfl_mpc.optimizer.SGD(learning_rate=0.001)
optimizer.minimize(avg_loss)

# Encrypted (aby3 share) data must be generated beforehand by the
# data-preparation step; fail fast if it is missing.
mpc_data_dir = "./mpc_data/"
if not os.path.exists(mpc_data_dir):
    raise ValueError(
        "mpc_data_dir is not found. Please prepare encrypted data.")

# train_reader: this party's shares of the MNIST training features/labels,
# batched with trailing partial batches dropped.
feature_reader = aby3.load_aby3_shares(
    mpc_data_dir + "mnist2_feature", id=role, shape=(784, ))
label_reader = aby3.load_aby3_shares(
    mpc_data_dir + "mnist2_label", id=role, shape=(1, ))
batch_feature = aby3.batch(feature_reader, BATCH_SIZE, drop_last=True)
batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)

# test_reader: same layout as the training readers, over the held-out split.
test_feature_reader = aby3.load_aby3_shares(
    mpc_data_dir + "mnist2_test_feature", id=role, shape=(784, ))
test_label_reader = aby3.load_aby3_shares(
    mpc_data_dir + "mnist2_test_label", id=role, shape=(1, ))
test_batch_feature = aby3.batch(
    test_feature_reader, BATCH_SIZE, drop_last=True)
test_batch_label = aby3.batch(test_label_reader, BATCH_SIZE, drop_last=True)
def train(args):
    """Train the MPC factorization-machine model on encrypted criteo shares.

    Builds the FM network under aby3, streams this party's encrypted
    feature/label shares through an async DataLoader, and saves an
    inference model per epoch under ./mpc_model/epoch{n}/party{role}.

    Args:
        args: Parsed CLI arguments; uses role, server, port, batch_size,
            num_field, sparse_feature_number, base_lr, epoch_num.
    """
    # Init MPC
    role = int(args.role)
    pfl_mpc.init("aby3", role, "localhost", args.server, int(args.port))

    # Input and Network
    BATCH_SIZE = args.batch_size
    FIELD_NUM = args.num_field
    # +1 reserves an extra slot beyond the sparse feature ids.
    FEATURE_NUM = args.sparse_feature_number + 1
    feat_idx = pfl_mpc.data(name='feat_idx',
                            shape=[BATCH_SIZE, FIELD_NUM, FEATURE_NUM],
                            lod_level=1,
                            dtype="int64")
    feat_value = pfl_mpc.data(name='feat_value',
                              shape=[BATCH_SIZE, FIELD_NUM],
                              lod_level=0,
                              dtype="int64")
    label = pfl_mpc.data(name='label',
                         shape=[BATCH_SIZE, 1],
                         lod_level=1,
                         dtype="int64")
    inputs = [feat_idx] + [feat_value] + [label]
    avg_cost, predict = mpc_network.FM(args, inputs, seed=2)
    # Clone the program for inference BEFORE attaching the optimizer ops.
    infer_program = fluid.default_main_program().clone(for_test=True)
    optimizer = pfl_mpc.optimizer.SGD(args.base_lr)
    optimizer.minimize(avg_cost)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Prepare train data: this party's aby3 shares of the criteo dataset.
    mpc_data_dir = "./mpc_data/"
    mpc_train_data_dir = mpc_data_dir + 'train/'
    if not os.path.exists(mpc_train_data_dir):
        raise ValueError(
            "{} is not found. Please prepare encrypted data.".format(
                mpc_train_data_dir))
    feature_idx_reader = aby3.load_aby3_shares(
        mpc_train_data_dir + "criteo_feature_idx",
        id=role,
        shape=(FIELD_NUM, FEATURE_NUM))
    feature_value_reader = aby3.load_aby3_shares(
        mpc_train_data_dir + "criteo_feature_value",
        id=role,
        shape=(FIELD_NUM, ))
    label_reader = aby3.load_aby3_shares(
        mpc_train_data_dir + "criteo_label", id=role, shape=(1, ))
    batch_feature_idx = aby3.batch(
        feature_idx_reader, BATCH_SIZE, drop_last=True)
    batch_feature_value = aby3.batch(
        feature_value_reader, BATCH_SIZE, drop_last=True)
    batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True)
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[feat_idx, feat_value, label], capacity=BATCH_SIZE)
    # Zip the three batched readers into (idx, value, label) samples.
    batch_sample = paddle.reader.compose(batch_feature_idx,
                                         batch_feature_value, batch_label)
    loader.set_batch_generator(batch_sample, places=place)

    # Training
    logger.info('******************************************')
    logger.info('Start Training...')
    logger.info('batch_size = {}, learning_rate = {}'.format(
        args.batch_size, args.base_lr))
    mpc_model_basedir = "./mpc_model/"
    start_time = time.time()
    step = 0
    for epoch_id in range(args.epoch_num):
        for sample in loader():
            step += 1
            exe.run(feed=sample, fetch_list=[predict.name])
            batch_end = time.time()
            if step % 100 == 0:
                print('Epoch={}, Step={}, current cost time: {}'.format(
                    epoch_id, step, batch_end - start_time))
        print('Epoch={}, current cost time: {}'.format(
            epoch_id, batch_end - start_time))

        # For each epoch: save infer program
        mpc_model_dir = mpc_model_basedir + "epoch{}/party{}".format(
            epoch_id, role)
        fluid.io.save_inference_model(
            dirname=mpc_model_dir,
            feeded_var_names=["feat_idx", "feat_value", "label"],
            target_vars=[predict],
            executor=exe,
            main_program=infer_program,
            model_filename="__model__")
        logger.info('Model is saved in {}'.format(mpc_model_dir))
    end_time = time.time()
    print('Mpc Training of Epoch={} Batch_size={}, epoch_cost={:.4f} s'.format(
        args.epoch_num, BATCH_SIZE, (end_time - start_time)))
    logger.info('******************************************')
    logger.info('End Training...')
def load_model_and_infer(args):
    """Run inference with each saved per-epoch MPC model on encrypted test shares.

    Rebuilds the input layers, streams this party's encrypted criteo test
    shares through an async DataLoader, and calls ``infer`` once per saved
    epoch model under ./mpc_model/epoch{n}/party{role}.

    Args:
        args: Parsed CLI arguments; uses role, server, port, batch_size,
            num_field, sparse_feature_number, epoch_num.

    Raises:
        ValueError: If the encrypted test data directory does not exist.
    """
    # Init MPC
    role = int(args.role)
    pfl_mpc.init("aby3", role, "localhost", args.server, int(args.port))
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Input layers must match the shapes the model was trained with.
    BATCH_SIZE = args.batch_size
    FIELD_NUM = args.num_field
    FEATURE_NUM = args.sparse_feature_number + 1
    feat_idx = pfl_mpc.data(name='feat_idx',
                            shape=[BATCH_SIZE, FIELD_NUM, FEATURE_NUM],
                            lod_level=1,
                            dtype="int64")
    feat_value = pfl_mpc.data(name='feat_value',
                              shape=[BATCH_SIZE, FIELD_NUM],
                              lod_level=0,
                              dtype="int64")
    label = pfl_mpc.data(name='label',
                         shape=[BATCH_SIZE, 1],
                         lod_level=1,
                         dtype="int64")

    # Prepare test data
    mpc_data_dir = "./mpc_data/"
    mpc_test_data_dir = mpc_data_dir + 'test/'
    if not os.path.exists(mpc_test_data_dir):
        # BUGFIX: message previously read "{}is not found..." (no space
        # after the placeholder), producing a run-on path in the error.
        raise ValueError(
            "{} is not found. Please prepare encrypted data.".format(
                mpc_test_data_dir))
    test_feature_idx_reader = aby3.load_aby3_shares(
        mpc_test_data_dir + "criteo_feature_idx",
        id=role,
        shape=(FIELD_NUM, FEATURE_NUM))
    test_feature_value_reader = aby3.load_aby3_shares(
        mpc_test_data_dir + "criteo_feature_value",
        id=role,
        shape=(FIELD_NUM, ))
    test_label_reader = aby3.load_aby3_shares(
        mpc_test_data_dir + "criteo_label", id=role, shape=(1, ))
    test_batch_feature_idx = aby3.batch(
        test_feature_idx_reader, BATCH_SIZE, drop_last=True)
    test_batch_feature_value = aby3.batch(
        test_feature_value_reader, BATCH_SIZE, drop_last=True)
    test_batch_label = aby3.batch(
        test_label_reader, BATCH_SIZE, drop_last=True)
    test_loader = fluid.io.DataLoader.from_generator(
        feed_list=[feat_idx, feat_value, label],
        capacity=BATCH_SIZE,
        drop_last=True)
    test_batch_sample = paddle.reader.compose(test_batch_feature_idx,
                                              test_batch_feature_value,
                                              test_batch_label)
    test_loader.set_batch_generator(test_batch_sample, places=place)

    # Evaluate every saved epoch checkpoint.
    for i in range(args.epoch_num):
        mpc_model_dir = './mpc_model/epoch{}/party{}'.format(i, role)
        mpc_model_filename = '__model__'
        infer(test_loader, role, exe, BATCH_SIZE, mpc_model_dir,
              mpc_model_filename)