def test(self):
    """Initialise fleet and build the network together with its optimizer.

    Calls the build hooks on ``self`` in this order: role, strategy,
    input, net, optimizer. The role is handed to ``fleet.init`` before
    anything else is built.
    """
    fleet.init(self.build_role())
    dist_strategy = self.build_strategy()
    loss = self.build_net(self.build_input())
    self.build_optimizer(loss, dist_strategy)
def setUp(self):
    """Set the parameter-server environment and create an async strategy.

    Exports the PADDLE_* / TRAINING_ROLE / POD_IP variables into
    ``os.environ``, initialises fleet with a ``PaddleCloudRoleMaker``, and
    stores a ``DistributedStrategy`` with ``a_sync`` enabled on
    ``self.strategy``.
    """
    os.environ.update({
        "PADDLE_PSERVERS_IP_PORT_LIST": "127.0.0.1:4001,127.0.0.1:4002",
        "PADDLE_TRAINERS_NUM": "2",
        "TRAINING_ROLE": "PSERVER",
        "PADDLE_PORT": "4001",
        "POD_IP": "127.0.0.1",
    })

    # The role maker is created only after the environment is in place.
    fleet.init(role_maker.PaddleCloudRoleMaker())

    self.strategy = paddle.distributed.fleet.DistributedStrategy()
    self.strategy.a_sync = True
def runtime_main(test_class):
    """Parse command-line options and run ``test_class`` as one fleet node.

    Args:
        test_class: Callable taking no arguments that returns an object
            exposing ``build_role``, ``build_strategy``, ``net``,
            ``build_optimizer``, ``run_pserver``, ``run_dataset_trainer``
            and ``run_pyreader_trainer``.

    The options are read from ``sys.argv`` via ``argparse``. ``--role`` is
    mandatory; every other option has a default.
    """
    parser = argparse.ArgumentParser(description='Run Fleet test.')
    parser.add_argument(
        '--role',
        type=str,
        required=True,
        choices=['pserver', 'trainer', 'heter_trainer'])

    # (flag, type, default) for each optional argument, kept in the
    # original declaration order so the generated help text is unchanged.
    optional_arguments = (
        ('--endpoints', str, ""),
        ('--trainer_endpoints', str, ""),
        ('--heter_trainer_endpoints', str, ""),
        ('--heter_trainer_device', str, "gpu"),
        ('--gloo_path', str, ""),
        ('--current_id', int, 0),
        ('--trainers', int, 1),
        ('--mode', str, 'async'),
        ('--geo_sgd_need_push_nums', int, 2),
        ('--reader', str, 'dataset'),
    )
    for flag, value_type, default_value in optional_arguments:
        parser.add_argument(
            flag, type=value_type, required=False, default=default_value)

    args = parser.parse_args()

    runner = test_class()
    node_role = runner.build_role(args)
    fleet.init(node_role)

    dist_strategy = runner.build_strategy(args)
    loss = runner.net(args)
    runner.build_optimizer(loss, dist_strategy)

    fleet_util._set_strategy(dist_strategy)
    fleet_util._set_role_maker(node_role)

    # Server-side roles share one entry point; trainers are split by reader.
    if args.role in ("pserver", "heter_trainer"):
        runner.run_pserver(args)
    elif args.reader == "dataset":
        runner.run_dataset_trainer(args)
    else:
        runner.run_pyreader_trainer(args)
def test_2ps_0_load(self):
    """Check that server 0 of a two-server setup loads a saved model.

    A model is written with ``self.save_origin_model``, fleet is
    initialised as the parameter server on port 4001, and
    ``fleet.init_server`` loads the saved model. The ``fc`` variable must
    then equal the saved dense weights, and ``embedding.block0`` must equal
    every second row of the saved embedding (presumably because the table
    is split row-wise across the two servers -- confirm against the
    fleet sharding rules).

    The saved-model directory is removed even when the test fails.
    """
    # init No.0 server env
    env = {
        "PADDLE_PSERVERS_IP_PORT_LIST": "127.0.0.1:4001,127.0.0.1:4002",
        "PADDLE_TRAINERS_NUM": str(2),
        "TRAINING_ROLE": "PSERVER",
        "PADDLE_PORT": "4001",
        "POD_IP": "127.0.0.1",
    }
    os.environ.update(env)

    # Both arrays are 10x10 with row i filled with i / 10:
    #   [[0. , 0. , ..., 0. ],
    #    [0.1, 0.1, ..., 0.1],
    #    ...
    #    [0.9, 0.9, ..., 0.9]]
    emb_array = np.arange(0, 1, 0.1).repeat(10).reshape(10, 10)
    fc_array = np.arange(0, 1, 0.1).repeat(10).reshape(10, 10)
    model_path = self.save_origin_model(emb_array, fc_array)

    try:
        role = role_maker.PaddleCloudRoleMaker()
        fleet.init(role)
        loss = self.net(emb_array, fc_array)

        strategy = paddle.distributed.fleet.DistributedStrategy()
        strategy.a_sync = True

        optimizer = fluid.optimizer.Adam(1e-3)
        optimizer = fleet.distributed_optimizer(optimizer, strategy)
        optimizer.minimize(loss)

        fleet.init_server(model_path)

        fc_w = np.array(fluid.global_scope().find_var("fc").get_tensor())
        emb = np.array(
            fluid.global_scope().find_var("embedding.block0").get_tensor())

        # Compare element-wise. The previous `a.all() == b.all()` form only
        # compared two booleans, and since the expected arrays contain a
        # zero row it passed for any loaded tensor containing a zero.
        # Tolerances allow for the loaded tensors being lower precision
        # than the float64 reference arrays.
        np.testing.assert_allclose(fc_w, fc_array, rtol=1e-5, atol=1e-6)
        np.testing.assert_allclose(emb, emb_array[::2], rtol=1e-5, atol=1e-6)
    finally:
        # Always clean up the saved model, including on assertion failure.
        shutil.rmtree(model_path)