def init_distributed_infer_env(self, exe, loss, role_maker=None, dirname=None):
    import paddle
    import paddle.distributed.fleet as fleet

    if fleet.fleet._runtime_handle is None:
        fleet.init(role_maker=role_maker)

        # The optimizer is never used for training; minimize() only triggers
        # the a_sync (parameter-server) graph transformation.
        fake_optimizer = paddle.optimizer.SGD()
        strategy = fleet.DistributedStrategy()
        strategy.a_sync = True
        optimizer = fleet.distributed_optimizer(fake_optimizer, strategy=strategy)
        optimizer.minimize(loss, startup_program=self.origin_startup_program)

        if fleet.is_server():
            fleet.init_server(dirname=dirname)
            fleet.run_server()
        else:
            exe.run(paddle.static.default_startup_program())
            fleet.init_worker()
            self._init_dense_params(exe, dirname)

        # Record the origin programs as the effective global programs.
        global_startup_program = paddle.static.default_startup_program()
        global_startup_program = self.origin_startup_program
        global_main_program = paddle.static.default_main_program()
        global_main_program = self.origin_main_program
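# A minimal sketch of how the helper above is typically driven. The
# DistributedInfer wrapper and the warm-up directory name are assumptions
# from context, not part of the snippet above.
import paddle
from paddle.distributed.fleet.utils.ps_util import DistributedInfer

paddle.enable_static()
# ... build an inference network that produces `loss` ...
dist_infer = DistributedInfer(
    main_program=paddle.static.default_main_program(),
    startup_program=paddle.static.default_startup_program())
exe = paddle.static.Executor(paddle.CPUPlace())
dist_infer.init_distributed_infer_env(exe, loss, dirname="./warmup_model")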
def test_single_run_ps_minimize(self):
    paddle.enable_static()
    input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32')
    input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64')

    fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh')
    prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax')
    cost = fluid.layers.cross_entropy(input=prediction, label=input_y)
    avg_cost = paddle.mean(x=cost)

    fleet.init()
    strategy = paddle.distributed.fleet.DistributedStrategy()
    optimizer = fluid.optimizer.SGD(learning_rate=0.01)
    optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
    optimizer.minimize(avg_cost)

    if fleet.is_server():
        fleet.init_server()
        fleet.run_server()
    elif fleet.is_worker():
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(paddle.static.default_startup_program())
        step = 10
        for i in range(step):
            cost_val = exe.run(program=fluid.default_main_program(),
                               feed=self.gen_data(),
                               fetch_list=[avg_cost.name])
            print("worker_index: %d, step%d cost = %f" %
                  (fleet.worker_index(), i, cost_val[0]))
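# The test above feeds batches via self.gen_data(), which is not shown. A
# minimal sketch compatible with the x/y placeholders above (the batch size
# is an assumption):
import numpy as np

def gen_data(self, batch_size=32):
    # Random features matching x: [-1, 32] float32, labels y: [-1, 1] int64.
    return {
        "x": np.random.random(size=(batch_size, 32)).astype('float32'),
        "y": np.random.randint(2, size=(batch_size, 1)).astype('int64'),
    }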
def train(args):
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    worker_num = int(os.getenv("PADDLE_TRAINERS_NUM", "0"))
    num_devices = int(os.getenv("CPU_NUM", 10))

    data = load_raw_edges_fn(args.edge_path, args.undirected)
    edges = data[0]
    weights = data[1]
    node2idx = data[2]
    num_nodes = len(node2idx)

    model = DeepwalkModel(num_nodes, args.hidden_size, args.neg_num,
                          args.is_sparse, args.is_distributed, 1.)
    pyreader = model.pyreader
    loss = model.forward()

    # init fleet
    log.info("init_role")
    init_role()

    train_steps = math.ceil(1. * num_nodes * args.epoch / args.batch_size /
                            num_devices / worker_num)
    log.info("Train step: %s" % train_steps)

    if args.optimizer == "sgd":
        args.lr *= args.batch_size * args.walk_len * args.win_size

    optimization(args.lr, loss, train_steps, args.optimizer)

    # init and run server or worker
    if fleet.is_server():
        log.info("PS server mode")
        fleet.init_server()
        fleet.run_server()

    if fleet.is_worker():
        log.info("start init worker")
        exe = F.Executor(F.CPUPlace())
        exe.run(F.default_startup_program())
        log.info("Startup done")
        fleet.init_worker()  # just the worker, load the sample
        log.info("init worker done")

        print("LEO num_nodes:", num_nodes, len(edges))
        edges_feat = {}
        edges_feat["weight"] = np.array(weights)
        graph = pgl.graph.Graph(num_nodes, edges, edge_feat=edges_feat)

        # bind gen
        gen_func = build_gen_func(args, graph)
        pyreader.decorate_tensor_provider(gen_func)

        train_prog(exe, F.default_main_program(), loss, pyreader, args,
                   train_steps)
        print("fleet try to stop worker\r\n")
        fleet.stop_worker()
        print("Game over\r\n")
def run_pserver(self, role, strategy):
    fleet.init(role)
    avg_cost, x, z, y = self.net()
    optimizer = fluid.optimizer.SGD(0.01)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    fleet.init_server()
    fleet.run_server()
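# Hypothetical worker-side counterpart to run_pserver above, sketched under
# the assumption that it mirrors the same net/optimizer setup and then trains
# instead of serving. run_trainer is not part of the original snippet.
def run_trainer(self, role, strategy):
    fleet.init(role)
    avg_cost, x, z, y = self.net()
    optimizer = fluid.optimizer.SGD(0.01)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    fleet.init_worker()
    # ... run training steps against fluid.default_main_program() here ...
    fleet.stop_worker()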
def main(args):
    paddle.set_device("cpu")
    paddle.enable_static()

    fleet.init()

    fake_num_nodes = 1
    py_reader, loss = StaticSkipGramModel(
        fake_num_nodes,
        args.neg_num,
        args.embed_size,
        sparse_embedding=True,
        shared_embedding=args.shared_embedding)

    optimizer = F.optimizer.Adam(args.learning_rate, lazy_mode=True)
    dist_strategy = fleet.DistributedStrategy()
    dist_strategy.a_sync = True
    optimizer = fleet.distributed_optimizer(optimizer, dist_strategy)
    optimizer.minimize(loss)

    # init and run server or worker
    if fleet.is_server():
        fleet.init_server()
        fleet.run_server()

    if fleet.is_worker():
        place = paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
        fleet.init_worker()

        graph = build_graph(args)
        # bind gen
        train_ds = ShardedDataset(graph.nodes, args.epoch)
        collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                                   args.neg_num, args.neg_sample_type)
        data_loader = Dataloader(
            train_ds,
            batch_size=args.cpu_batch_size,
            shuffle=True,
            num_workers=args.sample_workers,
            collate_fn=collate_fn)
        py_reader.set_batch_generator(lambda: data_loader)

        train_loss = train(exe, paddle.static.default_main_program(),
                           py_reader, loss)
        fleet.stop_worker()

        if fleet.is_first_worker():
            fleet.save_persistables(exe, "./model",
                                    paddle.static.default_main_program())
def main(args):
    paddle.set_device("cpu")
    paddle.enable_static()

    role = role_maker.PaddleCloudRoleMaker()
    fleet.init(role)

    if args.num_nodes is None:
        num_nodes = load(args.dataset).num_nodes
    else:
        num_nodes = args.num_nodes

    loss = StaticSkipGramModel(
        num_nodes, args.neg_num, args.embed_size, sparse=True)

    optimizer = F.optimizer.Adam(args.learning_rate, lazy_mode=True)
    dist_strategy = fleet.DistributedStrategy()
    dist_strategy.a_sync = True
    optimizer = fleet.distributed_optimizer(optimizer, dist_strategy)
    optimizer.minimize(loss)

    # init and run server or worker
    if fleet.is_server():
        fleet.init_server()
        fleet.run_server()

    if fleet.is_worker():
        place = paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
        fleet.init_worker()

        graph = load(args.dataset)
        # bind gen
        train_ds = ShardedDataset(graph.nodes)
        collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                                   args.neg_num, args.neg_sample_type)
        data_loader = Dataloader(
            train_ds,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.sample_workers,
            collate_fn=collate_fn)

        for epoch in range(args.epoch):
            train_loss = train(exe, paddle.static.default_main_program(),
                               data_loader, loss)
            log.info("Running epoch:%s\t train_loss:%.6f", epoch, train_loss)

        fleet.stop_worker()
def run_pserver(self, args):
    """
    Run the pserver process; you don't need to implement it.

    Args:
        args (ArgumentParser): run args to config dist fleet.
    """
    import paddle.distributed.fleet as fleet

    if args.role.upper() != "PSERVER":
        raise ValueError("args role must be PSERVER")

    fleet.init()

    self._set_strategy(args)
    avg_cost = self.net(args)

    optimizer = fluid.optimizer.SGD(LEARNING_RATE)
    optimizer = fleet.distributed_optimizer(optimizer, self.dist_strategy)
    optimizer.minimize(avg_cost)

    fleet.init_server()
    fleet.run_server()
def run_server(self):
    logger.info("Run Server Begin")
    fleet.init_server()
    fleet.run_server()
# limitations under the License.

import os
import fleetx as X
import paddle.fluid as fluid
import paddle.distributed.fleet as fleet
import paddle.distributed.fleet.base.role_maker as role_maker

configs = X.parse_train_configs()
role = role_maker.PaddleCloudRoleMaker()
fleet.init(role)

model = X.applications.MultiSlotCTR()
loader = model.load_multislot_from_file('./ctr_data/train_data')

dist_strategy = fleet.DistributedStrategy()
dist_strategy.a_sync = True  # asynchronous parameter-server training

optimizer = fluid.optimizer.SGD(learning_rate=0.0001)
optimizer = fleet.distributed_optimizer(optimizer, dist_strategy)
optimizer.minimize(model.loss)

if fleet.is_server():
    fleet.init_server()
    fleet.run_server()
else:
    fleet.init_worker()
    trainer = X.Trainer(fluid.CPUPlace())
    trainer.fit(model, loader, epoch=10)
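# A script like the one above is launched with Paddle's distributed launcher
# so that both server and worker processes receive the right environment
# variables, e.g. (process counts and the script name are illustrative):
#
#     fleetrun --server_num=2 --worker_num=2 ctr_train.py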
def run_pserver(self, args):
    fleet.init_server()
    fleet.run_server()
def run_server(self):
    logger.info("Run Server Begin")
    fleet.init_server(config.get("runner.warmup_model_path"))
    fleet.run_server()
def main(args):
    role = role_maker.PaddleCloudRoleMaker()
    fleet.init(role)

    data = pgl.dataset.RedditDataset(args.normalize, args.symmetry)
    log.info("Preprocess finish")
    log.info("Train Examples: %s" % len(data.train_index))
    log.info("Val Examples: %s" % len(data.val_index))
    log.info("Test Examples: %s" % len(data.test_index))
    log.info("Num nodes %s" % data.graph.num_nodes)
    log.info("Num edges %s" % data.graph.num_edges)
    log.info("Average Degree %s" % np.mean(data.graph.indegree()))

    graph = data.graph
    train_index = data.train_index
    val_index = data.val_index
    test_index = data.test_index

    train_label = data.train_label
    val_label = data.val_label
    test_label = data.test_label

    loss, acc = build_net(
        input_size=data.feature.shape[-1],
        num_class=data.num_classes,
        hidden_size=args.hidden_size,
        num_layers=len(args.samples))
    test_program = paddle.static.default_main_program().clone(for_test=True)

    strategy = fleet.DistributedStrategy()
    strategy.a_sync = True
    optimizer = paddle.fluid.optimizer.Adam(learning_rate=args.lr)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(loss)

    if fleet.is_server():
        fleet.init_server()
        fleet.run_server()
    else:
        place = paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
        fleet.init_worker()

        train_ds = ShardedDataset(train_index, train_label)
        valid_ds = ShardedDataset(val_index, val_label)
        test_ds = ShardedDataset(test_index, test_label)

        collate_fn = partial(batch_fn, graph=graph, samples=args.samples)

        train_loader = Dataloader(
            train_ds,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.sample_workers,
            collate_fn=collate_fn)
        valid_loader = Dataloader(
            valid_ds,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.sample_workers,
            collate_fn=collate_fn)
        test_loader = Dataloader(
            test_ds,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.sample_workers,
            collate_fn=collate_fn)

        compiled_prog, cpu_num = setup_compiled_prog(loss)

        for epoch in tqdm.tqdm(range(args.epoch)):
            train_loss, train_acc = run(train_loader, data.feature, exe,
                                        compiled_prog, loss, acc,
                                        phase="train", cpu_num=cpu_num)

            valid_loss, valid_acc = run(valid_loader, data.feature, exe,
                                        test_program, loss, acc,
                                        phase="valid", cpu_num=1)
            log.info("Epoch %s Valid-Loss %s Valid-Acc %s" %
                     (epoch, valid_loss, valid_acc))

            test_loss, test_acc = run(test_loader, data.feature, exe,
                                      test_program, loss, acc,
                                      phase="test", cpu_num=1)
            log.info("Epoch %s Test-Loss %s Test-Acc %s" %
                     (epoch, test_loss, test_acc))

        fleet.stop_worker()
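# setup_compiled_prog above is not shown. Judging from the near-identical
# CPU_NUM branch in the next snippet, a plausible sketch (the helper name and
# return shape are assumptions) is:
import os
import paddle

def setup_compiled_prog(loss):
    # Compile the default main program for multi-threaded CPU execution.
    cpu_num = int(os.environ.get('CPU_NUM', 1))
    if cpu_num > 1:
        exec_strategy = paddle.static.ExecutionStrategy()
        exec_strategy.num_threads = cpu_num
        build_strategy = paddle.static.BuildStrategy()
        build_strategy.reduce_strategy = \
            paddle.static.BuildStrategy.ReduceStrategy.Reduce
        compiled_prog = paddle.static.CompiledProgram(
            paddle.static.default_main_program()).with_data_parallel(
                loss_name=loss.name,
                places=[paddle.CPUPlace()] * cpu_num,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = paddle.static.default_main_program()
    return compiled_prog, cpu_num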
def main(args):
    paddle.set_device("cpu")
    paddle.enable_static()

    fleet.init()

    if args.num_nodes is None:
        num_nodes = load(args.dataset).num_nodes
    else:
        num_nodes = args.num_nodes

    loss = StaticSkipGramModel(
        num_nodes, args.neg_num, args.embed_size, sparse=True)

    optimizer = F.optimizer.Adam(args.learning_rate, lazy_mode=True)
    dist_strategy = fleet.DistributedStrategy()
    dist_strategy.a_sync = True
    optimizer = fleet.distributed_optimizer(optimizer, dist_strategy)
    optimizer.minimize(loss)

    # init and run server or worker
    if fleet.is_server():
        fleet.init_server()
        fleet.run_server()

    if fleet.is_worker():
        place = paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
        fleet.init_worker()

        graph = load(args.dataset)
        # bind gen
        train_ds = ShardedDataset(graph.nodes)
        collate_fn = BatchRandWalk(graph, args.walk_len, args.win_size,
                                   args.neg_num, args.neg_sample_type)
        data_loader = Dataloader(
            train_ds,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.sample_workers,
            collate_fn=collate_fn)

        # Compile for multi-threaded CPU execution when CPU_NUM > 1.
        cpu_num = int(os.environ.get('CPU_NUM', 1))
        if cpu_num > 1:
            parallel_places = [paddle.CPUPlace()] * cpu_num
            exec_strategy = paddle.static.ExecutionStrategy()
            exec_strategy.num_threads = cpu_num
            build_strategy = paddle.static.BuildStrategy()
            build_strategy.reduce_strategy = \
                paddle.static.BuildStrategy.ReduceStrategy.Reduce
            compiled_prog = paddle.static.CompiledProgram(
                paddle.static.default_main_program()).with_data_parallel(
                    loss_name=loss.name,
                    places=parallel_places,
                    build_strategy=build_strategy,
                    exec_strategy=exec_strategy)
        else:
            compiled_prog = paddle.static.default_main_program()

        for epoch in range(args.epoch):
            train_loss = train(exe, compiled_prog, data_loader, loss)
            log.info("Running epoch:%s\t train_loss:%.6f", epoch, train_loss)

        fleet.stop_worker()

        if fleet.is_first_worker():
            fleet.save_persistables(exe, "./model",
                                    paddle.static.default_main_program())
def fit():
    EPOCH_NUM = 3
    BATCH_SIZE = 128
    type_size = 10

    role = role_maker.UserDefinedRoleMaker(
        current_id=0,
        role=role_maker.Role.SERVER,
        worker_num=2,
        server_endpoints=["127.0.0.1:36011", "127.0.0.1:36012"])
    fleet.init(role)

    strategy = paddle.distributed.fleet.DistributedStrategy()
    strategy.a_sync = True

    type_size = createDataList('F:/机器学习/CNN/train',
                               'D:/cnn/cnn.model.data' + "/")
    # Data provider for training
    train_reader = dataReader("D:/cnn/cnn.model.data/trainer.list")
    train_reader = paddle.batch(
        paddle.reader.shuffle(reader=train_reader, buf_size=BATCH_SIZE * 100),
        batch_size=BATCH_SIZE)
    test_reader = dataReader("D:/cnn/cnn.model.data/test.list")
    test_reader = paddle.batch(
        paddle.reader.shuffle(reader=test_reader, buf_size=BATCH_SIZE * 100),
        batch_size=BATCH_SIZE)

    data_shape = [3, 32, 32]
    paddle.enable_static()
    images = fluid.layers.data(name='images', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Build the classifier
    predict = networkConfiguration(images, type_size)

    # Define the loss function and accuracy
    cost = fluid.layers.cross_entropy(input=predict, label=label)  # cross entropy
    avg_cost = fluid.layers.mean(cost)  # mean over all elements of cost
    acc = fluid.layers.accuracy(input=predict, label=label)  # accuracy from predictions and labels

    # Clone a test program before the optimizer mutates the main program
    test_program = fluid.default_main_program().clone(for_test=True)

    # Define the optimizer
    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    if fleet.is_server():
        fleet.init_server()
        fleet.run_server()
    elif fleet.is_worker():
        fleet.init_worker()

        ########## model training & evaluation ##########
        # Create the executor
        use_cuda = False  # use CPU when use_cuda=False
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        print("done")

        # Define the data feeder
        feeder = fluid.DataFeeder(feed_list=[images, label], place=place)

        for pass_id in range(EPOCH_NUM):
            # Training
            for batch_id, data in enumerate(train_reader()):  # iterate over train_reader
                train_cost, train_acc = exe.run(
                    program=fluid.default_main_program(),  # run the main program
                    feed=feeder.feed(data),                # feed one batch of data
                    fetch_list=[avg_cost, acc])            # fetch loss and accuracy
                # Print training progress every 20 batches
                if batch_id % 20 == 0:
                    print('Pass:%d, Batch:%d, Cost:%0.5f, Accuracy:%0.5f' %
                          (pass_id, batch_id, train_cost[0], train_acc[0]))

            # Testing
            test_costs = []  # per-batch test losses
            test_accs = []   # per-batch test accuracies
            for batch_id, data in enumerate(test_reader()):
                test_cost, test_acc = exe.run(
                    program=test_program,        # run the test program
                    feed=feeder.feed(data),      # feed data
                    fetch_list=[avg_cost, acc])  # fetch loss and accuracy
                test_costs.append(test_cost[0])  # record the loss of each batch
                test_accs.append(test_acc[0])    # record the accuracy of each batch
            test_cost = sum(test_costs) / len(test_costs)  # average test loss
            test_acc = sum(test_accs) / len(test_accs)     # average test accuracy
            print('Test:%d, Cost:%0.5f, ACC:%0.5f' % (pass_id, test_cost, test_acc))

        save(predict, "D:/cnn/cnn.model", exe)