# FL trainer setup for the PaddleFL MNIST demo.
# FIX(review): this chunk was whitespace-collapsed onto a single line and was
# not valid Python; reformatted with conventional indentation, behavior intact.
# NOTE(review): FLRunTimeJob / FLTrainerFactory / paddle / fluid / sys are used
# but not imported in this chunk — presumably imported earlier; confirm.

BATCH_SIZE = 64

# Batched readers over MNIST; training data is shuffled through a 500-sample
# buffer, test data is read in order.
train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=500),
    batch_size=BATCH_SIZE)
test_reader = paddle.batch(
    paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)

trainer_num = 2
trainer_id = int(sys.argv[1])  # trainer id for each guest

# Load this trainer's compiled FL job config and point it at the scheduler.
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # Inform the scheduler IP to trainer

trainer = FLTrainerFactory().create_fl_trainer(job)
trainer.trainer_id = trainer_id
# Each trainer listens on its own port: 9000 + trainer_id.
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
trainer.trainer_num = trainer_num
trainer.key_dir = "./keys/"  # presumably key material for secure training — confirm
trainer.start()

output_folder = "fl_model"
epoch_id = 0
step_i = 0

# Feed layout mirroring the job's program: one 1x28x28 float image plus an
# int64 label, fed on CPU.
inputs = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='y', shape=[1], dtype='int64')
feeder = fluid.DataFeeder(feed_list=[inputs, label], place=fluid.CPUPlace())  # for test
# NOTE(review): whitespace-collapsed fragment, truncated at BOTH ends — it
# begins with a bare `else:` whose `if` is outside this view and ends with a
# dangling `if epoch_id % 5 == 0:`. Left byte-identical; do not reformat
# without recovering the surrounding context.
# Visible intent: a synthetic reader yielding three random float32 features
# plus a random int64 label, then an FL trainer driven from a scheduler
# message; endpoints come from `scheduler_conf` / `endpoint`, defined elsewhere.
# BUG(review): the inner loop increments `step_i` but the break condition
# tests `train_step`, which is never assigned in this fragment (compare the
# sibling chunk that uses `train_step` consistently); also
# `print(... % (step_i))` runs BEFORE `step_i = 0`, so the first iteration
# would hit a NameError unless step_i is set earlier — confirm against the
# full file.
else: def reader(): for i in range(1000): data_dict = {} for i in range(3): data_dict[str(i)] = np.random.rand(1, 5).astype('float32') data_dict["label"] = np.random.randint(2, size=(1, 1)).astype('int64') yield data_dict trainer_id = message.split("trainer")[1] job_path = "job_config" job = FLRunTimeJob() job.load_trainer_job(job_path, int(trainer_id)) job._scheduler_ep = scheduler_conf["ENDPOINT"] trainer = FLTrainerFactory().create_fl_trainer(job) trainer._current_ep = endpoint trainer.start() print(trainer._scheduler_ep, trainer._current_ep) output_folder = "fl_model" epoch_id = 0 while not trainer.stop(): print("batch %d start train" % (step_i)) step_i = 0 for data in reader(): trainer.run(feed=data, fetch=[]) step_i += 1 if train_step == trainer._step: break epoch_id += 1 if epoch_id % 5 == 0:
# NOTE(review): whitespace-collapsed fragment of the FEMNIST FL trainer,
# truncated at the end — `data_generater`'s body continues past this view
# (json_train/json_test are loaded but never used here). Left byte-identical.
# BUG(review): datefmt "%d-%M-%Y" uses %M (minutes) where %m (month) was
# almost certainly intended — fix when the chunk is reformatted.
# NOTE(review): train_file/test_file are opened without a `with` block and are
# not closed in the visible code — verify they are closed later, or convert to
# context managers.
# NOTE(review): sys / logging / FLRunTimeJob / FLTrainerFactory are used but
# not imported in this chunk — presumably imported earlier; confirm.
import math import random import json logging.basicConfig(filename="test.log", filemode="w", format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%d-%M-%Y %H:%M:%S", level=logging.DEBUG) trainer_id = int(sys.argv[1]) # trainer id for each guest job_path = "fl_job_config" job = FLRunTimeJob() job.load_trainer_job(job_path, trainer_id) job._scheduler_ep = "127.0.0.1:9091" trainer = FLTrainerFactory().create_fl_trainer(job) trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id) trainer.start() print(trainer._step) test_program = trainer._main_program.clone(for_test=True) def data_generater(trainer_id, inner_step, batch_size, count_by_step): train_file = open( "./femnist_data/train/all_data_%d_niid_0_keep_0_train_9.json" % trainer_id, 'r') test_file = open( "./femnist_data/test/all_data_%d_niid_0_keep_0_test_9.json" % trainer_id, 'r') json_train = json.load(train_file) json_test = json.load(test_file)
# NOTE(review): whitespace-collapsed fragment, truncated at the end — it
# finishes with a dangling `if epoch_id % 5 == 0:` whose body is outside this
# view (presumably a periodic model save). Left byte-identical.
# Visible intent: `reader()` yields 1000 synthetic samples (three random
# (1, 5) float32 features keyed "0".."2" plus a random binary int64 "label");
# the trainer then runs epochs until the scheduler signals stop, breaking each
# epoch after trainer._step batches.
# NOTE(review): the inner `for i in range(3)` reuses and shadows the outer
# loop variable `i` — harmless here but worth renaming when reformatting.
# NOTE(review): np / sys / FLRunTimeJob / FLTrainerFactory are used but not
# imported in this chunk — presumably imported earlier; confirm.
def reader(): for i in range(1000): data_dict = {} for i in range(3): data_dict[str(i)] = np.random.rand(1, 5).astype('float32') data_dict["label"] = np.random.randint(2, size=(1, 1)).astype('int64') yield data_dict trainer_id = int(sys.argv[1]) # trainer id for each guest job_path = "fl_job_config" job = FLRunTimeJob() job.load_trainer_job(job_path, trainer_id) job._scheduler_ep = "127.0.0.1:9091" # Inform the scheduler IP to trainer trainer = FLTrainerFactory().create_fl_trainer(job) trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id) trainer.start() print(trainer._scheduler_ep, trainer._current_ep) output_folder = "fl_model" epoch_id = 0 while not trainer.stop(): print("batch %d start train" % (epoch_id)) train_step = 0 for data in reader(): trainer.run(feed=data, fetch=[]) train_step += 1 if train_step == trainer._step: break epoch_id += 1 if epoch_id % 5 == 0:
# NOTE(review): whitespace-collapsed fragment, truncated at BOTH ends — it
# starts inside a generator body (the `yield` has no visible enclosing `def`,
# presumably `reader()` as in the sibling chunks) and the `while` loop may
# continue past this view. Left byte-identical.
# Visible intent: same synthetic-data FL training loop as the sibling chunks,
# but the scheduler and trainer endpoints are built from Kubernetes-style
# service environment variables (FL_SCHEDULER_SERVICE_* / TRAINER0_SERVICE_*)
# instead of hard-coded 127.0.0.1 addresses; the old localhost assignments are
# kept as commented-out code.
# NOTE(review): the endpoint env vars are hard-wired to TRAINER0_* even though
# trainer_id comes from argv — presumably only trainer 0 uses this script;
# verify before reusing for other trainer ids.
# NOTE(review): os / np / sys / FLRunTimeJob / FLTrainerFactory are used but
# not imported in this chunk — presumably imported earlier; confirm.
for i in range(1000): data_dict = {} for i in range(3): data_dict[str(i)] = np.random.rand(1, 5).astype('float32') data_dict["label"] = np.random.randint(2, size=(1, 1)).astype('int64') yield data_dict trainer_id = int(sys.argv[1]) # trainer id for each guest job_path = "fl_job_config" job = FLRunTimeJob() job.load_trainer_job(job_path, trainer_id) #job._scheduler_ep = "127.0.0.1:9091" # Inform the scheduler IP to trainer job._scheduler_ep = os.environ['FL_SCHEDULER_SERVICE_HOST'] + ":" + os.environ[ 'FL_SCHEDULER_SERVICE_PORT_FL_SCHEDULER'] trainer = FLTrainerFactory().create_fl_trainer(job) #trainer._current_ep = "127.0.0.1:{}".format(9000+trainer_id) trainer._current_ep = os.environ['TRAINER0_SERVICE_HOST'] + ":" + os.environ[ 'TRAINER0_SERVICE_PORT_TRAINER0'] trainer.start() print(trainer._scheduler_ep, trainer._current_ep) output_folder = "fl_model" epoch_id = 0 while not trainer.stop(): print("batch %d start train" % (epoch_id)) train_step = 0 for data in reader(): trainer.run(feed=data, fetch=[]) train_step += 1 if train_step == trainer._step: break
# FL trainer setup (notebook export — the "# In[n]:" markers are cell
# boundaries from jupyter nbconvert).
# FIX(review): this chunk was whitespace-collapsed onto a single line and was
# not valid Python; reformatted with conventional indentation.
import paddle
import paddle.fluid as fluid
import logging
import math

# Log everything to test.log, truncating on each run.
# FIX(review): datefmt used "%d-%M-%Y" — %M is minutes; %m (month) was almost
# certainly intended.
logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%m-%Y %H:%M:%S",
    level=logging.DEBUG)

trainer_id = int(sys.argv[1])  # trainer id for each guest
# NOTE(review): sys, pd, FLRunTimeJob and FLTrainerFactory are used below but
# not imported in this chunk — presumably imported earlier in the file; confirm.

# Load this trainer's compiled FL job config and start the trainer.
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer.start()

# Clone of the trainer's main program used for evaluation only.
test_program = trainer._main_program.clone(for_test=True)

alldata = pd.read_csv('alldata.csv')
print(len(alldata))

# In[3]:

# Split the label off and drop the identifier and label columns from the
# feature frame.
# FIX(review): the positional axis form drop('Company', 1) is deprecated and
# removed in pandas 2.0 — use the explicit columns= keyword (same behavior).
label = alldata['Label'].to_frame()
alldata = alldata.drop(columns='Company')
alldata = alldata.drop(columns='Label')

# In[4]:
# FL trainer for the gru4rec (session-based recommendation) demo, node4.
# FIX(review): this chunk was whitespace-collapsed onto a single line and was
# not valid Python; reformatted with conventional indentation.
from paddle_fl.core.master.fl_job import FLRunTimeJob
from paddle_fl.reader.gru4rec_reader import Gru4rec_Reader
import paddle.fluid as fluid
import numpy as np
import sys
import os
import logging

# Log everything to test.log, truncating on each run.
# FIX(review): datefmt used "%d-%M-%Y" — %M is minutes; %m (month) was almost
# certainly intended.
logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%m-%Y %H:%M:%S",
    level=logging.DEBUG)

trainer_id = int(sys.argv[1])  # trainer id for each guest
place = fluid.CPUPlace()
train_file_dir = "mid_data/node4/%d/" % trainer_id  # per-trainer data shard

# Load this trainer's compiled FL job config and start the trainer.
# NOTE(review): FLTrainerFactory is used below but not imported in this chunk
# — presumably imported earlier in the file; confirm.
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer.start()

r = Gru4rec_Reader()
train_reader = r.reader(train_file_dir, place, batch_size=125)

output_folder = "model_node4"
step_i = 0
# Train until the scheduler signals stop; each batch fetches the mean cost and
# converts it to perplexity.
while not trainer.stop():
    step_i += 1
    print("batch %d start train" % (step_i))
    for data in train_reader():
        # print(np.array(data['src_wordseq']))
        ret_avg_cost = trainer.run(feed=data, fetch=["mean_0.tmp_0"])
        avg_ppl = np.exp(ret_avg_cost[0])
        # NOTE(review): the chunk appears truncated here — avg_ppl is unused
        # in the visible code; the loop body likely continues past this view.
# NOTE(review): whitespace-collapsed fragment of the gru4rec FL trainer,
# truncated mid-statement — it ends inside an unterminated call
# `trainer.run(feed=data,` whose remaining arguments are outside this view.
# Left byte-identical; do not reformat without recovering the continuation.
# Visible intent: same training loop as the sibling gru4rec chunk, but with
# hard-coded localhost endpoints (scheduler 127.0.0.1:9091, trainer
# 127.0.0.1:9000+id) instead of using a factory-assigned endpoint.
# BUG(review): datefmt "%d-%M-%Y" uses %M (minutes) where %m (month) was
# almost certainly intended — fix when the chunk is reformatted.
# NOTE(review): FLRunTimeJob and FLTrainerFactory are used but not imported in
# this chunk — presumably imported earlier; confirm. `train_step = 0` is set
# but never used in the visible code — likely referenced past the truncation.
from paddle_fl.reader.gru4rec_reader import Gru4rec_Reader import paddle.fluid as fluid import numpy as np import sys import os import logging logging.basicConfig(filename="test.log", filemode="w", format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%d-%M-%Y %H:%M:%S", level=logging.DEBUG) trainer_id = int(sys.argv[1]) # trainer id for each guest place = fluid.CPUPlace() train_file_dir = "mid_data/node4/%d/" % trainer_id job_path = "fl_job_config" job = FLRunTimeJob() job.load_trainer_job(job_path, trainer_id) job._scheduler_ep = "127.0.0.1:9091" trainer = FLTrainerFactory().create_fl_trainer(job) trainer._current_ep = "127.0.0.1:{}".format(9000+trainer_id) trainer.start() r = Gru4rec_Reader() train_reader = r.reader(train_file_dir, place, batch_size = 125) output_folder = "model_node4" step_i = 0 while not trainer.stop(): step_i += 1 print("batch %d start train" % (step_i)) train_step = 0 for data in train_reader(): #print(np.array(data['src_wordseq'])) ret_avg_cost = trainer.run(feed=data,