import logging
import math
import sys
import time

import paddle
import paddle.fluid as fluid
# Import paths follow the PaddleFL example scripts; adjust them to your PaddleFL version.
from paddle_fl.core.master.fl_job import FLRunTimeJob
from paddle_fl.core.trainer.fl_trainer import FLTrainerFactory

logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%m-%Y %H:%M:%S",
    level=logging.DEBUG)

trainer_id = int(sys.argv[1])  # trainer id for each guest
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # tell the trainer the scheduler's endpoint
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
place = fluid.CPUPlace()
trainer.start(place)

# Clone the compiled main program for evaluation.
test_program = trainer._main_program.clone(for_test=True)

train_reader = paddle.batch(
    paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
    batch_size=64)
test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=64)

img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())
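# A minimal sketch of the federated training loop that typically follows this
# setup, mirroring the other trainer snippets in this section. fetch=[] is used
# because the names of the loss/accuracy variables depend on the compiled FL job;
# "fl_model" is an illustrative output folder.
output_folder = "fl_model"
epoch_id = 0
while not trainer.stop():
    for data in train_reader():
        trainer.run(feed=feeder.feed(data), fetch=[])
    epoch_id += 1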
logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%m-%Y %H:%M:%S",
    level=logging.DEBUG)

trainer_id = int(sys.argv[1])  # trainer id for each guest
place = fluid.CPUPlace()
train_file_dir = "mid_data/node4/%d/" % trainer_id
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # tell the trainer the scheduler's endpoint
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
trainer.start(place)

# GRU4Rec session-based recommendation data for this node.
r = Gru4rec_Reader()
train_reader = r.reader(train_file_dir, place, batch_size=125)

output_folder = "model_node4"
epoch_i = 0
while not trainer.stop():
    epoch_i += 1
    train_step = 0
    for data in train_reader():
        ret_avg_cost = trainer.run(feed=data, fetch=["mean_0.tmp_0"])
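        # Plausible continuation of the GRU4Rec loop: the fetched mean cost is a
        # log loss, so exponentiating it gives a perplexity estimate. trainer._step
        # and save_inference_program mirror the other snippets in this section and
        # should be treated as assumptions about the installed PaddleFL version.
        avg_ppl = np.exp(ret_avg_cost[0])
        print("epoch %d step %d ppl %.3f" % (epoch_i, train_step, np.mean(avg_ppl)))
        train_step += 1
        if train_step == trainer._step:
            break
    if epoch_i % 5 == 0:
        trainer.save_inference_program(output_folder)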
BATCH_SIZE = 64
train_reader = paddle.batch(
    paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
    batch_size=BATCH_SIZE)
test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)

trainer_num = 2
trainer_id = int(sys.argv[1])  # trainer id for each guest
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # tell the trainer the scheduler's endpoint
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer.trainer_id = trainer_id
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
trainer.trainer_num = trainer_num
trainer.key_dir = "./keys/"
place = fluid.CPUPlace()
trainer.start(place)

output_folder = "fl_model"
epoch_id = 0
step_i = 0

inputs = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='y', shape=[1], dtype='int64')
feeder = fluid.DataFeeder(feed_list=[inputs, label], place=fluid.CPUPlace())
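# Sketch of the per-epoch loop for this secure-aggregation trainer (trainer_num
# and key_dir above are what distinguish it from the plain MNIST setup). The
# variables to fetch are not shown in this snippet, so fetch=[] is used here.
while not trainer.stop():
    epoch_id += 1
    for data in train_reader():
        step_i += 1
        trainer.run(feed=feeder.feed(data), fetch=[])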
def reader():
    # Toy reader: three random float features plus a binary label per sample.
    for i in range(10):
        data_dict = {}
        for j in range(3):
            data_dict[str(j)] = np.random.rand(1, 5).astype('float32')
        data_dict["label"] = np.random.randint(2, size=(1, 1)).astype('int64')
        yield data_dict


trainer_id = int(sys.argv[1])  # trainer id for each guest
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # tell the trainer the scheduler's endpoint
trainer = FLTrainerFactory().create_fl_trainer(job)
use_cuda = False
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer._current_ep = "127.0.0.1:8192"
trainer.start(place=place)
trainer._logger.setLevel(logging.DEBUG)

# Each trainer skips trainer_id samples so that every guest feeds a different one.
g = reader()
if trainer_id > 0:
    for i in range(trainer_id):
        next(g)
data = next(g)
print(data)

output_folder = "fl_model"
step_i = 0
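# Sketch of the step loop for this toy setup: each trainer repeatedly feeds the
# single sample selected above. The checkpoint interval and the
# save_inference_program call are illustrative assumptions, mirroring the other
# snippets in this section.
while not trainer.stop():
    step_i += 1
    trainer.run(feed=data, fetch=[])
    if step_i % 100 == 0:
        trainer.save_inference_program(output_folder)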
logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%m-%Y %H:%M:%S",
    level=logging.DEBUG)

# Load configs
trainer_id = int(args.id)  # trainer id
job_path = params["federated"]["job_path"]
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
# Tell the trainer the scheduler's endpoint.
job._scheduler_ep = "127.0.0.1:" + str(params["federated"]["scheduler_port"])
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(
    params["federated"]["seed_of_clients_port"] + trainer_id)
place = paddle.fluid.CPUPlace()
trainer.start(place)
test_program = trainer._main_program.clone(for_test=True)

# Load data
# dataset = Time_series_loader(
#     distributed=params["federated"]["distributed"],
#     ts_path=params["federated"]["clients_path"],
#     number_of_clients=params["federated"]["number_of_clients"],
#     lookback=params["federated"]["lookback"],
#     lookforward=params["federated"]["lookforward"])
dataset = select_data(params)
train_reader = paddle.batch(
    reader=dataset.train_data(client=trainer_id),
    batch_size=params["federated"]["batch_size"])
val_reader = paddle.batch(
    reader=dataset.val_data(client=trainer_id),
    # Assumed completion of the truncated call: reuse the training batch size.
    batch_size=params["federated"]["batch_size"])
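# Sketch of the training loop for this time-series setup. It assumes a
# fluid.DataFeeder named `feeder` has been built over the model's input and
# target layers (as in the MNIST snippets); test_program and val_reader would be
# used the same way for a per-round evaluation pass.
while not trainer.stop():
    for data in train_reader():
        trainer.run(feed=feeder.feed(data), fetch=[])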
def reader():
    # Toy reader: three random float features plus a binary label per sample.
    for i in range(1000):
        data_dict = {}
        for j in range(3):
            data_dict[str(j)] = np.random.rand(1, 5).astype('float32')
        data_dict["label"] = np.random.randint(2, size=(1, 1)).astype('int64')
        yield data_dict


trainer_id = int(sys.argv[1])  # trainer id for each guest
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
# Read the scheduler and trainer endpoints from the Kubernetes service environment
# instead of hard-coding "127.0.0.1:9091" and "127.0.0.1:{9000 + trainer_id}".
job._scheduler_ep = os.environ['FL_SCHEDULER_SERVICE_HOST'] + ":" + os.environ[
    'FL_SCHEDULER_SERVICE_PORT_FL_SCHEDULER']
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = os.environ['TRAINER0_SERVICE_HOST'] + ":" + os.environ[
    'TRAINER0_SERVICE_PORT_TRAINER0']
place = fluid.CPUPlace()
trainer.start(place)
print(trainer._scheduler_ep, trainer._current_ep)

output_folder = "fl_model"
epoch_id = 0
while not trainer.stop():
    print("epoch %d start train" % epoch_id)
    train_step = 0
    for data in reader():
        trainer.run(feed=data, fetch=[])
        train_step += 1
        if train_step == trainer._step:
            break  # assumed completion of the truncated branch: stop after the configured steps per round
    epoch_id += 1
import logging
import math
import sys
import time

import paddle
import paddle.fluid as fluid
# Import paths follow the PaddleFL example scripts; adjust them to your PaddleFL version.
from paddle_fl.core.master.fl_job import FLRunTimeJob
from paddle_fl.core.trainer.fl_trainer import FLTrainerFactory

logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%m-%Y %H:%M:%S",
    level=logging.DEBUG)

trainer_id = int(sys.argv[1])  # trainer id for each guest
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # tell the trainer the scheduler's endpoint
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
place = fluid.CPUPlace()
trainer.start(place)

test_program = trainer._main_program.clone(for_test=True)

train_reader = paddle.batch(
    paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
    batch_size=64)
test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=64)

inputs = fluid.layers.data(name='input', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder(feed_list=[inputs, label], place=fluid.CPUPlace())
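# Hedged evaluation sketch: run the cloned test_program through the trainer's
# underlying executor. The attribute name `trainer.exe`, the fetched variable
# "accuracy_0.tmp_0", and the helper name evaluate() are assumptions that depend
# on the PaddleFL version and on how the FL job's network was compiled.
import numpy as np

def evaluate(program, feed_builder, reader_fn):
    acc_set = []
    for test_data in reader_fn():
        acc_np = trainer.exe.run(program=program,
                                 feed=feed_builder.feed(test_data),
                                 fetch_list=["accuracy_0.tmp_0"])
        acc_set.append(float(acc_np[0]))
    return np.array(acc_set).mean()

# Usage: evaluate(test_program, feeder, test_reader)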
    # (tail of a helper that builds the detection data loader)
    data_loader = Reader(
        coco_loader,
        sample_transforms=sample_trans,
        batch_transforms=batch_trans,
        batch_size=1,
        shuffle=True,
        drop_empty=True,
        inputs_def=inputs_def)()
    return data_loader


job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # tell the trainer the scheduler's endpoint
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
# One GPU per trainer in this detection example.
trainer.start(fluid.CUDAPlace(trainer_id))
test_program = trainer._main_program.clone(for_test=True)

image = fluid.layers.data(
    name='image', shape=[3, None, None], dtype='float32', lod_level=0)
im_info = fluid.layers.data(
    name='im_info', shape=[None, 3], dtype='float32', lod_level=0)
im_id = fluid.layers.data(
    name='im_id', shape=[None, 1],
    dtype='int64', lod_level=0)  # assumed completion of the truncated call
def reader():
    for i in range(1000):
        data_dict = {}
        for i in range(3):
            data_dict[str(i)] = np.random.rand(1, 5).astype('float32')
        data_dict["label"] = np.random.randint(2, size=(1, 1)).astype('int64')
        yield data_dict


trainer_id = int(sys.argv[1])  # trainer id for each guest
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # tell the trainer the scheduler's endpoint
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
place = fluid.CPUPlace()
trainer.start(place)
print("scheduler_ep is {}, current_ep is {}".format(trainer._scheduler_ep,
                                                    trainer._current_ep))

epoch_id = 0
while not trainer.stop():
    if epoch_id > 10:
        break
    print("{} epoch {} start train".format(
        time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),
        epoch_id))
    train_step = 0
    for data in reader():
        trainer.run(feed=data, fetch=[])
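        # Plausible continuation, mirroring the other snippets in this section:
        # cap the steps per round with trainer._step and periodically save a
        # servable model. trainer._step, save_inference_program, and the "fl_model"
        # folder name are assumptions taken from the neighbouring examples.
        train_step += 1
        if train_step == trainer._step:
            break
    epoch_id += 1
    if epoch_id % 5 == 0:
        trainer.save_inference_program("fl_model")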