def save_persistables(self, executor, dirname, main_program=None):
    """
    Save the persistable variables of a distributed program into `dirname`.

    The arguments are validated and the work is then delegated to
    `io.save_persistables` with no file name, so this entry point offers
    no single-file option of its own.

    Args:
        executor: a plain `Executor`; `ParallelExecutor` is rejected.
        dirname: folder the persistable variables are saved into.
        main_program: program whose persistable variables are saved.
            Falls back to `self.main_program` when None.

    Raises:
        TypeError: if `executor` is a `ParallelExecutor` or is not an
            `Executor`, or if the program is a `CompiledProgram`.
        ValueError: if the program is not marked as distributed.
    """
    # ParallelExecutor is tested first so that it gets its own, more
    # specific message before the generic Executor check.
    if isinstance(executor, ParallelExecutor):
        raise TypeError(
            "in fleet.save_persistables() function, executor must be as "
            "Executor type, ParallelExecutor is not allowed")
    if not isinstance(executor, Executor):
        raise TypeError(
            "in fleet.save_persistables() function, executor must be as "
            "Executor type")

    program = self.main_program if main_program is None else main_program

    if isinstance(program, CompiledProgram):
        raise TypeError(
            "in fleet.save_persistables() function, main_program must be as "
            "Program type, CompiledProgram is not allowed")
    if not program._is_distributed:
        raise ValueError(
            "main_program is for local, may not use fleet.save_persistables")

    io.save_persistables(executor, dirname, program, None)
def save_train_snapshot(executor, program, file_name="", train_info=None):
    """
    Save a timestamped snapshot of a training program.

    The snapshot name is `file_name` followed by "_" and the current local
    time formatted as `%Y-%m-%d_%H-%M-%S`. `train_info` is handed to
    `file_utils.save_file` (model type, json format) and the persistable
    variables of `program` are saved through `io.save_persistables` into
    the path returned by `file_utils.get_fullurl`.

    Args:
        executor: executor passed on to `io.save_persistables`.
        program: program whose persistable variables are saved.
        file_name: prefix of the snapshot name (default: empty string).
        train_info: content stored next to the snapshot. Defaults to a
            fresh empty dict on every call.

    Returns:
        The path returned by `file_utils.get_fullurl` for the snapshot.
    """
    # A `{}` default would be one dict shared by every call and handed to an
    # external helper; build a new one per call instead.
    if train_info is None:
        train_info = {}

    # With no explicit time tuple, strftime formats the current local time.
    timestamp = time.strftime('%Y-%m-%d_%H-%M-%S')
    file_name = file_name + "_" + timestamp

    file_path = file_utils.get_fullurl("model", file_name, "dir")
    file_utils.save_file(
        content=train_info,
        file_type="model",
        file_name=file_name,
        file_format="json")
    io.save_persistables(
        executor=executor, dirname=file_path, main_program=program)
    return file_path
def save_persistables(self, executor, dirname, main_program=None,
                      filename=None):
    """
    Save the persistable variables of `main_program`.

    Variables with `persistable==True` are saved to the folder `dirname`.
    Leave `filename` as None to save variables in separate files, or give
    a file name to save all of them in a single file.

    Args:
        executor: must be an `Executor` instance.
        dirname: folder the persistable variables are saved into.
        main_program: program to save; falls back to
            `self._origin_program` when None. Must be a `Program`.
        filename: optional single-file name, forwarded unchanged to
            `io.save_persistables`.

    Raises:
        AssertionError: if `executor` is not an `Executor` or the program
            is not a `Program`.
    """
    # The messages used to name fleet.save_inference_model(), which sent
    # anyone reading a failure to the wrong function.
    assert isinstance(executor, Executor), \
        "In fleet.save_persistables() function, executor must be as" \
        " Executor type."

    if main_program is None:
        main_program = self._origin_program

    assert isinstance(main_program, Program), \
        "In fleet.save_persistables() function, main_program " \
        "must be as Program type."

    io.save_persistables(executor, dirname, main_program, filename=filename)
def save_persistables(self, executor, dirname, main_program=None):
    """
    Save the persistable variables of `main_program` into `dirname`.

    Thin pass-through to `io.save_persistables`. The file name is always
    None here, which corresponds to saving the variables in separate
    files inside `dirname` rather than in one combined file.

    Args:
        executor: executor forwarded to `io.save_persistables`.
        dirname: folder the persistable variables are saved into.
        main_program: program forwarded as-is (None is passed through).
    """
    io.save_persistables(executor, dirname, main_program, filename=None)
def test_fit_line_inference_model(self):
    """
    Round-trip a small line-fitting model through save/load.

    A one-layer network is trained for 100 steps. It is then saved as an
    inference model twice: once with default file names and once with
    explicit 'model' / 'params' names. Both are loaded back and must
    reproduce the loss of the trained program on the same batch. Loading
    with neither a directory nor a params argument must raise ValueError.
    """
    MODEL_DIR = "./tmp/inference_model"
    UNI_MODEL_DIR = "./tmp/inference_model1"

    startup_prog = Program()
    train_prog = Program()

    with program_guard(train_prog, startup_prog):
        x = layers.data(name='x', shape=[2], dtype='float32')
        y = layers.data(name='y', shape=[1], dtype='float32')

        y_predict = layers.fc(input=x, size=1, act=None)
        avg_cost = layers.mean(
            layers.square_error_cost(input=y_predict, label=y))

        sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
        sgd_optimizer.minimize(avg_cost, startup_prog)

    exe = executor.Executor(core.CPUPlace())
    exe.run(startup_prog, feed={}, fetch_list=[])

    # The batch is identical on every step, so build it once up front.
    tensor_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]]).astype("float32")
    tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32")

    for _ in six.moves.xrange(100):
        exe.run(train_prog,
                feed={'x': tensor_x,
                      'y': tensor_y},
                fetch_list=[avg_cost])

    # Separated model and unified model
    save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, train_prog)
    save_inference_model(UNI_MODEL_DIR, ["x", "y"], [avg_cost], exe,
                         train_prog, 'model', 'params')
    pruned_prog = train_prog.clone()._prune_with_input(
        feeded_var_names=["x", "y"], targets=[avg_cost])
    # NOTE(review): with dirname=None the result is later used as the params
    # argument of load_inference_model; presumably serialized parameters.
    params_str = save_persistables(exe, None, pruned_prog, None)

    expected = exe.run(train_prog,
                       feed={'x': tensor_x,
                             'y': tensor_y},
                       fetch_list=[avg_cost])[0]

    six.moves.reload_module(executor)  # reload to build a new scope

    separated_model = InferModel(load_inference_model(MODEL_DIR, exe))
    with open(os.path.join(UNI_MODEL_DIR, 'model'), "rb") as model_file:
        model_str = model_file.read()
    unified_model = InferModel(
        load_inference_model(None, exe, model_str, params_str))

    for model in [separated_model, unified_model]:
        feed_x, feed_y = model.feed_var_names[0], model.feed_var_names[1]
        outs = exe.run(model.program,
                       feed={feed_x: tensor_x,
                             feed_y: tensor_y},
                       fetch_list=model.fetch_vars)
        actual = outs[0]

        self.assertEqual(model.feed_var_names, ["x", "y"])
        self.assertEqual(len(model.fetch_vars), 1)
        print("fetch %s" % str(model.fetch_vars[0]))
        self.assertEqual(expected, actual)

    self.assertRaises(ValueError, fluid.io.load_inference_model, None, exe,
                      model_str, None)
def run_trainer(self, args):
    """
    Run the trainer side of the save/load test and emit a pickled result.

    Behaviour is driven by environment variables:
      * SAVE       - "1" to train and save, "0" (default) otherwise.
      * MODEL_DIR  - folder for the saved persistables (default "").
      * SAVE_MODE  - "LOCAL" or "DIST"; anything else raises.
      * SKIP_STEPS - (DIST only) step index at which trainer 0 saves, or
                     up to which steps are skipped when not saving.

    LOCAL mode writes the pickled, flattened '__fc_b__' variable to stdout.
    DIST mode writes the pickled loss of the last executed step.

    Raises:
        Exception: if SAVE_MODE is neither "LOCAL" nor "DIST".
    """
    # get_model yields six values; only the loss and train reader are used.
    _, avg_cost, train_reader, _, _, _ = self.get_model(batch_size=2)

    main_prog = fluid.default_main_program()
    if args.update_method == "pserver":
        transpiler = self.get_transpiler(args.trainer_id, main_prog,
                                         args.endpoints, args.trainers,
                                         args.sync_mode)
        trainer_prog = transpiler.get_trainer_program()
    else:
        trainer_prog = main_prog

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

    startup_exe = fluid.Executor(place)
    startup_exe.run(fluid.default_startup_program())

    strategy = fluid.ExecutionStrategy()
    strategy.num_threads = 1

    build_stra = fluid.BuildStrategy()
    reduce_kinds = fluid.BuildStrategy.ReduceStrategy
    build_stra.reduce_strategy = (reduce_kinds.Reduce
                                  if args.use_reduce else
                                  reduce_kinds.AllReduce)

    exe = fluid.ParallelExecutor(
        args.use_cuda,
        loss_name=avg_cost.name,
        exec_strategy=strategy,
        build_strategy=build_stra)

    feed_var_list = [
        var for var in trainer_prog.global_block().vars.values()
        if var.is_data
    ]
    feeder = fluid.DataFeeder(feed_var_list, place)
    reader_generator = train_reader()

    def get_data():
        # In pserver mode with reader allocation, each trainer keeps only
        # the samples whose offset parity matches its trainer id.
        origin_batch = next(reader_generator)
        if args.update_method == "pserver" and args.use_reader_alloc:
            return [
                item for offset, item in enumerate(origin_batch)
                if offset % 2 == args.trainer_id
            ]
        return origin_batch

    def emit_pickled(payload):
        # Python 2 prints the pickle; Python 3 writes the raw bytes.
        if six.PY2:
            print(pickle.dumps(payload))
        else:
            sys.stdout.buffer.write(pickle.dumps(payload))

    def train_step(batch):
        (step_loss,) = exe.run(fetch_list=[avg_cost.name],
                               feed=feeder.feed(batch))
        return step_loss

    need_save = bool(int(os.getenv("SAVE", "0")))
    model_dir = os.getenv("MODEL_DIR", "")
    save_mode = os.getenv("SAVE_MODE", "")

    if save_mode == "LOCAL":
        if need_save:
            for _ in six.moves.xrange(RUN_STEP):
                train_step(get_data())
            if model_dir:
                io.save_persistables(startup_exe, model_dir, trainer_prog)

        var = np.array(
            fluid.global_scope().find_var('__fc_b__').get_tensor())
        emit_pickled(np.ravel(var).tolist())

    elif save_mode == "DIST":
        skip_steps = int(os.getenv("SKIP_STEPS"))
        loss = None
        if need_save:
            for idx in six.moves.xrange(8):
                loss = train_step(get_data())
                # Only trainer 0 saves, and only at the chosen step.
                if (model_dir and idx == skip_steps and
                        args.trainer_id == 0):
                    io.save_persistables(startup_exe, model_dir,
                                         trainer_prog)
        else:
            for idx in six.moves.xrange(8):
                # The batch is always consumed so the reader stays in step,
                # even for steps that are not executed.
                data = get_data()
                if idx > skip_steps:
                    loss = train_step(data)
        emit_pickled(loss.tolist())

    else:
        raise Exception("save_mode must be LOCAL or DIST")
def save_persistables(self, executor, dirname, main_program=None):
    """
    Forward to `io.save_persistables` with the file name fixed to None.

    Args:
        executor: executor forwarded to `io.save_persistables`.
        dirname: target folder forwarded to `io.save_persistables`.
        main_program: program forwarded as-is (None is passed through).
    """
    io.save_persistables(executor, dirname, main_program, filename=None)
def run_trainer(self, args):
    """
    Run the trainer, optionally save persistables, and emit '__fc_b__'.

    Behaviour is driven by environment variables:
      * SAVE      - "1" to train for RUN_STEP steps, "0" (default) to skip.
      * MODEL_DIR - folder for the saved persistables (default ""); saving
                    only happens when SAVE is set and this is non-empty.

    The flattened value of the '__fc_b__' variable from the global scope
    is pickled and written to stdout in every case.
    """
    # get_model yields six values; only the loss and train reader are used.
    _, avg_cost, train_reader, _, _, _ = self.get_model(batch_size=2)

    if args.mem_opt:
        fluid.memory_optimize(fluid.default_main_program(), skip_grads=True)

    main_prog = fluid.default_main_program()
    if args.is_dist:
        transpiler = self.get_transpiler(args.trainer_id, main_prog,
                                         args.endpoints, args.trainers,
                                         args.sync_mode)
        trainer_prog = transpiler.get_trainer_program()
    else:
        trainer_prog = main_prog

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

    startup_exe = fluid.Executor(place)
    startup_exe.run(fluid.default_startup_program())

    strategy = fluid.ExecutionStrategy()
    strategy.num_threads = 1
    strategy.allow_op_delay = False

    build_stra = fluid.BuildStrategy()
    reduce_kinds = fluid.BuildStrategy.ReduceStrategy
    build_stra.reduce_strategy = (reduce_kinds.Reduce
                                  if args.use_reduce else
                                  reduce_kinds.AllReduce)

    exe = fluid.ParallelExecutor(
        args.use_cuda,
        loss_name=avg_cost.name,
        exec_strategy=strategy,
        build_strategy=build_stra)

    feed_var_list = [
        var for var in trainer_prog.global_block().vars.values()
        if var.is_data
    ]
    feeder = fluid.DataFeeder(feed_var_list, place)
    reader_generator = train_reader()

    def get_data():
        # In distributed mode with reader allocation, each trainer keeps
        # only the samples whose offset parity matches its trainer id.
        origin_batch = next(reader_generator)
        if args.is_dist and args.use_reader_alloc:
            return [
                item for offset, item in enumerate(origin_batch)
                if offset % 2 == args.trainer_id
            ]
        return origin_batch

    need_save = bool(int(os.getenv("SAVE", "0")))
    model_dir = os.getenv("MODEL_DIR", "")

    if need_save:
        for _ in six.moves.xrange(RUN_STEP):
            (loss,) = exe.run(fetch_list=[avg_cost.name],
                              feed=feeder.feed(get_data()))
        if model_dir:
            io.save_persistables(startup_exe, model_dir, trainer_prog)

    var = np.array(fluid.global_scope().find_var('__fc_b__').get_tensor())
    flat_values = np.ravel(var).tolist()
    # Python 2 prints the pickle; Python 3 writes the raw bytes.
    if six.PY2:
        print(pickle.dumps(flat_values))
    else:
        sys.stdout.buffer.write(pickle.dumps(flat_values))