def train_loop(main_program):
    exe.run(fluid.default_startup_program())
    embedding_param = fluid.global_scope().find_var(
        embedding_name).get_tensor()
    embedding_param.set(
        load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
        place)

    start_time = time.time()
    batch_id = 0
    for pass_id in range(PASS_NUM):
        for data in train_data():
            cost = exe.run(main_program,
                           feed=feeder.feed(data),
                           fetch_list=[avg_cost])
            cost = cost[0]

            if batch_id % 10 == 0:
                print("avg_cost:" + str(cost))
                if batch_id != 0:
                    print("second per batch: " +
                          str((time.time() - start_time) / batch_id))
                # Set the threshold low to speed up the CI test
                if float(cost) < 60.0:
                    if save_dirname is not None:
                        # TODO(liuyiqun): Change the target to crf_decode
                        fluid.io.save_inference_model(save_dirname, [
                            'word_data', 'verb_data', 'ctx_n2_data',
                            'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
                            'ctx_p2_data', 'mark_data'
                        ], [feature_out], exe)
                    return

            batch_id = batch_id + 1
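# --- Usage sketch (added; not from the original sources). Every snippet in
# this collection relies on the same scope pattern: look a parameter up in
# fluid.global_scope(), read it out as numpy, and write a new value back in
# place. A minimal self-contained illustration, with hypothetical names:
import numpy as np
import paddle.fluid as fluid

def overwrite_param_example(param_name, new_value, place):
    var = fluid.global_scope().find_var(param_name)  # None if name is absent
    tensor = var.get_tensor()
    old = np.array(tensor)  # copy the current value out as a numpy array
    tensor.set(new_value.astype(old.dtype), place)  # overwrite in place
    return old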
def load_weights(self, data_path, exe, place, layer_load):
    data_dict = np.load(data_path).item()
    for op_name in data_dict:
        if op_name in layer_load:
            for param_name, data in data_dict[op_name].items():
                try:
                    if param_name == "weights":
                        name = op_name + ".w_0"
                    elif param_name == "biases":
                        name = op_name + ".b_0"
                    v = fluid.global_scope().find_var(name)
                    w = v.get_tensor()
                    w.set(data.reshape(w.shape()), place)
                    print(str(op_name) + " " + str(param_name) +
                          " loaded successfully")
                except ValueError as e:
                    print(str(e))
        else:
            print("weights that need no loading: " + str(op_name))
def preparefc():
    # Use test_program to predict features for all classes
    warm_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    warmup_reader = paddle.batch(reader.warmup(args),
                                 batch_size=args.test_batch_size,
                                 drop_last=False)
    # fc_class.w_0
    fc_class_name = 'fc_class.w_0'
    classfeas = {}
    classfeasnum = {}
    for batch_id, data in enumerate(warmup_reader()):
        test_outputlist = exe.run(test_prog,
                                  fetch_list=test_fetch_list,
                                  feed=warm_feeder.feed(data))
        label_vals = np.asarray([x[1] for x in data])
        fea_vals = test_outputlist[0]
        for label_val, fea_val in zip(label_vals, fea_vals):
            # L2-normalize the feature, then keep a running mean per class
            fea_val /= np.sum(fea_val * fea_val)**0.5
            if label_val not in classfeas:
                classfeas[label_val] = fea_val
                classfeasnum[label_val] = 1
            else:
                classfeas[label_val] = (
                    classfeas[label_val] * classfeasnum[label_val] +
                    fea_val) / (classfeasnum[label_val] + 1)
                classfeasnum[label_val] += 1
    assert len(classfeas) == args.class_dim
    v = fluid.global_scope().find_var(fc_class_name)
    w = v.get_tensor()
    newfcvalue = np.transpose(
        np.vstack([
            classfeas[i] / np.sum(classfeas[i] * classfeas[i])**0.5
            for i in range(args.class_dim)
        ]))
    print(np.array(w).shape)
    print(newfcvalue.shape, newfcvalue.dtype)
    #np.save('newfc.npy', newfcvalue)
    #np.save('oldfc.npy', np.array(w))
    #newfcvalue = np.load('oldfc.npy')
    w.set(newfcvalue, place)
def train_loop(main_program):
    exe.run(fluid.default_startup_program())
    embedding_param = fluid.global_scope().find_var(
        embedding_name).get_tensor()
    embedding_param.set(
        load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
        place)

    start_time = time.time()
    batch_id = 0
    for pass_id in range(PASS_NUM):
        chunk_evaluator.reset(exe)
        for data in train_data():
            cost, precision, recall, f1_score = exe.run(
                main_program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost] + chunk_evaluator.metrics)
            pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(
                exe)

            if batch_id % 10 == 0:
                print("avg_cost:" + str(cost) + " precision:" +
                      str(precision) + " recall:" + str(recall) +
                      " f1_score:" + str(f1_score) + " pass_precision:" +
                      str(pass_precision) + " pass_recall:" +
                      str(pass_recall) + " pass_f1_score:" +
                      str(pass_f1_score))
                if batch_id != 0:
                    print("second per batch: " +
                          str((time.time() - start_time) / batch_id))
                # Set the threshold low to speed up the CI test
                if float(pass_precision) > 0.01:
                    if save_dirname is not None:
                        # TODO(liuyiqun): Change the target to crf_decode
                        fluid.io.save_inference_model(save_dirname, [
                            'word_data', 'verb_data', 'ctx_n2_data',
                            'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
                            'ctx_p2_data', 'mark_data'
                        ], [feature_out], exe)
                    return

            batch_id = batch_id + 1
def create_test_model(self):
    """
    Create a model for test.
    :return:
    """
    x = fluid.data(name='x', shape=[None, 13], dtype='float32')
    y = fluid.data(name='y', shape=[None, 1], dtype='float32')
    param_attr = ParamAttr(name="fc_0.w_0")
    bias_attr = ParamAttr(name="fc_0.b_0")
    y_predict = fluid.layers.fc(input=x,
                                size=1,
                                param_attr=param_attr,
                                bias_attr=bias_attr)
    main_prog = fluid.default_main_program()
    startup_program = fluid.default_startup_program()
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)
    if not os.path.exists(self.raw_model_dir):
        os.makedirs(self.raw_model_dir)
    fluid.io.save_inference_model(self.raw_model_dir, ['x'], [y_predict],
                                  exe)

    vars = ['fc_0.w_0', 'fc_0.b_0']
    vars_tensor = [[[-1.0788183212280273], [2.1307122707366943],
                    [-2.646815538406372], [1.6547845602035522],
                    [-2.13144588470459], [3.6621456146240234],
                    [-1.553664207458496], [0.18727444112300873],
                    [-2.3649044036865234], [-3.407580852508545],
                    [-4.058014392852783], [1.4958711862564087],
                    [-3.9899468421936035]], [22.361257553100586]]
    global_block = main_prog.global_block()
    g_scope = fluid.global_scope()
    for var, tensor in zip(vars, vars_tensor):
        param = g_scope.find_var(var)
        param.get_tensor().set(tensor, place)
        variable = global_block.var(var)
        fluid.io.save_vars(executor=exe,
                           dirname=self.raw_model_dir,
                           vars=[variable],
                           filename=var)
def transpile(program=None):
    """
    Transpile Paddle program into MPC program.

    Args:
        program: The plain Paddle model program, default to
            default_main_program.

    Returns: The MPC program.
    """
    if program is None:
        program = fluid.default_main_program()

    place = fluid.CPUPlace()
    if program.num_blocks > 1:
        raise NotImplementedError(
            "The number of blocks in current main program "
            "is {}, which is not supported in this version."
            .format(program.num_blocks))

    global_block = program.global_block()
    g_scope = fluid.global_scope()

    mpc_vars_names = _transpile_type_and_shape(block=global_block)

    # encrypt tensor values for each variable in mpc_var_names
    for mpc_var_name in mpc_vars_names:
        if g_scope.find_var(mpc_var_name) is not None:
            param = g_scope.find_var(mpc_var_name)
            param_tensor = np.array(param.get_tensor())
            mpc_var = global_block.var(mpc_var_name)
            if mpc_var_name not in plain_vars:
                param.get_tensor()._set_dims(mpc_var.shape)
                # process initialized params that should be 0
                set_tensor_value = np.array(
                    [param_tensor, param_tensor]).astype(np.int64)
                param.get_tensor().set(set_tensor_value, place)
            #else:
            #    param.get_tensor().set(
            #        np.array(param.get_tensor()).astype('float64'), place)

    # trigger sync to replace old ops.
    op_num = global_block.desc.op_size()
    _ = global_block.desc.append_op()
    global_block.desc._remove_op(op_num, op_num + 1)
    return program
def generate_quantized_model(self,
                             model_path,
                             algo="KL",
                             quantizable_op_type=["conv2d"],
                             is_full_quantize=False,
                             is_use_cache_file=False,
                             is_optimize_model=False,
                             batch_size=10,
                             batch_nums=10,
                             is_data_loader=False):
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.global_scope()
    val_reader = paddle.dataset.mnist.train()

    def val_data_generator():
        batches = []
        for data in val_reader():
            batches.append(data[0].reshape(1, 28, 28))
            if len(batches) == batch_size:
                batches = np.asarray(batches)
                yield {"x": batches}
                batches = []

    ptq = PostTrainingQuantization(
        executor=exe,
        model_dir=model_path,
        model_filename='model.pdmodel',
        params_filename='model.pdiparams',
        sample_generator=val_reader if not is_data_loader else None,
        data_loader=val_data_generator if is_data_loader else None,
        batch_size=batch_size,
        batch_nums=batch_nums,
        algo=algo,
        quantizable_op_type=quantizable_op_type,
        is_full_quantize=is_full_quantize,
        optimize_model=is_optimize_model,
        is_use_cache_file=is_use_cache_file)
    ptq.quantize()
    ptq.save_quantized_model(
        self.int8_model_path,
        model_filename='model.pdmodel',
        params_filename='model.pdiparams')
def trainLoop():
    batches = DataSet.get_batch_generator(1024, total_step)
    for i, imgs, landmarks_gt, attributes_gt, euler_angles_gt in batches:
        preTime = time.time()
        result = exe.run(fluid.default_main_program(),
                         feed={
                             'img': imgs,
                             'landmark': landmarks_gt,
                             'attribute': attributes_gt,
                             'euler_angle': euler_angles_gt
                         },
                         fetch_list=[weighted_loss, loss, landmarks_pre,
                                     angles_pre])
        nowTime = time.time()
        landmarks = result[2]
        #print('gt', landmarks_gt.shape)
        #print('pre', landmarks.shape)
        landmarks = landmarks.reshape(landmarks.shape[0], -1, 2)  # landmark
        landmarks_gt = landmarks_gt.reshape(landmarks_gt.shape[0], -1,
                                            2)  # landmarks_gt
        lr = np.array(fluid.global_scope().find_var('learning_rate')
                      .get_tensor())
        if i % 1000 == 0 and i != 0:
            print("Model saved")
            save_model(exe, fluid.default_main_program(), model=model)
        if i % 2 == 0:
            nme_list = []
            nme_temp = compute_nme(landmarks, landmarks_gt)
            for item in nme_temp:
                nme_list.append(item)
            # nme
            #print('nme: {:.4f}'.format(np.mean(nme_list)))
            # auc and failure rate
            failureThreshold = 0.1
            auc, failure_rate = compute_auc(nme_list, failureThreshold)
            #print('auc @ {:.1f} failureThreshold: {:.4f}'.format(auc, failureThreshold))
            #print('failure_rate: {:}'.format(failure_rate))
            print("step {:d},lr {:.6f},w_loss {:.6f},loss {:.6f},"
                  "nme: {:.4f},auc {:.1f}, failure_rate: {:}, "
                  "failureThreshold: {:.4f},step_time: {:.3f}".format(
                      i, lr[0], result[0][0], result[1][0],
                      np.mean(nme_list), auc, failure_rate,
                      failureThreshold, nowTime - preTime))
def dump():
    args = parse_args()
    output_data_path = os.path.abspath(args.output_data_path)
    base_datafile = output_data_path + "/" + NOW_DATETIME + "/base/feature"
    base_donefile = output_data_path + "/" + "donefile/" + "base.txt"
    patch_datafile = output_data_path + "/" + NOW_DATETIME + "/patch/feature"
    patch_donefile = output_data_path + "/" + "donefile/" + "patch.txt"

    place = fluid.CPUPlace()
    inference_scope = fluid.Scope()
    startup_program = fluid.framework.Program()
    test_program = fluid.framework.Program()

    with fluid.framework.program_guard(test_program, startup_program):
        loss, auc_var, batch_auc_var, _, data_list = ctr_dnn_model(
            args.embedding_size, args.sparse_feature_dim, False)

        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=data_list, place=place)

        fluid.io.load_persistables(
            executor=exe,
            dirname=args.model_path,
            main_program=fluid.default_main_program())

        # Dump embedding
        t = np.array(fluid.global_scope().find_var('SparseFeatFactors')
                     .get_tensor())
        if not os.access(os.path.dirname(base_datafile), os.F_OK):
            os.makedirs(os.path.dirname(base_datafile))
        with open(base_datafile, "wb") as f:
            writer = SequenceFileWriter(f)
            for i in range(0, t.shape[0]):
                key_bytes = struct.pack('Q', i)
                row_bytes = struct.pack('%sf' % t.shape[1], *t[i])
                writer.write(key_bytes, row_bytes)

        write_donefile(base_datafile, base_donefile)
def resave_model(feed_kv):
    if len(mobile_model_path) > 0:
        pp_green("mobile_model_path is set, stop checking model & params", 1)
        sh("cp {}/* {}".format(mobile_model_path, checked_model_path))
        return
    ops = prog.current_block().ops
    vars = prog.current_block().vars
    # Force every var to be persistable
    p_names = []
    for name in vars:
        name = str(name)
        v = fluid.framework._get_var(name, prog)
        if not v.persistable:
            v.persistable = True
            p_names.append(name)
    outputs = run_model(feed_kv=feed_kv)
    has_found_wrong_shape = False
    # Correct the shape of every var
    for name in vars:
        name = str(name)
        v = vars[name]
        if v.persistable:
            v1 = fluid.global_scope().find_var(name)
            try:
                t1 = v1.get_tensor()
                shape = t1.shape()
            except Exception:
                continue
            if v.desc.shape() != shape:
                has_found_wrong_shape = True
                v.desc.set_shape(shape)
    # Restore the persistable attribute of the vars
    for name in p_names:
        v = fluid.framework._get_var(name, prog)
        v.persistable = False
    if not quantification:
        fluid.io.save_inference_model(dirname=checked_model_path,
                                      feeded_var_names=feeds,
                                      target_vars=fetches,
                                      executor=exe,
                                      main_program=prog,
                                      model_filename="model",
                                      params_filename="params")
    if has_found_wrong_shape:
        pp_red("found wrong shape", 1)
    else:
        pp_green("no wrong shape found", 1)
    pp_green("new model is saved into directory 【{}】".format(
        checked_model_path), 1)
def channel_prune(program, prune_names, prune_ratios, place, only_graph=False):
    """Channel pruning.

    Args:
        program (paddle.fluid.Program): the Program to prune; see
            https://paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/program.html#program
            for an introduction to Program.
        prune_names (list): names of the parameters to prune.
        prune_ratios (list): pruning ratios, aligned one-to-one with the
            parameters in prune_names.
        place (paddle.fluid.CUDAPlace/paddle.fluid.CPUPlace): the device to
            run on.
        only_graph (bool): whether to modify only the network graph. When
            False, both the graph and the parameters in the scope (global
            scope) are modified. Defaults to False.

    Returns:
        paddle.fluid.Program: the pruned Program.
    """
    prog_var_shape_dict = {}
    for var in program.list_vars():
        try:
            prog_var_shape_dict[var.name] = var.shape
        except Exception:
            pass
    index = 0
    for param, ratio in zip(prune_names, prune_ratios):
        origin_num = prog_var_shape_dict[param][0]
        pruned_num = int(round(origin_num * ratio))
        # Back off the ratio until at least one channel survives
        while origin_num == pruned_num:
            ratio -= 0.1
            pruned_num = int(round(origin_num * ratio))
        prune_ratios[index] = ratio
        index += 1
    scope = fluid.global_scope()
    pruner = Pruner()
    program, _, _ = pruner.prune(program,
                                 scope,
                                 params=prune_names,
                                 ratios=prune_ratios,
                                 place=place,
                                 lazy=False,
                                 only_graph=only_graph,
                                 param_backup=False,
                                 param_shape_backup=False)
    return program
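# Usage sketch for channel_prune above (added; 'conv1_weights' and
# 'conv2_weights' are illustrative parameter names): prune 30% of the
# output channels of two conv layers on CPU.
def channel_prune_example(infer_prog):
    import paddle.fluid as fluid
    return channel_prune(infer_prog,
                         prune_names=['conv1_weights', 'conv2_weights'],
                         prune_ratios=[0.3, 0.3],
                         place=fluid.CPUPlace())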
def set_zero(var_name,
             scope=fluid.global_scope(),
             place=fluid.CPUPlace(),
             param_type="int64"):
    """
    Set tensor of a Variable to zero.

    Args:
        var_name(str): name of Variable
        scope(Scope): Scope object, default is fluid.global_scope()
        place(Place): Place object, default is fluid.CPUPlace()
        param_type(str): param data type, default is int64

    Examples:
        set_zero(myvar.name, myscope)
    """
    param = scope.var(var_name).get_tensor()
    param_array = np.zeros(param._get_dims()).astype(param_type)
    param.set(param_array, place)
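# Usage sketch for set_zero above (added; the "accuracy" name filter is an
# assumption, matching the common case of resetting int64 metric state
# tensors between evaluation passes):
def reset_metric_states_example(program):
    import paddle.fluid as fluid
    for var in program.list_vars():
        if var.persistable and "accuracy" in var.name:
            set_zero(var.name, fluid.global_scope(), fluid.CPUPlace())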
def test_sensitivity(self):
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        input = fluid.data(name="image", shape=[None, 1, 28, 28])
        label = fluid.data(name="label", shape=[None, 1], dtype="int64")
        conv1 = conv_bn_layer(input, 8, 3, "conv1")
        conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
        sum1 = conv1 + conv2
        conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
        conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
        sum2 = conv4 + sum1
        conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
        conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
        out = fluid.layers.fc(conv6, size=10, act='softmax')
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    eval_program = main_program.clone(for_test=True)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)

    val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)

    def eval_func(program, scope):
        feeder = fluid.DataFeeder(
            feed_list=['image', 'label'], place=place, program=program)
        acc_set = []
        for data in val_reader():
            acc_np = exe.run(program=program,
                             scope=scope,
                             feed=feeder.feed(data),
                             fetch_list=[acc_top1])
            acc_set.append(float(acc_np[0]))
        acc_val_mean = numpy.array(acc_set).mean()
        print("acc_val_mean: {}".format(acc_val_mean))
        return acc_val_mean

    sensitivity(eval_program,
                fluid.global_scope(), place, ["conv4_weights"], eval_func,
                "./sensitivities_file")
def test_single_pickle_var_static(self):
    # enable static mode
    paddle.enable_static()
    with new_program_scope():
        # create network
        x = paddle.static.data(
            name="x", shape=[None, IMAGE_SIZE], dtype='float32')
        z = paddle.static.nn.fc(x, 128)
        loss = fluid.layers.reduce_mean(z)
        place = fluid.CUDAPlace(
            0) if paddle.fluid.core.is_compiled_with_cuda(
            ) else fluid.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
        prog = paddle.static.default_main_program()
        for var in prog.list_vars():
            if list(var.shape) == [IMAGE_SIZE, 128]:
                tensor = var.get_value()
                break
        scope = fluid.global_scope()
        origin_tensor = np.array(tensor)
        path = 'test_single_pickle_var_static/var'
        paddle.save(tensor, path)
        self.set_zero(prog, place, scope)
        # static load
        lod_static = paddle.load(path)
        np_static = paddle.load(path, return_numpy=True)
        # set_tensor(np.ndarray)
        var.set_value(np_static, scope)
        self.assertTrue(np.array_equal(origin_tensor, np.array(tensor)))
        # set_tensor(LoDTensor)
        self.set_zero(prog, place, scope)
        var.set_value(lod_static, scope)
        self.assertTrue(np.array_equal(origin_tensor, np.array(tensor)))
        # enable dygraph mode
        paddle.disable_static()
        var_dygraph = paddle.load(path)
        np_dygraph = paddle.load(path, return_numpy=True)
        self.assertTrue(np.array_equal(np.array(tensor), np_dygraph))
        self.assertTrue(np.array_equal(np.array(tensor), var_dygraph.numpy()))
def test_2ps_0_load(self):
    # init No.1 server env
    env = {}
    env["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:4001,127.0.0.1:4002"
    env["PADDLE_TRAINERS_NUM"] = str(2)
    env["TRAINING_ROLE"] = "PSERVER"
    env["PADDLE_PORT"] = "4002"
    env["POD_IP"] = "127.0.0.1"
    for k, v in env.items():
        os.environ[k] = str(v)
    """
    array([[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
           [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
           [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],
           [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3],
           [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4],
           [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
           [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6],
           [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7],
           [0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
           [0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9]])
    """
    emb_array = np.arange(0, 1, 0.1).repeat(10).reshape(10, 10)
    fc_array = np.arange(0, 1, 0.1).repeat(10).reshape(10, 10)
    model_path = self.save_origin_model(emb_array, fc_array)

    startup_program = fluid.framework.Program()
    test_program = fluid.framework.Program()
    role = role_maker.PaddleCloudRoleMaker()
    fleet.init(role)
    loss = self.net(emb_array, fc_array)
    strategy = paddle.distributed.fleet.DistributedStrategy()
    strategy.a_sync = True
    optimizer = fluid.optimizer.Adam(1e-3)
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(loss)
    fleet.init_server(model_path)

    emb = np.array(fluid.global_scope().find_var("embedding.block1")
                   .get_tensor())
    assert emb.all() == emb_array[1::2].all()
    shutil.rmtree(model_path)
def load(self, data_path, exe=None, place=None, ignore_missing=False):
    '''Load network weights.
    data_path: The path to the numpy-serialized network weights
    ignore_missing: If true, serialized weights for missing layers are
        ignored.
    '''
    fluid = import_fluid()

    # load fluid model directly
    if os.path.isdir(data_path):
        assert (exe is not None), \
            'must provide a executor to load fluid model'
        fluid.io.load_persistables(executor=exe, dirname=data_path)
        return True

    # load model from a npy file
    if exe is None or place is None:
        if self.paddle_env is None:
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            self.paddle_env = {'place': place, 'exe': exe}
            exe.run(fluid.default_startup_program())
        else:
            place = self.paddle_env['place']
            exe = self.paddle_env['exe']

    data_dict = np.load(data_path).item()
    for op_name in data_dict:
        if op_name == 'caffe2fluid_name_trace':
            self.name_trace = data_dict[op_name]
            continue
        layer = self.layers[op_name]
        for param_name, data in data_dict[op_name].items():
            try:
                name = '%s_%s' % (op_name, param_name)
                v = fluid.global_scope().find_var(name)
                w = v.get_tensor()
                w.set(data.reshape(w.shape()), place)
            except ValueError:
                if not ignore_missing:
                    raise
    return True
def _get_param_all_shares(param_name, share_dirs, model_file):
    """
    Get all shares of one parameter from directories.

    Args:
        param_name: The name of the parameter.
        share_dirs: The directories storing the model shares.
        model_file: The name of the model file.

    Returns:
        All shares of the parameter, as one int64 numpy array.
    """
    exe = fluid.Executor(place=fluid.CPUPlace())
    param_shares = []
    for share_dir in share_dirs:
        _ = fluid.io.load_inference_model(dirname=share_dir,
                                          executor=exe,
                                          model_filename=model_file)
        g_scope = fluid.global_scope()
        param = g_scope.find_var(param_name)
        param_tensor = np.array(param.get_tensor())
        param_shares.append(param_tensor)
    return np.array(param_shares, dtype=np.int64)
def init_params(self, place):
    """
    init embed
    """
    def _load_parameter(pretraining_file, vocab_size, word_emb_dim):
        # Each line of the pretraining file: "<id>\t<word>\t<v1 v2 ... vD>"
        pretrain_word2vec = np.zeros([vocab_size, word_emb_dim],
                                     dtype=np.float32)
        for line in open(pretraining_file, 'r'):
            id, _, vec = line.strip('\r\n').split('\t')
            pretrain_word2vec[int(id)] = list(map(float, vec.split()))
        return pretrain_word2vec

    embedding_param = fluid.global_scope().find_var(
        "wordid_embedding").get_tensor()
    pretrain_word2vec = _load_parameter(self._flags.init_train_params,
                                        self._flags.vocab_size,
                                        self._flags.emb_dim)
    embedding_param.set(pretrain_word2vec, place)
    logging.info("init pretrain word2vec:%s" % self._flags.init_train_params)
def _create_loss_op_desc_(loss):
    shape = [2, 1]
    one_share = mdu.aby3_one_share
    mpc_protocol_index = np.array(fluid.global_scope().find_var(
        "mpc_protocol_index").get_tensor())
    if MpcProtocols(mpc_protocol_index) is MpcProtocols.PRIVC:
        shape = [1]
        one_share = mdu.privc_one_share
    op_desc = backward._create_op_desc_(
        "fill_constant", {},
        {"Out": [backward._append_grad_suffix_(loss.name)]}, {
            "shape": shape,
            "value": one_share,
            "dtype": loss.dtype,
            "force_cpu": False,
            core.op_proto_and_checker_maker.kOpRoleAttrName():
            int(core.op_proto_and_checker_maker.OpRole.Backward) |
            int(core.op_proto_and_checker_maker.OpRole.Loss),
        })
    return op_desc
def program2onnx(model_dir,
                 save_file,
                 model_filename=None,
                 params_filename=None,
                 opset_version=9,
                 enable_onnx_checker=False):
    try:
        import paddle
    except ImportError:
        logging.error(
            "paddlepaddle not installed, use \"pip install paddlepaddle\"")

    v0, v1, v2 = paddle.__version__.split('.')
    if v0 == '0' and v1 == '0' and v2 == '0':
        logging.warning("You are using a develop version of paddlepaddle")
    elif int(v0) <= 1 and int(v1) < 8:
        raise ImportError("paddlepaddle>=1.8.0 is required")

    import paddle2onnx as p2o
    # convert a model saved with 'paddle.fluid.io.save_inference_model'
    if hasattr(paddle, 'enable_static'):
        paddle.enable_static()
    exe = fluid.Executor(fluid.CPUPlace())
    if model_filename is None and params_filename is None:
        [program, feed_var_names, fetch_vars] = fluid.io.load_inference_model(
            model_dir, exe)
    else:
        [program, feed_var_names, fetch_vars] = fluid.io.load_inference_model(
            model_dir,
            exe,
            model_filename=model_filename,
            params_filename=params_filename)
    p2o.program2onnx(
        program,
        fluid.global_scope(),
        save_file,
        feed_var_names=feed_var_names,
        target_vars=fetch_vars,
        opset_version=opset_version,
        enable_onnx_checker=enable_onnx_checker)
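# Usage sketch for program2onnx above (added; the paths, filenames and the
# opset are illustrative): export a model saved with
# fluid.io.save_inference_model to ONNX.
def export_onnx_example():
    program2onnx('./infer_model',
                 'model.onnx',
                 model_filename='model',
                 params_filename='params',
                 opset_version=11)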
def train_loop(exe, trainer_prog, trainer_id=0, reader=train_reader):
    embedding_name = 'emb'
    embedding_param = fluid.global_scope().find_var(
        embedding_name).get_tensor()
    embedding_param.set(word_vector_values, place)

    batch_id = 0
    for pass_id in range(num_passes):
        chunk_evaluator.reset(exe)
        start_time = time.time()
        with profiler.profiler(
                "CPU", 'total',
                profile_path="/usr/local/nvidia/lib64/tmp") as prof:
            for data in reader():
                cost, batch_precision, batch_recall, batch_f1_score = exe.run(
                    trainer_prog,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost] + chunk_evaluator.metrics)
                if batch_id % 5 == 0:
                    print("Pass " + str(pass_id) + ", Batch " +
                          str(batch_id) + ", Cost " + str(cost[0]) +
                          ", Precision " + str(batch_precision[0]) +
                          ", Recall " + str(batch_recall[0]) +
                          ", F1_score " + str(batch_f1_score[0]))
                batch_id = batch_id + 1

        pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(exe)
        spent = time.time() - start_time
        print("pass_id: %d, precision: %f, recall: %f, f1: %f, "
              "spent: %f, speed: %f" %
              (pass_id, pass_precision, pass_recall, pass_f1_score, spent,
               14987.0 / spent))
        pass_precision, pass_recall, pass_f1_score = test(
            exe, chunk_evaluator, inference_program, test_reader, place)
        print("[TestSet] pass_id:" + str(pass_id) + " pass_precision:" +
              str(pass_precision) + " pass_recall:" + str(pass_recall) +
              " pass_f1_score:" + str(pass_f1_score))
def train_loop(main_program):
    exe.run(fluid.default_startup_program())
    embedding_param = fluid.global_scope().find_var(
        embedding_name).get_tensor()
    embedding_param.set(
        load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
        place)

    start_time = time.time()
    batch_id = 0
    for pass_id in range(PASS_NUM):
        for data in train_data():
            cost = exe.run(main_program,
                           feed=feeder.feed(data),
                           fetch_list=[avg_cost])
            cost = cost[0]

            if batch_id % 10 == 0:
                print("avg_cost:" + str(cost))
                if batch_id != 0:
                    print("second per batch: " +
                          str((time.time() - start_time) / batch_id))
                # Set the threshold low to speed up the CI test
                if float(cost) < 80.0:
                    if save_dirname is not None:
                        # TODO(liuyiqun): Change the target to crf_decode
                        fluid.io.save_inference_model(save_dirname, [
                            'word_data', 'verb_data', 'ctx_n2_data',
                            'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
                            'ctx_p2_data', 'mark_data'
                        ], [feature_out], exe)
                    return

            batch_id = batch_id + 1

    raise RuntimeError(
        "This model should save_inference_model and return, but not reach "
        "here, please check!")
def from_pretrained(self, embeds, place, scale=0.05):
    assert len(embeds) == self.num_embeddings
    assert len(embeds[0]) == self.embedding_dim
    embeds = np.array(embeds, dtype='float32')
    num_known = 0
    for i in range(len(embeds)):
        if np.all(embeds[i] == 0):
            embeds[i] = np.random.uniform(
                low=-scale, high=scale, size=self.embedding_dim)
        else:
            num_known += 1
    if self.padding_idx is not None:
        embeds[self.padding_idx] = 0
    embedding_param = fluid.global_scope().find_var(
        self.param_attr.name).get_tensor()
    embedding_param.set(embeds, place)
    print("{} words have pretrained embeddings ".format(num_known) +
          "(coverage: {:.3f})".format(num_known / self.num_embeddings))
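# Usage sketch for from_pretrained above (added; `emb_layer` and `vectors`
# are hypothetical names): all-zero rows of `vectors` are re-initialized
# uniformly in [-scale, scale], and the padding row, if configured, stays
# zero.
def init_from_pretrained_example(emb_layer, vectors):
    import paddle.fluid as fluid
    emb_layer.from_pretrained(vectors, fluid.CPUPlace(), scale=0.05)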
def resave_model(feed_kv):
    ops = prog.current_block().ops
    vars = prog.current_block().vars
    # Force every var to be persistable
    p_names = []
    for name in vars:
        name = str(name)
        v = fluid.framework._get_var(name, prog)
        if not v.persistable:
            v.persistable = True
            p_names.append(name)
    outputs = run_model(feed_kv=feed_kv)
    has_found_wrong_shape = False
    # Correct the shape of every var
    for name in vars:
        name = str(name)
        v = vars[name]
        if v.persistable:
            v1 = fluid.global_scope().find_var(name)
            try:
                t1 = v1.get_tensor()
                shape = t1.shape()
            except Exception as e:
                print_e(e)
                continue
            if v.desc.shape() != shape:
                has_found_wrong_shape = True
                v.desc.set_shape(shape)
    # Restore the persistable attribute of the vars
    for name in p_names:
        v = fluid.framework._get_var(name, prog)
        v.persistable = False
    if has_found_wrong_shape:
        pp_red("found wrong shape", 1)
    else:
        pp_green("no wrong shape found", 1)
    pp_green("new model is saved into directory 【{}】".format(
        checked_model_path), 1)
def __init__(self,
             conf,
             npz_config,
             scope=None,
             cell_type='gru',
             embed_regular=0.0,
             output_type='click',
             output_dim=2):
    super(UniRNN, self).__init__()
    self.conf = conf
    self.npz_config = npz_config
    self.data_attributes = conf.data_attributes
    # feature related initialization
    self.item_slot_names = conf.item_slot_names
    self.recent_slot_names = conf.recent_slot_names
    self.label_slot_names = conf.label_slot_names
    self.shared_embedding_names = conf.shared_embedding_names
    self._cell_type = cell_type
    self._embed_regular = embed_regular
    self._output_type = output_type
    self._output_dim = output_dim
    self.hidden_size = 32
    assert self._output_type in ['click', 'credit', 'click_credit',
                                 'rate'], (self._output_type)
    self.scope = fluid.global_scope() if scope is None else scope
    # guard with self.scope so the default global scope is used when no
    # scope is passed in
    with fluid.scope_guard(self.scope):
        with fluid.unique_name.guard():
            self._create_params()
def extract_weights(args):
    # add ERNIE to environment
    print('extract weights start'.center(60, '='))
    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    args.max_seq_len = 512
    args.use_fp16 = False
    args.num_labels = 2
    args.loss_scaling = 1.0
    print('model config:')
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            _, _ = create_model(
                args, pyreader_name='train', ernie_config=ernie_config)
    fluid.io.load_vars(
        exe,
        args.init_pretraining_params,
        main_program=test_prog,
        predicate=if_exist)
    state_dict = collections.OrderedDict()
    weight_map = build_weight_map()
    for ernie_name, pytorch_name in weight_map.items():
        fluid_tensor = fluid.global_scope().find_var(ernie_name).get_tensor()
        fluid_array = np.array(fluid_tensor)
        if 'w_0' in ernie_name:
            fluid_array = fluid_array.transpose()
        state_dict[pytorch_name] = fluid_array
        print(f'{ernie_name} -> {pytorch_name} {fluid_array.shape}')
    print('extract weights done!'.center(60, '='))
    return state_dict
def test_with_asp_and_pure_fp16(self):
    fleet.init(is_collective=True)
    train_prog, startup_prog = fluid.Program(), fluid.Program()
    with paddle.static.amp.fp16_guard():
        avg_cost, strategy, \
            input_x, input_y = self.net(train_prog, startup_prog)
    strategy.amp = True
    strategy.amp_configs = {'use_pure_fp16': True}

    with fluid.program_guard(train_prog, startup_prog):
        with paddle.static.amp.fp16_guard():
            optimizer = paddle.optimizer.Momentum(
                learning_rate=0.01, multi_precision=True)
            optimizer = fleet.distributed_optimizer(
                optimizer, strategy=strategy)
            optimizer.minimize(avg_cost)

    place = fluid.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda(
    ) else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[input_x, input_y], place=place)
    exe.run(startup_prog)
    optimizer.amp_init(place)

    sparsity.prune_model(train_prog)

    data = (np.random.randn(64, 32), np.random.randint(2, size=(64, 1)))
    exe.run(train_prog, feed=feeder.feed([data]))

    for param in train_prog.global_block().all_parameters():
        if ASPHelper._is_supported_layer(train_prog, param.name):
            mat = np.array(fluid.global_scope().find_var(param.name)
                           .get_tensor())
            self.assertTrue(
                paddle.fluid.contrib.sparsity.check_sparsity(
                    mat.T, n=2, m=4))
def convert_paddle_to_dict(args: object, dict_path: str) -> None:
    check_exists(args.init_checkpoint)
    check_exists(args.ernie_config_path)

    ernie_config = ErnieConfig(args.ernie_config_path)
    # ernie_config.print_config()

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    startup_prog = fluid.Program()
    test_program = fluid.Program()
    with fluid.program_guard(test_program, startup_prog):
        with fluid.unique_name.guard():
            _, _ = create_model(
                args,
                pyreader_name='test_reader',
                ernie_config=ernie_config,
                is_classify=True)

    exe.run(startup_prog)
    init_pretraining_params(
        exe,
        args.init_checkpoint,
        main_program=test_program,
        use_fp16=args.use_fp16)

    name2params = {}
    prefix = args.init_checkpoint
    for var in startup_prog.list_vars():
        path = os.path.join(prefix, var.name)
        if os.path.exists(path):
            cur_tensor = fluid.global_scope().find_var(var.name).get_tensor()
            name2params[var.name] = np.array(cur_tensor)

    joblib.dump(name2params, dict_path)
def shrink_dense_table(self, decay, emb_dim=11, scope=None, table_id=None):
    """
    shrink batch_sum in pserver by multiplying by decay

    Args:
        decay(float): the decay rate, usually range in (0, 1)
        emb_dim(int): one element's length in datanorm layer
        scope(Scope): Scope object, default is fluid.global_scope()
        table_id(int): table id of shrinking dense table. None means shrink
            all, you should specify it when using multiple scopes, default
            is None.

    Example:
        >>> fleet.shrink_dense_table(0.98, 11, myscope1, 1)
        >>> fleet.shrink_dense_table(0.98, 11, myscope1, 2)
        >>> fleet.shrink_dense_table(0.98, 11, myscope2, 3)
    """
    if scope is None:
        scope = fluid.global_scope()
    self._role_maker._barrier_worker()
    if self._role_maker.is_first_worker():
        for tp in self._opt_info["fleet_desc"].trainer_param:
            for i in tp.dense_table:
                if table_id is not None and table_id != i.table_id:
                    continue
                var_list = [var for var in i.dense_variable_name]
                skip = False
                for var in var_list:
                    if scope.find_var(var) is None:
                        skip = True
                        break
                if skip:
                    continue
                self._fleet_ptr.shrink_dense_table(i.table_id, scope,
                                                   var_list, decay, emb_dim)
    self._role_maker._barrier_worker()
def get_emb_numpy(tree_node_num, node_emb_size, init_model_path=""):
    all_nodes = fluid.layers.data(
        name="all_nodes",
        shape=[-1, 1],
        dtype="int64",
        lod_level=1,
    )
    output = fluid.layers.embedding(
        input=all_nodes,
        is_sparse=True,
        size=[tree_node_num, node_emb_size],
        param_attr=fluid.ParamAttr(
            name="TDM_Tree_Emb",
            initializer=paddle.fluid.initializer.UniformInitializer()))

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    if init_model_path != "":
        fluid.io.load_persistables(exe, init_model_path)

    return np.array(fluid.global_scope().find_var("TDM_Tree_Emb")
                    .get_tensor())
def residual_block_quant(self,
                         activation_quant_type,
                         weight_quantize_type,
                         quantizable_op_type,
                         for_ci=True):
    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        loss = residual_block(2)
        opt = fluid.optimizer.Adam(learning_rate=0.001)
        opt.minimize(loss)
    place = fluid.CPUPlace()
    graph = IrGraph(core.Graph(main.desc), for_test=False)
    transform_pass = QuantizationTransformPass(
        scope=fluid.global_scope(),
        place=place,
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quantize_type,
        quantizable_op_type=quantizable_op_type)
    transform_pass.apply(graph)
    if not for_ci:
        marked_nodes = set()
        for op in graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        graph.draw('.', 'quantize_residual_' + activation_quant_type,
                   marked_nodes)
    program = graph.to_program()
    self.check_program(program)
    val_graph = IrGraph(core.Graph(program.desc), for_test=False)
    if not for_ci:
        val_marked_nodes = set()
        for op in val_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                val_marked_nodes.add(op)
        val_graph.draw('.', 'val_residual_' + activation_quant_type,
                       val_marked_nodes)