# Build a tiny conv2d + relu static-graph network, run one forward pass on
# random input, and export it as a Paddle inference model.
paddle.enable_static()

# Random NCHW input blob matching the declared feed variable below.
inp_blob = np.random.randn(1, 3, 4, 4).astype(np.float32)

x = fluid.data(name='xxx', shape=[1, 3, 4, 4], dtype='float32')
test_layer = fluid.layers.conv2d(input=x, num_filters=5, filter_size=(1, 1),
                                 stride=(1, 1), padding=(1, 1),
                                 dilation=(1, 1), groups=1, bias_attr=False)
relu = fluid.layers.relu(test_layer)

exe = fluid.Executor(fluid.CPUPlace())
# Initialize parameters before running the main program.
exe.run(fluid.default_startup_program())
inp_dict = {'xxx': inp_blob}
var = [relu]
res_paddle = exe.run(fluid.default_main_program(),
                     fetch_list=var,
                     feed=inp_dict)

# Save model + params under <sys.argv[1]>/conv2d_relu so it can be reloaded
# for inference (e.g. by a model-conversion test harness).
fluid.io.save_inference_model(os.path.join(sys.argv[1], "conv2d_relu"),
                              list(inp_dict.keys()), var, exe,
                              model_filename="conv2d_relu.pdmodel",
                              params_filename="conv2d_relu.pdiparams")
import paddle.fluid as fluid import paddle import numpy as np from PIL import Image import matplotlib.pyplot as plt from pylab import mpl mpl.rcParams['font.sans-serif'] = ['SimHei'] # 用来显示中文 path = "./" params_dirname = path + "test.inference.model" print("训练后文件夹路径" + params_dirname) # 参数初始化 # place = fluid.CUDAPlace(0) place = fluid.CPUPlace() exe = fluid.Executor(place) # 加载数据 - 数据请在https://github.com/GT-ZhangAcer/Paddle_Example下找到mini_classify_data.zip并解压 datatype = 'float32' with open(path + "data/ocrData.txt", 'rt') as f: a = f.read() def data_reader(): def reader(): for i in range(1, 800): im = Image.open(path + "data/" + str(i) + ".jpg").convert('L') im = np.array(im).reshape(1, 1, 30, 15).astype(np.float32) im = im / 255.0 * 2.0 - 1.0 ''' img = paddle.dataset.image.load_image(path + "data/" + str(i+1) + ".jpg")'''
def train(args):
    """
    Train Program

    Builds train/test fluid Programs for a DAM (Deep Attention Matching)
    network, then runs multi-epoch training with either a py_reader pipeline
    or explicit feeding, periodically saving checkpoints and evaluating on
    the validation set.

    NOTE(review): relies on module-level names (Net, reader, evaluate,
    test_with_feed, test_with_pyreader, get_cards) defined elsewhere in the
    project — confirm against the full file.
    """
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # data data_config
    data_conf = {
        "batch_size": args.batch_size,
        "max_turn_num": args.max_turn_num,
        "max_turn_len": args.max_turn_len,
        "_EOS_": args._EOS_,
    }

    dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size,
              args.emb_size, args.stack_num, args.channel1_num,
              args.channel2_num)

    # ---- build training program ----
    train_program = fluid.Program()
    train_startup = fluid.Program()
    # CE_MODE_X enables continuous-evaluation mode: fix random seeds so runs
    # are reproducible across CI executions.
    if "CE_MODE_X" in os.environ:
        train_program.random_seed = 110
        train_startup.random_seed = 110
    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            if args.use_pyreader:
                train_pyreader = dam.create_py_reader(
                    capacity=10, name='train_reader')
            else:
                dam.create_data_layers()
            loss, logits = dam.create_network()
            # Keep loss/logits tensors alive across iterations so they can
            # be fetched.
            loss.persistable = True
            logits.persistable = True
            # gradient clipping
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByValue(max=1.0, min=-1.0))

            optimizer = fluid.optimizer.Adam(
                learning_rate=fluid.layers.exponential_decay(
                    learning_rate=args.learning_rate,
                    decay_steps=400,
                    decay_rate=0.9,
                    staircase=True))
            optimizer.minimize(loss)
            print("begin memory optimization ...")
            print(time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(time.time())))
            fluid.memory_optimize(train_program)
            print("end memory optimization ...")
            print(time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(time.time())))

    # ---- build test program (same network, no optimizer) ----
    test_program = fluid.Program()
    test_startup = fluid.Program()
    if "CE_MODE_X" in os.environ:
        test_program.random_seed = 110
        test_startup.random_seed = 110
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            if args.use_pyreader:
                test_pyreader = dam.create_py_reader(
                    capacity=10, name='test_reader')
            else:
                dam.create_data_layers()
            loss, logits = dam.create_network()
            loss.persistable = True
            logits.persistable = True

    test_program = test_program.clone(for_test=True)

    # ---- choose device(s) ----
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM',
                                       multiprocessing.cpu_count()))

    print("device count %d" % dev_count)
    print("theoretical memory usage: ")
    print(fluid.contrib.memory_usage(program=train_program,
                                     batch_size=args.batch_size))

    exe = fluid.Executor(place)
    exe.run(train_startup)
    exe.run(test_startup)

    # Test executor shares parameters with the train executor.
    train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                       loss_name=loss.name,
                                       main_program=train_program)
    test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                      main_program=test_program,
                                      share_vars_from=train_exe)

    # Optionally initialize the word embedding from a pickled matrix.
    if args.word_emb_init is not None:
        print("start loading word embedding init ...")
        if six.PY2:
            word_emb = np.array(
                pickle.load(open(args.word_emb_init, 'rb'))).astype('float32')
        else:
            word_emb = np.array(
                pickle.load(open(args.word_emb_init, 'rb'),
                            encoding="bytes")).astype('float32')
        dam.set_word_embedding(word_emb, place)
        print("finish init word embedding ...")

    print("start loading data ...")
    with open(args.data_path, 'rb') as f:
        if six.PY2:
            train_data, val_data, test_data = pickle.load(f)
        else:
            train_data, val_data, test_data = pickle.load(f,
                                                          encoding="bytes")
    print("finish loading data ...")

    val_batches = reader.build_batches(val_data, data_conf)

    batch_num = len(train_data[six.b('y')]) // args.batch_size
    val_batch_num = len(val_batches["response"])

    # Log roughly 100x and save roughly 10x per epoch (per device group).
    print_step = max(1, batch_num // (dev_count * 100))
    save_step = max(1, batch_num // (dev_count * 10))

    print("begin model training ...")
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))

    def train_with_feed(step):
        """
        Train on one epoch data by feeding
        """
        ave_cost = 0.0
        for it in six.moves.xrange(batch_num // dev_count):
            # Build one feed dict per device for the ParallelExecutor.
            feed_list = []
            for dev in six.moves.xrange(dev_count):
                index = it * dev_count + dev
                batch_data = reader.make_one_batch_input(train_batches, index)
                feed_dict = dict(zip(dam.get_feed_names(), batch_data))
                feed_list.append(feed_dict)

            cost = train_exe.run(feed=feed_list, fetch_list=[loss.name])

            ave_cost += np.array(cost[0]).mean()
            step = step + 1
            if step % print_step == 0:
                print("processed: [" +
                      str(step * dev_count * 1.0 / batch_num) +
                      "] ave loss: [" + str(ave_cost / print_step) + "]")
                ave_cost = 0.0

            if (args.save_path is not None) and (step % save_step == 0):
                save_path = os.path.join(args.save_path, "step_" + str(step))
                print("Save model at step %d ... " % step)
                print(time.strftime('%Y-%m-%d %H:%M:%S',
                                    time.localtime(time.time())))
                fluid.io.save_persistables(exe, save_path, train_program)

                # Evaluate on validation set and write scores/results.
                score_path = os.path.join(args.save_path,
                                          'score.' + str(step))
                test_with_feed(test_exe, test_program, dam.get_feed_names(),
                               [logits.name], score_path, val_batches,
                               val_batch_num, dev_count)

                result_file_path = os.path.join(args.save_path,
                                                'result.' + str(step))
                evaluate(score_path, result_file_path)
        return step, np.array(cost[0]).mean()

    def train_with_pyreader(step):
        """
        Train on one epoch with pyreader
        """
        def data_provider():
            """
            Data reader
            """
            for index in six.moves.xrange(batch_num):
                yield reader.make_one_batch_input(train_batches, index)

        train_pyreader.decorate_tensor_provider(data_provider)

        ave_cost = 0.0
        train_pyreader.start()
        while True:
            try:
                cost = train_exe.run(fetch_list=[loss.name])

                ave_cost += np.array(cost[0]).mean()
                step = step + 1
                if step % print_step == 0:
                    print("processed: [" +
                          str(step * dev_count * 1.0 / batch_num) +
                          "] ave loss: [" + str(ave_cost / print_step) + "]")
                    ave_cost = 0.0

                if (args.save_path is not None) and (step % save_step == 0):
                    save_path = os.path.join(args.save_path,
                                             "step_" + str(step))
                    print("Save model at step %d ... " % step)
                    print(time.strftime('%Y-%m-%d %H:%M:%S',
                                        time.localtime(time.time())))
                    fluid.io.save_persistables(exe, save_path, train_program)

                    score_path = os.path.join(args.save_path,
                                              'score.' + str(step))
                    test_with_pyreader(test_exe, test_program, test_pyreader,
                                       [logits.name], score_path, val_batches,
                                       val_batch_num, dev_count)

                    result_file_path = os.path.join(args.save_path,
                                                    'result.' + str(step))
                    evaluate(score_path, result_file_path)
            except fluid.core.EOFException:
                # Reader exhausted: end of epoch.
                train_pyreader.reset()
                break
        return step, np.array(cost[0]).mean()

    # train over different epoches
    global_step, train_time = 0, 0.0
    for epoch in six.moves.xrange(args.num_scan_data):
        # Shuffle with a fixed seed in CE mode for reproducibility.
        shuffle_train = reader.unison_shuffle(
            train_data, seed=110 if ("CE_MODE_X" in os.environ) else None)
        train_batches = reader.build_batches(shuffle_train, data_conf)

        begin_time = time.time()
        if args.use_pyreader:
            global_step, last_cost = train_with_pyreader(global_step)
        else:
            global_step, last_cost = train_with_feed(global_step)

        pass_time_cost = time.time() - begin_time
        train_time += pass_time_cost
        print("Pass {0}, pass_time_cost {1}".format(
            epoch, "%2.2f sec" % pass_time_cost))

    # For internal continuous evaluation
    if "CE_MODE_X" in os.environ:
        card_num = get_cards()
        print("kpis\ttrain_cost_card%d\t%f" % (card_num, last_cost))
        print("kpis\ttrain_duration_card%d\t%f" % (card_num, train_time))
def __init__(self,
             program,
             input_name,
             logits_name,
             predict_name,
             cost_name,
             bounds,
             channel_axis=3,
             preprocess=None):
    """Wrap a fluid Program as an attackable model.

    Args:
        program: fluid.Program containing the network and cost.
        input_name (str): name of the input variable (gradients are taken
            with respect to it).
        logits_name (str): name of the logits variable.
        predict_name (str): name of the prediction variable.
        cost_name (str): name of the scalar cost variable.
        bounds: (min, max) valid input range, forwarded to the base class.
        channel_axis (int): index of the channel axis in the input.
        preprocess: (mean, std)-style preprocess tuple; defaults to (0, 1).

    NOTE(review): `with_gpu` is read from an enclosing/module scope not
    visible here — confirm it is defined before instantiation.
    """
    if preprocess is None:
        preprocess = (0, 1)
    super(PaddleModel, self).__init__(bounds=bounds,
                                      channel_axis=channel_axis,
                                      preprocess=preprocess)
    # Used to compute gradients.
    self._program = program
    # Used only for prediction.
    self._predict_program = program.clone(for_test=True)
    self._place = fluid.CUDAPlace(0) if with_gpu else fluid.CPUPlace()
    self._exe = fluid.Executor(self._place)

    self._input_name = input_name
    self._logits_name = logits_name
    self._predict_name = predict_name
    self._cost_name = cost_name

    # Change all `is_test` attributes to True so that _program only computes
    # gradients without updating parameters (a plain clone would not compute
    # gradients at all).
    import six
    """ for i in six.moves.range(self._program.desc.num_blocks()): block = self._program.desc.block(i) for j in six.moves.range(block.op_size()): op = block.op(j) if op.has_attr('is_test') and op.type != 'batch_norm_grad': # 兼容旧版本 paddle if hasattr(op, 'set_attr'): op.set_attr('is_test', True) else: op._set_attr('is_test', True) """
    for op in self._program.block(0).ops:
        #print("op type is {}".format(op.type))
        if op.type in ["batch_norm"]:
            # Compatibility with older paddle versions: use set_attr if
            # available, otherwise the private _set_attr.
            if hasattr(op, 'set_attr'):
                op.set_attr('is_test', False)
                op.set_attr('use_global_stats', True)
            else:
                op._set_attr('is_test', False)
                op._set_attr('use_global_stats', True)
            op.desc.check_attrs()

    # gradient
    loss = self._program.block(0).var(self._cost_name)
    # Append backward ops and keep d(loss)/d(input).
    param_grads = fluid.backward.append_backward(
        loss, parameter_list=[self._input_name])
    #self._gradient = filter(lambda p: p[0].name == self._input_name,
    #                        param_grads)[0][1]
    self._gradient = param_grads[0][1]
def train(args, config, train_params, train_file_list):
    """Train a (Pyramidbox-style) face-detection model.

    Builds the training program via `build_program`, optionally resumes from
    or fine-tunes a pre-trained model, then trains for `epoc_num` epochs with
    a py_reader pipeline, saving persistables after every pass.

    Args:
        args: parsed command-line arguments (use_gpu, model_save_dir, ...).
        config: data/augmentation config passed to the reader.
        train_params (dict): batch_size, epoc_num, optimizer_method,
            use_pyramidbox, train_images.
        train_file_list: training file list passed to the reader.
    """
    batch_size = train_params["batch_size"]
    epoc_num = train_params["epoc_num"]
    optimizer_method = train_params["optimizer_method"]
    use_pyramidbox = train_params["use_pyramidbox"]

    use_gpu = args.use_gpu
    model_save_dir = args.model_save_dir
    pretrained_model = args.pretrained_model

    # Split the global batch across visible CUDA devices.
    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    batch_size_per_device = batch_size // devices_num
    iters_per_epoc = train_params["train_images"] // batch_size
    num_workers = 8
    is_shuffle = True

    startup_prog = fluid.Program()
    train_prog = fluid.Program()

    #only for ce
    if args.enable_ce:
        # Continuous-evaluation mode: deterministic, single-worker, no
        # pretrained weights.
        is_shuffle = False
        SEED = 102
        startup_prog.random_seed = SEED
        train_prog.random_seed = SEED
        num_workers = 1
        pretrained_model = ""
        if args.batch_num != None:
            iters_per_epoc = args.batch_num

    train_py_reader, fetches, loss = build_program(
        train_params=train_params,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    start_epoc = 0
    if pretrained_model:
        # A purely numeric "pretrained_model" means: resume from the
        # checkpoint saved at that epoch under model_save_dir.
        if pretrained_model.isdigit():
            start_epoc = int(pretrained_model) + 1
            pretrained_model = os.path.join(model_save_dir, pretrained_model)
            print("Resume from %s " % (pretrained_model))

        if not os.path.exists(pretrained_model):
            raise ValueError(
                "The pre-trained model path [%s] does not exist."
                % (pretrained_model))

        def if_exist(var):
            # Load only variables that have a matching file on disk.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe,
                           pretrained_model,
                           main_program=train_prog,
                           predicate=if_exist)

    train_reader = reader.train(config,
                                train_file_list,
                                batch_size_per_device,
                                shuffle=is_shuffle,
                                use_multiprocess=args.use_multiprocess,
                                num_workers=num_workers)
    train_py_reader.decorate_paddle_reader(train_reader)

    if args.parallel:
        train_exe = fluid.ParallelExecutor(main_program=train_prog,
                                           use_cuda=use_gpu,
                                           loss_name=loss.name)

    def save_model(postfix, program):
        # Overwrite any existing checkpoint directory with the same name.
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        print('save models to %s' % (model_path))
        fluid.io.save_persistables(exe, model_path, main_program=program)

    total_time = 0.0
    epoch_idx = 0
    face_loss = 0
    head_loss = 0
    for pass_id in range(start_epoc, epoc_num):
        epoch_idx += 1
        start_time = time.time()
        prev_start_time = start_time
        end_time = 0
        batch_id = 0
        train_py_reader.start()
        while True:
            try:
                prev_start_time = start_time
                start_time = time.time()
                if args.parallel:
                    fetch_vars = train_exe.run(
                        fetch_list=[v.name for v in fetches])
                else:
                    fetch_vars = exe.run(train_prog, fetch_list=fetches)
                end_time = time.time()
                fetch_vars = [np.mean(np.array(v)) for v in fetch_vars]
                face_loss = fetch_vars[0]
                head_loss = fetch_vars[1]
                if batch_id % 10 == 0:
                    if not args.use_pyramidbox:
                        print(
                            "Pass {:d}, batch {:d}, loss {:.6f}, time {:.5f}".
                            format(pass_id, batch_id, face_loss,
                                   start_time - prev_start_time))
                    else:
                        print("Pass {:d}, batch {:d}, face loss {:.6f}, " \
                              "head loss {:.6f}, " \
                              "time {:.5f}".format(pass_id,
                               batch_id, face_loss, head_loss,
                               start_time - prev_start_time))
                batch_id += 1
            except (fluid.core.EOFException, StopIteration):
                # Reader exhausted: end of epoch.
                train_py_reader.reset()
                break
        epoch_end_time = time.time()
        total_time += epoch_end_time - start_time
        save_model(str(pass_id), train_prog)

    # only for ce
    if args.enable_ce:
        gpu_num = get_cards(args)
        print("kpis\teach_pass_duration_card%s\t%s" %
              (gpu_num, total_time / epoch_idx))
        print("kpis\ttrain_face_loss_card%s\t%s" %
              (gpu_num, face_loss))
        print("kpis\ttrain_head_loss_card%s\t%s" %
              (gpu_num, head_loss))
def context(self, trainable=True, pretrained=True):
    """context for transfer learning.

    Args:
        trainable (bool): Set parameters in program to be trainable.
        pretrained (bool) : Whether to load pretrained model.

    Returns:
        inputs (dict): key is 'image', corresponding vaule is image tensor.
        outputs (dict): key is :
            'classification', corresponding value is the result of classification.
            'feature_map', corresponding value is the result of the layer before the fully connected layer.
        context_prog (fluid.Program): program for transfer learning.
    """
    context_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(context_prog, startup_prog):
        with fluid.unique_name.guard():
            image = fluid.layers.data(name="image",
                                      shape=[3, 224, 224],
                                      dtype="float32")
            mobile_net = MobileNetV2()
            output, feature_map = mobile_net.net(
                input=image, class_dim=len(self.label_list), scale=1.0)

            # Prefix all variable names with @HUB_<module_name>@ so this
            # program can coexist with other modules in one scope.
            name_prefix = '@HUB_{}@'.format(self.name)
            inputs = {'image': name_prefix + image.name}
            outputs = {
                'classification': name_prefix + output.name,
                'feature_map': name_prefix + feature_map.name
            }
            add_vars_prefix(context_prog, name_prefix)
            add_vars_prefix(startup_prog, name_prefix)
            # Resolve the prefixed names back to actual Variable objects.
            global_vars = context_prog.global_block().vars
            inputs = {
                key: global_vars[value]
                for key, value in inputs.items()
            }
            outputs = {
                key: global_vars[value]
                for key, value in outputs.items()
            }

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            # pretrained
            if pretrained:

                def _if_exist(var):
                    # Load only variables with a matching file on disk.
                    b = os.path.exists(
                        os.path.join(self.default_pretrained_model_path,
                                     var.name))
                    return b

                fluid.io.load_vars(exe,
                                   self.default_pretrained_model_path,
                                   context_prog,
                                   predicate=_if_exist)
            else:
                exe.run(startup_prog)
            # trainable
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
            return inputs, outputs, context_prog
def train_net(train_reader,
              word_dict,
              network,
              use_gpu,
              parallel,
              save_dirname,
              lr=0.002,
              batch_size=128,
              pass_num=30):
    """
    train network

    Trains a sentiment-classification network selected by name, logging
    average accuracy/cost per pass and saving an inference model per epoch.

    Args:
        train_reader: batched data reader callable.
        word_dict: vocabulary mapping; its size (+1) is the embedding size.
        network (str): one of bilstm_net / bow_net / cnn_net / lstm_net /
            gru_net; returns silently on an unknown name.
        use_gpu (bool): run on CUDA device 0 if True.
        parallel: unused here — the ParallelExecutor decision actually reads
            the module-level `args.is_parallel` (NOTE(review): likely meant
            to use this parameter; confirm against callers).
        save_dirname (str): directory for the per-epoch inference models.
        lr (float): Adagrad learning rate.
        batch_size (int): unused in this body.
        pass_num (int): number of training passes.
    """
    if network == "bilstm_net":
        network = bilstm_net
    elif network == "bow_net":
        network = bow_net
    elif network == "cnn_net":
        network = cnn_net
    elif network == "lstm_net":
        network = lstm_net
    elif network == "gru_net":
        network = gru_net
    else:
        print("unknown network type")
        return
    # word seq data
    data = fluid.layers.data(name="words",
                             shape=[1],
                             dtype="int64",
                             lod_level=1)
    # label data
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    cost, acc, pred = network(data, label, len(word_dict) + 1)
    cost = fluid.layers.mean(cost)
    acc = fluid.layers.mean(acc)

    # set optimizer
    sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
    sgd_optimizer.minimize(cost)

    # set place, executor, datafeeder
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
    # initilize parameters
    exe.run(fluid.default_startup_program())

    # parallelize it
    train_exe = fluid.ParallelExecutor(use_cuda=use_gpu,
                                       loss_name=cost.name) \
        if args.is_parallel else exe

    # start training...
    for pass_id in range(pass_num):
        data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
        for data in train_reader():
            # train a batch
            avg_cost_np, avg_acc_np = train_exe.run(
                feed=feeder.feed(data), fetch_list=[cost.name, acc.name])
            data_size = len(data)
            # Weight by batch size so partial batches are averaged correctly.
            total_acc += data_size * np.sum(avg_acc_np)
            total_cost += data_size * np.sum(avg_cost_np)
            data_count += data_size * len(avg_acc_np)
        avg_cost = total_cost / data_count
        avg_acc = total_acc / data_count
        print("[train info]: pass_id: %d, avg_acc: %f, avg_cost: %f" %
              (pass_id, avg_acc, avg_cost))

        epoch_model = save_dirname + "/" + "epoch" + str(pass_id)
        # save the model
        fluid.io.save_inference_model(epoch_model, ["words"], pred, exe)
def main(use_cuda):
    """
    Advbox demo which demonstrate how to use advbox.

    Loads a pretrained AlexNet classifier, wraps it in a black-box model,
    and runs an untargeted LocalSearchAttack against an input image, saving
    the adversarial example on success.

    NOTE(review): `img_std` / `img_mean` are read from module scope —
    confirm they are defined in the full file.
    """
    class_dim = 1000
    IMG_NAME = 'img'
    LABEL_NAME = 'label'
    # Model path: download
    # http://paddle-imagenet-models.bj.bcebos.com/resnet_50_model.tar
    # and extract it.
    #pretrained_model = "models/resnet_50/115"
    pretrained_model = "models/alexnet/116/"
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name=IMG_NAME,
                              shape=image_shape,
                              dtype='float32')
    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')

    # model definition
    model = AlexNet()
    out = model.net(input=image, class_dim=class_dim)

    # Choose CPU or GPU resources according to the configuration.
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Load model parameters.
    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        logger.info("Load pretrained_model")
        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    logging.info("Build advbox")
    # Advbox demo: black-box attack — pass the test-mode clone of the
    # program in directly.
    m = PaddleBlackBoxModel(
        fluid.default_main_program().clone(for_test=True),
        IMG_NAME,
        LABEL_NAME,
        out.name, (-1, 1),
        channel_axis=0)

    # Untargeted attack.
    # Shape [1,28,28] -> channel_axis=0; shape [28,28,1] -> channel_axis=2.
    attack = LocalSearchAttack(m)

    attack_config = {"R": 200, "r": 1.0}

    test_data = get_image("cat.png")
    original_data = np.copy(test_data)
    # Label for "cat" in ImageNet 2012; see
    # https://blog.csdn.net/LegenDavid/article/details/73335578
    original_label = None
    adversary = Adversary(original_data, original_label)

    logger.info("Non-targeted Attack...")
    adversary = attack(adversary, **attack_config)

    if adversary.is_successful():
        print('attack success, original_label=%d, adversarial_label=%d' %
              (adversary.original_label, adversary.adversarial_label))

        # The adversarial example is stored in adversary.adversarial_example.
        adversary_image = np.copy(adversary.adversarial_example)
        # Convert from [3,224,224] to [224,224,3].
        adversary_image *= img_std
        adversary_image += img_mean
        adversary_image = np.array(adversary_image *
                                   255).astype("uint8").transpose([1, 2, 0])
        im = Image.fromarray(adversary_image)
        im.save("adversary_image.jpg")
    else:
        print('attack failed, original_label=%d' %
              (adversary.original_label))

    logger.info("LocalSearchAttack attack done")
def do_train(args):
    """Train a LAC (lexical analysis) model.

    Builds the model on the default programs, trains with a py_reader over
    `args.train_data`, periodically logs precision/recall/F1, runs
    validation, saves checkpoints, and finally emits CE KPI lines when
    `args.enable_ce` is set.
    """
    train_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    dataset = reader.Dataset(args)
    with fluid.program_guard(train_program, startup_program):
        train_program.random_seed = args.random_seed
        startup_program.random_seed = args.random_seed

        with fluid.unique_name.guard():
            train_ret = creator.create_model(args,
                                             dataset.vocab_size,
                                             dataset.num_labels,
                                             mode='train')
            # Clone for evaluation before adding optimizer ops.
            test_program = train_program.clone(for_test=True)

            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate)
            optimizer.minimize(train_ret["avg_cost"])

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        # Cap the requested CPU workers at what the machine actually has.
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        if (dev_count < args.cpu_num):
            print(
                "WARNING: The total CPU NUM in this machine is %d, which is less than cpu_num parameter you set. "
                "Change the cpu_num from %d to %d" %
                (dev_count, args.cpu_num, dev_count))
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    train_reader = creator.create_pyreader(args,
                                           file_name=args.train_data,
                                           feed_list=train_ret['feed_list'],
                                           place=place,
                                           model='lac',
                                           reader=dataset)

    test_reader = creator.create_pyreader(args,
                                          file_name=args.test_data,
                                          feed_list=train_ret['feed_list'],
                                          place=place,
                                          model='lac',
                                          reader=dataset,
                                          mode='test')

    exe = fluid.Executor(place)
    exe.run(startup_program)

    if args.init_checkpoint:
        utils.init_checkpoint(exe, args.init_checkpoint, train_program)
    if dev_count > 1:
        device = "GPU" if args.use_cuda else "CPU"
        print("%d %s are used to train model" % (dev_count, device))

        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        # exec_strategy.num_threads = dev_count * 6
        build_strategy = fluid.compiler.BuildStrategy()
        # build_strategy.enable_inplace = True

        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    # start training
    num_train_examples = dataset.get_num_examples(args.train_data)
    max_train_steps = args.epoch * num_train_examples // args.batch_size
    print("Num train examples: %d" % num_train_examples)
    print("Max train steps: %d" % max_train_steps)

    ce_info = []
    step = 0
    start_time = time.time()
    for epoch_id in range(args.epoch):
        ce_time = 0
        for data in train_reader():
            # this is for minimizing the fetching op, saving the training speed.
            if step % args.print_steps == 0:
                fetch_list = [
                    train_ret["avg_cost"], train_ret["precision"],
                    train_ret["recall"], train_ret["f1_score"]
                ]
            else:
                fetch_list = []

            outputs = exe.run(
                compiled_prog,
                fetch_list=fetch_list,
                feed=data[0], )
            end_time = time.time()
            if step % args.print_steps == 0:
                avg_cost, precision, recall, f1_score = [
                    np.mean(x) for x in outputs
                ]
                print(
                    "[train] step = %d, loss = %.5f, P: %.5f, R: %.5f, F1: %.5f, elapsed time %.5f"
                    % (step, avg_cost, precision, recall, f1_score,
                       end_time - start_time))
                start_time = time.time()

            if step % args.validation_steps == 0:
                test_process(exe, test_program, test_reader, train_ret)

                ce_time += end_time - start_time
                ce_info.append(
                    [ce_time, avg_cost, precision, recall, f1_score])

            # save checkpoints
            if step % args.save_steps == 0 and step != 0:
                save_path = os.path.join(args.model_save_dir,
                                         "step_" + str(step))
                fluid.io.save_persistables(exe, save_path, train_program)
            step += 1

    if args.enable_ce:
        card_num = get_cards()
        ce_cost = 0
        ce_f1 = 0
        ce_p = 0
        ce_r = 0
        ce_time = 0
        try:
            # Use the second-to-last record to avoid a possibly-partial
            # final step.
            ce_time = ce_info[-2][0]
            ce_cost = ce_info[-2][1]
            ce_p = ce_info[-2][2]
            ce_r = ce_info[-2][3]
            ce_f1 = ce_info[-2][4]
        except:
            print("ce info error")
        print("kpis\teach_step_duration_card%s\t%s" % (card_num, ce_time))
        print("kpis\ttrain_cost_card%s\t%f" % (card_num, ce_cost))
        print("kpis\ttrain_precision_card%s\t%f" % (card_num, ce_p))
        print("kpis\ttrain_recall_card%s\t%f" % (card_num, ce_r))
        print("kpis\ttrain_f1_card%s\t%f" % (card_num, ce_f1))
def main(args):
    """Export an ERNIE classifier as an inference model and run prediction.

    Builds the prediction program, loads checkpoint weights, saves an
    inference model, then runs it through the native AnalysisPredictor over
    `args.predict_set`, logging per-example probabilities and final QPS.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(vocab_path=args.vocab_path,
                            label_map_config=args.label_map_config,
                            max_seq_len=args.max_seq_len,
                            do_lower_case=args.do_lower_case,
                            in_tokens=False,
                            is_inference=True)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM',
                                       multiprocessing.cpu_count()))

    # NOTE(review): this re-assignment duplicates the branch above and makes
    # it redundant — kept as-is since it does not change behavior.
    place = fluid.CUDAPlace(0) if args.use_cuda == True else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, predict_prog)
    else:
        raise ValueError(
            "args 'init_checkpoint' should be set for prediction!")

    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    # Derive the output dir name from the checkpoint dir name.
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    log.info("save inference model to %s" % model_path)
    fluid.io.save_inference_model(model_path,
                                  feed_target_names, [probs],
                                  exe,
                                  main_program=predict_prog)

    # Set config
    #config = AnalysisConfig(args.model_dir)
    #config = AnalysisConfig(os.path.join(model_path, "__model__"), os.path.join(model_path, ""))
    config = AnalysisConfig(model_path)
    if not args.use_cuda:
        log.info("disable gpu")
        config.disable_gpu()

    # Create PaddlePredictor
    predictor = create_paddle_predictor(config)

    predict_data_generator = reader.data_generator(
        input_file=args.predict_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    log.info("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    total_time = 0
    for sample in predict_data_generator():
        src_ids = sample[0]
        sent_ids = sample[1]
        pos_ids = sample[2]
        task_ids = sample[3]
        input_mask = sample[4]
        # task_ids is intentionally not fed to the predictor here.
        inputs = [
            array2tensor(ndarray)
            for ndarray in [src_ids, sent_ids, pos_ids, input_mask]
        ]
        begin_time = time.time()
        outputs = predictor.run(inputs)
        end_time = time.time()
        total_time += end_time - begin_time

        # parse outputs
        output = outputs[0]
        log.info(output.name)
        output_data = output.data.float_data()
        #assert len(output_data) == args.num_labels * args.batch_size
        batch_result = np.array(output_data).reshape((-1, args.num_labels))
        for single_example_probs in batch_result:
            log.info("{} example\t{}".format(index, single_example_probs))
            index += 1
    log.info("qps:{}\ttotal_time:{}\ttotal_example:{}\tbatch_size:{}".format(
        index / total_time, total_time, index, args.batch_size))
# Fit a single-output linear layer (y = Wx + b) to (train_data, y_true) with
# SGD, then export the trained network as an inference model.
# NOTE(review): `outputs` and `res` come from code above this chunk — the
# feature matrix is assumed to be shape [N, 4] and labels [N, 1]; confirm.
train_data = np.array(outputs).astype('float32')
y_true = np.array(res).astype('float32')

# Define the network.
x = fluid.layers.data(name="x", shape=[4], dtype='float32')
y = fluid.layers.data(name="y", shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)

# Define the loss function.
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)

# Define the optimization method.
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.05)
sgd_optimizer.minimize(avg_cost)

# Parameter initialization.
cpu = fluid.CPUPlace()
exe = fluid.Executor(cpu)
exe.run(fluid.default_startup_program())

## Start training: 500 iterations over the full dataset.
for i in range(500):
    outs = exe.run(feed={
        'x': train_data,
        'y': y_true
    },
                   fetch_list=[y_predict.name, avg_cost.name])
    if i % 50 == 0:
        # outs[1][0] is the averaged squared-error cost for this iteration.
        print('iter={:.0f},cost={}'.format(i, outs[1][0]))

# Save the training result.
params_dirname = "result"
fluid.io.save_inference_model(params_dirname, ['x'], [y_predict], exe)

# Start inference.
def main(args):
    """Train and evaluate a graph model on a train/valid/test split.

    Loads the dataset, builds train/test fluid Programs for the model class
    named by `args.model_type`, sets up the optimizer with warmup, creates
    graph dataloaders, optionally loads pretrained parameters, and delegates
    the training loop to `train_and_evaluate`.
    """
    log.info('loading data')
    dataset = Dataset(args)
    # Propagate dataset-derived settings into args for downstream use.
    args.num_class = dataset.num_tasks
    args.eval_metrics = dataset.eval_metrics
    args.task_type = dataset.task_type
    splitted_index = dataset.get_idx_split()
    train_dataset = Subset(dataset, splitted_index['train'])
    valid_dataset = Subset(dataset, splitted_index['valid'])
    test_dataset = Subset(dataset, splitted_index['test'])

    log.info("preprocess finish")
    log.info("Train Examples: %s" % len(train_dataset))
    log.info("Val Examples: %s" % len(valid_dataset))
    log.info("Test Examples: %s" % len(test_dataset))

    train_prog = F.Program()
    startup_prog = F.Program()

    if args.use_cuda:
        dev_list = F.cuda_places()
        place = dev_list[0]
        dev_count = len(dev_list)
    else:
        place = F.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM',
                                       multiprocessing.cpu_count()))
        # dev_count = args.cpu_num

    log.info("building model")
    with F.program_guard(train_prog, startup_prog):
        with F.unique_name.guard():
            # Model class is looked up by name on the Model module.
            graph_model = getattr(Model, args.model_type)(args, dataset)
            train_ds = GraphDataloader(train_dataset,
                                       graph_model.graph_wrapper,
                                       batch_size=args.batch_size)

            num_train_examples = len(train_dataset)
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
            warmup_steps = int(max_train_steps * args.warmup_proportion)

            scheduled_lr, loss_scaling = optimization(
                loss=graph_model.loss,
                warmup_steps=warmup_steps,
                num_train_steps=max_train_steps,
                learning_rate=args.learning_rate,
                train_program=train_prog,
                startup_prog=startup_prog,
                weight_decay=args.weight_decay,
                scheduler=args.lr_scheduler,
                use_fp16=False,
                use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                init_loss_scaling=args.init_loss_scaling,
                incr_every_n_steps=args.incr_every_n_steps,
                decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                incr_ratio=args.incr_ratio,
                decr_ratio=args.decr_ratio)

    # Separate evaluation instance of the model on its own program.
    test_prog = F.Program()
    with F.program_guard(test_prog, startup_prog):
        with F.unique_name.guard():
            _graph_model = getattr(Model, args.model_type)(args, dataset)

    test_prog = test_prog.clone(for_test=True)

    valid_ds = GraphDataloader(valid_dataset,
                               graph_model.graph_wrapper,
                               batch_size=args.batch_size,
                               shuffle=False)
    test_ds = GraphDataloader(test_dataset,
                              graph_model.graph_wrapper,
                              batch_size=args.batch_size,
                              shuffle=False)

    exe = F.Executor(place)
    exe.run(startup_prog)
    # Run the models' own variable initializers on the chosen place.
    for init in graph_model.init_vars:
        init(place)
    for init in _graph_model.init_vars:
        init(place)

    if args.init_pretraining_params is not None:
        init_pretraining_params(exe,
                                args.init_pretraining_params,
                                main_program=startup_prog)

    # Single-trainer defaults (no NCCL2 distributed setup here).
    nccl2_num_trainers = 1
    nccl2_trainer_id = 0

    if dev_count > 1:
        exec_strategy = F.ExecutionStrategy()
        exec_strategy.num_threads = dev_count

        train_exe = F.ParallelExecutor(use_cuda=args.use_cuda,
                                       loss_name=graph_model.loss.name,
                                       exec_strategy=exec_strategy,
                                       main_program=train_prog,
                                       num_trainers=nccl2_num_trainers,
                                       trainer_id=nccl2_trainer_id)
        test_exe = exe
    else:
        train_exe, test_exe = exe, exe

    evaluator = Evaluator(args.dataset_name)

    train_and_evaluate(exe=exe,
                       train_exe=train_exe,
                       valid_exe=test_exe,
                       train_ds=train_ds,
                       valid_ds=valid_ds,
                       test_ds=test_ds,
                       train_prog=train_prog,
                       valid_prog=test_prog,
                       args=args,
                       dev_count=dev_count,
                       evaluator=evaluator,
                       model=graph_model)
    def build_model(self):
        """Build and run the StarGAN training loop.

        Constructs the real-image / source-label / target-label inputs, the
        generator (GTrainer) and discriminator (DTrainer) programs, then
        alternates D and G updates per batch (G every ``n_critic`` batches).
        Optionally runs test-image generation and checkpointing per epoch,
        and emits CE kpi lines at the end when ``enable_ce`` is set.
        """
        data_shape = [None, 3, self.cfg.image_size, self.cfg.image_size]

        image_real = fluid.data(name='image_real',
                                shape=data_shape,
                                dtype='float32')
        label_org = fluid.data(name='label_org',
                               shape=[None, self.cfg.c_dim],
                               dtype='float32')
        label_trg = fluid.data(name='label_trg',
                               shape=[None, self.cfg.c_dim],
                               dtype='float32')
        # used for continuous evaluation
        if self.cfg.enable_ce:
            fluid.default_startup_program().random_seed = 90

        py_reader = fluid.io.PyReader(
            feed_list=[image_real, label_org, label_trg],
            capacity=128,
            iterable=True,
            use_double_buffer=True)

        # Generator and discriminator each own their program/losses.
        gen_trainer = GTrainer(image_real, label_org, label_trg, self.cfg,
                               self.batch_num)
        dis_trainer = DTrainer(image_real, label_org, label_trg, self.cfg,
                               self.batch_num)

        # prepare environment
        place = fluid.CUDAPlace(0) if self.cfg.use_gpu else fluid.CPUPlace()
        py_reader.decorate_batch_generator(
            self.train_reader,
            places=fluid.cuda_places()
            if self.cfg.use_gpu else fluid.cpu_places())

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        if self.cfg.init_model:
            utility.init_checkpoints(self.cfg, exe, gen_trainer, "net_G")
            utility.init_checkpoints(self.cfg, exe, dis_trainer, "net_D")

        ### memory optim
        build_strategy = fluid.BuildStrategy()

        gen_trainer_program = fluid.CompiledProgram(
            gen_trainer.program).with_data_parallel(
                loss_name=gen_trainer.g_loss.name,
                build_strategy=build_strategy)
        dis_trainer_program = fluid.CompiledProgram(
            dis_trainer.program).with_data_parallel(
                loss_name=dis_trainer.d_loss.name,
                build_strategy=build_strategy)

        # used for continuous evaluation
        if self.cfg.enable_ce:
            gen_trainer_program.random_seed = 90
            dis_trainer_program.random_seed = 90

        t_time = 0
        total_train_batch = 0  # used for benchmark

        for epoch_id in range(self.cfg.epoch):
            batch_id = 0
            for data in py_reader():
                if self.cfg.max_iter and total_train_batch == self.cfg.max_iter:  # used for benchmark
                    return
                s_time = time.time()
                # Discriminator update runs every batch.
                d_loss_real, d_loss_fake, d_loss, d_loss_cls, d_loss_gp = exe.run(
                    dis_trainer_program,
                    fetch_list=[
                        dis_trainer.d_loss_real, dis_trainer.d_loss_fake,
                        dis_trainer.d_loss, dis_trainer.d_loss_cls,
                        dis_trainer.d_loss_gp
                    ],
                    feed=data)
                # optimize the generator network (only every n_critic batches)
                if (batch_id + 1) % self.cfg.n_critic == 0:
                    g_loss_fake, g_loss_rec, g_loss_cls, fake_img, rec_img = exe.run(
                        gen_trainer_program,
                        fetch_list=[
                            gen_trainer.g_loss_fake, gen_trainer.g_loss_rec,
                            gen_trainer.g_loss_cls, gen_trainer.fake_img,
                            gen_trainer.rec_img
                        ],
                        feed=data)
                    print("epoch{}: batch{}: \n\
                         g_loss_fake: {}; g_loss_rec: {}; g_loss_cls: {}".
                          format(epoch_id, batch_id, g_loss_fake[0],
                                 g_loss_rec[0], g_loss_cls[0]))

                batch_time = time.time() - s_time
                t_time += batch_time
                if (batch_id + 1) % self.cfg.print_freq == 0:
                    print("epoch{}: batch{}: \n\
                         d_loss_real: {}; d_loss_fake: {}; d_loss_cls: {}; d_loss_gp: {} \n\
                         Batch_time_cost: {}".format(
                        epoch_id, batch_id, d_loss_real[0], d_loss_fake[0],
                        d_loss_cls[0], d_loss_gp[0], batch_time))

                sys.stdout.flush()
                batch_id += 1
                # used for ce
                if self.cfg.enable_ce and batch_id == 100:
                    break

                total_train_batch += 1  # used for benchmark
                # profiler tools
                if self.cfg.profile and epoch_id == 0 and batch_id == self.cfg.print_freq:
                    profiler.reset_profiler()
                elif self.cfg.profile and epoch_id == 0 and batch_id == self.cfg.print_freq + 5:
                    return

            if self.cfg.run_test:
                image_name = fluid.data(name='image_name',
                                        shape=[None, self.cfg.n_samples],
                                        dtype='int32')
                test_py_reader = fluid.io.PyReader(
                    feed_list=[image_real, label_org, label_trg, image_name],
                    capacity=32,
                    iterable=True,
                    use_double_buffer=True)
                test_py_reader.decorate_batch_generator(
                    self.test_reader,
                    places=fluid.cuda_places()
                    if self.cfg.use_gpu else fluid.cpu_places())
                # Reuse the generator's inference program for test images.
                test_program = gen_trainer.infer_program
                utility.save_test_image(epoch_id, self.cfg, exe, place,
                                        test_program, gen_trainer,
                                        test_py_reader)

            if self.cfg.save_checkpoints:
                utility.checkpoints(epoch_id, self.cfg, exe, gen_trainer,
                                    "net_G")
                utility.checkpoints(epoch_id, self.cfg, exe, dis_trainer,
                                    "net_D")

        # used for continuous evaluation; NOTE(review): relies on the loss
        # variables assigned in the last loop iteration above.
        if self.cfg.enable_ce:
            device_num = fluid.core.get_cuda_device_count(
            ) if self.cfg.use_gpu else 1
            print("kpis\tstargan_g_loss_fake_card{}\t{}".format(
                device_num, g_loss_fake[0]))
            print("kpis\tstargan_g_loss_rec_card{}\t{}".format(
                device_num, g_loss_rec[0]))
            print("kpis\tstargan_g_loss_cls_card{}\t{}".format(
                device_num, g_loss_cls[0]))
            print("kpis\tstargan_d_loss_real_card{}\t{}".format(
                device_num, d_loss_real[0]))
            print("kpis\tstargan_d_loss_fake_card{}\t{}".format(
                device_num, d_loss_fake[0]))
            print("kpis\tstargan_d_loss_cls_card{}\t{}".format(
                device_num, d_loss_cls[0]))
            print("kpis\tstargan_d_loss_gp_card{}\t{}".format(
                device_num, d_loss_gp[0]))
            print("kpis\tstargan_Batch_time_cost_card{}\t{}".format(
                device_num, batch_time))
def main(use_cuda):
    """
    Advbox demo which demonstrate how to use advbox.

    Builds a MNIST CNN graph once, then runs the same static-epsilon FGSM
    attack twice: first against the model hardened with
    GaussianAugmentationDefence (params in ./mnist-gad/), then against the
    undefended model (params in ./mnist/), reporting the fooling rate each
    time.

    :param use_cuda: run on GPU (CUDAPlace(0)) when True, else CPU.
    """
    TOTAL_NUM = 500
    IMG_NAME = 'img'
    LABEL_NAME = 'label'

    img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
    # gradient should flow so FGSM can compute d(loss)/d(img)
    img.stop_gradient = False
    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
    logits = mnist_cnn_model(img)
    cost = fluid.layers.cross_entropy(input=logits, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Choose CPU or GPU resources according to the flag
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    BATCH_SIZE = 1
    test_reader = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.mnist.test(), buf_size=128 * 10),
                               batch_size=BATCH_SIZE)

    def _attack_model(params_dir, done_msg):
        """Load parameters from params_dir, attack up to TOTAL_NUM test
        samples with static-epsilon FGSM, print the fooling rate, then
        print done_msg."""
        fluid.io.load_params(exe,
                             params_dir,
                             main_program=fluid.default_main_program())

        # advbox model wrapper around the current main program
        m = PaddleModel(fluid.default_main_program(),
                        IMG_NAME,
                        LABEL_NAME,
                        logits.name,
                        avg_cost.name, (-1, 1),
                        channel_axis=1)
        # Static FGSM: epsilon is fixed (not searched per sample)
        attack = FGSM_static(m)
        attack_config = {"epsilon": 0.01}

        # use test data to generate adversarial examples
        total_count = 0
        fooling_count = 0
        for data in test_reader():
            total_count += 1
            adversary = Adversary(data[0][0], data[0][1])

            # FGSM non-targeted attack
            adversary = attack(adversary, **attack_config)

            if adversary.is_successful():
                fooling_count += 1
            else:
                logger.info('attack failed, original_label=%d, count=%d' %
                            (data[0][1], total_count))

            if total_count >= TOTAL_NUM:
                print(
                    "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
                    % (fooling_count, total_count,
                       float(fooling_count) / total_count))
                break
        print(done_msg)

    # Attack the hardened model first, then the undefended one.
    _attack_model("./mnist-gad/",
                  "fgsm attack done with GaussianAugmentationDefence")
    _attack_model("./mnist/", "fgsm attack done without any defence")
def main(args):
    """Main function: precompute SGC-style propagated features, then train.

    Message passing over the whole graph is run once in a separate
    ``precompute_program``; the gathered per-split features are fed as
    constants into small train/val/test classifier programs.
    """
    dataset = load(args.dataset)

    # normalize: symmetric degree normalization D^{-1/2} (zero-degree nodes
    # keep norm 0 to avoid division by zero)
    indegree = dataset.graph.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    dataset.graph.node_feat["norm"] = np.expand_dims(norm, -1)

    data = expand_data_dim(dataset)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    precompute_program = fluid.Program()
    startup_program = fluid.Program()
    train_program = fluid.Program()
    # NOTE(review): clones are taken before train_program is populated; the
    # val/test graphs are built later under their own program_guard blocks.
    val_program = train_program.clone(for_test=True)
    test_program = train_program.clone(for_test=True)

    # precompute message passing and gather
    initializer = []
    with fluid.program_guard(precompute_program, startup_program):
        gw = pgl.graph_wrapper.StaticGraphWrapper(name="graph",
                                                  place=place,
                                                  graph=dataset.graph)
        cached_h = MessagePassing(gw,
                                  gw.node_feat["words"],
                                  num_layers=args.num_layers,
                                  norm=gw.node_feat['norm'])
        train_cached_h, init = pre_gather(cached_h, 'train',
                                          data['train_index'])
        initializer.append(init)
        val_cached_h, init = pre_gather(cached_h, 'val', data['val_index'])
        initializer.append(init)
        test_cached_h, init = pre_gather(cached_h, 'test', data['test_index'])
        initializer.append(init)

    exe = fluid.Executor(place)
    gw.initialize(place)
    for init in initializer:
        init(place)

    # get train features, val features and test features
    np_train_cached_h, np_val_cached_h, np_test_cached_h = exe.run(
        precompute_program,
        feed={},
        fetch_list=[train_cached_h, val_cached_h, test_cached_h],
        return_numpy=True)

    initializer = []
    with fluid.program_guard(train_program, startup_program):
        with fluid.unique_name.guard():
            train_handle = calculate_loss('train', np_train_cached_h,
                                          data['train_label'],
                                          dataset.num_classes, args)
            initializer += train_handle['initializer']

            adam = fluid.optimizer.Adam(
                learning_rate=args.lr,
                regularization=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=args.weight_decay))
            adam.minimize(train_handle['loss'])

    with fluid.program_guard(val_program, startup_program):
        with fluid.unique_name.guard():
            val_handle = calculate_loss('val', np_val_cached_h,
                                        data['val_label'],
                                        dataset.num_classes, args)
            initializer += val_handle['initializer']

    with fluid.program_guard(test_program, startup_program):
        with fluid.unique_name.guard():
            test_handle = calculate_loss('test', np_test_cached_h,
                                         data['test_label'],
                                         dataset.num_classes, args)
            initializer += test_handle['initializer']

    exe.run(startup_program)
    for init in initializer:
        init(place)

    dur = []
    for epoch in range(args.epochs):
        # Skip the first 3 epochs when measuring per-epoch time (warm-up).
        if epoch >= 3:
            t0 = time.time()

        train_loss_t = exe.run(train_program,
                               feed={},
                               fetch_list=[train_handle['loss']],
                               return_numpy=True)[0]

        if epoch >= 3:
            time_per_epoch = 1.0 * (time.time() - t0)
            dur.append(time_per_epoch)

        val_loss_t, val_acc_t = exe.run(
            val_program,
            feed={},
            fetch_list=[val_handle['loss'], val_handle['acc']],
            return_numpy=True)

        log.info("Epoch %d " % epoch + "(%.5lf sec) " % np.mean(dur) +
                 "Train Loss: %f " % train_loss_t +
                 "Val Loss: %f " % val_loss_t + "Val Acc: %f " % val_acc_t)

    test_loss_t, test_acc_t = exe.run(
        test_program,
        feed={},
        fetch_list=[test_handle['loss'], test_handle['acc']],
        return_numpy=True)
    log.info("Test Accuracy: %f" % test_acc_t)
def run_test_withlabel(args):
    """Evaluate a saved model on the labelled test set.

    Rebuilds the network graph, loads the inference model from
    ``args.model_path_base``, then reports mean MSE (and the derived RMSD)
    over both the validation reader and the labelled test reader.

    :param args: parsed CLI args carrying logfile, model/vocab/batch
        settings and ``model_path_base``.
    """
    out(args.logfile, datetime.datetime.now())
    out(args.logfile, "# python3 " + " ".join(sys.argv))
    log = args.logfile
    trainer_count = fluid.dygraph.parallel.Env().nranks
    # FIX: Env().dev_id is a property, not a method — the original called
    # it (`.dev_id()`), which raises TypeError in the multi-trainer branch.
    # run_train() in this file already accesses it without parentheses.
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id
                            ) if trainer_count > 1 else fluid.CUDAPlace(0)
    out(log, "Loading data...")
    train_data, val_data = load_train_data()
    test_data = load_test_label_data()
    out(log, "Loading model...")
    seq_vocab, bracket_vocab = process_vocabulary(args, train_data)
    # Rebuild the network with dropout disabled for evaluation.
    network = Network(
        seq_vocab,
        bracket_vocab,
        dmodel=args.dmodel,
        layers=args.layers,
        dropout=0,
    )
    exe = fluid.Executor(place)
    paddle.enable_static()
    # NOTE(review): the return value (program/feeds/fetches) is discarded;
    # this call appears to populate parameters into the global scope only.
    fluid.io.load_inference_model(args.model_path_base, exe)

    val_reader = fluid.io.batch(fluid.io.shuffle(reader_creator(
        args, val_data, seq_vocab, bracket_vocab), buf_size=500),
                                batch_size=args.batch_size)
    test_reader = fluid.io.batch(reader_creator(args, test_data, seq_vocab,
                                                bracket_vocab),
                                 batch_size=args.batch_size)

    seq = fluid.data(name="seq", shape=[None], dtype="int64", lod_level=1)
    dot = fluid.data(name="dot", shape=[None], dtype="int64", lod_level=1)
    y = fluid.data(name="label", shape=[None], dtype="float32")
    predictions = network(seq, dot)
    loss = fluid.layers.mse_loss(input=predictions, label=y)
    avg_loss = fluid.layers.mean(loss)

    main_program = fluid.default_main_program()
    test_program = main_program.clone(for_test=True)
    feeder = fluid.DataFeeder(place=place, feed_list=[seq, dot, y])

    # Validation pass: average per-batch MSE.
    val_results = []
    for data in val_reader():
        loss, pred = exe.run(test_program,
                             feed=feeder.feed(data),
                             fetch_list=[avg_loss.name, predictions.name],
                             return_numpy=False)
        loss = np.array(loss)
        val_results.append(loss[0])
    val_loss = sum(val_results) / len(val_results)
    out(
        log, "# Dev Average Loss: {:6.4f} (MSE) -> {:6.4f} (RMSD)".format(
            float(val_loss), math.sqrt(float(val_loss))))

    # Test pass: same metric over the labelled test reader.
    test_results = []
    avg_losses = []
    for data in test_reader():
        loss, pred, gold = exe.run(
            test_program,
            feed=feeder.feed(data),
            fetch_list=[avg_loss.name, predictions.name, y.name],
            return_numpy=False)
        loss = np.array(loss)
        test_results.append(loss[0])
        pred = list(np.array(pred))
        gold = list(np.array(gold))
    test_loss = sum(test_results) / len(test_results)
    out(
        log, "# Test Average Loss: {:6.4f} (MSE) -> {:6.4f} (RMSD)".format(
            float(test_loss), math.sqrt(float(test_loss))))
def main():
    """Quantization-aware training entry point (PaddleDetection style).

    Builds train/eval programs from the config, loads pretrained weights,
    inserts fake-quantize ops via ``quant_aware``, then runs the training
    loop with periodic evaluation and best-model checkpointing.
    """
    if FLAGS.eval is False:
        raise ValueError(
            "Currently only supports `--eval==True` while training in `quantization`."
        )
    env = os.environ
    FLAGS.dist = 'PADDLE_TRAINER_ID' in env \
                    and 'PADDLE_TRAINERS_NUM' in env \
                    and int(env['PADDLE_TRAINERS_NUM']) > 1
    num_trainers = int(env.get('PADDLE_TRAINERS_NUM', 1))
    if FLAGS.dist:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        import random
        # Per-trainer seed so trainers shuffle data differently but
        # deterministically.
        local_seed = (99 + trainer_id)
        random.seed(local_seed)
        np.random.seed(local_seed)
    # NOTE(review): trainer_id is only defined when FLAGS.dist is True; the
    # logging/snapshot conditions below rely on short-circuit evaluation of
    # `not FLAGS.dist or trainer_id == 0` to avoid a NameError.

    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))

    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')

    # build program
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = create(main_arch)
            inputs_def = cfg['TrainReader']['inputs_def']
            feed_vars, train_loader = model.build_inputs(**inputs_def)
            if FLAGS.use_pact:
                # PACT needs gradients w.r.t. the input image.
                feed_vars['image'].stop_gradient = False
            train_fetches = model.train(feed_vars)
            loss = train_fetches['loss']
            lr = lr_builder()
            optimizer = optim_builder(lr)
            optimizer.minimize(loss)

    # parse train fetches
    train_keys, train_values, _ = parse_fetches(train_fetches)
    train_values.append(lr)

    if FLAGS.eval:
        eval_prog = fluid.Program()
        with fluid.program_guard(eval_prog, startup_prog):
            with fluid.unique_name.guard():
                model = create(main_arch)
                inputs_def = cfg['EvalReader']['inputs_def']
                feed_vars, eval_loader = model.build_inputs(**inputs_def)
                fetches = model.eval(feed_vars)
        eval_prog = eval_prog.clone(True)

        eval_reader = create_reader(cfg.EvalReader)
        # When iterable mode, set set_sample_list_generator(eval_reader, place)
        eval_loader.set_sample_list_generator(eval_reader)

        # parse eval fetches
        extra_keys = []
        if cfg.metric == 'COCO':
            extra_keys = ['im_info', 'im_id', 'im_shape']
        if cfg.metric == 'VOC':
            extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
        if cfg.metric == 'WIDERFACE':
            extra_keys = ['im_id', 'im_shape', 'gt_bbox']
        eval_keys, eval_values, eval_cls = parse_fetches(
            fetches, eval_prog, extra_keys)

    # compile program for multi-devices
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    build_strategy.fuse_all_reduce_ops = False
    # only enable sync_bn in multi GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    # sync_bn is force-disabled here regardless of the backbone setting.
    sync_bn = False
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu

    exec_strategy = fluid.ExecutionStrategy()
    # iteration number when CompiledProgram tries to drop local execution scopes.
    # Set it to be 1 to save memory usages, so that unused variables in
    # local execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1
    if FLAGS.dist:
        dist_utils.prepare_for_multi_process(exe, build_strategy,
                                             startup_prog, train_prog)
        exec_strategy.num_threads = 1

    exe.run(startup_prog)

    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }

    ignore_params = cfg.finetune_exclude_pretrained_params \
                 if 'finetune_exclude_pretrained_params' in cfg else []

    fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'

    if cfg.pretrain_weights and fuse_bn and not ignore_params:
        checkpoint.load_and_fusebn(exe, train_prog, cfg.pretrain_weights)
    elif cfg.pretrain_weights:
        checkpoint.load_params(exe,
                               train_prog,
                               cfg.pretrain_weights,
                               ignore_params=ignore_params)

    if FLAGS.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None
    # insert quantize op in train_prog, return type is CompiledProgram
    train_prog_quant = quant_aware(train_prog,
                                   place,
                                   config,
                                   scope=None,
                                   act_preprocess_func=act_preprocess_func,
                                   optimizer_func=optimizer_func,
                                   executor=executor,
                                   for_test=False)

    compiled_train_prog = train_prog_quant.with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    if FLAGS.eval:
        # insert quantize op in eval_prog
        eval_prog = quant_aware(eval_prog,
                                place,
                                config,
                                scope=None,
                                act_preprocess_func=act_preprocess_func,
                                optimizer_func=optimizer_func,
                                executor=executor,
                                for_test=True)
        compiled_eval_prog = fluid.CompiledProgram(eval_prog)

    start_iter = 0

    train_reader = create_reader(cfg.TrainReader,
                                 (cfg.max_iters - start_iter) * devices_num,
                                 cfg,
                                 devices_num=devices_num,
                                 num_trainers=num_trainers)
    # When iterable mode, set set_sample_list_generator(train_reader, place)
    train_loader.set_sample_list_generator(train_reader)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    # if map_type not set, use default 11point, only use in VOC eval
    map_type = cfg.map_type if 'map_type' in cfg else '11point'

    train_stats = TrainingStats(cfg.log_iter, train_keys)
    train_loader.start()
    start_time = time.time()
    end_time = time.time()

    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)
    time_stat = deque(maxlen=cfg.log_iter)
    best_box_ap_list = [0.0, 0]  #[map, iter]

    for it in range(start_iter, cfg.max_iters):
        start_time = end_time
        end_time = time.time()
        time_stat.append(end_time - start_time)
        time_cost = np.mean(time_stat)
        eta_sec = (cfg.max_iters - it) * time_cost
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        outs = exe.run(compiled_train_prog, fetch_list=train_values)
        # Last fetched value is the learning rate (appended above).
        stats = {k: np.array(v).mean() for k, v in zip(train_keys, outs[:-1])}

        train_stats.update(stats)
        logs = train_stats.log()
        if it % cfg.log_iter == 0 and (not FLAGS.dist or trainer_id == 0):
            strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format(
                it, np.mean(outs[-1]), logs, time_cost, eta)
            logger.info(strs)

        if (it > 0 and it % cfg.snapshot_iter == 0 or it == cfg.max_iters - 1) \
                and (not FLAGS.dist or trainer_id == 0):
            save_name = str(it) if it != cfg.max_iters - 1 else "model_final"

            if FLAGS.eval:
                # evaluation
                results = eval_run(exe,
                                   compiled_eval_prog,
                                   eval_loader,
                                   eval_keys,
                                   eval_values,
                                   eval_cls,
                                   cfg=cfg)
                resolution = None
                if 'mask' in results[0]:
                    resolution = model.mask_head.resolution
                box_ap_stats = eval_results(results, cfg.metric,
                                            cfg.num_classes, resolution,
                                            is_bbox_normalized,
                                            FLAGS.output_eval, map_type,
                                            cfg['EvalReader']['dataset'])

                # Keep only the best-mAP checkpoint.
                if box_ap_stats[0] > best_box_ap_list[0]:
                    best_box_ap_list[0] = box_ap_stats[0]
                    best_box_ap_list[1] = it
                    save_checkpoint(exe, eval_prog,
                                    os.path.join(save_dir, "best_model"),
                                    train_prog)
                logger.info("Best test box ap: {}, in iter: {}".format(
                    best_box_ap_list[0], best_box_ap_list[1]))

    train_loader.reset()
def run_train(args):
    """Train the sequence model with periodic validation checkpoints.

    Builds the network and an Adam-optimized main program, then trains for
    ``args.epochs`` epochs, evaluating on the validation reader every
    ``check_every`` processed samples and saving (and pruning) the best
    inference model under ``args.model_path_base``.
    """
    out(args.logfile, datetime.datetime.now())
    out(args.logfile, "# python3 " + " ".join(sys.argv))
    log = args.logfile
    train_data, val_data = load_train_data()
    out(log, "# Training set contains {} Sequences.".format(len(train_data)))
    out(log, "# Validation set contains {} Sequences.".format(len(val_data)))
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id
                            ) if trainer_count > 1 else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    paddle.enable_static()
    out(log, "# Paddle: Using device: {}".format(place))
    out(log, "# Initializing model...")
    seq_vocab, bracket_vocab = process_vocabulary(args, train_data)
    network = Network(
        seq_vocab,
        bracket_vocab,
        dmodel=args.dmodel,
        layers=args.layers,
        dropout=args.dropout,
    )
    main_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    current_processed, total_processed = 0, 0
    # Validate args.checks_per_epoch times per epoch.
    check_every = math.floor((len(train_data) / args.checks_per_epoch))
    best_dev_loss, best_dev_model_path = np.inf, None
    start_time = time.time()
    out(
        log,
        "# Checking validation {} times an epoch (every {} batches)".format(
            args.checks_per_epoch, check_every))
    # NOTE(review): patience/batches_since_dev_update are tracked but never
    # used to stop training in this function.
    patience = check_every * args.checks_per_epoch * 2
    batches_since_dev_update = 0

    train_reader = fluid.io.batch(fluid.io.shuffle(reader_creator(
        args, train_data, seq_vocab, bracket_vocab), buf_size=500),
                                  batch_size=args.batch_size)
    val_reader = fluid.io.batch(fluid.io.shuffle(reader_creator(
        args, val_data, seq_vocab, bracket_vocab), buf_size=500),
                                batch_size=1)

    seq = fluid.data(name="seq", shape=[None], dtype="int64", lod_level=1)
    dot = fluid.data(name="dot", shape=[None], dtype="int64", lod_level=1)
    y = fluid.data(name="label", shape=[None], dtype="float32")
    predictions = network(seq, dot)
    loss = fluid.layers.mse_loss(input=predictions, label=y)
    avg_loss = fluid.layers.mean(loss)
    # Clone before minimize() so the test program carries no optimizer ops.
    test_program = main_program.clone(for_test=True)
    feeder = paddle.fluid.DataFeeder(place=place, feed_list=[seq, dot, y])

    learning_rate = 1e-4
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-08
    optimizer = fluid.optimizer.Adam(
        learning_rate=learning_rate,
        beta1=beta1,
        beta2=beta2,
        epsilon=epsilon,
    )
    optimizer.minimize(avg_loss)
    exe.run(startup_program)
    exe_test = fluid.Executor(place)

    start_epoch_index = 1
    for epoch in itertools.count(start=start_epoch_index):
        if epoch >= args.epochs + 1:
            break
        # Fresh shuffled reader each epoch.
        train_reader = fluid.io.batch(fluid.io.shuffle(reader_creator(
            args, train_data, seq_vocab, bracket_vocab), buf_size=500),
                                      batch_size=args.batch_size)
        out(log, "# Epoch {} starting.".format(epoch))
        epoch_start_time = time.time()
        for batch_index, batch in enumerate(train_reader()):
            batch_loss, pred_values = exe.run(
                main_program,
                feed=feeder.feed(batch),
                fetch_list=[avg_loss.name, predictions.name],
                return_numpy=False)
            batch_loss = np.array(batch_loss)
            pred_values = np.array(pred_values)
            total_processed += len(batch)
            current_processed += len(batch)
            batches_since_dev_update += 1
            out(
                log, "epoch {:,} "
                "batch {:,} "
                "processed {:,} "
                "batch-loss {:.4f} "
                "epoch-elapsed {} "
                "total-elapsed {} "
                "".format(
                    epoch,
                    batch_index + 1,
                    total_processed,
                    float(batch_loss),
                    format_elapsed(epoch_start_time),
                    format_elapsed(start_time),
                ))
            if math.isnan(float(batch_loss[0])):
                sys.exit("got NaN loss, training failed.")
            if current_processed >= check_every:
                current_processed -= (check_every)
                # Validation pass over the (batch_size=1) val reader.
                val_results = []
                for data in val_reader():
                    loss, pred = exe.run(
                        test_program,
                        feed=feeder.feed(data),
                        fetch_list=[avg_loss.name, predictions.name],
                        return_numpy=False)
                    loss = np.array(loss)
                    val_results.append(loss[0])
                val_loss = sum(val_results) / len(val_results)
                out(
                    log,
                    "# Dev Average Loss: {:5.3f} (MSE) -> {:5.3f} (RMSD)".
                    format(float(val_loss), math.sqrt(float(val_loss))))
                if val_loss < best_dev_loss:
                    batches_since_dev_update = 0
                    # Drop the previously saved best model, if any.
                    if best_dev_model_path is not None:
                        path = "{}/{}_dev={:.4f}".format(
                            args.model_path_base, args.model_path_base,
                            best_dev_loss)
                        print("\t\t", best_dev_model_path,
                              os.path.exists(path))
                        if os.path.exists(path):
                            out(
                                log,
                                "* Removing previous model file {}...".format(
                                    path))
                            shutil.rmtree(path)
                    best_dev_loss = val_loss
                    best_dev_model_path = "{}_dev={:.4f}".format(
                        args.model_path_base, val_loss)
                    out(
                        log, "* Saving new best model to {}...".format(
                            best_dev_model_path))
                    if not os.path.exists(args.model_path_base):
                        os.mkdir(args.model_path_base)
                    fluid.io.save_inference_model(
                        args.model_path_base + "/" + best_dev_model_path,
                        ['seq', 'dot'], [predictions], exe)
def train(cfg):
    """Run SA-NAS architecture search with per-architecture training.

    For each NAS search step: sample an architecture, build and compile its
    training program, train for the configured epochs, periodically evaluate
    mIoU, and report the best mIoU back to the SANAS controller as reward.
    """
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    drop_last = True

    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # Yield flattened samples; batching is done by the DataLoader, so
        # this groups per-trainer chunks of BATCH_SIZE // NUM_TRAINERS.
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If use sync batch norm strategy, drop last batch if number of samples
        # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPU
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE can divided by GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # If use multi-gpu training mode, batch data will allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    config_info = {'input_size': 769, 'output_size': 1, 'block_num': 7}
    config = ([(cfg.SLIM.NAS_SPACE_NAME, config_info)])
    factory = SearchSpaceFactory()
    space = factory.get_search_space(config)

    port = cfg.SLIM.NAS_PORT
    server_address = (cfg.SLIM.NAS_ADDRESS, port)
    sa_nas = SANAS(config,
                   server_addr=server_address,
                   search_steps=cfg.SLIM.NAS_SEARCH_STEPS,
                   is_server=cfg.SLIM.NAS_IS_SERVER)
    for step in range(cfg.SLIM.NAS_SEARCH_STEPS):
        # Sample the next candidate architecture from the controller.
        arch = sa_nas.next_archs()[0]

        start_prog = fluid.Program()
        train_prog = fluid.Program()

        data_loader, avg_loss, lr, pred, grts, masks = build_model(
            train_prog, start_prog, arch=arch, phase=ModelPhase.TRAIN)

        cur_flops = flops(train_prog)
        print('current step:', step, 'flops:', cur_flops)

        data_loader.set_sample_generator(data_generator,
                                         batch_size=batch_size_per_dev,
                                         drop_last=drop_last)

        exe = fluid.Executor(place)
        exe.run(start_prog)

        exec_strategy = fluid.ExecutionStrategy()
        # Clear temporary variables every 100 iteration
        if args.use_gpu:
            exec_strategy.num_threads = fluid.core.get_cuda_device_count()
            exec_strategy.num_iteration_per_drop_scope = 100
        build_strategy = fluid.BuildStrategy()

        if cfg.NUM_TRAINERS > 1 and args.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy,
                                                 train_prog)
            exec_strategy.num_threads = 1

        if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
            if dev_count > 1:
                # Apply sync batch norm strategy
                print_info("Sync BatchNorm strategy is effective.")
                build_strategy.sync_batch_norm = True
            else:
                print_info(
                    "Sync BatchNorm strategy will not be effective if GPU device"
                    " count <= 1")
        compiled_train_prog = fluid.CompiledProgram(
            train_prog).with_data_parallel(loss_name=avg_loss.name,
                                           exec_strategy=exec_strategy,
                                           build_strategy=build_strategy)

        # Resume training
        begin_epoch = cfg.SOLVER.BEGIN_EPOCH
        if cfg.TRAIN.RESUME_MODEL_DIR:
            begin_epoch = load_checkpoint(exe, train_prog)
        # Load pretrained model
        elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
            load_pretrained_weights(exe, train_prog,
                                    cfg.TRAIN.PRETRAINED_MODEL_DIR)
        else:
            print_info(
                'Pretrained model dir {} not exists, training from scratch...'.
                format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

        fetch_list = [avg_loss.name, lr.name]

        global_step = 0
        all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
        if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True:
            all_step += 1
        all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

        # NOTE(review): avg_loss (and later lr) are rebound from graph
        # variables to plain floats/arrays below; fetch_list was captured
        # above while they still referred to the graph variables.
        avg_loss = 0.0
        timer = Timer()
        timer.start()
        if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
            raise ValueError(
                ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]"
                 ).format(begin_epoch, cfg.SOLVER.NUM_EPOCHS))

        if args.use_mpio:
            print_info("Use multiprocess reader")
        else:
            print_info("Use multi-thread reader")

        best_miou = 0.0
        for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
            data_loader.start()
            while True:
                try:
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - global_step,
                                               speed)))
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

                except fluid.core.EOFException:
                    # Reader exhausted: end of epoch.
                    data_loader.reset()
                    break
                except Exception as e:
                    print(e)

            if epoch > cfg.SLIM.NAS_START_EVAL_EPOCH:
                ckpt_dir = save_checkpoint(train_prog, '{}_tmp'.format(port))
                _, mean_iou, _, mean_acc = evaluate(cfg=cfg,
                                                    arch=arch,
                                                    ckpt_dir=ckpt_dir,
                                                    use_gpu=args.use_gpu,
                                                    use_mpio=args.use_mpio)
                if best_miou < mean_iou:
                    print('search step {}, epoch {} best iou {}'.format(
                        step, epoch, mean_iou))
                    best_miou = mean_iou

        # Report this architecture's best mIoU as the NAS reward.
        sa_nas.reward(float(best_miou))
y_true = numpy.array([[2.0], [4.0], [6.0], [8.0]]).astype('float32') # 组建网络 x = fluid.data(name="x", shape=[None, 1], dtype='float32') y = fluid.data(name="y", shape=[None, 1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=1, act=None) # 定义损失函数 cost = fluid.layers.square_error_cost(input=y_predict, label=y) avg_cost = fluid.layers.mean(cost) # 选择优化方法 sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01) sgd_optimizer.minimize(avg_cost) # 网络参数初始化 cpu = fluid.CPUPlace() exe = fluid.Executor(cpu) exe.run(fluid.default_startup_program()) # 开始训练,迭代100次 for i in range(100): outs = exe.run(feed={ 'x': train_data, 'y': y_true }, fetch_list=[y_predict, avg_cost]) # 输出训练结果 print(outs)
def train_loop(args, train_program, py_reader, loss, auc_var, batch_auc_var,
               trainer_num, trainer_id):
    """CPU training loop for the CTR model over the Criteo dataset.

    Feeds shuffled batches through a ParallelExecutor, logs loss/AUC per
    batch, and saves an inference model every 1000 batches and at the end
    of every pass (trainer 0 only).

    :param train_program: main program containing the model and optimizer.
    :param py_reader: PyReader to be fed by the batched Criteo reader.
    :param loss, auc_var, batch_auc_var: fetch variables from the model.
    :param trainer_num, trainer_id: sharding parameters for dataset.train().
    """
    dataset = reader.CriteoDataset(args.sparse_feature_dim)
    train_reader = paddle.batch(paddle.reader.shuffle(
        dataset.train([args.train_data_path], trainer_num, trainer_id),
        buf_size=args.batch_size * 100),
                                batch_size=args.batch_size)
    py_reader.decorate_paddle_reader(train_reader)
    # NOTE(review): empty feed-name list is passed to save_inference_model
    # below, so the saved model records no feed targets.
    data_name_list = []

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    exec_strategy = fluid.ExecutionStrategy()
    build_strategy = fluid.BuildStrategy()

    if os.getenv("NUM_THREADS", ""):
        exec_strategy.num_threads = int(os.getenv("NUM_THREADS"))

    cpu_num = int(os.environ.get('CPU_NUM', cpu_count()))
    # Reduce strategy scales better than AllReduce on multi-core CPU runs.
    build_strategy.reduce_strategy = \
        fluid.BuildStrategy.ReduceStrategy.Reduce if cpu_num > 1 \
            else fluid.BuildStrategy.ReduceStrategy.AllReduce

    pe = fluid.ParallelExecutor(use_cuda=False,
                                loss_name=loss.name,
                                main_program=train_program,
                                build_strategy=build_strategy,
                                exec_strategy=exec_strategy)

    exe.run(fluid.default_startup_program())

    for pass_id in range(args.num_passes):
        pass_start = time.time()
        batch_id = 0
        py_reader.start()

        try:
            while True:
                loss_val, auc_val, batch_auc_val = pe.run(
                    fetch_list=[loss.name, auc_var.name, batch_auc_var.name])
                loss_val = np.mean(loss_val)
                auc_val = np.mean(auc_val)
                batch_auc_val = np.mean(batch_auc_val)

                logger.info(
                    "TRAIN --> pass: {} batch: {} loss: {} auc: {}, batch_auc: {}"
                    .format(pass_id, batch_id, loss_val / args.batch_size,
                            auc_val, batch_auc_val))
                # Periodic mid-pass snapshot (skip batch 0).
                if batch_id % 1000 == 0 and batch_id != 0:
                    model_dir = args.model_output_dir + '/batch-' + str(
                        batch_id)
                    if args.trainer_id == 0:
                        fluid.io.save_inference_model(model_dir,
                                                      data_name_list,
                                                      [loss, auc_var], exe)
                batch_id += 1
        except fluid.core.EOFException:
            # Reader exhausted: end of pass.
            py_reader.reset()
        print("pass_id: %d, pass_time_cost: %f" %
              (pass_id, time.time() - pass_start))

        model_dir = args.model_output_dir + '/pass-' + str(pass_id)
        if args.trainer_id == 0:
            fluid.io.save_inference_model(model_dir, data_name_list,
                                          [loss, auc_var], exe)
def infer():
    """Run single-image inference over the test dataset and save results.

    Loads a saved inference model from ``cfg.model_path`` and writes a
    visualization per input image into ``cfg.vis_dir``:
      - 'ACE2P': multi-scale prediction, palette-indexed PNG.
      - 'HumanSeg': single-scale prediction, RGBA output where alpha is a
        thresholded probability mask.
      - other (e.g. 'RoadLine'): single-scale prediction, palette PNG.

    Returns:
        int: 0 on completion.
    """
    if not os.path.exists(cfg.vis_dir):
        os.makedirs(cfg.vis_dir)
    palette = get_palette(cfg.class_num)
    # Display threshold for the human-segmentation result.
    thresh = 120
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # Load the inference model.
    test_prog, feed_name, fetch_list = fluid.io.load_inference_model(
        dirname=cfg.model_path, executor=exe, params_filename='__params__')
    # Load the prediction dataset.
    test_dataset = TestDataSet()
    data_num = test_dataset.data_num
    for idx in range(data_num):
        # Fetch one sample: original image, preprocessed input, name, shape.
        ori_img, image, im_name, im_shape = test_dataset.get_data(idx)
        if image is None:
            print(im_name, 'is None')
            continue
        # Predict.
        if cfg.example == 'ACE2P':
            # ACE2P model uses multi-scale prediction.
            reader = importlib.import_module('reader')
            multi_scale_test = getattr(reader, 'multi_scale_test')
            parsing, logits = multi_scale_test(exe, test_prog, feed_name,
                                               fetch_list, image, im_shape)
        else:
            # HumanSeg / RoadLine models use single-scale prediction.
            result = exe.run(program=test_prog,
                             feed={feed_name[0]: image},
                             fetch_list=fetch_list)
            # NOTE(review): assumes result[0][0] is (classes, H, W) — argmax
            # over axis 0 gives a per-pixel class map; confirm model output.
            parsing = np.argmax(result[0][0], axis=0)
            parsing = cv2.resize(parsing.astype(np.uint8), im_shape[::-1])
        # Save the prediction result.
        result_path = os.path.join(cfg.vis_dir, im_name + '.png')
        if cfg.example == 'HumanSeg':
            # `result` is only set in the non-ACE2P branch above; HumanSeg
            # always takes that branch, so this is safe.
            logits = result[0][0][1] * 255
            logits = cv2.resize(logits, im_shape[::-1])
            # Zero out values below the threshold, then rescale to 0..255.
            ret, logits = cv2.threshold(logits, thresh, 0, cv2.THRESH_TOZERO)
            logits = 255 * (logits - thresh) / (255 - thresh)
            # Attach the segmentation result as the alpha channel.
            rgba = np.concatenate((ori_img, np.expand_dims(logits, axis=2)),
                                  axis=2)
            cv2.imwrite(result_path, rgba)
        else:
            output_im = PILImage.fromarray(np.asarray(parsing, dtype=np.uint8))
            output_im.putpalette(palette)
            output_im.save(result_path)
        if (idx + 1) % 100 == 0:
            print('%d processd' % (idx + 1))
    print('%d processd done' % (idx + 1))
    return 0
def _predict(self,
             test_reader=None,
             model_path=None,
             batch_size=1,
             batch_num=1,
             skip_batch_num=0,
             transform_to_int8=False):
    """Benchmark an image-classification inference model on CPU.

    Loads the model from ``model_path`` (either '__model__' format or
    separate 'model'/'params' files), optionally transforms it to an INT8
    MKL-DNN graph, then runs ``batch_num`` batches from ``test_reader``,
    treating the first ``skip_batch_num`` batches as warm-up.

    Args:
        test_reader: callable returning an iterable of (image, label) batches.
        model_path: directory containing the saved inference model.
        batch_size: samples per batch (used for latency normalization).
        batch_num: max batches to run; <= 0 means run the full reader.
        skip_batch_num: warm-up batches excluded from the averages.
        transform_to_int8: if True, apply QuantInt8MkldnnPass; otherwise
            prepare the graph for FP32 MKL-DNN execution.

    Returns:
        (outputs, acc1_avg, acc5_avg, fps_avg, latency_avg)
    """
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    inference_scope = fluid.executor.global_scope()
    with fluid.scope_guard(inference_scope):
        # Two on-disk layouts are supported: combined '__model__' or
        # separate 'model' + 'params' files.
        if os.path.exists(os.path.join(model_path, '__model__')):
            [inference_program, feed_target_names,
             fetch_targets] = fluid.io.load_inference_model(model_path, exe)
        else:
            [inference_program, feed_target_names,
             fetch_targets] = fluid.io.load_inference_model(
                 model_path, exe, 'model', 'params')
        graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
        if (self._debug):
            graph.draw('.', 'quant_orig', graph.all_op_nodes())
        if (transform_to_int8):
            mkldnn_int8_pass = QuantInt8MkldnnPass(
                _scope=inference_scope, _place=place)
            graph = mkldnn_int8_pass.apply(graph)
        else:
            graph = self._prepare_for_fp32_mkldnn(graph)
        inference_program = graph.to_program()
        # Fixed input shape (C, H, W) — assumes ImageNet-style 3x224x224
        # inputs; samples are reshaped to this below.
        dshape = [3, 224, 224]
        outputs = []
        infer_accs1 = []
        infer_accs5 = []
        fpses = []
        batch_times = []
        total_samples = 0
        iters = 0
        infer_start_time = time.time()
        for data in test_reader():
            if batch_num > 0 and iters >= batch_num:
                break
            # Restart the clock once warm-up is over so totals exclude it.
            if iters == skip_batch_num:
                total_samples = 0
                infer_start_time = time.time()
            images = list(map(lambda x: x[0].reshape(dshape), data))
            images = np.array(images).astype('float32')
            labels = np.array([x[1] for x in data]).astype('int64')
            start = time.time()
            out = exe.run(inference_program,
                          feed={feed_target_names[0]: images},
                          fetch_list=fetch_targets)
            batch_time = (time.time() - start) * 1000  # in miliseconds
            outputs.append(out[0])
            batch_acc1, batch_acc5 = self._get_batch_accuracy(out[0], labels)
            infer_accs1.append(batch_acc1)
            infer_accs5.append(batch_acc5)
            samples = len(data)
            total_samples += samples
            batch_times.append(batch_time)
            fps = samples / batch_time * 1000
            fpses.append(fps)
            iters += 1
            appx = ' (warm-up)' if iters <= skip_batch_num else ''
            _logger.info('batch {0}{5}, acc1: {1:.4f}, acc5: {2:.4f}, '
                         'latency: {3:.4f} ms, fps: {4:.2f}'.format(
                             iters, batch_acc1, batch_acc5,
                             batch_time / batch_size, fps, appx))
        # Postprocess benchmark data: averages exclude the warm-up batches.
        batch_latencies = batch_times[skip_batch_num:]
        batch_latency_avg = np.average(batch_latencies)
        latency_avg = batch_latency_avg / batch_size
        fpses = fpses[skip_batch_num:]
        fps_avg = np.average(fpses)
        infer_total_time = time.time() - infer_start_time
        # NOTE(review): accuracy means include the warm-up batches, unlike
        # the latency/fps averages — confirm that is intentional.
        acc1_avg = np.mean(infer_accs1)
        acc5_avg = np.mean(infer_accs5)
        _logger.info('Total inference run time: {:.2f} s'.format(
            infer_total_time))
        return outputs, acc1_avg, acc5_avg, fps_avg, latency_avg
def infer():
    """Evaluate a trained DCN model on a random half of the test/valid files.

    Builds the DCN network in test mode inside a fresh Scope/Program, loads
    the checkpoint for ``args.test_epoch``, resets the AUC accumulator
    states, and logs running LogLoss and AUC over the test batches. The
    final logged line holds the totals for all test data.
    """
    args = parse_args()
    print(args)
    place = fluid.CPUPlace()
    inference_scope = fluid.Scope()
    # All regular files directly under the test/valid data directory.
    test_valid_files = [
        os.path.join(args.test_valid_data_dir, fname)
        for fname in next(os.walk(args.test_valid_data_dir))[2]
    ]
    # Evaluate on a random 50% sample; fall back to all files if the sample
    # is empty (i.e. fewer than two files).
    test_files = random.sample(test_valid_files,
                               int(len(test_valid_files) * 0.5))
    if not test_files:
        test_files = test_valid_files
    print('test files num {}'.format(len(test_files)))
    criteo_dataset = CriteoDataset()
    criteo_dataset.setup(args.vocab_dir)
    test_reader = criteo_dataset.test_reader(test_files, args.batch_size, 100)
    startup_program = fluid.framework.Program()
    test_program = fluid.framework.Program()
    cur_model_path = os.path.join(args.model_output_dir,
                                  'epoch_' + args.test_epoch, "checkpoint")
    with fluid.scope_guard(inference_scope):
        with fluid.framework.program_guard(test_program, startup_program):
            # Categorical feature name -> vocabulary size, read from the
            # two-column file given by args.cat_feat_num.
            cat_feat_dims_dict = OrderedDict()
            for line in open(args.cat_feat_num):
                spls = line.strip().split()
                assert len(spls) == 2
                cat_feat_dims_dict[spls[0]] = int(spls[1])
            dcn_model = DCN(args.cross_num, args.dnn_hidden_units,
                            args.l2_reg_cross, args.use_bn,
                            args.clip_by_norm, cat_feat_dims_dict,
                            args.is_sparse)
            dcn_model.build_network(is_test=True)
            exe = fluid.Executor(place)
            feeder = fluid.DataFeeder(
                feed_list=dcn_model.data_list, place=place)
            exe.run(startup_program)
            fluid.io.load(fluid.default_main_program(), cur_model_path)
            for var in dcn_model.auc_states:
                # reset auc states
                set_zero(var.name, scope=inference_scope, place=place)
            loss_all = 0
            num_ins = 0
            for batch_id, data_test in enumerate(test_reader()):
                loss_val, auc_val = exe.run(test_program,
                                            feed=feeder.feed(data_test),
                                            fetch_list=[
                                                dcn_model.avg_logloss.name,
                                                dcn_model.auc_var.name
                                            ])
                # num_ins counts batches, not samples (original per-sample
                # count left commented out by the author):
                # num_ins += len(data_test)
                num_ins += 1
                loss_all += loss_val
                # auc_var is a streaming metric, so the last printed value
                # covers all data seen so far.
                logger.info('TEST --> batch: {} loss: {} auc_val: {}'.format(
                    batch_id, loss_all / num_ins, auc_val))
            print(
                'The last log info is the total Logloss and AUC for all test data. '
            )
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="show",
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    """Run a segmentation model over a file list and save visual composites.

    For every input image, predicts a class map, colorizes it with a
    256-entry palette, blends it with the original image, and saves a
    vertically-joined composite PNG under the "show" directory.

    Args:
        cfg: global SegConfig-style object (DATASET/TEST sections).
        vis_file_list: image list file; defaults to cfg.DATASET.VIS_FILE_LIST.
        use_gpu: run on CUDA device 0 when True.
        vis_dir: accepted but NOT used — output goes to hard-coded "show".
        ckpt_dir: checkpoint dir; defaults to cfg.TEST.TEST_MODEL.
        log_writer, local_test, **kwargs: accepted, unused here.
    """
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.VIS_FILE_LIST
    dataset = SegDataset(file_list=vis_file_list,
                         mode=ModelPhase.VISUAL,
                         data_dir=cfg.DATASET.DATA_DIR)
    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit, out = build_model(test_prog, startup_prog,
                                   phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)
    # Generator full colormap for maximum 256 classes
    color_map = get_color_map_list(256)
    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir
    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        # Prefer the new-style fluid.load checkpoint; fall back to legacy
        # per-variable params on any failure.
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except:
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
    # NOTE(review): save_dir is hard-coded to "show", ignoring the vis_dir
    # parameter — looks unintentional; confirm with callers.
    save_dir = "show"
    makedirs(save_dir)
    fetch_list = [pred.name, logit.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)
    img_cnt = 0
    for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
        # Network input spatial size (H, W) — assumes NCHW batches.
        pred_shape = (imgs.shape[2], imgs.shape[3])
        pred, logit = exe.run(program=test_prog,
                              feed={'image': imgs},
                              fetch_list=fetch_list,
                              return_numpy=True)
        num_imgs = pred.shape[0]
        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Add more comments
            res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8)
            img_name = img_names[i]
            res_shape = (res_map.shape[0], res_map.shape[1])
            # Upsample the class map back to the network input size if the
            # model output is smaller.
            if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]:
                res_map = cv2.resize(res_map,
                                     pred_shape,
                                     interpolation=cv2.INTER_NEAREST)
            # Crop away padding, then resize to the original image size.
            valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
            res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
            org_shape = (org_shapes[i, 0], org_shapes[i, 1])
            res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]),
                                 interpolation=cv2.INTER_NEAREST)
            png_fn = to_png_fn(img_name)
            # colorful segment result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)
            pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L')
            pred_mask.putpalette(color_map)
            # pred_mask.save(vis_fn)
            pred_mask_np = np.array(pred_mask.convert("RGB"))
            im_pred = PILImage.fromarray(pred_mask_np)
            # Original image
            # BGR->RGB
            img = cv2.imread(
                os.path.join(cfg.DATASET.DATA_DIR, img_name))[..., ::-1]
            # NOTE(review): the slice above already reverses BGR->RGB, and
            # cvtColor reverses it again — the two likely cancel out; verify
            # the intended channel order.
            im_ori = PILImage.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            # log_writer.add_image("Images/{}".format(img_name), img, epoch)
            # add ground truth (label) images
            im_pred_cat = PILImage.blend(im_ori, im_pred, 0.5)
            im_ori = join(im_ori, im_ori, flag="vertical")
            im_pred_cat = join(im_pred_cat, im_pred, flag="vertical")
            new_img = join(im_ori, im_pred_cat)
            new_img.save(vis_fn)
            img_cnt += 1
            print("#{} show image path: {}".format(img_cnt, vis_fn))
def main(use_cuda):
    """
    Advbox example which demonstrate how to use advbox.

    Builds a classifier, wraps it in a PaddleModel, and runs the CW-L2
    targeted/untargeted attack over up to TOTAL_NUM test images, printing
    the fooling rate and saving attack visualizations.
    """
    # base marco
    TOTAL_NUM = 100
    IMG_NAME = 'image'
    LABEL_NAME = 'label'
    # parse args
    args = parser.parse_args()
    print_arguments(args)
    # parameters from arguments
    class_dim = args.class_dim
    model_name = args.model
    target_class = args.target
    pretrained_model = args.pretrained_model
    image_shape = [int(m) for m in args.image_shape.split(",")]
    if args.log_debug:
        logging.getLogger().setLevel(logging.INFO)
    assert model_name in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    # model definition
    model = models.__dict__[model_name]()
    # declare vars
    image = fluid.layers.data(
        name=IMG_NAME, shape=image_shape, dtype='float32')
    logits = model.net(input=image, class_dim=class_dim)
    # clone program and graph for inference — must happen BEFORE the loss
    # layers below are added to the default program.
    infer_program = fluid.default_main_program().clone(for_test=True)
    # Gradients w.r.t. the input are needed to craft adversarial examples.
    image.stop_gradient = False
    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
    cost = fluid.layers.cross_entropy(input=logits, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    BATCH_SIZE = 1
    test_reader = paddle.batch(
        reader.test(TEST_LIST, DATA_PATH), batch_size=BATCH_SIZE)
    # setup run environment
    enable_gpu = use_cuda and args.use_gpu
    place = fluid.CUDAPlace(0) if enable_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    # advbox demo: model wrapper with pixel bounds (0, 1), channels-last.
    m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME,
                    logits.name, avg_cost.name, (0, 1), channel_axis=3)
    # Adversarial method: CW
    attack = CW_L2(m,
                   learning_rate=0.1,
                   attack_model=model.conv_net,
                   with_gpu=enable_gpu,
                   shape=image_shape,
                   dim=class_dim,
                   confidence_level=0.9,
                   multi_clip=True)
    attack_config = {
        "attack_iterations": 50,
        "c_search_step": 10,
        "c_range": (0.01, 100),
        "c_start": 10,
        "targeted": True
    }
    # reload model vars
    if pretrained_model:
        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))
        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
    # inference
    pred_label = infer(infer_program, image, logits, place, exe)
    # if only inference ,and exit
    if args.inference:
        exit(0)
    print("--------------------adversary-------------------")
    # use test data to generate adversarial examples
    total_count = 0
    fooling_count = 0
    for data in test_reader():
        total_count += 1
        data_img = [data[0][0]]
        filename = data[0][1]
        org_data = data_img[0][0]
        # Seed the adversary with the model's predicted label for this file.
        adversary = Adversary(org_data, pred_label[filename])
        #target attack
        if target_class != -1:
            tlabel = target_class
            adversary.set_target(is_targeted_attack=True, target_label=tlabel)
        adversary = attack(adversary, **attack_config)
        if adversary.is_successful():
            fooling_count += 1
            print(
                'attack success, original_label=%d, adversarial_label=%d, count=%d'
                % (pred_label[filename], adversary.adversarial_label,
                   total_count))
            #output original image, adversarial image and difference image
            generation_image(total_count, org_data, pred_label[filename],
                             adversary.adversarial_example,
                             adversary.adversarial_label, "CW")
        else:
            print('attack failed, original_label=%d, count=%d' %
                  (pred_label[filename], total_count))
        if total_count >= TOTAL_NUM:
            print(
                "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
                % (fooling_count, total_count,
                   float(fooling_count) / total_count))
            break
    print("cw attack done")
def train_async(args):
    """Train a metric-learning model with a PyReader-driven async loop.

    Builds separate train/test programs, optionally restores a checkpoint or
    pretrained weights, then iterates ``args.total_iter_num`` steps, logging
    metrics every ``display_iter_step``, evaluating recall@1 every
    ``test_iter_step``, and saving persistables every ``save_iter_step``.
    """
    # parameters from arguments
    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()
    # Continuous-evaluation mode: fix all seeds for reproducibility.
    if args.enable_ce:
        assert args.model == "ResNet50"
        assert args.loss_name == "arcmargin"
        np.random.seed(0)
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
        tmp_prog.random_seed = 1000
    train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_feas, image, label = build_program(
        is_train=False,
        main_prog=tmp_prog,
        startup_prog=startup_prog,
        args=args)
    test_prog = tmp_prog.clone(for_test=True)
    train_fetch_list = [
        global_lr.name, train_cost.name, train_acc1.name, train_acc5.name
    ]
    test_fetch_list = [test_feas.name]
    if args.with_mem_opt:
        # Keep fetched vars out of memory optimization so they stay readable.
        fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    logging.debug('after run startup program')
    if checkpoint is not None:
        fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)
    if pretrained_model:
        # Load only variables that exist as files in the pretrained dir.
        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))
        fluid.io.load_vars(
            exe,
            pretrained_model,
            main_program=train_prog,
            predicate=if_exist)
    # Per-device batch size: global batch must divide evenly across GPUs.
    devicenum = get_gpu_num()
    assert (args.train_batch_size % devicenum) == 0
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size
    train_reader = paddle.batch(
        reader.train(args), batch_size=train_batch_size, drop_last=True)
    test_reader = paddle.batch(
        reader.test(args), batch_size=test_batch_size, drop_last=False)
    test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    train_py_reader.decorate_paddle_reader(train_reader)
    train_exe = fluid.ParallelExecutor(
        main_program=train_prog,
        use_cuda=args.use_gpu,
        loss_name=train_cost.name)
    totalruntime = 0
    train_py_reader.start()
    iter_no = 0
    # Running sums: [loss, acc1, acc5, count] since the last reset.
    train_info = [0, 0, 0, 0]
    while iter_no <= args.total_iter_num:
        t1 = time.time()
        lr, loss, acc1, acc5 = train_exe.run(fetch_list=train_fetch_list)
        t2 = time.time()
        period = t2 - t1
        lr = np.mean(np.array(lr))
        train_info[0] += np.mean(np.array(loss))
        train_info[1] += np.mean(np.array(acc1))
        train_info[2] += np.mean(np.array(acc5))
        train_info[3] += 1
        if iter_no % args.display_iter_step == 0:
            avgruntime = totalruntime / args.display_iter_step
            avg_loss = train_info[0] / train_info[3]
            avg_acc1 = train_info[1] / train_info[3]
            avg_acc5 = train_info[2] / train_info[3]
            print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
                  "acc1 %.4f, acc5 %.4f, time %2.2f sec" % \
                  (fmt_time(), iter_no, lr, avg_loss, avg_acc1, avg_acc5,
                   avgruntime))
            sys.stdout.flush()
            totalruntime = 0
        # Reset the running sums every 1000 iterations (also at iter 0).
        if iter_no % 1000 == 0:
            train_info = [0, 0, 0, 0]
        totalruntime += period
        if iter_no % args.test_iter_step == 0 and iter_no != 0:
            # Collect features and labels for the whole test set, then
            # compute recall@1.
            f, l = [], []
            for batch_id, data in enumerate(test_reader()):
                t1 = time.time()
                [feas] = exe.run(test_prog,
                                 fetch_list=test_fetch_list,
                                 feed=test_feeder.feed(data))
                label = np.asarray([x[1] for x in data])
                f.append(feas)
                l.append(label)
                t2 = time.time()
                period = t2 - t1
                if batch_id % 20 == 0:
                    print("[%s] testbatch %d, time %2.2f sec" % \
                          (fmt_time(), batch_id, period))
            f = np.vstack(f)
            l = np.hstack(l)
            recall = recall_topk(f, l, k=1)
            print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
                  (fmt_time(), len(f), iter_no, recall))
            sys.stdout.flush()
        if iter_no % args.save_iter_step == 0 and iter_no != 0:
            model_path = os.path.join(model_save_dir + '/' + model_name,
                                      str(iter_no))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            fluid.io.save_persistables(
                exe, model_path, main_program=train_prog)
        iter_no += 1
    # This is for continuous evaluation only
    if args.enable_ce:
        # Use the mean cost/acc for training
        # NOTE(review): avg_loss/recall are bound inside conditional branches
        # above; this relies on at least one display/test step having run.
        print("kpis train_cost %s" % (avg_loss))
        print("kpis test_recall %s" % (recall))
def context(self, num_classes=81, trainable=True, pretrained=True,
            phase='train'):
    """
    Distill the Head Features, so as to perform transfer learning.

    Builds a FasterRCNN-FPN graph (ResNet50 backbone) inside a fresh
    Program, prefixes all variable names with '@HUB_<name>@' to avoid
    clashes, and optionally loads pretrained weights.

    Args:
        num_classes (int): number of categories
        trainable (bool): whether to set parameters trainable.
        pretrained (bool): whether to load default pretrained model.
        phase (str): optional choices are 'train' and 'predict'.

    Returns:
        inputs (dict): the input variables.
        outputs (dict): the output variables.
        context_prog (Program): the program to execute transfer learning.
    """
    context_prog = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(context_prog, startup_program):
        with fluid.unique_name.guard():
            image = fluid.layers.data(
                name='image', shape=[-1, 3, -1, -1], dtype='float32')
            # backbone
            backbone = ResNet(
                norm_type='affine_channel',
                depth=50,
                feature_maps=[2, 3, 4, 5],
                freeze_at=2)
            body_feats = backbone(image)
            # fpn
            fpn = FPN(
                max_level=6,
                min_level=2,
                num_chan=256,
                spatial_scale=[0.03125, 0.0625, 0.125, 0.25])
            # Prefix applied to every variable name so this sub-graph can be
            # merged into a host program without collisions.
            var_prefix = '@HUB_{}@'.format(self.name)
            im_info = fluid.layers.data(
                name='im_info', shape=[3], dtype='float32', lod_level=0)
            im_shape = fluid.layers.data(
                name='im_shape', shape=[3], dtype='float32', lod_level=0)
            body_feat_names = list(body_feats.keys())
            body_feats, spatial_scale = fpn.get_output(body_feats)
            # rpn_head: RPNHead
            rpn_head = self.rpn_head()
            rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
            # train
            if phase == 'train':
                gt_bbox = fluid.layers.data(
                    name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
                is_crowd = fluid.layers.data(
                    name='is_crowd', shape=[1], dtype='int32', lod_level=1)
                gt_class = fluid.layers.data(
                    name='gt_class', shape=[1], dtype='int32', lod_level=1)
                rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
                # bbox_assigner: BBoxAssigner
                bbox_assigner = self.bbox_assigner(num_classes)
                outs = fluid.layers.generate_proposal_labels(
                    rpn_rois=rois,
                    gt_classes=gt_class,
                    is_crowd=is_crowd,
                    gt_boxes=gt_bbox,
                    im_info=im_info,
                    batch_size_per_im=bbox_assigner.batch_size_per_im,
                    fg_fraction=bbox_assigner.fg_fraction,
                    fg_thresh=bbox_assigner.fg_thresh,
                    bg_thresh_hi=bbox_assigner.bg_thresh_hi,
                    bg_thresh_lo=bbox_assigner.bg_thresh_lo,
                    bbox_reg_weights=bbox_assigner.bbox_reg_weights,
                    class_nums=bbox_assigner.class_nums,
                    use_random=bbox_assigner.use_random)
                # In training the sampled/labelled proposals replace the raw
                # RPN rois for the downstream head.
                rois = outs[0]
            roi_extractor = self.roi_extractor()
            roi_feat = roi_extractor(
                head_inputs=body_feats,
                rois=rois,
                spatial_scale=spatial_scale)
            # head_feat
            bbox_head = self.bbox_head(num_classes)
            head_feat = bbox_head.head(roi_feat)
            if isinstance(head_feat, OrderedDict):
                head_feat = list(head_feat.values())[0]
            if phase == 'train':
                inputs = {
                    'image': var_prefix + image.name,
                    'im_info': var_prefix + im_info.name,
                    'im_shape': var_prefix + im_shape.name,
                    'gt_class': var_prefix + gt_class.name,
                    'gt_bbox': var_prefix + gt_bbox.name,
                    'is_crowd': var_prefix + is_crowd.name
                }
                outputs = {
                    'head_features':
                    var_prefix + head_feat.name,
                    'rpn_cls_loss':
                    var_prefix + rpn_loss['rpn_cls_loss'].name,
                    'rpn_reg_loss':
                    var_prefix + rpn_loss['rpn_reg_loss'].name,
                    'generate_proposal_labels':
                    [var_prefix + var.name for var in outs]
                }
            elif phase == 'predict':
                pred = bbox_head.get_prediction(roi_feat, rois, im_info,
                                                im_shape)
                inputs = {
                    'image': var_prefix + image.name,
                    'im_info': var_prefix + im_info.name,
                    'im_shape': var_prefix + im_shape.name
                }
                outputs = {
                    'head_features': var_prefix + head_feat.name,
                    'rois': var_prefix + rois.name,
                    'bbox_out': var_prefix + pred.name
                }
            # Rename every variable in both programs with the hub prefix,
            # then resolve the prefixed names back to Variable objects.
            add_vars_prefix(context_prog, var_prefix)
            add_vars_prefix(startup_program, var_prefix)
            global_vars = context_prog.global_block().vars
            inputs = {
                key: global_vars[value]
                for key, value in inputs.items()
            }
            outputs = {
                key: global_vars[value] if not isinstance(value, list) else
                [global_vars[var] for var in value]
                for key, value in outputs.items()
            }
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(startup_program)
            if pretrained:
                def _if_exist(var):
                    # With a non-default class count the prediction-head
                    # weights have the wrong shape, so skip loading them.
                    if num_classes != 81:
                        if 'bbox_pred' in var.name or 'cls_score' in var.name:
                            return False
                    return os.path.exists(
                        os.path.join(self.default_pretrained_model_path,
                                     var.name))
                fluid.io.load_vars(
                    exe,
                    self.default_pretrained_model_path,
                    predicate=_if_exist)
            return inputs, outputs, context_prog
def test(args):
    """
    Test

    Rebuilds the DAM network, restores persistables from args.model_path,
    scores every test batch with a ParallelExecutor, writes per-sample
    scores to <save_path>/score.txt, and evaluates them (Douban or Ubuntu
    metrics) into <save_path>/result.txt.
    """
    if not os.path.exists(args.save_path):
        mkdir(args.save_path)
    if not os.path.exists(args.model_path):
        raise ValueError("Invalid model init path %s" % args.model_path)
    # data data_config
    data_conf = {
        "batch_size": args.batch_size,
        "max_turn_num": args.max_turn_num,
        "max_turn_len": args.max_turn_len,
        "_EOS_": args._EOS_,
    }
    dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size,
              args.emb_size, args.stack_num, args.channel1_num,
              args.channel2_num)
    dam.create_data_layers()
    loss, logits = dam.create_network()
    loss.persistable = True
    logits.persistable = True
    # gradient clipping
    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByValue(max=1.0, min=-1.0))
    # Clone BEFORE adding the optimizer so the test program contains only
    # the forward graph.
    test_program = fluid.default_main_program().clone(for_test=True)
    # The optimizer is built to mirror the training graph (checkpoint layout)
    # even though no training step runs here.
    optimizer = fluid.optimizer.Adam(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=args.learning_rate,
            decay_steps=400,
            decay_rate=0.9,
            staircase=True))
    optimizer.minimize(loss)
    print("begin memory optimization ...")
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    fluid.memory_optimize(fluid.default_main_program())
    print("end memory optimization ...")
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        #dev_count = multiprocessing.cpu_count()
        dev_count = 1
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    fluid.io.load_persistables(exe, args.model_path)
    test_exe = fluid.ParallelExecutor(
        use_cuda=args.use_cuda, main_program=test_program)
    print("start loading data ...")
    with open(args.data_path, 'rb') as f:
        # Python 2 pickles need no encoding; Python 3 needs bytes decoding.
        if six.PY2:
            train_data, val_data, test_data = pickle.load(f)
        else:
            train_data, val_data, test_data = pickle.load(f,
                                                          encoding="bytes")
    print("finish loading data ...")
    test_batches = reader.build_batches(test_data, data_conf)
    test_batch_num = len(test_batches["response"])
    print("test batch num: %d" % test_batch_num)
    print("begin inference ...")
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
    score_path = os.path.join(args.save_path, 'score.txt')
    score_file = open(score_path, 'w')
    # Feed dev_count batches per step (one feed dict per device); trailing
    # batches that don't fill all devices are dropped by the floor division.
    for it in six.moves.xrange(test_batch_num // dev_count):
        feed_list = []
        for dev in six.moves.xrange(dev_count):
            index = it * dev_count + dev
            batch_data = reader.make_one_batch_input(test_batches, index)
            feed_dict = dict(zip(dam.get_feed_names(), batch_data))
            feed_list.append(feed_dict)
        predicts = test_exe.run(feed=feed_list, fetch_list=[logits.name])
        scores = np.array(predicts[0])
        print("step = %d" % it)
        for dev in six.moves.xrange(dev_count):
            index = it * dev_count + dev
            for i in six.moves.xrange(args.batch_size):
                # One "score<TAB>label" line per sample.
                score_file.write(
                    str(scores[args.batch_size * dev + i][0]) + '\t' +
                    str(test_batches["label"][index][i]) + '\n')
    score_file.close()
    #write evaluation result
    if args.ext_eval:
        result = eva.evaluate_douban(score_path)
    else:
        result = eva.evaluate_ubuntu(score_path)
    result_file_path = os.path.join(args.save_path, 'result.txt')
    with open(result_file_path, 'w') as out_file:
        for metric in result:
            out_file.write(metric + '\t' + str(result[metric]) + '\n')
    print('finish test')
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
def quantization_scale(self,
                       use_cuda,
                       seed,
                       activation_quant_type,
                       weight_quant_type='abs_max',
                       for_ci=False,
                       act_preprocess_func=None,
                       weight_preprocess_func=None,
                       act_quantize_func=None,
                       weight_quantize_func=None):
    """Exercise the quantization-aware-training pass pipeline on MNIST.

    Builds train/test conv nets, applies QuantizationTransformPass,
    AddQuantDequantPass and OutScaleForTrainingPass, trains a few batches,
    then applies OutScaleForInferencePass and (when no user-defined
    quantize funcs are given) QuantizationFreezePass to the test graph.
    The out-node mapping table is round-tripped through a JSON file.

    Args:
        use_cuda: run on GPU 0 when True.
        seed: random seed applied to both programs.
        activation_quant_type / weight_quant_type: pass configuration.
        for_ci: accepted, unused in this body.
        *_preprocess_func / *_quantize_func: optional user-defined hooks
            forwarded to QuantizationTransformPass.
    """
    def build_program(main, startup, is_test):
        # Build the conv net (plus SGD step when training) into `main`.
        main.random_seed = seed
        startup.random_seed = seed
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                img = fluid.layers.data(
                    name='image', shape=[1, 28, 28], dtype='float32')
                img.stop_gradient = False
                label = fluid.layers.data(
                    name='label', shape=[1], dtype='int64')
                loss = conv_net(img, label)
                if not is_test:
                    opt = fluid.optimizer.SGD(learning_rate=0.0001)
                    opt.minimize(loss)
        return [img, label], loss

    def get_optimizer():
        # Optimizer factory handed to the transform pass.
        return fluid.optimizer.MomentumOptimizer(0.0001, 0.9)

    def load_dict():
        # Read back the out-node mapping table saved by save_dict().
        with open('mapping_table_for_saving_inference_model', 'r') as file:
            data = file.read()
            data = json.loads(data)
            return data

    def save_dict(Dict):
        # Persist the out-node mapping table as JSON.
        with open('mapping_table_for_saving_inference_model', 'w') as file:
            file.write(json.dumps(Dict))

    random.seed(0)
    np.random.seed(0)
    main = fluid.Program()
    startup = fluid.Program()
    test_program = fluid.Program()
    feeds, loss = build_program(main, startup, False)
    build_program(test_program, startup, True)
    test_program = test_program.clone(for_test=True)
    main_graph = IrGraph(core.Graph(main.desc), for_test=False)
    test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    with fluid.scope_guard(scope):
        exe.run(startup)
    # Insert fake-quant/dequant ops into the training graph.
    train_transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quant_type,
        act_preprocess_func=act_preprocess_func,
        weight_preprocess_func=weight_preprocess_func,
        act_quantize_func=act_quantize_func,
        weight_quantize_func=weight_quantize_func,
        optimizer_func=get_optimizer,
        executor=exe)
    train_transform_pass.apply(main_graph)
    # Same transform for the test graph.
    test_transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quant_type,
        act_preprocess_func=act_preprocess_func,
        weight_preprocess_func=weight_preprocess_func,
        act_quantize_func=act_quantize_func,
        weight_quantize_func=weight_quantize_func,
        optimizer_func=get_optimizer,
        executor=exe)
    test_transform_pass.apply(test_graph)
    save_dict(test_graph.out_node_mapping_table)
    add_quant_dequant_pass = AddQuantDequantPass(scope=scope, place=place)
    add_quant_dequant_pass.apply(main_graph)
    add_quant_dequant_pass.apply(test_graph)
    scale_training_pass = OutScaleForTrainingPass(scope=scope, place=place)
    scale_training_pass.apply(main_graph)
    dev_name = '_gpu' if use_cuda else '_cpu'
    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)
    # Short training run — enough for the scale passes to record stats.
    iters = 5
    batch_size = 8
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
        batch_size=batch_size)
    feeder = fluid.DataFeeder(feed_list=feeds, place=place)
    with fluid.scope_guard(scope):
        for _ in range(iters):
            data = next(train_reader())
            loss_v = exe.run(binary,
                             feed=feeder.feed(data),
                             fetch_list=[loss])
    out_scale_infer_pass = OutScaleForInferencePass(scope=scope)
    out_scale_infer_pass.apply(test_graph)
    freeze_pass = QuantizationFreezePass(
        scope=scope,
        place=place,
        weight_bits=8,
        activation_bits=8,
        weight_quantize_type=weight_quant_type)
    # Restore the mapping table saved before the quant/dequant passes ran.
    mapping_table = load_dict()
    test_graph.out_node_mapping_table = mapping_table
    # Freezing is skipped when user-defined quantize funcs are in play.
    if act_quantize_func == None and weight_quantize_func == None:
        freeze_pass.apply(test_graph)