def main(use_cuda):
    """
    Advbox demo which demonstrate how to use advbox.

    Builds an AlexNet classifier, loads pretrained weights, then runs an
    untargeted black-box SinglePixelAttack against "cat.png" and saves the
    adversarial image on success.
    """
    class_dim = 1000
    IMG_NAME = 'img'
    LABEL_NAME = 'label'
    # Model path: download
    # http://paddle-imagenet-models.bj.bcebos.com/resnet_50_model.tar and unpack.
    #pretrained_model = "models/resnet_50/115"
    pretrained_model = "models/alexnet/116/"
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name=IMG_NAME, shape=image_shape, dtype='float32')
    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')

    # model definition
    model = AlexNet()
    out = model.net(input=image, class_dim=class_dim)

    # Choose CPU or GPU execution depending on the configuration.
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Load the pretrained model parameters.
    if pretrained_model:

        def if_exist(var):
            # Only load variables whose parameter file exists on disk.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        logger.info("Load pretrained_model")
        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    logging.info("Build advbox")
    # advbox demo: black-box attack — pass in a test-mode clone of the program.
    m = PaddleBlackBoxModel(
        fluid.default_main_program().clone(for_test=True),
        IMG_NAME,
        LABEL_NAME,
        out.name, (0, 1),
        channel_axis=0)

    # Untargeted attack.
    # For shape [1,28,28] use channel_axis=0; for [28,28,1] use channel_axis=2.
    attack = SinglePixelAttack(m)
    attack_config = {"max_pixels": 224 * 224, "isPreprocessed": True}

    test_data = get_image("cat.png")
    original_data = np.copy(test_data)
    # Label of the cat image in ImageNet 2012; see
    # https://blog.csdn.net/LegenDavid/article/details/73335578
    # None here means the attack framework infers the original label itself.
    original_label = None
    adversary = Adversary(original_data, original_label)
    logger.info("Non-targeted Attack...")
    adversary = attack(adversary, **attack_config)

    if adversary.is_successful():
        print('attack success, original_label=%d, adversarial_label=%d' %
              (adversary.original_label, adversary.adversarial_label))
        # The adversarial example is stored in adversary.adversarial_example.
        adversary_image = np.copy(adversary.adversarial_example)
        # Undo normalization, then convert from [3,224,224] to [224,224,3].
        # NOTE(review): img_std / img_mean come from outside this chunk —
        # presumably the preprocessing constants; confirm.
        adversary_image *= img_std
        adversary_image += img_mean
        adversary_image = np.array(
            adversary_image * 255).astype("uint8").transpose([1, 2, 0])
        im = Image.fromarray(adversary_image)
        im.save("adversary_image.jpg")
    else:
        print('attack failed, original_label=%d' % (adversary.original_label))

    logger.info("SinglePixelAttack attack done")
def eval():
    """Evaluate a RetinaNet model on the COCO validation set.

    Builds the eval network, loads pretrained weights, runs inference over the
    validation reader, collects detections and reports COCO bbox metrics.

    Side effects: writes "detection_bbox_result.json" to the working directory.
    Raises:
        AssertionError: if no valid bounding boxes were detected.
    """
    data_path = DatasetPath('val')
    test_list = data_path.get_file_list()

    image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size]
    class_nums = cfg.class_num
    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    cocoGt = COCO(test_list)
    # COCO category ids are sparse; map contiguous 1-based ids back to them.
    num_id_to_cat_id_map = {i + 1: v for i, v in enumerate(cocoGt.getCatIds())}
    category_ids = cocoGt.getCatIds()
    label_list = {
        item['id']: item['name']
        for item in cocoGt.loadCats(category_ids)
    }
    # FIX: use a plain string for background, consistent with every other
    # entry (and with infer(), which maps 0 -> 'background').
    label_list[0] = 'background'

    model = model_builder.RetinaNet(
        add_conv_body_func=resnet.add_ResNet50_conv_body,
        add_fpn_neck_func=fpn.add_fpn_neck,
        anchor_strides=[8, 16, 32, 64, 128],
        use_pyreader=False,
        mode='val')
    model.build_model(image_shape)
    pred_boxes = model.eval_bbox_out()
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # yapf: disable
    if cfg.pretrained_model:
        def if_exist(var):
            # Only load variables whose parameter file exists on disk.
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
        print('load pretrained model')
        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
    # yapf: enable

    test_reader = reader.test(total_batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    dts_res = []
    segms_res = []
    fetch_list = [pred_boxes]
    eval_start = time.time()
    # FIX: predefine batch_id so an empty reader does not raise NameError below.
    batch_id = -1
    for batch_id, batch_data in enumerate(test_reader()):
        start = time.time()
        im_info = []
        for data in batch_data:
            im_info.append(data[1])
        results = exe.run(fetch_list=[v.name for v in fetch_list],
                          feed=feeder.feed(batch_data),
                          return_numpy=False)
        pred_boxes_v = results[0]
        new_lod = pred_boxes_v.lod()
        nmsed_out = pred_boxes_v
        dts_res += get_dt_res(total_batch_size, new_lod[0], nmsed_out,
                              batch_data, num_id_to_cat_id_map)
        end = time.time()
        print('batch id: {}, time: {}'.format(batch_id, end - start))
    eval_end = time.time()
    total_time = eval_end - eval_start
    if batch_id >= 0:
        print('average time of eval is: {}'.format(total_time /
                                                   (batch_id + 1)))
    assert len(dts_res) > 0, "The number of valid bbox detected is zero.\n \
        Please use reasonable model and check input data."

    with open("detection_bbox_result.json", 'w') as outfile:
        json.dump(dts_res, outfile)
    print("start evaluate bbox using coco api")
    cocoDt = cocoGt.loadRes("detection_bbox_result.json")
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
def grad_check(x,
               y,
               x_init=None,
               place=None,
               program=None,
               eps=1e-6,
               atol=1e-5,
               rtol=1e-3,
               raise_exception=True):
    """
    Check numerical and analytical gradients for dy/dx.
    Each Jacobian gradients is a 2-D array with shape [xi_size, yi_size].

    Args:
        x (Variable|list[Variable]): input variables to the program.
        y (Variable|list[Variable]): output variables to the program.
        x_init (numpy.array|list[numpy.array]|None): the init value for input x.
        place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        program (Program|None): a Program with forward pass.
            If None, use fluid.default_main_program().
        eps (float): perturbation for finite differences.
        atol (float): absolute tolerance.
        rtol (float): relative tolerance.
        raise_exception (bool): whether to raise an exception if
            the check fails. Default is True.
    Returns:
        True if all differences satisfy numpy.allclose condition.
    """

    def fail_test(msg):
        # Either raise or just report failure, controlled by raise_exception.
        if raise_exception:
            raise RuntimeError(msg)
        return False

    # check input arguments
    x = _as_list(x)
    y = _as_list(y)

    # Gradients must flow through inputs, and the vars must survive in the
    # global scope so the Jacobian helpers can read them back.
    for v in x:
        v.stop_gradient = False
        v.persistable = True
    if place is None:
        place = fluid.CPUPlace()
    if program is None:
        program = fluid.default_main_program()

    # init variables in startup program
    scope = fluid.executor.global_scope()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    x_init = _as_list(x_init)
    # init inputs if x_init is not None
    if x_init:
        if len(x_init) != len(x):
            raise ValueError('len(x_init) (=%d) is not the same'
                             ' as len(x) (= %d)' % (len(x_init), len(x)))
        # init variable in main program
        for var, arr in zip(x, x_init):
            assert var.shape == arr.shape
        feeds = {k.name: v for k, v in zip(x, x_init)}
        exe.run(program, feed=feeds, scope=scope)

    # Numerical Jacobians, indexed [x_idx][y_idx].
    numerical = [
        _compute_numerical_jacobian(program, xi, y, place, scope, eps)
        for xi in x
    ]

    # Analytical Jacobians, indexed [y_idx][x_idx]. Each output gets its own
    # clone of the program so backward passes do not interfere.
    analytical = []
    for yi in y:
        prog = program.clone()

        clone_x = []
        clone_y = None
        # Re-resolve the variables inside the cloned program by name.
        for b in prog.blocks:
            if b.has_var(yi.name):
                clone_y = b.var(yi.name)
                break
        for xi in x:
            for b in prog.blocks:
                if b.has_var(xi.name):
                    clone_x.append(b.var(xi.name))
                    break
        analytical.append(
            _compute_analytical_jacobian(prog, clone_x, clone_y, place, scope))

    # Compare every (input, output) Jacobian pair.
    for i, (x_idx, y_idx) in enumerate(
            product(*[range(len(x)), range(len(y))])):
        a = analytical[y_idx][x_idx]
        n = numerical[x_idx][y_idx]
        if not np.allclose(a, n, rtol, atol):
            msg = 'Jacobian mismatch for output %s ' \
                  'with respect to input %s on %s,\n' \
                  'numerical:%s\nanalytical:%s\n' \
                  % (y[y_idx].name, x[x_idx].name, str(place), n, a)
            return fail_test(msg)
    return True
initializer=fluid.initializer.Uniform(-stdv, stdv)))
    # NOTE(review): the line above is the truncated tail of a layer/parameter
    # definition from the enclosing class's __init__ — its beginning lies
    # outside this chunk.

    def forward(self, inputs):
        """Run the ResNet forward pass: stem conv, max pool, bottleneck
        blocks, average pool, flatten, then the output FC layer."""
        y = self.conv(inputs)
        y = self.pool2d_max(y)
        for bottleneck_block in self.bottleneck_block_list:
            y = bottleneck_block(y)
        y = self.pool2d_avg(y)
        # Flatten to [batch, features] before the classifier head.
        y = fluid.layers.reshape(y, [y.shape[0], -1])
        y = self.out(y)
        return y


"""Start training."""
use_gpu = True
# Select GPU or CPU execution.
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
with fluid.dygraph.guard():
    model = ResNet()
    train(model)
    fluid.save_dygraph(model.state_dict(), 'sloar_spot_ResNet')

"""================ The training function above must run before the test function below ================"""
"""Start testing."""
# Load the test set.
from PIL import Image
import os
import numpy as np


def read_data_input():
    # NOTE(review): truncated here — the remainder of this function lies
    # outside this chunk.
    file_path = "./test_img/"
def make_cnn_model_visualization(dirname):
    """
    Train the cnn model on mnist datasets.

    Args:
        dirname (str): directory in which trained parameters are saved after
            each epoch (the Inferencer can load them from there).
    """
    batch_size = 64
    num_epochs = 5
    # FIX: use a proper boolean (was the int 1).
    use_cuda = True

    def optimizer_program():
        return fluid.optimizer.Adam(learning_rate=0.001)

    def train_program():
        # MLP classifier over 28x28 MNIST images; returns [avg_cost, acc].
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
        predict = mnist_mlp_model(img)
        # Calculate the cost from the prediction and label.
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        avg_cost = fluid.layers.mean(cost)
        acc = fluid.layers.accuracy(input=predict, label=label)
        return [avg_cost, acc]

    train_reader = paddle.batch(
        paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
        batch_size=batch_size)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=batch_size)

    # set to True if training with GPU
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    trainer = fluid.Trainer(
        train_func=train_program, place=place, optimizer_func=optimizer_program)

    # Save the parameter into a directory. The Inferencer can load the
    # parameters from it to do infer.
    params_dirname = dirname
    lists = []

    def event_handler(event):
        if isinstance(event, fluid.EndStepEvent):
            if event.step % 100 == 0:
                # event.metrics maps with train program return arguments:
                # metrics[0] yields avg_cost and metrics[1] yields acc.
                # FIX: Python 3 print function (was a py2 print statement).
                print("step %d, epoch %d, Cost %f Acc %f " %
                      (event.step, event.epoch, event.metrics[0],
                       event.metrics[1]))
        if isinstance(event, fluid.EndEpochEvent):
            avg_cost, acc = trainer.test(
                reader=test_reader, feed_order=['img', 'label'])
            print("Test with Epoch %d, avg_cost: %s, acc: %s" %
                  (event.epoch, avg_cost, acc))
            # save parameters
            print("save_params")
            trainer.save_params(params_dirname)
            lists.append((event.epoch, avg_cost, acc))

    # Train the model now
    trainer.train(
        num_epochs=num_epochs,
        event_handler=event_handler,
        reader=train_reader,
        feed_order=['img', 'label'])

    # find the best pass (lowest average cost)
    # FIX: the lambda parameter no longer shadows the builtin `list`.
    best = sorted(lists, key=lambda rec: float(rec[1]))[0]
    print('Best pass is %s, testing Avgcost is %s' % (best[0], best[1]))
    print('The classification accuracy is %.2f%%' % (float(best[2]) * 100))
trainer_id = int(sys.argv[1]) # trainer id for each guest job_path = "fl_job_config" job = FLRunTimeJob() job.load_trainer_job(job_path, trainer_id) job._scheduler_ep = "127.0.0.1:9091" # Inform the scheduler IP to trainer print(job._target_names) trainer = FLTrainerFactory().create_fl_trainer(job) trainer._current_ep = "127.0.0.1:{}".format(9000+trainer_id) trainer.start() print(trainer._step) test_program = trainer._main_program.clone(for_test=True) img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace()) def train_test(train_test_program, train_test_feed, train_test_reader): acc_set = [] for test_data in train_test_reader(): acc_np = trainer.exe.run( program=train_test_program, feed=train_test_feed.feed(test_data), fetch_list=["accuracy_0.tmp_0"]) acc_set.append(float(acc_np[0])) acc_val_mean = numpy.array(acc_set).mean() return acc_val_mean epoch_id = 0 step = 0 epoch = 3000
def train(args):
    """Train the sequence semantic retrieval (SSR) model.

    Builds the train program, optimizes with Adagrad, optionally runs with a
    ParallelExecutor, saves parameters after every epoch, and emits CE kpi
    lines when --enable_ce is set.

    Args:
        args: parsed command-line namespace (train_dir, vocab_path,
            batch_size, use_cuda, parallel, epochs, ...).
    """
    if args.enable_ce:
        SEED = 102
        fluid.default_startup_program().random_seed = SEED
        fluid.default_main_program().random_seed = SEED
    # FIX: bool(...) instead of the `True if x else False` anti-idiom.
    use_cuda = bool(args.use_cuda)
    parallel = bool(args.parallel)
    print("use_cuda:", use_cuda, "parallel:", parallel)
    train_reader, vocab_size = utils.construct_train_data(
        args.train_dir, args.vocab_path, args.batch_size * get_cards(args))
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    ssr = SequenceSemanticRetrieval(vocab_size, args.embedding_dim,
                                    args.hidden_size)
    # Train program
    train_input_data, cos_pos, avg_cost, acc = ssr.train()
    # Optimization to minimize lost
    optimizer = fluid.optimizer.Adagrad(learning_rate=args.base_lr)
    optimizer.minimize(avg_cost)

    data_list = [var.name for var in train_input_data]
    feeder = fluid.DataFeeder(feed_list=data_list, place=place)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    if parallel:
        train_exe = fluid.ParallelExecutor(
            use_cuda=use_cuda, loss_name=avg_cost.name)
    else:
        train_exe = exe

    total_time = 0.0
    ce_info = []
    for pass_id in range(args.epochs):
        epoch_idx = pass_id + 1
        print("epoch_%d start" % epoch_idx)
        t0 = time.time()
        i = 0
        for batch_id, data in enumerate(train_reader()):
            i += 1
            loss_val, correct_val = train_exe.run(
                feed=feeder.feed(data), fetch_list=[avg_cost.name, acc.name])
            ce_info.append(float(np.mean(correct_val)) / args.batch_size)
            if i % args.print_batch == 0:
                logger.info(
                    "Train --> pass: {} batch_id: {} avg_cost: {}, acc: {}".
                    format(pass_id, batch_id,
                           np.mean(loss_val),
                           float(np.mean(correct_val)) / args.batch_size))
            if args.enable_ce and i > args.step_num:
                break
        t1 = time.time()
        total_time += t1 - t0
        print("epoch:%d num_steps:%d time_cost(s):%f" %
              (epoch_idx, i, total_time / epoch_idx))
        save_dir = "%s/epoch_%d" % (args.model_dir, epoch_idx)
        fluid.io.save_params(executor=exe, dirname=save_dir)
        print("model saved in %s" % save_dir)

    # only for ce
    if args.enable_ce:
        ce_acc = 0
        try:
            # Second-to-last recorded accuracy is used as the CE kpi.
            ce_acc = ce_info[-2]
        # FIX: catch only the failure that can actually happen here (too few
        # recorded batches) instead of a bare `except:`.
        except IndexError:
            print("ce info error")
        epoch_idx = args.epochs
        device = get_device(args)
        if args.use_cuda:
            gpu_num = device[1]
            print("kpis\teach_pass_duration_gpu%s\t%s" %
                  (gpu_num, total_time / epoch_idx))
            print("kpis\ttrain_acc_gpu%s\t%s" % (gpu_num, ce_acc))
        else:
            cpu_num = device[1]
            threads_num = device[2]
            print("kpis\teach_pass_duration_cpu%s_thread%s\t%s" %
                  (cpu_num, threads_num, total_time / epoch_idx))
            print("kpis\ttrain_acc_cpu%s_thread%s\t%s" %
                  (cpu_num, threads_num, ce_acc))
from paddle.fluid.dygraph import declarative


class SimpleNet(fluid.dygraph.Layer):
    """A minimal network that applies one shared Linear layer twice; its
    forward pass is converted to static graph via @declarative."""

    def __init__(self, in_size, out_size):
        super(SimpleNet, self).__init__()
        self._linear = Linear(in_size, out_size)

    @declarative
    def forward(self, x):
        # Two passes through the same linear layer.
        hidden = self._linear(x)
        return self._linear(hidden)


with fluid.dygraph.guard(fluid.CPUPlace()):
    net = SimpleNet(8, 8)
    adam = fluid.optimizer.AdamOptimizer(
        learning_rate=0.1, parameter_list=net.parameters())
    x = fluid.dygraph.to_variable(
        np.random.random((4, 8)).astype('float32'))

    # Short training loop: minimize the mean activation.
    for step in range(10):
        loss = fluid.layers.mean(net(x))
        loss.backward()
        adam.minimize(loss)
        net.clear_gradients()

    # Save inference model.
    model_path = "./dy2stat_infer_model"
    fluid.dygraph.jit.save(net, model_path, [x])
def ptb_rnn_sort_gradient_cpu_float32(self, is_sparse):
    """Train a PTB RNN language model in dygraph mode (with sorted gradient
    sum enabled) and in static-graph mode with identical seeds/data, then
    assert that losses, final states and all parameters match exactly."""
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4
    batch_num = 200

    # ---- dygraph run ----
    with fluid.dygraph.guard():
        fluid.set_flags({'FLAGS_sort_sum_gradient': True})
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel(
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            num_layers=num_layers,
            num_steps=num_steps,
            init_scale=init_scale,
            is_sparse=is_sparse)

        sgd = SGDOptimizer(
            learning_rate=1e-3, parameter_list=ptb_model.parameters())
        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None

        for i in range(batch_num):
            # Deterministic synthetic batch: same data every iteration.
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            x_data = x_data.reshape((-1, num_steps, 1))
            y_data = y_data.reshape((-1, 1))
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            x = to_variable(x_data)
            y = to_variable(y_data)
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                        init_cell)
            if i == 0:
                # Snapshot initial parameters for later comparison.
                for param in ptb_model.parameters():
                    dy_param_init[param.name] = param.numpy()
            dy_loss.backward()
            sgd.minimize(dy_loss)
            ptb_model.clear_gradients()
            if i == batch_num - 1:
                # Snapshot final parameters.
                for param in ptb_model.parameters():
                    dy_param_updated[param.name] = param.numpy()

        dy_loss_value = dy_loss.numpy()
        dy_last_cell_value = last_cell.numpy()
        dy_last_hidden_value = last_hidden.numpy()

    # ---- static graph run ----
    with new_program_scope():
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)
        ptb_model = PtbModel(
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            num_layers=num_layers,
            num_steps=num_steps,
            init_scale=init_scale,
            is_sparse=is_sparse)

        exe = fluid.Executor(fluid.CPUPlace()
                             if not core.is_compiled_with_cuda() else
                             fluid.CUDAPlace(0))
        sgd = SGDOptimizer(learning_rate=1e-3)
        x = fluid.layers.data(
            name="x", shape=[-1, num_steps, 1], dtype='int64')
        # NOTE(review): dtype is 'float32' here while the dygraph branch uses
        # int64 labels — presumably tolerated by the feed layer; confirm.
        y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
        init_hidden = fluid.layers.data(
            name="init_hidden", shape=[1], dtype='float32')
        init_cell = fluid.layers.data(
            name="init_cell", shape=[1], dtype='float32')

        static_loss, static_last_hidden, static_last_cell = ptb_model(
            x, y, init_hidden, init_cell)
        sgd.minimize(static_loss)
        static_param_updated = dict()
        static_param_init = dict()
        static_param_name_list = list()
        for param in ptb_model.parameters():
            static_param_name_list.append(param.name)

        # Running the startup program both initializes and fetches the
        # initial parameter values.
        out = exe.run(framework.default_startup_program(),
                      fetch_list=static_param_name_list)
        for i in range(len(static_param_name_list)):
            static_param_init[static_param_name_list[i]] = out[i]
        static_loss_value = None
        static_last_cell_value = None
        static_last_hidden_value = None
        for i in range(batch_num):
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            x_data = x_data.reshape((-1, num_steps, 1))
            y_data = y_data.reshape((-1, 1))
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            # Fetch loss/states first, then every parameter (offset 3 below).
            fetch_list = [static_loss, static_last_hidden, static_last_cell]
            fetch_list.extend(static_param_name_list)
            out = exe.run(fluid.default_main_program(),
                          feed={
                              "x": x_data,
                              "y": y_data,
                              "init_hidden": init_hidden_data,
                              "init_cell": init_cell_data
                          },
                          fetch_list=fetch_list)
            static_loss_value = out[0]
            static_last_hidden_value = out[1]
            static_last_cell_value = out[2]
            if i == batch_num - 1:
                for k in range(3, len(out)):
                    static_param_updated[static_param_name_list[
                        k - 3]] = out[k]

    # Both modes must agree bit-for-bit.
    self.assertTrue(np.array_equal(static_loss_value, dy_loss_value))
    self.assertTrue(
        np.array_equal(static_last_cell_value, dy_last_cell_value))
    self.assertTrue(
        np.array_equal(static_last_hidden_value, dy_last_hidden_value))
    for key, value in six.iteritems(static_param_init):
        self.assertTrue(np.array_equal(value, dy_param_init[key]))
    for key, value in six.iteritems(static_param_updated):
        self.assertTrue(np.array_equal(value, dy_param_updated[key]))
def context(self, trainable=True, pretrained=True):
    """context for transfer learning.

    Builds a MobileNetV2 classification program inside its own
    Program/name-scope, prefixes all variables with a hub-specific tag, and
    either loads pretrained weights or runs the startup program.

    Args:
        trainable (bool): Set parameters in program to be trainable.
        pretrained (bool) : Whether to load pretrained model.

    Returns:
        inputs (dict): key is 'image', corresponding vaule is image tensor.
        outputs (dict): key is :
            'classification', corresponding value is the result of classification.
            'feature_map', corresponding value is the result of the layer before the fully connected layer.
        context_prog (fluid.Program): program for transfer learning.
    """
    context_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(context_prog, startup_prog):
        with fluid.unique_name.guard():
            image = fluid.layers.data(
                name="image", shape=[3, 224, 224], dtype="float32")
            mobile_net = MobileNetV2()
            output, feature_map = mobile_net.net(
                input=image, class_dim=len(self.label_list))

            # Prefix every variable with the module name so this program can
            # coexist with other hub modules in one scope.
            name_prefix = '@HUB_{}@'.format(self.name)
            inputs = {'image': name_prefix + image.name}
            outputs = {
                'classification': name_prefix + output.name,
                'feature_map': name_prefix + feature_map.name
            }
            add_vars_prefix(context_prog, name_prefix)
            add_vars_prefix(startup_prog, name_prefix)
            # Re-resolve names to the actual (now prefixed) variables.
            global_vars = context_prog.global_block().vars
            inputs = {
                key: global_vars[value]
                for key, value in inputs.items()
            }
            outputs = {
                key: global_vars[value]
                for key, value in outputs.items()
            }

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            # pretrained
            if pretrained:

                def _if_exist(var):
                    # Only load variables whose parameter file exists on disk.
                    b = os.path.exists(
                        os.path.join(self.default_pretrained_model_path,
                                     var.name))
                    return b

                fluid.io.load_vars(
                    exe,
                    self.default_pretrained_model_path,
                    context_prog,
                    predicate=_if_exist)
            else:
                # No pretrained weights: random-initialize instead.
                exe.run(startup_prog)
            # trainable
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
    return inputs, outputs, context_prog
def infer():
    """Run single-image inference with an R-CNN model and draw the results.

    Tries to build the class-id -> name mapping from the local COCO dataset;
    falls back to the built-in COCO17 mapping when the dataset or the COCO API
    is unavailable. Then builds the inference network, loads pretrained
    weights, runs one batch from the infer reader and draws boxes (and masks
    when cfg.MASK_ON) onto the input image.

    Raises:
        ValueError: if cfg.pretrained_model does not exist on disk.
    """
    try:
        from pycocotools.coco import COCO
        from pycocotools.cocoeval import COCOeval, Params

        data_path = DatasetPath('val')
        test_list = data_path.get_file_list()
        coco_api = COCO(test_list)
        cid = coco_api.getCatIds()
        # Map sparse COCO category ids to contiguous 1-based ids.
        cat_id_to_num_id_map = {
            v: i + 1
            for i, v in enumerate(coco_api.getCatIds())
        }
        category_ids = coco_api.getCatIds()
        labels_map = {
            cat_id_to_num_id_map[item['id']]: item['name']
            for item in coco_api.loadCats(category_ids)
        }
        labels_map[0] = 'background'
    # FIX: this fallback is deliberate best-effort, but a bare `except:` would
    # also swallow KeyboardInterrupt/SystemExit — catch Exception instead.
    except Exception:
        print("The COCO dataset or COCO API is not exist, use the default "
              "mapping of class index and real category name on COCO17.")
        assert cfg.dataset == 'coco2017'
        labels_map = coco17_labels()

    image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size]
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=False,
        mode='infer')
    model.build_model(image_shape)
    pred_boxes = model.eval_bbox_out()
    if cfg.MASK_ON:
        masks = model.eval_mask_out()
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # yapf: disable
    if not os.path.exists(cfg.pretrained_model):
        raise ValueError("Model path [%s] does not exist." % (cfg.pretrained_model))

    def if_exist(var):
        return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
    fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
    # yapf: enable
    infer_reader = reader.infer(cfg.image_path)
    feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    if cfg.MASK_ON:
        fetch_list = [pred_boxes, masks]
    else:
        fetch_list = [pred_boxes]
    # Single-image inference: take exactly one batch from the reader.
    data = next(infer_reader())
    im_info = [data[0][1]]
    result = exe.run(fetch_list=[v.name for v in fetch_list],
                     feed=feeder.feed(data),
                     return_numpy=False)
    pred_boxes_v = result[0]
    if cfg.MASK_ON:
        masks_v = result[1]
    new_lod = pred_boxes_v.lod()
    nmsed_out = pred_boxes_v
    image = None
    if cfg.MASK_ON:
        segms_out = segm_results(nmsed_out, masks_v, im_info)
        image = draw_mask_on_image(cfg.image_path, segms_out,
                                   cfg.draw_threshold)
    draw_bounding_box_on_image(cfg.image_path, nmsed_out, cfg.draw_threshold,
                               labels_map, image)
from paddle import fluid
from paddle.fluid import layers
import numpy

# Minimal static-graph demo: add two int64 column vectors.
a = fluid.data(name="a", shape=[None, 1], dtype='int64')
b = fluid.data(name="b", shape=[None, 1], dtype='int64')
result = layers.elementwise_add(a, b)

# Execute on CPU.
cpu = fluid.CPUPlace()
exe = fluid.Executor(cpu)

# Fixed sample inputs.
data_1 = 1000
data_2 = 300
x = numpy.array([[data_1], [10]], dtype='int64')
y = numpy.array([[data_2], [200]], dtype='int64')

# Feed x and y into placeholders a and b; fetch the elementwise sum.
ret = exe.run(feed={'a': x, 'b': y}, fetch_list=[result])
print('{}'.format(ret))
def train_async(args):
    """Train a metric-learning model with periodic testing and checkpointing.

    Builds train/test programs, optionally restores a checkpoint or loads
    pretrained weights, then runs the iteration loop with a ParallelExecutor,
    logging recall@1 and saving persistables every args.save_iter_step steps.

    Args:
        args: parsed command-line namespace (model, checkpoint,
            pretrained_model, model_save_dir, batch sizes, iteration steps...).
    """
    # parameters from arguments
    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()

    train_py_reader, train_cost, global_lr, train_feas, train_label = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_feas, image, label = build_program(
        is_train=False,
        main_prog=tmp_prog,
        startup_prog=startup_prog,
        args=args)
    test_prog = tmp_prog.clone(for_test=True)

    train_fetch_list = [
        global_lr.name, train_cost.name, train_feas.name, train_label.name
    ]
    test_fetch_list = [test_feas.name]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    logging.debug('after run startup program')

    if checkpoint is not None:
        fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)

    if pretrained_model:

        def if_exist(var):
            # Only load variables whose parameter file exists on disk.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(
            exe, pretrained_model, main_program=train_prog, predicate=if_exist)

    if args.use_gpu:
        devicenum = get_gpu_num()
    else:
        devicenum = int(os.environ.get('CPU_NUM', 1))
    assert (args.train_batch_size % devicenum) == 0
    # FIX: use integer floor division — on Python 3, `/` yields a float even
    # when the division is exact (the assert above guarantees exactness).
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size

    train_reader = paddle.batch(
        reader.train(args), batch_size=train_batch_size, drop_last=True)
    test_reader = paddle.batch(
        reader.test(args), batch_size=test_batch_size, drop_last=False)
    test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    train_py_reader.decorate_paddle_reader(train_reader)

    train_exe = fluid.ParallelExecutor(
        main_program=train_prog,
        use_cuda=args.use_gpu,
        loss_name=train_cost.name)

    totalruntime = 0
    train_py_reader.start()
    iter_no = 0
    train_info = [0, 0, 0]
    while iter_no <= args.total_iter_num:
        t1 = time.time()
        lr, loss, feas, label = train_exe.run(fetch_list=train_fetch_list)
        t2 = time.time()
        period = t2 - t1
        lr = np.mean(np.array(lr))
        train_info[0] += np.mean(np.array(loss))
        train_info[1] += recall_topk(feas, label, k=1)
        train_info[2] += 1

        if iter_no % args.display_iter_step == 0:
            avgruntime = totalruntime / args.display_iter_step
            avg_loss = train_info[0] / train_info[2]
            avg_recall = train_info[1] / train_info[2]
            print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
                "recall %.4f, time %2.2f sec" % \
                (fmt_time(), iter_no, lr, avg_loss, avg_recall, avgruntime))
            sys.stdout.flush()
            totalruntime = 0
        if iter_no % 1000 == 0:
            # Reset the running averages every 1000 iterations.
            train_info = [0, 0, 0]

        totalruntime += period

        if iter_no % args.test_iter_step == 0 and iter_no != 0:
            # Evaluate recall@1 over the full test set.
            f, l = [], []
            for batch_id, data in enumerate(test_reader()):
                t1 = time.time()
                [feas] = exe.run(test_prog,
                                 fetch_list=test_fetch_list,
                                 feed=test_feeder.feed(data))
                label = np.asarray([x[1] for x in data])
                f.append(feas)
                l.append(label)
                t2 = time.time()
                period = t2 - t1
                if batch_id % 20 == 0:
                    print("[%s] testbatch %d, time %2.2f sec" % \
                        (fmt_time(), batch_id, period))
            f = np.vstack(f)
            l = np.hstack(l)
            recall = recall_topk(f, l, k=1)
            print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
                (fmt_time(), len(f), iter_no, recall))
            sys.stdout.flush()

        if iter_no % args.save_iter_step == 0 and iter_no != 0:
            model_path = os.path.join(model_save_dir + '/' + model_name,
                                      str(iter_no))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            fluid.io.save_persistables(exe, model_path,
                                       main_program=train_prog)

        iter_no += 1
def automatic_gpu_usage():
    """Pick the execution device.

    Set use_gpu to True on machines with a GPU; returns CUDAPlace(0) then,
    otherwise CPUPlace.
    """
    use_gpu = True
    if use_gpu:
        return fluid.CUDAPlace(0)
    return fluid.CPUPlace()
betch_num += 1 # print(avg_loss_val) # print(acc_val) # 计算每个测试集的平均损失 test_avg_loss = all_avg_loss / betch_num test_avg_acc = all_acc / betch_num return test_avg_loss, test_avg_acc # 2.数据处理 ----- MNIST手写字已经经过了数据处理 # 3.构建reder ----paddlepaddle里面自动写好了reader # 4.构建训练场所 place = fluid.CPUPlace() # 5.配置网络结构 # 两个数据层 # 特征值数据层 # shape图像的三阶张量形式 img = fluid.layers.data(name="img", shape=[1, 28, 28], dtype="float32") # 目标值数据层 # shape目标值的张量形式 label = fluid.layers.data(name="label", shape=[1], dtype="int64") # 多层感知机模型---两个隐层--一个输出层 ---fc网络(全连接网络) # 第一个隐层 # size神经元个数 h1 = fluid.layers.fc(input=img, size=128, act="relu", name="h1")
def train(cfg):
    """Train a segmentation model described by `cfg`.

    Builds train/eval programs, prepares the (possibly multi-process) data
    generator, compiles the train program with data parallelism, then runs
    the epoch loop with logging, optional VisualDL output, periodic
    checkpointing and optional evaluation/visualization.
    """
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    drop_last = True

    dataset = SegDataset(
        file_list=cfg.DATASET.TRAIN_FILE_LIST,
        mode=ModelPhase.TRAIN,
        shuffle=True,
        data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # Yield samples one by one, but only in groups of a full per-trainer
        # batch, so every trainer sees whole batches.
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If use sync batch norm strategy, drop last batch if number of
        # samples in batch_data is less then cfg.BATCH_SIZE to avoid NCCL
        # hang issues.
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPU
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE can divided by GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # If use multi-gpu training mode, batch data will allocated to each GPU
    # evenly.
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    data_loader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    build_model(test_prog, fluid.Program(), phase=ModelPhase.EVAL)
    data_loader.set_sample_generator(
        data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iteration
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        load_pretrained_weights(exe, train_prog,
                                cfg.TRAIN.PRETRAINED_MODEL_DIR)
    else:
        print_info(
            'Pretrained model dir {} not exists, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(
            precision=4, suppress=True, linewidth=160, floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_vdl:
        if not args.vdl_log_dir:
            print_info("Please specify the log directory by --vdl_log_dir.")
            exit(1)

        from visualdl import LogWriter
        log_writer = LogWriter(args.vdl_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    # NOTE(review): from here on `avg_loss` is rebound from the loss Variable
    # to a running float accumulator (the Variable's name was already captured
    # in fetch_list / compiled_train_prog above).
    avg_loss = 0.0
    best_mIoU = 0.0

    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").
            format(begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # traning process is corresponed to expectation
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_vdl:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  step)
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/step/sec', speed,
                                                  step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnessary log and calculate
                    loss, lr = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - step, speed)))
                        if args.use_vdl:
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/speed', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

                    # NOTE : used for benchmark, profiler tools
                    if args.is_profiler and epoch == 1 and step == args.log_steps:
                        profiler.start_profiler("All")
                    elif args.is_profiler and epoch == 1 and step == args.log_steps + 5:
                        profiler.stop_profiler("total", args.profiler_path)
                        return

            except fluid.core.EOFException:
                # End of the epoch: reset the loader and move on.
                data_loader.reset()
                break
            except Exception as e:
                # Best-effort: log and keep training on unexpected errors.
                print(e)

        if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
                or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(train_prog, epoch)
            save_infer_program(test_prog, ckpt_dir)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(
                    cfg=cfg,
                    ckpt_dir=ckpt_dir,
                    use_gpu=args.use_gpu,
                    use_mpio=args.use_mpio)
                if args.use_vdl:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)

                # Track and persist the best model seen so far.
                if mean_iou > best_mIoU:
                    best_mIoU = mean_iou
                    update_best_model(ckpt_dir)
                    print_info(
                        "Save best model {} to {}, mIoU = {:.4f}".format(
                            ckpt_dir,
                            os.path.join(cfg.TRAIN.MODEL_SAVE_DIR,
                                         'best_model'), mean_iou))

            # Use VisualDL to visualize results
            if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(
                    cfg=cfg,
                    use_gpu=args.use_gpu,
                    vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                    vis_dir="visual",
                    ckpt_dir=ckpt_dir,
                    log_writer=log_writer)

    # save final model
    if cfg.TRAINER_ID == 0:
        ckpt_dir = save_checkpoint(train_prog, 'final')
        save_infer_program(test_prog, ckpt_dir)
import time import unittest import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import declarative, ProgramTranslator from paddle.fluid.dygraph.nn import BatchNorm, Conv2D, Linear, Pool2D from test_resnet import ResNet, optimizer_setting, SEED # NOTE: Reduce batch_size from 8 to 2 to avoid unittest timeout. batch_size = 2 epoch_num = 1 place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \ else fluid.CPUPlace() program_translator = ProgramTranslator() if fluid.is_compiled_with_cuda(): fluid.set_flags({'FLAGS_cudnn_deterministic': True}) def train(to_static, build_strategy=None): """ Tests model decorated by `dygraph_to_static_output` in static mode. For users, the model is defined in dygraph mode and trained in static mode. """ with fluid.dygraph.guard(place): np.random.seed(SEED) paddle.seed(SEED) paddle.framework.random._manual_program_seed(SEED)
def test_deefcf(self):
    """Train DeepCF three ways and assert the final-batch losses agree.

    Runs the same data/seed through (1) static-graph execution, (2) dygraph,
    and (3) dygraph with ``sort_sum_gradient`` backward, then compares the
    loss of the last processed batch across all three runs.

    NOTE(review): the loop variable ``slice`` shadows the builtin; left
    untouched here to keep the code byte-identical.
    """
    seed = 90  # fixed seed so the three runs are comparable
    # Load ratings from disk when DATA_PATH is configured, else synthesize.
    if DATA_PATH:
        (users_np, items_np, labels_np, num_users, num_items,
         matrix) = load_data(DATA_PATH)
    else:
        (users_np, items_np, labels_np, num_users, num_items,
         matrix) = get_data()

    # ---- Run 1: static graph ----
    startup = fluid.Program()
    startup.random_seed = seed
    main = fluid.Program()
    main.random_seed = seed
    scope = fluid.core.Scope()
    with new_program_scope(main=main, startup=startup, scope=scope):
        users = fluid.layers.data('users', [1], dtype='int32')
        items = fluid.layers.data('items', [1], dtype='int32')
        labels = fluid.layers.data('labels', [1], dtype='float32')

        deepcf = DeepCF('deepcf', num_users, num_items, matrix)
        prediction = deepcf(users, items)
        loss = fluid.layers.reduce_sum(
            fluid.layers.log_loss(prediction, labels))
        adam = fluid.optimizer.AdamOptimizer(0.01)
        adam.minimize(loss)

        exe = fluid.Executor(fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
        exe.run(startup)
        for e in range(NUM_EPOCHES):
            sys.stderr.write('epoch %d\n' % e)
            for slice in range(0, BATCH_SIZE * NUM_BATCHES, BATCH_SIZE):
                # Drop the ragged tail so every step sees a full batch.
                if slice + BATCH_SIZE >= users_np.shape[0]:
                    break
                static_loss = exe.run(
                    main,
                    feed={
                        users.name: users_np[slice:slice + BATCH_SIZE],
                        items.name: items_np[slice:slice + BATCH_SIZE],
                        labels.name: labels_np[slice:slice + BATCH_SIZE]
                    },
                    fetch_list=[loss])[0]
                sys.stderr.write('static loss %s\n' % static_loss)

    # ---- Run 2: dygraph ----
    with fluid.dygraph.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

        deepcf = DeepCF('deepcf', num_users, num_items, matrix)
        adam = fluid.optimizer.AdamOptimizer(0.01)
        for e in range(NUM_EPOCHES):
            sys.stderr.write('epoch %d\n' % e)
            for slice in range(0, BATCH_SIZE * NUM_BATCHES, BATCH_SIZE):
                if slice + BATCH_SIZE >= users_np.shape[0]:
                    break
                prediction = deepcf(
                    to_variable(users_np[slice:slice + BATCH_SIZE]),
                    to_variable(items_np[slice:slice + BATCH_SIZE]))
                loss = fluid.layers.reduce_sum(
                    fluid.layers.log_loss(
                        prediction,
                        to_variable(labels_np[slice:slice + BATCH_SIZE])))
                loss.backward()
                adam.minimize(loss)
                deepcf.clear_gradients()
                dy_loss = loss.numpy()
                sys.stderr.write('dynamic loss: %s %s\n' % (slice, dy_loss))

    # ---- Run 3: dygraph with sorted-sum-gradient backward ----
    with fluid.dygraph.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

        deepcf2 = DeepCF('deepcf', num_users, num_items, matrix)
        adam2 = fluid.optimizer.AdamOptimizer(0.01)
        backward_strategy = fluid.dygraph.BackwardStrategy()
        backward_strategy.sort_sum_gradient = True
        for e in range(NUM_EPOCHES):
            sys.stderr.write('epoch %d\n' % e)
            for slice in range(0, BATCH_SIZE * NUM_BATCHES, BATCH_SIZE):
                if slice + BATCH_SIZE >= users_np.shape[0]:
                    break
                prediction2 = deepcf2(
                    to_variable(users_np[slice:slice + BATCH_SIZE]),
                    to_variable(items_np[slice:slice + BATCH_SIZE]))
                loss2 = fluid.layers.reduce_sum(
                    fluid.layers.log_loss(
                        prediction2,
                        to_variable(labels_np[slice:slice + BATCH_SIZE])))
                loss2.backward(backward_strategy)
                adam2.minimize(loss2)
                deepcf2.clear_gradients()
                dy_loss2 = loss2.numpy()
                sys.stderr.write('dynamic loss: %s %s\n' % (slice, dy_loss2))

    # Only the losses of the LAST completed batch are compared.
    self.assertEqual(static_loss, dy_loss)
    self.assertEqual(static_loss, dy_loss2)
with io.open("detection_bbox_result.json", 'w') as outfile: encode_func = unicode if six.PY2 else str outfile.write(encode_func(json.dumps(dts_res))) print("start evaluate bbox using coco api") cocoDt = cocoGt.loadRes("detection_bbox_result.json") cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize() if cfg.MASK_ON: with io.open("detection_segms_result.json", 'w') as outfile: encode_func = unicode if six.PY2 else str outfile.write(encode_func(json.dumps(segms_res))) print("start evaluate mask using coco api") cocoDt = cocoGt.loadRes("detection_segms_result.json") cocoEval = COCOeval(cocoGt, cocoDt, 'segm') cocoEval.evaluate() cocoEval.accumulate() if __name__ == '__main__': args = parse_args() print_arguments(args) place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \ if cfg.parallel else fluid.CUDAPlace(0) \ if cfg.use_gpu else fluid.CPUPlace() with fluid.dygraph.guard(place): eval()
def main(args):
    """Entry point for fine-tuning / evaluating an ERNIE misspelling model.

    Builds train and/or test programs from ``args`` flags (``do_train``,
    ``do_val``, ``do_test``), optionally wires up NCCL2 distributed training,
    initializes from a checkpoint or pretrained params, runs the training
    loop with periodic logging / checkpointing / validation, and finishes
    with a final dev/test evaluation on trainer 0.

    Args:
        args: parsed command-line namespace; fields used are visible inline
            below (paths, batch/seq sizes, fp16 and scheduler knobs, etc.).
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    # Device selection: first CUDA place (multi-card handled via dev_count),
    # or CPU with CPU_NUM parallelism.
    if args.use_cuda:
        dev_list = fluid.cuda_places()
        place = dev_list[0]
        dev_count = len(dev_list)
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    reader = task_reader.MisspellingReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        tokenizer=args.tokenizer,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed,
        task_id=args.task_id)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        # In token-budget mode batch_size is a token count, so steps are
        # derived from tokens-per-batch instead of examples-per-batch.
        if args.in_tokens:
            if args.batch_size < args.max_seq_len:
                raise ValueError(
                    'if in_tokens=True, batch_size should greater than max_sqelen, got batch_size:%d seqlen:%d'
                    % (args.batch_size, args.max_seq_len))

            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        log.info("Device count: %d" % dev_count)
        log.info("Num train examples: %d" % num_train_examples)
        log.info("Max train steps: %d" % max_train_steps)
        log.info("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config)
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            log.info("Theoretical memory usage in training: %.3f - %.3f %s" %
                     (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config)

        test_prog = test_prog.clone(for_test=True)

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    if args.is_distributed:
        # Multi-node setup is driven entirely by PADDLE_* env variables.
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)

        log.info("worker_endpoints:{} trainers_num:{} current_endpoint:{} \
              trainer_id:{}".format(worker_endpoints, trainers_num,
                                    current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        t = fluid.DistributeTranspiler(config=config)
        t.transpile(trainer_id,
                    trainers=worker_endpoints_env,
                    current_endpoint=current_endpoint,
                    program=train_program if args.do_train else test_prog,
                    startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Parameter initialization: full checkpoint wins over pretraining params.
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            log.info(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(exe,
                            args.init_checkpoint,
                            main_program=startup_prog,
                            use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(exe,
                                    args.init_pretraining_params,
                                    main_program=startup_prog,
                                    use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(exe,
                        args.init_checkpoint,
                        main_program=startup_prog,
                        use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                           loss_name=graph_vars["loss"].name,
                                           exec_strategy=exec_strategy,
                                           main_program=train_program,
                                           num_trainers=nccl2_num_trainers,
                                           trainer_id=nccl2_trainer_id)

        train_pyreader.set_batch_generator(train_data_generator)
    else:
        train_exe = None

    if args.do_val or args.do_test:
        # NOTE(review): share_vars_from=train_exe requires do_train; eval-only
        # runs pass train_exe=None here — presumably accepted by this Paddle
        # version, verify against ParallelExecutor docs.
        test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                          main_program=test_prog,
                                          share_vars_from=train_exe)

    if args.do_train:
        train_pyreader.start()
        steps = 0
        graph_vars["learning_rate"] = scheduled_lr

        time_begin = time.time()
        while True:
            try:
                steps += 1
                # Only fetch metrics every skip_steps iterations; other steps
                # run without fetches for speed.
                if steps % args.skip_steps != 0:
                    train_exe.run(fetch_list=[])
                else:
                    fetch_list = [
                        graph_vars["num_infer"].name,
                        graph_vars["num_label"].name,
                        graph_vars["num_correct"].name,
                        graph_vars["loss"].name,
                        graph_vars['learning_rate'].name,
                    ]
                    out = train_exe.run(fetch_list=fetch_list)
                    num_infer, num_label, num_correct, np_loss, np_lr = out
                    lr = float(np_lr[0])
                    loss = np_loss.mean()
                    precision, recall, f1 = calculate_f1(
                        num_label, num_infer, num_correct)
                    if args.verbose:
                        log.info(
                            "train pyreader queue size: %d, learning rate: %f"
                            % (train_pyreader.queue.size(),
                               lr if warmup_steps > 0 else args.learning_rate))

                    current_example, current_epoch = reader.get_train_progress(
                    )
                    time_end = time.time()
                    used_time = time_end - time_begin
                    log.info(
                        "epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                        "f1: %f, precision: %f, recall: %f, speed: %f steps/s"
                        % (current_epoch, current_example, num_train_examples,
                           steps, loss, f1, precision, recall,
                           args.skip_steps / used_time))
                    time_begin = time.time()

                if nccl2_trainer_id == 0 and steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    latest_path = os.path.join(
                        args.checkpoints, "latest"
                    )  # Always save the current copy and cover with the latest copy
                    fluid.io.save_persistables(exe, save_path, train_program)
                    fluid.io.save_persistables(exe, latest_path, train_program)

                if nccl2_trainer_id == 0 and steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        evaluate_wrapper(reader, exe, test_prog, test_pyreader,
                                         graph_vars, current_epoch, steps)
                    # evaluate test set
                    if args.do_test:
                        predict_wrapper(reader, exe, test_prog, test_pyreader,
                                        graph_vars, current_epoch, steps)

            except fluid.core.EOFException:
                # Data exhausted: persist a final step checkpoint and stop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on dev set
    if nccl2_trainer_id == 0 and args.do_val:
        current_example, current_epoch = reader.get_train_progress()
        evaluate_wrapper(reader, exe, test_prog, test_pyreader, graph_vars,
                         current_epoch, 'final')

    if nccl2_trainer_id == 0 and args.do_test:
        current_example, current_epoch = reader.get_train_progress()
        predict_wrapper(reader, exe, test_prog, test_pyreader, graph_vars,
                        current_epoch, 'final')
def context(self, num_classes=81, trainable=True, pretrained=True, phase='train'):
    """
    Distill the Head Features, so as to perform transfer learning.

    Args:
        num_classes (int): number of classes.
            NOTE(review): not referenced anywhere in this body — presumably
            consumed by a head configuration elsewhere; verify.
        trainable (bool): whether to set parameters trainable.
        pretrained (bool): whether to load default pretrained model.
        phase (str): optional choices are 'train' and 'predict'.

    Returns:
        inputs(dict): the input variables.
        outputs(dict): the output variables.
        context_prog (Program): the program to execute transfer learning.
    """
    context_prog = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(context_prog, startup_program):
        with fluid.unique_name.guard():
            # All variables get this prefix so hub modules don't collide
            # with the host program's variable names.
            var_prefix = '@HUB_{}@'.format(self.name)
            # image: NCHW float input with free batch/height/width dims
            image = fluid.layers.data(name='image',
                                      shape=[-1, 3, -1, -1],
                                      dtype='float32',
                                      lod_level=0)
            # im_info: per-image (height, width, scale) metadata
            im_info = fluid.layers.data(name='im_info',
                                        shape=[3],
                                        dtype='float32',
                                        lod_level=0)
            # backbone: ResNet-50 feature maps from stages 3/4/5
            backbone = ResNet(norm_type='affine_channel',
                              freeze_at=2,
                              norm_decay=0.,
                              depth=50,
                              feature_maps=[3, 4, 5])
            body_feats = backbone(image)
            # retina_head
            retina_head = RetinaHead(
                anchor_generator=AnchorGenerator(
                    aspect_ratios=[1.0, 2.0, 0.5],
                    variance=[1.0, 1.0, 1.0, 1.0]),
                target_assign=RetinaTargetAssign(positive_overlap=0.5,
                                                 negative_overlap=0.4),
                output_decoder=RetinaOutputDecoder(score_thresh=0.05,
                                                   nms_thresh=0.5,
                                                   pre_nms_top_n=1000,
                                                   detections_per_im=100,
                                                   nms_eta=1.0),
                num_convs_per_octave=4,
                num_chan=256,
                max_level=7,
                min_level=3,
                prior_prob=0.01,
                base_scale=4,
                num_scales_per_octave=3)
            # fpn: feature pyramid over the backbone outputs
            fpn = FPN(max_level=7,
                      min_level=3,
                      num_chan=256,
                      spatial_scale=[0.03125, 0.0625, 0.125],
                      has_extra_convs=True)
            # body_feats
            body_feats, spatial_scale = fpn.get_output(body_feats)
            # inputs, outputs, context_prog
            inputs = {
                'image': var_prefix + image.name,
                'im_info': var_prefix + im_info.name
            }
            # 'predict' exposes decoded boxes; otherwise expose raw FPN
            # features for transfer learning.
            if phase == 'predict':
                pred = retina_head.get_prediction(body_feats, spatial_scale,
                                                  im_info)
                outputs = {'bbox_out': var_prefix + pred.name}
            else:
                outputs = {
                    'body_features': [
                        var_prefix + var.name
                        for key, var in body_feats.items()
                    ]
                }

            # add_vars_prefix: rename vars in both programs to the hub prefix
            add_vars_prefix(context_prog, var_prefix)
            add_vars_prefix(fluid.default_startup_program(), var_prefix)

            # Resolve the (now prefixed) names back to Variable objects.
            global_vars = context_prog.global_block().vars
            inputs = {
                key: global_vars[value]
                for key, value in inputs.items()
            }
            outputs = {
                key: global_vars[value] if not isinstance(value, list) else
                [global_vars[var] for var in value]
                for key, value in outputs.items()
            }

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable

            if pretrained:

                # Only load variables that actually exist in the pretrained dir.
                def _if_exist(var):
                    return os.path.exists(
                        os.path.join(self.default_pretrained_model_path,
                                     var.name))

                fluid.io.load_vars(exe,
                                   self.default_pretrained_model_path,
                                   predicate=_if_exist)
            else:
                exe.run(startup_program)

            return inputs, outputs, context_prog
def test_slowfast(args):
    """Evaluate a SlowFast video model on the Kinetics test split.

    Each video is scored as the sum of predictions over
    ``num_ensemble_views * num_spatial_crops`` clips; top-1/top-5 accuracy is
    reported over the ensembled per-video predictions. Supports multi-card
    evaluation via DataParallel + all-gather.
    """
    config = parse_config(args.config_file)
    test_config = merge_configs(config, 'test', vars(args))
    print_configs(test_config, "Test")

    # Device selection: CPU, single GPU, or the GPU assigned to this worker.
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)

    _nranks = ParallelEnv().nranks  # num gpu
    bs_single = int(test_config.TEST.batch_size /
                    _nranks)  # batch_size of each gpu

    with fluid.dygraph.guard(place):
        # build model
        slowfast = SlowFast(cfg=test_config, num_classes=400)
        if args.weights:
            assert os.path.exists(args.weights + '.pdparams'),\
                "Given weight dir {} not exist.".format(args.weights)
            logger.info('load test weights from {}'.format(args.weights))
            model_dict, _ = fluid.load_dygraph(args.weights)
            slowfast.set_dict(model_dict)

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
            slowfast = fluid.dygraph.parallel.DataParallel(
                slowfast, strategy, find_unused_parameters=False)

        # create reader
        test_data = KineticsDataset(mode="test", cfg=test_config)
        test_sampler = DistributedBatchSampler(test_data,
                                               batch_size=bs_single,
                                               shuffle=False,
                                               drop_last=False)
        test_loader = DataLoader(test_data,
                                 batch_sampler=test_sampler,
                                 places=place,
                                 feed_list=None,
                                 num_workers=8,
                                 return_list=True)

        # start eval
        num_ensemble_views = test_config.TEST.num_ensemble_views
        num_spatial_crops = test_config.TEST.num_spatial_crops
        num_cls = test_config.MODEL.num_classes
        num_clips = num_ensemble_views * num_spatial_crops
        num_videos = len(test_data) // num_clips
        video_preds = np.zeros((num_videos, num_cls))
        video_labels = np.zeros((num_videos, 1), dtype="int64")
        clip_count = {}  # video id -> list of clip indices already seen

        print(
            "[EVAL] eval start, number of videos {}, total number of clips {}".
            format(num_videos, num_clips * num_videos))
        slowfast.eval()
        for batch_id, data in enumerate(test_loader):
            # call net
            model_inputs = [data[0], data[1]]
            preds = slowfast(model_inputs, training=False)
            labels = data[2]
            clip_ids = data[3]

            # gather mulit card, results of following process in each card is the same.
            if _nranks > 1:
                preds = _all_gather(preds, _nranks)
                labels = _all_gather(labels, _nranks)
                clip_ids = _all_gather(clip_ids, _nranks)

            # to numpy
            preds = preds.numpy()
            labels = labels.numpy().astype("int64")
            clip_ids = clip_ids.numpy()

            # preds ensemble: clip id encodes (video, clip-slot); skip
            # duplicate clips that padding/all-gather may produce.
            for ind in range(preds.shape[0]):
                vid_id = int(clip_ids[ind]) // num_clips
                ts_idx = int(clip_ids[ind]) % num_clips
                if vid_id not in clip_count:
                    clip_count[vid_id] = []
                if ts_idx in clip_count[vid_id]:
                    print(
                        "[EVAL] Passed!! read video {} clip index {} / {} repeatedly."
                        .format(vid_id, ts_idx, clip_ids[ind]))
                else:
                    clip_count[vid_id].append(ts_idx)
                    video_preds[vid_id] += preds[ind]  # ensemble method: sum
                    # A video's label must be consistent across its clips.
                    if video_labels[vid_id].sum() > 0:
                        assert video_labels[vid_id] == labels[ind]
                    video_labels[vid_id] = labels[ind]
            if batch_id % args.log_interval == 0:
                print("[EVAL] Processing batch {}/{} ...".format(
                    batch_id,
                    len(test_data) // test_config.TEST.batch_size))

        # check clip index of each video: each video should have seen the
        # full set {0..num_clips-1} exactly once (sum is the triangular number).
        for key in clip_count.keys():
            if len(clip_count[key]) != num_clips or sum(
                    clip_count[key]) != num_clips * (num_clips - 1) / 2:
                print(
                    "[EVAL] Warning!! video [{}] clip count [{}] not match number clips {}"
                    .format(key, clip_count[key], num_clips))

        video_preds = to_variable(video_preds)
        video_labels = to_variable(video_labels)
        acc_top1 = fluid.layers.accuracy(input=video_preds,
                                         label=video_labels,
                                         k=1)
        acc_top5 = fluid.layers.accuracy(input=video_preds,
                                         label=video_labels,
                                         k=5)
        print('[EVAL] eval finished, avg_acc1= {}, avg_acc5= {} '.format(
            acc_top1.numpy(), acc_top5.numpy()))
def check_network_convergence(cls,
                              method,
                              use_cuda=True,
                              iter=5,
                              batch_size=None,
                              feed_dict=None,
                              feed_data_reader=None,
                              get_data_from_feeder=None,
                              use_parallel_executor=True,
                              use_reduce=False,
                              use_ir_memory_optimize=True,
                              enable_inplace=True,
                              fuse_elewise_add_act_ops=False,
                              fuse_all_optimizer_ops=False,
                              fuse_all_reduce_ops=False,
                              fuse_relu_depthwise_conv=False,
                              optimizer=fluid.optimizer.Adam,
                              use_fast_executor=False,
                              enable_sequential_execution=False):
    """Build a model via ``method``, train it for ``iter`` steps, and return
    the (first_loss, last_loss) pair so callers can check convergence.

    The model may be fed either from a static ``feed_dict``, a
    ``feed_data_reader``, or a feeder callback; the many boolean flags select
    build/execution strategy options for the (optional) ParallelExecutor
    path. Exits the process if the loss becomes NaN.

    NOTE(review): parameter ``iter`` shadows the builtin; kept as-is since
    it is part of the public signature.
    """

    # Fetch-running helper: feed explicitly unless a reader was supplied.
    def run_executor(exe, binary, feed, fetch_list):
        if feed_data_reader is None:
            res = exe.run(binary, feed=feed, fetch_list=fetch_list)
        else:
            res = exe.run(binary,
                          feed=feed_data_reader.get_next(exe, binary),
                          fetch_list=fetch_list)
        return res

    if feed_data_reader is not None:
        assert isinstance(
            feed_data_reader,
            FeedDataReader), "feed_data_reader must be type of FeedDataReader"

    # Fixed seeds so repeated runs are comparable.
    paddle.seed(1)
    paddle.framework.random._manual_program_seed(1)
    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        feed_dict, loss = cls.build_model(feed_dict, get_data_from_feeder,
                                          main, method, optimizer)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup)

    build_strategy, exec_strategy = cls.set_strategy(
        enable_inplace, enable_sequential_execution, fuse_all_optimizer_ops,
        fuse_all_reduce_ops, fuse_elewise_add_act_ops,
        fuse_relu_depthwise_conv, use_fast_executor, use_ir_memory_optimize,
        use_reduce, use_cuda)

    if use_parallel_executor:
        binary = compiler.CompiledProgram(main).with_data_parallel(
            loss_name=loss.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    else:
        binary = main

    # Effective batch size scales with the number of devices in use.
    if batch_size is not None:
        batch_size *= fluid.core.get_cuda_device_count(
        ) if use_cuda else int(
            os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    begin = time.time()
    first_loss, = run_executor(exe=exe,
                               binary=binary,
                               feed=feed_dict,
                               fetch_list=[loss.name])
    # Intermediate steps run without fetches (faster); only the final step
    # fetches the loss again.
    for _ in range(iter):
        run_executor(exe=exe, binary=binary, feed=feed_dict, fetch_list=[])
    last_loss, = run_executor(exe=exe,
                              binary=binary,
                              feed=feed_dict,
                              fetch_list=[loss.name])
    end = time.time()

    if batch_size is not None:
        print("%.4f Instance per second" % ((batch_size * iter + 2) /
                                            (end - begin)))

    avg_last_loss_val = np.array(last_loss).mean()
    avg_first_loss_val = np.array(first_loss).mean()
    if math.isnan(float(avg_last_loss_val)) or math.isnan(
            float(avg_first_loss_val)):
        sys.exit("got NaN loss, training failed.")

    print(first_loss, last_loss)
    # self.assertGreater(first_loss[0], last_loss[0])
    return first_loss, last_loss
    # Tail of optimizer_program() — its `def` lies above this chunk.
    return fluid.optimizer.SGD(learning_rate=0.0001)


def train_program():
    """Build a 1-D linear regression net; return its mean squared error."""
    # Define the network: a single FC layer, y_predict = w*x + b
    x = fluid.layers.data(name="x", shape=[1], dtype='float32')
    y = fluid.layers.data(name="y", shape=[1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1, act=None)
    # Define the loss: mean squared error against the label y
    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_cost = fluid.layers.mean(cost)
    return avg_cost


use_cuda = False
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = Trainer(
    train_func=train_program, place=place, optimizer_func=optimizer_program)

# Training data: pairs (x, 2*x + 1) for x in [0, 100)
train_data = [(x, x*2+1) for x in range(0, 100)]
train_data = np.asarray(train_data, dtype=np.float32)
feed_order=['x', 'y']

train_reader = paddle.batch(
    paddle.reader.creator.np_array(train_data), batch_size=2)
def train(conf_dict, data_reader, use_cuda=False):
    """
    Train the P (predicate) classification model.
    """
    label_dict_len = data_reader.get_dict_size('label_dict')
    # Input layers: word and part-of-speech sequences (lod_level=1: variable
    # length per example)
    word = fluid.layers.data(
        name='word_data', shape=[1], dtype='int64', lod_level=1)
    postag = fluid.layers.data(
        name='token_pos', shape=[1], dtype='int64', lod_level=1)
    # Label: multi-hot vector over the predicate label dictionary
    target = fluid.layers.data(
        name='target', shape=[label_dict_len], dtype='float32', lod_level=0)
    # NN: word embedding + LSTM + pooling (builds the network)
    feature_out = p_model.db_lstm(data_reader, word, postag, conf_dict)
    print("词嵌入+ lstm + 池化")
    # Multi-label classification loss: sigmoid cross-entropy per label,
    # averaged over the batch
    class_cost = fluid.layers.sigmoid_cross_entropy_with_logits(
        x=feature_out, label=target)
    avg_cost = fluid.layers.mean(class_cost)
    # Optimizer (named sgd_optimizer but actually Adam)
    sgd_optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=2e-3, )
    sgd_optimizer.minimize(avg_cost)

    # Shuffled batch reader over the training set
    train_batch_reader = paddle.batch(
        paddle.reader.shuffle(data_reader.get_train_reader(), buf_size=8192),
        batch_size=conf_dict['batch_size'])

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    feeder = fluid.DataFeeder(feed_list=[word, postag, target], place=place)
    exe = fluid.Executor(place)

    save_dirname = conf_dict['p_model_save_dir']

    def train_loop(main_program, trainer_id=0):
        """Run the training passes, saving checkpoints as it goes."""
        exe.run(fluid.default_startup_program())

        start_time = time.time()
        batch_id = 0
        for pass_id in six.moves.xrange(conf_dict['pass_num']):
            pass_start_time = time.time()
            cost_sum, cost_counter = 0, 0
            for data in train_batch_reader():
                # fetch_list=[avg_cost] returns the batch-mean loss directly.
                cost = exe.run(main_program,
                               feed=feeder.feed(data),
                               fetch_list=[avg_cost])
                cost = cost[0]
                cost_sum += cost
                cost_counter += 1
                if batch_id % 10 == 0 and batch_id != 0:
                    print("batch %d finished, second per batch: %02f" % (
                        batch_id, (time.time() - start_time) / batch_id),
                        file=sys.stderr)

                # Early stop: once a batch loss drops below the threshold,
                # save the final inference model and return.
                if float(cost) < 0.01:
                    pass_avg_cost = cost_sum / cost_counter if cost_counter > 0 else 0.0
                    print("%d pass end, cost time: %02f, avg_cost: %f" % (
                        pass_id, time.time() - pass_start_time, pass_avg_cost),
                        file=sys.stderr)
                    save_path = os.path.join(save_dirname, 'final')
                    fluid.io.save_inference_model(save_path,
                                                  ['word_data', 'token_pos'],
                                                  [feature_out], exe,
                                                  params_filename='params')
                    return

                batch_id = batch_id + 1

            # Save a checkpoint after every pass
            pass_avg_cost = cost_sum / cost_counter if cost_counter > 0 else 0.0
            print("%d pass end, cost time: %02f, avg_cost: %f" % (
                pass_id, time.time() - pass_start_time, pass_avg_cost),
                file=sys.stderr)
            save_path = os.path.join(save_dirname,
                                     'pass_%04d-%f' % (pass_id, pass_avg_cost))
            fluid.io.save_inference_model(save_path,
                                          ['word_data', 'token_pos'],
                                          [feature_out], exe,
                                          params_filename='params')
        else:
            # All passes completed (no early stop): save the final model.
            save_path = os.path.join(save_dirname, 'final')
            fluid.io.save_inference_model(save_path,
                                          ['word_data', 'token_pos'],
                                          [feature_out], exe,
                                          params_filename='params')
            return

    train_loop(fluid.default_main_program())
def check_output_with_option(self,
                             use_gpu,
                             atol=1e-5,
                             flatten=False,
                             quant=False,
                             rtol=1e-5):
    '''
    Check whether calculating on CPU and GPU, enable TensorRT
    or disable TensorRT, enable MKLDNN or disable MKLDNN
    are all the same.

    NOTE(review): parameter ``quant`` is not referenced in this body —
    presumably consumed by a subclass or a helper; verify before removing.
    '''
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    executor = fluid.Executor(place)
    scope = fluid.Scope()
    device = "GPU" if use_gpu else "CPU"
    # Run startup and persist the model so the inference path can load it.
    with fluid.scope_guard(scope):
        executor.run(self.startup_program)
        self._save_models(self.path, list(self.feeds.keys()), self.fetch_list,
                          executor, self.main_program, scope)
    paddle_outs = self._get_paddle_outs(executor, self.main_program, scope)
    inference_outs = self._get_inference_outs(
        self._get_analysis_config(use_gpu=use_gpu))

    # Check whether the results calculated on CPU and on GPU are the same.
    self.assertTrue(
        len(paddle_outs) == len(inference_outs),
        "The number of outputs is different between inference and training forward at {}".
        format(device))
    for out, inference_out in zip(paddle_outs, inference_outs):
        paddle_out = np.array(out)
        if flatten:
            paddle_out = paddle_out.flatten()
            inference_out = inference_out.flatten()
        self.assertTrue(
            np.allclose(
                paddle_out, inference_out, atol=atol),
            "Output has diff between inference and training forward at {} ".
            format(device))

    # Check whether the trt results and the GPU results are the same.
    if use_gpu and self.enable_trt:
        tensorrt_outputs = self._get_inference_outs(
            self._get_analysis_config(
                use_gpu=use_gpu, use_trt=self.enable_trt))

        if self.trt_parameters.use_static:
            # deserialize: run a second time so the serialized engine is
            # loaded back instead of rebuilt.
            tensorrt_outputs = self._get_inference_outs(
                self._get_analysis_config(
                    use_gpu=use_gpu, use_trt=self.enable_trt))

        self.assertTrue(
            len(tensorrt_outputs) == len(paddle_outs),
            "The number of outputs is different between GPU and TensorRT. ")

        for paddle_out, tensorrt_output in zip(paddle_outs,
                                               tensorrt_outputs):
            paddle_out = np.array(paddle_out)
            if flatten:
                paddle_out = paddle_out.flatten()
                tensorrt_output = tensorrt_output.flatten()
            self.assertTrue(
                np.allclose(
                    paddle_out, tensorrt_output, rtol=rtol, atol=atol),
                "Output has diff between GPU and TensorRT. ")

    # Check whether the mkldnn results and the CPU results are the same.
    if (not use_gpu) and self.enable_mkldnn:
        mkldnn_outputs = self._get_inference_outs(
            self._get_analysis_config(
                use_gpu=use_gpu, use_mkldnn=self.enable_mkldnn))

        self.assertTrue(
            len(paddle_outs) == len(mkldnn_outputs),
            "The number of outputs is different between CPU and MKLDNN. ")

        # bfloat16 is lower precision, so relax the absolute tolerance.
        if self.enable_mkldnn_bfloat16:
            atol = 0.01
        for paddle_out, mkldnn_output in zip(paddle_outs, mkldnn_outputs):
            self.assertTrue(
                np.allclose(
                    np.array(paddle_out), mkldnn_output, atol=atol),
                "Output has diff between CPU and MKLDNN. ")
def compress(args):
    """Quantize an ImageNet classifier with the PaddleSlim Compressor.

    Builds the selected model, optionally restores pretrained weights,
    wires train/eval readers into a ``Compressor`` configured from
    ``args.config_file``, runs compression, and prints the resulting
    conv / fake-quantize op counts from the eval graph.
    """
    # Fixed ImageNet input geometry (C, H, W).
    image_shape = "3,224,224"
    image_shape = [int(m) for m in image_shape.split(",")]

    image = fluid.data(name='image', shape=[None] + image_shape, dtype='float32')
    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=1000)
    # print(out)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    # Clone the forward graph BEFORE the optimizer adds backward ops.
    val_program = fluid.default_main_program().clone()

    # quantization usually use small learning rate
    values = [1e-4, 1e-5]
    opt = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=fluid.layers.piecewise_decay(boundaries=[5000 * 12],
                                                   values=values),
        regularization=fluid.regularizer.L2Decay(1e-4))

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:
        assert os.path.exists(
            args.pretrained_model), "pretrained_model path doesn't exist"

        # Only load variables that exist as files in the pretrained dir.
        def if_exist(var):
            return os.path.exists(os.path.join(args.pretrained_model, var.name))

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(reader.val(), batch_size=args.batch_size)
    val_feed_list = [('image', image.name), ('label', label.name)]
    val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5', acc_top5.name)]

    train_reader = paddle.batch(reader.train(),
                                batch_size=args.batch_size,
                                drop_last=True)
    train_feed_list = [('image', image.name), ('label', label.name)]
    train_fetch_list = [('loss', avg_cost.name)]

    com_pass = Compressor(place,
                          fluid.global_scope(),
                          fluid.default_main_program(),
                          train_reader=train_reader,
                          train_feed_list=train_feed_list,
                          train_fetch_list=train_fetch_list,
                          eval_program=val_program,
                          eval_reader=val_reader,
                          eval_feed_list=val_feed_list,
                          eval_fetch_list=val_fetch_list,
                          teacher_programs=[],
                          train_optimizer=opt,
                          prune_infer_model=[[image.name], [out.name]],
                          distiller_optimizer=None)
    com_pass.config(args.config_file)
    com_pass.run()
    # Count conv2d and inserted fake-quantize ops as a sanity report.
    conv_op_num = 0
    fake_quant_op_num = 0
    for op in com_pass.context.eval_graph.ops():
        if op._op.type == 'conv2d':
            conv_op_num += 1
        elif op._op.type.startswith('fake_quantize'):
            fake_quant_op_num += 1
    print('conv op num {}'.format(conv_op_num))
    print('fake quant op num {}'.format(fake_quant_op_num))
def train():
    """Train YOLOv3 according to the global ``cfg`` settings.

    Builds the model and Momentum optimizer (exponential LR decay with
    warm-up), optionally loads pretrained weights, then runs the iteration
    loop with a py_reader feeding data, saving periodic snapshots and a
    final model under ``cfg.model_save_dir``.
    """
    if cfg.debug:
        # Fixed seeds for reproducible debugging runs.
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
        np.random.seed(0)

    if not os.path.exists(cfg.model_save_dir):
        os.makedirs(cfg.model_save_dir)

    model = YOLOv3()
    model.build_model()
    input_size = cfg.input_size
    loss = model.loss()
    loss.persistable = True

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    # NOTE: an empty string still splits into one element, i.e. one device.
    devices_num = len(devices.split(","))
    print("Found {} CUDA devices.".format(devices_num))

    learning_rate = cfg.learning_rate
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    # One LR value per decay segment: lr * gamma^i.
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
            learning_rate=learning_rate,
            boundaries=boundaries,
            values=values,
            warmup_iter=cfg.warm_up_iter,
            warmup_factor=cfg.warm_up_factor),
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrain:
        if not os.path.exists(cfg.pretrain):
            print("Pretrain weights not found: {}".format(cfg.pretrain))
        else:
            # Only attempt the load when the path exists; previously
            # load_vars ran unconditionally and silently loaded nothing
            # after the warning.
            def if_exist(var):
                return os.path.exists(os.path.join(cfg.pretrain, var.name))

            fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = True
    compile_program = fluid.compiler.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)

    random_sizes = [cfg.input_size]
    if cfg.random_shape:
        # Multi-scale training: input sizes 320..608 in steps of 32.
        random_sizes = [32 * i for i in range(10, 20)]

    total_iter = cfg.max_iter - cfg.start_iter
    mixup_iter = total_iter - cfg.no_mixup_iter
    train_reader = reader.train(
        input_size,
        batch_size=cfg.batch_size,
        shuffle=True,
        total_iter=total_iter * devices_num,
        mixup_iter=mixup_iter * devices_num,
        random_sizes=random_sizes,
        use_multiprocessing=cfg.use_multiprocess)
    py_reader = model.py_reader
    py_reader.decorate_paddle_reader(train_reader)

    def save_model(postfix):
        # Persist all persistable vars, replacing any stale snapshot dir.
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    fetch_list = [loss]

    py_reader.start()
    smoothed_loss = SmoothedValue()
    try:
        start_time = time.time()
        prev_start_time = start_time
        snapshot_loss = 0
        snapshot_time = 0
        for iter_id in range(cfg.start_iter, cfg.max_iter):
            prev_start_time = start_time
            start_time = time.time()
            losses = exe.run(compile_program,
                             fetch_list=[v.name for v in fetch_list])
            smoothed_loss.add_value(np.mean(np.array(losses[0])))
            snapshot_loss += np.mean(np.array(losses[0]))
            snapshot_time += start_time - prev_start_time
            lr = np.array(
                fluid.global_scope().find_var('learning_rate').get_tensor())
            print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
                iter_id, lr[0], smoothed_loss.get_mean_value(),
                start_time - prev_start_time))
            sys.stdout.flush()
            if (iter_id + 1) % cfg.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
                print("Snapshot {} saved, average loss: {}, "
                      "average time: {}".format(
                          iter_id + 1,
                          snapshot_loss / float(cfg.snapshot_iter),
                          snapshot_time / float(cfg.snapshot_iter)))
                snapshot_loss = 0
                snapshot_time = 0
    except fluid.core.EOFException:
        # Reader exhausted: reset it so the program shuts down cleanly.
        py_reader.reset()

    save_model('model_final')
def start_knowledge_service(self,
                            feed_list,
                            schema,
                            program,
                            reader_config,
                            exe,
                            buf_size=10,
                            times=1):
    """
    Start the knowledge service to generate and transfer knowledge data.
    In GPU mode, the devices to execute knowledge prediction will be
    determined by environment variable **FLAGS_selected_gpus**, or by
    **CUDA_VISIBLE_DEVICES** if it is not set, and by **CPU_NUM** (default
    1) in CPU mode. Only supported in static graph.

    Args:
        feed_list (list): A list of feed Variables or their names for the
                          input program.
        schema (dict): A dictionary to specify names and fetched
                       Variables of knowledge.
        program (fluid.Program): Inference program for the teacher model.
        reader_config (dict): The config for data reader. Support all the
            three types of generators used by `fluid.io.PyReader` and
            `fluid.io.DataLoader`, and their configs contain the key-value
            pair of the generator type and a generator object, plus
            other necessary argument pairs. See the following:

            1) sample generator:
               reader_config={"sample_generator": #some_sample_generator,
                              "batch_size": #batch_size, "drop_last": #drop_last},
               'drop_last' set to True by default,
            2) sample list generator:
               reader_config={"sample_list_generator":
                               #some_sample_list_generator},
            3) batch generator:
               reader_config={"batch_generator": #some_batch_genrator}.

            The trial to parse config will be in the order of 1) -> 3), and
            any other unrelated keys in these configs will be ignored.
        exe (fluid.Executor): The executor to run the input program.
        buf_size (int): The size of buffers for data reader and knowledge
                        writer on each device.
        times (int): The maximum repeated serving times. Default 1. Whenever
                     the public method 'get_knowledge_generator()' in Student
                     object called once, the serving times will be added one,
                     until reaching the maximum and ending the service.
    """
    # --- argument validation -------------------------------------------
    if not self._started:
        raise ValueError("The method start() should be called first!")

    if not isinstance(program, fluid.Program):
        raise ValueError(
            "Input argument 'program' should be a fluid Program!")
    # Strip read ops so the program can be fed directly via a DataLoader.
    self._program = program._inference_optimize(prune_read_op=True)

    if not isinstance(feed_list, list):
        raise ValueError("Input argument 'feed_list' should be a list!")
    else:
        # Normalize feed_list so every entry is a Variable of self._program.
        self._feed_list = []
        for feed in feed_list:
            if isinstance(feed, fluid.framework.Variable):
                self._feed_list.append(feed)
            # NOTE(review): `unicode` is Python-2 only; on Python 3 this
            # branch raises NameError for string feeds — confirm the
            # supported interpreter version.
            elif isinstance(feed, str) or isinstance(feed, unicode):
                self._feed_list.append(
                    self._program.global_block().var(feed))
            else:
                raise ValueError(
                    "Input 'feed_list' should consist of feed "
                    "Variables or their names!")

    if not isinstance(schema, dict) and not isinstance(
            schema, OrderedDict):
        raise ValueError(
            "Input argument 'schema' should be a dict or OrderedDict!")
    self._schema = schema

    if not isinstance(reader_config, dict):
        raise ValueError("The reader config must be a dictionary!")

    if not isinstance(exe, fluid.Executor):
        raise ValueError("Input argument should be a fluid Executor!")
    self._exe = exe

    if not buf_size > 0:
        raise ValueError("The buffer size should be positive!")
    self._buf_size = buf_size

    if not times > 0:
        raise ValueError("Repeated serving times should be positive!")
    self._times = times

    # --- build the knowledge description (shape/dtype/lod per item) ----
    desc = {}
    for name, var in schema.items():
        if not isinstance(var, fluid.framework.Variable):
            raise ValueError(
                "The member of schema must be fluid Variable.")
        desc[name] = {
            "shape": var.shape,
            "dtype": convert_dtype(var.dtype),
            "lod_level": var.lod_level
        }
    if not self._knowledge_desc:
        self._knowledge_desc = desc
    else:
        # In offline (file-dump) mode the description must not change
        # between serving sessions, or the dump file becomes inconsistent.
        if self._out_file and not self._knowledge_desc == desc:
            raise ValueError("The knowledge description should be kept "
                             "consistent in offline mode!")

    # --- set up the data loader over all available devices -------------
    if isinstance(self._exe.place, fluid.CUDAPlace):
        places = fluid.cuda_places()
    else:
        places = fluid.cpu_places()
    dev_count = len(places)

    data_loader = fluid.io.DataLoader.from_generator(
        feed_list=self._feed_list,
        capacity=self._buf_size * dev_count,
        use_double_buffer=(dev_count == 1),
        iterable=True)

    # With multiple devices the loader itself stays on CPU; the compiled
    # program distributes batches to the devices.
    places = [fluid.CPUPlace()] if dev_count > 1 else [self._exe.place]
    # Try reader config types in order: sample -> sample list -> batch.
    if "sample_generator" in reader_config:
        if "batch_size" not in reader_config:
            raise ValueError("batch size must be specified when using "
                             "sample generator!")
        sample_generator = reader_config["sample_generator"]
        batch_size = reader_config["batch_size"]
        drop_last = reader_config[
            "drop_last"] if "drop_last" in reader_config else True

        data_loader.set_sample_generator(reader=sample_generator,
                                         batch_size=batch_size,
                                         drop_last=drop_last,
                                         places=places)
    elif "sample_list_generator" in reader_config:
        sample_list_generator = reader_config["sample_list_generator"]
        data_loader.set_sample_list_generator(reader=sample_list_generator,
                                              places=places)
    elif "batch_generator" in reader_config:
        batch_generator = reader_config["batch_generator"]
        data_loader.set_batch_generator(reader=batch_generator,
                                        places=places)
    else:
        raise ValueError(
            "The reader config doesn't contain any valid "
            "generator type, which should be one of 'sample_generator', "
            "'sample_list_generator', and 'batch_generator'.")

    def writer(buf_queue, schema_keys):
        # Consumer thread: drains predicted batches from buf_queue and
        # forwards them to the online queue and/or the offline dump file.
        # NOTE(review): these counters are never updated or read.
        samples_sent, batches_sent = 0, 0
        while True:
            outputs = buf_queue.get()
            buf_queue.task_done()
            if not isinstance(outputs, EndSignal):
                batch_samples = dict(zip(schema_keys, outputs))
                if self._knowledge_queue:
                    self._knowledge_queue.put(batch_samples)
                if self._out_file:
                    self._out_file.write(pickle.dumps(batch_samples))
            else:
                # End of one serving round: propagate the end marker.
                if self._knowledge_queue:
                    self._knowledge_queue.put(EndSignal())

    # Asynchronous output: the daemon writer thread decouples prediction
    # from (potentially slow) queue/file output.
    out_buf_queue = Queue.Queue(self._buf_size)
    schema_keys, schema_vars = zip(*self._schema.items())
    out_thread = Thread(target=writer, args=(out_buf_queue, schema_keys))
    out_thread.daemon = True
    out_thread.start()

    compiled_program = fluid.compiler.CompiledProgram(
        self._program).with_data_parallel()

    print("Knowledge description {}".format(self._knowledge_desc))
    print(
        time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) +
        " Teacher begins to serve ...")
    # For offline dump, write the knowledge description to the head of file
    if self._out_file:
        self._out_file.write(pickle.dumps(self._knowledge_desc))
        print("output path: %s" % self._out_path)

    # Wraps the loader so full multi-device batches and the leftover tail
    # can be generated separately.
    data_reader = MixedDataReader(data_loader, dev_count)
    # For online mode, send knowledge description every time
    for repeated in range(self._times):
        if self._knowledge_queue:
            # wait for the accessing of knowledge desc and data
            while True:
                if self._sync_required:
                    self._knowledge_queue.put(SyncSignal())
                    self._knowledge_queue.put(self._knowledge_desc)
                    self._sync_required = False
                if self._data_required:
                    self._data_required = False
                    break
            self._knowledge_queue.join()

        print("No.{} time serving ... ".format(repeated))
        num_batches_sent = 0
        # Full multi-device batches: run the data-parallel program.
        for dev_batches in data_reader.multi_dev_generator():
            if self._sync_required:
                break
            outputs = self._exe.run(compiled_program,
                                    feed=dev_batches,
                                    fetch_list=schema_vars)
            out_buf_queue.put(outputs)
            num_batches_sent += dev_count
            if num_batches_sent % (100 * dev_count) == 0:
                log = "Processed {} batch samples.".format(
                    num_batches_sent)
                if self._knowledge_queue:
                    log += " Knowledge queue size {}.".format(
                        self._knowledge_queue.qsize())
                print(log)

        # Tail batches (fewer than dev_count): run single-device and
        # concatenate the per-batch outputs into one combined batch.
        outputs = []
        for index, batch in enumerate(data_reader.tail_generator()):
            if self._sync_required:
                break
            output = self._exe.run(self._program,
                                   feed=batch,
                                   fetch_list=schema_vars)
            if outputs:
                outputs = [
                    np.concatenate(
                        (outs, out), axis=0)
                    for (outs, out) in zip(outputs, output)
                ]
            else:
                outputs = copy.deepcopy(output)
        if outputs:
            out_buf_queue.put(outputs)
            num_batches_sent += (index + 1)

        print("Processed {} batch samples in total.".format(
            num_batches_sent))

        out_buf_queue.put(EndSignal())
        out_buf_queue.join()

    if self._knowledge_queue:
        self._knowledge_queue.join()
    print(
        time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) +
        " Teacher ends serving.")
def main(args):
    """Train and evaluate a two-layer GCN on a citation-style node
    classification dataset with PGL.

    Builds the train program, clones a test program before attaching the
    Adam optimizer, runs 200 training epochs with per-epoch validation,
    and finally logs test-set accuracy.

    Args:
        args: Parsed CLI namespace; reads ``dataset`` and ``use_cuda``.
    """
    dataset = load(args.dataset)

    # Symmetric normalization term D^{-1/2} per node, passed to both GCN
    # layers; nodes with indegree 0 keep norm 0 to avoid division issues.
    indegree = dataset.graph.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    dataset.graph.node_feat["norm"] = np.expand_dims(norm, -1)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()
    # (removed dead `test_program = fluid.Program()`: it was immediately
    # overwritten by train_program.clone(for_test=True) below)
    hidden_size = 16

    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.GraphWrapper(
            name="graph",
            place=place,
            node_feat=dataset.graph.node_feat_info())

        output = pgl.layers.gcn(gw,
                                gw.node_feat["words"],
                                hidden_size,
                                activation="relu",
                                norm=gw.node_feat['norm'],
                                name="gcn_layer_1")
        output = fluid.layers.dropout(
            output, 0.5, dropout_implementation='upscale_in_train')
        output = pgl.layers.gcn(gw,
                                output,
                                dataset.num_classes,
                                activation=None,
                                norm=gw.node_feat['norm'],
                                name="gcn_layer_2")
        node_index = fluid.layers.data(
            "node_index",
            shape=[None, 1],
            dtype="int64",
            append_batch_size=False)
        node_label = fluid.layers.data(
            "node_label",
            shape=[None, 1],
            dtype="int64",
            append_batch_size=False)

        # Loss/accuracy are computed only on the nodes selected by
        # node_index (train/val/test split).
        pred = fluid.layers.gather(output, node_index)
        loss, pred = fluid.layers.softmax_with_cross_entropy(
            logits=pred, label=node_label, return_softmax=True)
        acc = fluid.layers.accuracy(input=pred, label=node_label, k=1)
        loss = fluid.layers.mean(loss)

    # Clone BEFORE attaching the optimizer so the test program carries no
    # backward ops and dropout runs in inference mode.
    test_program = train_program.clone(for_test=True)
    with fluid.program_guard(train_program, startup_program):
        adam = fluid.optimizer.Adam(
            learning_rate=1e-2,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0005))
        adam.minimize(loss)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    feed_dict = gw.to_feed(dataset.graph)

    # Expand indices/labels to shape (n, 1) as the data layers expect.
    train_index = dataset.train_index
    train_label = np.expand_dims(dataset.y[train_index], -1)
    train_index = np.expand_dims(train_index, -1)

    val_index = dataset.val_index
    val_label = np.expand_dims(dataset.y[val_index], -1)
    val_index = np.expand_dims(val_index, -1)

    test_index = dataset.test_index
    test_label = np.expand_dims(dataset.y[test_index], -1)
    test_index = np.expand_dims(test_index, -1)

    dur = []
    for epoch in range(200):
        # Skip the first 3 epochs when averaging epoch time (warm-up).
        if epoch >= 3:
            t0 = time.time()
        feed_dict["node_index"] = np.array(train_index, dtype="int64")
        feed_dict["node_label"] = np.array(train_label, dtype="int64")
        train_loss, train_acc = exe.run(train_program,
                                        feed=feed_dict,
                                        fetch_list=[loss, acc],
                                        return_numpy=True)
        if epoch >= 3:
            time_per_epoch = 1.0 * (time.time() - t0)
            dur.append(time_per_epoch)

        feed_dict["node_index"] = np.array(val_index, dtype="int64")
        feed_dict["node_label"] = np.array(val_label, dtype="int64")
        val_loss, val_acc = exe.run(test_program,
                                    feed=feed_dict,
                                    fetch_list=[loss, acc],
                                    return_numpy=True)

        # Guard against np.mean([]) during warm-up epochs, which would
        # emit a RuntimeWarning and log `nan`.
        mean_dur = np.mean(dur) if dur else 0.0
        log.info("Epoch %d " % epoch + "(%.5lf sec) " % mean_dur +
                 "Train Loss: %f " % train_loss +
                 "Train Acc: %f " % train_acc + "Val Loss: %f " % val_loss +
                 "Val Acc: %f " % val_acc)

    feed_dict["node_index"] = np.array(test_index, dtype="int64")
    feed_dict["node_label"] = np.array(test_label, dtype="int64")
    test_loss, test_acc = exe.run(test_program,
                                  feed=feed_dict,
                                  fetch_list=[loss, acc],
                                  return_numpy=True)
    log.info("Accuracy: %f" % test_acc)