def decorate_with_data_loader(self):
    main_prog = paddle.static.Program()
    start_prog = paddle.static.Program()
    with paddle.static.program_guard(main_prog, start_prog):
        with paddle.fluid.unique_name.guard():
            image = fluid.layers.data(
                name='image', shape=[3, 224, 224], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            data_loader = fluid.io.DataLoader.from_generator(
                feed_list=[image, label],
                capacity=4,
                iterable=False,
                use_double_buffer=False)

            zero_var = fluid.layers.fill_constant(
                shape=[1], dtype='int64', value=0)
            one_var = fluid.layers.fill_constant(
                shape=[1], dtype='int64', value=1)
            # Binarize the label: non-zero labels become 0, zero becomes 1.
            with fluid.layers.control_flow.Switch() as switch:
                with switch.case(label != zero_var):
                    fluid.layers.assign(input=zero_var, output=label)
                with switch.default():
                    fluid.layers.assign(input=one_var, output=label)

            net = resnet_cifar10(image)
            logits = fluid.layers.fc(input=net, size=10, act="softmax")

    # Pin every mul op to FP32 before the cast, so cast_model_to_fp16 must
    # rewrite the dtype attributes itself.
    block = main_prog.global_block()
    for op in block.ops:
        if op.type == "mul":
            op._set_attr('in_dtype', fluid.core.VarDesc.VarType.FP32)
            op._set_attr('out_dtype', fluid.core.VarDesc.VarType.FP32)
            op._set_attr('dtype', fluid.core.VarDesc.VarType.FP32)

    cast_model_to_fp16(main_prog, use_fp16_guard=False)
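
# A minimal, self-contained sketch of the same cast on a toy program. It
# assumes cast_model_to_fp16 is exposed as paddle.static.amp.cast_model_to_fp16
# (the exact import path varies across Paddle versions) and uses the 2.x
# static-graph API rather than fluid.layers.
import paddle
from paddle.static.amp import cast_model_to_fp16

paddle.enable_static()
toy_main = paddle.static.Program()
toy_start = paddle.static.Program()
with paddle.static.program_guard(toy_main, toy_start):
    x = paddle.static.data(name='x', shape=[None, 16], dtype='float32')
    y = paddle.static.nn.fc(x, size=4)
# Rewrites ops and variables in toy_main in place so compute runs in FP16.
cast_model_to_fp16(toy_main, use_fp16_guard=False)
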
def _infer_pure_fp16_program(self):
    """
    Lazy initialized property of _infer_pure_fp16_program.
    """
    infer_pure_fp16_program = self._origin_main_program.clone()
    with program_guard(infer_pure_fp16_program):
        cast_model_to_fp16(
            infer_pure_fp16_program, self._amp_list, use_fp16_guard=False)

    return infer_pure_fp16_program
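
# The decorator that makes the property above lazy is not shown in this
# snippet; a minimal sketch of such a descriptor follows (the name
# LazyInitialized is an assumption). On first access it runs the wrapped
# function, then shadows itself with the computed value on the instance,
# so the program is cloned and cast at most once.
class LazyInitialized:
    def __init__(self, function):
        self.function = function

    def __get__(self, instance, cls):
        val = self.function(instance)
        # Cache on the instance; later lookups bypass this descriptor.
        setattr(instance, self.function.__name__, val)
        return val
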
def create_model(architecture, image, classes_num, config, is_train):
    """
    Create a model

    Args:
        architecture(dict): architecture information, name(such as ResNet50) is needed
        image(variable): model input variable
        classes_num(int): num of classes
        config(dict): model config
        is_train(bool): whether the model is built for training

    Returns:
        out(variable): model output variable
    """
    use_pure_fp16 = config.get("use_pure_fp16", False)
    name = architecture["name"]
    params = architecture.get("params", {})

    data_format = "NCHW"
    if "data_format" in config:
        params["data_format"] = config["data_format"]
        data_format = config["data_format"]
    input_image_channel = config.get('image_shape', [3, 224, 224])[0]
    if input_image_channel != 3:
        logger.warning(
            "Input image channel is changed to {}, usually for better speed-up."
            .format(input_image_channel))
        params["input_image_channel"] = input_image_channel
    if "is_test" in params:
        params['is_test'] = not is_train
    model = architectures.__dict__[name](class_dim=classes_num, **params)

    if use_pure_fp16 and not config.get("use_dali", False):
        image = image.astype('float16')
    if data_format == "NHWC":
        image = paddle.tensor.transpose(image, [0, 2, 3, 1])
        image.stop_gradient = True
    out = model(image)
    if use_pure_fp16:
        # Cast the whole program to FP16, then return FP32 logits so the
        # loss can be computed in full precision.
        cast_model_to_fp16(paddle.static.default_main_program())
        out = out.astype('float32')
    return out
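
# Hedged usage sketch for create_model inside a static-graph program. The
# architecture name, class count, and config keys below are illustrative
# values, not taken from any particular config file.
import paddle

paddle.enable_static()
main_prog = paddle.static.Program()
start_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, start_prog):
    image = paddle.static.data(
        name='image', shape=[None, 3, 224, 224], dtype='float32')
    arch = {"name": "ResNet50", "params": {}}
    config = {"use_pure_fp16": False, "image_shape": [3, 224, 224]}
    out = create_model(arch, image, classes_num=1000,
                       config=config, is_train=True)
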
def train(use_pure_fp16=True, use_nesterov=False):
    classdim = 10
    data_shape = [3, 32, 32]
    BATCH_SIZE = 128
    PASS_NUM = 1

    train_program = fluid.Program()
    startup_prog = fluid.Program()
    train_program.random_seed = 123
    startup_prog.random_seed = 456
    with fluid.program_guard(train_program, startup_prog):
        images = fluid.layers.data(
            name='pixel', shape=data_shape, dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        net = resnet_cifar10(images, 32)

        logits = fluid.layers.fc(input=net, size=classdim, act="softmax")
        if use_pure_fp16:
            cast_model_to_fp16(fluid.default_main_program())
            logits_fp32 = fluid.layers.cast(x=logits, dtype="float32")
        else:
            logits_fp32 = logits
        cost = fluid.layers.softmax_with_cross_entropy(
            logits_fp32, label, return_softmax=False)
        sum_cost = fluid.layers.reduce_sum(cost)

        # Test program
        test_program = train_program.clone(for_test=True)

        optimizer = fluid.contrib.optimizer.Momentum(
            learning_rate=0.001,
            momentum=0.9,
            use_nesterov=use_nesterov,
            regularization=fluid.regularizer.L2Decay(1e-4),
            multi_precision=use_pure_fp16,
            rescale_grad=1.0 / BATCH_SIZE)

        optimizer.minimize(sum_cost)

    # no shuffle for unit test
    train_reader = paddle.batch(
        paddle.dataset.cifar.train10(), batch_size=BATCH_SIZE)

    test_reader = paddle.batch(
        paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])

    def train_loop(main_program):
        exe.run(startup_prog)
        if use_pure_fp16:
            # Parameters are created in FP32; cast them in place to FP16.
            cast_parameters_to_fp16(place, train_program, fluid.global_scope())
        # `compile` here is a helper from the surrounding test file, not the
        # Python builtin.
        compiled_program = compile(train_program, sum_cost.name)
        loss = 0.0
        for pass_id in range(PASS_NUM):
            train_loss_list = []
            for batch_id, data in enumerate(train_reader()):
                loss, = exe.run(compiled_program,
                                feed=feeder.feed(data),
                                fetch_list=[sum_cost])
                print('PassID {0:1}, Train Batch ID {1:04}, train loss {2:2.4}'
                      .format(pass_id, batch_id + 1, float(loss)))
                train_loss_list.append(float(loss))

                if batch_id >= 4:  # For speeding up CI
                    test_loss_list = []
                    for tid, test_data in enumerate(test_reader()):
                        loss_t, = exe.run(program=test_program,
                                          feed=feeder.feed(test_data),
                                          fetch_list=[sum_cost])
                        test_loss_list.append(float(loss_t))
                        print(
                            'PassID {0:1}, Test Batch ID {1:04}, test loss {2:2.4}'
                            .format(pass_id, tid + 1, float(loss_t)))
                        if tid >= 4:
                            break  # For speeding up CI
                    return train_loss_list, test_loss_list

    return train_loop(train_program)
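
# Hedged usage sketch: the surrounding unit test compares pure-FP16 training
# against the FP32 baseline; the tolerance below is an illustrative value.
if __name__ == '__main__':
    fp16_train_loss, fp16_test_loss = train(use_pure_fp16=True)
    fp32_train_loss, fp32_test_loss = train(use_pure_fp16=False)
    # With a multi-precision Momentum optimizer, FP16 losses should stay
    # close to the FP32 ones over these few CIFAR-10 batches.
    for fp16_loss, fp32_loss in zip(fp16_train_loss, fp32_train_loss):
        assert abs(fp16_loss - fp32_loss) < 0.1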