Example #1
def _get_grad_vartype(self, name):
    assert self.program_desc is not None
    grad_name = name + core.grad_var_suffix()
    for i in six.moves.range(self.program_desc.num_blocks()):
        block = self.program_desc.block(i)
        var_desc = block.find_var_recursive(cpt.to_bytes(grad_name))
        # return the type as soon as the grad var is found in any block;
        # fall through to None only after all blocks have been searched
        if var_desc is not None:
            return var_desc.type()
    return None
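
All of these examples revolve around core.grad_var_suffix(), which returns the suffix ("@GRAD") that the framework appends to a variable's name to form the name of its gradient variable. A minimal sketch of the naming convention, assuming a Paddle installation with the fluid API available (the parameter name is hypothetical):

from paddle.fluid import core

param_name = 'fc_0.w_0'  # hypothetical parameter name
grad_name = param_name + core.grad_var_suffix()
print(grad_name)  # fc_0.w_0@GRAD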
Example #2
def _load_persistable_vars_by_program(model_path,
                                      program_holder,
                                      params_filename=None):
    # make sure the path has been checked
    persistable_vars = _get_persistable_vars(program_holder.infer_program)
    load_var_dict = {}
    for each_var in persistable_vars:
        orig_each_name = program_holder._suffix_varname_dict[each_var.name()]
        if _is_parameter(each_var, program_holder.infer_program):
            # create output varbase
            new_var = framework.ParamBase(shape=each_var.shape(),
                                          dtype=each_var.dtype(),
                                          name=each_var.name(),
                                          type=each_var.type(),
                                          persistable=True)
        else:
            new_var = framework._varbase_creator(type=each_var.type(),
                                                 name=each_var.name(),
                                                 shape=each_var.shape(),
                                                 dtype=each_var.dtype(),
                                                 persistable=True)
        if params_filename is None:
            framework._dygraph_tracer().trace_op(
                type='load',
                inputs={},
                outputs={'Out': new_var},
                attrs={'file_path': os.path.join(model_path, orig_each_name)})
        new_var.stop_gradient = False
        load_var_dict[each_var.name()] = new_var

    if params_filename is not None:
        load_var_list = []
        for name in sorted(load_var_dict.keys()):
            load_var_list.append(load_var_dict[name])

        framework._dygraph_tracer().trace_op(
            type='load_combine',
            inputs={},
            outputs={'Out': load_var_list},
            attrs={'file_path': os.path.join(model_path, params_filename)})

        for each_var in persistable_vars:
            if not _is_parameter(each_var, program_holder.infer_program):
                continue
            param = load_var_dict[each_var.name()]
            param.stop_gradient = False

    # NOTE: [ Recovering stop_gradient information from the program ]
    # After the model is loaded, the stop_gradient information of the
    # original variables is lost. However, if a parameter has no
    # corresponding @GRAD variable in the backward program, we can
    # conclude that it is stop_gradient as well.
    all_var_names = _get_all_var_names(program_holder.train_program)
    for var_name in load_var_dict:
        grad_var_name = var_name + core.grad_var_suffix()
        if grad_var_name not in all_var_names:
            load_var_dict[var_name].stop_gradient = True

    return load_var_dict
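
The recovery step at the end is plain set membership on gradient names: a parameter whose name plus the @GRAD suffix never appears in the backward program can never receive a gradient, so it is marked stop_gradient. A self-contained sketch of the same logic with stand-in objects (all names are hypothetical):

from paddle.fluid import core

class FakeVar(object):  # stand-in for a loaded VarBase
    def __init__(self):
        self.stop_gradient = False

# pretend only fc_0.w_0 has a @GRAD variable in the train program
all_var_names = {'fc_0.w_0', 'fc_0.w_0' + core.grad_var_suffix(), 'fc_0.b_0'}
load_var_dict = {'fc_0.w_0': FakeVar(), 'fc_0.b_0': FakeVar()}

for var_name in load_var_dict:
    if var_name + core.grad_var_suffix() not in all_var_names:
        load_var_dict[var_name].stop_gradient = True

print(load_var_dict['fc_0.b_0'].stop_gradient)  # True: no grad var, so frozen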
Example #3
        def run_dygraph():
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            ocr_attention = OCRAttention()

            if Config.learning_rate_decay == "piecewise_decay":
                learning_rate = fluid.layers.piecewise_decay(
                    [50000], [Config.LR, Config.LR * 0.01])
            else:
                learning_rate = Config.LR
            optimizer = fluid.optimizer.SGD(
                learning_rate=learning_rate,
                parameter_list=ocr_attention.parameters())
            dy_param_init_value = {}
            for param in ocr_attention.parameters():
                dy_param_init_value[param.name] = param.numpy()
            for epoch in range(epoch_num):
                for batch_id in range(batch_num):
                    label_in = to_variable(label_in_np)
                    label_out = to_variable(label_out_np)
                    label_out.stop_gradient = True
                    img = to_variable(image_np)
                    dy_prediction = ocr_attention(img, label_in)
                    label_out = fluid.layers.reshape(label_out, [-1, 1],
                                                     inplace=False)
                    dy_prediction = fluid.layers.reshape(
                        dy_prediction, [label_out.shape[0], -1], inplace=False)
                    loss = fluid.layers.cross_entropy(input=dy_prediction,
                                                      label=label_out)
                    avg_loss = fluid.layers.reduce_sum(loss)

                    dy_out = avg_loss.numpy()

                    if epoch == 0 and batch_id == 0:
                        for param in ocr_attention.parameters():
                            if param.name not in dy_param_init_value:
                                dy_param_init_value[param.name] = param.numpy()
                    avg_loss.backward()
                    dy_grad_value = {}
                    for param in ocr_attention.parameters():
                        if param.trainable:
                            np_array = np.array(
                                param._grad_ivar().value().get_tensor())
                            dy_grad_value[param.name +
                                          core.grad_var_suffix()] = np_array

                    optimizer.minimize(avg_loss)
                    ocr_attention.clear_gradients()
                    dy_param_value = {}
                    for param in ocr_attention.parameters():
                        dy_param_value[param.name] = param.numpy()

            return dy_out, dy_param_init_value, dy_param_value
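
The dy_grad_value bookkeeping above keys every dygraph gradient by param.name + core.grad_var_suffix(), so the dict lines up with the gradient names fetched from the equivalent static program. The same pattern in isolation, as a sketch assuming the fluid dygraph API (the Linear layer and shapes are illustrative):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core

with fluid.dygraph.guard():
    fc = fluid.dygraph.Linear(4, 2)
    x = fluid.dygraph.to_variable(np.ones([3, 4], dtype='float32'))
    loss = fluid.layers.reduce_sum(fc(x))
    loss.backward()
    # same key convention as the static-graph fetch names
    dy_grad_value = {p.name + core.grad_var_suffix(): p.gradient()
                     for p in fc.parameters() if p.trainable}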
Example #4
def _set_grad_type(self, params, train_program):
    # NOTE: if the user sets sparse gradient mode, the param's gradient
    # will be SelectedRows, not LoDTensor, but the tracer sets the param's
    # grad VarBase from the forward VarBase (LoDTensor). If we don't fix
    # the grad var type here, RunProgramOp has to transform SelectedRows
    # to LoDTensor forcibly, which may not be the result the user wants.
    for param in params:
        grad_name = param.name + core.grad_var_suffix()
        grad_var = train_program.desc.block(0).find_var(
            cpt.to_bytes(grad_name))
        # NOTE: failing to find the grad var desc may be fine, e.g. for batch_norm
        if grad_var is None:
            continue
        param._set_grad_type(grad_var.type())
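
The @GRAD var descs that _set_grad_type looks up only exist because the backward pass was appended to the program. A sketch that builds a tiny static program and checks that each parameter received a gradient var desc, assuming the fluid 1.x-era static API and paddle.compat for the bytes conversion:

import paddle.compat as cpt
import paddle.fluid as fluid
from paddle.fluid import core

main = fluid.Program()
with fluid.program_guard(main, fluid.Program()):
    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    loss = fluid.layers.mean(fluid.layers.fc(input=x, size=2))
    fluid.backward.append_backward(loss)

block = main.desc.block(0)
for param in main.global_block().all_parameters():
    grad_var = block.find_var(cpt.to_bytes(param.name + core.grad_var_suffix()))
    print(param.name, grad_var is not None)  # expect True for the fc weight and bias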
Example #5
    def test_resnet_sort_gradient_float32(self):
        seed = 90

        batch_size = train_parameters["batch_size"]
        batch_num = 10
        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            backward_strategy = fluid.dygraph.BackwardStrategy()
            backward_strategy.sort_sum_gradient = True
            resnet = ResNet()
            optimizer = optimizer_setting(
                train_parameters, parameter_list=resnet.parameters())
            np.random.seed(seed)
            import random
            random.seed(seed)
            train_reader = paddle.batch(
                paddle.dataset.flowers.train(use_xmap=False),
                batch_size=batch_size)

            dy_param_init_value = {}
            for param in resnet.parameters():
                dy_param_init_value[param.name] = param.numpy()

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= batch_num:
                    break

                dy_x_data = np.array(
                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                    batch_size, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                out = resnet(img)
                loss = fluid.layers.cross_entropy(input=out, label=label)
                avg_loss = fluid.layers.mean(x=loss)

                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in resnet.parameters():
                        if param.name not in dy_param_init_value:
                            dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward(backward_strategy)

                dy_grad_value = {}
                for param in resnet.parameters():
                    if param.trainable:
                        np_array = np.array(
                            param._grad_ivar().value().get_tensor())
                        dy_grad_value[param.name +
                                      core.grad_var_suffix()] = np_array

                optimizer.minimize(avg_loss)
                resnet.clear_gradients()

                dy_param_value = {}
                for param in resnet.parameters():
                    dy_param_value[param.name] = param.numpy()

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            exe = fluid.Executor(fluid.CUDAPlace(0) if core.is_compiled_with_cuda()
                                 else fluid.CPUPlace())

            resnet = ResNet()
            optimizer = optimizer_setting(train_parameters)

            np.random.seed(seed)
            import random
            random.seed(seed)
            train_reader = paddle.batch(
                paddle.dataset.flowers.train(use_xmap=False),
                batch_size=batch_size)

            img = fluid.layers.data(
                name='pixel', shape=[3, 224, 224], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            out = resnet(img)
            loss = fluid.layers.cross_entropy(input=out, label=label)
            avg_loss = fluid.layers.mean(x=loss)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            static_grad_name_list = []
            for param in resnet.parameters():
                static_param_name_list.append(param.name)
            for param in resnet.parameters():
                if param.trainable:
                    static_grad_name_list.append(param.name +
                                                 core.grad_var_suffix())

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= batch_num:
                    break

                static_x_data = np.array(
                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                    [batch_size, 1])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                fetch_list.extend(static_grad_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={"pixel": static_x_data,
                                    "label": y_data},
                              fetch_list=fetch_list)

                static_param_value = {}
                static_grad_value = {}
                static_out = out[0]
                param_start_pos = 1
                grad_start_pos = len(static_param_name_list) + param_start_pos
                for i in range(param_start_pos,
                               len(static_param_name_list) + param_start_pos):
                    static_param_value[static_param_name_list[
                        i - param_start_pos]] = out[i]
                for i in range(grad_start_pos,
                               len(static_grad_name_list) + grad_start_pos):
                    static_grad_value[static_grad_name_list[
                        i - grad_start_pos]] = out[i]

        self.assertTrue(np.allclose(static_out, dy_out))

        self.assertEqual(len(dy_param_init_value), len(static_param_init_value))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())

        self.assertEqual(len(dy_grad_value), len(static_grad_value))
        for key, value in six.iteritems(static_grad_value):
            self.assertTrue(np.allclose(value, dy_grad_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())

        self.assertEqual(len(dy_param_value), len(static_param_value))
        for key, value in six.iteritems(static_param_value):
            self.assertTrue(np.allclose(value, dy_param_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())
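
The fetch-list slicing these tests repeat follows one fixed layout: out[0] is the loss, then all parameter values, then all gradient values. The offset arithmetic on its own, as a pure-Python sketch with stand-in data (names and values are hypothetical):

param_names = ['fc_0.w_0', 'fc_0.b_0']
grad_names = [n + '@GRAD' for n in param_names]
out = [0.25, 'w_val', 'b_val', 'w_grad', 'b_grad']  # loss, params, grads

static_out = out[0]
param_start_pos = 1
grad_start_pos = param_start_pos + len(param_names)
static_param_value = dict(zip(param_names, out[param_start_pos:grad_start_pos]))
static_grad_value = dict(zip(grad_names, out[grad_start_pos:]))
print(static_param_value, static_grad_value)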
Example #6
    def test_while_op(self):
        seed = 90
        epoch_num = 2
        if core.is_compiled_with_cuda():
            batch_num = 20
        else:
            print("in CPU")
            batch_num = 2
        np.random.seed(seed)
        image_np = np.random.randn(Config.batch_size, Config.DATA_SHAPE[0],
                                   Config.DATA_SHAPE[1],
                                   Config.DATA_SHAPE[2]).astype('float32')
        label_in_np = np.arange(
            0, Config.max_length,
            dtype='int64').reshape([1, Config.max_length])
        for i in range(2, Config.batch_size + 1):
            label_in_np = np.vstack((label_in_np, np.arange(
                (i - 1) * Config.max_length,
                i * Config.max_length,
                dtype='int64').reshape([1, Config.max_length])))

        label_out_np = np.arange(
            0, Config.max_length,
            dtype='int64').reshape([1, Config.max_length])
        for i in range(2, Config.batch_size + 1):
            label_out_np = np.vstack((label_out_np, np.arange(
                (i - 1) * Config.max_length,
                i * Config.max_length,
                dtype='int64').reshape([1, Config.max_length])))

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            backward_strategy = fluid.dygraph.BackwardStrategy()
            backward_strategy.sort_sum_gradient = True
            ocr_attention = OCRAttention("ocr_attention")

            if Config.learning_rate_decay == "piecewise_decay":
                learning_rate = fluid.layers.piecewise_decay(
                    [50000], [Config.LR, Config.LR * 0.01])
            else:
                learning_rate = Config.LR
            optimizer = fluid.optimizer.SGD(learning_rate=learning_rate)
            dy_param_init_value = {}
            for param in ocr_attention.parameters():
                dy_param_init_value[param.name] = param.numpy()
            for epoch in range(epoch_num):
                for batch_id in range(batch_num):
                    label_in = to_variable(label_in_np)
                    label_out = to_variable(label_out_np)
                    label_out._stop_gradient = True
                    label_out.trainable = False
                    img = to_variable(image_np)
                    dy_prediction = ocr_attention(img, label_in)
                    label_out = fluid.layers.reshape(
                        label_out, [-1, 1], inplace=False)
                    dy_prediction = fluid.layers.reshape(
                        dy_prediction, [label_out.shape[0], -1], inplace=False)
                    loss = fluid.layers.cross_entropy(
                        input=dy_prediction, label=label_out)
                    avg_loss = fluid.layers.reduce_sum(loss)

                    dy_out = avg_loss.numpy()

                    if epoch == 0 and batch_id == 0:
                        for param in ocr_attention.parameters():
                            if param.name not in dy_param_init_value:
                                dy_param_init_value[param.name] = param.numpy()
                    avg_loss.backward(backward_strategy)
                    dy_grad_value = {}
                    for param in ocr_attention.parameters():
                        if param.trainable:
                            np_array = np.array(
                                param._ivar._grad_ivar().value().get_tensor())
                            dy_grad_value[param.name +
                                          core.grad_var_suffix()] = np_array

                    optimizer.minimize(avg_loss)
                    ocr_attention.clear_gradients()
                    dy_param_value = {}
                    for param in ocr_attention.parameters():
                        dy_param_value[param.name] = param.numpy()

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            # print("static start")
            exe = fluid.Executor(fluid.CUDAPlace(0) if core.is_compiled_with_cuda()
                                 else fluid.CPUPlace())
            ocr_attention = OCRAttention("ocr_attention")

            if Config.learning_rate_decay == "piecewise_decay":
                learning_rate = fluid.layers.piecewise_decay(
                    [50000], [Config.LR, Config.LR * 0.01])
            else:
                learning_rate = Config.LR

            optimizer = fluid.optimizer.SGD(learning_rate=learning_rate)

            images = fluid.layers.data(
                name='pixel', shape=Config.DATA_SHAPE, dtype='float32')
            static_label_in = fluid.layers.data(
                name='label_in', shape=[1], dtype='int64', lod_level=0)
            static_label_out = fluid.layers.data(
                name='label_out', shape=[1], dtype='int64', lod_level=0)
            static_label_out._stop_gradient = True
            static_label_out.trainable = False

            static_prediction = ocr_attention(images, static_label_in)

            static_prediction = fluid.layers.reshape(
                static_prediction, shape=[-1, Config.num_classes + 2])

            cost = fluid.layers.cross_entropy(
                input=static_prediction, label=static_label_out)
            static_avg_loss = fluid.layers.reduce_sum(cost)
            # param_grad_list = fluid.backward.append_backward(static_avg_loss)
            optimizer.minimize(static_avg_loss)

            static_param_init_value = {}
            static_param_name_list = []
            static_grad_name_list = []
            for param in ocr_attention.parameters():
                static_param_name_list.append(param.name)
                if param.trainable:
                    static_grad_name_list.append(param.name +
                                                 core.grad_var_suffix())

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            fetch_list = [static_avg_loss.name]
            fetch_list.extend(static_param_name_list)
            fetch_list.extend(static_grad_name_list)
            for epoch in range(epoch_num):
                for batch_id in range(batch_num):
                    static_label_in = label_in_np
                    static_label_out = label_out_np
                    static_label_out = static_label_out.reshape((-1, 1))
                    out = exe.run(fluid.default_main_program(),
                                  feed={
                                      "pixel": image_np,
                                      "label_in": static_label_in,
                                      "label_out": static_label_out
                                  },
                                  fetch_list=fetch_list)
                    static_param_value = {}
                    static_grad_value = {}
                    static_out = out[0]
                    # static_test_grad = out[1]
                    for i in range(1, len(static_param_name_list) + 1):
                        static_param_value[static_param_name_list[i - 1]] = out[
                            i]
                    grad_start_pos = len(static_param_name_list) + 1
                    for i in range(grad_start_pos,
                                   len(static_grad_name_list) + grad_start_pos):
                        static_grad_value[static_grad_name_list[
                            i - grad_start_pos]] = out[i]

        self.assertTrue(np.array_equal(static_out, dy_out))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.array_equal(value, dy_param_init_value[key]))

        for key, value in six.iteritems(static_param_value):
            self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-20))
Example #7
    def test_se_resnext_float32(self):
        seed = 90

        batch_size = train_parameters["batch_size"]
        batch_num = 1
        epoch_num = 1
        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            se_resnext = SeResNeXt("se_resnext")
            optimizer = optimizer_setting(train_parameters)
            np.random.seed(seed)
            import random
            random.seed(seed)

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(self.reader_decorator(
                    paddle.dataset.flowers.train(use_xmap=False)),
                             batch_size=batch_size,
                             drop_last=True),
                places=fluid.CPUPlace())

            dy_param_init_value = {}
            for param in se_resnext.parameters():
                dy_param_init_value[param.name] = param.numpy()
            for epoch_id in range(epoch_num):
                for batch_id, data in enumerate(batch_py_reader()):

                    if batch_id >= batch_num and batch_num != -1:
                        break

                    img = data[0]
                    label = data[1]
                    label.stop_gradient = True

                    out = se_resnext(img)
                    loss = fluid.layers.cross_entropy(input=out, label=label)
                    avg_loss = fluid.layers.mean(x=loss)

                    dy_out = avg_loss.numpy()

                    if batch_id == 0:
                        for param in se_resnext.parameters():
                            if param.name not in dy_param_init_value:
                                dy_param_init_value[param.name] = param.numpy()
                    avg_loss.backward()

                    #dy_grad_value = {}
                    #for param in se_resnext.parameters():
                    #    if param.trainable:
                    #        np_array = np.array(param._ivar._grad_ivar().value()
                    #                            .get_tensor())
                    #        dy_grad_value[param.name + core.grad_var_suffix()] = np_array

                    optimizer.minimize(avg_loss)
                    se_resnext.clear_gradients()

                    dy_param_value = {}
                    for param in se_resnext.parameters():
                        dy_param_value[param.name] = param.numpy()

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            exe = fluid.Executor(fluid.CUDAPlace(0) if core.is_compiled_with_cuda()
                                 else fluid.CPUPlace())

            se_resnext = SeResNeXt("se_resnext")
            optimizer = optimizer_setting(train_parameters)

            np.random.seed(seed)
            import random
            random.seed(seed)
            train_reader = paddle.batch(
                paddle.dataset.flowers.train(use_xmap=False),
                batch_size=batch_size,
                drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[3, 224, 224],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            out = se_resnext(img)
            loss = fluid.layers.cross_entropy(input=out, label=label)
            avg_loss = fluid.layers.mean(x=loss)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            static_grad_name_list = []
            for param in se_resnext.parameters():
                static_param_name_list.append(param.name)
            for param in se_resnext.parameters():
                if param.trainable:
                    static_grad_name_list.append(param.name +
                                                 core.grad_var_suffix())

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]
            for epoch_id in range(epoch_num):
                for batch_id, data in enumerate(train_reader()):
                    if batch_id >= batch_num and batch_num != -1:
                        break

                    static_x_data = np.array([
                        x[0].reshape(3, 224, 224) for x in data
                    ]).astype('float32')
                    y_data = np.array([x[1]
                                       for x in data]).astype('int64').reshape(
                                           [batch_size, 1])

                    fetch_list = [avg_loss.name]
                    fetch_list.extend(static_param_name_list)
                    fetch_list.extend(static_grad_name_list)
                    out = exe.run(fluid.default_main_program(),
                                  feed={
                                      "pixel": static_x_data,
                                      "label": y_data
                                  },
                                  fetch_list=fetch_list)

                    static_param_value = {}
                    static_grad_value = {}
                    static_out = out[0]
                    param_start_pos = 1
                    grad_start_pos = len(
                        static_param_name_list) + param_start_pos
                    for i in range(
                            param_start_pos,
                            len(static_param_name_list) + param_start_pos):
                        static_param_value[static_param_name_list[
                            i - param_start_pos]] = out[i]
                    for i in range(grad_start_pos,
                                   len(static_grad_name_list) +
                                   grad_start_pos):
                        static_grad_value[static_grad_name_list[
                            i - grad_start_pos]] = out[i]
        self.assertTrue(np.allclose(static_out, dy_out))

        self.assertEqual(len(dy_param_init_value),
                         len(static_param_init_value))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())
        # FIXME(Yancey1989): np.array(_ivar.value().get_tensor()) leads to a memory leak
        #self.assertEqual(len(dy_grad_value), len(static_grad_value))
        #for key, value in six.iteritems(static_grad_value):
        #    self.assertTrue(np.allclose(value, dy_grad_value[key]))
        #    self.assertTrue(np.isfinite(value.all()))
        #    self.assertFalse(np.isnan(value.any()))

        self.assertEqual(len(dy_param_value), len(static_param_value))
        for key, value in six.iteritems(static_param_value):
            self.assertTrue(np.allclose(value, dy_param_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())
Example #8
def get_param_grad_names(self):
    grad_names = []
    for var_name in self.inputs['Params']:
        grad_names.append(var_name + core.grad_var_suffix())
    return grad_names
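
The same mapping written as a comprehension, as a standalone sketch (the input is any iterable of parameter names):

from paddle.fluid import core

def get_param_grad_names(param_names):
    return [name + core.grad_var_suffix() for name in param_names]

print(get_param_grad_names(['fc_0.w_0', 'fc_0.b_0']))
# ['fc_0.w_0@GRAD', 'fc_0.b_0@GRAD']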
Example #9
    def test_resnet_float32(self):
        seed = 90

        batch_size = train_parameters["batch_size"]
        batch_num = 10

        traced_layer = None

        with fluid.dygraph.guard():
            paddle.manual_seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            resnet = ResNet()
            optimizer = optimizer_setting(train_parameters,
                                          parameter_list=resnet.parameters())
            np.random.seed(seed)

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(self.reader_decorator(
                    paddle.dataset.flowers.train(use_xmap=False)),
                             batch_size=batch_size,
                             drop_last=True),
                places=fluid.CPUPlace())

            dy_param_init_value = {}
            for param in resnet.parameters():
                dy_param_init_value[param.name] = param.numpy()

            helper = DyGraphProgramDescTracerTestHelper(self)
            program = None

            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                out = None
                if batch_id % 5 == 0:
                    out, traced_layer = TracedLayer.trace(resnet, img)
                    if program is not None:
                        self.assertTrue(
                            is_equal_program(program, traced_layer.program))

                    traced_layer.save_inference_model(
                        './infer_imperative_resnet')

                    program = traced_layer.program
                else:
                    out = resnet(img)

                if traced_layer is not None:
                    resnet.eval()
                    traced_layer._switch(is_test=True)
                    out_dygraph = resnet(img)
                    out_static = traced_layer([img])
                    traced_layer._switch(is_test=False)
                    helper.assertEachVar(out_dygraph, out_static)
                    resnet.train()

                loss = fluid.layers.cross_entropy(input=out, label=label)
                avg_loss = fluid.layers.mean(x=loss)

                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in resnet.parameters():
                        if param.name not in dy_param_init_value:
                            dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()

                dy_grad_value = {}
                for param in resnet.parameters():
                    if param.trainable:
                        np_array = np.array(
                            param._grad_ivar().value().get_tensor())
                        dy_grad_value[param.name +
                                      core.grad_var_suffix()] = np_array

                optimizer.minimize(avg_loss)
                resnet.clear_gradients()

                dy_param_value = {}
                for param in resnet.parameters():
                    dy_param_value[param.name] = param.numpy()

        with new_program_scope():
            paddle.manual_seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            exe = fluid.Executor(fluid.CUDAPlace(0) if core.is_compiled_with_cuda()
                                 else fluid.CPUPlace())

            resnet = ResNet()
            optimizer = optimizer_setting(train_parameters)

            np.random.seed(seed)
            train_reader = paddle.batch(
                paddle.dataset.flowers.train(use_xmap=False),
                batch_size=batch_size)

            img = fluid.layers.data(name='pixel',
                                    shape=[3, 224, 224],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            out = resnet(img)
            loss = fluid.layers.cross_entropy(input=out, label=label)
            avg_loss = fluid.layers.mean(x=loss)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            static_grad_name_list = []
            for param in resnet.parameters():
                static_param_name_list.append(param.name)
            for param in resnet.parameters():
                if param.trainable:
                    static_grad_name_list.append(param.name +
                                                 core.grad_var_suffix())

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= batch_num:
                    break

                static_x_data = np.array([
                    x[0].reshape(3, 224, 224) for x in data
                ]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([batch_size, 1])

                if traced_layer is not None:
                    traced_layer([static_x_data])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                fetch_list.extend(static_grad_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)

                static_param_value = {}
                static_grad_value = {}
                static_out = out[0]
                param_start_pos = 1
                grad_start_pos = len(static_param_name_list) + param_start_pos
                for i in range(param_start_pos,
                               len(static_param_name_list) + param_start_pos):
                    static_param_value[static_param_name_list[
                        i - param_start_pos]] = out[i]
                for i in range(grad_start_pos,
                               len(static_grad_name_list) + grad_start_pos):
                    static_grad_value[static_grad_name_list[
                        i - grad_start_pos]] = out[i]

        print("static", static_out)
        print("dygraph", dy_out)
        self.assertTrue(np.allclose(static_out, dy_out))

        self.assertEqual(len(dy_param_init_value),
                         len(static_param_init_value))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())

        self.assertEqual(len(dy_grad_value), len(static_grad_value))
        for key, value in six.iteritems(static_grad_value):
            self.assertTrue(np.allclose(value, dy_grad_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())

        self.assertEqual(len(dy_param_value), len(static_param_value))
        for key, value in six.iteritems(static_param_value):
            self.assertTrue(np.allclose(value, dy_param_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())
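
The TracedLayer round trip used above (trace the dygraph layer once, then replay the captured program on new inputs) works the same on a toy layer. A minimal sketch, assuming the fluid-era dygraph API; the layer, shapes, and save path are illustrative:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear, TracedLayer, to_variable

with fluid.dygraph.guard():
    net = Linear(4, 2)
    x = to_variable(np.ones([1, 4], dtype='float32'))
    out_dygraph, traced_layer = TracedLayer.trace(net, inputs=[x])
    out_static = traced_layer([x])  # runs the traced static program
    traced_layer.save_inference_model('./infer_toy')  # optional export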
Example #10
        def run_dygraph():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            se_resnext = SeResNeXt()
            optimizer = optimizer_setting(
                train_parameters, parameter_list=se_resnext.parameters())
            np.random.seed(seed)

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(self.reader_decorator(
                    paddle.dataset.flowers.train(use_xmap=False)),
                             batch_size=batch_size,
                             drop_last=True),
                places=fluid.CPUPlace())

            dy_param_init_value = {}
            for param in se_resnext.parameters():
                dy_param_init_value[param.name] = param.numpy()
            for epoch_id in range(epoch_num):
                for batch_id, data in enumerate(batch_py_reader()):

                    if batch_id >= batch_num and batch_num != -1:
                        break

                    img = data[0]
                    label = data[1]
                    label.stop_gradient = True

                    out = se_resnext(img)
                    softmax_out = fluid.layers.softmax(out, use_cudnn=False)
                    loss = fluid.layers.cross_entropy(input=softmax_out,
                                                      label=label)
                    avg_loss = fluid.layers.mean(x=loss)

                    dy_out = avg_loss.numpy()

                    if batch_id == 0:
                        for param in se_resnext.parameters():
                            if param.name not in dy_param_init_value:
                                dy_param_init_value[param.name] = param.numpy()
                    avg_loss.backward()

                    dy_grad_value = {}
                    for param in se_resnext.parameters():
                        if param.trainable:
                            np_array = np.array(
                                param._grad_ivar().value().get_tensor())
                            dy_grad_value[param.name +
                                          core.grad_var_suffix()] = np_array

                    optimizer.minimize(avg_loss)
                    se_resnext.clear_gradients()

                    dy_param_value = {}
                    for param in se_resnext.parameters():
                        dy_param_value[param.name] = param.numpy()

            return dy_out, dy_param_init_value, dy_param_value, dy_grad_value
Example #11
    def train_loop():
        keys = ['loss', 'loss_cls', 'loss_bbox']
        train_stats = TrainingStats(cfg.log_window, keys)

        retinanet.train()
        for iter_id, data in enumerate(train_reader()):
            start_time = time.time()

            batch_size = len(data)
            gt_max_num = max(x[1].shape[0] for x in data)
            image_data = np.array(
                [x[0] for x in data]).astype('float32')
            if cfg.enable_ce:
                print('image: {} {}'.format(abs(image_data).sum(), image_data.shape))
            gt_box_data = np.zeros([batch_size, gt_max_num, 4])
            gt_label_data = np.zeros([batch_size, gt_max_num])
            is_crowd_data = np.ones([batch_size, gt_max_num])
            for batch_id, x in enumerate(data):
                gt_num = x[1].shape[0]
                gt_box_data[batch_id, 0:gt_num, :] = x[1]
                gt_label_data[batch_id, 0:gt_num] = x[2]
                is_crowd_data[batch_id, 0:gt_num] = x[3]
            gt_box_data = gt_box_data.astype('float32')
            gt_label_data = gt_label_data.astype('int32')
            is_crowd_data = is_crowd_data.astype('int32')
            im_info_data = np.array(
                [x[4] for x in data]).astype('float32')
            im_id_data = np.array(
                [x[5] for x in data]).astype('int32')
            outputs = retinanet('train', image_data, im_info_data,
                                gt_box_data, gt_label_data, is_crowd_data)
            loss_cls = outputs['loss_cls']
            loss_bbox = outputs['loss_bbox']
            loss = outputs['loss']
            score_pred = outputs['score_pred']
            loc_pred = outputs['loc_pred']
            cls_pred_list = outputs['cls_score_list']
            bbox_pred_list = outputs['bbox_pred_list']
            cls_score = outputs['cls_score']
            bbox_pred = outputs['bbox_pred']
            loss_cls_data = loss_cls.numpy()
            loss_bbox_data = loss_bbox.numpy()
            loss_data = loss.numpy()
            
            if cfg.use_data_parallel:
                loss = retinanet.scale_loss(loss)
                loss.backward()
                retinanet.apply_collective_grads()
            else:
                loss.backward()
            optimizer.minimize(loss)
            if cfg.enable_ce:
                print('score_pred grad: {} {}'.format(abs(score_pred.gradient()).sum(), score_pred.gradient().shape))
                print('loc_pred grad: {} {}'.format(abs(loc_pred.gradient()).sum(), loc_pred.gradient().shape))
                for var in cls_pred_list:
                    print('cls grad reshape: {} {}'.format(abs(var.gradient()).sum(), var.gradient().shape))
                for var in bbox_pred_list:
                    print('bbox grad reshape: {} {}'.format(abs(var.gradient()).sum(), var.gradient().shape))
                for var in cls_score:
                    print('cls grad original: {} {}'.format(abs(var.gradient()).sum(), var.gradient().shape))
                for var in bbox_pred:
                    print('bbox grad original: {} {}'.format(abs(var.gradient()).sum(), var.gradient().shape))
                dy_grad_value = {}
                for param in retinanet.parameters():
                    if param.name == 'retnet_cls_conv_n3_fpn3/Conv2D_0.retnet_cls_conv_n3_fpn3_w' or \
                        param.name == 'retnet_cls_conv_n2_fpn3/Conv2D_0.retnet_cls_conv_n2_fpn3_w' or \
                        param.name == 'retnet_cls_conv_n1_fpn3/Conv2D_0.retnet_cls_conv_n1_fpn3_w' or \
                        param.name == 'retnet_cls_conv_n0_fpn3/Conv2D_0.retnet_cls_conv_n0_fpn3_w' or \
                        param.name == 'retnet_cls_pred_fpn3/Conv2D_0.retnet_cls_pred_fpn3_w' or \
                        param.name == 'conv1/Conv2D_0.conv1_weights':
                        np_array = np.array(
                            param._ivar._grad_ivar().value().get_tensor())
                        dy_grad_value[param.name + core.grad_var_suffix()] = [
                            abs(np_array).sum(), np_array.shape]
                        np_array = np.array(param._ivar.value().get_tensor())
                        dy_grad_value[param.name] = [abs(np_array).sum(), np_array.shape]
                for key, value in dy_grad_value.items():
                    print('{key}: {value}'.format(key=key, value=value))
            
            retinanet.clear_gradients()

            outs = [loss_data, loss_cls_data, loss_bbox_data]
            stats = {k: v.mean() for k, v in zip(keys, outs)}
            train_stats.update(stats)
            logs = train_stats.log()
            lr = optimizer._global_learning_rate().numpy()
            end_time = time.time()
            strs = '{}, iter: {}, lr: {} {}, time: {:.3f}'.format(
                now_time(), iter_id, lr,
                logs, end_time - start_time)
            print(strs)
            sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model(retinanet.state_dict(), "model_iter{}".format(iter_id), optimizer)
            if (iter_id + 1) == cfg.max_iter:
                break
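
The ground-truth packing at the top of the loop pads per-image annotations of varying length into fixed [batch, gt_max_num, ...] arrays, zero-filled past each image's own count. The same idea as a standalone numpy sketch (shapes are hypothetical):

import numpy as np

data = [np.ones([2, 4]), np.ones([5, 4])]  # two images with 2 and 5 gt boxes
gt_max_num = max(x.shape[0] for x in data)
gt_box_data = np.zeros([len(data), gt_max_num, 4], dtype='float32')
for batch_id, boxes in enumerate(data):
    gt_box_data[batch_id, :boxes.shape[0], :] = boxes
print(gt_box_data.shape)  # (2, 5, 4)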
Example #12
        label.stop_gradient = True

        out = resnet(img)

        loss = fluid.layers.cross_entropy(input=out, label=label)
        avg_loss = fluid.layers.mean(x=loss)

        dy_out = avg_loss.numpy()

        if batch_id == 0:
            for param in resnet.parameters():
                if param.name not in dy_param_init_value:
                    dy_param_init_value[param.name] = param.numpy()

        avg_loss.backward()

        dy_grad_value = {}
        for param in resnet.parameters():
            if param.trainable:
                np_array = np.array(param._grad_ivar().value().get_tensor())
                dy_grad_value[param.name + core.grad_var_suffix()] = np_array

        optimizer.minimize(avg_loss)
        resnet.clear_gradients()

        dy_param_value = {}
        for param in resnet.parameters():
            dy_param_value[param.name] = param.numpy()

print("dygraph", dy_out)
Example #13
        def __impl__(self, *input):
            # 1. prepare inputs, outputs, attrs
            input_vars = []
            for i, value in enumerate(input):
                if not isinstance(value, (np.ndarray, core.VarBase)):
                    raise TypeError(
                        "The type of input in TranslatedLayer must be a numpy array or a Variable (VarBase), but received %s."
                        % type(value))
                # NOTE: In order to unify the API, firstly convert the input to VarBase
                if isinstance(value, np.ndarray):
                    var = core.VarBase(
                        value=value,
                        name=program_holder.input_names[i],
                        persistable=False,
                        place=framework._current_expected_place(),
                        zero_copy=True)
                else:
                    var = value
                    # NOTE: we changed var name here,
                    # but it may be an important name set by user
                    var.name = program_holder.input_names[i]
                input_vars.append(var)

            persistable_vars = []
            for var_name in program_holder.persistable_names:
                dy_var_name = self._persistable_var_name_dict[var_name]
                if dy_var_name in self._parameters:
                    persistable_vars.append(self._parameters[dy_var_name])
                elif dy_var_name in self._buffers:
                    persistable_vars.append(self._buffers[dy_var_name])
                else:
                    raise ValueError(
                        "The persistable variable %s does not exist in the current TranslatedLayer."
                        % var_name)

            output_vars = []
            for var_desc in program_holder.output_descs:
                var = core.VarBase(var_desc.dtype(), var_desc.shape(),
                                   var_desc.name(), var_desc.type(), False)
                output_vars.append(var)

            # hold forward variables
            tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
                                         "program_out_scope",
                                         core.VarDesc.VarType.STEP_SCOPES,
                                         True)
            tmp_scope_vec.value().set_scope(program_holder.scope)

            # 2. run program by op
            trace_program = program_holder.infer_program if self._is_test else program_holder.train_program
            end_op_index = program_holder.infer_program.block(0).op_size()
            framework._dygraph_tracer().trace_op(type='run_program',
                                                 inputs={
                                                     'X': input_vars,
                                                     'Params': persistable_vars
                                                 },
                                                 outputs={
                                                     'Out': output_vars,
                                                     'OutScope': tmp_scope_vec
                                                 },
                                                 attrs={
                                                     'global_block':
                                                     trace_program.block(0),
                                                     'start_op_index':
                                                     0,
                                                     'end_op_index':
                                                     end_op_index,
                                                     'is_test':
                                                     self._is_test
                                                 })

            # NOTE: [ why we need to set the param's gradient type here ]
            # If the user sets sparse gradient mode, the param's gradient
            # will be SelectedRows, not LoDTensor, but the tracer sets the
            # param's grad VarBase from the forward VarBase (LoDTensor).
            # If we don't fix the grad var type here, RunProgramOp has to
            # transform SelectedRows to LoDTensor forcibly, which may not
            # be the result the user wants.
            for persistable_var in persistable_vars:
                grad_var_name = persistable_var.name + core.grad_var_suffix()
                grad_var = trace_program.block(0).find_var(
                    cpt.to_bytes(grad_var_name))
                # NOTE: failing to find the grad var desc may be fine,
                # e.g. for batch_norm
                if grad_var is None:
                    continue
                persistable_var._set_grad_type(grad_var.type())

            # 3. prepare output, keep same form with inputs
            outs = output_vars
            if len(output_vars) == 1:
                outs = output_vars[0]
            return outs