Example #1
    def test_deep_copy(self):
        with fluid.dygraph.guard():
            empty_var = core.VarBase()
            empty_var_copy = copy.deepcopy(empty_var)
            self.assertEqual(empty_var.stop_gradient,
                             empty_var_copy.stop_gradient)
            self.assertEqual(empty_var.persistable, empty_var_copy.persistable)
            self.assertEqual(empty_var.type, empty_var_copy.type)
            self.assertEqual(empty_var.dtype, empty_var_copy.dtype)

            x = paddle.to_tensor([2.], stop_gradient=False)
            y = paddle.to_tensor([3.], stop_gradient=False)
            z = x * y
            memo = {}
            x_copy = copy.deepcopy(x, memo)
            y_copy = copy.deepcopy(y, memo)

            self.assertEqual(x_copy.stop_gradient, y_copy.stop_gradient)
            self.assertEqual(x_copy.persistable, y_copy.persistable)
            self.assertEqual(x_copy.type, y_copy.type)
            self.assertEqual(x_copy.dtype, y_copy.dtype)
            self.assertTrue(np.array_equal(x.numpy(), x_copy.numpy()))
            self.assertTrue(np.array_equal(y.numpy(), y_copy.numpy()))

            self.assertNotEqual(id(x), id(x_copy))
            x_copy[:] = 5.
            self.assertTrue(np.array_equal(x_copy.numpy(), [5.]))
            self.assertTrue(np.array_equal(x.numpy(), [2.]))

            with self.assertRaises(RuntimeError):
                copy.deepcopy(z)

            x_copy2 = copy.deepcopy(x, memo)
            y_copy2 = copy.deepcopy(y, memo)
            self.assertEqual(id(x_copy), id(x_copy2))
            self.assertEqual(id(y_copy), id(y_copy2))

            # test copy selected rows
            x = core.VarBase(core.VarDesc.VarType.FP32, [3, 100],
                             "selected_rows",
                             core.VarDesc.VarType.SELECTED_ROWS, True)
            selected_rows = x.value().get_selected_rows()
            selected_rows.get_tensor().set(np.random.rand(3, 100),
                                           core.CPUPlace())
            selected_rows.set_height(10)
            selected_rows.set_rows([3, 5, 7])
            x_copy = copy.deepcopy(x)

            self.assertEqual(x_copy.stop_gradient, x.stop_gradient)
            self.assertEqual(x_copy.persistable, x.persistable)
            self.assertEqual(x_copy.type, x.type)
            self.assertEqual(x_copy.dtype, x.dtype)

            copy_selected_rows = x_copy.value().get_selected_rows()
            self.assertEqual(copy_selected_rows.height(),
                             selected_rows.height())
            self.assertEqual(copy_selected_rows.rows(), selected_rows.rows())
            self.assertTrue(
                np.array_equal(np.array(copy_selected_rows.get_tensor()),
                               np.array(selected_rows.get_tensor())))
Example #2
    def _add_grad_as_view(self, param, align):
        assert np.prod(
            self.buffer.shape
        ) > 0, "Cannot add a gradient to a released InternalStorage, please rebuild"
        assert param.dtype == self.buffer.dtype

        grad_end = self._fill + np.prod(param.shape)
        offset = grad_end + align
        assert offset <= np.prod(self.buffer.shape)

        # Copy the current grad value to InternalStorage
        dev_id = 0 if paddle.get_device() == "cpu" else int(
            paddle.get_device().split(":")[1])
        if self._device == "cpu":
            with device_guard(dev_id, self._device):
                tmp_var = core.VarBase(self.buffer._slice(
                    self._fill, grad_end))
                param._copy_gradient_from(tmp_var)
                tmp_var.value().get_tensor()._clear()

        elif self._device == "gpu":
            tmp_var = core.VarBase(self.buffer._slice(self._fill, grad_end))
            param._copy_gradient_from(tmp_var)
            tmp_var.value().get_tensor()._clear()

        self._fill = offset
Example #3
    def _param_storage(self, param, buffer_size):
        assert isinstance(buffer_size, int)
        value = np.zeros(
            buffer_size,
            dtype=np.float16) if Type.fp16.value == param.dtype else np.zeros(
                buffer_size, dtype=np.float32)
        buffer = core.VarBase(value=value, place=core.CPUPlace())

        param_shape = param.shape
        origin_state = param.stop_gradient
        param.stop_gradient = True
        param.flatten_()
        param.stop_gradient = origin_state
        start, end = self._param2buffer[param.name][self._rank]

        # Copy the current param value
        tmp_var = core.VarBase(tensor=buffer._slice(0, param._numel()),
                               place=core.CPUPlace())
        param_cpu = param.cpu()
        tmp_var.value().get_tensor().set(param_cpu.value().get_tensor(),
                                         core.CPUPlace())
        param.value().get_tensor()._set_dims(param_shape)
        param._clear()

        # Current rank param_storage
        param.fw_storage = core.VarBase(buffer._slice(start, end),
                                        "slice@" + param.name)
        param.status = "part"

        # Update optimizer master weights
        if param.dtype == Type.fp16.value:
            self._optim._master_weights[param.fw_storage.name] = paddle.cast(
                param.fw_storage, Type.fp32.value)
Example #4
        def allreduce_(*_):
            if param.name in self._task_flow.full_grad.keys():
                full_grad = self._task_flow.full_grad[param.name]
                # Only supports sync allreduce of the current rank's layer for now
                dist.all_reduce(tensor=full_grad,
                                group=self._group,
                                use_calc_stream=True)
                dist.wait(tensor=full_grad,
                          group=self._group,
                          use_calc_stream=True)

                start, end = self._param2buffer[param.name][self._rank]
                if param.bw_storage is None:
                    param.bw_storage = core.VarBase(
                        full_grad._slice(start, end)).detach().clone()
                    if self._offload:
                        param.bw_storage = _device2cpu(param.bw_storage, True)
                else:
                    if self._offload:
                        cpu_grad = _device2cpu(
                            core.VarBase(full_grad._slice(
                                start, end)).detach().clone(), True)
                        with device_guard(device="cpu"):
                            param.bw_storage = paddle.add(
                                param.bw_storage, cpu_grad)
                    else:
                        # param.bw_storage.add_(
                        #     core.VarBase(full_grad._slice(start, end))
                        #     .detach().clone())
                        param.bw_storage = paddle.add(
                            param.bw_storage,
                            core.VarBase(full_grad._slice(
                                start, end)).detach().clone())
                param.clear_gradient(False)
                param._gradient_set_empty(False)
                tmp_var = self._task_flow.full_grad.pop(param.name)
                tmp_var._clear()

            if param.name in self._task_flow.full_param.keys():
                if param.status == "all":
                    param.use_count = 0
                    param._clear()
                    start, end = self._param2buffer[param.name][self._rank]
                    param.fw_storage = core.VarBase(
                        self._task_flow.full_param[param.name]._slice(
                            start, end),
                        param.name + "@slice").detach().clone()
                    param.status = "part"
                    tmp_var = self._task_flow.full_param.pop(param.name)
                    tmp_var._clear()

                    if self._offload:
                        param.fw_storage._clear()
                        param.master_weight._share_buffer_to(param.fw_storage)
Example #5
 def create_var_base(is_input, name, np_value, stop_gradient):
     var = core.VarBase(value=np_value,
                        name=name,
                        place=place,
                        zero_copy=True)
     var.stop_gradient = stop_gradient
     return var
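A hypothetical call site for the helper above. `place` is a free variable taken from the enclosing scope in the original snippet, so this sketch assumes a CPU place and the legacy fluid dygraph guard.

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core

with fluid.dygraph.guard(fluid.CPUPlace()):
    place = core.CPUPlace()  # assumption: the enclosing scope used a CPU place

    def create_var_base(is_input, name, np_value, stop_gradient):
        var = core.VarBase(value=np_value,
                           name=name,
                           place=place,
                           zero_copy=True)
        var.stop_gradient = stop_gradient
        return var

    x = create_var_base(True, "x_in", np.ones([2, 2], dtype="float32"), True)
    print(x.name, x.shape, x.stop_gradient)  # x_in [2, 2] True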
Example #6
    def _add_param_as_view(self, param, align, convert_gpu=True):

        assert (
            param.dtype == self.buffer.dtype
        ), "Different types for the InternalStorage and the param, cannot proceed: {} - {}".format(
            param.dtype, self.buffer.dtype)

        var_end = self._fill + np.prod(param.shape)
        offset = var_end + align
        assert offset <= np.prod(self.buffer.shape)

        p_shape = param.shape

        origin_state = param.stop_gradient
        param.stop_gradient = True
        param.flatten_()
        param.stop_gradient = origin_state

        # Copy the current param value
        dev_id = 0 if paddle.get_device() == "cpu" else int(
            paddle.get_device().split(":")[1])
        with device_guard(dev_id, "cpu"):
            tmp_var = core.VarBase(
                tensor=self.buffer._slice(self._fill, var_end))
            if convert_gpu:
                param_cpu = param.cpu()
                param.value().get_tensor()._clear()
                tmp_var.set_value(param_cpu)
            else:
                tmp_var.set_value(param)

        self._fill = offset
        return p_shape
Example #7
    def get_all_parameters(self):
        assert len(self._trainable_params.keys()) > 0
        current_layer_params = self._layer.parameters(include_sublayers=True)
        trainable_params = list(
            filter(lambda x: x.trainable, current_layer_params))
        for param in trainable_params:
            if param.use_count > 0:
                continue
            assert hasattr(param, "fw_storage"), \
                "Param {} does not have a fw_storage attribute".format(param.name)

            full_param = _all_gather(param.fw_storage,
                                     self._group,
                                     use_calc_stream=True)
            dist.wait(tensor=full_param,
                      group=self._group,
                      use_calc_stream=True)
            core.VarBase(full_param._slice(
                0, param._numel()))._share_buffer_to(param)
            param.value().get_tensor()._set_dims(param.shape)
            param.fw_storage._clear()
            param.fw_storage = None
            param.status = "all"
            param.use_count += 1

        self._optim._parameter_list = self._ori_parameter_list
        self._optim._param_groups = self._ori_param_groups
Example #8
def _wait_layer(trainable_params, layer_id, task_flow, group, use_calc_stream):
    for param in trainable_params[layer_id]:
        if param.status == "all":
            param.use_count += 1
            continue
        if param.name in task_flow.full_param.keys():
            full_param = task_flow.full_param[param.name]
            with paddle.amp.auto_cast(enable=False):
                paddle.device.cuda.synchronize()
            core.VarBase(full_param._slice(
                0, param._numel()))._share_buffer_to(param)
            param.value().get_tensor()._set_dims(param.shape)
            param.fw_storage._clear()
            param.fw_storage = None
            param.status = "all"
            param.use_count += 1
        else:
            _allgather_buffer(layer_id,
                              trainable_params,
                              group,
                              use_calc_stream,
                              task_flow,
                              sync_wait=True)
            break
    return task_flow
Example #9
 def create_out(var_id):
     var = self._outputs[var_id]
     assert isinstance(var, framework.Variable)
     var_desc = var.desc
     var_base = core.VarBase(var_desc.dtype(), var_desc.shape(),
                             var_desc.name(), var_desc.type(), False)
     return var_base
Example #10
def _allgather_buffer(layer_id,
                      trainable_params,
                      group,
                      use_calc_stream,
                      task_flow,
                      sync_wait=False):
    for param in trainable_params[layer_id]:
        if param.status == "all":
            param.use_count += 1
            continue
        with paddle.amp.auto_cast(enable=False):
            full_param = _all_gather(param.fw_storage,
                                     group,
                                     use_calc_stream=use_calc_stream)
        if sync_wait:
            with paddle.amp.auto_cast(enable=False):
                dist.wait(tensor=full_param,
                          group=group,
                          use_calc_stream=use_calc_stream)
            core.VarBase(full_param._slice(
                0, param._numel()))._share_buffer_to(param)
            param.value().get_tensor()._set_dims(param.shape)
            param.fw_storage._clear()
            param.fw_storage = None
            param.status = "all"
            param.use_count += 1
        task_flow.full_param[param.name] = full_param
    return task_flow
Example #11
def _wait_layer(trainable_params,
                task_flow,
                group,
                use_calc_stream,
                offload=False):
    paddle.device.cuda.synchronize()
    for param in trainable_params:
        if param.status == "all":
            param.use_count += 1
            continue
        if param.name in task_flow.full_param.keys():
            full_param = task_flow.full_param[param.name]
            core.VarBase(full_param._slice(
                0, param._numel()))._share_buffer_to(param)
            param.fw_storage._clear()
            param.fw_storage = None
            param.status = "all"
            param.use_count += 1
        else:
            _allgather_buffer(trainable_params,
                              group,
                              use_calc_stream=True,
                              task_flow=task_flow,
                              sync_wait=True,
                              offload=offload)
            break
    return task_flow
Example #12
    def _prepare(self, inputs):
        """
        Prepare inputs, outputs, attrs.
        """
        assert isinstance(inputs, (tuple, list))
        # Flatten inputs with nested structure into single list.
        flatten_inputs = flatten(inputs)
        # Convert variable into VarBase and feed in training data.
        input_vars = []
        expected_place = framework._current_expected_place()
        for i, value in enumerate(flatten_inputs):
            if isinstance(value, np.ndarray):
                var = None
                if not framework._in_eager_mode_:
                    var = core.VarBase(value=value,
                                       name=self._inputs[i].desc.name(),
                                       persistable=False,
                                       place=expected_place,
                                       zero_copy=True)
                else:
                    var = core.eager.Tensor(value=value,
                                            name=self._inputs[i].desc.name(),
                                            persistable=False,
                                            place=expected_place,
                                            zero_copy=True)
            elif isinstance(value, (core.VarBase, core.eager.Tensor)):
                # NOTE(Aurelius84): If var is on CPUPlace, it will be transformed multi times
                # into CUDAPlace when it's as input of multi Ops. so we move it in advance
                # to avoid this problem.
                if value.stop_gradient and not value.place._equals(
                        expected_place):
                    var = value._copy_to(expected_place, False)
                    var.stop_gradient = True
                else:
                    var = value
                var.name = self._inputs[i].desc.name()
            else:
                continue
            input_vars.append(var)

        def create_out(var_id):
            var = self._outputs[var_id]
            assert isinstance(var, framework.Variable)
            var_desc = var.desc
            var_base = None
            if not framework._in_eager_mode_:
                var_base = core.VarBase(var_desc.dtype(), var_desc.shape(),
                                        var_desc.name(), var_desc.type(),
                                        False)
            else:
                var_base = core.eager.Tensor(var_desc.dtype(),
                                             var_desc.shape(), var_desc.name(),
                                             var_desc.type(), False)
            return var_base

        # Create VarBase to receive output data.
        out_vars = list(map(create_out, self._outputs.var_ids))

        return input_vars, out_vars
Example #13
def _create_fake_var():
    """
    Create a fake_var (force on CPU) to handle empty input or output
    """
    return [
        core.VarBase(core.VarDesc.VarType.FP32, [], "Fake_var",
                     core.VarDesc.VarType.RAW, False)
    ]
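For reference, the positional constructor used here appears to take (dtype, shape, name, type, persistable), judging from the other examples on this page. A minimal sketch of the same pattern under legacy (non-eager) dygraph:

import paddle.fluid as fluid
from paddle.fluid import core

with fluid.dygraph.guard():
    fake = core.VarBase(core.VarDesc.VarType.FP32, [], "Fake_var",
                        core.VarDesc.VarType.RAW, False)
    print(fake.name)         # Fake_var
    print(fake.persistable)  # False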
Example #14
    def _create_scope_vec(self):
        # Hold forward variables
        tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
                                     "program_out_scope",
                                     core.VarDesc.VarType.STEP_SCOPES, True)

        inner_scope = core.Scope()
        tmp_scope_vec.value().set_scope(inner_scope)
        return tmp_scope_vec
Example #15
    def _param_storage(self, param, buffer_size):
        """
        This is a function to simplify the handling of parameter InternalStorages.
        """
        assert isinstance(buffer_size, int)
        value = np.zeros(
            buffer_size,
            dtype=np.float16) if Type.fp16.value == param.dtype else np.zeros(
                buffer_size, dtype=np.float32)
        buffer = core.VarBase(value=value, place=core.CPUPlace())

        param_shape = param.shape
        origin_state = param.stop_gradient
        param.stop_gradient = True
        param.flatten_()
        param.stop_gradient = origin_state
        start, end = self._param2buffer[param.name][self._rank]

        # Copy the current param value
        tmp_var = core.VarBase(tensor=buffer._slice(0, param._numel()),
                               place=core.CPUPlace())
        param_cpu = param.cpu()
        tmp_var.value().get_tensor().set(param_cpu.value().get_tensor(),
                                         core.CPUPlace())
        param.value().get_tensor()._set_dims(param_shape)

        # Current rank param_storage
        if self._offload:
            param.fw_storage = core.VarBase(buffer._slice(start, end),
                                            core.CPUPlace(),
                                            "slice@" + param.name)
            with device_guard(device="cpu"):
                param.master_weight = paddle.cast(param.fw_storage,
                                                  Type.fp32.value)
        else:
            param.fw_storage = core.VarBase(buffer._slice(start, end),
                                            "slice@" + param.name)
        param.status = "part"

        # Update optimizer master weights
        if param.dtype == Type.fp16.value and not self._offload:
            self._optim._master_weights[param.fw_storage.name] = paddle.cast(
                param.fw_storage, Type.fp32.value)
        param._clear()
Example #16
 def test_input_cuda_pinned_var(self):
     with fluid.dygraph.guard():
         data = np.random.random((2, 80, 16128)).astype('float32')
         var = core.VarBase(value=data,
                            name='',
                            persistable=False,
                            place=fluid.CUDAPinnedPlace(),
                            zero_copy=False)
         sliced = var[:, 10:, :var.shape[1]]
         self.assertEqual(sliced.shape, [2, 70, 80])
Example #17
 def _get_double_grads(self, program):
     double_grads = []
     for block in program.blocks:
         for name in block.vars:
             if "@GRAD" in name:
                 var_desc = block.vars[name].desc
                 var_base = core.VarBase(var_desc.dtype(), var_desc.shape(),
                                         var_desc.name(), var_desc.type(),
                                         False)
                 double_grads.append(var_base)
     return self._valid_vars(double_grads)
Example #18
def _create_params_grad(layer, trainable_params, param2buffer_size, task_flow):
    for param in trainable_params[id(layer)]:
        if param.name in task_flow.full_grad.keys():
            continue
        assert isinstance(param2buffer_size[param.name], int)
        temp_grad = paddle.zeros([param2buffer_size[param.name]],
                                 dtype=param.dtype)
        param._copy_gradient_from(
            core.VarBase(temp_grad._slice(0, param._numel())))
        task_flow.full_grad[param.name] = temp_grad
    return task_flow
Example #19
    def _prepare(self, inputs):
        """
        Prepare inputs, outputs, attrs.
        """
        assert isinstance(inputs, (tuple, list))
        # Flatten inputs with nested structure into single list.
        flatten_inputs = flatten(inputs)
        # Convert variable into VarBase and feed in training data.
        input_vars = []
        for i, value in enumerate(flatten_inputs):
            if isinstance(value, np.ndarray):
                var = core.VarBase(value=value,
                                   name=self._inputs[i].desc.name(),
                                   persistable=False,
                                   place=framework._current_expected_place(),
                                   zero_copy=True)
            elif isinstance(value, core.VarBase):
                var = value
                var.name = self._inputs[i].desc.name()
            else:
                continue
            input_vars.append(var)

        # Create VarBase to receive output data.
        out_vars = []
        for idx in self._outputs.var_ids:
            var = self._outputs[idx]
            assert isinstance(var, framework.Variable)
            var_desc = var.desc
            var_base = core.VarBase(var_desc.dtype(), var_desc.shape(),
                                    var_desc.name(), var_desc.type(), False)
            out_vars.append(var_base)

        # Hold forward variables
        tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
                                     "program_out_scope",
                                     core.VarDesc.VarType.STEP_SCOPES, True)

        tmp_scope_vec.value().set_scope(self._inner_scope)

        return input_vars, out_vars, tmp_scope_vec
Example #20
    def test_type_core(self):
        paddle.disable_static()
        inx = np.array([1, 2])
        tensorx = core.VarBase(inx)
        typex_str = str(type(tensorx))
        expectx = "<class 'paddle.Tensor'>"
        self.assertEqual((typex_str == expectx), True)

        tensorx = paddle.framework.VarBase(inx)
        typex_str = str(type(tensorx))
        expectx = "<class 'paddle.Tensor'>"
        self.assertEqual((typex_str == expectx), True)
Example #21
        def reduce(*_):
            if param.name in self._task_flow.full_grad.keys():
                full_grad = self._task_flow.full_grad[param.name]
                with paddle.amp.auto_cast(enable=False):
                    if not self._accumulate_grads:
                        full_grad.scale_(scale=self._world_size_scaling)
                    # Only supports sync allreduce of the current rank's layer for now
                    dist.all_reduce(tensor=full_grad,
                                    group=self._group,
                                    use_calc_stream=True)
                    dist.wait(tensor=full_grad,
                              group=self._group,
                              use_calc_stream=True)

                    start, end = self._param2buffer[param.name][self._rank]
                    if not self._accumulate_grads or param.bw_storage is None:
                        param.bw_storage = core.VarBase(
                            full_grad._slice(start, end)).detach().clone()
                    else:
                        param.bw_storage.add_(
                            core.VarBase(full_grad._slice(
                                start, end)).detach().clone())
                param.clear_gradient(False)
                param._gradient_set_empty(False)
                tmp_var = self._task_flow.full_grad.pop(param.name)
                tmp_var._clear()

            if param.name in self._task_flow.full_param.keys():
                if param.status == "all":
                    param.use_count = 0
                    param._clear()
                    start, end = self._param2buffer[param.name][self._rank]
                    with paddle.amp.auto_cast(enable=False):
                        param.fw_storage = core.VarBase(
                            self._task_flow.full_param[param.name]._slice(
                                start, end),
                            param.name + "@slice").detach().clone()
                    param.status = "part"
                    tmp_var = self._task_flow.full_param.pop(param.name)
                    tmp_var._clear()
Example #22
def to_variable(value, name=None, zero_copy=None):
    """
    This API creates a ``Variable`` object from a numpy\.ndarray or an existing Variable object.

    Parameters:
        value(ndarray|Variable): The numpy\.ndarray or Variable object that needs to be converted, it can be multi-dimension, and the data type is one of numpy\.{float16, float32, float64, int16, int32, int64, uint8, uint16}.
        name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`
        zero_copy(bool, optional): Whether to share memory with the input numpy array. This parameter only works with CPUPlace and will be set to True when it is None. Default: None.

    Returns:
        Variable: If ``value`` is a numpy\.ndarray object, return ``Tensor`` created from the specified numpy\.ndarray object, which has same data type and shape with ``value``. If ``value`` is a Variable object, just return ``value``.


    Examples:

     .. code-block:: python

        import numpy as np
        import paddle.fluid as fluid

        with fluid.dygraph.guard(fluid.CPUPlace()):
            x = np.ones([2, 2], np.float32)
            y = fluid.dygraph.to_variable(x, zero_copy=False)
            x[0][0] = -1
            y[0][0].numpy()  # array([1.], dtype=float32)
            y = fluid.dygraph.to_variable(x)
            x[0][0] = 0
            y[0][0].numpy()  # array([0.], dtype=float32)

    """
    if isinstance(value, np.ndarray):
        assert framework.in_dygraph_mode(
        ), "to_variable could only be called in dygraph mode"
        if isinstance(framework._current_expected_place(),
                      framework.core.CPUPlace):
            if zero_copy is None:
                zero_copy = True
        else:
            assert not zero_copy, "zero_copy mode can only be used with CPUPlace"
            zero_copy = False
        py_var = core.VarBase(value=value,
                              place=framework._current_expected_place(),
                              persistable=False,
                              zero_copy=zero_copy,
                              name=name if name else '')
        return py_var
    elif isinstance(value, (core.VarBase, framework.Variable)):
        return value
    else:
        raise TypeError(
            "to_variable only accepts 'ndarray' and 'Variable' as value's input"
        )
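to_variable is the legacy fluid entry point; under Paddle 2.x the same conversion is usually written with the public paddle.to_tensor API. A rough equivalent follows (a sketch, not the library's internal code path); note that paddle.to_tensor copies the numpy data rather than sharing it:

import numpy as np
import paddle

x = np.ones([2, 2], dtype="float32")
y = paddle.to_tensor(x)        # always copies, unlike zero_copy=True above
x[0][0] = -1
print(y[0][0].numpy())         # [1.] -- y does not share memory with x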
Example #23
 def _create_scope_vec(self):
     # Hold forward variables
     tmp_scope_vec = None
     inner_scope = core.Scope()
     if not framework._in_eager_mode_:
         tmp_scope_vec = core.VarBase(core.VarDesc.VarType.FP32, [],
                                      "program_out_scope",
                                      core.VarDesc.VarType.STEP_SCOPES,
                                      True)
         tmp_scope_vec.value().set_scope(inner_scope)
     else:
         tmp_scope_vec = [inner_scope]
     return tmp_scope_vec
Example #24
 def create_var_base(is_input, name, np_value, stop_gradient):
     if _in_eager_mode_:
         var = core.eager.Tensor(value=np_value,
                                 name=name,
                                 place=place,
                                 zero_copy=True)
     else:
         var = core.VarBase(value=np_value,
                            name=name,
                            place=place,
                            zero_copy=True)
     var.stop_gradient = stop_gradient
     return var
Example #25
def _create_out(var):
    assert isinstance(var, Variable)
    var_desc = var.desc
    var_base = None
    if _in_legacy_dygraph():
        var_base = core.VarBase(var_desc.dtype(),
                                var_desc.shape(),
                                var_desc.name(), var_desc.type(), False)
    else:
        var_base = core.eager.Tensor(var_desc.dtype(),
                                     var_desc.shape(),
                                     var_desc.name(), var_desc.type(), False)
    return var_base
Example #26
def valid_vars(vars):
    """
    Note: run_program_op.InferShape requires `X`/`Out` not to be null,
    but empty lists are common in dy2static, so a fake VarBase is created
    to handle the problem.
    """
    if vars:
        return vars
    return [
        core.VarBase(value=[1],
                     name='Fake_var',
                     place=framework._current_expected_place())
    ]
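A hypothetical use of valid_vars, assuming the definition above is in scope and dygraph mode is active; an empty list is padded with a single fake VarBase so that run_program_op.InferShape never sees a null input:

import paddle.fluid as fluid
from paddle.fluid import core, framework

with fluid.dygraph.guard():
    print(valid_vars([])[0].name)  # Fake_var -- placeholder for an empty in/out list

    real = core.VarBase(value=[1],
                        name='real_var',
                        place=framework._current_expected_place())
    print(valid_vars([real])[0].name)  # real_var -- non-empty lists pass through unchanged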
Example #27
def _create_fake_var():
    """
    Create a fake_var (force on CPU) to handle empty input or output
    """
    if not framework._in_eager_mode_:
        return [
            core.VarBase(core.VarDesc.VarType.FP32, [], "Fake_var",
                         core.VarDesc.VarType.RAW, False)
        ]
    else:
        return [
            core.eager.Tensor(core.VarDesc.VarType.FP32, [], "Fake_var",
                              core.VarDesc.VarType.RAW, False)
        ]
Example #28
 def create_out(var_id):
     var = self._outputs[var_id]
     assert isinstance(var, framework.Variable)
     var_desc = var.desc
     var_base = None
     if not framework._in_eager_mode_:
         var_base = core.VarBase(var_desc.dtype(), var_desc.shape(),
                                 var_desc.name(), var_desc.type(),
                                 False)
     else:
         var_base = core.eager.Tensor(var_desc.dtype(),
                                      var_desc.shape(), var_desc.name(),
                                      var_desc.type(), False)
     return var_base
Example #29
 def _generate_master_params(self, trainable_params):
     if self.offload:
         for param in trainable_params:
             if param.name not in self._master_params.keys():
                 self._master_params[param.name] = core.VarBase(
                     name=param.name,
                     value=param.cast(dtype=Type.fp32.value).numpy(),
                     place=core.CPUPlace(),
                     stop_gradient=param.stop_gradient)
     else:
         for param in trainable_params:
             if param.dtype == Type.fp16.value:
                 self._optim._master_weights[param.name] = paddle.cast(
                     param, Type.fp32.value)
Example #30
def _release_param(layer, trainable_params, param2buffer, rank, task_flow):
    for param in trainable_params[id(layer)]:
        # Shared weights communicated asynchronously are not cleared until use_count drops to 0
        param.use_count -= 1
        if param.use_count == 0:
            param._clear()
            if param.name in task_flow.full_param.keys():
                start, end = param2buffer[param.name][rank]
                with paddle.amp.auto_cast(enable=False):
                    param.fw_storage = core.VarBase(
                        task_flow.full_param[param.name]._slice(start, end),
                        param.name + "@slice").detach().clone()
                param.status = "part"
                tmp_var = task_flow.full_param.pop(param.name)
                tmp_var._clear()
    return