Example #1
    def SerializeMultiPassDesc(self):
        switch_static_mode = paddle.in_dynamic_mode()
        if switch_static_mode:
            paddle.enable_static()
        multi_pass_desc = pass_desc_pb2.MultiPassDesc()
        multi_pass_desc.pass_type = self._pass_type
        # Traverse all pass pairs and convert them to PassDesc data.
        # Caching may be added here in the future.
        for (pattern, replace) in self._pass_pairs:
            pass_desc = multi_pass_desc.pass_descs.add()
            # Convert ProgramDescs of the pattern and replace subgraphs.
            pattern_vars, pattern_ops = self._func_to_program_desc(
                pattern, pass_desc.pattern)
            replace_vars, replace_ops = self._func_to_program_desc(
                replace, pass_desc.replace)
            self._convert_vars_to_pass_desc(pattern_vars, replace_vars,
                                            pass_desc)
            self._convert_ops_to_pass_desc(pattern_ops, replace_ops, pass_desc)
        if switch_static_mode:
            paddle.disable_static()
        return multi_pass_desc.SerializeToString()
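The enable/disable toggle above is a common pattern for running static-graph work without disturbing the caller's mode. A minimal sketch of the pattern in isolation (the try/finally is an addition for safety, not part of the original):

import paddle

was_dynamic = paddle.in_dynamic_mode()
if was_dynamic:
    paddle.enable_static()
try:
    pass  # build or serialize static ProgramDesc structures here
finally:
    if was_dynamic:
        paddle.disable_static()  # restore the caller's dynamic mode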
Example #2
    def save_pretrained(self, save_dir):
        """
        Saves model configuration and related resources (model state) as files
        under `save_dir`. The model configuration will be saved into a file named
        "model_config.json", and the model state will be saved into a file
        named "model_state.pdparams".

        The `save_dir` can be used in `from_pretrained` as argument value
        of `pretrained_model_name_or_path` to re-load the trained model.

        Args:
            save_dir (str): Directory to save files into.

        Example:
            .. code-block::

                from paddlenlp.transformers import BertForSequenceClassification

                model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
                model.save_pretrained('./trained_model/')
                # reload from save_directory
                model = BertForSequenceClassification.from_pretrained('./trained_model/')
        """
        assert not os.path.isfile(
            save_dir
        ), "Saving directory ({}) should be a directory, not a file".format(
            save_dir)
        os.makedirs(save_dir, exist_ok=True)
        # Save model config
        self.save_model_config(save_dir)
        # Save model
        if paddle.in_dynamic_mode():
            file_name = os.path.join(
                save_dir,
                list(self.resource_files_names.values())[0])
            paddle.save(self.state_dict(), file_name)
        else:
            logger.warning(
                "Saving a pretrained model is only supported in dygraph mode for now!")
Example #3
        def hook(layer, input, output):
            class_name = str(layer.__class__).split(".")[-1].split("'")[0]

            try:
                layer_idx = int(layer._full_name.split('_')[-1])
            except Exception:
                layer_idx = len(summary)

            m_key = "%s-%i" % (class_name, layer_idx + 1)
            summary[m_key] = OrderedDict()
            summary[m_key]["input_shape"] = list(input[0].shape)
            summary[m_key]["input_shape"][0] = batch_size
            if isinstance(output, (list, tuple)):
                summary[m_key]["output_shape"] = [[-1] + list(o.shape)[1:]
                                                  for o in output]
            else:
                summary[m_key]["output_shape"] = list(output.shape)
                summary[m_key]["output_shape"][0] = batch_size

            params = 0

            if paddle.in_dynamic_mode():
                layer_state_dict = layer._parameters
            else:
                layer_state_dict = layer.state_dict()

            for k, v in layer_state_dict.items():
                params += np.prod(v.shape)

                try:
                    if (getattr(getattr(layer, k), 'trainable')) and (
                            not getattr(getattr(layer, k), 'stop_gradient')):
                        summary[m_key]["trainable"] = True
                    else:
                        summary[m_key]["trainable"] = False
                except Exception:
                    summary[m_key]["trainable"] = True

            summary[m_key]["nb_params"] = params
Example #4
    def read_file_decode_jpeg(self):
        if not paddle.is_compiled_with_cuda():
            return

        img_bytes = read_file('fake.jpg')

        img = decode_jpeg(img_bytes, mode='gray')
        img = decode_jpeg(img_bytes, mode='rgb')

        img = decode_jpeg(img_bytes)

        img_cv2 = cv2.imread('fake.jpg')
        if paddle.in_dynamic_mode():
            np.testing.assert_equal(img.shape, img_cv2.transpose(2, 0, 1).shape)
        else:
            place = paddle.CUDAPlace(0)
            exe = paddle.static.Executor(place)
            exe.run(paddle.static.default_startup_program())
            out = exe.run(paddle.static.default_main_program(),
                          fetch_list=[img])

            np.testing.assert_equal(out[0].shape,
                                    img_cv2.transpose(2, 0, 1).shape)
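The test assumes a fixture file 'fake.jpg' already exists; a hedged sketch of how such a fixture might be produced is below. Note that decode_jpeg returns CHW tensors while cv2.imread returns HWC arrays, which is why the assertions transpose with (2, 0, 1).

import cv2
import numpy as np

# Hypothetical fixture setup: write a small random image so that
# read_file('fake.jpg') and cv2.imread('fake.jpg') have data to load.
fake = np.random.randint(0, 256, size=(4, 4, 3), dtype=np.uint8)
cv2.imwrite('fake.jpg', fake)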
Example #5
    def setUp(self):
        self.op_type = "lu_unpack"
        self.config()
        x = np.random.random(self.x_shape).astype(self.dtype)
        if paddle.in_dynamic_mode():
            xt = paddle.to_tensor(x)
            lu, pivots = paddle.linalg.lu(xt)
            lu = lu.numpy()
            pivots = pivots.numpy()
        else:
            with fluid.program_guard(fluid.Program(), fluid.Program()):
                place = fluid.CPUPlace()
                if core.is_compiled_with_cuda():
                    place = fluid.CUDAPlace(0)
                xv = paddle.fluid.data(name="input",
                                       shape=self.x_shape,
                                       dtype=self.dtype)
                lu, p = paddle.linalg.lu(xv)
                exe = fluid.Executor(place)
                fetches = exe.run(fluid.default_main_program(),
                                  feed={"input": x},
                                  fetch_list=[lu, p])
                lu, pivots = fetches[0], fetches[1]

        self.inputs = {'X': lu, 'Pivots': pivots}

        self.attrs = {
            'unpack_ludata': self.unpack_ludata,
            'unpack_pivots': self.unpack_pivots
        }
        self.set_output(x)
        self.outputs = {
            'Pmat': self.P,
            'L': self.L,
            'U': self.U,
        }
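The inputs built here feed the lu_unpack operator, whose public counterpart is paddle.linalg.lu_unpack. A minimal round-trip sketch in dynamic mode, assuming that API is available in your Paddle build:

import numpy as np
import paddle

x = paddle.to_tensor(np.random.random((3, 3)).astype('float64'))
lu, pivots = paddle.linalg.lu(x)
P, L, U = paddle.linalg.lu_unpack(lu, pivots)
# P @ L @ U should reconstruct x up to floating-point error
print(float((P.matmul(L).matmul(U) - x).abs().max()))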
Example #6
def sin(x, name=None):
    """
    Calculate sin of x, requiring x to be a sparse coo or sparse csr tensor.

    .. math::

        out = sin(x)

    Parameters:
        x (Tensor): The input Sparse Tensor with data type float32, float64.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A Sparse Tensor with the same data type and shape as ``x``.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.fluid.framework import _test_eager_guard

            with _test_eager_guard():
                dense_x = paddle.to_tensor([-2, 0, 3], dtype='float32')
                sparse_x = dense_x.to_sparse_coo(1)
                out = paddle.incubate.sparse.sin(sparse_x)
    """

    assert in_dynamic_mode(), "Currently, the Sparse API only supports dynamic mode"

    if x.is_sparse_coo() or x.is_sparse_csr():
        return _C_ops.final_state_sparse_sin(x)
    else:
        raise ValueError(
            "Currently, sparse.sin only supports inputs of type SparseCooTensor or SparseCsrTensor"
        )
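Since sin is applied elementwise to the stored non-zero values, the sparse result should match the dense one after densification. A small cross-check sketch, assuming the same API spelling as above (sparse APIs have moved between namespaces across Paddle releases):

import paddle
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    dense_x = paddle.to_tensor([-2.0, 0.0, 3.0], dtype='float32')
    sparse_out = paddle.incubate.sparse.sin(dense_x.to_sparse_coo(1))
    # both lines should print the same values
    print(sparse_out.to_dense().numpy())
    print(paddle.sin(dense_x).numpy())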
Example #7
def randint_like(x, low=0, high=None, dtype=None, name=None):
    """
    Returns a Tensor filled with random integers from a discrete uniform
    distribution in the range [``low``, ``high``), with the same shape as ``x``
    (and the same dtype, unless ``dtype`` is specified).
    If ``high`` is None (the default), the range is [0, ``low``).

    Args:
        x (Tensor): The input tensor which specifies shape. The dtype of ``x`` 
            can be bool, int32, int64, float16, float32, float64.
        low (int): The lower bound on the range of random values to generate.
            The ``low`` is included in the range. If ``high`` is None, the
            range is [0, ``low``). Default is 0.
        high (int, optional): The upper bound on the range of random values to
            generate, the ``high`` is excluded in the range. Default is None
            (see above for behavior if high = None). Default is None.
        dtype (str|np.dtype, optional): The data type of the
            output tensor. Supported data types: bool, int32, int64, float16, 
            float32, float64. If ``dtype`` is None, the data type is the
            same as x's data type. Default is None.
        name (str, optional): The default value is None.  Normally there is no
            need for user to set this property.  For more information, please
            refer to :ref:`api_guide_Name`.

    Returns: 
        Tensor: A Tensor filled with random integers from a discrete uniform
        distribution in the range [``low``, ``high``), with the shape of ``x``
        and the specified ``dtype``.

    Examples:
        .. code-block:: python

            import paddle

            # example 1:
            # dtype is None and the dtype of x is float16
            x = paddle.zeros((1,2)).astype("float16")
            out1 = paddle.randint_like(x, low=-5, high=5)
            print(out1)
            print(out1.dtype)
            # [[0, -3]]  # random
            # paddle.float16

            # example 2:
            # dtype is None and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out2 = paddle.randint_like(x, low=-5, high=5)
            print(out2)
            print(out2.dtype)
            # [[0, -3]]  # random
            # paddle.float32

            # example 3:
            # dtype is None and the dtype of x is float64
            x = paddle.zeros((1,2)).astype("float64")
            out3 = paddle.randint_like(x, low=-5, high=5)
            print(out3)
            print(out3.dtype)
            # [[0, -3]]  # random
            # paddle.float64

            # example 4:
            # dtype is None and the dtype of x is int32
            x = paddle.zeros((1,2)).astype("int32")
            out4 = paddle.randint_like(x, low=-5, high=5)
            print(out4)
            print(out4.dtype)
            # [[0, -3]]  # random
            # paddle.int32

            # example 5:
            # dtype is None and the dtype of x is int64
            x = paddle.zeros((1,2)).astype("int64")
            out5 = paddle.randint_like(x, low=-5, high=5)
            print(out5)
            print(out5.dtype)
            # [[0, -3]]  # random
            # paddle.int64

            # example 6:
            # dtype is float64 and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out6 = paddle.randint_like(x, low=-5, high=5, dtype="float64")
            print(out6)
            print(out6.dtype)
            # [[0, -1]]  # random
            # paddle.float64

            # example 7:
            # dtype is bool and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out7 = paddle.randint_like(x, low=-5, high=5, dtype="bool")
            print(out7)
            print(out7.dtype)
            # [[0, -1]]  # random
            # paddle.bool

            # example 8:
            # dtype is int32 and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out8 = paddle.randint_like(x, low=-5, high=5, dtype="int32")
            print(out8)
            print(out8.dtype)
            # [[0, -1]]  # random
            # paddle.int32

            # example 9:
            # dtype is int64 and the dtype of x is float32
            x = paddle.zeros((1,2)).astype("float32")
            out9 = paddle.randint_like(x, low=-5, high=5, dtype="int64")
            print(out9)
            print(out9.dtype)
            # [[0, -1]]  # random
            # paddle.int64

            # example 10:
            # dtype is int64 and the dtype of x is bool
            x = paddle.zeros((1,2)).astype("bool")
            out10 = paddle.randint_like(x, low=-5, high=5, dtype="int64")
            print(out10)
            print(out10.dtype)
            # [[0, -1]]  # random
            # paddle.int64

    """
    if high is None:
        if low <= 0:
            raise ValueError(
                "If high is None, low must be greater than 0, but received low = {0}."
                .format(low))
        high = low
        low = 0
    if dtype is None:
        dtype = x.dtype
    if not isinstance(dtype, core.VarDesc.VarType):
        dtype = convert_np_dtype_to_dtype_(dtype)
    shape = x.shape

    if low >= high:
        raise ValueError(
            "randint_like's low must be less than high, but received low = {0}, "
            "high = {1}".format(low, high))

    if paddle.in_dynamic_mode():
        shape = utils.convert_shape_to_list(shape)
        out = _C_ops.randint('shape', shape, 'low', low, 'high', high, 'seed',
                             0, 'dtype', core.VarDesc.VarType.INT64)
        out = paddle.cast(out, dtype)
        return out

    check_shape(shape, 'randint_like')
    check_dtype(dtype, 'dtype',
                ['bool', 'float16', 'float32', 'float64', 'int32', 'int64'],
                'randint_like')

    inputs = dict()
    attrs = {
        'low': low,
        'high': high,
        'seed': 0,
        'dtype': core.VarDesc.VarType.INT64
    }
    utils.get_shape_tensor_inputs(inputs=inputs,
                                  attrs=attrs,
                                  shape=shape,
                                  op_type='randint_like')

    helper = LayerHelper("randint", **locals())
    out = helper.create_variable_for_type_inference(
        dtype=core.VarDesc.VarType.INT64)
    helper.append_op(type='randint',
                     inputs=inputs,
                     outputs={'Out': out},
                     attrs=attrs)
    out.stop_gradient = True
    out = paddle.cast(out, dtype)
    return out
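Note that both branches sample int64 values in [low, high) and only then cast to the requested dtype, which is why bool and float outputs still hold integral values. A sketch of the effective behavior using the public paddle.randint (an illustration, not the exact internal code path):

import paddle

x = paddle.zeros((1, 2), dtype="float32")
# sample integers, then cast — mirrors what randint_like does internally
out = paddle.cast(paddle.randint(low=-5, high=5, shape=x.shape), x.dtype)
print(out.dtype)  # paddle.float32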
Example #8
def construct_samples_and_shuffle_data(name, data_prefix, documents, sizes,
                                       num_samples, seq_length, seed,
                                       build_data_file):
    """
    documents: document index from 0 to len(docs)
    sizes: the length list of all docs.
    num_samples: total step*bs iterations of data.
    seq_length: the sequence length.


    sum(sizes) = tokens_per_epoch
    data_nums = num_samples * micro_batch_size
    num_epochs = (data_nums + 1) // sum(sizes)
    len(doc_idx) = num_epochs * len(documents)

    """
    # Number of tokens in each epoch and number of required epochs.
    tokens_per_epoch = _num_tokens(documents, sizes)
    num_epochs = _num_epochs(tokens_per_epoch, seq_length, num_samples)
    # Rng state
    np_rng = np.random.RandomState(seed=seed)

    # Filename of the index mappings.
    _filename = data_prefix
    _filename += '_{}_indexmap'.format(name)
    _filename += '_{}ns'.format(num_samples)
    _filename += '_{}sl'.format(seq_length)
    doc_idx_filename = _filename + '_doc_idx.npy'
    sample_idx_filename = _filename + '_sample_idx.npy'
    shuffle_idx_filename = _filename + '_shuffle_idx.npy'

    # Build the indexed mapping if not exist.
    if build_data_file:
        if (not os.path.isfile(doc_idx_filename)) or \
           (not os.path.isfile(sample_idx_filename)) or \
           (not os.path.isfile(shuffle_idx_filename)):
            if num_epochs == 1:
                separate_last_epoch = False
            else:
                num_samples_from_epochs_minus_one = (
                    (num_epochs - 1) * tokens_per_epoch - 1) // seq_length
                last_epoch_num_samples = num_samples - \
                                         num_samples_from_epochs_minus_one
                assert last_epoch_num_samples >= 0, \
                    'last epoch number of samples should be non-negative.'
                num_samples_per_epoch = (tokens_per_epoch - 1) // seq_length
                assert last_epoch_num_samples < (num_samples_per_epoch + 1), \
                    'last epoch number of samples exceeded max value.'
                separate_last_epoch = (last_epoch_num_samples < int(
                    0.80 * num_samples_per_epoch))
            # Note. len(doc_idx) = num_epochs * len(doc)
            start_time = time.time()
            doc_idx = _build_doc_idx(documents, num_epochs, np_rng,
                                     separate_last_epoch)
            np.save(doc_idx_filename, doc_idx, allow_pickle=True)
            print(' > elapsed time to build and save doc-idx mapping '
                  '(seconds): {:4f}'.format(time.time() - start_time))
            # sample-idx. pos of each seq_len of data.
            start_time = time.time()
            assert doc_idx.dtype == np.int32
            assert sizes.dtype == np.int32

            import data_tools.helpers as helpers

            sample_idx = helpers.build_sample_idx(sizes, doc_idx, seq_length,
                                                  num_epochs, tokens_per_epoch)
            np.save(sample_idx_filename, sample_idx, allow_pickle=True)
            print(' > elapsed time to build and save sample-idx mapping '
                  '(seconds): {:4f}'.format(time.time() - start_time))

            # shuffle-idx.
            start_time = time.time()

            if separate_last_epoch:
                num_samples_ = num_samples_from_epochs_minus_one
            else:
                num_samples_ = sample_idx.shape[0] - 1

            # Shuffle all seq len data.
            shuffle_idx = _build_shuffle_idx(num_samples_,
                                             sample_idx.shape[0] - 1, np_rng)
            np.save(shuffle_idx_filename, shuffle_idx, allow_pickle=True)
            print(' > elapsed time to build and save shuffle-idx mapping'
                  ' (seconds): {:4f}'.format(time.time() - start_time))

    else:
        while True:
            if (not os.path.isfile(doc_idx_filename)) or \
               (not os.path.isfile(sample_idx_filename)) or \
               (not os.path.isfile(shuffle_idx_filename)):
                time.sleep(3)
            else:
                try:
                    np.load(shuffle_idx_filename,
                            allow_pickle=True,
                            mmap_mode='r')
                    break
                except Exception as e:
                    print(
                        "%s file is still being written or is damaged, please wait a moment."
                        % shuffle_idx_filename)
                    time.sleep(3)

    if paddle.distributed.get_world_size() > 1:
        if paddle.in_dynamic_mode():
            paddle.distributed.barrier()

    # Load mappings.
    doc_idx = np.load(doc_idx_filename, allow_pickle=True, mmap_mode='r')
    sample_idx = np.load(sample_idx_filename, allow_pickle=True, mmap_mode='r')
    shuffle_idx = np.load(shuffle_idx_filename,
                          allow_pickle=True,
                          mmap_mode='r')
    return doc_idx, sample_idx, shuffle_idx
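How the three returned mappings are consumed is not shown here. In Megatron-style datasets each sample_idx row is assumed to hold [document index, offset within that document], and shuffle_idx permutes sample ids; that layout is an assumption, so verify it against helpers.build_sample_idx before relying on it. A hypothetical consumer under that assumption:

def get_sample_span(i, doc_idx, sample_idx, shuffle_idx):
    idx = shuffle_idx[i]                   # de-shuffle to a raw sample id
    doc_f, offset_f = sample_idx[idx]      # where the sample starts
    doc_l, offset_l = sample_idx[idx + 1]  # where the next sample starts
    # documents spanned by this sample, plus start/end offsets within them
    return doc_idx[doc_f:doc_l + 1], offset_f, offset_l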
Example #9
def where(condition, x=None, y=None, name=None):
    r"""
    Return a tensor of elements selected from either $x$ or $y$, depending on $condition$.

    **Note**:
        ``paddle.where(condition)`` is identical to ``paddle.nonzero(condition, as_tuple=True)``.

    .. math::

      out_i =
      \begin{cases}
      x_i, \quad  \text{if}  \ condition_i \  is \ True \\
      y_i, \quad  \text{if}  \ condition_i \  is \ False \\
      \end{cases}


    Args:
        condition(Tensor): The condition to choose x or y. When True (nonzero), yield x, otherwise yield y.
        x(Tensor or Scalar, optional): x is a Tensor or Scalar with data type float32, float64, int32, int64. Either both or neither of x and y should be given.
        y(Tensor or Scalar, optional): y is a Tensor or Scalar with data type float32, float64, int32, int64. Either both or neither of x and y should be given.

        name(str, optional): The default value is None. Normally there is no
            need for user to set this property. For more information, please
            refer to :ref:`api_guide_Name`.

    Returns:
        Tensor: A Tensor with the same data type as x.

    Examples:
        .. code-block:: python

          import paddle

          x = paddle.to_tensor([0.9383, 0.1983, 3.2, 1.2])
          y = paddle.to_tensor([1.0, 1.0, 1.0, 1.0])
          out = paddle.where(x>1, x, y)

          print(out)
          #out: [1.0, 1.0, 3.2, 1.2]

          out = paddle.where(x>1)
          print(out)
          #out: (Tensor(shape=[2, 1], dtype=int64, place=CPUPlace, stop_gradient=True,
          #            [[2],
          #             [3]]),)
    """
    if np.isscalar(x):
        x = paddle.full([1], x, np.array([x]).dtype.name)

    if np.isscalar(y):
        y = paddle.full([1], y, np.array([y]).dtype.name)

    if x is None and y is None:
        return nonzero(condition, as_tuple=True)

    if x is None or y is None:
        raise ValueError("either both or neither of x and y should be given")

    if not paddle.in_dynamic_mode():
        check_variable_and_dtype(condition, 'condition', ['bool'], 'where')
        check_variable_and_dtype(x, 'x',
                                 ['float32', 'float64', 'int32', 'int64'],
                                 'where')
        check_variable_and_dtype(y, 'y',
                                 ['float32', 'float64', 'int32', 'int64'],
                                 'where')

    condition_shape = list(condition.shape)
    x_shape = list(x.shape)
    y_shape = list(y.shape)

    if x_shape == y_shape and condition_shape == x_shape:
        broadcast_condition = condition
        broadcast_x = x
        broadcast_y = y
    else:
        if core.is_compiled_with_xpu():
            cond_int = paddle.cast(condition, x.dtype)
            cond_not_int = paddle.cast(logical_not(condition), x.dtype)
            out1 = paddle.multiply(x, cond_int)
            out2 = paddle.multiply(y, cond_not_int)
            out = paddle.add(out1, out2)
            return out

        zeros_like_x = paddle.zeros_like(x)
        zeros_like_y = paddle.zeros_like(y)
        zeros_like_condition = paddle.zeros_like(condition)
        zeros_like_condition = paddle.cast(zeros_like_condition, x.dtype)
        cast_cond = paddle.cast(condition, x.dtype)

        broadcast_zeros = paddle.add(zeros_like_x, zeros_like_y)
        broadcast_zeros = paddle.add(broadcast_zeros, zeros_like_condition)
        broadcast_x = paddle.add(x, broadcast_zeros)
        broadcast_y = paddle.add(y, broadcast_zeros)
        broadcast_condition = paddle.add(cast_cond, broadcast_zeros)
        broadcast_condition = paddle.cast(broadcast_condition, 'bool')

    if in_dygraph_mode():
        return _C_ops.final_state_where(broadcast_condition, broadcast_x,
                                        broadcast_y)
    else:
        if _in_legacy_dygraph():
            return _C_ops.where(broadcast_condition, broadcast_x, broadcast_y)
        else:
            helper = LayerHelper("where", **locals())
            out = helper.create_variable_for_type_inference(dtype=x.dtype)

            helper.append_op(type='where',
                             inputs={
                                 'Condition': broadcast_condition,
                                 'X': broadcast_x,
                                 'Y': broadcast_y
                             },
                             outputs={'Out': [out]})

            return out
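The fallback branch broadcasts condition, x and y to a common shape by adding all-zero tensors, relying on add's standard broadcasting rules. The trick in isolation, as a minimal sketch:

import paddle

x = paddle.ones([3, 1])
y = paddle.zeros([1, 4])
# adding zeros_like tensors broadcasts both operands to shape [3, 4]
zeros = paddle.zeros_like(x) + paddle.zeros_like(y)
print((x + zeros).shape, (y + zeros).shape)  # [3, 4] [3, 4]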
Example #10
def get_samples_mapping(indexed_dataset, data_prefix, num_epochs,
                        max_num_samples, max_seq_length, short_seq_prob, seed,
                        name, binary_head, share_folder):
    """Get a list that maps a sample index to a starting sentence index, end sentence index, and length"""

    if not num_epochs:
        if not max_num_samples:
            raise ValueError("Need to specify either max_num_samples "
                             "or num_epochs")
        num_epochs = np.iinfo(np.int32).max - 1
    if not max_num_samples:
        max_num_samples = np.iinfo(np.int64).max - 1

    # Filename of the index mapping
    indexmap_filename = data_prefix
    indexmap_filename += '_{}_indexmap'.format(name)
    if num_epochs != (np.iinfo(np.int32).max - 1):
        indexmap_filename += '_{}ep'.format(num_epochs)
    if max_num_samples != (np.iinfo(np.int64).max - 1):
        indexmap_filename += '_{}mns'.format(max_num_samples)
    indexmap_filename += '_{}msl'.format(max_seq_length)
    indexmap_filename += '_{:0.2f}ssp'.format(short_seq_prob)
    indexmap_filename += '_{}s'.format(seed)
    indexmap_filename += '.npy'

    local_rank = 0 if fleet.local_rank() is None else int(fleet.local_rank())
    if share_folder:
        local_rank = fleet.worker_index()
    # Build the indexed mapping if not exist.

    if local_rank == 0 and \
       not os.path.isfile(indexmap_filename):
        print(' > WARNING: could not find index map file {}, building '
              'the indices on rank 0 ...'.format(indexmap_filename))

        # Make sure the types match the helpers input types.
        assert indexed_dataset.doc_idx.dtype == np.int64
        print(indexed_dataset.sizes.dtype)
        assert indexed_dataset.sizes.dtype == np.int32

        # Build samples mapping
        verbose = local_rank == 0
        start_time = time.time()
        print_rank_0(
            ' > building samples index mapping for {} ...'.format(name))
        # First compile and then import.
        if local_rank == 0:
            compile_helper()
        import data_tools.helpers as helpers
        samples_mapping = helpers.build_mapping(indexed_dataset.doc_idx,
                                                indexed_dataset.sizes,
                                                num_epochs, max_num_samples,
                                                max_seq_length, short_seq_prob,
                                                seed, verbose,
                                                2 if binary_head else 1)
        print_rank_0(' > done building samples index mapping')
        np.save(indexmap_filename, samples_mapping, allow_pickle=True)
        print_rank_0(
            ' > saved the index mapping in {}'.format(indexmap_filename))
        # Make sure all the ranks have built the mapping
        print_rank_0(' > elapsed time to build and save samples mapping '
                     '(seconds): {:4f}'.format(time.time() - start_time))

    else:
        while True:
            if (not os.path.isfile(indexmap_filename)):
                time.sleep(3)
            else:
                try:
                    np.load(indexmap_filename,
                            allow_pickle=True,
                            mmap_mode='r')
                    break
                except Exception as e:
                    print(
                        "%s file is still being written or is damaged, please wait a moment."
                        % indexmap_filename)
                    time.sleep(3)

    # This should be a barrier, but the nccl barrier assumes
    # device_index == rank, which does not hold in the model
    # parallel case.
    if paddle.distributed.get_world_size() > 1:
        if paddle.in_dynamic_mode():
            paddle.distributed.barrier()

    # Load indexed dataset.
    print_rank_0(
        ' > loading indexed mapping from {}'.format(indexmap_filename))
    start_time = time.time()
    samples_mapping = np.load(indexmap_filename,
                              allow_pickle=True,
                              mmap_mode='r')
    print_rank_0(
        '    loaded indexed file in {:3.3f} seconds'.format(time.time() -
                                                            start_time))
    print_rank_0('    total number of samples: {}'.format(
        samples_mapping.shape[0]))

    return samples_mapping
Example #11
    def save_quantized_model(self, layer, path, input_spec=None, **config):
        """
        Save the quantized model for the inference.

        Args:
            layer (Layer): The Layer to be saved.
            path (str): The path prefix to save model. The format is ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input of the saved model's forward 
                method, which can be described by InputSpec or example Tensor. If None, all input variables of 
                the original Layer's forward method would be the inputs of the saved model. Default None.
            **config (dict, optional): Other save configuration options for compatibility. We do not
                recommend using these configurations, they may be removed in the future. If not necessary, 
                DO NOT use them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of the saved model.
                By default, all return variables of original Layer's forward method are kept as the 
                output of the saved model. If the provided ``output_spec`` list is not all output variables, 
                the saved model will be pruned according to the given ``output_spec`` list. 

        Returns:
            None
        """

        assert isinstance(
            layer,
            dygraph.Layer), "model must be an instance of dygraph.Layer"
        is_dynamic_mode = False
        with dygraph.guard():
            layer.eval()
            for handle in self._register_hook_handle_list:
                handle.remove()
            for key in self._out_scale_dict:
                self._out_scale_dict[key] = float(
                    self._out_scale_dict[key].numpy())

        paddle.jit.save(layer=layer,
                        path=path,
                        input_spec=input_spec,
                        **config)

        if paddle.in_dynamic_mode():
            is_dynamic_mode = True
            paddle.enable_static()

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = Executor(place)

        file_prefix = os.path.basename(path)
        dirname = os.path.dirname(path)
        model_filename = file_prefix + INFER_MODEL_SUFFIX
        params_filename = file_prefix + INFER_PARAMS_SUFFIX

        [inference_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        # Traverse all ops in the program and find out the op matching
        # the Layer in the dynamic graph.
        layer_var_dict = {}
        for block in inference_program.blocks:
            for op in block.ops:
                if op.type in _op_real_in_out_name:
                    output_var_names = quantization_pass._get_op_output_var_names(
                        op)
                    for output_var_name in output_var_names:
                        output_var_tensor = block.var(output_var_name)
                        if output_var_tensor.dtype not in [
                                core.VarDesc.VarType.FP64,
                                core.VarDesc.VarType.FP32
                        ]:
                            continue
                        # Because a Layer in dygraph may correspond to multiple ops
                        # in the static program after being saved, to ensure correctness
                        # the outscale collected for the output of a dygraph Layer can only
                        # be set on the last of the corresponding ops in the static program.
                        #
                        # We can judge the execution order of the ops corresponding
                        # to a dygraph Layer by the name of the output, and use a dict to save
                        # the corresponding relationship between the dygraph Layer and the
                        # static graph op that needs to set the outscale attribute.
                        if '.' not in output_var_name:
                            continue
                        dynamic_layer_name, var_name_suffix = output_var_name.split(
                            ".")
                        if dynamic_layer_name in layer_var_dict:
                            if layer_var_dict[dynamic_layer_name][
                                    0] < var_name_suffix:
                                layer_var_dict[dynamic_layer_name] = [
                                    var_name_suffix, op
                                ]
                        else:
                            layer_var_dict[dynamic_layer_name] = [
                                var_name_suffix, op
                            ]

        # Because the naming styles of static and dynamic graph are different,
        # in order to avoid mistakes, we unify the name here.
        for (layer_name, var_name_op_list) in layer_var_dict.items():
            if 'prelu' in layer_name:
                layer_name = layer_name.replace('prelu', 'p_re_lu')
            if 'relu' in layer_name:
                layer_name = layer_name.replace('relu', 're_lu')
            if layer_name not in self._out_scale_dict:
                continue
            var_name_op_list[1]._set_attr('out_threshold',
                                          self._out_scale_dict[layer_name])

        # Save the processed program.
        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=inference_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename)

        if is_dynamic_mode:
            paddle.disable_static()
Example #12
    def test_dygraph_mode(self):
        self.assertTrue(
            paddle.in_dynamic_mode(),
            'Default mode of a unittest should be dygraph mode, but got static mode.'
        )
Example #13
def local_response_norm(x,
                        size,
                        alpha=1e-4,
                        beta=0.75,
                        k=1.,
                        data_format="NCHW",
                        name=None):
    r"""
        Local Response Normalization performs a type of "lateral inhibition" by normalizing over local input regions.
        For more information, please refer to `ImageNet Classification with Deep Convolutional Neural Networks <https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf>`_

        The formula is as follows:

        .. math::

            Output(i, x, y) = Input(i, x, y) / \left(k + \alpha \sum\limits^{\min(C-1, i + size/2)}_{j = \max(0, i - size/2)}(Input(j, x, y))^2\right)^{\beta}

        In the above equation:

        - :math:`size` : The number of channels to sum over.
        - :math:`k` : The offset (avoid being divided by 0).
        - :math:`\alpha` : The scaling parameter.
        - :math:`\beta` : The exponent parameter.


        Args:
            x (Tensor): The input 3-D/4-D/5-D tensor. The data type is float32.
            size (int): The number of channels to sum over.
            alpha (float, optional): The scaling parameter, positive. Default:1e-4
            beta (float, optional): The exponent, positive. Default:0.75
            k (float, optional): An offset, positive. Default: 1.0
            data_format (str, optional): Specify the data format of the input, and the data format of the output
                will be consistent with that of the input. An optional string from:
                If x is 3-D Tensor, the string could be `"NCL"` or `"NLC"` . When it is `"NCL"`,
                the data is stored in the order of: `[batch_size, input_channels, feature_length]`.
                If x is 4-D Tensor, the string could be  `"NCHW"`, `"NHWC"`. When it is `"NCHW"`,
                the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`.
                If x is 5-D Tensor, the string could be  `"NCDHW"`, `"NDHWC"` . When it is `"NCDHW"`,
                the data is stored in the order of: `[batch_size, input_channels, input_depth, input_height, input_width]`.
            name (str, optional): Name for the operation (optional, default is None). For more information,
                please refer to :ref:`api_guide_Name`.

        Returns:
            A tensor storing the transformation result with the same shape and data type as input.


        Examples:

        .. code-block:: python

            import paddle

            x = paddle.rand(shape=(3, 3, 112, 112), dtype="float32")
            y = paddle.nn.functional.local_response_norm(x, size=5)
            print(y.shape)  # [3, 3, 112, 112]
        """
    if not in_dynamic_mode():
        check_variable_and_dtype(x, 'x', ['float32'], 'local_response_norm')
    if data_format not in ['NCL', 'NLC', 'NCHW', 'NHWC', 'NCDHW', 'NDHWC']:
        raise ValueError(
            "data_format should be in one of [NCL, NCHW, NCDHW, NLC, NHWC, NDHWC], " \
            "but got {}".format(data_format))

    sizes = x.shape
    dim = len(sizes)
    if dim < 3:
        raise ValueError(
            'Expected 3D or higher dimensionality input, but got {} dimensions'
            .format(dim))

    for i, sz in enumerate(sizes):
        if not sz > 0 and i > 0:
            raise ValueError("Expected every dim's size to be larger than 0, "
                             "but the size of the {}-th dim is {}".format(
                                 i, sz))

    channel_last = True if data_format[-1] == "C" else False

    from functools import reduce
    sum_sizes = reduce(lambda x, y: x * y, sizes[1:])

    div = paddle.unsqueeze(paddle.multiply(x, x), axis=1)
    if not channel_last:
        pad4d_shape = [0, 0, size // 2, (size - 1) // 2]
        pool2d_shape = (size, 1)
        reshape_shape = [
            sizes[0], 1, sizes[1], sizes[2],
            int(sum_sizes / (sizes[1] * sizes[2]))
        ]
        pad5d_shape = [0, 0, 0, 0, size // 2, (size - 1) // 2]
        pool3d_shape = (size, 1, 1)
    else:
        pad4d_shape = [size // 2, (size - 1) // 2, 0, 0]
        pool2d_shape = (1, size)
        reshape_shape = [
            sizes[0], 1, sizes[1],
            int(sum_sizes / (sizes[1] * sizes[-1])), sizes[-1]
        ]
        pad5d_shape = [size // 2, (size - 1) // 2, 0, 0, 0, 0]
        pool3d_shape = (1, 1, size)

    if dim == 3:
        div = paddle.nn.functional.pad(div, pad=pad4d_shape)
        div = paddle.nn.functional.avg_pool2d(div,
                                              kernel_size=pool2d_shape,
                                              stride=1)
        div = paddle.squeeze(div, axis=1)
    else:
        div = paddle.reshape(div, shape=reshape_shape)
        div = paddle.nn.functional.pad(div,
                                       pad=pad5d_shape,
                                       data_format='NCDHW')
        div = paddle.nn.functional.avg_pool3d(div,
                                              kernel_size=pool3d_shape,
                                              stride=1)
        div = paddle.reshape(paddle.squeeze(div, axis=1), sizes)

    div = paddle.scale(div, scale=alpha, bias=k)
    div = paddle.pow(div, beta)
    res = paddle.divide(x, div, name=name)
    return res
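With size=1 the channel window covers only the element itself, so the pooling pipeline above should reduce to the closed form out = x / (k + alpha * x**2) ** beta. A small self-check sketch built on that observation:

import paddle

x = paddle.rand([1, 3, 8, 8], dtype="float32")
y = paddle.nn.functional.local_response_norm(x, size=1, alpha=1e-4, beta=0.75, k=1.0)
manual = x / (1.0 + 1e-4 * x * x) ** 0.75  # same formula, written out directly
print(float((y - manual).abs().max()))  # expected to be ~0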
Example #14
def affine_grid(theta, out_shape, align_corners=True, name=None):
    """
    It generates a grid of (x,y) coordinates using the parameters of
    the affine transformation that correspond to a set of points where
    the input feature map should be sampled to produce the transformed
    output feature map.

    Args:
        theta (Tensor): A tensor with shape [N, 2, 3]. It contains a batch of affine transform parameters.
                           The data type can be float32 or float64.
        out_shape (Tensor | list | tuple): The shape of target output with format [batch_size, channel, height, width].
                                             ``out_shape`` can be a Tensor or a list or tuple. The data
                                             type must be int32.
        align_corners(bool): Whether to align corners of target feature map and source feature map. Default: True.
        name(str|None): The default value is None.  Normally there is no need for user to set this property.  For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor, A Tensor with shape [batch_size, H, W, 2] while 'H' and 'W' are the height and width of feature map in affine transformation. The data type is the same as `theta`.

    Raises:
        ValueError: If the type of arguments is not supported.

    Examples:

        .. code-block:: python

            import paddle
            import paddle.nn.functional as F
            import numpy as np
            # theta shape = [1, 2, 3]
            theta = np.array([[[-0.7, -0.4, 0.3],
                               [ 0.6,  0.5, 1.5]]]).astype("float32")
            theta_t = paddle.to_tensor(theta)
            y_t = F.affine_grid(
                    theta_t,
                    [1, 2, 3, 3],
                    align_corners=False)
            print(y_t)
            
            #[[[[ 1.0333333   0.76666665]
            #   [ 0.76666665  1.0999999 ]
            #   [ 0.5         1.4333333 ]]
            #
            #  [[ 0.5666667   1.1666666 ]
            #   [ 0.3         1.5       ]
            #   [ 0.03333333  1.8333334 ]]
            #
            #  [[ 0.10000002  1.5666667 ]
            #   [-0.16666666  1.9000001 ]
            #   [-0.43333334  2.2333333 ]]]]
    """
    if not isinstance(theta, Variable):
        raise ValueError("The theta should be a Tensor.")

    cudnn_version = get_cudnn_version()
    if cudnn_version is not None and cudnn_version >= 6000 and align_corners:
        use_cudnn = True
    else:
        use_cudnn = False
    if is_compiled_with_rocm():
        use_cudnn = False  # the ROCm platform does not have a MIOPEN kernel for affine_grid

    if not (isinstance(out_shape, list) or isinstance(out_shape, tuple) or \
            isinstance(out_shape, Variable)):
        raise ValueError("The out_shape should be a list, tuple or Tensor.")

    if in_dynamic_mode():
        _out_shape = out_shape.numpy().tolist() if isinstance(
            out_shape, Variable) else out_shape
        return _C_ops.affine_grid(theta, "output_shape", _out_shape,
                                  "align_corners", align_corners, "use_cudnn",
                                  use_cudnn)

    helper = LayerHelper('affine_grid')
    check_variable_and_dtype(theta, 'theta', ['float32', 'float64'],
                             'affine_grid')
    out = helper.create_variable_for_type_inference(theta.dtype)
    ipts = {'Theta': theta}
    attrs = {"align_corners": align_corners, "use_cudnn": use_cudnn}
    if isinstance(out_shape, Variable):
        ipts['OutputShape'] = out_shape
        check_variable_and_dtype(out_shape, 'out_shape', ['int32'],
                                 'affine_grid')
    else:
        attrs['output_shape'] = out_shape

    helper.append_op(type='affine_grid',
                     inputs=ipts,
                     outputs={'Output': out},
                     attrs=None if len(attrs) == 0 else attrs)
    return out
Example #15
def grid_sample(x,
                grid,
                mode='bilinear',
                padding_mode='zeros',
                align_corners=True,
                name=None):
    """
    This operation samples input X by using bilinear interpolation or
    nearest interpolation based on flow field grid, which is usually
    generated by :code:`affine_grid` . The grid of shape [N, H, W, 2]
    is the concatenation of (x, y) coordinates with shape [N, H, W] each,
    where x is indexing the 4th dimension (in width dimension) of input
    data x and y is indexing the 3rd dimension (in height dimension),
    and the final result is the bilinear interpolation or nearest value of the 4 nearest corner
    points. The output tensor shape will be [N, C, H, W].


    Step 1:

    Get (x, y) grid coordinates and scale them to [0, H-1] / [0, W-1].

    .. code-block:: text

        grid_x = 0.5 * (grid[:, :, :, 0] + 1) * (W - 1)
        grid_y = 0.5 * (grid[:, :, :, 1] + 1) * (H - 1)

    Step 2:
    
    Indices input data X with grid (x, y) in each [H, W] area, and bilinear
    interpolate point value by 4 nearest points or nearest interpolate point value
    by nearest point.

    .. code-block:: text

        wn ------- y_n ------- en
        |           |           |
        |          d_n          |
        |           |           |
        x_w --d_w-- grid--d_e-- x_e
        |           |           |
        |          d_s          |
        |           |           |
        ws ------- y_s ------- es

        For bilinear interpolation:
        x_w = floor(x)              // west side x coord
        x_e = x_w + 1               // east side x coord
        y_n = floor(y)              // north side y coord
        y_s = y_n + 1               // south side y coord
        d_w = grid_x - x_w          // distance to west side
        d_e = x_e - grid_x          // distance to east side
        d_n = grid_y - y_n          // distance to north side
        d_s = y_s - grid_y          // distance to south side
        wn = X[:, :, y_n, x_w]      // north-west point value
        en = X[:, :, y_n, x_e]      // north-east point value
        ws = X[:, :, y_s, x_w]      // south-west point value
        es = X[:, :, y_s, x_e]      // south-east point value

        output = wn * d_e * d_s + en * d_w * d_s
                + ws * d_e * d_n + es * d_w * d_n

    Args:
        x(Tensor): The input tensor, which is a 4-d tensor with shape
                     [N, C, H, W], N is the batch size, C is the channel
                     number, H and W is the feature height and width.
                     The data type is float32 or float64.
        grid(Tensor): Input grid tensor of shape [N, grid_H, grid_W, 2]. The
                        data type is float32 or float64.
        mode(str, optional): The interpolation method which can be 'bilinear' or 'nearest'.
                         Default: 'bilinear'.
        padding_mode(str, optional): The padding method used when source index
                   is out of input images. It can be 'zeros', 'reflection' and 'border'.
                   Default: zeros.
        align_corners(bool, optional): If `align_corners` is true, it will project
                   -1 and 1 to the centers of the corner pixels. Otherwise, it will
                   project -1 and 1 to the image edges.
        name(str, optional): For detailed information, please refer
                             to :ref:`api_guide_Name`. Usually name is no need to set and
                             None by default.

    Returns:
        Tensor, The shape of output is [N, C, grid_H, grid_W] in which `grid_H` is the height of grid and `grid_W` is the width of grid. The data type is same as input tensor.

    Examples:

        .. code-block:: python
        
            import paddle
            import paddle.nn.functional as F
            import numpy as np
            
            # shape=[1, 1, 3, 3]
            x = np.array([[[[-0.6,  0.8, -0.5],
                            [-0.5,  0.2,  1.2],
                            [ 1.4,  0.3, -0.2]]]]).astype("float64")
            
            # grid shape = [1, 3, 4, 2]
            grid = np.array(
                         [[[[ 0.2,  0.3],
                            [-0.4, -0.3],
                            [-0.9,  0.3],
                            [-0.9, -0.6]],
                           [[ 0.4,  0.1],
                            [ 0.9, -0.8],
                            [ 0.4,  0.5],
                            [ 0.5, -0.2]],
                           [[ 0.1, -0.8],
                            [-0.3, -1. ],
                            [ 0.7,  0.4],
                            [ 0.2,  0.8]]]]).astype("float64")
            
            
            x = paddle.to_tensor(x)
            grid = paddle.to_tensor(grid)
            y_t = F.grid_sample(
                x,
                grid,
                mode='bilinear',
                padding_mode='border',
                align_corners=True)
            print(y_t)
            
            # output shape = [1, 1, 3, 4]
            # [[[[ 0.34   0.016  0.086 -0.448]
            #    [ 0.55  -0.076  0.35   0.59 ]
            #    [ 0.596  0.38   0.52   0.24 ]]]]
    """

    _modes = ['bilinear', 'nearest']
    _padding_modes = ['zeros', 'reflection', 'border']
    if mode not in _modes:
        raise ValueError(
            "The mode of grid sample function should be in {}, but got: {}".
            format(_modes, mode))
    if padding_mode not in _padding_modes:
        raise ValueError(
            "The padding mode of grid sample function should be in {}, but got: {}"
            .format(_padding_modes, padding_mode))

    if not isinstance(align_corners, bool):
        raise ValueError(
            "The align corners should be bool, but got: {}".format(
                align_corners))

    cudnn_version = get_cudnn_version()
    use_cudnn = False
    if not is_compiled_with_rocm() and (
            cudnn_version is not None
    ) and align_corners and mode == 'bilinear' and padding_mode == 'zeros':
        use_cudnn = True
        # CUDNN always computes gradients for all inputs
        x.stop_gradient = False
        grid.stop_gradient = False

    if in_dynamic_mode():
        attrs = ('mode', mode, 'padding_mode', padding_mode, 'align_corners',
                 align_corners, 'use_cudnn', use_cudnn)
        out = getattr(_C_ops, 'grid_sampler')(x, grid, *attrs)
    else:
        helper = LayerHelper("grid_sample", **locals())
        check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'grid_sample')
        check_variable_and_dtype(grid, 'grid', ['float32', 'float64'],
                                 'grid_sample')
        ipts = {'X': x, 'Grid': grid}
        attrs = {
            'mode': mode,
            'padding_mode': padding_mode,
            'align_corners': align_corners,
            'use_cudnn': use_cudnn
        }
        out = helper.create_variable_for_type_inference(x.dtype)
        helper.append_op(type='grid_sampler',
                         inputs=ipts,
                         attrs=attrs,
                         outputs={'Output': out})
    return out
Example #16
def diag_embed(input, offset=0, dim1=-2, dim2=-1):
    """
    This OP creates a tensor whose diagonals of certain 2D planes (specified by dim1 and dim2) 
    are filled by ``input``. By default, a 2D plane formed by the last two dimensions 
    of the returned tensor will be selected.

    The argument ``offset`` determines which diagonal is generated:

    - If offset = 0, it is the main diagonal.
    - If offset > 0, it is above the main diagonal.
    - If offset < 0, it is below the main diagonal.

    Args:
        input(Tensor|numpy.ndarray): The input tensor. Must be at least 1-dimensional. The input data type should be float32, float64, int32, int64.
        offset(int, optional): Which diagonal to consider. Default: 0 (main diagonal).
        dim1(int, optional): The first dimension with respect to which to take diagonal. Default: -2.
        dim2(int, optional): The second dimension with respect to which to take diagonal. Default: -1.
    
    Returns:
        Tensor, the output data type is the same as input data type.
    
    Examples:
        .. code-block:: python

            import paddle.nn.functional as F
            import numpy as np
            
            diag_embed = np.random.randn(2, 3).astype('float32')
            # [[ 0.7545889 , -0.25074545,  0.5929117 ],
            #  [-0.6097662 , -0.01753256,  0.619769  ]]

            data1 = F.diag_embed(diag_embed)
            data1.numpy()
            # [[[ 0.7545889 ,  0.        ,  0.        ],
            #  [ 0.        , -0.25074545,  0.        ],
            #   [ 0.        ,  0.        ,  0.5929117 ]],

            # [[-0.6097662 ,  0.        ,  0.        ],
            #  [ 0.        , -0.01753256,  0.        ],
            #  [ 0.        ,  0.        ,  0.619769  ]]]

            data2 = F.diag_embed(diag_embed, offset=-1, dim1=0, dim2=2)
            data2.numpy()
            # [[[ 0.        ,  0.        ,  0.        ,  0.        ],
            #   [ 0.7545889 ,  0.        ,  0.        ,  0.        ],
            #   [ 0.        , -0.25074545,  0.        ,  0.        ],
            #   [ 0.        ,  0.        ,  0.5929117 ,  0.        ]],
            #
            #  [[ 0.        ,  0.        ,  0.        ,  0.        ],
            #   [-0.6097662 ,  0.        ,  0.        ,  0.        ],
            #   [ 0.        , -0.01753256,  0.        ,  0.        ],
            #   [ 0.        ,  0.        ,  0.619769  ,  0.        ]]]

            data3 = F.diag_embed(diag_embed, offset=1, dim1=0, dim2=2)
            data3.numpy()
            # [[[ 0.        ,  0.7545889 ,  0.        ,  0.        ],
            #   [ 0.        , -0.6097662 ,  0.        ,  0.        ]],
            #
            #  [[ 0.        ,  0.        , -0.25074545,  0.        ],
            #   [ 0.        ,  0.        , -0.01753256,  0.        ]],
            #
            #  [[ 0.        ,  0.        ,  0.        ,  0.5929117 ],
            #   [ 0.        ,  0.        ,  0.        ,  0.619769  ]],
            #
            #  [[ 0.        ,  0.        ,  0.        ,  0.        ],
            #   [ 0.        ,  0.        ,  0.        ,  0.        ]]]
    """
    inputs = {'Input': [input]}
    attrs = {'offset': offset, 'dim1': dim1, 'dim2': dim2}

    if not isinstance(input, Variable):
        input = assign(input)

    def __check_input(input, offset, dim1, dim2):
        check_dtype(input.dtype, 'Input',
                    ['int32', 'int64', 'float16', 'float32', 'float64'],
                    'diag_embed')

        input_shape = list(input.shape)
        assert len(input_shape) >= 1,                     \
                "Input must be at least 1-dimensional, "   \
                "But received Input's dimensional: %s.\n" %  \
                len(input_shape)

        assert np.abs(dim1) <= len(input_shape),    \
            "Dim1 is out of range (expected to be in range of [%d, %d], but got %d).\n"  \
            % (-(len(input_shape) + 1), len(input_shape), dim1)

        assert np.abs(dim2) <= len(input_shape),      \
            "Dim2 is out of range (expected to be in range of [%d, %d], but got %d).\n"  \
            % (-(len(input_shape) + 1), len(input_shape), dim2)

        dim1_ = dim1 if dim1 >= 0 else len(input_shape) + dim1 + 1
        dim2_ = dim2 if dim2 >= 0 else len(input_shape) + dim2 + 1
        assert dim1_ != dim2_, \
            "dim1 and dim2 cannot be the same dimension. " \
            "But received dim1 = %d, dim2 = %d\n" % (dim1, dim2)

    if not in_dynamic_mode():
        __check_input(input, offset, dim1, dim2)
    helper = LayerHelper("diag_embed", **locals())

    out = helper.create_variable_for_type_inference(dtype=input.dtype)

    helper.append_op(type='diag_embed',
                     inputs={'Input': [input]},
                     attrs={
                         'offset': offset,
                         'dim1': dim1,
                         'dim2': dim2
                     },
                     outputs={'Out': [out]})
    out.stop_gradient = True
    return out
Example #17
def summary(net, input_size, batch_size=None, dtypes=None):
    """Prints a string summary of the network.

    Args:
        net (Layer): the network which must be a subinstance of Layer.
        input_size (tuple|InputSpec|list[tuple|InputSpec]): size of the input tensor. If the model
                    has only one input, input_size can be a tuple or an InputSpec. If the model
                    has multiple inputs, input_size must be a list containing
                    every input's shape.
        batch_size (int, optional): batch size of input tensor, Default: None.
        dtypes (str, optional): if dtypes is None, 'float32' will be used, Default: None.

    Returns:
        Dict: a summary of the network including total params and total trainable params.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn as nn

            class LeNet(nn.Layer):
                def __init__(self, num_classes=10):
                    super(LeNet, self).__init__()
                    self.num_classes = num_classes
                    self.features = nn.Sequential(
                        nn.Conv2D(
                            1, 6, 3, stride=1, padding=1),
                        nn.ReLU(),
                        nn.MaxPool2D(2, 2),
                        nn.Conv2D(
                            6, 16, 5, stride=1, padding=0),
                        nn.ReLU(),
                        nn.MaxPool2D(2, 2))

                    if num_classes > 0:
                        self.fc = nn.Sequential(
                            nn.Linear(400, 120),
                            nn.Linear(120, 84),
                            nn.Linear(
                                84, 10))

                def forward(self, inputs):
                    x = self.features(inputs)

                    if self.num_classes > 0:
                        x = paddle.flatten(x, 1)
                        x = self.fc(x)
                    return x

            lenet = LeNet()

            params_info = paddle.summary(lenet, (1, 28, 28))
            print(params_info)

    """
    if isinstance(input_size, InputSpec):
        _input_size = tuple(input_size.shape[1:])
        if batch_size is None:
            batch_size = input_size.shape[0]
    elif isinstance(input_size, list):
        _input_size = []
        for item in input_size:
            if isinstance(item, int):
                item = (item, )
            assert isinstance(item,
                              (tuple, InputSpec)), 'When input_size is list, \
            expect item in input_size is a tuple or InputSpec, but got {}'.format(
                                  type(item))

            if isinstance(item, InputSpec):
                _input_size.append(tuple(item.shape[1:]))
                if batch_size is None:
                    batch_size = item.shape[0]
            else:
                _input_size.append(item)
    elif isinstance(input_size, int):
        _input_size = (input_size, )
    else:
        _input_size = input_size

    if batch_size is None:
        batch_size = -1

    if not paddle.in_dynamic_mode():
        warnings.warn(
            "Your model was created in static mode, this may not get correct summary information!"
        )

    result, params_info = summary_string(net, _input_size, batch_size, dtypes)
    print(result)

    return params_info
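Since this version of summary takes a batch_size argument, the LeNet example
from the docstring can also be summarized with an explicit batch dimension (a
sketch reusing the lenet instance defined there):

params_info = summary(lenet, (1, 28, 28), batch_size=64)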
Example #18
    def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
        """
        Creates an instance of `PretrainedModel`. Model weights are loaded
        by specifying name of a built-in pretrained model, or a community contributed model,
        or a local file directory path.

        Args:
            pretrained_model_name_or_path (str): Name of pretrained model or dir path
                to load from. The string can be:

                - Name of a built-in pretrained model
                - Name of a community-contributed pretrained model.
                - Local directory path which contains the model weights file ("model_state.pdparams")
                  and model config file ("model_config.json").
            *args (tuple): Position arguments for model `__init__`. If provided,
                use these as position argument values for model initialization.
            **kwargs (dict): Keyword arguments for model `__init__`. If provided,
                use these to update pre-defined keyword argument values for model
                initialization. If the keyword is in `__init__` argument names of
                base model, update argument values of the base model; else update
                argument values of derived model.

        Returns:
            PretrainedModel: An instance of `PretrainedModel`.

        Example:
            .. code-block::

                from paddlenlp.transformers import BertForSequenceClassification

                # Name of built-in pretrained model
                model = BertForSequenceClassification.from_pretrained('bert-base-uncased')

                # Name of community-contributed pretrained model
                model = BertForSequenceClassification.from_pretrained('yingyibiao/bert-base-uncased-sst-2-finetuned')

                # Load from local directory path
                model = BertForSequenceClassification.from_pretrained('./my_bert/')
        """
        pretrained_models = list(cls.pretrained_init_configuration.keys())
        resource_files = {}
        init_configuration = {}

        # From built-in pretrained models
        if pretrained_model_name_or_path in pretrained_models:
            for file_id, map_list in cls.pretrained_resource_files_map.items():
                resource_files[file_id] = map_list[
                    pretrained_model_name_or_path]
            init_configuration = copy.deepcopy(
                cls.
                pretrained_init_configuration[pretrained_model_name_or_path])
        # From local dir path
        elif os.path.isdir(pretrained_model_name_or_path):
            for file_id, file_name in cls.resource_files_names.items():
                full_file_name = os.path.join(pretrained_model_name_or_path,
                                              file_name)
                resource_files[file_id] = full_file_name
            resource_files["model_config_file"] = os.path.join(
                pretrained_model_name_or_path, cls.model_config_file)
        else:
            # Assuming from community-contributed pretrained models
            for file_id, file_name in cls.resource_files_names.items():
                full_file_name = os.path.join(COMMUNITY_MODEL_PREFIX,
                                              pretrained_model_name_or_path,
                                              file_name)
                resource_files[file_id] = full_file_name
            resource_files["model_config_file"] = os.path.join(
                COMMUNITY_MODEL_PREFIX, pretrained_model_name_or_path,
                cls.model_config_file)

        default_root = os.path.join(MODEL_HOME, pretrained_model_name_or_path)
        resolved_resource_files = {}
        for file_id, file_path in resource_files.items():
            if file_path is None or os.path.isfile(file_path):
                resolved_resource_files[file_id] = file_path
                continue
            path = os.path.join(default_root, file_path.split('/')[-1])
            if os.path.exists(path):
                logger.info("Already cached %s" % path)
                resolved_resource_files[file_id] = path
            else:
                logger.info("Downloading %s and saved to %s" %
                            (file_path, default_root))
                try:
                    resolved_resource_files[file_id] = get_path_from_url(
                        file_path, default_root)
                except RuntimeError as err:
                    logger.error(err)
                    raise RuntimeError(
                        f"Can't load weights for '{pretrained_model_name_or_path}'.\n"
                        f"Please make sure that '{pretrained_model_name_or_path}' is:\n"
                        "- a correct model-identifier of built-in pretrained models,\n"
                        "- or a correct model-identifier of community-contributed pretrained models,\n"
                        "- or the correct path to a directory containing relevant modeling files(model_weights and model_config).\n"
                    )

        # Prepare model initialization kwargs
        # Did we save some inputs and kwargs to reload?
        model_config_file = resolved_resource_files.pop(
            "model_config_file", None)
        if model_config_file is not None:
            with io.open(model_config_file, encoding="utf-8") as f:
                init_kwargs = json.load(f)
        else:
            init_kwargs = init_configuration
        # positional args are stored in kwargs; maybe better not to include them
        init_args = init_kwargs.pop("init_args", ())
        # class name corresponds to this configuration
        init_class = init_kwargs.pop("init_class",
                                     cls.base_model_class.__name__)
        # Check if the loaded config matches the current model class's __init__
        # arguments. If not match, the loaded config is for the base model class.
        if init_class == cls.base_model_class.__name__:
            base_args = init_args
            base_kwargs = init_kwargs
            derived_args = ()
            derived_kwargs = {}
            base_arg_index = None
        else:  # extract config for base model
            derived_args = list(init_args)
            derived_kwargs = init_kwargs
            base_arg = None
            for i, arg in enumerate(init_args):
                if isinstance(arg, dict) and "init_class" in arg:
                    assert arg.pop(
                        "init_class") == cls.base_model_class.__name__, (
                            "pretrained base model should be {}").format(
                                cls.base_model_class.__name__)
                    base_arg_index = i
                    base_arg = arg
                    break
            for arg_name, arg in init_kwargs.items():
                if isinstance(arg, dict) and "init_class" in arg:
                    assert arg.pop(
                        "init_class") == cls.base_model_class.__name__, (
                            "pretrained base model should be {}").format(
                                cls.base_model_class.__name__)
                    base_arg_index = arg_name
                    base_arg = arg
                    break

            base_args = base_arg.pop("init_args", ())
            base_kwargs = base_arg
        if cls == cls.base_model_class:
            # Update with newly provided args and kwargs for base model
            base_args = base_args if not args else args
            base_kwargs.update(kwargs)
            model = cls(*base_args, **base_kwargs)
        else:
            # Update with newly provided args and kwargs for derived model
            base_parameters_dict = inspect.signature(
                cls.base_model_class.__init__).parameters
            for k, v in kwargs.items():
                if k in base_parameters_dict:
                    base_kwargs[k] = v
            base_model = cls.base_model_class(*base_args, **base_kwargs)
            if base_arg_index is not None:
                derived_args[base_arg_index] = base_model
            else:
                derived_args = (base_model, )  # assume at the first position
            derived_args = derived_args if not args else args
            derived_parameters_dict = inspect.signature(
                cls.__init__).parameters
            for k, v in kwargs.items():
                if k in derived_parameters_dict:
                    derived_kwargs[k] = v
            model = cls(*derived_args, **derived_kwargs)

        # Maybe need more ways to load resources.
        weight_path = resolved_resource_files["model_state"]
        assert weight_path.endswith(
            ".pdparams"), "suffix of weight must be .pdparams"

        state_dict = paddle.load(weight_path)

        # Make sure we are able to load base models as well as derived models
        # (with heads)
        start_prefix = ""
        model_to_load = model
        state_to_load = state_dict
        unexpected_keys = []
        missing_keys = []
        if not hasattr(model, cls.base_model_prefix) and any(
                s.startswith(cls.base_model_prefix)
                for s in state_dict.keys()):
            # base model
            state_to_load = {}
            start_prefix = cls.base_model_prefix + "."
            for k, v in state_dict.items():
                if k.startswith(cls.base_model_prefix):
                    state_to_load[k[len(start_prefix):]] = v
                else:
                    unexpected_keys.append(k)
        if hasattr(model, cls.base_model_prefix) and not any(
                s.startswith(cls.base_model_prefix)
                for s in state_dict.keys()):
            # derived model (base model with heads)
            model_to_load = getattr(model, cls.base_model_prefix)
            for k in model.state_dict().keys():
                if not k.startswith(cls.base_model_prefix):
                    missing_keys.append(k)
        if len(missing_keys) > 0:
            logger.info(
                "Weights of {} not initialized from pretrained model: {}".
                format(model.__class__.__name__, missing_keys))
        if len(unexpected_keys) > 0:
            logger.info(
                "Weights from pretrained model not used in {}: {}".format(
                    model.__class__.__name__, unexpected_keys))
        if paddle.in_dynamic_mode():
            model_to_load.set_state_dict(state_to_load)
            return model
        return model, state_to_load
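A hedged illustration of the kwargs routing described above: keywords matching
the base model's __init__ update the base model, the rest update the derived
model (num_classes and hidden_dropout_prob are PaddleNLP BERT argument names,
used here only as examples):

from paddlenlp.transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased', num_classes=2, hidden_dropout_prob=0.2)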
Example #19
    def __call__(self, var, block=None):
        """Initialize the input tensor with dirac initializer.

        Args:
            var(Tensor): Tensor that needs to be initialized.
            block(Block, optional): The block in which initialization ops
                   should be added. Used in static graph only, default None.

        Returns:
            The most critical op (scatter) in this initializer, which contains 7-8 ops in total.
        """
        block = self._check_block(block)
        assert isinstance(var, framework.Parameter)
        assert isinstance(block, framework.Block)
        check_variable_and_dtype(var, "Out",
                                 ['float16', 'bfloat16', 'float32', 'float64'],
                                 'Dirac')

        assert len(var.shape) in [
            3, 4, 5
        ], "Only Tensor with 3/4/5 dimensions can be initialized by Dirac"
        assert (var.shape[0] % self._groups
                ) == 0, "Tensor's dimension 0 must be divisible by groups"

        if var.dtype != VarDesc.VarType.FP32:
            out_var = block.create_var(name=unique_name.generate(".".join(
                ['dirac', var.name, 'tmp'])),
                                       shape=var.shape,
                                       dtype=VarDesc.VarType.FP32,
                                       type=VarDesc.VarType.LOD_TENSOR,
                                       persistable=False)
        else:
            out_var = var
        op = None
        if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
                _C_ops.fill_constant(out_var, 'value', float(0), 'force_cpu',
                                     False,
                                     'dtype', out_var.dtype, 'str_value',
                                     str(float(0)), 'shape', out_var.shape)
        else:
            block.append_op(type='fill_constant',
                            inputs={},
                            outputs={'Out': out_var},
                            attrs={
                                'value': float(0),
                                'dtype': out_var.dtype,
                                'shape': out_var.shape,
                            },
                            stop_gradient=True)

        origin_shape = var.shape
        num_per_group = origin_shape[0] // self._groups
        min_shape = min(num_per_group, origin_shape[1])

        # Compute the row-major strides of the original shape, then the flat
        # index of every 1.0 entry: for group i and channel j, the entry sits
        # at output channel j + i * num_per_group, input channel j, and the
        # spatial center of each remaining dimension.
        idx_list = []
        value_list = []
        strides = []
        prod = 1
        for dim in reversed(origin_shape):
            strides.insert(0, prod)
            prod *= dim
        for i in range(self._groups):
            for j in range(min_shape):
                value_list.append(1.0)
                offset = 0
                for (k, stride) in enumerate(strides):
                    if (k == 0):
                        offset += (j + i * num_per_group) * stride
                    elif (k == 1):
                        offset += j * stride
                    else:
                        offset += origin_shape[k] // 2 * stride
                idx_list.append(offset)
        if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
                tmp_out, _ = _C_ops.reshape2(out_var, None, 'shape', [-1])
                tmp_out._share_underline_tensor_to(out_var)
        else:
            x_shape = block.create_var(name=unique_name.generate(".".join(
                [out_var.name, "XShape"])),
                                       dtype=out_var.dtype,
                                       shape=out_var.shape,
                                       type=VarDesc.VarType.LOD_TENSOR,
                                       persistable=False,
                                       stop_gradient=True)
            block.append_op(type="reshape2",
                            inputs={"X": out_var},
                            attrs={'shape': [-1]},
                            outputs={
                                "Out": out_var,
                                "XShape": x_shape
                            },
                            stop_gradient=True)

        index_tensor = block.create_var(
            name=unique_name.generate('scatter_index'),
            persistable=False,
            stop_gradient=True)

        if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
                tmp_tensor = framework._varbase_creator()
                _C_ops.assign_value(tmp_tensor, 'shape', [len(idx_list)],
                                    'dtype', VarDesc.VarType.INT64,
                                    'int64_values', idx_list)
                tmp_tensor._share_underline_tensor_to(index_tensor)
        else:
            block.append_op(type='assign_value',
                            outputs={'Out': index_tensor},
                            attrs={
                                'dtype': VarDesc.VarType.INT64,
                                'shape': [len(idx_list)],
                                'int64_values': idx_list
                            },
                            stop_gradient=True)

        value_tensor = block.create_var(
            name=unique_name.generate('scatter_value'),
            persistable=False,
            stop_gradient=True)

        if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
                tmp_tensor = framework._varbase_creator()
                _C_ops.assign_value(tmp_tensor, 'shape', [len(value_list)],
                                    'dtype', VarDesc.VarType.FP32,
                                    'fp32_values', value_list)
                tmp_tensor._share_underline_tensor_to(value_tensor)
        else:
            block.append_op(type='assign_value',
                            outputs={'Out': value_tensor},
                            attrs={
                                'dtype': VarDesc.VarType.FP32,
                                'shape': [len(value_list)],
                                'fp32_values': value_list
                            },
                            stop_gradient=True)

        if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
                tmp_out = _C_ops.final_state_scatter(out_var, index_tensor,
                                                     value_tensor, True)
                tmp_out._share_underline_tensor_to(out_var)
                tmp_reshape_out, _ = _C_ops.reshape2(out_var, None, 'shape',
                                                     origin_shape)
                tmp_reshape_out._share_underline_tensor_to(out_var)
                if var.dtype != VarDesc.VarType.FP32:
                    tmp_cast_out = _C_ops.cast(out_var, 'in_dtype',
                                               out_var.dtype, 'out_dtype',
                                               var.dtype)
                    tmp_cast_out._share_underline_tensor_to(var)

        else:
            op = block.append_op(type="scatter",
                                 inputs={
                                     "X": out_var,
                                     "Ids": index_tensor,
                                     "Updates": value_tensor
                                 },
                                 attrs={'overwrite': True},
                                 outputs={"Out": out_var},
                                 stop_gradient=True)
            x_shape = block.create_var(name=unique_name.generate(".".join(
                [out_var.name, "XShape"])),
                                       dtype=out_var.dtype,
                                       shape=out_var.shape,
                                       type=VarDesc.VarType.LOD_TENSOR,
                                       persistable=False,
                                       stop_gradient=True)
            block.append_op(type="reshape2",
                            inputs={"X": out_var},
                            attrs={'shape': origin_shape},
                            outputs={
                                "Out": out_var,
                                "XShape": x_shape
                            },
                            stop_gradient=True)
            if var.dtype != VarDesc.VarType.FP32:
                block.append_op(type="cast",
                                inputs={"X": out_var},
                                outputs={"Out": var},
                                attrs={
                                    "in_dtype": out_var.dtype,
                                    "out_dtype": var.dtype
                                },
                                stop_gradient=True)
        if not in_dynamic_mode():
            var.op = op
        return op
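A minimal usage sketch, assuming the initializer above is exposed as
paddle.nn.initializer.Dirac (as in recent Paddle releases):

import paddle

# Dirac-initialize a Conv2D kernel so the layer starts out close to an
# identity mapping over the leading channels.
weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Dirac(groups=1))
conv = paddle.nn.Conv2D(3, 3, kernel_size=3, padding=1, weight_attr=weight_attr)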
Example #20
    def save_quantized_model(self,
                             model,
                             path,
                             input_spec=None,
                             onnx_format=False,
                             **config):
        """
        Save the quantized model for the inference.

        Args:
            model (Layer): The model to be saved.
            path (str): The path prefix to save model. The format is 
                ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input
                of the saved model's forward method, which can be described by
                InputSpec or example Tensor. If None, all input variables of 
                the original Layer's forward method would be the inputs of
                the saved model. Default None.
            onnx_format (bool, optional): Whether to export the quantized model 
                with format of ONNX. Default is False.
            **config (dict, optional): Other save configuration options for
                compatibility. We do not recommend using these configurations;
                they may be removed in the future. If not necessary, DO NOT use
                them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of
                the saved model. By default, all return variables of original
                Layer's forward method are kept as the output of the saved model.
                If the provided ``output_spec`` list is not all output variables, 
                the saved model will be pruned according to the given
                ``output_spec`` list. 

        Returns:
            None
        """
        assert isinstance(model, dygraph.Layer), \
            "The model must be an instance of dygraph.Layer."

        paddle.jit.save(layer=model,
                        path=path,
                        input_spec=input_spec,
                        **config)

        is_dynamic_mode = False
        if paddle.in_dynamic_mode():
            is_dynamic_mode = True
            paddle.enable_static()

        place = core.CPUPlace()
        scope = global_scope()
        exe = Executor(place)

        dirname = os.path.dirname(path)
        basename = os.path.basename(path)
        model_filename = basename + INFER_MODEL_SUFFIX
        params_filename = basename + INFER_PARAMS_SUFFIX

        [infer_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        self._gather_scales(infer_program, scope, fetch_targets)

        # Remove `moving_average_abs_max_scale` node in sub graphs.
        graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
        for sub_graph in graph.all_sub_graphs():
            for _op in sub_graph.all_op_nodes():
                if _op.name() == "moving_average_abs_max_scale":
                    sub_graph.safe_remove_nodes(_op)
            sub_graph.resolve_hazard()
        infer_program = graph.to_program()

        self._set_skip_quant_attr(infer_program)

        clip_extra = False
        if onnx_format:
            graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
            transform_pass = ReplaceFakeQuantDequantPass(scope, place)
            transform_pass.apply(graph)

            quant_weight_pass = QuantWeightPass(scope, place)
            quant_weight_pass.apply(graph)
            infer_program = graph.to_program()

            clip_extra = True

        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=infer_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename,
                             clip_extra=clip_extra)

        if is_dynamic_mode:
            paddle.disable_static()
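A sketch of how this method is typically driven in dygraph quantization-aware
training. ImperativeQuantAware, its import path, and MyModel are assumptions
made for illustration:

import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

quanter = ImperativeQuantAware()
model = MyModel()        # hypothetical dygraph Layer
quanter.quantize(model)  # insert fake-quant ops in place
# ... train or calibrate the model here ...
quanter.save_quantized_model(
    model,
    path='./quantized/model',
    input_spec=[paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')])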
Example #21
def max_pool3d(x,
               kernel_size,
               stride=None,
               padding=0,
               ceil_mode=False,
               data_format="NDHWC",
               name=None):
    """
    Implements sparse max pooling 3d operation.
    See more details in :ref:`api_sparse_pooling_MaxPool3d` .

    Args:
        x (Tensor): The input SparseCooTensor of the pooling operator, which is a 5-D tensor with
                          shape [N, D, H, W, C]. The format of the input tensor is `"NDHWC"`, where N represents batch size, C represents the number of channels, and D, H and W represent the depth, height and width of the feature respectively.
        kernel_size (int|list|tuple): The pool kernel size. If the kernel size
            is a tuple or list, it must contain three integers,
            (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
            Otherwise, the pool kernel size will be the cube of an int.
        stride (int|list|tuple): The pool stride size. If the pool stride size is a tuple or list,
            it must contain three integers, (stride_Depth, stride_Height, stride_Width).
            Otherwise, the pool stride size will be a cube of an int.
        padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
            1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on every sides.
            3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_weight] whose value means the padding size of each dimension.
            4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
            5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
            The default value is 0.
        ceil_mode (bool): ${ceil_mode_comment}
        data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
                        The default is `"NDHWC"`. When it is `"NDHWC"`, the data is stored in the order of:
                        `[batch_size, input_depth, input_height, input_width, input_channels]`. Currently only `"NDHWC"` is supported.
        name(str, optional): For detailed information, please refer
                             to :ref:`api_guide_Name`. Usually name is no need to set and
                             None by default.
    
    Returns:
        Tensor: The output tensor of the pooling result. The data type is the same as the input tensor.
    
    Examples:
        .. code-block:: python

            import paddle
            from paddle.fluid.framework import _test_eager_guard

            with _test_eager_guard():
                dense_x = paddle.randn((1, 4, 4, 4, 3))
                sparse_x = dense_x.to_sparse_coo(4)
                kernel_sizes = [3, 3, 3]
                paddings = [0, 0, 0]
                strides = [1, 1, 1]
                out = paddle.incubate.sparse.nn.functional.max_pool3d(sparse_x, kernel_sizes, stride=strides, padding=paddings)
                # out.shape: [1, 2, 2, 2, 3]
    """

    assert in_dynamic_mode(), "Currently, the sparse API only supports dynamic mode"
    assert x.is_sparse_coo(
    ), "Currently, sparse.max_pool3d only supports the input of SparseCooTensor"
    assert data_format == 'NDHWC', "Currently, sparse.max_pool3d only supports data format 'NDHWC'"

    kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size')
    if stride is None:
        stride = kernel_size
    else:
        stride = utils.convert_to_list(stride, 3, 'pool_stride')

    channel_last = True

    padding, padding_algorithm = _update_padding_nd(padding,
                                                    3,
                                                    channel_last=channel_last,
                                                    ceil_mode=ceil_mode)

    #TODO(zkh2016): remove the dependency on dilation from the backend
    dilation = [1, 1, 1]

    return _C_ops.final_state_sparse_maxpool(x, kernel_size, padding, dilation,
                                             stride)
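Note that the code above defaults stride to kernel_size when stride is None, so
passing a single int yields non-overlapping pooling (a sketch under the same
assumptions as the docstring example):

out = paddle.incubate.sparse.nn.functional.max_pool3d(sparse_x, 2)  # 2x2x2 windows, stride 2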
Example #22
    def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
        """
        Creates an instance of `PretrainedModel`. Model weights are loaded
        by specifying name of a built-in pretrained model, or a community contributed model,
        or a local file directory path.

        Args:
            pretrained_model_name_or_path (str): Name of pretrained model or dir path
                to load from. The string can be:

                - Name of a built-in pretrained model
                - Name of a community-contributed pretrained model.
                - Local directory path which contains the model weights file ("model_state.pdparams")
                  and model config file ("model_config.json").
            *args (tuple): Position arguments for model `__init__`. If provided,
                use these as position argument values for model initialization.
            **kwargs (dict): Keyword arguments for model `__init__`. If provided,
                use these to update pre-defined keyword argument values for model
                initialization. If the keyword is in `__init__` argument names of
                base model, update argument values of the base model; else update
                argument values of derived model.
            load_state_as_np (bool, optional): Whether to load the weights as
                `numpy.ndarray` on CPU rather than as tensors on the default
                device. Note that on GPU, loading as tensors creates extra
                temporary copies in addition to the model weights, which
                doubles the memory usage. Thus `True` is suggested for big
                models on GPU. Default to `False`.

        Returns:
            PretrainedModel: An instance of `PretrainedModel`.

        Example:
            .. code-block::

                from paddlenlp.transformers import BertForSequenceClassification

                # Name of built-in pretrained model
                model = BertForSequenceClassification.from_pretrained('bert-base-uncased')

                # Name of community-contributed pretrained model
                model = BertForSequenceClassification.from_pretrained('yingyibiao/bert-base-uncased-sst-2-finetuned')

                # Load from local directory path
                model = BertForSequenceClassification.from_pretrained('./my_bert/')
        """
        pretrained_models = list(cls.pretrained_init_configuration.keys())
        resource_files = {}
        init_configuration = {}
        load_state_as_np = kwargs.pop("load_state_as_np", False)

        # From built-in pretrained models
        if pretrained_model_name_or_path in pretrained_models:
            for file_id, map_list in cls.pretrained_resource_files_map.items():
                resource_files[file_id] = map_list[
                    pretrained_model_name_or_path]
            init_configuration = copy.deepcopy(
                cls.
                pretrained_init_configuration[pretrained_model_name_or_path])
        # From local dir path
        elif os.path.isdir(pretrained_model_name_or_path):
            for file_id, file_name in cls.resource_files_names.items():
                full_file_name = os.path.join(pretrained_model_name_or_path,
                                              file_name)
                resource_files[file_id] = full_file_name
            resource_files["model_config_file"] = os.path.join(
                pretrained_model_name_or_path, cls.model_config_file)
        else:
            # Assuming from community-contributed pretrained models
            for file_id, file_name in cls.resource_files_names.items():
                full_file_name = os.path.join(COMMUNITY_MODEL_PREFIX,
                                              pretrained_model_name_or_path,
                                              file_name)
                resource_files[file_id] = full_file_name
            resource_files["model_config_file"] = os.path.join(
                COMMUNITY_MODEL_PREFIX, pretrained_model_name_or_path,
                cls.model_config_file)

        default_root = os.path.join(MODEL_HOME, pretrained_model_name_or_path)
        resolved_resource_files = {}
        for file_id, file_path in resource_files.items():
            if file_path is None or os.path.isfile(file_path):
                resolved_resource_files[file_id] = file_path
                continue
            path = os.path.join(default_root, file_path.split('/')[-1])
            if os.path.exists(path):
                logger.info("Already cached %s" % path)
                resolved_resource_files[file_id] = path
            else:
                logger.info("Downloading %s and saved to %s" %
                            (file_path, default_root))
                try:
                    resolved_resource_files[file_id] = get_path_from_url(
                        file_path, default_root)
                except RuntimeError as err:
                    logger.error(err)
                    raise RuntimeError(
                        f"Can't load weights for '{pretrained_model_name_or_path}'.\n"
                        f"Please make sure that '{pretrained_model_name_or_path}' is:\n"
                        "- a correct model-identifier of built-in pretrained models,\n"
                        "- or a correct model-identifier of community-contributed pretrained models,\n"
                        "- or the correct path to a directory containing relevant modeling files(model_weights and model_config).\n"
                    )

        # Prepare model initialization kwargs
        # Did we save some inputs and kwargs to reload?
        model_config_file = resolved_resource_files.pop(
            "model_config_file", None)
        if model_config_file is not None:
            with io.open(model_config_file, encoding="utf-8") as f:
                init_kwargs = json.load(f)
        else:
            init_kwargs = init_configuration
        # positional args are stored in kwargs; maybe better not to include them
        init_args = init_kwargs.pop("init_args", ())
        # class name corresponds to this configuration
        init_class = init_kwargs.pop("init_class",
                                     cls.base_model_class.__name__)
        # Check if the loaded config matches the current model class's __init__
        # arguments. If not match, the loaded config is for the base model class.
        if init_class == cls.base_model_class.__name__:
            base_args = init_args
            base_kwargs = init_kwargs
            derived_args = ()
            derived_kwargs = {}
            base_arg_index = None
        else:  # extract config for base model
            derived_args = list(init_args)
            derived_kwargs = init_kwargs
            base_arg = None
            for i, arg in enumerate(init_args):
                if isinstance(arg, dict) and "init_class" in arg:
                    assert arg.pop(
                        "init_class") == cls.base_model_class.__name__, (
                            "pretrained base model should be {}").format(
                                cls.base_model_class.__name__)
                    base_arg_index = i
                    base_arg = arg
                    break
            for arg_name, arg in init_kwargs.items():
                if isinstance(arg, dict) and "init_class" in arg:
                    assert arg.pop(
                        "init_class") == cls.base_model_class.__name__, (
                            "pretrained base model should be {}").format(
                                cls.base_model_class.__name__)
                    base_arg_index = arg_name
                    base_arg = arg
                    break

            base_args = base_arg.pop("init_args", ())
            base_kwargs = base_arg

        if cls == cls.base_model_class:
            # Update with newly provided args and kwargs for base model
            base_args = base_args if not args else args
            base_kwargs.update(kwargs)
            model = cls(*base_args, **base_kwargs)
        else:
            # Update with newly provided args and kwargs for derived model
            base_parameters_dict = inspect.signature(
                cls.base_model_class.__init__).parameters
            for k, v in kwargs.items():
                if k in base_parameters_dict:
                    base_kwargs[k] = v
            base_model = cls.base_model_class(*base_args, **base_kwargs)
            if base_arg_index is not None:
                derived_args[base_arg_index] = base_model
            else:
                derived_args = (base_model, )  # assume at the first position
            derived_args = derived_args if not args else args
            derived_parameters_dict = inspect.signature(
                cls.__init__).parameters
            for k, v in kwargs.items():
                if k in derived_parameters_dict:
                    derived_kwargs[k] = v
            model = cls(*derived_args, **derived_kwargs)

        # Maybe need more ways to load resources.
        weight_path = resolved_resource_files["model_state"]
        assert weight_path.endswith(
            ".pdparams"), "suffix of weight must be .pdparams"

        # NOTE: Allow to load partial model for model parallel.
        # TODO(guosheng): To make model loading for the model parallel automatic,
        # maybe we should make rank 0 worker load weights of the full model on
        # CPU, then split weights into multiple parts and pickle separately.
        # The other workers wait util pickle finish and then load the corresponding
        # partial weights. Also we can directly use separate weight files for
        # simplicity.
        state_dict = paddle.load(weight_path, return_numpy=load_state_as_np)

        # Make sure we are able to load base models as well as derived models
        # (with heads)
        start_prefix = ""
        model_to_load = model
        state_to_load = state_dict
        unexpected_keys = []
        missing_keys = []
        if not hasattr(model, cls.base_model_prefix) and any(
                s.startswith(cls.base_model_prefix)
                for s in state_dict.keys()):
            # base model
            state_to_load = {}
            start_prefix = cls.base_model_prefix + "."
            for k, v in state_dict.items():
                if k.startswith(cls.base_model_prefix):
                    state_to_load[k[len(start_prefix):]] = v
                else:
                    unexpected_keys.append(k)
        if hasattr(model, cls.base_model_prefix) and not any(
                s.startswith(cls.base_model_prefix)
                for s in state_dict.keys()):
            # derived model (base model with heads)
            model_to_load = getattr(model, cls.base_model_prefix)
            for k in model.state_dict().keys():
                if not k.startswith(cls.base_model_prefix):
                    missing_keys.append(k)
        if len(missing_keys) > 0:
            logger.info(
                "Weights of {} not initialized from pretrained model: {}".
                format(model.__class__.__name__, missing_keys))
        if len(unexpected_keys) > 0:
            logger.info(
                "Weights from pretrained model not used in {}: {}".format(
                    model.__class__.__name__, unexpected_keys))
        # Allow the float16 model to load float32 weights, which decreases memory
        # usage in model loading stage and is useful to big models.
        dtype_prefix_len = len("paddle.")  # e.g. "paddle.float16" -> "float16"
        for k, v in model_to_load.state_dict().items():
            if isinstance(v, np.ndarray):
                # numpy parameters carry no "paddle." dtype prefix; skip the cast
                continue
            dtype = str(v.dtype)[dtype_prefix_len:]
            # TODO(guosheng): add warnings for unmatched dtypes
            if k in state_to_load:
                state_to_load[k] = state_to_load[k].astype(dtype)
        # Logging model download statistics
        download_check(pretrained_model_name_or_path, "from_pretrained")
        # For model parallel if FasterGeneration
        # To avoid recursive import temporarily.
        import paddlenlp.ops.faster_transformer.transformer.decoding as ft_decoding
        state_to_load = ft_decoding.get_ft_para_conf().fit_partial_model(
            model_to_load, state_to_load)
        if paddle.in_dynamic_mode():
            model_to_load.set_state_dict(state_to_load)
            return model
        return model, state_to_load
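Per the load_state_as_np description above, large models can be loaded with
their weights kept as numpy arrays on CPU (a minimal sketch):

from paddlenlp.transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased', load_state_as_np=True)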
Example #23
def summary(net, input_size, dtypes=None):
    """Prints a string summary of the network.

    Args:
        net (Layer): the network, which must be an instance of Layer.
        input_size (tuple|InputSpec|list[tuple|InputSpec]): size of the input tensor.
                    If the model has only one input, input_size can be a tuple or
                    InputSpec. If the model has multiple inputs, input_size must be
                    a list containing every input's shape. Note that in input_size
                    only the batch_size dimension can be None or -1.
        dtypes (str, optional): if dtypes is None, 'float32' will be used. Default: None.

    Returns:
        Dict: a summary of the network including total params and total trainable params.

    Examples:
        .. code-block:: python

            import paddle
            import paddle.nn as nn

            class LeNet(nn.Layer):
                def __init__(self, num_classes=10):
                    super(LeNet, self).__init__()
                    self.num_classes = num_classes
                    self.features = nn.Sequential(
                        nn.Conv2D(
                            1, 6, 3, stride=1, padding=1),
                        nn.ReLU(),
                        nn.MaxPool2D(2, 2),
                        nn.Conv2D(
                            6, 16, 5, stride=1, padding=0),
                        nn.ReLU(),
                        nn.MaxPool2D(2, 2))

                    if num_classes > 0:
                        self.fc = nn.Sequential(
                            nn.Linear(400, 120),
                            nn.Linear(120, 84),
                            nn.Linear(
                                84, 10))

                def forward(self, inputs):
                    x = self.features(inputs)

                    if self.num_classes > 0:
                        x = paddle.flatten(x, 1)
                        x = self.fc(x)
                    return x

            lenet = LeNet()

            params_info = paddle.summary(lenet, (1, 1, 28, 28))
            print(params_info)

    """
    if isinstance(input_size, InputSpec):
        _input_size = tuple(input_size.shape)
    elif isinstance(input_size, list):
        _input_size = []
        for item in input_size:
            if isinstance(item, int):
                item = (item, )
            assert isinstance(item,
                              (tuple, InputSpec)), 'When input_size is list, \
            expect item in input_size is a tuple or InputSpec, but got {}'.format(
                                  type(item))

            if isinstance(item, InputSpec):
                _input_size.append(tuple(item.shape))
            else:
                _input_size.append(item)
    elif isinstance(input_size, int):
        _input_size = (input_size, )
    else:
        _input_size = input_size

    if not paddle.in_dynamic_mode():
        warnings.warn(
            "Your model was created in static mode, this may not get correct summary information!"
        )
        in_train_mode = False
    else:
        in_train_mode = net.training

    if in_train_mode:
        net.eval()

    def _is_shape(shape):
        for item in shape:
            if isinstance(item, (list, tuple)):
                return False
        return True

    def _check_shape(shape):
        num_unknown = 0
        new_shape = []
        for i in range(len(shape)):
            item = shape[i]
            if item is None or item == -1:
                num_unknown += 1
                if num_unknown > 1:
                    raise ValueError(
                        'In input_size, only the batch_size dimension can be None or -1.'
                    )
                item = 1
            elif isinstance(item, numbers.Number):
                if item <= 0:
                    raise ValueError(
                        "Expected element in input size greater than zero, but got {}".
                        format(item))
            new_shape.append(item)
        return tuple(new_shape)

    def _check_input(input_size):
        if isinstance(input_size, (list, tuple)) and _is_shape(input_size):
            return _check_shape(input_size)
        else:
            return [_check_input(i) for i in input_size]

    _input_size = _check_input(_input_size)
    result, params_info = summary_string(net, _input_size, dtypes)
    print(result)

    if in_train_mode:
        net.train()

    return params_info
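Per the Args section above, input_size may also be an InputSpec with a None
batch dimension, or a list of shapes for multi-input models; a sketch reusing
the lenet instance from the docstring:

import paddle
from paddle.static import InputSpec

image = InputSpec([None, 1, 28, 28], 'float32', 'image')
params_info = paddle.summary(lenet, image)
# For a hypothetical two-input model:
# params_info = paddle.summary(net, [(1, 1, 28, 28), (1, 10)])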
Example #24
    def __init__(
        self,
        img_size=224,
        patch_size=16,
        in_chans=3,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4,
        qkv_bias=False,
        qk_scale=None,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.0,
        norm_layer=nn.LayerNorm,
        epsilon=1e-5,
        class_dim=1000,
    ):
        super().__init__()
        self.class_dim = class_dim
        self.num_features = self.embed_dim = embed_dim

        self.patch_embed = PatchEmbed(
            img_size=img_size,
            patch_size=patch_size,
            in_chans=in_chans,
            embed_dim=embed_dim,
        )
        num_patches = self.patch_embed.num_patches

        self.pos_embed = add_parameter(
            self, paddle.zeros((1, num_patches + 1, embed_dim))
        )
        self.cls_token = add_parameter(self, paddle.zeros((1, 1, embed_dim)))

        self.pos_drop = nn.Dropout(p=drop_rate)

        dpr = np.linspace(0, drop_path_rate, depth)

        self.blocks = nn.LayerList(
            [
                Block(
                    dim=embed_dim,
                    num_heads=num_heads,
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    qk_scale=qk_scale,
                    drop=drop_rate,
                    attn_drop=attn_drop_rate,
                    drop_path=dpr[i],
                    norm_layer=norm_layer,
                    epsilon=epsilon,
                )
                for i in range(depth)
            ]
        )

        self.norm = norm_layer(embed_dim, epsilon=epsilon)

        # Classifier head
        if class_dim > 0:
            self.head = nn.Linear(embed_dim, class_dim)

        if paddle.in_dynamic_mode():
            trunc_normal_(self.pos_embed)
            trunc_normal_(self.cls_token)
            self.apply(self._init_weights)
Example #25
def nonzero(x, as_tuple=False):
    """
    Return a tensor containing the indices of all non-zero elements of the `input`
    tensor. If as_tuple is True, return a tuple of 1-D tensors, one for each dimension
    in `input`, each containing the indices (in that dimension) of all non-zero elements
    of `input`. Given an n-dimensional `input` tensor with shape [x_1, x_2, ..., x_n], if
    as_tuple is False, we get an output tensor with shape [z, n], where `z` is the
    number of all non-zero elements in the `input` tensor. If as_tuple is True, we get
    a tuple of `n` 1-D tensors, and the shape of each 1-D tensor is [z, 1].

    Args:
        x (Tensor): The input tensor variable.
        as_tuple (bool, optional): Whether to return the result as a tuple of 1-D tensors instead of a single Tensor. Default: False.

    Returns:
        Tensor. The data type is int64.

    Examples:

        .. code-block:: python

            import paddle

            x1 = paddle.to_tensor([[1.0, 0.0, 0.0],
                                   [0.0, 2.0, 0.0],
                                   [0.0, 0.0, 3.0]])
            x2 = paddle.to_tensor([0.0, 1.0, 0.0, 3.0])
            out_z1 = paddle.nonzero(x1)
            print(out_z1)
            #[[0 0]
            # [1 1]
            # [2 2]]
            out_z1_tuple = paddle.nonzero(x1, as_tuple=True)
            for out in out_z1_tuple:
                print(out)
            #[[0]
            # [1]
            # [2]]
            #[[0]
            # [1]
            # [2]]
            out_z2 = paddle.nonzero(x2)
            print(out_z2)
            #[[1]
            # [3]]
            out_z2_tuple = paddle.nonzero(x2, as_tuple=True)
            for out in out_z2_tuple:
                print(out)
            #[[1]
            # [3]]

    """
    list_out = []
    shape = x.shape
    rank = len(shape)

    if in_dygraph_mode():
        outs = _C_ops.final_state_where_index(x)
    elif paddle.in_dynamic_mode():
        outs = _C_ops.where_index(x)
    else:
        helper = LayerHelper("where_index", **locals())

        outs = helper.create_variable_for_type_inference(
            dtype=core.VarDesc.VarType.INT64)

        helper.append_op(type='where_index',
                         inputs={'Condition': x},
                         outputs={'Out': [outs]})

    if not as_tuple:
        return outs
    elif rank == 1:
        return tuple([outs])
    else:
        for i in range(rank):
            list_out.append(
                paddle.slice(outs, axes=[1], starts=[i], ends=[i + 1]))
        return tuple(list_out)
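The [z, n] index layout pairs naturally with paddle.gather_nd to fetch the
non-zero values themselves (a sketch reusing x1 from the docstring example):

values = paddle.gather_nd(x1, paddle.nonzero(x1))  # -> [1., 2., 3.]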
Example #26
def sparse_attention(query,
                     key,
                     value,
                     sparse_csr_offset,
                     sparse_csr_columns,
                     key_padding_mask=None,
                     attn_mask=None,
                     name=None):
    r"""
    This operator sparsifies the attention matrix in the Transformer module
    to reduce memory consumption and computation.
    The sparse layout is expressed in CSR format and contains two parameters,
    ``offset`` and ``columns``. The equation is:

    .. math::

        result=softmax(\frac{ Q * K^T }{\sqrt{d}}) * V

    where : ``Q``, ``K``, and ``V`` represent the three input parameters of the attention module. 
    The dimensions of the three parameters are the same. 
    ``d`` represents the size of the last dimension of the three parameters.

    Warning:    
        This API is only supported on ``CUDA 11.3`` and above.

    Args:
        query(Tensor): The query tensor in the Attention module. 
                        4-D tensor with shape: 
                        [batch_size, num_heads, seq_len, head_dim]. 
                        The dtype can be float32 and float64.
        key(Tensor): The key tensor in the Attention module. 
                        4-D tensor with shape: 
                        [batch_size, num_heads, seq_len, head_dim]. 
                        The dtype can be float32 and float64.
        value(Tensor): The value tensor in the Attention module. 
                        4-D tensor with shape:  
                        [batch_size, num_heads, seq_len, head_dim]. 
                        The dtype can be float32 and float64.
        sparse_csr_offset(Tensor): The sparsity feature in the Attention module 
                        is expressed in the CSR format, and the offset represents 
                        the number of non-zero elements in each row of the matrix.
                        3-D tensor with shape:   
                        [batch_size, num_heads, seq_len + 1]. 
                        The dtype should be int32.
        sparse_csr_columns(Tensor): The sparsity feature in the Attention module 
                        is expressed in the CSR format, and the columns represent 
                        the column index values of non-zero elements in the matrix.
                        3-D tensor with shape:  
                        [batch_size, num_heads, sparse_nnz]. 
                        The dtype should be int32.
        key_padding_mask(Tensor, optional):The key padding mask tensor in the Attention module. 
                        2-D tensor with shape: [batch_size, seq_len]. 
                        The dtype can be float32 and float64.
                        A value of 0 means that the position is masked.
        attn_mask(Tensor, optional):The attention mask tensor in the Attention module. 
                        2-D tensor with shape: [seq_len, seq_len]. 
                        The dtype can be float32 and float64.
                        A value of 0 means that the position is masked.
        name(str, optional): The default value is None. Normally there is no need for user
                        to set this property. For more information, please refer to
                        :ref:`api_guide_Name`.

    Returns:
        4-D tensor with shape:
        [batch_size, num_heads, seq_len, head_dim]. 
        The dtype can be float32 or float64.

    Examples:
        .. code-block:: python

            # required: skiptest
            import paddle
            import numpy as np

            query_data = np.array([[[[0, 1,], [2, 3],
                    [ 0, 1], [2, 3]]]]).astype("float32")
            key_data = np.array([[[[0, 1,], [2, 3],
                            [ 0, 1], [2, 3]]]]).astype("float32")
            value_data = np.array([[[[0, 1,], [2, 3],
                            [ 0, 1], [2, 3]]]]).astype("float32")
            sparse_csr_offset_data = np.array([[[0, 2,
                            4, 6, 8]]]).astype("int32")
            sparse_csr_columns_data = np.array([[[0, 1,
                            0, 1, 2, 3, 2, 3]]]).astype("int32")
            key_padding_mask_data = np.array([[1,1,1,0]]).astype("float32")
            attention_mask_data = np.array([[1,0,1,1],[1,1,1,1],[1,1,1,1],[1,1,1,1]]).astype("float32")
            print(query_data.shape)
            # (1, 1, 4, 2)
            print(sparse_csr_offset_data.shape)
            # (1, 1, 5)
            print(sparse_csr_columns_data.shape)
            # (1, 1, 8)
            paddle.disable_static()
            query = paddle.to_tensor(query_data, stop_gradient=False, 
                            place=paddle.CUDAPlace(0))
            key = paddle.to_tensor(key_data, stop_gradient=False, 
                            place=paddle.CUDAPlace(0))
            value = paddle.to_tensor(value_data, stop_gradient=False, 
                            place=paddle.CUDAPlace(0))
            offset = paddle.to_tensor(sparse_csr_offset_data, stop_gradient=False, 
                            place=paddle.CUDAPlace(0))
            columns = paddle.to_tensor(sparse_csr_columns_data, stop_gradient=False, 
                            place=paddle.CUDAPlace(0))
            key_padding_mask = paddle.to_tensor(key_padding_mask_data, stop_gradient=False, 
                            place=paddle.CUDAPlace(0))
            attention_mask = paddle.to_tensor(attention_mask_data, stop_gradient=False, 
                            place=paddle.CUDAPlace(0))
            output_mask = paddle.nn.functional.sparse_attention(query, key, 
                            value, offset, columns, 
                            key_padding_mask=key_padding_mask, attn_mask=attention_mask)
            print(output_mask)
            # [[[[0.        , 1.        ],
            #    [1.99830270, 2.99830270],
            #    [0.        , 1.        ],
            #    [0.        , 1.        ]]]]
            output = paddle.nn.functional.sparse_attention(query, key, 
                            value, offset, columns)
            print(output) 
            # [[[[1.60885942, 2.60885954],
            #       [1.99830270, 2.99830270],
            #       [1.60885942, 2.60885954],
            #       [1.99830270, 2.99830270]]]]
    """
    if in_dynamic_mode():
        result_attention, result_sdd, result_softmax = _C_ops.sparse_attention(
            query, key, value, sparse_csr_offset, sparse_csr_columns,
            key_padding_mask, attn_mask)
        return result_attention

    helper = LayerHelper('sparse_attention', **locals())
    dtype = helper.input_dtype(input_param_name='Q')
    out = helper.create_variable_for_type_inference(dtype)
    result_sdd = helper.create_variable_for_type_inference(dtype)
    result_softmax = helper.create_variable_for_type_inference(dtype)
    inputs = {
        'Q': query,
        'K': key,
        'V': value,
        'Offset': sparse_csr_offset,
        'Columns': sparse_csr_columns,
        'KeyPaddingMask': key_padding_mask,
        'AttnMask': attn_mask,
    }
    outputs = {
        'Out': out,
        'SparseDotSdd': result_sdd,
        'Softmax': result_softmax
    }
    helper.append_op(type='sparse_attention', inputs=inputs, outputs=outputs)
    return out
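
For clarity, the CSR layout expected by ``sparse_csr_offset`` and ``sparse_csr_columns`` can be derived from a dense 0/1 attention mask. The helper below is a minimal numpy sketch, not part of the Paddle API; the name ``dense_mask_to_csr`` is an assumption for illustration only.

import numpy as np


def dense_mask_to_csr(mask):
    # Hypothetical helper: convert a dense 0/1 mask of shape
    # [batch_size, num_heads, seq_len, seq_len] into the CSR
    # offset/columns layout described in the docstring above.
    batch_size, num_heads, seq_len, _ = mask.shape
    offsets = np.zeros((batch_size, num_heads, seq_len + 1), dtype=np.int32)
    columns = []
    for b in range(batch_size):
        for h in range(num_heads):
            cols = []
            for row in range(seq_len):
                nz = np.flatnonzero(mask[b, h, row])
                cols.extend(nz.tolist())
                # Row pointer: cumulative count of non-zeros seen so far.
                offsets[b, h, row + 1] = offsets[b, h, row] + len(nz)
            columns.append(cols)
    # Every (batch, head) pair must have the same nnz so that the columns
    # form a dense [batch_size, num_heads, sparse_nnz] tensor.
    columns = np.asarray(columns, dtype=np.int32).reshape(
        batch_size, num_heads, -1)
    return offsets, columns


# The block-diagonal mask corresponding to the docstring example above:
mask = np.array([[[[1, 1, 0, 0],
                   [1, 1, 0, 0],
                   [0, 0, 1, 1],
                   [0, 0, 1, 1]]]])
offsets, columns = dense_mask_to_csr(mask)
print(offsets)  # [[[0 2 4 6 8]]]
print(columns)  # [[[0 1 0 1 2 3 2 3]]]
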
# Inner wrapper of a mode-switching decorator; `func` is supplied by the
# enclosing decorator function, which is not shown here (a reconstruction
# follows below).
def __impl__(*args, **kwargs):
    if paddle.in_dynamic_mode():
        return func(*args, **kwargs)
    else:
        with fluid.dygraph.guard():
            return func(*args, **kwargs)
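
A minimal, hypothetical reconstruction of the enclosing decorator that would supply ``func`` to ``__impl__`` above; the name ``run_in_dygraph`` and the ``functools.wraps`` usage are assumptions, not taken from the source.

import functools

import paddle
import paddle.fluid as fluid


def run_in_dygraph(func):
    # Run `func` in dynamic mode, entering a dygraph guard first if
    # static mode is currently active.
    @functools.wraps(func)
    def __impl__(*args, **kwargs):
        if paddle.in_dynamic_mode():
            return func(*args, **kwargs)
        with fluid.dygraph.guard():
            return func(*args, **kwargs)

    return __impl__
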
Example no. 28
def kl_div(input, label, reduction='mean', name=None):
    """
    This operator calculates the Kullback-Leibler divergence loss
    between Input(X) and Input(Target). Note that Input(X) is the
    log-probability and Input(Target) is the probability.

    KL divergence loss is calculated as follows:

    $$l(x, y) = y * (\log(y) - x)$$

    where :math:`x` is the input and :math:`y` is the label.

    When :attr:`reduction` is :attr:`none`, the output loss has the
    same shape as the input; the loss at each point is calculated
    separately and no reduction is applied.

    When :attr:`reduction` is :attr:`mean`, the output loss has
    shape [1] and the loss value is the mean value of all losses.

    When :attr:`reduction` is :attr:`sum`, the output loss has
    shape [1] and the loss value is the sum of all losses.

    When :attr:`reduction` is :attr:`batchmean`, the output loss has
    shape [1] and the loss value is the sum of all losses divided by
    the batch size.

    Args:
        input (Tensor): The input tensor. The shape is [N, *], where N is the batch size and `*` means
             any number of additional dimensions. Its data type should be float32 or float64.
        label (Tensor): The label tensor. The shape is [N, *], the same shape as ``input``. Its data type should be float32 or float64.
        reduction (str, optional): Indicates how to reduce the loss;
             the candidates are ``'none'`` | ``'batchmean'`` | ``'mean'`` | ``'sum'``.
             If `reduction` is ``'mean'``, the reduced mean loss is returned;
             if `reduction` is ``'batchmean'``, the sum of the loss divided by the batch size is returned;
             if `reduction` is ``'sum'``, the reduced sum loss is returned;
             if `reduction` is ``'none'``, no reduction will be applied.
             Default is ``'mean'``.
        name(str, optional): Name for the operation (optional, default is None). For more information,
            please refer to :ref:`api_guide_Name`.

    Returns:
        Tensor: The KL divergence loss. Its data type is the same as the input tensor's.

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np
            import paddle.nn.functional as F

            paddle.disable_static()

            shape = (5, 20)
            input = np.random.uniform(-10, 10, shape).astype('float32')
            target = np.random.uniform(-10, 10, shape).astype('float32')

            # 'batchmean' reduction, loss shape will be [1]
            pred_loss = F.kl_div(paddle.to_tensor(input),
                                 paddle.to_tensor(target), reduction='batchmean')
            # shape=[1]

            # 'mean' reduction, loss shape will be [1]
            pred_loss = F.kl_div(paddle.to_tensor(input),
                                 paddle.to_tensor(target), reduction='mean')
            # shape=[1]

            # 'sum' reduction, loss shape will be [1]
            pred_loss = F.kl_div(paddle.to_tensor(input),
                                 paddle.to_tensor(target), reduction='sum')
            # shape=[1]

            # 'none' reduction, loss shape is same with input shape
            pred_loss = F.kl_div(paddle.to_tensor(input),
                                 paddle.to_tensor(target), reduction='none')
            # shape=[5, 20]

    """
    if paddle.in_dynamic_mode():
        out = core.ops.kldiv_loss(input, label, 'reduction', reduction)
        return out

    helper = LayerHelper('kl_div', **locals())

    fluid.data_feeder.check_variable_and_dtype(input, 'input',
                                               ['float32', 'float64'],
                                               'kl_div')
    fluid.data_feeder.check_variable_and_dtype(label, 'label',
                                               ['float32', 'float64'],
                                               'kl_div')
    fluid.data_feeder.check_type(reduction, 'reduction', str, 'kl_div')

    loss = helper.create_variable_for_type_inference(dtype=input.dtype)
    helper.append_op(type='kldiv_loss',
                     inputs={
                         'X': input,
                         'Target': label
                     },
                     outputs={'Loss': loss},
                     attrs={'reduction': reduction})
    return loss
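
Since ``input`` must already contain log-probabilities, a common pitfall is passing raw scores. Below is a minimal numpy reference for the formula in the docstring above; ``kl_div_ref`` and the variable names are illustrative assumptions, not part of the Paddle API.

import numpy as np


def kl_div_ref(log_p, q, reduction="mean"):
    # Elementwise l(x, y) = y * (log(y) - x), with x the log-probability
    # input and y the probability label, matching the docstring formula.
    loss = q * (np.log(q) - log_p)
    if reduction == "none":
        return loss
    if reduction == "sum":
        return loss.sum()
    if reduction == "mean":
        return loss.mean()
    if reduction == "batchmean":
        return loss.sum() / loss.shape[0]
    raise ValueError("unknown reduction: %s" % reduction)


# Proper inputs: log-probabilities for `log_p`, probabilities for `q`,
# here built via a manual log-softmax / softmax over the last axis.
logits = np.random.uniform(-1, 1, (5, 20)).astype("float64")
log_p = logits - np.log(np.exp(logits).sum(axis=-1, keepdims=True))
q = np.exp(log_p)
print(kl_div_ref(log_p, q, reduction="batchmean"))  # ~0.0, since p == q
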
Example no. 29
    def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
        """
        Instantiate an instance of `PretrainedModel` from a predefined
        model specified by name or path.
        Args:
            pretrained_model_name_or_path (str): A name of or a file path to a
                pretrained model.
            *args (tuple): positional arguments for `__init__`. If provided,
                these are used as the positional argument values for model initialization.
            **kwargs (dict): keyword arguments for `__init__`. If provided,
                these update the pre-defined keyword argument values for model
                initialization.
        Returns:
            PretrainedModel: An instance of PretrainedModel.
        """
        pretrained_models = list(cls.pretrained_init_configuration.keys())
        resource_files = {}
        init_configuration = {}
        if pretrained_model_name_or_path in pretrained_models:
            for file_id, map_list in cls.pretrained_resource_files_map.items():
                resource_files[file_id] = map_list[
                    pretrained_model_name_or_path]
            init_configuration = copy.deepcopy(
                cls.
                pretrained_init_configuration[pretrained_model_name_or_path])
        else:
            if os.path.isdir(pretrained_model_name_or_path):
                for file_id, file_name in cls.resource_files_names.items():
                    full_file_name = os.path.join(
                        pretrained_model_name_or_path, file_name)
                    resource_files[file_id] = full_file_name
                resource_files["model_config_file"] = os.path.join(
                    pretrained_model_name_or_path, cls.model_config_file)
            else:
                raise ValueError(
                    "Call {}.from_pretrained() with a model identifier or the "
                    "path to a directory instead. The supported model "
                    "identifiers are as follows: {}".format(
                        cls.__name__,
                        cls.pretrained_init_configuration.keys()))
        # FIXME(chenzeyu01): We should use another data path for storing model
        default_root = os.path.join(DATA_HOME, pretrained_model_name_or_path)
        resolved_resource_files = {}
        for file_id, file_path in resource_files.items():
            path = os.path.join(default_root, file_path.split('/')[-1])
            if file_path is None or os.path.isfile(file_path):
                resolved_resource_files[file_id] = file_path
            elif os.path.exists(path):
                logger.info("Already cached %s" % path)
                resolved_resource_files[file_id] = path
            else:
                logger.info("Downloading %s and saved to %s" %
                            (file_path, default_root))
                resolved_resource_files[file_id] = get_path_from_url(
                    file_path, default_root)

        # Prepare model initialization kwargs
        # Did we save some inputs and kwargs to reload?
        model_config_file = resolved_resource_files.pop(
            "model_config_file", None)
        if model_config_file is not None:
            with io.open(model_config_file, encoding="utf-8") as f:
                init_kwargs = json.load(f)
        else:
            init_kwargs = init_configuration
        # positional args are stored in kwargs; maybe better not to include them
        init_args = init_kwargs.pop("init_args", ())
        # class name corresponds to this configuration
        init_class = init_kwargs.pop("init_class",
                                     cls.base_model_class.__name__)

        # Check if the loaded config matches the current model class's __init__
        # arguments. If not match, the loaded config is for the base model class.
        if init_class == cls.base_model_class.__name__:
            base_args = init_args
            base_kwargs = init_kwargs
            derived_args = ()
            derived_kwargs = {}
            base_arg_index = None
        else:  # extract config for base model
            derived_args = list(init_args)
            derived_kwargs = init_kwargs
            for i, arg in enumerate(init_args):
                if isinstance(arg, dict) and "init_class" in arg:
                    assert arg.pop(
                        "init_class") == cls.base_model_class.__name__, (
                            "pretrained base model should be {}").format(
                                cls.base_model_class.__name__)
                    base_arg_index = i
                    break
            for arg_name, arg in init_kwargs.items():
                if isinstance(arg, dict) and "init_class" in arg:
                    assert arg.pop(
                        "init_class") == cls.base_model_class.__name__, (
                            "pretrained base model should be {}").format(
                                cls.base_model_class.__name__)
                    base_arg_index = arg_name
                    break
            base_args = arg.pop("init_args", ())
            base_kwargs = arg
        if cls == cls.base_model_class:
            # Update with newly provided args and kwargs for base model
            base_args = base_args if not args else args
            base_kwargs.update(kwargs)
            model = cls(*base_args, **base_kwargs)
        else:
            # Update with newly provided args and kwargs for derived model
            base_model = cls.base_model_class(*base_args, **base_kwargs)
            if base_arg_index is not None:
                derived_args[base_arg_index] = base_model
            else:
                derived_args = (base_model, )  # assume at the first position
            derived_args = derived_args if not args else args
            derived_kwargs.update(kwargs)
            model = cls(*derived_args, **derived_kwargs)

        # Maybe need more ways to load resources.
        weight_path = list(resolved_resource_files.values())[0]
        assert weight_path.endswith(
            ".pdparams"), "suffix of weight must be .pdparams"
        state_dict = paddle.load(weight_path)

        # Make sure we are able to load base models as well as derived models
        # (with heads)
        start_prefix = ""
        model_to_load = model
        state_to_load = state_dict
        unexpected_keys = []
        missing_keys = []
        if not hasattr(model, cls.base_model_prefix) and any(
                s.startswith(cls.base_model_prefix)
                for s in state_dict.keys()):
            # base model
            state_to_load = {}
            start_prefix = cls.base_model_prefix + "."
            for k, v in state_dict.items():
                if k.startswith(cls.base_model_prefix):
                    state_to_load[k[len(start_prefix):]] = v
                else:
                    unexpected_keys.append(k)
        if hasattr(model, cls.base_model_prefix) and not any(
                s.startswith(cls.base_model_prefix)
                for s in state_dict.keys()):
            # derived model (base model with heads)
            model_to_load = getattr(model, cls.base_model_prefix)
            for k in model.state_dict().keys():
                if not k.startswith(cls.base_model_prefix):
                    missing_keys.append(k)
        if len(missing_keys) > 0:
            logger.info(
                "Weights of {} not initialized from pretrained model: {}".
                format(model.__class__.__name__, missing_keys))
        if len(unexpected_keys) > 0:
            logger.info(
                "Weights from pretrained model not used in {}: {}".format(
                    model.__class__.__name__, unexpected_keys))
        model_to_load.set_state_dict(state_to_load)
        if paddle.in_dynamic_mode():
            return model
        return model, state_to_load
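
The base/derived key remapping near the end can be hard to follow. Here is a minimal sketch of the prefix-stripping branch using plain dicts; the helper name ``strip_base_prefix`` is an assumption for illustration only.

def strip_base_prefix(state_dict, base_model_prefix):
    # When a base-model class loads weights saved from a derived model,
    # keys prefixed with `base_model_prefix` are stripped, and any other
    # keys are reported as unexpected.
    prefix = base_model_prefix + "."
    state_to_load, unexpected_keys = {}, []
    for k, v in state_dict.items():
        if k.startswith(prefix):
            state_to_load[k[len(prefix):]] = v
        else:
            unexpected_keys.append(k)
    return state_to_load, unexpected_keys


saved = {"bert.embeddings.weight": 0, "classifier.weight": 1}
loadable, unexpected = strip_base_prefix(saved, "bert")
print(loadable)    # {'embeddings.weight': 0}
print(unexpected)  # ['classifier.weight']
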
Example no. 30
def _compute_quantile(x, q, axis=None, keepdim=False, ignore_nan=False):
    """
    Compute the quantile of the input along the specified axis.

    Args:
        x (Tensor): The input Tensor, it's data type can be float32, float64.
        q (int|float|list): The quantile(s) to compute, which should be in range [0, 1]. If q is a list,
            each q will be calculated and the first dimension of the output is the same as the number of ``q`` .
        axis (int|list, optional): The axis along which to calculate the quantile. ``axis`` should be an int or a list of int.
            ``axis`` should be in range [-D, D), where D is the number of dimensions of ``x`` .
            If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
            If ``axis`` is a list, the quantile is calculated over all elements of the given axes.
            If ``axis`` is None, the quantile is calculated over all elements of ``x``. Default is None.
        keepdim (bool, optional): Whether to keep the reduced dimension(s)
            in the output Tensor. If ``keepdim`` is True, the dimensions of
            the output Tensor are the same as ``x`` except in the reduced
            dimensions (which are of size 1 in this case). Otherwise, the shape of
            the output Tensor is squeezed along ``axis`` . Default is False.
        ignore_nan (bool, optional): Whether to ignore NaN values in the input Tensor.
            If ``ignore_nan`` is True, nanquantile is calculated;
            otherwise quantile is calculated. Default is False.

    Returns:
        Tensor, the result of the quantile along ``axis`` of ``x``.
        To obtain higher precision, the data type of the result will be float64.
    """
    # Validate x
    if not isinstance(x, Variable):
        raise TypeError("input x should be a Tensor.")

    # Validate q
    if isinstance(q, (int, float)):
        q = [q]
    elif isinstance(q, (list, tuple)):
        if len(q) <= 0:
            raise ValueError("q should not be empty")
    else:
        raise TypeError("Type of q should be int, float, list or tuple.")

    # Validate axis
    dims = len(x.shape)
    out_shape = list(x.shape)
    if axis is None:
        x = paddle.flatten(x)
        axis = 0
        out_shape = [1] * dims
    else:
        if isinstance(axis, list):
            if len(axis) <= 0:
                raise ValueError("axis should not be empty")
            axis_src, axis_dst = [], []
            for axis_single in axis:
                if not isinstance(axis_single, int) or not (
                        axis_single < dims and axis_single >= -dims):
                    raise ValueError(
                        "Axis should be None, an int, or a list; each element should be in range [-rank(x), rank(x))."
                    )
                if axis_single < 0:
                    axis_single = axis_single + dims
                axis_src.append(axis_single)
                out_shape[axis_single] = 1
            axis_dst = list(range(-len(axis), 0))
            x = paddle.moveaxis(x, axis_src, axis_dst)
            x = paddle.flatten(x, axis_dst[0], axis_dst[-1])
            axis = axis_dst[0]
        else:
            if not isinstance(axis, int) or not (axis < dims and axis >= -dims):
                raise ValueError(
                    "Axis should be None, an int, or a list; each element should be in range [-rank(x), rank(x))."
                )
            if axis < 0:
                axis += dims
            out_shape[axis] = 1

    mask = x.isnan()
    valid_counts = mask.logical_not().sum(axis=axis,
                                          keepdim=True,
                                          dtype='float64')

    indices = []

    for q_num in q:
        if q_num < 0 or q_num > 1:
            raise ValueError("q should be in range [0, 1]")
        if paddle.in_dynamic_mode():
            q_num = paddle.to_tensor(q_num, dtype='float64')
        if ignore_nan:
            indices.append(q_num * (valid_counts - 1))
        else:
            # TODO(Asthestarsfalll): Use paddle.index_fill instead of where
            index = q_num * (valid_counts - 1)
            last_index = x.shape[axis] - 1
            nums = paddle.full_like(index, fill_value=last_index)
            index = paddle.where(mask.any(axis=axis, keepdim=True), nums, index)
            indices.append(index)

    sorted_tensor = paddle.sort(x, axis)

    outputs = []

    # TODO(chenjianye): replace the for-loop to directly take elements.
    for index in indices:
        indices_below = paddle.floor(index).astype(paddle.int32)
        indices_upper = paddle.ceil(index).astype(paddle.int32)
        tensor_upper = paddle.take_along_axis(
            sorted_tensor, indices_upper, axis=axis)
        tensor_below = paddle.take_along_axis(
            sorted_tensor, indices_below, axis=axis)
        weights = (index - indices_below.astype('float64'))
        out = paddle.lerp(
            tensor_below.astype('float64'),
            tensor_upper.astype('float64'), weights)
        if not keepdim:
            out = paddle.squeeze(out, axis=axis)
        else:
            out = out.reshape(out_shape)
        outputs.append(out)

    if len(q) > 1:
        outputs = paddle.stack(outputs, 0)
    else:
        outputs = outputs[0]

    return outputs
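
The interpolation step above is the standard linear-interpolation quantile: the virtual index ``q * (n - 1)`` is split into floor/ceil neighbours, which are then blended by ``paddle.lerp``. A minimal 1-D numpy reference follows; the name ``quantile_lerp_ref`` is an assumption for illustration only.

import numpy as np


def quantile_lerp_ref(x, q):
    # Virtual index = q * (n - 1); the result linearly interpolates
    # between the sorted values at floor(index) and ceil(index).
    x = np.sort(np.asarray(x, dtype="float64"))
    index = q * (len(x) - 1)
    below, upper = int(np.floor(index)), int(np.ceil(index))
    weight = index - below
    return x[below] + weight * (x[upper] - x[below])


data = [3.0, 1.0, 2.0, 4.0]
print(quantile_lerp_ref(data, 0.5))  # 2.5
print(np.quantile(data, 0.5))        # 2.5, matches numpy's default method
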