Python Parameter примеры использования

Язык программирования: Python

Пространство имен/Пакет: mxnet.gluon

Класс/Тип: Parameter

Примеров на hotexamples.com: 6

Python Parameter - 6 примеров найдено. Это лучшие примеры Python кода для mxnet.gluon.Parameter, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Parameter(12)

data(8)

initialize(3)

grad(1)

set_data(1)

Пример #1

Показать файл

Файл: main.py Проект: QiangCai/incubator-mxnet

def optimize(args):
    """Optimize an image directly so it matches a content and a style target.

    Reference: Gatys et al., "Image Style Transfer Using Convolutional
    Neural Networks".

    The pre-processed content image is copied into a trainable ``Parameter``.
    That parameter is then updated with the 'adam' optimizer so that its VGG
    features approach the content features and the Gram matrices of its
    features approach those of the style image. The result is written to
    ``args.output_image``.

    Parameters
    ----------
    args
        Options object. The attributes read here are ``cuda``,
        ``content_image``, ``content_size``, ``style_image``, ``style_size``,
        ``lr``, ``iters``, ``content_weight``, ``style_weight``,
        ``log_interval`` and ``output_image``.
    """
    ctx = mx.gpu(0) if args.cuda else mx.cpu(0)

    # Load and pre-process the two target images.
    content_image = utils.tensor_load_rgbimage(
        args.content_image, ctx, size=args.content_size, keep_asp=True)
    content_image = utils.subtract_imagenet_mean_preprocess_batch(content_image)
    style_image = utils.tensor_load_rgbimage(
        args.style_image, ctx, size=args.style_size)
    style_image = utils.subtract_imagenet_mean_preprocess_batch(style_image)

    # Pre-trained VGG-16 used as a fixed feature extractor.
    vgg = net.Vgg16()
    utils.init_vgg_params(vgg, 'models', ctx=ctx)

    # Targets: one content feature map, and a Gram matrix per style feature.
    content_target = vgg(content_image)[1]
    style_grams = [net.gram_matrix(feat) for feat in vgg(style_image)]

    # The image being optimized starts out as a copy of the content image.
    output = Parameter('output', shape=content_image.shape)
    output.initialize(ctx=ctx)
    output.set_data(content_image)

    trainer = gluon.Trainer([output], 'adam', {'learning_rate': args.lr})
    mse_loss = gluon.loss.L2Loss()

    for step in range(args.iters):
        # Clamp the current image into [0, 255] before each forward pass.
        utils.imagenet_clamp_batch(output.data(), 0, 255)
        with autograd.record():
            feats = vgg(output.data())
            content_loss = (2 * args.content_weight
                            * mse_loss(feats[1], content_target))
            style_loss = 0.
            for idx, feat in enumerate(feats):
                layer_loss = mse_loss(net.gram_matrix(feat), style_grams[idx])
                style_loss = style_loss + 2 * args.style_weight * layer_loss
            total_loss = content_loss + style_loss
            total_loss.backward()

        trainer.step(1)
        if (step + 1) % args.log_interval == 0:
            print('loss:{:.2f}'.format(total_loss.asnumpy()[0]))

    # Undo the mean subtraction and save the first image of the batch.
    result = utils.add_imagenet_mean_batch(output.data())
    utils.tensor_save_bgrimage(result[0], args.output_image, args.cuda)

Пример #2

Показать файл

    def __init__(
        self,
        d_hidden: int,
        kernel_sizes: List[int],
        n_head: int = 1,
        bias: bool = True,
        bidirectional: bool = False,
        dist_enc: Optional[str] = None,
        share_values: bool = False,
        dropout: float = 0.0,
        temperature: float = 1.0,
        **kwargs,
    ):
        """
        Self-attention module with q,k,v from the same input

        Parameters
        ----------
        d_hidden : int
            hidden dimension
        kernel_sizes : List[int]
            kernel sizes of convolutions to generate queries and keys;
            one convolution (one "group") is created per entry
        n_head : int, optional
            number of attention heads, by default 1
        bias : bool, optional
            add bias term in input and output projections, by default True
        bidirectional : bool, optional
            if False, add a mask to avoid backward attention, by default False
        dist_enc : Optional[str], optional
            add relative distance embeddings to dot-product attention, can be
                'add' (linearly combine key and dist),
                'dot' (dot product between key and dist),
                or None (disabled),
            by default None
        share_values : bool, optional
            if True, a value representation is shared by all attention heads, by default False
            ref. https://arxiv.org/abs/1912.09363
        dropout : float, optional
            dropout rate, by default 0.0
        temperature : float, optional
            softmax temperature, by default 1.0
        **kwargs
            forwarded unchanged to the parent class constructor

        Raises
        ------
        AssertionError
            if ``d_hidden`` is not divisible by ``n_head`` or by the number
            of kernel sizes, if ``n_head`` is not divisible by the number of
            kernel sizes, or if ``dist_enc`` is neither None, "dot" nor "add"
        """
        super(SelfAttention, self).__init__(**kwargs)
        n_groups = len(kernel_sizes)
        assert (
            d_hidden % n_head == 0
        ), f"hidden dim {d_hidden} cannot be split into {n_head} heads."
        assert (
            d_hidden % n_groups == 0
        ), f"hidden dim {d_hidden} cannot be split into {n_groups} groups."
        # The message must reference `n_head`; the previous `n_heads` was an
        # undefined name, so a failing check raised NameError instead.
        assert (
            n_head % n_groups == 0
        ), f"num_heads {n_head} cannot be allocated for {n_groups} groups."
        self.d_hidden = d_hidden
        self.kernel_sizes = kernel_sizes
        self.n_groups = n_groups
        self.d_group = self.d_hidden // self.n_groups
        self.n_head = n_head
        self.d_head = self.d_hidden // self.n_head
        self.bias = bias
        self.dist_enc = dist_enc
        self.bidirectional = bidirectional
        self.share_values = share_values
        self.temperature = temperature

        with self.name_scope():
            # One convolution per kernel size; outputs are concatenated on
            # the last axis. Each emits 2 * d_group channels.
            self.qk_proj = HybridConcurrent(axis=-1, prefix="qk_proj_")
            for ksize in self.kernel_sizes:
                self.qk_proj.add(
                    CausalConv1D(
                        channels=self.d_group * 2,
                        kernel_size=ksize,
                        prefix=f"conv{ksize}_",
                    ))
            # With shared values a single head-sized projection is used.
            self.v_proj = nn.Dense(
                units=self.d_head if self.share_values else d_hidden,
                use_bias=bias,
                flatten=False,
                weight_initializer=init.Xavier(),
                prefix="v_proj_",
            )
            self.out_proj = nn.Dense(
                units=d_hidden,
                use_bias=bias,
                flatten=False,
                weight_initializer=init.Xavier(),
                prefix="out_proj_",
            )

            if self.dist_enc is not None:
                assert self.dist_enc in [
                    "dot",
                    "add",
                ], f"distance encoding type {self.dist_enc} is not supported"
                self.posemb = SinusoidalPositionalEmbedding(d_hidden)
                self.pos_proj = nn.Dense(
                    units=d_hidden,
                    use_bias=bias,
                    flatten=False,
                    weight_initializer=init.Xavier(),
                    prefix="pos_proj_",
                )
                # The two extra bias parameters exist only in "add" mode.
                if self.dist_enc == "add":
                    self._ctt_bias_weight = Parameter(
                        "_ctt_bias_weight",
                        shape=(1, n_head, 1, self.d_head),
                        init=init.Xavier(),
                    )
                    self._pos_bias_weight = Parameter(
                        "_pos_bias_weight",
                        shape=(1, n_head, 1, self.d_head),
                        init=init.Xavier(),
                    )

            self.dropout = nn.Dropout(dropout)

Пример #3

Показать файл

class NoNorm(HybridBlock):
    r"""
    Element-wise linear transformation of an n-dimensional input array,
    used as a replacement for layer normalization.

    .. math::
        out = \gamma \circ data + \beta

    Parameters
    ----------
    in_channels : int
        Number of channels (feature maps) in input data. Used as the length
        of the `gamma` and `beta` parameters.
    center: bool, default True
        If True, `beta` is created with ``grad_req='write'``; if False, with
        ``grad_req='null'``. `forward` adds `beta` in both cases.
    scale: bool, default True
        If True, `gamma` is created with ``grad_req='write'``; if False, with
        ``grad_req='null'``. `forward` multiplies by `gamma` in both cases.
    beta_initializer: str or `Initializer`, default 'zeros'
        Initializer for the beta weight.
    gamma_initializer: str or `Initializer`, default 'ones'
        Initializer for the gamma weight.
    dtype: str, default 'float32'
        Data type of the `gamma` and `beta` parameters.

    Inputs:
        - **data**: input tensor with arbitrary shape.

    Outputs:
        - **out**: output tensor with the same shape as `data`.

    References
    ----------
        `MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices
        <https://arxiv.org/pdf/2004.02984.pdf>`_

    Examples
    --------
    >>> # Input of shape (2, 5)
    >>> x = mx.np.array([[1, 2, 3, 4, 5], [1, 1, 2, 2, 2]])
    >>> # With the default initializers (gamma = 1, beta = 0) the input is unchanged
    >>> layer = NoNorm(in_channels=5)
    >>> layer.initialize(ctx=mx.cpu(0))
    >>> layer(x)
    array([[1., 2., 3., 4., 5.],
       [1., 1., 2., 2., 2.]])
    """
    def __init__(self,
                 in_channels,
                 center=True,
                 scale=True,
                 beta_initializer='zeros',
                 gamma_initializer='ones',
                 dtype='float32',
                 **kwargs):
        super().__init__(**kwargs)
        self._kwargs = {'center': center, 'scale': scale}
        self._in_channels = in_channels
        param_shape = (in_channels, )
        # A disabled flag keeps the parameter but turns its gradient off.
        gamma_grad_req = 'write' if scale else 'null'
        beta_grad_req = 'write' if center else 'null'
        self.gamma = Parameter('gamma',
                               grad_req=gamma_grad_req,
                               shape=param_shape,
                               init=gamma_initializer,
                               dtype=dtype)
        self.beta = Parameter('beta',
                              grad_req=beta_grad_req,
                              shape=param_shape,
                              init=beta_initializer,
                              dtype=dtype)

    def forward(self, data):
        """Return ``data * gamma + beta``."""
        scaled = data * self.gamma.data()
        return scaled + self.beta.data()

    def __repr__(self):
        """Return ``ClassName(center=..., scale=..., in_channels=N)``."""
        flags = ', '.join('='.join([key, repr(value)])
                          for key, value in self._kwargs.items())
        return '{}({}, in_channels={})'.format(
            self.__class__.__name__, flags, self.gamma.shape[0])

Пример #4

Показать файл

    def __init__(self,
                 vocab_size: int,
                 embed_size: int,
                 units: int,
                 cutoffs: Optional[Union[int, List]] = None,
                 div_val: float = 1.0,
                 dtype='float32',
                 scaled=True,
                 embedding_initializer: InitializerType = None,
                 weight_initializer: InitializerType = None):
        """Create the embedding and projection parameters of the layer.

        Parameters
        ----------
        vocab_size
            The size of the vocabulary
        embed_size
            The base size of the embedding vectors. The embedding size of each cluster will be
            [embed_size / div_val**0, embed_size / div_val**1, embed_size / div_val**2, ...]
        units
            The number of units after the mapping
        cutoffs
            The cutoffs to slice the vocab to multiple clusters. It should be a sorted list. Each
            value should be between 1 --> vocab_size - 1.
        div_val
            The base denominator for computing the size of the embedding vector in each cluster.
            Must be 1.0 when no cutoffs are given.
        dtype
            The data type of layer
        scaled
            Whether to scale the embedding by sqrt(units)
        embedding_initializer
            Initializer of the embedding vectors
        weight_initializer
            Initializer of projection layers

        Raises
        ------
        ValueError
            If ``div_val`` is so large that a cluster would get an embedding
            size of 0.
        """
        super().__init__()
        cutoffs = _fmt_and_check_cutoffs(cutoffs, vocab_size)
        if cutoffs is None:
            assert div_val == 1.0

        # Keep the configuration both as private attributes and as an
        # ordered summary dictionary.
        summary = OrderedDict()
        summary['cutoffs'] = cutoffs
        summary['vocab_size'] = vocab_size
        summary['embed_size'] = embed_size
        summary['units'] = units
        summary['div_val'] = div_val
        summary['dtype'] = dtype
        summary['scaled'] = scaled
        self._kwargs = summary
        self._dtype = dtype
        self._vocab_size = vocab_size
        self._embed_size = embed_size
        self._units = units
        self._cutoffs = cutoffs
        self._div_val = div_val
        self._scaled = scaled
        if self._scaled:
            self._emb_scale = units**0.5

        if div_val == 1.0:
            # Single embedding table covering the whole vocabulary.
            self.embed0_weight = Parameter('embed0_weight',
                                           shape=(vocab_size, embed_size),
                                           init=embedding_initializer,
                                           allow_deferred_init=True)
            if units == embed_size:
                # No projection is needed when the sizes already match.
                self.proj_layers = None
            else:
                self.inter_proj0_weight = Parameter('inter_proj0_weight',
                                                    shape=(embed_size, units),
                                                    init=weight_initializer,
                                                    allow_deferred_init=True)
            return

        # One embedding table and one projection per vocabulary cluster;
        # cluster i spans [lower_bounds[i], upper_bounds[i]).
        self.proj_layers = nn.HybridSequential()
        lower_bounds = [0] + cutoffs
        upper_bounds = cutoffs + [vocab_size]
        for i, (l_idx, r_idx) in enumerate(zip(lower_bounds, upper_bounds)):
            inner_embed_size = int(embed_size / div_val**i)
            if inner_embed_size == 0:
                raise ValueError(
                    f'div_val = {div_val} is too large for the layer. Currently, the '
                    f'cutoffs are {cutoffs} and the embed_size is {embed_size}. Using the '
                    f'div_val = {div_val} will cause some clusters to have '
                    'embed_size=0.')
            embed_name = f'embed{i}_weight'
            proj_name = f'inter_proj{i}_weight'
            setattr(self, embed_name,
                    Parameter(embed_name,
                              shape=(r_idx - l_idx, inner_embed_size),
                              init=embedding_initializer,
                              allow_deferred_init=True))
            setattr(self, proj_name,
                    Parameter(proj_name,
                              shape=(inner_embed_size, units),
                              init=weight_initializer,
                              allow_deferred_init=True))

Пример #5

Показать файл

 def __init__(self, d_model, epsilon, dtype):
     """Create the layer's weight parameter and store its epsilon.

     Parameters
     ----------
     d_model
         Passed as the ``shape`` of the 'layernorm_weight' parameter.
     epsilon
         Stored unchanged as ``self.variance_epsilon``.
     dtype
         Data type of the weight parameter.
     """
     super().__init__()
     self.variance_epsilon = epsilon
     # NOTE(review): the attribute is spelled `gemma` (presumably "gamma");
     # the name is kept because code outside this view may reference it.
     self.gemma = Parameter(
         'layernorm_weight',
         shape=d_model,
         init='ones',
         dtype=dtype,
     )

Пример #6

Показать файл

class TransformerXLDecoder(HybridBlock):
    """Stack of ``TransformerXLDecoderLayer`` blocks with two shared biases.

    The parameters ``query_k_bias`` and ``query_r_bias`` each have shape
    ``(num_heads, units // num_heads)``. They are owned by this block and
    passed to every layer on each forward call.
    """

    def __init__(self,
                 num_layers=3,
                 units=512,
                 hidden_size=2048,
                 num_heads=8,
                 activation_dropout=0.1,
                 dropout=0.1,
                 attention_dropout=0.0,
                 layernorm_eps=1E-12,
                 activation='relu',
                 dtype='float32',
                 layout='NT',
                 pre_norm=False,
                 weight_initializer=None,
                 bias_initializer=None):
        """Build the shared bias parameters and ``num_layers`` decoder layers.

        Every argument except ``num_layers`` is forwarded to each
        ``TransformerXLDecoderLayer`` (``layernorm_eps`` is passed as
        ``layer_norm_eps``). ``units``, ``num_heads`` and
        ``bias_initializer`` also define the two shared bias parameters.
        """
        super().__init__()
        bias_shape = (num_heads, units // num_heads)
        self.query_k_bias = Parameter('query_k_bias',
                                      shape=bias_shape,
                                      init=bias_initializer,
                                      allow_deferred_init=True)
        self.query_r_bias = Parameter('query_r_bias',
                                      shape=bias_shape,
                                      init=bias_initializer,
                                      allow_deferred_init=True)

        # All layers are configured identically.
        layer_kwargs = dict(units=units,
                            hidden_size=hidden_size,
                            num_heads=num_heads,
                            activation_dropout=activation_dropout,
                            dropout=dropout,
                            attention_dropout=attention_dropout,
                            layer_norm_eps=layernorm_eps,
                            activation=activation,
                            dtype=dtype,
                            layout=layout,
                            pre_norm=pre_norm,
                            weight_initializer=weight_initializer,
                            bias_initializer=bias_initializer)
        self.decoder_layers = HybridSequential()
        for _ in range(num_layers):
            self.decoder_layers.add(TransformerXLDecoderLayer(**layer_kwargs))

    def forward(self, data, mem_l, rel_positions, mask):
        """Run the input through every decoder layer and collect the outputs.

        Parameters
        ----------
        data
            - layout = 'NT':
                Shape (batch_size, query_length)
            - layout = 'TN':
                Shape (query_length, batch_size)
        mem_l
            Contains a list of memory objects, each one will contain:
            - layout = 'NT':
                Shape (batch_size, mem_length, C_i)
            - layout = 'TN':
                Shape (mem_length, batch_size, C_i)
        rel_positions
            The relative positions.
            Shape (query_length, mem_length + query_length)
        mask
            Mask between the query and the memory + query.
            Shape (batch_size, query_length, mem_length + query_length)

        Returns
        -------
        out_l
            Contains a list of hidden states, each will contain:
            - layout = 'NT'
                Shape (batch_size, query_length, C_o)
            - layout = 'TN'
                Shape (query_length, batch_size, C_o)
        """
        k_bias = self.query_k_bias.data()
        r_bias = self.query_r_bias.data()
        hidden = data
        outputs = []
        # Layer idx consumes memory idx; its output feeds the next layer.
        for idx, layer in enumerate(self.decoder_layers):
            hidden = layer(hidden, mem_l[idx], rel_positions, mask,
                           r_bias, k_bias)
            outputs.append(hidden)
        return outputs