Example #1
def embedding(x, size, vocabulary=None, **kw):
    """ Embedding layer.\n
    The input is usually a tensor of indices (integers), and each index is mapped to a dense vector,
    so the output has one more dimension than the input.\n
    **Note**: The output of this function always has one more dimension than the input. For input with shape `(*)`,
    the output will be `(*, size)`. Any shape specifications in the KWargs are ignored.\n
    -  `x: Tensor`; Contains indices into the vocabulary. Will be converted to `LongTensor` of integers.
        Can be of any shape.
    -  `size: int`; The size of embedding vector.
    -  `vocabulary: int or None`; The vocabulary size of the embedding, i.e. the number of distinct indices `x` may contain.
        By default it is set to `max(x)-min(x)+1`.
    -  `**kw: dict`; Any additional KWargs are passed down to `torch.nn.Embedding`, as well as `warm.engine.forward`.
    -  `return: Tensor`; With the embedding dimension appended to the shape of `x`.
        Thus with shape `(*, size)`, where `*` is the shape of `x`. """
    x = x.type(torch.LongTensor)
    if vocabulary is None:
        vocabulary = x.max() - x.min() + 1
    kw.pop('in_shape', None)
    kw.pop('out_shape', None)
    kw.pop('base_shape', None)
    inferred_kw = dict(
        base_name='embedding',
        base_class=nn.Embedding,
        base_kw=dict(
            num_embeddings=vocabulary,
            embedding_dim=size,
            **engine.unused_kwargs(kw),
        ),
        base_shape=None,
        in_shape=None,
        out_shape=None,
    )
    return engine.forward(x, **{**inferred_kw, **kw})
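
A minimal usage sketch (not from the original source), assuming `warm.engine` is importable under that path and that a default parent module is registered first, as Example #8 does:

import torch
import torch.nn as nn
import warm.engine as engine  # assumed import path, matching the docstrings above

parent = nn.Module()
engine.set_default_parent(parent)                   # layers created by engine.forward attach here

tokens = torch.tensor([[1, 4, 2], [3, 0, 5]])       # any shape of integer indices
vectors = embedding(tokens, size=8, vocabulary=10)  # omit vocabulary to infer max(x)-min(x)+1
print(vectors.shape)                                # expected: torch.Size([2, 3, 8])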
Example #2
def linear(x, size, init_weight=None, init_bias=None, bias=True, **kw):
    """ Linear transformation layer.\n
    - `x: Tensor`; 2d or more, with shapes `(Batch, Channel, *)` where `*` means any number of additional dimensions.
    - `size: int`; Size of hidden features, and size of the output channel.
    - `init_weight: None or str or callable`; Initialization specification for the weight tensor.
        If a `str`, it should name one of the initialization functions in `torch.nn.init`, e.g. `'orthogonal_'`.
        If a `callable`, it will be applied to the weight tensor directly, i.e. `spec(weight)`. If a 2-`tuple`,
        it must be of the form `(callable, kwargs)`, i.e. `callable(weight, **kwargs)`.
        Default: `None`, and the weight tensor is initialized using `torch.nn.Linear`'s default scheme.
    - `init_bias: None or str or callable`; Same as `init_weight`, but for the bias tensor.
    - `bias: bool`; If `True`, adds a learnable bias to the output. Default: `True`.
    - `**kw: dict`; Any additional KWargs are passed down to `warm.engine.forward`. Refer to its docs for details.
    - `return: Tensor`; With shape `(Batch, Size, *)` where `*` matches the additional dimensions of `x`. """
    inferred_kw = dict(
        base_name='linear',
        base_class=nn.Linear,
        base_kw={
            'out_features': size,
            'bias': bias
        },
        base_shape='BDC',
        infer_kw={'in_features': 'C'},
        initialization={
            'weight': init_weight,
            **({
                'bias': init_bias
            } if bias else {})
        },
    )
    return engine.forward(x, **{**inferred_kw, **kw})
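
A hedged usage sketch for `linear`, under the same assumptions (import path and default parent) as the embedding sketch above:

import torch, torch.nn as nn
import warm.engine as engine  # assumed import path

engine.set_default_parent(nn.Module())

x = torch.randn(4, 16, 10)                         # (Batch, Channel, Length)
y = linear(x, size=32, init_weight='orthogonal_')  # str init spec names a torch.nn.init function
print(y.shape)                                     # expected: torch.Size([4, 32, 10])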
Example #3
def lstm(x,
         size,
         init_weight_hh='orthogonal_',
         init_weight_ih=None,
         init_bias_hh=None,
         init_bias_ih=None,
         bias=True,
         num_layers=1,
         **kw):
    """ Long Short Term Memory layer.\n
    - `x: Tensor`; 3d, with shapes `(Batch, Channel, Length)`.
    - `size: int`; Size of hidden features, and size of the output channel.
    - `init_weight_hh: None or str or callable`; Initialization specification for the hidden-hidden weight tensor.
        If a `str`, it should name one of the initialization functions in `torch.nn.init`, e.g. `'orthogonal_'`.
        If a `callable`, it will be applied to the weight tensor directly, i.e. `spec(weight)`. If a 2-`tuple`,
        it must be of the form `(callable, kwargs)`, i.e. `callable(weight, **kwargs)`.
        Default: `'orthogonal_'`.
    - `init_weight_ih: None or str or callable`; Initialization specification for the input-hidden weight tensor.
        Default: `None`, and the weight tensor is initialized using `torch.nn.LSTM`'s default scheme.
    - `init_bias_hh: None or str or callable`; Initialization specification for the hidden-hidden bias tensor.
        Default: `None`, and the bias tensor is initialized using `torch.nn.LSTM`'s default scheme.
    - `init_bias_ih: None or str or callable`; Initialization specification for the input-hidden bias tensor.
        Default: `None`, and the bias tensor is initialized using `torch.nn.LSTM`'s default scheme.
    - `bias: bool`; If `False`, then the layer does not use `bias_ih` and `bias_hh`. Default: `True`.
    - `num_layers: int`; Number of the recurrent layers. Default: 1.
    - `tuple_out: bool`; If `True`, the returned value will be a tuple `(out, (h_n, c_n))`. Default: False.
    - `**kw: dict`; Any additional KWargs are passed down to `torch.nn.LSTM`, as well as `warm.engine.forward`.
        Refer to their docs for details. Some of the additional LSTM arguments: `dropout, bidirectional, batch_first`.
    - `return: Tensor or tuple`; If `tuple_out` is true, returns `(out, (h_n, c_n))`, otherwise just `out`.
        `out` has shape `(Batch, Size*Directions, Length)`,
            where Directions = 2 if `bidirectional` else 1.
        `h_n` is the hidden states with shape `(num_layers*Directions, Batch, Size)`.
        `c_n` is the cell states with shape `(num_layers*Directions, Batch, Size)`. """
    init = dict(
        weight_hh=init_weight_hh,
        weight_ih=init_weight_ih,
        bias_hh=init_bias_hh,
        bias_ih=init_bias_ih,
    )
    inferred_kw = dict(
        base_name='lstm',
        base_class=nn.LSTM,
        base_kw={
            'hidden_size': size,
            'num_layers': num_layers,
            **engine.unused_kwargs(kw),
        },
        base_shape='DBC',
        infer_kw={'input_size': 'C'},
        initialization={
            f'{k}_l{l}': init[k]
            for k in ['weight_hh', 'weight_ih'] +
            (['bias_hh', 'bias_ih'] if bias else []) for l in range(num_layers)
        },
    )
    return engine.forward(x, **{**inferred_kw, **kw})
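
A hedged usage sketch for `lstm`, again assuming the import path and default-parent setup shown in Example #8:

import torch, torch.nn as nn
import warm.engine as engine  # assumed import path

engine.set_default_parent(nn.Module())

x = torch.randn(4, 16, 20)                          # (Batch, Channel, Length)
out, (h_n, c_n) = lstm(x, size=32, tuple_out=True)  # tuple_out is forwarded to warm.engine.forward
print(out.shape)                                    # expected: torch.Size([4, 32, 20])
print(h_n.shape, c_n.shape)                         # expected: torch.Size([1, 4, 32]) each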
Example #4
def batch_norm(x, **kw):
    """ Batch Normalization layer.\n
    - `x: Tensor`; 2d or more, with shapes `(Batch, Channel, *)` where `*` means any number of additional dimensions.
    - `**kw: dict`; Any additional KWargs are passed down to `torch.nn.BatchNormNd` (where N can be 1, 2 or 3),
        as well as to `warm.engine.forward`. Refer to their docs for details. Some of the additional BatchNorm arguments:
        `eps, momentum, affine, track_running_stats`.
    - `return: Tensor`; Same shape as input `x`. """
    d = x.ndim - 3
    assert d in [0, 1, 2], 'Incompatible number of dims for input x.'
    inferred_kw = dict(
        base_name='batch_norm',
        base_class=[nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d][d],
        base_kw={'num_features': x.shape[1]},
    )
    return engine.forward(x, **{**inferred_kw, **kw})
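
A short usage sketch for `batch_norm`, with the same assumed setup; a 4d input selects `nn.BatchNorm2d`:

import torch, torch.nn as nn
import warm.engine as engine  # assumed import path

engine.set_default_parent(nn.Module())

x = torch.randn(8, 3, 32, 32)  # (Batch, Channel, Height, Width) -> nn.BatchNorm2d with 3 features
y = batch_norm(x)
print(y.shape)                 # expected: torch.Size([8, 3, 32, 32]), same as the input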
Example #5
def dropout(x, rate=0.5, by_channel=False, **kw):
    """ Dropout layer.\n
    During training, randomly zeros part of input tensor `x`, at probability `rate`.\n
    - `x: Tensor`; Can be of any shape if `by_channel` is false, or 2d and up if `by_channel` is true.
    - `rate: float`; The probability of dropout. Default 0.5.
    - `by_channel: bool`; If true, entire channels are dropped out (for `'BCD'`-shaped `x`, all `'D'` entries
        of a dropped channel become 0). Requires `x` to be 2d or more.
    - `inplace: bool`; If true, the operation will be in-place and the input `x` will be altered.
    - `return: Tensor`; Same shape as `x`. """
    inferred_kw = dict(
        base_name='dropout',
        base_class=[nn.Dropout, nn.Dropout2d][by_channel],
        base_kw={'p': rate},
        base_shape=[None, 'BCD'][by_channel],
    )
    return engine.forward(x, **{**inferred_kw, **kw})
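
A short usage sketch for `dropout`, same assumed setup as above:

import torch, torch.nn as nn
import warm.engine as engine  # assumed import path

engine.set_default_parent(nn.Module())

x = torch.randn(2, 4, 6)
y = dropout(x, rate=0.3)  # element-wise dropout; pass by_channel=True to drop whole channels instead
print(y.shape)            # expected: torch.Size([2, 4, 6]), same as the input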
Example #6
def forward(self, x):
    x = engine.forward(
        x,
        nn.Linear,
        'linear',
        base_arg=(x.shape[-1], 4, False),  # in_features, out_features, bias
        in_shape=None,
        out_shape=None,
        base_shape=None,
        initialization={'weight': 'ones_'},
        activation=(F.dropout, {'p': 1.0}),
    )
    return x
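
A hedged sketch of how this `forward` might be hosted, assuming it belongs to an `nn.Module` subclass (the class name `Demo` below is hypothetical) that registers itself as the default parent, mirroring the setup in Example #8:

import torch
import torch.nn as nn
import torch.nn.functional as F
import warm.engine as engine  # assumed import path

class Demo(nn.Module):        # hypothetical host for the forward shown above
    def __init__(self):
        super().__init__()
        engine.set_default_parent(self)  # so engine.forward can attach the created nn.Linear

    def forward(self, x):
        return engine.forward(
            x, nn.Linear, 'linear',
            base_arg=(x.shape[-1], 4, False),    # in_features, out_features, bias
            in_shape=None, out_shape=None, base_shape=None,
            initialization={'weight': 'ones_'},  # torch.nn.init.ones_ applied to the weight
            activation=(F.dropout, {'p': 1.0}),  # p=1.0 zeroes the output while training
        )

m = Demo()
y = m(torch.randn(2, 3))
print(y.shape, float(y.abs().sum()))  # expected: torch.Size([2, 4]) 0.0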
Example #7
def conv(x, size, kernel, init_weight=None, init_bias=None, bias=True, **kw):
    """ Convolution layer.\n
    - `x: Tensor`; With shape `(Batch, Channel, *)` where `*` can be 1d, 2d, or 3d.
        If 3d, shapes are `(Batch, Channel, Length)`.
        If 4d, shapes are `(Batch, Channel, Height, Width)`.
        If 5d, shapes are `(Batch, Channel, Depth, Height, Width)`.
    - `size: int`; Size of hidden filters, and size of the output channel.
    - `kernel: int or tuple`; Size of the convolution kernel.
    - `init_weight: None or str or callable`; Initialization specification for the weight tensor.
        If a `str`, it should name one of the initialization functions in `torch.nn.init`, e.g. `'orthogonal_'`.
        If a `callable`, it will be applied to the weight tensor directly, i.e. `spec(weight)`. If a 2-`tuple`,
        it must be of the form `(callable, kwargs)`, i.e. `callable(weight, **kwargs)`.
        Default: `None`, and the weight tensor is initialized using `torch.nn.ConvNd`'s default scheme.
    - `init_bias: None or str or callable`; Same as `init_weight`, but for the bias tensor.
    - `bias: bool`; If `True`, adds a learnable bias to the output. Default: `True`.
    - `**kw: dict`; Any additional KWargs are passed down to `torch.nn.ConvNd` (where N can be 1, 2 or 3),
        as well as to `warm.engine.forward`. Refer to their docs for details. Some of the additional ConvNd arguments:
        `stride, padding, dilation, groups`.
    - `return: Tensor`; With shape `(Batch, Size, *)` where `*` matches the additional dimensions of `x`. """
    d = x.ndim - 3
    assert d in [0, 1, 2], 'Incompatible number of dims for input x.'
    inferred_kw = dict(
        base_name='conv',
        base_class=[nn.Conv1d, nn.Conv2d, nn.Conv3d][d],
        base_kw={
            'out_channels': size,
            'kernel_size': kernel,
            'bias': bias,
            **engine.unused_kwargs(kw),
        },
        infer_kw={'in_channels': 'C'},
        initialization={
            'weight': init_weight,
            **({
                'bias': init_bias
            } if bias else {})
        },
    )
    return engine.forward(x, **{**inferred_kw, **kw})
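
A hedged usage sketch for `conv`, with the same assumed setup; a 4d input selects `nn.Conv2d`:

import torch, torch.nn as nn
import warm.engine as engine  # assumed import path

engine.set_default_parent(nn.Module())

x = torch.randn(8, 3, 32, 32)  # (Batch, Channel, Height, Width) -> nn.Conv2d, in_channels inferred as 3
y = conv(x, size=16, kernel=3)
print(y.shape)                 # expected: torch.Size([8, 16, 30, 30]) with default stride and no padding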
Example #8
def test_forward():
    x = torch.randn(1, 2, 3)
    m = nn.Module()
    engine.set_default_parent(m)

    class TripleOut(nn.Module):  # to test tuple_out
        def forward(self, x, b=1, c='2'):
            return x + b, x, c

    y = engine.forward(x,
                       base_class=TripleOut,
                       base_name='tri',
                       tuple_out=False)
    assert isinstance(y, torch.Tensor), 'tuple_out did not work correctly.'
    y = engine.forward(x,
                       base_class=TripleOut,
                       base_name='tri',
                       tuple_out=True)
    assert isinstance(y, tuple) and len(y) == 3 and y[-1] == '2', \
        'tuple_out did not work correctly.'
    y = engine.forward(x,
                       base_class=TripleOut,
                       base_name='tri',
                       forward_kw={'c': 3},
                       tuple_out=True)
    assert y[-1] == 3, 'forward_kw did not work correctly.'
    y = engine.forward(x,
                       base_class=TripleOut,
                       base_name='tri',
                       forward_arg=(2.0, ))
    assert torch.allclose(y - x, torch.Tensor([2.0])), \
        'forward_arg did not work correctly.'
    y = engine.forward(x,
                       base_class=TripleOut,
                       activation=(F.dropout, {
                           'p': 1.0
                       }))
    assert torch.allclose(y, torch.Tensor([0.0])), \
        'activation did not work correctly.'
    y = engine.forward(x,
                       base_class=nn.Linear,
                       base_kw={'out_features': 4},
                       infer_kw={'in_features': 'C'},
                       base_shape='BDC')
    assert y.shape[1] == 4, 'base_kw, infer_kw did not work correctly.'
Example #9
def layer_norm(x, dim=1, **kw):
    """ Layer Normalization.\n
    -  `x: Tensor`; Can be of any shape.
    -  `dim: int or list of int`; Dimensions to be normalized. Default: 1.
    -  `**kw: dict`; Any additional KWargs are passed down to `torch.nn.LayerNorm`, as well as `warm.engine.forward`.
    -  `return: Tensor`; Same shape as `x`. """
    if dim != -1:
        if isinstance(dim, int):
            dim = [dim]
        dim_norm = [x.ndim + i if i < 0 else i for i in dim]
        order = [i for i in range(x.ndim) if i not in dim_norm] + dim_norm
        x = x.permute(order)
        norm_shape = x.shape[-len(dim_norm):]
    else:
        norm_shape = [x.shape[-1]]
    inferred_kw = dict(
        base_name='layer_norm',
        base_class=nn.LayerNorm,
        base_kw={'normalized_shape': norm_shape},
    )
    x = engine.forward(x, **{**inferred_kw, **kw})
    if dim != -1:
        x = x.permute(np.argsort(order).tolist())
    return x
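
A short usage sketch for `layer_norm`, same assumed setup; the default `dim=1` normalizes over the channel dimension:

import torch, torch.nn as nn
import warm.engine as engine  # assumed import path

engine.set_default_parent(nn.Module())

x = torch.randn(4, 16, 10)  # (Batch, Channel, Length)
y = layer_norm(x)           # normalizes over dim 1; pass dim=-1 to normalize the last dimension instead
print(y.shape)              # expected: torch.Size([4, 16, 10]), same as the input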
Example #10
def transformer(x,
                y=None,
                num_encoder=6,
                num_decoder=6,
                num_head=8,
                mask=None,
                causal=False,
                in_shape='BCD',
                **kw):
    """ Transformer layer.\n
    This layer covers functionality of `Transformer`, `TransformerEncoder`, and `TransformerDecoder`.
    See [`torch.nn.Transformer`](https://pytorch.org/docs/stable/nn.html#transformer) for more details.\n
    -  `x: Tensor`; The source sequence, with shape `(Batch, Channel, LengthX)`.
        `Channel` is usually from embedding.
    -  `y: None or Tensor`; The target sequence, also with shape `(Batch, Channel, LengthY)`.
        If not given, defaults to `x`.
    -  `num_encoder: int`; Number of encoder layers. Set to 0 to disable encoder and use only decoder. Default 6.
    -  `num_decoder: int`; Number of decoder layers. Set to 0 to disable decoder and use only encoder. Default 6.
    -  `num_head: int`; Number of heads for multi-headed attention. Default 8.
    -  `mask: None or dict`; Keys are among: `src_mask`, `tgt_mask`, `memory_mask`,
        `src_key_padding_mask`, `tgt_key_padding_mask`, `memory_key_padding_mask`.
        See the `forward` method of `torch.nn.Transformer` for details.
    -  `causal: bool`; Default false. If true, causal masks are added to source and target, so that
        the current value depends only on the past, not the future, of the sequences.
    -  `**kw: dict`; Any additional KWargs are passed down to `torch.nn.Transformer`, as well as `warm.engine.forward`.
    -  `return: Tensor`; Same shape as `y`, if `num_decoder` > 0. Otherwise same shape as `x`. """
    def _causal_mask(n):
        mask = (torch.triu(torch.ones(n, n)) == 1).transpose(0, 1)
        return mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(
            mask == 1, float(0.0))

    if y is None:
        y = x
    y = permute(y, in_shape, 'DBC')
    mask = mask or {}
    if causal:
        i = in_shape.find('D')
        mx = _causal_mask(x.shape[i])
        mask['src_mask'] = mask.pop('src_mask', 0.0) + mx
        my = _causal_mask(y.shape[0])
        mask['tgt_mask'] = mask.pop('tgt_mask', 0.0) + my
    encoder = identity if num_encoder == 0 else None
    decoder = identity if num_decoder == 0 else None
    inferred_kw = dict(
        base_name='transformer',
        base_class=nn.Transformer,
        base_shape='DBC',
        base_kw=dict(
            d_model=x.shape[in_shape.find('C')],
            custom_encoder=encoder,
            custom_decoder=decoder,
            nhead=num_head,
            num_encoder_layers=num_encoder,
            num_decoder_layers=num_decoder,
            **engine.unused_kwargs(kw),
        ),
        in_shape=in_shape,
        forward_kw=mask,
        forward_arg=(y, ),
    )
    return engine.forward(x, **{**inferred_kw, **kw})
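
A hedged usage sketch for `transformer`, under the same assumptions; note that `Channel` plays the role of `d_model` and must be divisible by `num_head`:

import torch, torch.nn as nn
import warm.engine as engine  # assumed import path

engine.set_default_parent(nn.Module())

src = torch.randn(2, 32, 7)  # (Batch, Channel, LengthX)
tgt = torch.randn(2, 32, 5)  # (Batch, Channel, LengthY)
out = transformer(src, tgt, num_encoder=2, num_decoder=2, num_head=4, causal=True)
print(out.shape)             # expected: torch.Size([2, 32, 5]), same shape as the target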