Example No. 1
def test_dropout():
    m = nn.Module()
    x = torch.ones(2, 6, 6, 6)
    # Element-wise dropout: W.dropout should match nn.Dropout under the same seed.
    torch.manual_seed(100)
    y0 = nn.Dropout(0.3)(x)
    torch.manual_seed(100)
    y1 = W.dropout(x, 0.3, parent=m)
    assert torch.equal(y0, y1)
    # Channel-wise dropout: by_channel=True should match nn.Dropout2d.
    torch.manual_seed(100)
    y0 = nn.Dropout2d(0.3)(x)
    torch.manual_seed(100)
    y1 = W.dropout(x, 0.3, by_channel=True, parent=m)
    assert torch.equal(y0, y1)
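For reference, the two comparisons in this test can also be written with PyTorch's functional API; the sketch below only restates what the test already checks and assumes nothing about W beyond the calls shown above.

# Plain-PyTorch counterpart of the two cases compared above (reference sketch).
import torch
import torch.nn.functional as F

x = torch.ones(2, 6, 6, 6)
torch.manual_seed(100)
y_elem = F.dropout(x, p=0.3, training=True)    # element-wise, as in nn.Dropout(0.3)
torch.manual_seed(100)
y_chan = F.dropout2d(x, p=0.3, training=True)  # zeroes whole channels, as in nn.Dropout2d(0.3)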
Example No. 2
def multi_head_attention(x, y=None, num_head=8, dropout=0.1, mask=None, **kw):
    def split_heads(t):  # (B, C, L) -> (B, N, H, L) where N*H == C
        return t.reshape(batch, num_head, size // num_head, t.shape[-1])

    def merge_heads(t):  # (B, N, H, L) -> (B, C, L)
        return t.reshape(batch, -1, t.shape[-1])  # (B, C, L)

    if y is None:
        y = x  # self attention
    batch, size = x.shape[:2]  # B, C, Lx
    assert size % num_head == 0, 'num_head must be a divisor of size.'
    assert y.shape[:2] == x.shape[:2], 'The first 2 dims of x, y must match.'
    q = W.linear(x, size)  # query
    k = W.linear(y, size)  # key
    v = W.linear(y, size)  # value
    q = split_heads(q)  # (B, N, H, Lx)
    k = split_heads(k)  # (B, N, H, Ly)
    v = split_heads(v)  # (B, N, H, Ly)
    q *= (size // num_head)**(-0.5)
    # Attention weights, (B, N, Lx, Ly).
    a = q.transpose(2, 3).contiguous().matmul(k)
    if mask is not None:
        a += mask
    a = F.softmax(a, dim=-1)
    a = W.dropout(a, dropout)
    x = v.matmul(a.transpose(2, 3).contiguous())  # (B, N, H, Lx)
    x = merge_heads(x)  # (B, C, Lx)
    return W.linear(x, size)
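The attention above operates on channel-first tensors of shape (B, C, L), and the mask is added to the raw attention weights before the softmax, so masked positions should carry -inf. The sketch below shows a hypothetical masked self-attention call; the tensor sizes and head count are illustrative, and it assumes W.linear and W.dropout can be called in this context (a detail of the W library not shown here).

# Hypothetical causal self-attention call (shapes are illustrative only).
import torch

B, C, L = 2, 512, 10
x = torch.randn(B, C, L)
# -inf above the diagonal blocks attention to future positions; the (L, L)
# mask broadcasts against the (B, N, Lx, Ly) attention weights.
mask = torch.full((L, L), float('-inf')).triu(diagonal=1)
out = multi_head_attention(x, num_head=8, dropout=0.1, mask=mask)  # (B, C, L)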
Example No. 3
def forward(self, x):
    # Stem: conv-bn-act to 32 channels with stride 2.
    x = conv_bn_act(x, 32, kernel=3, stride=2, name='head')
    # Bottleneck stages from spec_b0; only the first block of each stage
    # uses the stage stride, the remaining repeats run at stride 1.
    for size, expand, kernel, stride, repeat, se_ratio, dc_ratio in spec_b0:
        for i in range(repeat):
            stride = stride if i == 0 else 1
            x = mb_block(x, size, expand, kernel, stride, se_ratio, dc_ratio)
    # Tail: conv-bn-act to 1280 channels, global average pooling, dropout,
    # then a 1000-way linear classifier.
    x = conv_bn_act(x, 1280, name='tail')
    x = F.adaptive_avg_pool2d(x, 1)
    x = W.dropout(x, 0.2)
    x = x.view(x.shape[0], -1)
    x = W.linear(x, 1000)
    return x
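This forward pass appears to follow the EfficientNet-B0 layout: a stride-2 stem, a stack of mobile inverted bottleneck stages described by spec_b0 (not shown here), and a 1280-channel tail feeding a 1000-way classifier. A hypothetical call with the usual 3x224x224 input is sketched below; the class name is illustrative, not from the original source.

# Hypothetical usage; Net stands in for whichever class defines the forward() above.
import torch

net = Net()
logits = net(torch.randn(1, 3, 224, 224))  # (B, 3, H, W) image batch
print(logits.shape)                        # expected: torch.Size([1, 1000])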
Example No. 4
def classify(x, size, *arg, **kw):
    # Classifier head: dropout at rate 0.2, then a linear projection to `size`
    # outputs (extra positional/keyword arguments are accepted but unused).
    x = W.dropout(x, rate=0.2, name='classifier-0')
    return W.linear(x, size, name='classifier-1')
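A hypothetical call site for this head; the feature width and class count below are assumptions for illustration only.

# Hypothetical usage of classify(); shapes and the class count are illustrative.
import torch

feats = torch.randn(4, 1280)  # e.g. flattened, pooled backbone features, (B, C)
logits = classify(feats, 10)  # dropout(0.2) followed by a 10-way linear projection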
Example No. 5
def residual_add(x, layer, dropout=0.1, **kw):
    # Pre-norm residual connection: layer-norm the input, apply the wrapped
    # layer, apply dropout, then add the original input back.
    y = W.layer_norm(x)
    y = layer(y, **kw)
    y = W.dropout(y, dropout)
    return x + y
Example No. 6
def feed_forward(x, size_ff=2048, dropout=0.1, **kw):
    # Position-wise feed-forward: expand to size_ff with ReLU, apply dropout,
    # then project back to the input's channel width (x.shape[1]).
    y = W.linear(x, size_ff, activation='relu')
    y = W.dropout(y, dropout)
    return W.linear(y, x.shape[1])
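Examples No. 2, 5, and 6 compose naturally into a pre-norm transformer encoder block: residual_add normalizes its input, applies the wrapped sublayer, applies dropout, and adds the input back. The sketch below is only an illustration of how these functions fit together, not code from the original source; note that the sublayers' own dropout arguments keep their defaults here.

# Illustrative pre-norm encoder block built from the functions above.
def encoder_block(x, num_head=8, size_ff=2048, dropout=0.1, mask=None):
    # Self-attention sublayer wrapped in a residual connection.
    x = residual_add(x, multi_head_attention,
                     dropout=dropout, num_head=num_head, mask=mask)
    # Position-wise feed-forward sublayer wrapped in a residual connection.
    x = residual_add(x, feed_forward, dropout=dropout, size_ff=size_ff)
    return x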