Example #1
    def forward(self, hidden_states, attention_mask):
        mixed_query_layer = self.query(hidden_states)
        mixed_key_layer = self.key(hidden_states)
        mixed_value_layer = self.value(hidden_states)

        query_layer = self.transpose_for_scores(mixed_query_layer)
        key_layer = self.transpose_for_scores(mixed_key_layer)
        value_layer = self.transpose_for_scores(mixed_value_layer)

        # Take the dot product between "query" and "key" to get the raw attention scores.
        attention_scores = F.matmul(query_layer, transpose(key_layer, -1, -2))
        attention_scores = attention_scores / math.sqrt(
            self.attention_head_size)
        # Apply the attention mask (precomputed for all layers in the BertModel forward() function)
        attention_scores = attention_scores + attention_mask

        # Normalize the attention scores to probabilities.
        attention_probs = Softmax(len(attention_scores.shape) -
                                  1)(attention_scores)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = self.dropout(attention_probs)

        context_layer = F.matmul(attention_probs, value_layer)
        context_layer = context_layer.transpose(0, 2, 1, 3)
        # using symbolic shapes to make trace happy
        context_shape = mge.tensor(context_layer.shape)
        new_context_layer_shape = F.concat(
            [context_shape[:-2], self.all_head_size])
        context_layer = context_layer.reshape(new_context_layer_shape)
        return context_layer
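
The method above is a BERT-style self-attention block; its core is scaled dot-product attention. Below is a minimal, self-contained sketch of the same pattern with toy shapes and F.softmax in place of the module-level Softmax (none of this is part of the original model code):

import math
import numpy as np
import megengine as mge
import megengine.functional as F

# toy shapes: (batch, heads, seq_len, head_dim)
q = mge.tensor(np.random.rand(2, 4, 8, 16).astype("float32"))
k = mge.tensor(np.random.rand(2, 4, 8, 16).astype("float32"))
v = mge.tensor(np.random.rand(2, 4, 8, 16).astype("float32"))

scores = F.matmul(q, k.transpose(0, 1, 3, 2)) / math.sqrt(16)  # (2, 4, 8, 8)
probs = F.softmax(scores, axis=3)                              # normalize over the last axis
context = F.matmul(probs, v)                                   # (2, 4, 8, 16)
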
Example #2
def test_basic():
    x = mge.tensor([1.0, 3.0, 5.0]).reshape(1, 3)
    w = mge.tensor([2.0, 4.0, 6.0]).reshape(3, 1)
    b = mge.tensor(-1.0)

    gm = GradManager().attach([w, b])
    gm.record()

    p = F.matmul(x, w)
    y = p + b

    gm.backward(y)
    gm.release()  # not strictly necessary here
    np.testing.assert_equal(w.grad.numpy(), [[1], [3], [5]])
    np.testing.assert_equal(b.grad.numpy(), [1])

    w.grad = None
    b.grad = None
    with gm:
        p = F.matmul(x, w)
        y = p + b
        gm.backward(y)

    np.testing.assert_equal(w.grad.numpy(), [[1], [3], [5]])
    np.testing.assert_equal(b.grad.numpy(), [1])
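
For reference on the expected values: with y = x @ w + b and x = [[1, 3, 5]], the gradient of y with respect to w is xᵀ = [[1], [3], [5]] and with respect to b is [1], which is what both assertion blocks check. The test simply runs the same computation twice, once via record()/backward()/release() and once via the `with gm:` context-manager form.
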
Example #3
def test_level1_infer_shape_with_unknown():
    config_async_level(2)
    a = mge.tensor([[1, 2, 2, 3]], dtype="float32")
    b = mge.tensor([1, 1])
    multi2 = mge.tensor(np.array([[2, 0], [0, 2]]), dtype="float32")
    c = F.matmul(b, multi2)
    # make DepType::SHAPE unknown
    d = F.reshape(a, c)
    e = mge.tensor([[1, 2]], dtype="float32")
    config_async_level(1)
    # test src no shape, throw in level1
    with pytest.raises(RuntimeError):
        f = F.reshape(d, b)
    with pytest.raises(RuntimeError):
        g = F.matmul(d, e)
    config_async_level(2)
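
As the inline comments indicate, `c` is computed on the device, so the target shape of `F.reshape(a, c)` (DepType::SHAPE) is not known on the host; after switching to `config_async_level(1)`, the test expects operations that consume the shape-less result `d` to raise RuntimeError. Examples #4 and #8 below exercise the analogous cases for an unknown value and an unknown shape.
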
Example #4
def test_level1_infer_value():
    config_async_level(1)
    a = mge.tensor([[1, 2], [2, 3], [3, 4]], dtype="float32")
    b = mge.tensor([1, 1], dtype="float32")
    identity = mge.tensor(np.array([[1, 0], [0, 1]]), dtype="float32")
    # make DepType::VALUE unknown
    c = F.matmul(b, identity)
    with pytest.raises(RuntimeError):
        d = F.reshape(a, c)
    config_async_level(2)
Example #5
 def forward(self, x):
     if not self.training or self.drop_prob <= 0.0:
         return x
     _, c, h, w = x.shape
     pad_h = max((self.kernel_size - 1), 0)
     pad_w = max((self.kernel_size - 1), 0)
     numel = c * h * w
     gamma = self.drop_prob * (w * h) / (self.kernel_size**2) / (
         (w - self.kernel_size + 1) * (h - self.kernel_size + 1))
     mask = mge.random.uniform(0, 1, size=(1, c, h, w))
     mask[mask < gamma] = 1
     mask[mask >= gamma] = 0
     mask = F.max_pool2d(mask, [self.kernel_size, self.kernel_size],
                         stride=1,
                         padding=(pad_h // 2, pad_w // 2))
     mask = 1 - mask
     x1 = F.expand_dims(1.0 * numel / mask.sum(axis=0), axis=0)
     y = F.matmul(F.matmul(x, mask), x1)
     return y
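
The `gamma` expression matches the DropBlock sampling rate γ = drop_prob · H·W / (k² · (H − k + 1) · (W − k + 1)), which compensates for each sampled seed being expanded to a k×k block by the max-pool step.
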
Example #6
 def forward(self, inps):
     x = F.matmul(inps[0], inps[1], self.param["transA"],
                  self.param["transB"])
     if self.param["alpha"] != 1.0:
         x = F.mul(x, self.param["alpha"])
     if len(inps) == 3:
         if self.param["beta"] != 1.0:
             x = F.add(x, F.mul(inps[2], self.param["beta"]))
         else:
             x = F.add(x, inps[2])
     return x
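
The transA/transB/alpha/beta parameters suggest this module follows Gemm-style semantics, y = alpha · op(A) @ op(B) + beta · C. A minimal standalone equivalent with plain tensors (names, shapes and values are illustrative only, not taken from the original module):

import numpy as np
import megengine as mge
import megengine.functional as F

A = mge.tensor(np.random.rand(4, 3).astype("float32"))
B = mge.tensor(np.random.rand(3, 5).astype("float32"))
C = mge.tensor(np.random.rand(4, 5).astype("float32"))
alpha, beta = 0.5, 2.0

y = alpha * F.matmul(A, B) + beta * C  # what the module computes when given three inputs
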
Example #7
 def forward(self, x, bridge):
     up = self.up(x)
     bridge = self.skip_m(bridge)
     out = F.concat([up, bridge], 1)
     if self.subnet:
         b_, c_, h_, w_ = bridge.shape
         sub = self.subnet(out)
         V_t = sub.reshape(b_, self.num_subspace, h_ * w_)
         V_t = V_t / (1e-6 + F.abs(V_t).sum(axis=2, keepdims=True))
         V = V_t.transpose(0, 2, 1)
         mat = F.matmul(V_t, V)
         mat_inv = F.matinv(mat)
         project_mat = F.matmul(mat_inv, V_t)
         bridge_ = bridge.reshape(b_, c_, h_ * w_)
         project_feature = F.matmul(project_mat, bridge_.transpose(0, 2, 1))
         bridge = F.matmul(V, project_feature).transpose(0, 2, 1).reshape(
             b_, c_, h_, w_)
         out = F.concat([up, bridge], 1)
     out = self.conv_block(out)
     return out
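
The `subnet` branch performs an orthogonal subspace projection: with V of shape (b, h·w, num_subspace), the chain matinv(VᵀV) followed by the matmuls applies V (VᵀV)⁻¹ Vᵀ to each channel of `bridge`, i.e. it projects the skip features onto the span of the `num_subspace` basis vectors predicted by the subnet before concatenating them with `up` again.
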
Example #8
def test_level1_infer_shape_with_unknown():
    config_async_level(2)
    a = mge.tensor([[1, 2, 2, 3]], dtype="float32")
    b = mge.tensor([1, 1])
    c = b * 2
    # make DepType::SHAPE unknown
    d = F.reshape(a, c)
    config_async_level(1)
    e = mge.tensor([[1, 2]], dtype="float32")
    with pytest.raises(RuntimeError):
        f = F.matmul(d, e)
Example #9
def get_flow_mge(H_mat_mul, patch_indices, image_size_h=600, image_size_w=800):
    # (N, 6, 3, 3)
    batch_size = H_mat_mul.shape[0]
    divide = H_mat_mul.shape[1]
    H_mat_mul = mge.Tensor(H_mat_mul.reshape(batch_size, divide, 3, 3))

    small_patch_sz = [image_size_h // divide, image_size_w]
    small = 1e-7

    H_mat_pool = F.zeros((batch_size, image_size_h, image_size_w, 3, 3))

    for i in range(divide):
        H_mat = H_mat_mul[:, i, :, :]

        if i == divide - 1:
            H_mat = F.broadcast_to(F.expand_dims(F.expand_dims(H_mat, 1), 1),
                                   (batch_size, image_size_h -
                                    i * small_patch_sz[0], image_size_w, 3, 3))
            H_mat_pool[:, i * small_patch_sz[0]:, ...] = H_mat
            continue

        H_mat = F.broadcast_to(F.expand_dims(F.expand_dims(
            H_mat, 1), 1), (batch_size, small_patch_sz[0], image_size_w, 3, 3))
        H_mat_pool[:, i * small_patch_sz[0]:(i + 1) * small_patch_sz[0],
                   ...] = H_mat

    pred_I2_index_warp = F.expand_dims(patch_indices.transpose(0, 2, 3, 1), 4)
    pred_I2_index_warp = F.matmul(H_mat_pool,
                                  pred_I2_index_warp)[:, :, :, :,
                                                      0].transpose(0, 3, 1, 2)
    T_t = pred_I2_index_warp[:, 2:3, ...]
    smallers = 1e-6
    T_t = T_t + smallers
    v1 = pred_I2_index_warp[:, 0:1, ...]
    v2 = pred_I2_index_warp[:, 1:2, ...]
    v1 = v1 / T_t
    v2 = v2 / T_t
    warp_index = F.concat((v1, v2), 1)
    vgrid = patch_indices[:, :2, ...]

    flow = warp_index - vgrid
    return flow
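
In summary: each of the `divide` horizontal bands gets its own 3×3 homography, broadcast to every pixel of the band in `H_mat_pool`; the batched `F.matmul` applies it to the homogeneous pixel coordinates from `patch_indices`, the result is dehomogenized by dividing by the third component `T_t` (plus a small epsilon for stability), and subtracting the original grid `vgrid` converts the warped coordinates into a flow field.
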
Example #10
 def forward(self, data, quad):
     """
     data: (1, 3, 48, 160)
     quad: (1, 4, 2)
     """
     N = quad.shape[0]
     dst = F.repeat(self.bb_out, N, axis=0).reshape(-1, 4, 2)
     I = F.broadcast_to(self.I, quad.shape)
     A = F.broadcast_to(self.A, (N, 8, 8))
     A[:, 0:4, 0:2] = quad
     A[:, 4:8, 5:6] = I[:, :, 0:1]
     A[:, 0:4, 6:8] = -quad * dst[:, :, 0:1]
     A[:, 4:8, 3:5] = quad
     A[:, 0:4, 2:3] = I[:, :, 0:1]
     A[:, 4:8, 6:8] = -quad * dst[:, :, 1:2]
     B = dst.transpose(0, 2, 1).reshape(-1, 8, 1)
     M = F.concat([F.matmul(F.matinv(A), B)[:, :, 0], I[:, 0:1, 0]],
                  axis=1).reshape(-1, 3, 3)
     new_data = F.warp_perspective(data, M, (48, 160))  # (N, 3, 48, 160)
     return {"data": new_data}
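
Here `F.matinv(A)` and `F.matmul` solve the 8×8 linear system A·m = B for the eight unknown coefficients of the perspective transform between `quad` and the fixed destination rectangle `dst` (the classic four-point setup); concatenating the constant last entry and reshaping to (N, 3, 3) gives the matrix `M` consumed by `F.warp_perspective`.
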
Example #11
def calculate_score(configs, facescrub, labels, megaface):
    """calculate megaface identification top1 score. this evaluation implement strictly follows the description of
    `"The MegaFace Benchmark: 1 Million Faces for Recognition at Scale" <https://arxiv.org/pdf/1512.00596.pdf>`_
    this implement outputs exactly the same as dev-sdk provided by the official, but with much higher speed

    Args:
        configs (dict): configuration
        facescrub (np.array): feature of facescrub
        labels (np.array): label of facescrub
        megaface (np.array): feature of megaface

    Returns:
        megaface_score (float): top1 score of megaface
    """
    facescrub = mge.tensor(facescrub, dtype="float32")
    megaface = mge.tensor(megaface, dtype="float32")

    # note: (x - y) ** 2 = x ** 2 + y ** 2 - 2 * x * y
    # facescrub_score[i][j] = l2-dist(facescrub[i], facescrub[j])
    facescrub_score = (
        (facescrub ** 2).sum(axis=-1, keepdims=True)
        + (facescrub ** 2).sum(axis=-1, keepdims=True).transpose(1, 0)
        - 2 * F.matmul(facescrub, facescrub.transpose(1, 0))
    )
    facescrub_score = facescrub_score.numpy()

    def get_score_min_megaface(x):
        distr_score = (x ** 2).sum(axis=-1) + (megaface ** 2).sum(axis=-1) - 2 * (x * megaface).sum(axis=-1)
        return distr_score.min()

    up, down = 0, 0
    for probe_i in tqdm(range(len(facescrub))):
        distr_score_min = get_score_min_megaface(facescrub[probe_i]).numpy()
        mask = (labels == labels[probe_i]) & (np.arange(len(facescrub)) != probe_i)
        for probe_j in np.where(mask)[0]:
            probe_score = facescrub_score[probe_i][probe_j]
            up += probe_score < distr_score_min
            down += 1

    megaface_score = up / down * 100
    return megaface_score
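
The facescrub-vs-facescrub block uses the identity noted in the comment to obtain all pairwise squared L2 distances from a single matmul. A minimal standalone version of that trick (toy shapes, not from the original script):

import numpy as np
import megengine as mge
import megengine.functional as F

feat = mge.tensor(np.random.rand(5, 128).astype("float32"))
sq = (feat ** 2).sum(axis=-1, keepdims=True)                                 # (5, 1)
dist2 = sq + sq.transpose(1, 0) - 2 * F.matmul(feat, feat.transpose(1, 0))   # (5, 5)
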
Example #12
def test_dy():
    x = mge.tensor([1.0, 3.0, 5.0]).reshape(1, 3)
    w = mge.tensor([2.0, 4.0, 6.0]).reshape(3, 1)
    b = mge.tensor(-1.0)

    gm = GradManager().attach([w, b])

    def get_grad(grad, dy, idx):
        if isinstance(dy, (list, tuple)):
            return np.array(grad) * dy[idx]
        else:
            return np.array(grad) * dy

    # dy's shape should be the same as y's
    dy = mge.tensor(2.5).reshape(1, 1)
    w.grad = None
    b.grad = None
    with gm:
        p = F.matmul(x, w)
        y = p + b
        gm.backward(y, dy=dy)

    np.testing.assert_equal(w.grad.numpy(), [[1], [3], [5]] * dy.numpy())
    np.testing.assert_equal(b.grad.numpy(), [1] * dy.numpy())
Example #13
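The snippet below uses A and B without defining them; a minimal setup that makes it runnable (the values are illustrative, not from the original tutorial):

import megengine as mge
import megengine.functional as F

# illustrative values; any two tensors of the same shape work for the
# element-wise operations that follow
A = mge.tensor([[2., 4., 6.], [8., 10., 12.]])
B = mge.tensor([[1., 2., 3.], [4., 5., 6.]])
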
print(A + B)
print(A - B)
print(A * B)
print(A / B)

print(F.add(A, B))
print(F.sub(A, B))
print(F.mul(A, B))
print(F.div(A, B))

A = mge.tensor([[1., 2., 3.],
                [4., 5., 6.]])

print(A[1, :2])

A = mge.tensor([[1., 2., 3.],
                [4., 5., 6.]])

print(A.shape)
A = A.reshape(3, 2)
print(A.shape)

x = mge.tensor([[1., 3., 5.],
                [2., 4., 6.]])
w = mge.tensor([[1., 2.],
                [3., 4.],
                [5., 6.]])

p = F.matmul(x, w)
print(p)
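
For reference, the final product is p = [[1·1 + 3·3 + 5·5, 1·2 + 3·4 + 5·6], [2·1 + 4·3 + 6·5, 2·2 + 4·4 + 6·6]] = [[35, 44], [44, 56]], a (2, 2) tensor.
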
Example #14
 def fwd(x, y):
     return F.matmul(x, y)
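
One-line wrappers like this (and Examples #15 and #17) typically exist to be traced or otherwise tested; a minimal usage sketch assuming megengine.jit.trace as the surrounding context (the decorator and inputs are not part of the original snippet):

import numpy as np
import megengine as mge
import megengine.functional as F
from megengine.jit import trace

@trace(symbolic=True)
def fwd(x, y):
    return F.matmul(x, y)

out = fwd(mge.tensor(np.random.rand(4, 3).astype("float32")),
          mge.tensor(np.random.rand(3, 2).astype("float32")))  # shape (4, 2)
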
Example #15
 def fwd(data1, data2):
     return F.matmul(data1, data2)
Example #16
 def forward(self, embedding):
     w = F.normalize(self.weight, axis=1)
     x = embedding  # embedding has been normalized already
     logits = F.matmul(x, w.transpose(1, 0))
     return logits
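
Because both the embeddings and the weight rows are L2-normalized, this matmul yields cosine similarities between samples and class weights, the usual input to margin-based softmax losses in face recognition.
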
Example #17
 def func(a, b):
     return F.matmul(a, b)
Example #18
 def forward(self, x):
     x = F.matmul(x, self.linear_weight, transpose_b=self.transpose)
     x = self.bn(x)
     return x
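
`transpose_b=self.transpose` asks F.matmul to use the transposed second operand, i.e. it computes x @ Wᵀ when the flag is set, matching the (out_features, in_features) layout typical for linear-layer weights.
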
Example #19
    def forward(self, features, label=None, mask=None):
        """
        if label and mask both None, the loss will degenerate to
        SimSLR unsupervised loss.
        Reference:
            "A Simple Framework for Contrastive Learning of Visual Representations"<https://arxiv.org/pdf/2002.05709.pdf>
            "Supervised Contrastive Learning"<https://arxiv.org/abs/2004.11362>
        Args:
            features(tensor): The embedding feature. shape=[bs, n_views, ...]
            label(tensor): The label of images, shape=[bs]
            mask(tensor): contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j
                has the same class as sample i. Can be asymmetric.
        return:
            loss
        """
        if len(features.shape) < 3:
            raise ValueError("Features need have 3 dimensions at least")
        bs, num_view = features.shape[:2]
        # if the features have more than 3 dimensions, flatten them to [bs, num_view, -1]
        if len(features.shape) > 3:
            features = features.reshape(bs, num_view, -1)

        # label and mask cannot be provided at the same time
        if (label is not None) and (mask is not None):
            raise ValueError("label and mask cannot be provided at the same time")
        elif (label is None) and (mask is None):
            mask = F.eye(bs, dtype="float32")
        elif label is not None:
            label = label.reshape(-1, 1)
            if label.shape[0] != bs:
                raise RuntimeError(
                    "Num of labels does not match num of features")
            mask = F.equal(label, label.T)
        else:
            mask = mask.astype("float32")

        contrast_count = features.shape[1]
        features = F.split(features, features.shape[1], axis=1)
        contrast_feature = F.squeeze(F.concat(features, axis=0), axis=1)
        if self.contrast_mode == "one":
            # note: `features` was replaced above by the list returned from F.split,
            # so take the first view and drop its singleton axis
            anchor_feature = F.squeeze(features[0], axis=1)
            anchor_count = 1
        elif self.contrast_mode == "all":
            anchor_feature = contrast_feature
            anchor_count = contrast_count
        else:
            raise ValueError("Unknown mode:{}".format(self.contrast_mode))
        #compute logits
        anchor_dot_contrast = F.div(
            F.matmul(anchor_feature, contrast_feature.T), self.temperate)

        #for numerical stability
        logits_max = F.max(anchor_dot_contrast, axis=-1, keepdims=True)
        logits = anchor_dot_contrast - logits_max

        #tile mask
        an1, con = mask.shape[:2]
        nums = anchor_count * contrast_count
        # mask-out self-contrast cases
        mask = F.stack([mask] * nums).reshape(an1 * anchor_count,
                                              con * contrast_count)
        logits_mask = F.scatter(
            F.ones_like(mask), 1,
            F.arange(0, int(bs * anchor_count), dtype="int32").reshape(-1, 1),
            F.zeros(int(bs * anchor_count), dtype="int32").reshape(-1, 1))
        mask = mask * logits_mask
        #compute log_prob
        exp_logits = F.exp(logits) * logits_mask
        log_prob = logits - F.log(F.sum(exp_logits, axis=1,
                                        keepdims=True))  #equation 2

        #mean
        mean_log_prob_pos = F.sum(mask * log_prob, axis=1) / F.sum(mask,
                                                                   axis=1)

        #loss
        loss = -(self.temperate / self.base_temperate) * mean_log_prob_pos
        loss = F.mean(loss.reshape(anchor_count, bs))
        return loss
Example #20
 def f(x):
     return F.dot(u, F.matmul(x, v))
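
Assuming u and v are vectors, this computes the scalar bilinear form uᵀ X v: `F.matmul(x, v)` maps v through x, and `F.dot` contracts the result with u.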