def decode(self, th_b, hidden, train):
    """Run ``self.transformer`` over the input under a subsequent mask.

    The input is transposed, projected with ``self.proj_to_dsz`` and then
    passed to ``self.transformer`` together with ``subsequent_mask(T)``,
    where ``T`` is read from the transposed input's dimensions.

    :param th_b: Input expression. Presumably laid out time-major with the
        hidden size second, going by the name -- TODO confirm with callers.
    :param hidden: Not used by this method.
    :param train: Forwarded unchanged to ``self.transformer``.
    :returns: A tuple ``(steps, None)`` where ``steps`` is the list produced
        by iterating over the transposed transformer output.
    """
    transposed = dy.transpose(th_b)
    # The mask size is read before the projection is applied.
    seq_len = transposed.dim()[0][1]
    projected = self.proj_to_dsz(transposed)
    encoded = self.transformer(projected, subsequent_mask(seq_len), train)
    steps = [step for step in dy.transpose(encoded)]
    return steps, None
def decode(self, th_b, hidden, train):
    """Run ``self.transformer`` over the input under a subsequent mask.

    The input is transposed, projected with ``self.proj_to_dsz`` and then
    passed to ``self.transformer`` together with ``subsequent_mask(T)``,
    where ``T`` is read from the transposed input's dimensions.

    :param th_b: Input expression. Presumably laid out time-major with the
        hidden size second, going by the name -- TODO confirm with callers.
    :param hidden: Not used by this method.
    :param train: Forwarded unchanged to ``self.transformer``.
    :returns: A tuple ``(steps, None)`` where ``steps`` is the list produced
        by iterating over the transposed transformer output.
    """
    transposed = dy.transpose(th_b)
    # The mask size is read before the projection is applied.
    seq_len = transposed.dim()[0][1]
    projected = self.proj_to_dsz(transposed)
    encoded = self.transformer(projected, subsequent_mask(seq_len), train)
    steps = [step for step in dy.transpose(encoded)]
    return steps, None
def attn_values_sub_mask(attn, qkv):
    """Check masked attention output against running means of the values.

    The query is swapped for an all-zero expression with the same dimensions
    and batch size, ``attn`` is called with ``subsequent_mask(T)``, and every
    output slice at position ``t`` is asserted to be close (``atol=1e-5``) to
    the mean of the value slices at positions ``0..t``.

    NOTE(review): presumably a zero query makes the attention weights uniform
    over the positions the mask allows, which is why the running mean is the
    expected result -- confirm against the ``attn`` implementation.

    :param attn: Callable invoked as ``attn(q, k, v, mask=...)``; its result
        must expose ``npvalue()``.
    :param qkv: Iterable unpacking to ``(q, k, v)``.
    :raises AssertionError: If any output slice deviates from the running mean.
    """
    query, key, value = qkv
    shape, B = query.dim()
    _, T, H = shape
    zero_query = dy.zeros(shape, batch_size=B)
    causal_mask = subsequent_mask(T)
    actual = attn(zero_query, key, value, mask=causal_mask).npvalue()
    values = value.npvalue()
    # ndindex varies the last index fastest: b outermost, then h, then t.
    for b, h, t in np.ndindex(B, H, T):
        expected = np.mean(values[:, : t + 1, :, :], axis=1)[:, h, b]
        np.testing.assert_allclose(actual[:, t, h, b], expected, atol=1e-5)
def __call__(self, encoder_output, dst, train):
    """Decode ``dst`` against the encoder output and return ``self.output`` of it.

    Steps, in order: embed ``dst`` with ``self.tgt_embedding.encode``,
    transpose, project with ``self.proj_to_hsz``, build the context by
    concatenating ``encoder_output.output`` column-wise, run
    ``self.transformer_decoder`` with the source mask and a subsequent mask
    over the target, then apply ``self.proj_to_dsz`` and ``self.output``.

    :param encoder_output: Object providing ``output`` (passed to
        ``dy.concatenate_cols``) and ``src_mask``.
    :param dst: Target input handed to ``self.tgt_embedding.encode``.
    :param train: Forwarded unchanged to ``self.transformer_decoder``.
    :returns: Whatever ``self.output`` returns for the projected decoder output.
    """
    tgt_embedded = dy.transpose(self.tgt_embedding.encode(dst))
    tgt_embedded = self.proj_to_hsz(tgt_embedded)
    context = dy.concatenate_cols(encoder_output.output)
    # The target mask is sized from the projected embeddings.
    seq_len = tgt_embedded.dim()[0][1]
    dst_mask = subsequent_mask(seq_len)
    decoded = self.transformer_decoder(
        tgt_embedded, context, encoder_output.src_mask, dst_mask, train
    )
    return self.output(self.proj_to_dsz(decoded))
def test_subsequent_mask_valid_loc():
    """Spot-check 100 random positions of the first subsequent mask.

    A random size ``T`` in ``[4, 100)`` is drawn, the first mask returned by
    ``subsequent_mask(T)`` is converted to a squeezed NumPy array, and for
    each sampled index pair the entry must be 0 when the first index is
    greater than the second and 1 otherwise.
    """
    T = np.random.randint(4, 100)
    mask = subsequent_mask(T)[0].npvalue().squeeze()

    for _ in range(100):
        row, col = np.random.randint(0, T, size=2)
        expected = 0 if row > col else 1
        assert mask[row, col] == expected
def test_subsequent_mask_valid_loc():
    """Spot-check 100 random positions of the first subsequent mask.

    A random size ``T`` in ``[4, 100)`` is drawn, the first mask returned by
    ``subsequent_mask(T)`` is converted to a squeezed NumPy array, and for
    each sampled index pair the entry must be 0 when the first index is
    greater than the second and 1 otherwise.
    """
    T = np.random.randint(4, 100)
    mask = subsequent_mask(T)[0].npvalue().squeeze()

    for _ in range(100):
        row, col = np.random.randint(0, T, size=2)
        expected = 0 if row > col else 1
        assert mask[row, col] == expected
def attn_values_sub_mask(attn, qkv):
    """Check masked attention output against running means of the values.

    The query is swapped for an all-zero expression with the same dimensions
    and batch size, ``attn`` is called with ``subsequent_mask(T)``, and every
    output slice at position ``t`` is asserted to be close (``atol=1e-5``) to
    the mean of the value slices at positions ``0..t``.

    NOTE(review): presumably a zero query makes the attention weights uniform
    over the positions the mask allows, which is why the running mean is the
    expected result -- confirm against the ``attn`` implementation.

    :param attn: Callable invoked as ``attn(q, k, v, mask=...)``; its result
        must expose ``npvalue()``.
    :param qkv: Iterable unpacking to ``(q, k, v)``.
    :raises AssertionError: If any output slice deviates from the running mean.
    """
    query, key, value = qkv
    shape, B = query.dim()
    _, T, H = shape
    zero_query = dy.zeros(shape, batch_size=B)
    causal_mask = subsequent_mask(T)
    actual = attn(zero_query, key, value, mask=causal_mask).npvalue()
    values = value.npvalue()
    # ndindex varies the last index fastest: b outermost, then h, then t.
    for b, h, t in np.ndindex(B, H, T):
        expected = np.mean(values[:, : t + 1, :, :], axis=1)[:, h, b]
        np.testing.assert_allclose(actual[:, t, h, b], expected, atol=1e-5)
def test_subsequent_mask_valid_count():
    """Check that the first subsequent mask sums to ``T * (T + 1) / 2``.

    ``T`` is drawn at random from ``[4, 50)``; the sum is taken over the
    NumPy value of the first mask returned by ``subsequent_mask(T)``.
    """
    T = np.random.randint(4, 50)
    expected_total = (T * (T + 1)) / 2
    first_mask = subsequent_mask(T)[0].npvalue()
    assert first_mask.sum() == expected_total
def test_subsequent_mask_shape():
    """Check that every mask from ``subsequent_mask(T)`` reports the expected dim.

    ``T`` is drawn at random from ``[2, 50)``; each returned mask must have
    ``dim()`` equal to ``((T, T, 1), 1)``.
    """
    T = np.random.randint(2, 50)
    expected_dim = ((T, T, 1), 1)
    for candidate in subsequent_mask(T):
        assert candidate.dim() == expected_dim
def test_subsequent_mask_valid_count():
    """Check that the first subsequent mask sums to ``T * (T + 1) / 2``.

    ``T`` is drawn at random from ``[4, 50)``; the sum is taken over the
    NumPy value of the first mask returned by ``subsequent_mask(T)``.
    """
    T = np.random.randint(4, 50)
    expected_total = (T * (T + 1)) / 2
    first_mask = subsequent_mask(T)[0].npvalue()
    assert first_mask.sum() == expected_total
def test_subsequent_mask_shape():
    """Check that every mask from ``subsequent_mask(T)`` reports the expected dim.

    ``T`` is drawn at random from ``[2, 50)``; each returned mask must have
    ``dim()`` equal to ``((T, T, 1), 1)``.
    """
    T = np.random.randint(2, 50)
    expected_dim = ((T, T, 1), 1)
    for candidate in subsequent_mask(T):
        assert candidate.dim() == expected_dim