def call(self, x, params, state, **kwargs):
    """Merge per-head attention results and apply the output projection.

    Args:
        x: Array whose axis 1 is the sequence length and axis 2 the per-head
            depth. Presumably shaped (n_batch * n_heads, seqlen, d_head) --
            confirm against the caller.
        params: Projection weights; multiplied on the right of the merged
            (n_batch, seqlen, n_heads * d_head) activations via ``np.dot``.
        state: Layer state, returned untouched.
        **kwargs: Ignored.

    Returns:
        A ``(projected, state)`` tuple.
    """
    del kwargs  # Accepted for interface compatibility only.
    seqlen, d_head = x.shape[1], x.shape[2]
    n_heads = self._n_heads

    # Expose the head axis: (..., seqlen, d_head) -> (n_batch, n_heads, seqlen, d_head).
    per_head = np.reshape(x, (-1, n_heads, seqlen, d_head))
    # -> n_batch, seqlen, n_heads, d_head
    heads_last = np.transpose(per_head, (0, 2, 1, 3))
    # Concatenate the heads of each position: -> n_batch, seqlen, n_heads * d_head
    merged = np.reshape(heads_last, (-1, seqlen, n_heads * d_head))

    return np.dot(merged, params), state
def call(self, x, params, state, **kwargs):
    """Project the input and split the result into separate attention heads.

    Args:
        x: Array with batch on axis 0 and sequence length on axis 1.
        params: Projection weights applied with ``np.dot``; the projected
            last axis must hold ``n_heads * d_head`` values for the reshape
            below to succeed.
        state: Layer state, returned untouched.
        **kwargs: Ignored.

    Returns:
        A ``(heads, state)`` tuple where ``heads`` has shape
        (n_batch * n_heads, seqlen, d_head).
    """
    del kwargs  # Accepted for interface compatibility only.
    n_batch, seqlen = x.shape[0], x.shape[1]
    projected = np.dot(x, params)

    # n_batch, seqlen, n_heads*d_head -> n_batch, seqlen, n_heads, d_head
    split_shape = (n_batch, seqlen, self._n_heads, self._d_head)
    split = np.reshape(projected, split_shape)
    # n_batch, seqlen, n_heads, d_head -> n_batch, n_heads, seqlen, d_head
    heads_first = np.transpose(split, (0, 2, 1, 3))
    # n_batch, n_heads, seqlen, d_head -> n_batch*n_heads, seqlen, d_head
    return np.reshape(heads_first, (-1, seqlen, self._d_head)), state
def JoinHeads(x):  # pylint: disable=invalid-name
    """Fold the head axis of a rank-4 array back into its last dimension.

    ``nbatch``, ``n_heads`` and ``d_head`` are read from the enclosing scope.
    Presumably ``x`` is (nbatch, n_heads, seqlen, d_head) -- confirm against
    the caller.

    Returns:
        An array of shape (nbatch, -1, n_heads * d_head).
    """
    # Swap axes 1 and 2 so each position's heads sit next to each other.
    swapped = np.transpose(x, (0, 2, 1, 3))
    joined_shape = (nbatch, -1, n_heads * d_head)
    return np.reshape(swapped, joined_shape)
def SplitHeads(x):  # pylint: disable=invalid-name
    """Break the last dimension of ``x`` into heads and move heads to axis 1.

    ``nbatch``, ``n_heads`` and ``d_head`` are read from the enclosing scope.

    Returns:
        An array of shape (nbatch, n_heads, -1, d_head), where the inferred
        axis is whatever remains after the other three are fixed.
    """
    # Expose the head axis: (nbatch, -1, n_heads, d_head).
    with_heads = np.reshape(x, (nbatch, -1, n_heads, d_head))
    # Swap axes 1 and 2 so the head axis comes right after the batch axis.
    return np.transpose(with_heads, (0, 2, 1, 3))
def JoinHeads(x, params, **kwargs):
    """Join attention heads back into a single feature dimension.

    Args:
        x: Rank-4 array laid out as (n_batch, n_heads, seqlen, d_head).
        params: Ignored.
        **kwargs: Ignored.

    Returns:
        An array of shape (n_batch, seqlen, n_heads * d_head).
    """
    del params, kwargs  # Accepted for interface compatibility only.
    in_shape = np.shape(x)
    n_batch, seqlen = in_shape[0], in_shape[2]
    # n_batch, n_heads, seqlen, d_head --> n_batch, seqlen, n_heads, d_head
    heads_last = np.transpose(x, (0, 2, 1, 3))
    # Collapse the trailing (n_heads, d_head) pair --> n_batch, seqlen, d_model
    return np.reshape(heads_last, (n_batch, seqlen, -1))
def SplitHeads(x):  # pylint: disable=invalid-name
    """Break the last dimension of ``x`` into heads and move heads to axis 1.

    ``nbatch``, ``num_heads`` and ``head_depth`` are read from the enclosing
    scope.

    Returns:
        An array of shape (nbatch, num_heads, -1, head_depth), where the
        inferred axis is whatever remains after the other three are fixed.
    """
    # Expose the head axis: (nbatch, -1, num_heads, head_depth).
    split_shape = (nbatch, -1, num_heads, head_depth)
    with_heads = np.reshape(x, split_shape)
    # Swap axes 1 and 2 so the head axis comes right after the batch axis.
    return np.transpose(with_heads, (0, 2, 1, 3))
def join_heads(x):
    """Fold the head axis of a rank-4 array back into its last dimension.

    ``nbatch``, ``num_heads`` and ``head_depth`` are read from the enclosing
    scope. Presumably ``x`` is (nbatch, num_heads, seqlen, head_depth) --
    confirm against the caller.

    Returns:
        An array of shape (nbatch, -1, num_heads * head_depth).
    """
    # Swap axes 1 and 2 so each position's heads sit next to each other.
    heads_last = np.transpose(x, (0, 2, 1, 3))
    joined_shape = (nbatch, -1, num_heads * head_depth)
    return np.reshape(heads_last, joined_shape)