def softmax(x): """ Softmax function. Score to Probability. Returns float [0.0, 1.0] >>> x = np.array([0]) >>> softmax(x) array([1.]) >>> x = np.array([0, 0]) >>> softmax(x) array([0.5, 0.5]) >>> x = np.array([[0, 0], [0, 0]]) >>> softmax(x) array([[0.5, 0.5], [0.5, 0.5]]) """ if x.ndim == 2: x = x - x.max(axis=1, keepdims=True) x = np.exp(x) x = x / x.sum(axis=1, keepdims=True) elif x.ndim == 1: x = x - np.max(x) x = np.exp(x) / np.sum(np.exp(x)) return x
def backward(self, dc):
    hs, ar = self.cache
    N, T, H = hs.shape

    # broadcast dc back across the T axis (reverse of the sum over time in forward)
    dt = dc.reshape(N, 1, H).repeat(T, axis=1)
    dar = dt * hs
    dhs = dt * ar
    da = np.sum(dar, axis=2)  # reverse of broadcasting a over the hidden axis

    return dhs, da
def backward(self, dout):
    W, _ = self.params
    dx = np.dot(dout, W.T)
    dW = np.dot(self.x.T, dout)
    db = np.sum(dout, axis=0)

    self.grads[0][...] = dW
    self.grads[1][...] = db
    return dx
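# For reference, a sketch of the forward pass this backward inverts, assuming
# the same params layout (self.params = [W, b]) and that x is stashed on self:
def forward(self, x):
    W, b = self.params
    out = np.dot(x, W) + b
    self.x = x
    return out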
def forward(self, hs, a):
    N, T, H = hs.shape

    ar = a.reshape(N, T, 1)  # broadcast the weights over the hidden dimension
    t = hs * ar
    c = np.sum(t, axis=1)    # weighted sum over the time axis -> context vector

    self.cache = (hs, ar)
    return c
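# A minimal shape check for the weighted-sum forward above; `weight_sum` is a
# hypothetical instance of the layer this method belongs to:
N, T, H = 10, 5, 4
hs = np.random.randn(N, T, H)   # encoder hidden states
a = np.random.randn(N, T)       # one attention weight per time step
c = weight_sum.forward(hs, a)
print(c.shape)                  # (10, 4): one context vector per sample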
def forward(self, hs, h):
    N, T, H = hs.shape

    hr = h.reshape(N, 1, H)
    t = hs * hr
    s = np.sum(t, axis=2)        # dot product of h with each hs[:, t] -> score per time step
    a = self.softmax.forward(s)  # normalize the scores into attention weights

    self.cache = (hs, hr)
    return a
def clip_grads(grads, max_norm):
    # global L2 norm over all gradient arrays
    total_norm = 0
    for grad in grads:
        total_norm += np.sum(grad**2)
    total_norm = np.sqrt(total_norm)

    # scale every gradient in place when the norm exceeds max_norm
    rate = max_norm / (total_norm + 1e-6)
    if rate < 1:
        for grad in grads:
            grad *= rate
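# A quick usage sketch with illustrative values; the global norm here is
# sqrt(3^2 + 4^2 + 12^2) = 13, so both arrays get scaled by ~5/13:
grads = [np.array([3.0, 4.0]), np.array([0.0, 12.0])]
clip_grads(grads, max_norm=5.0)
print(grads)  # scaled in place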
def backward(self, da):
    hs, hr = self.cache
    N, T, H = hs.shape

    ds = self.softmax.backward(da)
    dt = ds.reshape(N, T, 1).repeat(H, axis=2)  # reverse of the sum over the hidden axis
    dhs = dt * hr
    dhr = dt * hs
    dh = np.sum(dhr, axis=1)  # reverse of broadcasting h over the time axis

    return dhs, dh
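# The weighted-sum and attention-weight forward/backward pairs above compose
# into a single attention layer; a minimal sketch (AttentionWeight and
# WeightSum are assumed class names for those pairs):
class Attention:
    def __init__(self):
        self.attention_weight_layer = AttentionWeight()
        self.weight_sum_layer = WeightSum()

    def forward(self, hs, h):
        a = self.attention_weight_layer.forward(hs, h)  # scores -> weights
        return self.weight_sum_layer.forward(hs, a)     # weights -> context

    def backward(self, dout):
        dhs0, da = self.weight_sum_layer.backward(dout)
        dhs1, dh = self.attention_weight_layer.backward(da)
        return dhs0 + dhs1, dh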
def backward(self, dscore):
    H = self.cache

    dout = self.affine.backward(dscore)
    # split off the gradient for the encoder state h that was concatenated
    # before the Affine layer
    dout, dhs0 = dout[:, :, H:], dout[:, :, :H]
    dout = self.lstm.backward(dout)
    # split off the gradient for h concatenated before the LSTM layer
    dembed, dhs1 = dout[:, :, H:], dout[:, :, :H]
    self.embed.backward(dembed)

    dhs = dhs0 + dhs1
    dh = self.lstm.dh + np.sum(dhs, axis=1)
    return dh
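# A sketch of the forward pass that builds the concatenation structure this
# backward undoes: a "peeky" decoder that feeds the encoder state h into every
# time step. It assumes the same embed/lstm/affine sub-layers and that the
# LSTM layer exposes a set_state method:
def forward(self, xs, h):
    N, T = xs.shape
    N, H = h.shape

    self.lstm.set_state(h)

    out = self.embed.forward(xs)
    hs = np.repeat(h, T, axis=0).reshape(N, T, H)  # copy h across time steps
    out = np.concatenate((hs, out), axis=2)        # peek h at the LSTM input

    out = self.lstm.forward(out)
    out = np.concatenate((hs, out), axis=2)        # peek h at the Affine input

    score = self.affine.forward(out)
    self.cache = H
    return score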
def backward(self, dh_next):
    Wx, Wh, _ = self.params
    x, h_prev, h_next = self.cache

    dt = dh_next * (1 - h_next**2)  # derivative of tanh
    db = np.sum(dt, axis=0)
    dWh = np.dot(h_prev.T, dt)
    dh_prev = np.dot(dt, Wh.T)
    dWx = np.dot(x.T, dt)
    dx = np.dot(dt, Wx.T)

    self.grads[0][...] = dWx
    self.grads[1][...] = dWh
    self.grads[2][...] = db

    return dx, dh_prev
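# For reference, the step this backward inverts is the usual RNN update
# h_next = tanh(h_prev @ Wh + x @ Wx + b); a sketch assuming the same
# params/cache layout as above:
def forward(self, x, h_prev):
    Wx, Wh, b = self.params
    t = np.dot(h_prev, Wh) + np.dot(x, Wx) + b
    h_next = np.tanh(t)
    self.cache = (x, h_prev, h_next)
    return h_next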
def backward(self, dout):
    x = self.x
    N, T, D = x.shape
    W, b = self.params

    # collapse the batch and time axes so each gradient is a single matrix product
    dout = dout.reshape(N * T, -1)
    rx = x.reshape(N * T, -1)

    db = np.sum(dout, axis=0)
    dW = np.dot(rx.T, dout)
    dx = np.dot(dout, W.T)
    dx = dx.reshape(*x.shape)

    self.grads[0][...] = dW
    self.grads[1][...] = db

    return dx
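# The corresponding forward pass (a sketch): the same W and b are applied at
# every time step by flattening (N, T, D) to (N*T, D):
def forward(self, x):
    N, T, D = x.shape
    W, b = self.params

    rx = x.reshape(N * T, -1)
    out = np.dot(rx, W) + b
    self.x = x
    return out.reshape(N, T, -1)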
def cross_entropy_error(y, t): """ Cross entropy error. One of the loss functions. """ if y.ndim == 1: t = t.reshape(1, t.size) y = y.reshape(1, y.size) # convert to index of correct label when training data is one-hot-vector # (教師データが one-hot-vector の場合、正解ラベルのインデックスに変換 if t.size == y.size: t = t.argmax(axis=1) batch_size = y.shape[0] return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
from collections import Counter  # needed for the word counts below


def __init__(self, corpus, power, sample_size):
    self.sample_size = sample_size
    self.vocab_size = None
    self.word_p = None

    # count how often each word id occurs in the corpus
    counts = Counter()
    for word_id in corpus:
        counts[word_id] += 1

    vocab_size = len(counts)
    self.vocab_size = vocab_size

    self.word_p = np.zeros(vocab_size)
    for i in range(vocab_size):
        self.word_p[i] = counts[i]

    # raising to `power` (e.g. 0.75) lifts the probability of rare words;
    # then normalize into a distribution
    self.word_p = np.power(self.word_p, power)
    self.word_p /= np.sum(self.word_p)
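# A sketch of how word_p can drive negative sampling for word2vec
# (get_negative_sample is a hypothetical method name; this simplified version
# does not exclude the positive target from the draw):
def get_negative_sample(self, target):
    batch_size = target.shape[0]
    return np.random.choice(self.vocab_size,
                            size=(batch_size, self.sample_size),
                            replace=True, p=self.word_p)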
def forward(self, xs, ts):
    N, T, V = xs.shape

    # when the teacher labels are one-hot vectors, convert them to label indices
    if ts.ndim == 3:
        ts = ts.argmax(axis=2)

    mask = (ts != self.ignore_label)

    # collapse the batch and time axes
    xs = xs.reshape(N * T, V)
    ts = ts.reshape(N * T)
    mask = mask.reshape(N * T)

    ys = softmax(xs)
    ls = np.log(ys[np.arange(N * T), ts])
    ls *= mask  # zero the loss for entries equal to ignore_label
    loss = -np.sum(ls)
    loss /= mask.sum()

    self.cache = (ts, ys, mask, (N, T, V))
    return loss
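# A sketch of the matching backward pass, assuming the cache layout set above:
def backward(self, dout=1):
    ts, ys, mask, (N, T, V) = self.cache

    dx = ys
    dx[np.arange(N * T), ts] -= 1  # softmax-with-loss gradient: y - t
    dx *= dout
    dx /= mask.sum()
    dx *= mask[:, np.newaxis]      # zero the gradient at ignored positions

    return dx.reshape((N, T, V))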
def forward(self, h, idx):
    target_W = self.embed.forward(idx)
    out = np.sum(target_W * h, axis=1)  # compute the row-wise dot product

    self.cache = (h, target_W)
    return out
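# A sketch of the corresponding backward pass, assuming the cache and embed
# layer used above:
def backward(self, dout):
    h, target_W = self.cache
    dout = dout.reshape(dout.shape[0], 1)  # (N,) -> (N, 1) for broadcasting

    dtarget_W = dout * h
    self.embed.backward(dtarget_W)
    dh = dout * target_W
    return dh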
def cos_similarity(x, y):
    # 1e-8 guards against division by zero
    nx = x / (np.sqrt(np.sum(x**2)) + 1e-8)
    ny = y / (np.sqrt(np.sum(y**2)) + 1e-8)
    return np.dot(nx, ny)
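# A quick usage sketch:
x = np.array([1.0, 0.0])
y = np.array([1.0, 1.0])
print(cos_similarity(x, y))  # ~0.7071, cos(45 degrees)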
def backward(self, dout):
    # softmax Jacobian-vector product: dx = y * dout - y * sum(y * dout)
    dx = self.out * dout
    sumdx = np.sum(dx, axis=1, keepdims=True)
    dx -= self.out * sumdx
    return dx
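# The matching forward pass (a sketch; it only needs to store the softmax
# output for reuse in backward):
def forward(self, x):
    self.out = softmax(x)
    return self.out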