def backward(self, x, d, z1, z2, y):
    grad = {}

    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']

    # Delta at the output layer
    delta3 = functions.d_softmax_with_loss(d, y)
    # Gradient of b3
    grad['b3'] = np.sum(delta3, axis=0)
    # Gradient of W3
    grad['W3'] = np.dot(z2.T, delta3)
    # Derivative of the activation function (sigmoid)
    delta2 = np.dot(delta3, W3.T) * functions.d_sigmoid(z2)
    # Gradient of b2
    grad['b2'] = np.sum(delta2, axis=0)
    # Gradient of W2
    grad['W2'] = np.dot(z1.T, delta2)
    # Derivative of the activation function (sigmoid)
    delta1 = np.dot(delta2, W2.T) * functions.d_sigmoid(z1)
    # Gradient of b1
    grad['b1'] = np.sum(delta1, axis=0)
    # Gradient of W1
    grad['W1'] = np.dot(x.T, delta1)

    return grad
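For reference, the `functions` helpers used above behave roughly as follows. This is a minimal sketch assuming the standard definitions; the actual `functions` module is not shown in this excerpt:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def d_sigmoid(x):
    # Derivative of the sigmoid: s * (1 - s)
    s = sigmoid(x)
    return s * (1.0 - s)

def d_softmax_with_loss(d, y):
    # Gradient of softmax + cross-entropy taken together: (y - d), averaged over the batch
    batch_size = d.shape[0]
    return (y - d) / batch_size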
def backward(x, d, z1, y):
    # print("\n##### backpropagation start #####")
    grad = {}

    W1, W2 = network['W1'], network['W2']
    b1, b2 = network['b1'], network['b2']

    # Delta at the output layer
    delta2 = functions.d_mean_squared_error(d, y)
    # Gradient of b2
    grad['b2'] = np.sum(delta2, axis=0)
    # Gradient of W2
    grad['W2'] = np.dot(z1.T, delta2)
    # Delta at the hidden layer
    # delta1 = np.dot(delta2, W2.T) * functions.d_relu(z1)
    ## Try it out: sigmoid instead of ReLU
    delta1 = np.dot(delta2, W2.T) * functions.d_sigmoid(z1)
    delta1 = delta1[np.newaxis, :]
    # Gradient of b1
    grad['b1'] = np.sum(delta1, axis=0)
    x = x[np.newaxis, :]
    # Gradient of W1
    grad['W1'] = np.dot(x.T, delta1)

    # print_vec("partial derivative: W1", grad["W1"])
    # print_vec("partial derivative: W2", grad["W2"])
    # print_vec("partial derivative: b1", grad["b1"])
    # print_vec("partial derivative: b2", grad["b2"])

    return grad
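The returned grad dict plugs straight into a plain gradient-descent step. A minimal usage sketch; the forward function, its return values, and the learning_rate value are assumptions, since the surrounding training loop is not shown in this excerpt:

learning_rate = 0.07  # assumed value; the actual hyperparameter is not shown here

# One training step: forward pass, backward pass, vanilla SGD update.
# forward is assumed to return the hidden activation z1 and the output y.
z1, y = forward(x)
grad = backward(x, d, z1, y)

# Update every parameter in the network dict in the negative gradient direction
for key in ('W1', 'W2', 'b1', 'b2'):
    network[key] -= learning_rate * grad[key]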
def loss_margin_with_gradient(self, x, t):
    features = self.__extract_feature(x)
    l = self.loss(x, t)

    # gradient
    score = [value * self.params[name] for name, value in features.items()]
    s = sum(score)
    dW = d_sigmoid(s)
    for name, value in features.items():
        features[name] = t * value * dW

    # margin
    m = s * t
    return l, m, features
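For intuition: with labels t in {+1, -1}, the margin m = t * s is positive exactly when the raw score agrees in sign with the label. A hypothetical caller might use the returned triple like this; the model object, learning_rate, and the update rule are all illustrative assumptions, not part of the source:

learning_rate = 0.1  # hypothetical step size

# Hypothetical training step for one example (x, t) with t in {+1, -1}
l, m, grads = model.loss_margin_with_gradient(x, t)
if m <= 0:  # a non-positive margin means the example is misclassified
    for name, g in grads.items():
        # grads already carries the label sign, so add to move the score towards t
        model.params[name] += learning_rate * g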
# Input at time t
X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)
# Target data at time t
dd = np.array([d_bin[binary_dim - t - 1]])

u[:, t + 1] = np.dot(X, W_in) + np.dot(z[:, t].reshape(1, -1), W)
z[:, t + 1] = functions.sigmoid(u[:, t + 1])
# z[:,t+1] = functions.relu(u[:,t+1])
# z[:,t+1] = np.tanh(u[:,t+1])
y[:, t] = functions.sigmoid(np.dot(z[:, t + 1].reshape(1, -1), W_out))

# Error
loss = functions.mean_squared_error(dd, y[:, t])
delta_out[:, t] = functions.d_mean_squared_error(dd, y[:, t]) * functions.d_sigmoid(y[:, t])
all_loss += loss
out_bin[binary_dim - t - 1] = np.round(y[:, t])

for t in range(binary_dim)[::-1]:
    X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)
    delta[:, t] = (np.dot(delta[:, t + 1].T, W.T)
                   + np.dot(delta_out[:, t].T, W_out.T)) * functions.d_sigmoid(u[:, t + 1])
    # delta[:,t] = (np.dot(delta[:,t+1].T, W.T) + np.dot(delta_out[:,t].T, W_out.T)) * functions.d_relu(u[:,t+1])
    # delta[:,t] = (np.dot(delta[:,t+1].T, W.T) + np.dot(delta_out[:,t].T, W_out.T)) * d_tanh(u[:,t+1])
    # Gradient updates
    W_out_grad += np.dot(z[:, t + 1].reshape(-1, 1), delta_out[:, t].reshape(-1, 1))
    W_grad += np.dot(z[:, t].reshape(-1, 1), delta[:, t].reshape(1, -1))
    W_in_grad += np.dot(X.T, delta[:, t].reshape(1, -1))
# Time-series loop
for t in range(binary_dim):
    # Input at time t
    X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)
    # Target data at time t
    dd = np.array([d_bin[binary_dim - t - 1]])

    u[:, t + 1] = np.dot(X, W_in) + np.dot(z[:, t].reshape(1, -1), W)
    z[:, t + 1] = functions.sigmoid(u[:, t + 1])
    y[:, t] = functions.sigmoid(np.dot(z[:, t + 1].reshape(1, -1), W_out))

    # Error
    loss = functions.mean_squared_error(dd, y[:, t])
    delta_out[:, t] = functions.d_mean_squared_error(dd, y[:, t]) * functions.d_sigmoid(y[:, t])
    all_loss += loss
    out_bin[binary_dim - t - 1] = np.round(y[:, t])

for t in range(binary_dim)[::-1]:
    X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)
    delta[:, t] = (np.dot(delta[:, t + 1].T, W.T)
                   + np.dot(delta_out[:, t].T, W_out.T)) * functions.d_sigmoid(u[:, t + 1])
    # Gradient updates
    W_out_grad += np.dot(z[:, t + 1].reshape(-1, 1), delta_out[:, t].reshape(-1, 1))
    W_grad += np.dot(z[:, t].reshape(-1, 1), delta[:, t].reshape(1, -1))
    W_in_grad += np.dot(X.T, delta[:, t].reshape(1, -1))
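After the backward loop, the accumulated gradients are typically applied with plain SGD and then zeroed before the next training sample. A minimal sketch of that update step, assuming a learning_rate hyperparameter; the exact update code is not shown in this excerpt:

# Gradient-descent update on all three weight matrices
W_in -= learning_rate * W_in_grad
W -= learning_rate * W_grad
W_out -= learning_rate * W_out_grad

# Reset the accumulated gradients for the next sample
W_in_grad *= 0
W_grad *= 0
W_out_grad *= 0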
# Input at time t
X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)
# Target data at time t
dd = np.array([d_bin[binary_dim - t - 1]])

u[:, t + 1] = np.dot(X, W_in) + np.dot(z[:, t].reshape(1, -1), W)
# Try changing the activation function -> tanh
# z[:,t+1] = functions.sigmoid(u[:,t+1])
z[:, t + 1] = np.tanh(u[:, t + 1])
y[:, t] = functions.sigmoid(np.dot(z[:, t + 1].reshape(1, -1), W_out))

# Error
loss = functions.mean_squared_error(dd, y[:, t])
delta_out[:, t] = functions.d_mean_squared_error(dd, y[:, t]) * functions.d_sigmoid(y[:, t])
all_loss += loss
out_bin[binary_dim - t - 1] = np.round(y[:, t])

for t in range(binary_dim)[::-1]:
    X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)
    # Try changing the activation function -> tanh, so d_sigmoid -> d_tanh
    # delta[:,t] = (np.dot(delta[:,t+1].T, W.T) + np.dot(delta_out[:,t].T, W_out.T)) * functions.d_sigmoid(u[:,t+1])
    delta[:, t] = (np.dot(delta[:, t + 1].T, W.T)
                   + np.dot(delta_out[:, t].T, W_out.T)) * d_tanh(u[:, t + 1])
    # Gradient updates
    W_out_grad += np.dot(z[:, t + 1].reshape(-1, 1), delta_out[:, t].reshape(-1, 1))
    W_grad += np.dot(z[:, t].reshape(-1, 1), delta[:, t].reshape(1, -1))
    W_in_grad += np.dot(X.T, delta[:, t].reshape(1, -1))
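Note that d_tanh is called unqualified here, unlike the functions.* helpers, so it is presumably defined locally in the notebook. A minimal definition using the standard identity tanh'(x) = 1 - tanh(x)^2:

import numpy as np

def d_tanh(x):
    # Derivative of tanh: 1 - tanh(x)^2
    return 1.0 - np.tanh(x) ** 2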