def backward(x, d, z1, y):
    # print("\n##### Backpropagation start #####")
    grad = {}

    W1, W2 = network['W1'], network['W2']
    b1, b2 = network['b1'], network['b2']

    # Delta at the output layer
    delta2 = functions.d_mean_squared_error(d, y)
    # Gradient of b2
    grad['b2'] = np.sum(delta2, axis=0)
    # Gradient of W2
    grad['W2'] = np.dot(z1.T, delta2)

    # Delta at the hidden layer
    # delta1 = np.dot(delta2, W2.T) * functions.d_relu(z1)
    ## Try it out: use the sigmoid derivative instead
    delta1 = np.dot(delta2, W2.T) * functions.d_sigmoid(z1)
    delta1 = delta1[np.newaxis, :]
    # Gradient of b1
    grad['b1'] = np.sum(delta1, axis=0)

    x = x[np.newaxis, :]
    # Gradient of W1
    grad['W1'] = np.dot(x.T, delta1)

    # print_vec("partial derivative of W1", grad["W1"])
    # print_vec("partial derivative of W2", grad["W2"])
    # print_vec("partial derivative of b1", grad["b1"])
    # print_vec("partial derivative of b2", grad["b2"])

    return grad
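The backward pass above leans on helpers from the external `functions` module. As a reference, the following is a minimal sketch of what those helpers are assumed to compute (the sigmoid and ReLU derivatives and the mean-squared-error derivative); the actual course module may differ in details such as batch-size handling.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def d_sigmoid(x):
    # derivative of the sigmoid with respect to its input
    return sigmoid(x) * (1.0 - sigmoid(x))

def d_relu(x):
    # derivative of ReLU: 1 where the input is positive, 0 elsewhere
    return np.where(x > 0, 1.0, 0.0)

def d_mean_squared_error(d, y):
    # derivative of 0.5 * mean((y - d)**2) with respect to y
    return (y - d) / len(d)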
# Time-series loop (modified version: hidden and output activations changed to ReLU)
for t in range(binary_dim):
    # Input values
    X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)
    # Target data at time t
    dd = np.array([d_bin[binary_dim - t - 1]])

    u[:, t + 1] = np.dot(X, W_in) + np.dot(z[:, t].reshape(1, -1), W)
    # Try changing the hidden-layer activation function
    # z[:,t+1] = functions.sigmoid(u[:,t+1])
    z[:, t + 1] = functions.relu(u[:, t + 1])

    # y[:,t] = functions.sigmoid(np.dot(z[:,t+1].reshape(1, -1), W_out))
    y[:, t] = functions.relu(np.dot(z[:, t + 1].reshape(1, -1), W_out))

    # Error
    loss = functions.mean_squared_error(dd, y[:, t])

    # delta_out[:,t] = functions.d_mean_squared_error(dd, y[:,t]) * functions.d_sigmoid(y[:,t])
    delta_out[:, t] = functions.d_mean_squared_error(dd, y[:, t]) * functions.d_relu(y[:, t])

    all_loss += loss

    out_bin[binary_dim - t - 1] = np.round(y[:, t])

for t in range(binary_dim)[::-1]:
    X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)

    # delta[:,t] = (np.dot(delta[:,t+1].T, W.T) + np.dot(delta_out[:,t].T, W_out.T)) * functions.d_sigmoid(u[:,t+1])
    delta[:, t] = (np.dot(delta[:, t + 1].T, W.T)
                   + np.dot(delta_out[:, t].T, W_out.T)) * functions.d_relu(u[:, t + 1])

    # Gradient accumulation
    W_out_grad += np.dot(z[:, t + 1].reshape(-1, 1), delta_out[:, t].reshape(-1, 1))
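In this modified snippet, the accumulation of W_grad and W_in_grad and the subsequent weight update are cut off; they follow the same pattern as in the unmodified sigmoid version below. The closing update step is sketched here under the assumption of plain SGD with a fixed learning_rate.

# Apply the accumulated gradients (assumed: plain SGD with a fixed learning_rate)
W_in -= learning_rate * W_in_grad
W_out -= learning_rate * W_out_grad
W -= learning_rate * W_grad

# Reset the accumulators for the next training iteration
W_in_grad *= 0
W_out_grad *= 0
W_grad *= 0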
# Time-series loop (original version: sigmoid activations)
for t in range(binary_dim):
    # Input values
    X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)
    # Target data at time t
    dd = np.array([d_bin[binary_dim - t - 1]])

    u[:, t + 1] = np.dot(X, W_in) + np.dot(z[:, t].reshape(1, -1), W)
    z[:, t + 1] = functions.sigmoid(u[:, t + 1])

    y[:, t] = functions.sigmoid(np.dot(z[:, t + 1].reshape(1, -1), W_out))

    # Error
    loss = functions.mean_squared_error(dd, y[:, t])

    delta_out[:, t] = functions.d_mean_squared_error(dd, y[:, t]) * functions.d_sigmoid(y[:, t])

    all_loss += loss

    out_bin[binary_dim - t - 1] = np.round(y[:, t])

for t in range(binary_dim)[::-1]:
    X = np.array([a_bin[-t - 1], b_bin[-t - 1]]).reshape(1, -1)

    delta[:, t] = (np.dot(delta[:, t + 1].T, W.T)
                   + np.dot(delta_out[:, t].T, W_out.T)) * functions.d_sigmoid(u[:, t + 1])

    # Gradient accumulation
    W_out_grad += np.dot(z[:, t + 1].reshape(-1, 1), delta_out[:, t].reshape(-1, 1))
    W_grad += np.dot(z[:, t].reshape(-1, 1), delta[:, t].reshape(1, -1))
    W_in_grad += np.dot(X.T, delta[:, t].reshape(1, -1))
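This loop assumes that the weights, state buffers, and gradient accumulators have already been allocated, and that the per-sample binary arrays a_bin, b_bin, and d_bin are generated elsewhere. The setup below is a sketch consistent with the shapes used above; the exact sizes and initialization in the course notebook may differ.

import numpy as np

binary_dim = 8                 # number of bits per operand
input_layer_size = 2           # two input bits (a, b) per time step
hidden_layer_size = 16
output_layer_size = 1          # one output bit per time step
weight_init_std = 1
learning_rate = 0.1

W_in = weight_init_std * np.random.randn(input_layer_size, hidden_layer_size)
W_out = weight_init_std * np.random.randn(hidden_layer_size, output_layer_size)
W = weight_init_std * np.random.randn(hidden_layer_size, hidden_layer_size)

u = np.zeros((hidden_layer_size, binary_dim + 1))   # pre-activations per time step
z = np.zeros((hidden_layer_size, binary_dim + 1))   # hidden states per time step
y = np.zeros((output_layer_size, binary_dim))       # outputs per time step

delta = np.zeros((hidden_layer_size, binary_dim + 1))
delta_out = np.zeros((output_layer_size, binary_dim))

W_in_grad = np.zeros_like(W_in)
W_out_grad = np.zeros_like(W_out)
W_grad = np.zeros_like(W)

out_bin = np.zeros(binary_dim)   # predicted bits
all_loss = 0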
# Time-series loop
for t in range(maxlen):
    # Input values
    x = xs[t]

    u = np.dot(x, W_in) + np.dot(z, W)
    us.append(u)
    z = np.tanh(u)
    zs.append(z)

y = np.dot(z, W_out)

# Error
loss = functions.mean_squared_error(d, y)

delta_out = functions.d_mean_squared_error(d, y)

delta *= 0
for t in range(maxlen)[::-1]:
    delta = (np.dot(delta, W.T) + np.dot(delta_out, W_out.T)) * d_tanh(us[t])

    # Gradient accumulation
    W_grad += np.dot(zs[t].reshape(-1, 1), delta.reshape(1, -1))
    W_in_grad += np.dot(xs[t], delta.reshape(1, -1))

W_out_grad = np.dot(z.reshape(-1, 1), delta_out)

# Apply gradients
W -= learning_rate * W_grad
W_in -= learning_rate * W_in_grad
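Two pieces are assumed by this snippet: the helper d_tanh, and the update of W_out, which is cut off above. A minimal sketch of both, using the analytic derivative of tanh and the same SGD step as the other weights:

def d_tanh(x):
    # derivative of tanh: 1 - tanh(x)**2
    return 1.0 - np.tanh(x) ** 2

# Output-weight update (assumed to mirror the W and W_in updates above)
W_out -= learning_rate * W_out_grad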