def start_nn():
    """Run the load / train / predict / save pipeline and report success.

    Loads the samples from ``data.txt``, trains with ``bp_train``, predicts
    on the same samples that were used for training, hands the fitted
    parameters and the predictions to ``save_model_result``, and returns
    ``jsonify({"res": "success"})``.  A progress banner is printed to stdout
    before each stage.

    Returns:
        Whatever ``jsonify`` produces for the payload ``{"res": "success"}``.
    """
    # Stage 1: read the data set.
    print("--------- 1.load data ------------")
    samples, targets, output_dim = load_data("data.txt")

    # Stage 2: fit the model.
    # The literals 20, 5000 and 0.008 are forwarded positionally; presumably
    # hidden-unit count, iteration cap and learning rate -- confirm against
    # the bp_train signature.
    print("--------- 2.training ------------")
    fitted = bp_train(samples, targets, 20, 5000, 0.008, output_dim)
    centers, deltas, weights = fitted

    # Stage 3: predict on the training samples themselves.
    print("--------- 3.get prediction ------------")
    predictions = get_predict(samples, centers, deltas, weights)

    # Stage 4: persist parameters and predictions.
    print("--------- 4.save model and result ------------")
    save_model_result(centers, deltas, weights, predictions)

    payload = {"res": "success"}
    return jsonify(payload)
# Training-loop fragment: steps the environment, applies a tabular Q-value
# update for the visited (pos, vel, action) cell, then calls bp_train on the
# updated Q-row for that state.
# NOTE(review): this fragment was recovered from a single flattened line; the
# nesting below (the `if terminate:` body and the trailing `while True:`) is
# reconstructed -- confirm against the original layout.
# Names read here but bound outside this fragment: min_lr, n_hidden, n,
# n_output, env, obs, total_reward, q_table, gamma, episode.
alpha = min_lr
steps = 0
# Random starting parameters; they are passed into bp_train and replaced by
# its return values on every step.
center = mat(np.random.rand(n_hidden, n))
delta = mat(np.random.rand(1, n_hidden))
w = mat(np.random.rand(n_hidden, n_output))
# Runs for at most 3001 iterations (steps 0..3000) unless `terminate` breaks
# out earlier.
while steps <= 3000:
    env.render()
    pos, vel = discretization(env, obs)
    # Pack (pos, vel) into a 1x2 row and pass it through normalize with
    # norm='max' before wrapping it as a matrix for the helpers below.
    state = normalize(np.array([pos, vel]).reshape(1, 2), axis=1, norm='max')
    state = np.matrix(state)
    a = get_action(state)
    obs, reward, terminate, _ = env.step(a)
    # The running total accumulates |obs[0] + 0.5|, not the `reward` returned
    # by env.step.
    total_reward += abs(obs[0] + 0.5)
    pos_, vel_ = discretization(env, obs)
    # Blend the old cell value with reward + gamma * (best value in the cell
    # reached after the step), weighted by alpha.
    q_table[pos][vel][a] = (1 - alpha) * q_table[pos][vel][a] + alpha * (
        reward + gamma * np.max(q_table[pos_][vel_]))
    # Fit toward the freshly updated Q-row for the pre-step state, carrying
    # center/delta/w forward; `loss` is not read within this fragment.
    center, delta, w, loss = bp_train(state, np.matrix(q_table[pos][vel]), n_hidden, 1, 0.005, 3, center, delta, w)
    steps += 1
    if terminate:
        print("Finished after: " + str(episode) + " steps" + str(steps))
        print("Cumulated Reward: " + str(total_reward))
        print("Complete!")
        break
# Renders forever once the stepping loop has ended; nothing after this point
# can run.  NOTE(review): presumably meant to keep the viewer window open --
# confirm this is intended.
while True:
    env.render()
# Training-loop fragment: steps the environment, uses get_predict on the
# post-step state to build an update for one entry of `target`, then calls
# bp_train on the pre-step state with that target.
# NOTE(review): this fragment was recovered from a single flattened line; the
# nesting below (the `if terminate:` body) is reconstructed -- confirm against
# the original layout.
# Names read here but bound outside this fragment: n_hidden, n, n_output, env,
# obs, total_reward, target, alpha, gamma, episode.
steps = 0
# Random starting parameters; they are passed into bp_train and replaced by
# its return values on every step.
center = mat(np.random.rand(n_hidden, n))
delta = mat(np.random.rand(1, n_hidden))
w = mat(np.random.rand(n_hidden, n_output))
# Runs for at most 3001 iterations (steps 0..3000) unless `terminate` breaks
# out earlier.
while steps <= 3000:
    env.render()
    #pos, vel = obs[0],obs[1]
    # state1: the observation before the step, after normalize_state.
    state1 = normalize_state(obs)
    state1 = np.matrix(state1)
    a = get_action(state1)
    #print('action',a)
    obs, reward, terminate, _ = env.step(a)
    # The running total accumulates |obs[0] + 0.5|, not the `reward` returned
    # by env.step.
    total_reward += abs(obs[0] + 0.5)
    # state2: the observation after the step, after normalize_state.
    state2 = normalize_state(obs)
    state2 = np.matrix(state2)
    predict = get_predict(state2, center, delta, w)
    # Only entry [0][a] of `target` is written in this fragment; the other
    # entries keep whatever values they held before this step.
    target[0][a] = (1 - alpha) * target[0][a] + alpha * (
        reward + gamma * np.max(predict))
    # Fit toward `target` for the pre-step state, carrying center/delta/w
    # forward; `loss` is not read within this fragment.
    center, delta, w, loss = bp_train(state1, np.matrix(target), n_hidden, 1, 0.01, 3, center, delta, w)
    steps += 1
    if terminate:
        print("Finished after: " + str(episode) + " steps" + str(steps))
        print("Cumulated Reward: " + str(total_reward))
        print("Complete!")
        break
# Disabled: endless render loop after the stepping loop.
#while True:
    #env.render()