示例#1
0
def start_nn():
    """Run the load -> train -> predict -> save pipeline and report success.

    The data set is read from ``data.txt``, the model is fitted with
    ``bp_train``, predictions are produced for the very same features that
    were used for fitting, and the fitted parameters plus those predictions
    are handed to ``save_model_result``. Progress banners are printed before
    each stage.

    Returns:
        Whatever ``jsonify({"res": "success"})`` produces.
    """
    print("--------- 1.load data ------------")
    features, labels, output_dim = load_data("data.txt")

    print("--------- 2.training ------------")
    # Fixed positional settings forwarded to bp_train between the labels and
    # the output dimension.
    train_settings = (20, 5000, 0.008)
    centers, deltas, weights = bp_train(
        features, labels, *train_settings, output_dim
    )

    print("--------- 3.get prediction ------------")
    predictions = get_predict(features, centers, deltas, weights)
    # Accuracy reporting (1 - err_rate(labels, predictions)) is currently
    # disabled.

    print("--------- 4.save model and result ------------")
    save_model_result(centers, deltas, weights, predictions)

    payload = {"res": "success"}
    return jsonify(payload)
示例#2
0
    alpha = min_lr
    steps = 0
    center = mat(np.random.rand(n_hidden, n))
    delta = mat(np.random.rand(1, n_hidden))
    w = mat(np.random.rand(n_hidden, n_output))
    while steps <= 3000:
        env.render()
        pos, vel = discretization(env, obs)
        state = normalize(np.array([pos, vel]).reshape(1, 2),
                          axis=1,
                          norm='max')
        state = np.matrix(state)
        a = get_action(state)
        obs, reward, terminate, _ = env.step(a)
        total_reward += abs(obs[0] + 0.5)
        pos_, vel_ = discretization(env, obs)
        q_table[pos][vel][a] = (1 - alpha) * q_table[pos][vel][a] + alpha * (
            reward + gamma * np.max(q_table[pos_][vel_]))
        center, delta, w, loss = bp_train(state, np.matrix(q_table[pos][vel]),
                                          n_hidden, 1, 0.005, 3, center, delta,
                                          w)
        steps += 1
        if terminate:
            print("Finished after: " + str(episode) + " steps" + str(steps))
            print("Cumulated Reward: " + str(total_reward))
            print("Complete!")
            break

# Training driver: loops forever; each outer pass re-creates the network
# parameters and then runs at most 3001 environment steps (steps 0..3000).
# NOTE(review): nothing visible in this block resets `env`/`obs`, clears
# `total_reward`, or advances `episode` between outer passes, and the outer
# loop has no exit condition -- confirm against the rest of the file.
while True:
    env.render()
    steps = 0
    # Parameters are re-drawn uniformly from [0, 1) on every outer pass, so
    # anything fitted during the previous pass is discarded.
    # NOTE(review): `mat` is presumably numpy's matrix constructor -- confirm.
    center = mat(np.random.rand(n_hidden, n))
    delta = mat(np.random.rand(1, n_hidden))
    w = mat(np.random.rand(n_hidden, n_output))
    while steps <= 3000:
        env.render()
        #pos, vel = obs[0],obs[1]
        # Current observation, passed through normalize_state (helper not
        # shown here) and wrapped as a numpy matrix.
        state1 = normalize_state(obs)
        state1 = np.matrix(state1)
        # Action selection is delegated to get_action (not shown here).
        a = get_action(state1)
        #print('action',a)
        # The fourth value returned by env.step is ignored.
        obs, reward, terminate, _ = env.step(a)
        # The running score is |obs[0] + 0.5|, not the environment's own
        # `reward`.
        # NOTE(review): presumably distance from a start position of -0.5 --
        # confirm.
        total_reward += abs(obs[0] + 0.5)
        # Same preprocessing for the observation reached after the step.
        state2 = normalize_state(obs)
        state2 = np.matrix(state2)
        # Network outputs for the next state; their maximum is used below.
        predict = get_predict(state2, center, delta, w)
        # Blend the stored target for the chosen action with
        # reward + gamma * max(predict), weighted by alpha.
        # NOTE(review): `target` is not recomputed for state1 inside this
        # loop; it carries over from the previous step -- confirm intended.
        target[0][a] = (1 - alpha) * target[0][a] + alpha * (
            reward + gamma * np.max(predict))

        # Update the network on (state1, target), starting from the current
        # parameters; the returned `loss` is not used in this block.
        # NOTE(review): the meaning of the positional 1, 0.01 and 3 is
        # defined by bp_train, which is not visible here.
        center, delta, w, loss = bp_train(state1, np.matrix(target), n_hidden,
                                          1, 0.01, 3, center, delta, w)
        # Incremented before the terminate check, so the count printed below
        # includes the step that ended the run.
        steps += 1
        if terminate:
            # Output has no separator between " steps" and the number,
            # e.g. "Finished after: 3 steps17".
            print("Finished after: " + str(episode) + " steps" + str(steps))
            print("Cumulated Reward: " + str(total_reward))
            print("Complete!")
            # Leaves only the inner loop; the outer `while True` starts over.
            break

#while True:
#env.render()