Example #1
import msvcrt  # Windows-only; used by the manual-control branch below

game = GAME()
game.set_timer(100)
network = Network(n_output, lr_rate)
agent = Agent(network, gamma, batch_size, n_action, input_dims, mem_size)

scores = []
avg_rewards = []

score_max = 0
n_total = 0  # total number of transitions stored across all episodes

for n in range(n_games):
    if n != 0:
        # Log the episode that just finished, then learn from its stored transitions.
        scores.append(score)
        avg_rewards.append(total_reward / n_moves)
        agent.learn(n_total, n_total + n_moves)
        n_total += n_moves  # advance past the transitions already learned from
    score = 0
    game.score = 0
    total_reward = 0
    n_moves = 0
    done = False
    observation = game.get_states()
    while not done:
        action = agent.choose_action(observation)
        if n < -1:  # never true: flip this to take actions from the keyboard instead
            game.draw()
            action = int(msvcrt.getch().decode("utf-8"))

        observation_, reward, done, score = game.spin_once(action, n_moves)
        agent.store_data(observation, action, reward, observation_, done)
        observation = observation_  # move on to the next state
        total_reward += reward
        n_moves += 1
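The loop above only shows the driver; the Agent it calls is assumed to expose choose_action, store_data, and a range-based learn over one episode's stored transitions. Below is a minimal sketch of that contract, assuming an epsilon-greedy policy and a Network with hypothetical predict, max_q, and fit helpers (none of these internals are confirmed by the excerpt):

import numpy as np

class Agent:
    """Ring-buffer replay agent matching the calls made in the loop above."""

    def __init__(self, network, gamma, batch_size, n_action, input_dims, mem_size):
        self.network = network
        self.gamma = gamma
        self.batch_size = batch_size
        self.n_action = n_action
        self.mem_size = mem_size
        self.mem_cntr = 0
        self.epsilon = 0.1  # hypothetical fixed exploration rate
        self.states = np.zeros((mem_size, input_dims), dtype=np.float32)
        self.actions = np.zeros(mem_size, dtype=np.int64)
        self.rewards = np.zeros(mem_size, dtype=np.float32)
        self.states_ = np.zeros((mem_size, input_dims), dtype=np.float32)
        self.dones = np.zeros(mem_size, dtype=bool)

    def choose_action(self, observation):
        # Epsilon-greedy over the network's Q-values.
        if np.random.random() < self.epsilon:
            return np.random.randint(self.n_action)
        return int(np.argmax(self.network.predict(observation)))  # hypothetical API

    def store_data(self, state, action, reward, state_, done):
        i = self.mem_cntr % self.mem_size  # overwrite the oldest entry when full
        self.states[i], self.actions[i] = state, action
        self.rewards[i], self.states_[i], self.dones[i] = reward, state_, done
        self.mem_cntr += 1

    def learn(self, start, end):
        # One-step TD targets over the episode's transitions [start, end).
        idx = np.arange(start, end) % self.mem_size
        targets = self.rewards[idx] + self.gamma \
            * self.network.max_q(self.states_[idx]) * (~self.dones[idx])
        self.network.fit(self.states[idx], self.actions[idx], targets)  # hypothetical API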
Example #2
import numpy as np

# env, MECSnet and num_task are assumed to be defined earlier in the source file.
score_record = []
score_record_step = []
count_record = []
count_record_step = []
time_record = []
time_record_step = []
for i in range(800):
    done = False
    score = 0
    obs = env.get_init_state()
    # Keep stepping until every task has been allocated.
    while not done:
        act = MECSnet.choose_action(obs)
        new_state, reward, done = env.step(act)
        MECSnet.remember(obs, act, reward, new_state, int(done))
        MECSnet.learn()
        score += reward
        obs = new_state
        # print('reward is: {}'.format(reward))

    # Append this episode's total reward to the record.
    score_record.append(score)
    # print('episode ', i, 'score %.2f' % score, '100 game average %.2f' % np.mean(score_record[-100:]))
    print('episode ', i, 'score %.2f' % score, "    wrong: ", env.count_wrong)
    count_record.append(1 - env.count_wrong / num_task)  # fraction of tasks handled correctly
    time_record.append(env.time)
    if i % 25 == 0:
        # UEnet.save_models()
        MECSnet.save_models()
        score_record_step.append(np.mean(score_record))
        count_record_step.append(np.mean(count_record))
        time_record_step.append(np.mean(time_record))  # running average of completion times at each checkpoint
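Unlike Example 1, which learns once per episode over that episode's stored span, this loop calls MECSnet.learn() after every environment step. The 25-episode running averages collected above plot directly; a minimal sketch, assuming matplotlib is installed (axis labels are illustrative guesses at the metrics' meaning):

import matplotlib.pyplot as plt

checkpoints = [i * 25 for i in range(len(score_record_step))]  # episode index of each save

fig, axes = plt.subplots(1, 3, figsize=(12, 3))
for ax, series, label in zip(
        axes,
        (score_record_step, count_record_step, time_record_step),
        ('mean score', 'allocation success rate', 'mean time')):
    ax.plot(checkpoints, series)
    ax.set_xlabel('episode')
    ax.set_ylabel(label)
fig.tight_layout()
plt.show()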