Пример #1
0
  # --- One training epoch for a REINFORCE (policy-gradient) agent. ---
  # Rolls out TEST_EPISODES_PER_EPOCH full episodes, fits the trainable model
  # on each episode's trajectory, then decays the exploration rate and
  # refreshes the score chart.
  scores = []
  # noise=EXPLORE_RATE: magnitude of exploration noise used by the agent.
  agent = ReinforceAgent(model, actions=ACTIONS, noise=EXPLORE_RATE)
  for _ in range(TEST_EPISODES_PER_EPOCH):
    env.reset()
    agent.reset()
    replay = []  # per-episode trajectory of (state, action, reward) tuples
    done = False
    while not done:
      action = agent.process(env.state)
      # NOTE(review): env.apply seems to return the *previous* state as its
      # 4th element, and the trajectory is built from prevState rather than
      # the first returned value — confirm against the environment code.
      _, reward, done, prevState = env.apply(action)
      replay.append((prevState, action, reward))
    ##
    # Transpose the trajectory into parallel state/action/reward sequences.
    states, actions, rewards = zip( *replay )
    
    # Convert raw actions into the integer indices the model expects.
    actions = ACTIONS.toIndex(actions)
    # Monte-Carlo policy-gradient update: a single pass over this episode,
    # using discounted returns (GAMMA) as the per-step training signal.
    trainable.fit(
      [
        np.array(states),
        np.array(actions),
        np.array(discountedReturns(rewards, GAMMA))
      ],
      epochs=1, verbose=0
    )
    ##
    scores.append(env.score)

  Utils.trackScores(scores, metrics)
  ##################
  # Exponential decay of the exploration rate, floored at 0.001.
  EXPLORE_RATE = max((0.001, EXPLORE_RATE * EXPLORE_RATE_DECAY))
  plotData2file(metrics, 'chart.jpg')
Пример #2
0
    # --- One training epoch for a DQN agent. ---
    print('[N = %d, M = %d] Start of %d epoch. Explore rate: %.3f' %
          (N, M, epoch, exploreRate))
    # train model
    # Sync the frozen target network with the online network once per epoch.
    model.updateTargetModel()
    losses = []
    for _ in range(TRAIN_EPISODES):
        # Sample a minibatch of transitions from the experience-replay buffer;
        # maxSamplesFromEpisode caps how many transitions may come from any
        # single episode, to decorrelate the batch.
        states, actions, rewards, nextStates, nextStateScoreMultiplier = memory.sampleBatch(
            batch_size=BATCH_SIZE, maxSamplesFromEpisode=16)
        actions = ACTIONS.toIndex(actions)

        # nextStateScoreMultiplier presumably zeros the bootstrap term for
        # terminal states — TODO confirm; the discount GAMMA is folded in here.
        _, loss = model.fit(states, actions, rewards, nextStates,
                            nextStateScoreMultiplier * GAMMA)
        losses.append(loss)
        ######

    print('Avg. train loss: %.4f' % (np.mean(losses)))
    ##################
    print('Testing...')
    # Evaluate the current policy (still with exploration) for TEST_EPISODES
    # episodes; testAgent also appears to feed experience back into memory.
    scores = Utils.testAgent(
        DQNAgent(model, actions=ACTIONS, exploreRate=exploreRate), memory,
        TEST_EPISODES)
    Utils.trackScores(scores, metrics)
    ##################
    if ((epoch % 10) == 0) and not COLAB_ENV:  # debug
        # Every 10th epoch (local runs only): watch a greedy rollout
        # (exploreRate=0 disables exploration).
        Utils.showAgentPlay(DQNAgent(model, actions=ACTIONS, exploreRate=0))
    ##################
    # Exponential decay of the exploration rate, floored at 0.001.
    exploreRate = max((0.001, exploreRate * EXPLORE_RATE_DECAY))
    plotData2file(metrics, 'chart-%d-%d.jpg' % (
        N,
        M,
    ))