print("[MESSAGE] Model built.")

# training bookkeeping
reward_sum = 0
running_reward = None
episode_number = 0
xs, dlogps, drs, probs = [], [], [], []  # states, log-prob grads, rewards, action probs
train_X, train_Y = [], []
num_victory = 0
# loop over the training games indefinitely
while True:
    for game_idx in range(num_train):
        # use only the first start position of each game
        for start_pos in [start_tot[game_idx][0]]:
            game = grid.Grid(data[game_idx],
                             value[game_idx],
                             imsize,
                             start_pos,
                             is_po=False)
            # play until the episode ends
            while True:
                # (debug) uncomment to visualize the three state channels
                #  game_state = game.get_state()
                #  plt.subplot(1, 3, 1)
                #  plt.imshow(game_state[0, 0], cmap="gray")
                #  plt.subplot(1, 3, 2)
                #  plt.imshow(game_state[0, 1], cmap="gray")
                #  plt.subplot(1, 3, 3)
                #  plt.imshow(game_state[0, 2], cmap="gray")
                #  plt.show()
                #  print(game_state[0, 0])
                # forward pass: action probabilities for the current state
                aprob = model.predict(game.get_state()).flatten()
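
Example #1 breaks off right after the forward pass. As a rough sketch of how a REINFORCE-style loop typically continues from aprob, the snippet below samples an action, records the log-probability gradient, and steps the game; game.take_action and its (reward, done) return value are hypothetical stand-ins for whatever grid.Grid actually exposes.

import numpy as np

def policy_gradient_step(model, game, xs, dlogps, drs):
    # Forward pass: action probabilities for the current state.
    state = game.get_state()
    aprob = model.predict(state).flatten()

    # Sample an action from the policy distribution.
    action = np.random.choice(len(aprob), p=aprob)

    # "Fake label" trick: one-hot action minus probabilities is the
    # gradient of log pi(action | state) w.r.t. the network output.
    y = np.zeros_like(aprob)
    y[action] = 1.0

    xs.append(state)          # state that produced the decision
    dlogps.append(y - aprob)  # log-prob gradient, weighted later by return
    reward, done = game.take_action(action)  # hypothetical grid.Grid API
    drs.append(reward)        # per-step reward
    return done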
Example #2

agent = DQN()

total_win = 0.
total_games = 0.

for game_idx in range(num_train):
    for start_idx in range(len(start_tot[game_idx])):
        print(start_tot[game_idx])
        start_pos = start_tot[game_idx][start_idx]
        curr_win = 0
        start_pos_flag = True
        for episode in range(EPISODE):
            total_games += 1.
            print("\nThis is game %d, start position %d, %s" % (game_idx + 1, start_idx + 1, list(map(str, start_pos))))
            print("Start position is marked 33; the goal is marked 99; any other index is the latest step number; 1 is free space; 0 is an obstacle.")
            game = grid.Grid(data[game_idx], value[game_idx], imsize, start_pos = start_pos, is_po=True)
            # on the first episode, adopt the start position the game actually used
            if start_pos_flag:
                if start_pos != game.pos_history[0]:
                    start_pos = game.pos_history[0]
                    print (game.pos_history[0])
                start_pos_flag = False
            # Train
            for step in range(STEP):
                # update the game state
                if step == 0:
                    # initial state: flattened current map stacked with the
                    # value map, with the agent's position marked
                    tmp_value_pos = value[game_idx].copy()
                    tmp_value_pos.reshape(dim, dim)[game.curr_pos] = 1
                    state = np.array([game.curr_map.ravel(), tmp_value_pos]).transpose()
                else:
                    state = next_state
                # get next action from current state
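
Example #2 cuts off at the action-selection comment. A standard way to fill in that step for a DQN agent is epsilon-greedy selection over the Q-values; agent.q_values below is an assumed accessor, not a documented method of the DQN class instantiated above.

import numpy as np

def egreedy_action(agent, state, epsilon, num_actions):
    # With probability epsilon take a random action (explore);
    # otherwise act greedily under the current Q estimates (exploit).
    if np.random.rand() < epsilon:
        return np.random.randint(num_actions)
    q = agent.q_values(state)  # assumed accessor on the DQN class
    return int(np.argmax(q))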
Example #3
# TensorFlow 1.x session with on-demand GPU memory growth
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
K.set_session(sess)

# DDPG-style actor and critic networks plus an experience replay buffer
actor = ActorNet(sess, state_dim, action_dim, batch_size, tau, lra)
critic = CriticNet(sess, state_dim, action_dim, batch_size, tau, lrc)
buff = ReplayBuffer(buffer_size)

# the game loop
for game_idx in range(episode_count):
    print("Episode : " + str(game_idx) + " Replay Buffer " + str(buff.count()))
    # use only the first start position of the first game
    for start_pos in [start_tot[0][0]]:
        # start game
        game = grid.Grid(data[0], value[0], imsize, start_pos, is_po=False)
        done = False

        s_t = game.get_state()
        s_t = s_t.transpose((0, 2, 3, 1))  # NCHW -> NHWC for the Keras networks

        total_reward = 0.
        while True:
            # (debug) uncomment to visualize the first two state channels
            #  plt.subplot(1, 3, 1)
            #  plt.imshow(s_t[0, :, :, 0], cmap="gray")
            #  plt.subplot(1, 3, 2)
            #  plt.imshow(s_t[0, :, :, 1], cmap="gray")
            #  plt.show()
            loss = 0
            epsilon -= 1.0 / explore  # anneal exploration over time
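
Example #3 stops just as epsilon is annealed. In a typical DDPG loop the next steps query the actor for an action, add exploration noise, and step the environment; the sketch below assumes the ActorNet wraps a Keras model exposed as actor.model and uses simple Ornstein-Uhlenbeck-style noise, neither of which is confirmed by the snippet.

import numpy as np

def noisy_actor_action(actor, s_t, epsilon, mu=0.0, theta=0.15, sigma=0.2):
    # Deterministic policy output (assumes a Keras model on actor.model).
    a_t = actor.model.predict(s_t)
    # Ornstein-Uhlenbeck-style exploration noise, scaled by epsilon so
    # exploration decays as training progresses.
    noise = theta * (mu - a_t) + sigma * np.random.randn(*a_t.shape)
    return a_t + max(epsilon, 0.0) * noise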