print("[MESSAGE] Model built.") # training schedule reward_sum = 0 running_reward = None episode_number = 0 xs, dlogps, drs, probs = [], [], [], [] train_X, train_Y = [], [] num_victory = 0 # go through entire game space while True: for game_idx in xrange(num_train): for start_pos in [start_tot[game_idx][0]]: game = grid.Grid(data[game_idx], value[game_idx], imsize, start_pos, is_po=False) # until the game is failed while True: # game_state = game.get_state() # plt.subplot(1, 3, 1) # plt.imshow(game_state[0, 0], cmap="gray") # plt.subplot(1, 3, 2) # plt.imshow(game_state[0, 1], cmap="gray") # plt.subplot(1, 3, 3) # plt.imshow(game_state[0, 2], cmap="gray") # plt.show() # print (game_state[0, 0]) # compute probability aprob = model.predict(game.get_state()).flatten()
agent = DQN()
total_win = 0.
total_games = 0.

for game_idx in xrange(num_train):
    for start_idx in range(len(start_tot[game_idx])):
        print(start_tot[game_idx])
        start_pos = start_tot[game_idx][start_idx]
        curr_win = 0
        start_pos_flag = True
        for episode in xrange(EPISODE):
            total_games += 1.
            print("\nThis is game %d, start position %d, %s" % (game_idx + 1, start_idx + 1, map(str, start_pos)))
            print("Start position is marked 33; the goal is marked 99; "
                  "other indices indicate the latest step number; 1 is free space; 0 is an obstacle.")
            game = grid.Grid(data[game_idx], value[game_idx], imsize, start_pos=start_pos, is_po=True)
            # sync start_pos with the position the grid actually used
            if start_pos_flag:
                if start_pos != game.pos_history[0]:
                    start_pos = game.pos_history[0]
                    print(game.pos_history[0])
                start_pos_flag = False
            # Train
            for step in xrange(STEP):
                # update game state
                if step == 0:
                    tmp_value_pos = value[game_idx].copy()
                    tmp_value_pos.reshape(dim, dim)[game.curr_pos] = 1
                    state = np.array([game.curr_map.ravel(), tmp_value_pos]).transpose()
                else:
                    state = next_state
                # get the next action from the current state
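# ---------------------------------------------------------------------------
# Hypothetical sketch (not part of the original script): the "get the next
# action from the current state" step of a DQN agent is typically an
# epsilon-greedy choice over the predicted Q-values. The function below is
# illustrative only; the real agent's method name and Q-network are not shown.
# ---------------------------------------------------------------------------
import numpy as np

def egreedy_action(q_values, epsilon=0.1):
    """With probability epsilon take a uniformly random action,
    otherwise take the action with the highest predicted Q-value."""
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))   # explore
    return int(np.argmax(q_values))               # exploit

# usage (assumed): action = egreedy_action(q_values_for_state, epsilon)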
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
K.set_session(sess)

actor = ActorNet(sess, state_dim, action_dim, batch_size, tau, lra)
critic = CriticNet(sess, state_dim, action_dim, batch_size, tau, lrc)
buff = ReplayBuffer(buffer_size)

# the game loop
for game_idx in xrange(episode_count):
    print("Episode : " + str(game_idx) + " Replay Buffer " + str(buff.count()))
    for start_pos in [start_tot[0][0]]:
        # start game
        game = grid.Grid(data[0], value[0], imsize, start_pos, is_po=False)
        done = False
        s_t = game.get_state()
        s_t = s_t.transpose((0, 2, 3, 1))
        total_reward = 0.
        while True:
            # plt.subplot(1, 3, 1)
            # plt.imshow(s_t[0, :, :, 0], cmap="gray")
            # plt.subplot(1, 3, 2)
            # plt.imshow(s_t[0, :, :, 1], cmap="gray")
            # plt.show()
            loss = 0
            epsilon -= 1.0 / explore
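# ---------------------------------------------------------------------------
# Hypothetical sketch (not part of the original script): DDPG-style actors
# commonly perturb their actions with Ornstein-Uhlenbeck noise scaled by the
# annealed `epsilon` above. The class, parameter values, and the usage note
# below are assumptions for illustration; the original ActorNet/CriticNet
# interfaces are not reproduced here.
# ---------------------------------------------------------------------------
import numpy as np

class OUNoise(object):
    """Temporally correlated exploration noise: dx = theta*(mu - x) + sigma*N(0, 1)."""

    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2):
        self.mu, self.theta, self.sigma = mu, theta, sigma
        self.state = np.ones(action_dim) * mu

    def sample(self):
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state

# usage (assumed): a_t = actor_prediction + max(epsilon, 0) * noise.sample()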