def main(argv):
    args = get_args(argv)
    print(args)

    policy_mod = import_module(args.policy_source)
    policy_class = getattr(policy_mod, 'DaggerPolicy')
    policy = policy_class(**vars(args))

    dagger = Dagger(None, policy, **vars(args))
    # dagger.learn_all_samples(save_file_name="dagger_block_world", load_file_name="dagger_block_world")
    dagger.learn_all_samples()
def main(argv):
    args = get_args(argv)
    x = env_args(args)
    print(x)
    env = make(**vars(x))

    policy_mod = import_module(args.policy_source)
    policy_class = getattr(policy_mod, 'DaggerPolicy')
    policy = policy_class(**vars(args))

    dagger = Dagger(env, policy, **vars(args))
    dagger.test(args.save_file_name)
    env.close()
def main(argv):
    args = get_args(argv)

    policy_mod = import_module(args.policy_source)
    policy_class = getattr(policy_mod, 'DaggerPolicy')
    policy = policy_class(**vars(args))

    x = env_args(args)
    env = make(**vars(x))

    args.iterations = 0
    args.num_rollouts = 250
    print(args.num_rollouts)

    dagger = Dagger(env, policy, **vars(args))
    dagger.num_probes = 0
    dagger.explore_only()
    env.close()
def main(argv):
    parser = argparse.ArgumentParser(description='block_world')
    parser.add_argument('--greedy', dest='greedy', action='store_true')
    parser.add_argument('--no-greedy', dest='greedy', action='store_false')
    parser.add_argument('--dims', type=int, default=3)
    parser.add_argument('--span', type=int, default=10)
    parser.add_argument('--max-timesteps', type=int, default=2000000)
    parser.add_argument('--exploration_fraction', type=float, default=0.1)
    parser.add_argument('--exploration_final_eps', type=float, default=0.01)
    parser.add_argument('--l2-penalty', type=float, default=None)
    parser.add_argument('--continous-actions', dest='continous_actions', action='store_true')
    parser.add_argument('--no-continous-actions', dest='continous_actions', action='store_false')
    parser.add_argument('--reach-minimum', type=float, default=0.1)
    parser.set_defaults(greedy=False)
    parser.set_defaults(continous_actions=False)
    cmd_args = parser.parse_args(argv)
    print(cmd_args)

    env = make(span=cmd_args.span,
               dims=cmd_args.dims,
               greedy=cmd_args.greedy,
               l2_penalty=cmd_args.l2_penalty,
               continous_actions=cmd_args.continous_actions,
               reach_minimum=cmd_args.reach_minimum)

    dagger = Dagger(env, DaggerPolicy,
                    num_rollouts=25,
                    train_batch_size=25,
                    train_epochs=20,
                    iterations=20,
                    dir_name='tmp_storage')
    dagger.learn(save_file_name="dagger_dist_world")
    env.close()
def check_keydown_events(event, ai_settings, screen, player, daggers, game_state, screen_state, shop_menu):
    # Respond to key presses
    if game_state.get_state() == GS.VICTORY:
        # TODO: add victory closing animation here
        screen_state.set_state(ScS.FADE_OUT)
    elif game_state.get_state() == GS.INVASION:
        if event.key == pygame.K_UP or event.key == pygame.K_w:
            player.moving_up = True
            # player.sprite_loop = player.moving_up_sprite  # debug
            print('moving up')
        if event.key == pygame.K_DOWN or event.key == pygame.K_s:
            player.moving_down = True
            # player.sprite_loop = player.moving_down_sprite  # debug
            print('moving down')
        if event.key == pygame.K_LEFT or event.key == pygame.K_a:
            player.moving_left = True
            # player.sprite_loop = player.moving_left_sprite  # debug
            print('moving left')
        if event.key == pygame.K_RIGHT or event.key == pygame.K_d:
            player.moving_right = True
            # player.sprite_loop = player.moving_right_sprite  # debug
            print('moving right')
        if event.key == pygame.K_SPACE:
            # Create a new dagger and add it to the daggers group
            if len(daggers) < ai_settings.daggers_allowed:
                new_dagger = Dagger(ai_settings, screen, player)
                daggers.add(new_dagger)
        if event.key == pygame.K_BACKQUOTE:
            ai_settings.dagger_height = 300
    elif game_state.get_state() == GS.SHOP:
        if event.key == pygame.K_RETURN:
            shop_menu.get_selections()[shop_menu.get_current_selection()].select()
        else:
            if event.key == pygame.K_UP or event.key == pygame.K_w:
                shop_menu.update_selection_rev()
            if event.key == pygame.K_DOWN or event.key == pygame.K_s:
                shop_menu.update_selection()
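# Hedged sketch, not part of the original source: the moving_* flags set in
# check_keydown_events above presumably need to be cleared when keys are
# released. A minimal check_keyup_events counterpart might look like this;
# the function name and parameters are illustrative assumptions.
def check_keyup_events(event, player):
    # Stop movement when the corresponding key is released
    if event.key == pygame.K_UP or event.key == pygame.K_w:
        player.moving_up = False
    if event.key == pygame.K_DOWN or event.key == pygame.K_s:
        player.moving_down = False
    if event.key == pygame.K_LEFT or event.key == pygame.K_a:
        player.moving_left = False
    if event.key == pygame.K_RIGHT or event.key == pygame.K_d:
        player.moving_right = False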
def learn(env, policy):
    dagger = Dagger(env)
    dagger.learn(policy)
    if t == 0:
        il_analysis.count_states(sup.get_states())
        il_analysis.save_states("comparisons/comparisons/net_classic_il.png")
        il_analysis.show_states()
        plotter.plot_state_actions(mdp.pi, rewards=grid.reward_states, sinks=grid.sink_states,
                                   filename='comparisons/comparisons/net_classic_il_state_action.png')
    classic_il_data[t, :] = np.zeros(ITER) + r

# DAGGER
dagger_data = np.zeros((TRIALS, ITER))
dagger_train, dagger_test = np.zeros((TRIALS, ITER)), np.zeros((TRIALS, ITER))
dagger_analysis = Analysis(H, W, ITER, rewards=grid.reward_states, sinks=grid.sink_states,
                           desc="Dagger's policy progression")

for t in range(TRIALS):
    mdp.load_policy()
    dagger = Dagger(grid, mdp)
    dagger.rollout()

    r_D = np.zeros(ITER)
    dagger_test_acc = np.zeros(ITER)
    dagger_train_acc = np.zeros(ITER)

    for _ in range(ITER):
        print "Dagger iteration:", _
        dagger.retrain()
        dagger_train_acc[_], dagger_test_acc[_] = dagger.net.return_stats()
        iteration_states = []
        for i in range(SAMP):
            dagger.rollout()
            iteration_states += dagger.get_recent_rollout_states().tolist()
            r_D[_] = r_D[_] + dagger.get_reward() / SAMP
        if _ == ITER - 1 and t == 0:
def main(argv):
    parser = argparse.ArgumentParser(description='block_world')
    parser.add_argument('--dims', type=int, default=3)
    parser.add_argument('--span', type=int, default=10)
    parser.add_argument('--episodes', type=int, default=100)
    parser.add_argument('--run', dest='run', action='store_true')
    parser.add_argument('--no-run', dest='run', action='store_false')
    parser.add_argument('--vis', dest='vis', action='store_true')
    parser.add_argument('--no-vis', dest='vis', action='store_false')
    parser.set_defaults(vis=False, run=True)
    cmd_args = parser.parse_args(argv)
    print(cmd_args)

    np.random.seed()
    env = make(span=cmd_args.span, dims=cmd_args.dims)
    dagger = Dagger(env)

    with tf.Session() as sess:
        dagger.test(DaggerPolicy, "dagger_dist_world")

        if cmd_args.vis:
            block_env = make_block_env(run=cmd_args.run)
            y_origin = 0.2 + cmd_args.span * 0.1
            block_env.set_params(tray_length=2. * float(cmd_args.span) * 0.1,
                                 tray_width=2. * float(cmd_args.span) * 0.1,
                                 tray_height=0.1,
                                 rim_height=0.05,
                                 rim_width=0.05)

        total_reward = 0

        def shift_y(pos):
            shifted = np.copy(pos)
            shifted[1] += y_origin
            return shifted

        for _ in range(cmd_args.episodes):
            obs, done = env.reset(), False
            if cmd_args.vis:
                block_env.clear_tray()
                pos = env.finger_pos.copy()
                pos[1] += y_origin
                block_env.set_finger(shift_y(env.finger_pos.astype(float) * 0.1))
                block_env.set_target(shift_y(env.target_pos.astype(float) * 0.1))
            episode_rew = 0
            while not done:
                env.render()
                action = dagger.eval_policy(obs)[0]
                obs, rew, done, _ = env.step(action)
                episode_rew += rew
                if cmd_args.vis:
                    action = env.map_discrete_action(action)
                    block_env.move_finger(action.astype(float) * 0.1)
            print("Episode reward", episode_rew)
            total_reward += episode_rew

        print("average reward " + str(float(total_reward) / float(cmd_args.episodes)))

    env.close()
    if cmd_args.vis:
        block_env.close()
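# Hedged sketch, not part of the original snippets: each main(argv) above parses
# argv directly (e.g. via argparse.parse_args(argv)), so a typical script entry
# point would forward sys.argv[1:], i.e. the command-line arguments without the
# program name.
import sys

if __name__ == '__main__':
    main(sys.argv[1:])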