import gym

import user_input


def main():
    # Synchronous keyboard input: block until the human presses a key.
    user_input_module = user_input.UserInputModule(is_asyn=False)
    env = gym.make('Assault-v0')
    for i_episode in range(20):
        observation = env.reset()
        for t in range(100):
            env.render()
            print(observation)
            # Map the pressed key to an Assault-v0 action index.
            human_action = user_input_module.getInput()
            if human_action == 'j':
                action = 0
            elif human_action == 'f':
                action = 1
            elif human_action == 'd':
                action = 2
            elif human_action == 's':
                action = 3
            elif human_action == 'a':
                action = 4
            elif human_action == 'k':
                action = 5
            else:
                action = 0
            print(action)
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} time steps".format(t + 1))
                break
    env.close()
def run_episodes(self):
    # Asynchronous keyboard input: poll for a key without blocking the episode.
    user_input_module = user_input.UserInputModule(is_asyn=True)
    env = gym.make('Assault-v0')
    human_actions = {}
    for i_episode in range(5):
        episode_log_dir = self.expr_log_dir + '/' + 'episode-' + str(i_episode)
        if not os.path.exists(episode_log_dir):
            os.makedirs(episode_log_dir)
        env.reset()
        for t in range(10000):
            env.render()
            # Save the raw screen so the episode can be replayed or labelled later.
            env.env.ale.saveScreenPNG(
                six.b(episode_log_dir + '/' + str(t) + '.png'))
            human_action = user_input_module.getInput()
            if human_action is None:
                # No key pressed this step: fall back to a random action.
                action = env.action_space.sample()
            else:
                print(human_action)
                human_actions[str(i_episode) + '_' + str(t)] = human_action
                if human_action == 'j':
                    action = 0
                elif human_action == 'f':
                    action = 1
                elif human_action == 'd':
                    action = 2
                elif human_action == 's':
                    action = 3
                elif human_action == 'a':
                    action = 4
                elif human_action == 'k':
                    action = 5
                else:
                    action = 0
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} time steps".format(t + 1))
                break
        # Persist the collected human actions after each episode.
        counter_saver.saveDictToFile(
            self.expr_log_dir + '/' + 'human_actions.txt', human_actions)
    env.close()
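Both control loops above repeat the same six-branch if/elif chain to turn a pressed key into an Assault-v0 action index. A minimal sketch of that mapping as a dictionary lookup follows; the constant KEY_TO_ACTION and the helper map_key_to_action are illustrative names, not part of the original code.

KEY_TO_ACTION = {'j': 0, 'f': 1, 'd': 2, 's': 3, 'a': 4, 'k': 5}


def map_key_to_action(key, default=0):
    """Translate a keyboard character into an Assault-v0 action index."""
    return KEY_TO_ACTION.get(key, default)

With this helper, each loop body reduces to action = map_key_to_action(human_action); run_episodes() would still fall back to env.action_space.sample() when no key was pressed.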
def __init__(self,
             grid_name='DiscountGrid',
             discount=0.9,
             learning_rate=0.5,
             living_reward=0.0,
             noise=0,
             epsilon=0.3,
             display_speed=0.5,
             grid_size=150,
             text_only=False,
             n_episodes=100,
             agent_window_size=1,
             agent_max_n_experiences=1000,
             check_policy_converge=False,
             optimal_policy=None,
             expr_log_dir=None,
             agent_type="qLearningAgent",
             init_temp=1024.0,
             temp_decrease_rate=2.0,
             is_asyn_input=True):
    """
    :param agent_type: "qLearningAgent" or "TamerAgent" or "preferenceTAMERAgent"
    """
    ###########################
    # GENERAL CONTROL
    ###########################
    self.text_only = text_only
    self.display_speed = display_speed
    self.n_episodes = n_episodes
    self.discount = discount
    self.check_policy_converge = check_policy_converge
    self.optimal_policy = optimal_policy
    self.expr_log_dir = expr_log_dir
    self.save_VDBE = False

    ###########################
    # GET THE INPUT MODULE
    ###########################
    if agent_type == qlearningAgents.QLearningAgent.getAgentType():
        self.user_input_module = None
    else:
        self.user_input_module = user_input.UserInputModule(is_asyn=is_asyn_input)

    ###########################
    # GET THE GRIDWORLD
    ###########################
    # noinspection PyUnresolvedReferences
    import gridworld
    mdp_function = getattr(gridworld, "get" + grid_name)
    self.mdp = mdp_function()
    self.mdp.setLivingReward(living_reward)
    self.mdp.setNoise(noise)
    self.env = gridworld.GridworldEnvironment(self.mdp)

    ###########################
    # Variables used to store parameters values
    ###########################
    # init VDBE values records
    global VDBE_RECORDS
    VDBE_RECORDS = dict()
    for state in self.env.getGridWorld().getNonTerminalStates():
        VDBE_RECORDS[state] = list()

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################
    import textGridworldDisplay
    self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
    if not text_only:
        import graphicsGridworldDisplay
        self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
            self.mdp, grid_size, display_speed)
    try:
        self.display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE TAMER AGENT
    ###########################
    # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
    # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent, state, mdp)
    self.gridWorldEnv = GridworldEnvironment(self.mdp)
    action_function = lambda m_state: self.mdp.getPossibleActions(m_state)
    q_learn_opts = {
        'gamma': discount,
        'alpha': learning_rate,
        'epsilon': epsilon,
        'actionFn': action_function,
        'init_temp': init_temp,
        'temp_decrease_rate': temp_decrease_rate
    }
    if agent_type == qlearningAgents.QLearningAgent.getAgentType():
        self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
    elif agent_type == qlearningAgents.TamerQAgent.getAgentType():
        self.agent = qlearningAgents.TamerQAgent(
            max_n_experiences=agent_max_n_experiences,
            window_size=agent_window_size,
            is_asyn_input=is_asyn_input,
            **q_learn_opts)
    elif agent_type == preferenceTamerAgent.PreferenceTAMERAgent.getAgentType():
        self.agent = preferenceTamerAgent.PreferenceTAMERAgent(
            max_n_experiences=agent_max_n_experiences,
            window_size=agent_window_size,
            is_asyn_input=is_asyn_input,
            **q_learn_opts)
def __init__(self,
             grid_name='DiscountGrid',
             discount=0.9,
             learning_rate=0.5,
             living_reward=0.0,
             noise=0,
             epsilon=0.3,
             display_speed=0.5,
             grid_size=150,
             text_only=False,
             n_episodes=100,
             agent_window_size=1,
             agent_max_n_experiences=1000,
             check_value_converge=False,
             check_policy_converge=False,
             optimal_policy=None,
             expr_log_dir=None,
             delta=0.02,
             is_use_q_agent=False,
             init_temp=1024.0,
             temp_decrease_rate=2.0,
             is_asyn_input=True):
    ###########################
    # GENERAL CONTROL
    ###########################
    self.text_only = text_only
    self.display_speed = display_speed
    self.n_episodes = n_episodes
    self.discount = discount
    self.check_value_converge = check_value_converge
    self.check_policy_converge = check_policy_converge
    self.optimal_policy = optimal_policy
    self.expr_log_dir = expr_log_dir
    self.delta = delta

    ###########################
    # GET THE INPUT MODULE
    ###########################
    if is_use_q_agent:
        self.user_input_module = None
    else:
        self.user_input_module = user_input.UserInputModule(
            is_asyn=is_asyn_input)

    ###########################
    # GET THE GRIDWORLD
    ###########################
    # noinspection PyUnresolvedReferences
    import gridworld
    mdp_function = getattr(gridworld, "get" + grid_name)
    self.mdp = mdp_function()
    self.mdp.setLivingReward(living_reward)
    self.mdp.setNoise(noise)
    self.env = gridworld.GridworldEnvironment(self.mdp)

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################
    import textGridworldDisplay
    self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
    if not text_only:
        import graphicsGridworldDisplay
        self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
            self.mdp, grid_size, display_speed)
    try:
        self.display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE TAMER AGENT
    ###########################
    import qlearningAgents
    # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
    # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent, state, mdp)
    self.gridWorldEnv = GridworldEnvironment(self.mdp)
    action_function = lambda state: self.mdp.getPossibleActions(state)
    q_learn_opts = {
        'gamma': discount,
        'alpha': learning_rate,
        'epsilon': epsilon,
        'actionFn': action_function,
        'init_temp': init_temp,
        'temp_decrease_rate': temp_decrease_rate
    }
    if is_use_q_agent:
        self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
    else:
        self.agent = qlearningAgents.TamerQAgent(
            max_n_experiences=agent_max_n_experiences,
            window_size=agent_window_size,
            is_asyn_input=is_asyn_input,
            **q_learn_opts)
def __init__(self, optimal_policy=None, expr_log_dir=None):
    ###########################
    # GENERAL CONTROL
    ###########################
    self.text_only = ExperimentConfigurator.experimentConfig['text_only']
    self.display_speed = ExperimentConfigurator.experimentConfig['display_speed']
    self.n_episodes = ExperimentConfigurator.gridWorldConfig['n_episodes']
    self.discount = ExperimentConfigurator.gridWorldConfig['discount']
    self.check_policy_converge = ExperimentConfigurator.experimentConfig[
        'check_policy_converge']
    self.optimal_policy = optimal_policy
    self.expr_log_dir = expr_log_dir
    self.save_VDBE = ExperimentConfigurator.experimentConfig['save_VDBE']

    ###########################
    # GET THE INPUT MODULE
    ###########################
    if (ExperimentConfigurator.experimentConfig['agent_type'] ==
            qlearningAgents.QLearningAgent.getAgentType()):
        self.user_input_module = None
    else:
        self.user_input_module = user_input.UserInputModule(
            is_asyn=ExperimentConfigurator.TamerConfig['is_asyn_input'])
        self.auto_feedback = AutoFeedback()

    ###########################
    # GET THE GRIDWORLD
    ###########################
    # noinspection PyUnresolvedReferences
    import gridworld
    mdp_function = getattr(
        gridworld, "get" + ExperimentConfigurator.gridWorldConfig['grid_name'])
    self.mdp = mdp_function()
    self.mdp.setLivingReward(
        ExperimentConfigurator.gridWorldConfig['living_reward'])
    self.mdp.setNoise(ExperimentConfigurator.gridWorldConfig['noise'])
    self.env = gridworld.GridworldEnvironment(self.mdp)

    ###########################
    # Variables used to store parameters values
    ###########################
    # init VDBE values records
    global VDBE_RECORDS
    VDBE_RECORDS = dict()
    for state in self.env.getGridWorld().getNonTerminalStates():
        VDBE_RECORDS[state] = list()

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################
    import textGridworldDisplay
    self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
    if not self.text_only:
        import graphicsGridworldDisplay
        self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
            self.mdp, ExperimentConfigurator.gridWorldConfig['grid_size'],
            self.display_speed)
    try:
        self.display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE TAMER AGENT
    ###########################
    # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
    # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent, state, mdp)
    self.gridWorldEnv = GridworldEnvironment(self.mdp)
    action_function = lambda m_state: self.mdp.getPossibleActions(m_state)
    q_learn_opts = {'actionFn': action_function}
    if (ExperimentConfigurator.experimentConfig['agent_type'] ==
            qlearningAgents.QLearningAgent.getAgentType()):
        self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
    elif (ExperimentConfigurator.experimentConfig['agent_type'] ==
            qlearningAgents.TamerQAgent.getAgentType()):
        self.agent = qlearningAgents.TamerQAgent(
            max_n_experiences=ExperimentConfigurator.TamerConfig['agent_max_n_experiences'],
            window_size=ExperimentConfigurator.TamerConfig['agent_window_size'],
            is_asyn_input=ExperimentConfigurator.TamerConfig['is_asyn_input'],
            **q_learn_opts)
    elif (ExperimentConfigurator.experimentConfig['agent_type'] ==
            preferenceTamerAgent.PreferenceTAMERAgent.getAgentType()):
        self.agent = preferenceTamerAgent.PreferenceTAMERAgent(
            max_n_experiences=ExperimentConfigurator.TamerConfig['agent_max_n_experiences'],
            window_size=ExperimentConfigurator.TamerConfig['agent_window_size'],
            is_asyn_input=ExperimentConfigurator.TamerConfig['is_asyn_input'],
            **q_learn_opts)
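The constructor above reads every hyperparameter from ExperimentConfigurator's class-level dictionaries rather than from keyword arguments. The sketch below only illustrates the shape those dictionaries are assumed to take: the keys come from the lookups in the constructor, while the example values are borrowed from the default arguments of the other constructors and may differ from the real configuration.

# Illustrative only: keys inferred from the lookups above; values are assumptions.
experimentConfig = {
    'text_only': False,
    'display_speed': 0.5,
    'check_policy_converge': False,
    'save_VDBE': False,
    'agent_type': 'qLearningAgent',
}
gridWorldConfig = {
    'grid_name': 'DiscountGrid',
    'discount': 0.9,
    'living_reward': 0.0,
    'noise': 0,
    'grid_size': 150,
    'n_episodes': 100,
}
TamerConfig = {
    'agent_max_n_experiences': 1000,
    'agent_window_size': 1,
    'is_asyn_input': True,
}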
def __init__(self,
             grid_name='DiscountGrid',
             discount=0.9,
             learning_rate=0.5,
             living_reward=0.0,
             noise=0.2,
             epsilon=0.3,
             display_speed=0.5,
             grid_size=150,
             text_only=False,
             n_episodes=100,
             agent_window_size=1,
             agent_max_n_experiences=1000,
             is_use_q_agent=False):
    self.text_only = text_only
    self.display_speed = display_speed
    self.n_episodes = n_episodes
    self.discount = discount

    ###########################
    # GET THE INPUT MODULE
    ###########################
    if is_use_q_agent:
        self.user_input_module = None
    else:
        self.user_input_module = user_input.UserInputModule()

    ###########################
    # GET THE GRIDWORLD
    ###########################
    # noinspection PyUnresolvedReferences
    import gridworld
    mdp_function = getattr(gridworld, "get" + grid_name)
    self.mdp = mdp_function()
    self.mdp.setLivingReward(living_reward)
    self.mdp.setNoise(noise)
    self.env = gridworld.GridworldEnvironment(self.mdp)

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################
    import textGridworldDisplay
    self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
    if not text_only:
        import graphicsGridworldDisplay
        self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
            self.mdp, grid_size, display_speed)
    try:
        self.display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE TAMER AGENT
    ###########################
    import qlearningAgents
    # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
    # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent, state, mdp)
    self.gridWorldEnv = GridworldEnvironment(self.mdp)
    action_function = lambda state: self.mdp.getPossibleActions(state)
    q_learn_opts = {
        'gamma': discount,
        'alpha': learning_rate,
        'epsilon': epsilon,
        'actionFn': action_function
    }
    if is_use_q_agent:
        self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
    else:
        self.agent = qlearningAgents.TamerQAgent(
            max_n_experiences=agent_max_n_experiences,
            window_size=agent_window_size,
            **q_learn_opts)