Example 1
    import gridworld
    mdpFunction = getattr(gridworld, "get" + opts.grid)
    mdp = mdpFunction()
    mdp.setLivingReward(opts.livingReward)
    mdp.setNoise(opts.noise)
    env = gridworld.GridworldEnvironment(mdp)

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################

    import textGridworldDisplay
    display = textGridworldDisplay.TextGridworldDisplay(mdp)
    if not opts.textDisplay:
        import graphicsGridworldDisplay
        display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
            mdp, opts.gridSize, opts.speed)
    try:
        display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE AGENT
    ###########################

    import valueIterationAgents, qlearningAgents
    a = None
    if opts.agent == 'value':
        a = valueIterationAgents.ValueIterationAgent(mdp, opts.discount,
                                                     opts.iters)
    elif opts.agent == 'q':
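
Example 1 is truncated at the 'q' branch. The same branch appears in full in the main() listing at the end of Example 4 below; as a pointer, it continues roughly as follows (opts comes from parseOptions, which is not part of this excerpt):

        # Continuation sketch, mirroring the 'q' branch of Example 4's main():
        # build a Q-learning agent whose action function queries the MDP.
        actionFn = lambda state: mdp.getPossibleActions(state)
        qLearnOpts = {
            'gamma': opts.discount,
            'alpha': opts.learningRate,
            'epsilon': opts.epsilon,
            'actionFn': actionFn
        }
        a = qlearningAgents.QLearningAgent(**qLearnOpts)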
Example 2
    def __init__(self, optimal_policy=None, expr_log_dir=None):

        ###########################
        # GENERAL CONTROL
        ###########################

        self.text_only = ExperimentConfigurator.experimentConfig['text_only']
        self.display_speed = ExperimentConfigurator.experimentConfig[
            'display_speed']
        self.n_episodes = ExperimentConfigurator.gridWorldConfig['n_episodes']
        self.discount = ExperimentConfigurator.gridWorldConfig['discount']
        self.check_policy_converge = ExperimentConfigurator.experimentConfig[
            'check_policy_converge']
        self.optimal_policy = optimal_policy
        self.expr_log_dir = expr_log_dir
        self.save_VDBE = ExperimentConfigurator.experimentConfig['save_VDBE']

        ###########################
        # GET THE INPUT MODULE
        ###########################
        if ExperimentConfigurator.experimentConfig[
                'agent_type'] == qlearningAgents.QLearningAgent.getAgentType():
            self.user_input_module = None
        else:
            self.user_input_module = user_input.UserInputModule(
                is_asyn=ExperimentConfigurator.TamerConfig['is_asyn_input'])

        self.auto_feedback = AutoFeedback()

        ###########################
        # GET THE GRIDWORLD
        ###########################

        # noinspection PyUnresolvedReferences
        import gridworld
        mdp_function = getattr(
            gridworld,
            "get" + ExperimentConfigurator.gridWorldConfig['grid_name'])
        self.mdp = mdp_function()
        self.mdp.setLivingReward(
            ExperimentConfigurator.gridWorldConfig['living_reward'])
        self.mdp.setNoise(ExperimentConfigurator.gridWorldConfig['noise'])
        self.env = gridworld.GridworldEnvironment(self.mdp)

        ###########################
        # Variables used to store parameters values
        ###########################

        # init VDBE values records
        global VDBE_RECORDS
        VDBE_RECORDS = dict()
        for state in self.env.getGridWorld().getNonTerminalStates():
            VDBE_RECORDS[state] = list()

        ###########################
        # GET THE DISPLAY ADAPTER
        ###########################

        import textGridworldDisplay
        self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
        if not self.text_only:
            import graphicsGridworldDisplay
            self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
                self.mdp, ExperimentConfigurator.gridWorldConfig['grid_size'],
                self.display_speed)
        try:
            self.display.start()
        except KeyboardInterrupt:
            sys.exit(0)

        ###########################
        # GET THE TAMER AGENT
        ###########################

        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        self.gridWorldEnv = GridworldEnvironment(self.mdp)
        action_function = lambda m_state: self.mdp.getPossibleActions(m_state)
        q_learn_opts = {'actionFn': action_function}

        if ExperimentConfigurator.experimentConfig[
                'agent_type'] == qlearningAgents.QLearningAgent.getAgentType():
            self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
        elif ExperimentConfigurator.experimentConfig[
                'agent_type'] == qlearningAgents.TamerQAgent.getAgentType():
            self.agent = qlearningAgents.TamerQAgent(
                max_n_experiences=ExperimentConfigurator.
                TamerConfig['agent_max_n_experiences'],
                window_size=ExperimentConfigurator.
                TamerConfig['agent_window_size'],
                is_asyn_input=ExperimentConfigurator.
                TamerConfig['is_asyn_input'],
                **q_learn_opts)
        elif (ExperimentConfigurator.experimentConfig['agent_type'] ==
              preferenceTamerAgent.PreferenceTAMERAgent.getAgentType()):
            self.agent = preferenceTamerAgent.PreferenceTAMERAgent(
                max_n_experiences=ExperimentConfigurator.
                TamerConfig['agent_max_n_experiences'],
                window_size=ExperimentConfigurator.
                TamerConfig['agent_window_size'],
                is_asyn_input=ExperimentConfigurator.
                TamerConfig['is_asyn_input'],
                **q_learn_opts)
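
The constructor above only reads configuration, so the sketch below lists the keys it expects from ExperimentConfigurator. The values are illustrative assumptions (mirroring Example 3's defaults where one exists), and the 'agent_type' string is hypothetical because getAgentType() is not shown in this excerpt:

    # Shape of the configuration consumed by the constructor above.
    # Values are illustrative assumptions, not defaults read from the source.
    experimentConfig = {
        'text_only': False,
        'display_speed': 0.5,
        'check_policy_converge': False,
        'save_VDBE': False,
        'agent_type': 'qLearningAgent',  # hypothetical; compared with getAgentType()
    }
    gridWorldConfig = {
        'n_episodes': 100,
        'discount': 0.9,
        'grid_name': 'DiscountGrid',  # resolved via getattr(gridworld, "get" + grid_name)
        'living_reward': 0.0,
        'noise': 0,
        'grid_size': 150,
    }
    TamerConfig = {
        'is_asyn_input': True,
        'agent_max_n_experiences': 1000,
        'agent_window_size': 1,
    }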
Example 3
    def __init__(self, grid_name='DiscountGrid', discount=0.9, learning_rate=0.5, living_reward=0.0
                 , noise=0, epsilon=0.3, display_speed=0.5
                 , grid_size=150, text_only=False, n_episodes=100
                 , agent_window_size=1, agent_max_n_experiences=1000
                 , is_use_q_agent=False, is_asyn_input=True):
        self.text_only = text_only
        self.display_speed = display_speed
        self.n_episodes = n_episodes
        self.discount = discount

        ###########################
        # GET THE INPUT MODULE
        ###########################

        if is_use_q_agent:
            self.user_input_module = None
        else:
            self.user_input_module = user_input.UserInputModule(is_asyn=is_asyn_input)

        ###########################
        # GET THE GRIDWORLD
        ###########################

        # noinspection PyUnresolvedReferences
        import gridworld
        mdp_function = getattr(gridworld, "get" + grid_name)
        self.mdp = mdp_function()
        self.mdp.setLivingReward(living_reward)
        self.mdp.setNoise(noise)
        self.env = gridworld.GridworldEnvironment(self.mdp)

        ###########################
        # GET THE DISPLAY ADAPTER
        ###########################

        import textGridworldDisplay
        self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
        if not text_only:
            import graphicsGridworldDisplay
            self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(self.mdp, grid_size, display_speed)
        try:
            self.display.start()
        except KeyboardInterrupt:
            sys.exit(0)

        ###########################
        # GET THE TAMER AGENT
        ###########################

        import qlearningAgents
        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        self.gridWorldEnv = GridworldEnvironment(self.mdp)
        action_function = lambda state: self.mdp.getPossibleActions(state)
        q_learn_opts = {
            'gamma': discount,
            'alpha': learning_rate,
            'epsilon': epsilon,
            'actionFn': action_function
        }

        if is_use_q_agent:
            self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
        else:
            self.agent = qlearningAgents.TamerQAgent(max_n_experiences=agent_max_n_experiences
                                                     , window_size=agent_window_size
                                                     , is_asyn_input=is_asyn_input
                                                     , **q_learn_opts)
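
A usage sketch for this constructor follows; the enclosing class is not named in the excerpt, so TamerExperiment is a placeholder, and grid_name must match a getXxx factory defined in gridworld.py:

    # Usage sketch. 'TamerExperiment' is a placeholder for the class that owns
    # the __init__ above.
    experiment = TamerExperiment(grid_name='BookGrid',
                                 discount=0.9,
                                 learning_rate=0.5,
                                 epsilon=0.3,
                                 n_episodes=50,
                                 is_use_q_agent=True,  # plain Q-learning, no human-input module
                                 text_only=True)       # text display instead of graphics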
Example 4
    def __init__(self, grid_name='DiscountGrid', discount=0.9, learning_rate=0.5, living_reward=0.0
                 , noise=0, epsilon=0.3, display_speed=0.5
                 , grid_size=150, text_only=False, n_episodes=100
                 , agent_window_size=1, agent_max_n_experiences=1000
                 , check_value_converge=False
                 , check_policy_converge=False
                 , optimal_policy=None
                 , expr_log_dir=None
                 , delta=0.02
                 , is_use_q_agent=False
                 , init_temp=1024.0
                 , temp_decrease_rate=2.0
                 , is_asyn_input=True):

        ###########################
        # GENERAL CONTROL
        ###########################

        self.text_only = text_only
        self.display_speed = display_speed
        self.n_episodes = n_episodes
        self.discount = discount
        self.check_value_converge = check_value_converge
        self.check_policy_converge = check_policy_converge
        self.optimal_policy = optimal_policy
        self.expr_log_dir = expr_log_dir
        self.delta = delta
        self.save_VDBE = False

        ###########################
        # GET THE INPUT MODULE
        ###########################

        if is_use_q_agent:
            self.user_input_module = None
        else:
            self.user_input_module = user_input.UserInputModule(is_asyn=is_asyn_input)

        ###########################
        # GET THE GRIDWORLD
        ###########################

        # noinspection PyUnresolvedReferences
        import gridworld
        mdp_function = getattr(gridworld, "get" + grid_name)
        self.mdp = mdp_function()
        self.mdp.setLivingReward(living_reward)
        self.mdp.setNoise(noise)
        self.env = gridworld.GridworldEnvironment(self.mdp)

        ###########################
        # Variables used to store parameters values
        ###########################

        # init VDBE values records
        global VDBE_RECORDS
        VDBE_RECORDS = dict()
        for state in self.env.getGridWorld().getNonTerminalStates():
            VDBE_RECORDS[state] = list()

        ###########################
        # GET THE DISPLAY ADAPTER
        ###########################

        import textGridworldDisplay
        self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
        if not text_only:
            import graphicsGridworldDisplay
            self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(self.mdp, grid_size, display_speed)
        try:
            self.display.start()
        except KeyboardInterrupt:
            sys.exit(0)

        ###########################
        # GET THE TAMER AGENT
        ###########################

        import qlearningAgents
        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        self.gridWorldEnv = GridworldEnvironment(self.mdp)
        action_function = lambda state: self.mdp.getPossibleActions(state)
        q_learn_opts = {
            'gamma': discount,
            'alpha': learning_rate,
            'epsilon': epsilon,
            'actionFn': action_function,
            'init_temp': init_temp,
            'temp_decrease_rate': temp_decrease_rate
        }

        if is_use_q_agent:
            self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
        else:
            self.agent = qlearningAgents.TamerQAgent(max_n_experiences=agent_max_n_experiences
                                                     , window_size=agent_window_size
                                                     , is_asyn_input=is_asyn_input
                                                     , **q_learn_opts)
def main(myargs):
    sys.argv = myargs.split()
    opts = parseOptions()

    ###########################
    # GET THE GRIDWORLD
    ###########################

    if opts.grid == 'VerticalBridgeGrid':
        opts.gridSize = 120

    import gridworld
    mdpFunction = getattr(gridworld, "get" + opts.grid)
    mdp = mdpFunction()
    mdp.setLivingReward(opts.livingReward)
    mdp.setNoise(opts.noise)
    env = gridworld.GridworldEnvironment(mdp)

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################

    import textGridworldDisplay
    display = textGridworldDisplay.TextGridworldDisplay(mdp)
    if not opts.textDisplay:
        import graphicsGridworldDisplay
        display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
            mdp, opts.gridSize, opts.speed)
    try:
        display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE AGENT
    ###########################

    import valueIterationAgents, qlearningAgents
    a = None
    if opts.agent == 'value':
        a = valueIterationAgents.ValueIterationAgent(mdp, opts.discount,
                                                     opts.iters)
    elif opts.agent == 'q':
        #env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        #simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        gridWorldEnv = GridworldEnvironment(mdp)
        actionFn = lambda state: mdp.getPossibleActions(state)
        qLearnOpts = {
            'gamma': opts.discount,
            'alpha': opts.learningRate,
            'epsilon': opts.epsilon,
            'actionFn': actionFn
        }
        a = qlearningAgents.QLearningAgent(**qLearnOpts)
    elif opts.agent == 'random':
        # No reason to use the random agent without episodes
        if opts.episodes == 0:
            opts.episodes = 10

        class RandomAgent:
            def getAction(self, state):
                return random.choice(mdp.getPossibleActions(state))

            def getValue(self, state):
                return 0.0

            def getQValue(self, state, action):
                return 0.0

            def getPolicy(self, state):
                "NOTE: 'random' is a special policy value; don't use it in your code."
                return 'random'

            def update(self, state, action, nextState, reward):
                pass

        a = RandomAgent()
    else:
        if not opts.manual:
            raise Exception('Unknown agent type: ' + opts.agent)

    ###########################
    # RUN EPISODES
    ###########################
    # DISPLAY Q/V VALUES BEFORE SIMULATION OF EPISODES
    try:
        if not opts.manual and opts.agent == 'value':
            if opts.valueSteps:
                for i in range(opts.iters):
                    tempAgent = valueIterationAgents.ValueIterationAgent(
                        mdp, opts.discount, i)
                    display.displayValues(tempAgent,
                                          message="VALUES AFTER " + str(i) +
                                          " ITERATIONS")
                    display.pause()

            display.displayValues(a,
                                  message="VALUES AFTER " + str(opts.iters) +
                                  " ITERATIONS")
            display.pause()
            display.displayQValues(a,
                                   message="Q-VALUES AFTER " +
                                   str(opts.iters) + " ITERATIONS")
            display.pause()
    except KeyboardInterrupt:
        sys.exit(0)

    # FIGURE OUT WHAT TO DISPLAY EACH TIME STEP (IF ANYTHING)
    displayCallback = lambda x: None
    if not opts.quiet:
        if opts.manual and opts.agent is None:
            displayCallback = lambda state: display.displayNullValues(state)
        else:
            if opts.agent == 'random':
                displayCallback = lambda state: display.displayValues(
                    a, state, "CURRENT VALUES")
            if opts.agent == 'value':
                displayCallback = lambda state: display.displayValues(
                    a, state, "CURRENT VALUES")
            if opts.agent == 'q':
                displayCallback = lambda state: display.displayQValues(
                    a, state, "CURRENT Q-VALUES")

    messageCallback = lambda x: printString(x)
    if opts.quiet:
        messageCallback = lambda x: None

    # FIGURE OUT WHETHER TO WAIT FOR A KEY PRESS AFTER EACH TIME STEP
    pauseCallback = lambda: None
    if opts.pause:
        pauseCallback = lambda: display.pause()

    # FIGURE OUT WHETHER THE USER WANTS MANUAL CONTROL (FOR DEBUGGING AND DEMOS)
    if opts.manual:
        decisionCallback = lambda state: getUserAction(
            state, mdp.getPossibleActions)
    else:
        decisionCallback = a.getAction

    # RUN EPISODES
    if opts.episodes > 0:
        print()
        print("RUNNING", opts.episodes, "EPISODES")
        print()
    returns = 0
    for episode in range(1, opts.episodes + 1):
        returns += runEpisode(a, env, opts.discount, decisionCallback,
                              displayCallback, messageCallback, pauseCallback,
                              episode)
    if opts.episodes > 0:
        print()
        print("AVERAGE RETURNS FROM START STATE: " +
              str((returns + 0.0) / opts.episodes))
        print()
        print()

    # DISPLAY POST-LEARNING VALUES / Q-VALUES
    if opts.agent == 'q' and not opts.manual:
        try:
            display.displayQValues(a,
                                   message="Q-VALUES AFTER " +
                                   str(opts.episodes) + " EPISODES")
            display.pause()
            display.displayValues(a,
                                  message="VALUES AFTER " +
                                  str(opts.episodes) + " EPISODES")
            display.pause()
        except KeyboardInterrupt:
            sys.exit(0)
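
Because main() rebuilds sys.argv from a single string, it can be driven from another script or an interactive session. A minimal sketch, assuming parseOptions() accepts the usual Berkeley gridworld flags (-a agent, -k episodes, -g grid, -q quiet); check them against the parseOptions() used in this codebase:

    # Usage sketch; the flags are assumptions based on the standard Berkeley
    # gridworld parseOptions(), which is not included in this excerpt.
    main("gridworld.py -a q -k 100 -g BookGrid -q")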