Example #1
def run_no_speech(task_block, task_room, photo_pos, drone_pos, pub,
                  drone_path):
    """
    Assume the block is on the floor of each cell
    Get initial pos of drone from caller
    """
    height = 2  # vertical space
    task = DroneTask(task_block, task_room)
    room1 = DroneRoom("room1", [(x, y, z) for x in range(4) for y in range(1)
                                for z in range(height)], "red")
    room2 = DroneRoom("room2", [(x, y, z) for x in range(0, 2)
                                for y in range(2, 4) for z in range(height)],
                      color="green")
    room3 = DroneRoom("room3", [(x, y, z) for x in range(3, 4)
                                for y in range(2, 4) for z in range(height)],
                      color="blue")
    block1 = DroneBlock("block1",
                        photo_pos[0],
                        photo_pos[1],
                        photo_pos[2] - 1,
                        color="photo")
    rooms = [room1, room2, room3]
    blocks = [block1]
    doors = [DroneDoor(1, 1, height), DroneDoor(3, 1, height)]
    mdp = DroneMDP(drone_pos, task, rooms=rooms, blocks=blocks, doors=doors)

    print("Start Value Iteration")
    vi = ValueIteration(mdp)
    vi.run_vi()
    action_seq, state_seq = vi.plan(mdp.init_state)
    policy = defaultdict()
    for i in range(len(action_seq)):
        policy[state_seq[i]] = action_seq[i]
    print("Start Flying")
    mdp.send_path(policy, pub, drone_path)
Example #2
def main():
    height = 2  # vertical space
    task = DroneTask("red", "None")
    room1 = DroneRoom("room1", [(x, y, z) for x in range(0, 4)
                                for y in range(0, 1)
                                for z in range(height)], "red")
    room2 = DroneRoom("room2", [(x, y, z) for x in range(0, 2)
                                for y in range(2, 3) for z in range(height)],
                      color="green")
    room3 = DroneRoom("room3", [(x, y, z) for x in range(3, 4)
                                for y in range(2, 3) for z in range(height)],
                      color="blue")
    block1 = DroneBlock("block1", 0, 2, 0, color="red")
    block2 = DroneBlock("block2", 2, 0, -1, color="green")
    block3 = DroneBlock("block3", 3, 2, 0, color="blue")
    rooms = [room1, room2, room3]
    blocks = [block1, block2, block3]
    doors = [DroneDoor(1, 1, height), DroneDoor(3, 1, height)]
    mdp = DroneMDP((0, 0, 0), task, rooms=rooms, blocks=blocks, doors=doors)

    # print("Start Q learning")
    # ql_agent = QLearningAgent(actions=mdp.get_actions())
    # # run_agents_on_mdp([ql_agent], mdp, instances=2, episodes=2500, steps=100, reset_at_terminal=True, verbose=True)
    # run_single_agent_on_mdp(ql_agent, mdp, episodes=2000, steps=200)
    print("Start Value Iteration")
    vi = ValueIteration(mdp)
    vi.run_vi()
    action_seq, state_seq = vi.plan(mdp.init_state)
    policy = defaultdict()
    for i in range(len(action_seq)):
        policy[state_seq[i]] = action_seq[i]
    print("Start AirSim")
    # mdp.visualize_agent(ql_agent)
    mdp.visualize_policy(policy)
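
The commented-out lines in main() above hint at a Q-learning alternative to value iteration. A minimal sketch of that path, assuming the simple_rl imports shown below and the same DroneMDP instance (run_q_learning is a hypothetical helper, not part of the original example):

from simple_rl.agents import QLearningAgent
from simple_rl.run_experiments import run_single_agent_on_mdp

def run_q_learning(mdp, episodes=2000, steps=200):
    # Train a tabular Q-learner on the same MDP instead of planning with value iteration.
    ql_agent = QLearningAgent(actions=mdp.get_actions())
    run_single_agent_on_mdp(ql_agent, mdp, episodes=episodes, steps=steps)
    return ql_agent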
Example #3
def plan_with_vi(gamma=0.99):
    '''
    Args:
        gamma (float): discount factor

    Runs value iteration on the problem to check the correctness of the policy returned by BSS.
    '''
    mdp = GridWorldMDP(gamma=gamma, goal_locs=[(4, 3)], slip_prob=0.0)
    value_iter = ValueIteration(mdp, sample_rate=5)
    value_iter.run_vi()

    action_seq, state_seq = value_iter.plan(mdp.get_init_state())

    print "[ValueIteration] Plan for {}".format(mdp)
    for i in range(len(action_seq)):
        print 'pi({}) --> {}'.format(state_seq[i], action_seq[i])
    def get_policy(self, mdp, verbose=False):
        '''
        Args:
            mdp (MDP): MDP (same level as the current Policy Generator)
        Returns:
            policy (defaultdict): optimal policy in mdp
        '''
        vi = ValueIteration(mdp, sample_rate=1)
        vi.run_vi()

        policy = defaultdict()
        action_seq, state_seq = vi.plan(mdp.init_state)

        if verbose: print('Plan for {}:'.format(mdp))
        for i in range(len(action_seq)):
            if verbose:
                print("\tpi[{}] -> {}".format(state_seq[i], action_seq[i]))
            policy[state_seq[i]] = action_seq[i]
        return policy
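
The policy returned by get_policy only covers states along the planned trajectory, so a caller typically follows it step by step. A hypothetical usage sketch, assuming a simple_rl-style MDP that exposes get_init_state() and transition_func (rollout is not part of the original code):

def rollout(mdp, policy, max_steps=100):
    # Follow the policy from the initial state until it leaves the states the plan covered.
    state = mdp.get_init_state()
    trajectory = []
    for _ in range(max_steps):
        if state not in policy:
            break
        action = policy[state]
        trajectory.append((state, action))
        state = mdp.transition_func(state, action)
    return trajectory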
Example #5
    def planFromAtoB(self, Maps, nearestVertex, kStepConfig):

        # if not self.computedMDP:
        #     self.wallLocations = []
        #     for x in range(len(self.Maps.occupancyMap)):
        #         for y in range(len(self.Maps.occupancyMap[x])):
        #             if self.Maps.occupancyMap[x][y] == Env.WALL:
        #                 self.wallLocations.append(Loc.Location(x,y))
        #     self.computedMDP = True

        mdp = GridWorldMDP(width=len(Maps.occupancyMap),
                           height=len(Maps.occupancyMap[0]),
                           init_loc=(nearestVertex.x, nearestVertex.y),
                           goal_locs=[(kStepConfig.x, kStepConfig.y)],
                           gamma=0.95)
        vi = ValueIteration(mdp)
        vi.run_vi()
        action_seq, state_seq = vi.plan()

        # Reject the plan if any state along it falls on a wall cell
        for s in state_seq:
            if Maps.occupancyMap[s.x][s.y] == env.WALL:
                return False
        return True
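
planFromAtoB reads like a collision check inside an RRT-style planner: it only approves an edge if the value-iteration plan between the two configurations avoids walls. A hypothetical calling pattern (every name other than planFromAtoB is an assumption) might be:

# candidate is a sampled configuration, nearest is the closest existing tree vertex
if planner.planFromAtoB(maps, nearest, candidate):
    tree.add_edge(nearest, candidate)  # connect only if the planned path stays collision-free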
Example #6
def main():
    # This accepts arguments from the command line with flags.
    # Example usage: python value_iteration_demo.py -w 4 -H 3 -s 0.05 -g 0.95 -il [(0,0)] -gl [(4,3)] -ll [(4,2)]  -W [(2,2)]
    parser = argparse.ArgumentParser(
        description=
        'Run a demo that shows a visualization of value iteration on a GridWorld MDP'
    )

    # Add the relevant arguments to the argparser
    parser.add_argument(
        '-w',
        '--width',
        type=int,
        nargs="?",
        const=5,
        default=5,
        help=
        'an integer representing the number of cells for the GridWorld width')
    parser.add_argument(
        '-H',
        '--height',
        type=int,
        nargs="?",
        const=5,
        default=5,
        help=
        'an integer representing the number of cells for the GridWorld height')
    parser.add_argument(
        '-s',
        '--slip',
        type=float,
        nargs="?",
        const=0.05,
        default=0.05,
        help=
        'a float representing the probability that the agent will "slip" and not take the intended action'
    )
    parser.add_argument(
        '-g',
        '--gamma',
        type=float,
        nargs="?",
        const=0.95,
        default=0.95,
        help='a float representing the discount factor used by Value Iteration')
    parser.add_argument(
        '-il',
        '--i_loc',
        type=tuple,
        nargs="?",
        const=(0, 0),
        default=(0, 0),
        help=
        'two integers representing the starting cell location of the agent [with zero-indexing]'
    )
    parser.add_argument(
        '-gl',
        '--g_loc',
        type=list,
        nargs="?",
        const=[(3, 3)],
        default=[(3, 3)],
        help=
        'a sequence of integer-valued coordinates where the agent will receive a large reward and enter a terminal state'
    )
    args = parser.parse_args()
    mdp = generate_MDP(args.width, args.height, args.i_loc, args.g_loc,
                       args.gamma, args.slip)

    # Run value iteration on the mdp and save the history of value backups
    # (max_iterations=1 caps this demo at a single backup sweep)
    vi = ValueIteration(mdp, max_iterations=1)
    _, _, histories = vi.run_vi_histories()

    # For every value backup, visualize the policy
    for value_dict in histories:
        # mdp.visualize_policy(lambda in_state: value_dict[in_state])  # Note: the lambda is needed because the policy must be a function
        # time.sleep(0.5)
        print("========================")
        for k in value_dict.keys():
            print(str(k) + " " + str(value_dict[k]))
        print(vi.plan(state=mdp.init_state))
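
generate_MDP is called but not defined in this example. A minimal sketch, assuming it simply forwards the parsed arguments to simple_rl's GridWorldMDP constructor, could be:

from simple_rl.tasks import GridWorldMDP

def generate_MDP(width, height, i_loc, g_loc, gamma, slip):
    # Build a GridWorld whose size, start, goals, discount, and slip probability
    # come straight from the command-line arguments.
    return GridWorldMDP(width=width,
                        height=height,
                        init_loc=i_loc,
                        goal_locs=g_loc,
                        gamma=gamma,
                        slip_prob=slip)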