Python Agents.RandomAgent примеры использования

Язык программирования: Python

Класс/Тип: Agents

Метод/Функция: RandomAgent

Примеров на hotexamples.com: 3

Python Agents.RandomAgent - 3 примера найдено. Это лучшие примеры Python кода для Agents.RandomAgent из пакета prewikka, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

QAgent(5)

NormalMCTS(4)

RLWM_actionSoftmax(4)

Critic(4)

RandomAgent(3)

RLWM_modulation(3)

IdealValueAgent(2)

RLWM_allFree(2)

addToJobList(2)

RLWM_noise(2)

Agent(2)

DepMCTS(2)

DepUnDepMCTS(2)

Actor(2)

SimulatedMCTS(2)

FlippingMCTS(2)

RLWM_noneFree(2)

A_star_real_time_tree_search(1)

ReinforcementAgent3(1)

SarsaLambda(1)

SmallFC_FW(1)

UTreeAgent(1)

VIAgent(1)

Vandal(1)

Zooming(1)

SmallAgent(1)

Human(1)

QTableAgent(1)

DiegoConvAgent(1)

Agent1d(1)

Agents(1)

AgentsQWidget(1)

Att(1)

BasicAgent(1)

CAgent(1)

FeatEmbed(1)

Laser(1)

Greedy(1)

Greedy_tree_search(1)

GridExplorer3dof(1)

GridExplorer6dof(1)

HingeArm3dof(1)

HingeArm6dof(1)

A_star_tree_search(1)

agent(1)

Пример #1

Показать файл

Файл: value_representation.py Проект: architgupta93/RL.Hippocampus

def testMaze(n_train, n_nav):
    ValueLearning.DBG_LVL = 1
    move_distance = 0.99

    # Experiment parameters
    nx = 6
    ny = 6
    n_fields = round(1.0 * (nx + 3) * (ny + 3))

    Hippocampus.N_CELLS_PER_FIELD = 1
    n_cells = n_fields * Hippocampus.N_CELLS_PER_FIELD

    # Maze creation
    maze = Environment.RandomGoalOpenField(nx, ny, move_distance)

    # Generate place fields and place cells
    place_fields = Hippocampus.setupPlaceFields(maze, n_fields)
    place_cells = Hippocampus.assignPlaceCells(n_cells, place_fields)

    # Create Actor and Critic
    actor = Agents.RandomAgent(maze.getActions(), n_cells)
    critic = Agents.Critic(n_cells)

    ValueLearning.learnValueFunction(n_train,
                                     maze,
                                     place_cells,
                                     actor,
                                     critic,
                                     max_steps=1000)

Пример #2

Показать файл

Файл: multi_maze_random_agent.py Проект: architgupta93/RL.Hippocampus

def testMaze():
    """
    No comments here. Look at single_maze_learning_agent.py for more details!
    """
    ValueLearning.DBG_LVL = 0

    nx = 6
    ny = 6

    # Set the number of cells to be used per "place field" - Same for all the environments
    Hippocampus.N_CELLS_PER_FIELD = 1

    n_fields = round(1.0 * (nx + 3) * (ny + 3))
    n_cells = Hippocampus.N_CELLS_PER_FIELD * n_fields
    move_distance = 0.99

    n_training_trials = 100
    n_single_env_episodes = 2
    n_alternations = 1
    max_train_steps = 1000

    # First Environment: Has its own place cells and place fields
    env_E1 = Environment.RandomGoalOpenField(nx, ny, move_distance)
    canvas_E1 = Graphics.WallMazeCanvas(env_E1)
    place_fields_E1 = Hippocampus.setupPlaceFields(env_E1, n_fields)
    place_cells_E1 = Hippocampus.assignPlaceCells(n_cells, place_fields_E1)

    # Train a critic on the first environment
    print('Training Critic solely on Env A')
    critic_E1 = None
    weights_E1 = np.empty((n_cells, n_single_env_episodes), dtype=float)
    for episode in range(n_single_env_episodes):
        (_, critic_E1,
         _) = ValueLearning.learnValueFunction(n_training_trials,
                                               env_E1,
                                               place_cells_E1,
                                               critic=critic_E1,
                                               max_steps=max_train_steps)
        weights_E1[:, episode] = critic_E1.getWeights()

    # Get a trajectory in the environment and plot the value function
    canvas_E1.plotValueFunction(place_cells_E1, critic_E1, continuous=True)
    input('Press return to run next environment...')

    components_E1 = Graphics.showDecomposition(weights_E1,
                                               title='Environment 01')

    # Create empty actors and critics
    actor = Agents.RandomAgent(env_E1.getActions(), n_cells)
    critic = Agents.Critic(n_cells)

    # Second Environment: This has a different set (but the same number) of
    # place fields and place cells (also has a bunch of walls)
    nx = 6
    ny = 6
    lp_wall = Environment.Wall((0, 3), (3, 3))
    rp_wall = Environment.Wall((4, 3), (6, 3))
    env_E2 = Environment.MazeWithWalls(nx,
                                       ny, [lp_wall, rp_wall],
                                       move_distance=move_distance)
    canvas_E2 = Graphics.WallMazeCanvas(env_E2)
    place_fields_E2 = Hippocampus.setupPlaceFields(env_E2, n_fields)
    place_cells_E2 = Hippocampus.assignPlaceCells(n_cells, place_fields_E2)

    # Train another critic on the second environment
    print()
    print('Training Critic solely on Env B')
    critic_E2 = None
    weights_E2 = np.empty((n_cells, n_single_env_episodes), dtype=float)
    for episode in range(n_single_env_episodes):
        (_, critic_E2,
         _) = ValueLearning.learnValueFunction(n_training_trials,
                                               env_E2,
                                               place_cells_E2,
                                               critic=critic_E2,
                                               max_steps=max_train_steps)
        weights_E2[:, episode] = critic_E2.getWeights()

    components_E2 = Graphics.showDecomposition(weights_E2,
                                               title='Environment 02')
    canvas_E2.plotValueFunction(place_cells_E2, critic_E2, continuous=True)

    # Look at the projection of one environment's weights on the other's principal components
    Graphics.showDecomposition(weights_E1,
                               components=components_E2,
                               title='E2 on E1')
    Graphics.showDecomposition(weights_E2,
                               components=components_E1,
                               title='E1 on E2')
    input('Press any key to start Alternation.')

    # This can be used to just reinforce the fact that the agent is indeed
    # random! The steps taken to goal would not change over time because of the
    # way the agent behaves.
    learning_steps_E1 = np.zeros((n_alternations, 1), dtype=float)
    learning_steps_E2 = np.zeros((n_alternations, 1), dtype=float)

    # keep track of weights for PCA
    weights = np.empty((n_cells, n_alternations * 2), dtype=float)
    for alt in range(n_alternations):
        n_alternation_trials = n_single_env_episodes * n_training_trials
        # n_alternation_trials = n_training_trials
        print('Alternation: %d' % alt)
        # First look at the performance of the agent in the task before it is
        # allowed to learn anything. Then allow learning
        print('Learning Environment A')
        (actor, critic, steps_E1) = ValueLearning.learnValueFunction(
            n_alternation_trials, env_E1, place_cells_E1, actor, critic,
            max_train_steps)
        learning_steps_E1[alt] = np.mean(steps_E1)
        weights[:, 2 * alt] = critic.getWeights()

        # Repeat for environment 1
        print('Learning Environment B')
        (actor, critic, steps_E2) = ValueLearning.learnValueFunction(
            n_alternation_trials, env_E2, place_cells_E2, actor, critic,
            max_train_steps)
        learning_steps_E2[alt] = np.mean(steps_E2)
        weights[:, 2 * alt + 1] = critic.getWeights()

    # Show the alternation weights in the two basis
    Graphics.showDecomposition(weights,
                               components=components_E1,
                               title='Alternation weights in E1')
    Graphics.showDecomposition(weights,
                               components=components_E2,
                               title='Alternation weights in E2')

    # Show the value functions for both the environments
    input('Press return for Value Function of E1')
    canvas_E1.plotValueFunction(place_cells_E1, critic, continuous=True)
    canvas_E1.plotValueFunction(place_cells_E1, critic_E1, continuous=True)
    canvas_E1.plotValueFunction(place_cells_E1, critic_E2, continuous=True)

    # Plot the ideal value function
    ideal_critic = Agents.IdealValueAgent(env_E1, place_cells_E1)
    optimal_value_function = ideal_critic.getValueFunction()

    scaling_factor = 1.0 / (1 - critic_E1.getDiscountFactor())
    # Graphics.showImage(optimal_value_function, xticks=range(1,nx), yticks=range(1,ny), range=(maze.NON_GOAL_STATE_REWARD, scaling_factor * maze.GOAL_STATE_REWARD))
    Graphics.showImage(optimal_value_function, xticks=range(1,nx), yticks=range(1,ny), \
        range=(env_E1.NON_GOAL_STATE_REWARD, scaling_factor * env_E1.GOAL_STATE_REWARD))

    input('Press return for Value Function of E2')
    canvas_E2.plotValueFunction(place_cells_E2, critic, continuous=True)
    canvas_E2.plotValueFunction(place_cells_E2, critic_E2, continuous=True)
    canvas_E2.plotValueFunction(place_cells_E2, critic_E1, continuous=True)

    # Plot the ideal value function
    ideal_critic = Agents.IdealValueAgent(env_E2, place_cells_E2)
    optimal_value_function = ideal_critic.getValueFunction()

    scaling_factor = 1.0 / (1 - critic_E2.getDiscountFactor())
    # Graphics.showImage(optimal_value_function, xticks=range(1,nx), yticks=range(1,ny), range=(maze.NON_GOAL_STATE_REWARD, scaling_factor * maze.GOAL_STATE_REWARD))
    Graphics.showImage(optimal_value_function, xticks=range(1,nx), yticks=range(1,ny), \
        range=(env_E2.NON_GOAL_STATE_REWARD, scaling_factor * env_E2.GOAL_STATE_REWARD))
    input('Press any key to exit!')

Пример #3

Показать файл

Файл: test.py Проект: diegogutierrez2001/deep-rts

		outcomes_path = os.path.join(results_path, "outcomes.txt")
		durations_path = os.path.join(results_path, "durations.txt")

		outcomes_file = open(outcomes_path, "w+")
		durations_file = open(durations_path, "w+")

		# agents

		state_size = env.observation_space.shape
		action_size = env.action_space.n

		agent_a = Agents.SmallAgent(4410, action_size)
		agent_a.load(file_path)

		agent_b = Agents.RandomAgent()

		for trial in range(TRIALS):

			state = env.reset()
			flat_state = state.flatten()

			# video stuff

			filenames = []

			terminal = False
			changed = False
			count = 0

			# play game