Python Agents.Actor примеры использования

Язык программирования: Python

Класс/Тип: Agents

Метод/Функция: Actor

Примеров на hotexamples.com: 2

Python Agents.Actor — найдено 2 примера. Это лучшие примеры кода на Python для Agents.Actor из пакета prewikka, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

QAgent(5)

NormalMCTS(4)

RLWM_actionSoftmax(4)

Critic(4)

RandomAgent(3)

RLWM_modulation(3)

IdealValueAgent(2)

RLWM_allFree(2)

addToJobList(2)

RLWM_noise(2)

Agent(2)

DepMCTS(2)

DepUnDepMCTS(2)

Actor(2)

SimulatedMCTS(2)

FlippingMCTS(2)

RLWM_noneFree(2)

A_star_real_time_tree_search(1)

ReinforcementAgent3(1)

SarsaLambda(1)

SmallFC_FW(1)

UTreeAgent(1)

VIAgent(1)

Vandal(1)

Zooming(1)

SmallAgent(1)

Human(1)

QTableAgent(1)

DiegoConvAgent(1)

Agent1d(1)

Agents(1)

AgentsQWidget(1)

Att(1)

BasicAgent(1)

CAgent(1)

FeatEmbed(1)

Laser(1)

Greedy(1)

Greedy_tree_search(1)

GridExplorer3dof(1)

GridExplorer6dof(1)

HingeArm3dof(1)

HingeArm6dof(1)

A_star_tree_search(1)

agent(1)

Пример #1

Показать файл

def learnValueFunction(n_trials,
                       environment,
                       place_cells,
                       actor=None,
                       critic=None,
                       max_steps=np.inf):
    """
    Main function responsible for learning value function for a given environment
    INPUTS:
    -------
    n_trials: (INTEGER) Number of trials allowed on the task
    environment: (Maze) Physical space in which the task has to be learnt
    place_cells: (PlaceCell) Entity that encodes a particular location as a population

    <OPTIONAL INPUTS>
    actor: Pre-trained actor. When None, a fresh Agents.Actor is created.
    critic: Pre-trained critic. When None, a fresh Agents.Critic is created.
    max_steps: Step budget for a single trial. A trial is abandoned once
        n_steps[trial] exceeds max_steps * environment.MOVE_DISTANCE.
        Defaults to infinity (no limit).

    OUTPUTS:
    --------
    actor: (Actor Class) Entity that learns actions for a given state
    critic: (Critic Class) Entity that evaluates the value for a
        particular state. These values are used for taking actions.
    n_steps: (np.ndarray, one entry per trial) Distance moved during the
        trial, offset by -environment.getOptimalDistanceToGoal().

    RAISES:
    -------
    AssertionError: if a supplied actor/critic reports a getNFields() that
        differs from len(place_cells).
    """

    # Set up the actor and critic based on the place fields. Supplied agents
    # must have been built for the same number of place cells.
    if critic is None:
        critic = Agents.Critic(len(place_cells))
    else:
        assert critic.getNFields() == len(place_cells)

    if actor is None:
        # Other agents from the Agents module (e.g. Agents.RandomAgent) can be
        # substituted here for comparison runs.
        actor = Agents.Actor(environment.getActions(), len(place_cells))
    else:
        assert actor.getNFields() == len(place_cells)

    n_steps = np.zeros(n_trials, dtype=float)
    for trial in range(n_trials):
        # Path is visualized using a graphics object
        canvas = Graphics.WallMazeCanvas(environment)
        if DBG_LVL > 2:
            # Visualize place fields for a few cells and then the aggregate
            # activity.
            n_cells_to_visualize = 4
            for _ in range(n_cells_to_visualize):
                # randrange excludes the upper bound; random.randint includes
                # it and could index one past the end of place_cells.
                sample_cell = random.randrange(len(place_cells))
                canvas.visualizePlaceField(place_cells[sample_cell])
            canvas.visualizeAggregatePlaceFields(place_cells)

        # Initialize a new location and adjust for the optimal number of steps
        # needed to get to the goal.
        environment.redrawInitLocation()
        optimal_steps_to_goal = environment.getOptimalDistanceToGoal()
        n_steps[trial] = -optimal_steps_to_goal

        initial_state = environment.getCurrentState()
        canvas.update(initial_state)
        terminate_trial = False
        while not terminate_trial:
            # The goal test happens before the move, so the body below still
            # runs once on the iteration in which the goal is detected.
            terminate_trial = environment.reachedGoalState()
            if n_steps[trial] > max_steps * environment.MOVE_DISTANCE:
                break

            n_steps[trial] += environment.MOVE_DISTANCE
            current_state = environment.getCurrentState()
            if DBG_LVL > 1:
                print('On state: (%.2f, %.2f)' %
                      (current_state[0], current_state[1]))

            # Get the place field activity based on the current location
            pf_activity = [pf.getActivity(current_state) for pf in place_cells]

            # Get an action based on the place field activity
            next_action = actor.getAction(pf_activity)
            if DBG_LVL > 1:
                print('Selected Action: %s' % next_action)

            # Apply this action onto the environment
            reward = environment.move(next_action)

            # Use the obtained reward to update the value
            new_environment_state = environment.getCurrentState()
            canvas.update(new_environment_state)

            new_pf_activity = [
                pf.getActivity(new_environment_state) for pf in place_cells
            ]
            prediction_error = critic.updateValue(pf_activity, new_pf_activity,
                                                  reward)
            actor.updateWeights(pf_activity, prediction_error)

        if DBG_LVL > 0:
            print('Ended trial %d moving %.1f.' % (trial, n_steps[trial]))
            # At debug level 1, only selected trajectories and the
            # corresponding value functions are shown. At higher debug levels,
            # the entire trajectory is shown for every iteration.
            # NOTE(review): `trial == 1` selects the second trial (trials are
            # 0-indexed); confirm whether `trial == 0` was intended.
            if (DBG_LVL > 1) or (trial == 1) or (trial == n_trials - 1):
                # Plot the trajectory taken for this trial
                canvas.plotTrajectory()

                # This takes extremely long when using a population of neurons
                canvas.plotValueFunction(place_cells,
                                         critic,
                                         limits=False,
                                         continuous=True)

    if DBG_LVL > 0:
        Graphics.plot(n_steps)
    else:
        print('Step Statistics - Mean (%.2f), STD (%.2f)' %
              (np.mean(n_steps), np.std(n_steps)))

    return (actor, critic, n_steps)

Пример #2

Показать файл

def testMaze(n_training_trials, n_navigation_trials):
    """
    Train a single actor/critic pair on two environments, then navigate both.

    The pair is first trained on an open field with a random goal (E1) and
    then on a maze with two walls (E2). Afterwards the same pair navigates
    each environment. See single_maze_learning_agent.py for more details.

    INPUTS:
    -------
    n_training_trials: Number of learning trials run in each environment
    n_navigation_trials: Number of navigation trials run in each environment

    OUTPUTS:
    --------
    (learning_steps_E1, learning_steps_E2, navigation_steps_E1,
     navigation_steps_E2): step records returned by
        ValueLearning.learnValueFunction and ValueLearning.navigate for the
        two environments.
    """
    ValueLearning.DBG_LVL = 0

    # Geometry shared by both environments
    step_size = 0.29
    grid_x, grid_y = 6, 6

    # Size of the place-cell population
    n_fields = round(1.0 * (grid_x + 3) * (grid_y + 3))
    Hippocampus.N_CELLS_PER_FIELD = 4
    n_cells = Hippocampus.N_CELLS_PER_FIELD * n_fields

    # Training / navigation budgets
    n_alternations = 1
    nav_step_limit = 400
    train_step_limit = 4000

    # Environment 1: open field with its own place fields and place cells
    env_E1 = Environment.RandomGoalOpenField(grid_x, grid_y, step_size)
    canvas_E1 = Graphics.WallMazeCanvas(env_E1)
    fields_E1 = Hippocampus.setupPlaceFields(env_E1, n_fields)
    cells_E1 = Hippocampus.assignPlaceCells(n_cells, fields_E1)

    # Untrained actor and critic shared by both environments
    actor = Agents.Actor(env_E1.getActions(), n_cells)
    critic = Agents.Critic(n_cells)

    # Environment 2: same grid size with two walls, and a different set (but
    # the same number) of place fields and place cells
    walls = [
        Environment.Wall((0, 3), (3, 3)),
        Environment.Wall((4, 3), (6, 3)),
    ]
    env_E2 = Environment.MazeWithWalls(grid_x, grid_y, walls, step_size)
    canvas_E2 = Graphics.WallMazeCanvas(env_E2)
    fields_E2 = Hippocampus.setupPlaceFields(env_E2, n_fields)
    cells_E2 = Hippocampus.assignPlaceCells(n_cells, fields_E2)

    # Placeholders; replaced by the step records of the latest alternation
    learning_steps_E1 = np.zeros((n_training_trials, 1), dtype=float)
    learning_steps_E2 = np.zeros((n_training_trials, 1), dtype=float)
    for alt in range(n_alternations):
        print('Alternation: %d' % alt)

        print('Learning Environment A')
        actor, critic, learning_steps_E1 = ValueLearning.learnValueFunction(
            n_training_trials, env_E1, cells_E1, actor, critic,
            train_step_limit)

        print('Learning Environment B')
        actor, critic, learning_steps_E2 = ValueLearning.learnValueFunction(
            n_training_trials, env_E2, cells_E2, actor, critic,
            train_step_limit)

    # After alternation, check the behavior on both the tasks
    ValueLearning.DBG_LVL = 0

    print('Navigating Environment A')
    navigation_steps_E1 = ValueLearning.navigate(
        n_navigation_trials, env_E1, cells_E1, actor, critic, nav_step_limit)

    print('Navigating Environment B')
    navigation_steps_E2 = ValueLearning.navigate(
        n_navigation_trials, env_E2, cells_E2, actor, critic, nav_step_limit)

    return (learning_steps_E1, learning_steps_E2, navigation_steps_E1,
            navigation_steps_E2)