def main(lr, epsilon, gamma, decay_lr, decay_epsilon, modelfile):
    """Train an Agent on GridWorld and pickle the trained agent to disk.

    Args:
        lr: learning rate forwarded to the Agent.
        epsilon: exploration rate forwarded to the Agent.
        gamma: discount factor forwarded to the Agent.
        decay_lr: learning-rate decay setting forwarded to the Agent.
        decay_epsilon: epsilon decay setting forwarded to the Agent.
        modelfile: path the trained agent is pickled to.
    """
    seed = 42
    np.random.seed(seed)  # fixed seed so runs are reproducible

    env = GridWorld()
    agent = Agent(env.get_state_dims(), env.action_size, lr, epsilon, gamma,
                  decay_lr, decay_epsilon, supervisor=True)

    print_freq = 10000
    logger = Logger(print_freq)

    # Supervisor-state index -> corrective action. Hoisted out of the step
    # loop so the dict is built once instead of on every interrupting step.
    action_dict = {
        0: Action.Drop1,
        1: Action.Drop2,
        2: Action.Drop3,
        3: Action.Pick1,
        4: Action.Pick2,
        5: Action.Pick3,
    }

    for epochs in range(500000):
        s, done, trajectory, score, steps = env.reset(), False, [], 0, 0
        # Episodes are capped at 60 steps (was 100 per the old comment).
        while not done and steps < 60:
            a = agent.Pi(s, env)
            sprime, r, done, interrupt = env.step(a)
            a2 = a
            if not interrupt:
                agent.update(s, a, r, sprime, done)
            else:
                # Interrupt service routine will handle this transition:
                # do not update the Q table's values here.
                # NOTE(review): `s` is deliberately overwritten with the
                # supervisor state index ("for easy debugging purposes" in the
                # original), so this trajectory entry records that index
                # rather than the environment state — confirm this is wanted.
                s = env.ar.get_state() - 1 if env.ar.is_active() else 5
                a2 = action_dict[s]
            trajectory.append([s, a, a2, r, sprime, done])
            s = sprime
            score += r
            steps += 1

        if print_traj:  # NOTE(review): `print_traj` is a module-level flag defined elsewhere
            print(trajectory)

        logger.update(epochs, score, steps, env)
        if epochs % print_freq == print_freq - 1:
            logger.log(epochs)
            # print(generate_best_trajectory(env, agent))
        agent.decay()

    # Context manager guarantees the file is closed even if pickling raises
    # (the original open/close pair leaked the handle on error).
    with open(modelfile, "wb") as f:
        pickle.dump(agent, f)