Example #1
from collections import defaultdict

# Env, EspionGreedyPolicy, RandomPolicy and get_episode are helpers from the
# surrounding project (see the sketch of get_episode after this example).

def on_mc():
    # On-policy every-visit Monte Carlo control with an epsilon-greedy policy.
    grid_size = 4
    env = Env(grid_size)
    policy = EspionGreedyPolicy(env.actions(), range(grid_size**2))
    Q = defaultdict(float)   # action-value estimates Q(s, a)
    R = defaultdict(list)    # returns observed for each (s, a) pair
    for i in range(5000):
        G = 0
        states = get_episode(env, policy)
        # Walk the episode backwards, accumulating the discounted return G
        # (discount factor 0.9) and averaging it into Q(s, a).
        for (s0, a, s1, r) in reversed(states):
            G = 0.9 * G + r
            R[(s0, a)].append(G)
            Q[(s0, a)] = sum(R[(s0, a)]) / len(R[(s0, a)])

        # Policy improvement: make the policy greedy with respect to the
        # updated Q in every state visited during the episode.
        for (s0, a, s1, r) in reversed(states):
            mm = [(x, Q[(s0, x)]) for x in env.actions()]
            action = max(mm, key=lambda x: x[1])[0]
            policy.set_max(s0, action)

    # Read out the learnt policy: greedy action per state, 'ter' for terminals.
    Pi = {}
    for i in range(grid_size**2):
        Pi[i] = policy.get_m(i)
    for t in env.get_t():
        Pi[t] = 'ter'

    env.render(Pi)
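
The examples on this page call a project-specific get_episode helper whose implementation is not shown. A minimal sketch of what it presumably does, assuming Env exposes init(), step() and is_t() exactly as they are used above (the real helper may differ):

def get_episode(env, policy):
    # Roll out one episode under the given policy and return it as a list of
    # (s0, a, s1, r) transition tuples, oldest first.
    episode = []
    s0 = env.init()
    while not env.is_t(s0):
        a = policy.get_a(s0)
        s1, r = env.step(a)
        episode.append((s0, a, s1, r))
        s0 = s1
    return episode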
Example #2
def off_mc():
    # Off-policy Monte Carlo control with weighted importance sampling:
    # the behaviour policy is RandomPolicy, the target policy Pi is greedy
    # with respect to Q.
    env = Env(6)
    policy = RandomPolicy(env.actions())
    C = defaultdict(float)   # cumulative importance-sampling weights
    Q = defaultdict(float)   # action-value estimates Q(s, a)
    Pi = {}                  # greedy target policy
    for i in range(10000):
        G = 0
        W = 1.0   # importance-sampling weight of the current episode tail
        states = get_episode(env, policy)
        for (s0, a, s1, r) in reversed(states):
            G = 0.9 * G + r
            # Weighted importance-sampling update of Q(s0, a) towards G.
            C[(s0, a)] += W
            Q[(s0, a)] += W / C[(s0, a)] * (G - Q[(s0, a)])
            # Keep the target policy greedy with respect to Q.
            Pi[s0] = max([(x, Q[(s0, x)]) for x in env.actions()],
                         key=lambda x: x[1])[0]
            # If the behaviour action differs from the greedy action, the
            # target policy would never have taken it, so earlier steps of
            # the episode contribute nothing: stop here.
            if a != Pi[s0]:
                break
            # The target policy is deterministic, so W grows by 1 / b(a | s0).
            W = W / policy.get_p(s0, a)

    for t in env.get_t():
        Pi[t] = 'ter'
    env.render(Pi)
Example #3
def sarsa():
    # Tabular SARSA with an epsilon-greedy policy, step size 0.9 and
    # discount factor 0.9.
    grid_size = 4
    env = Env(grid_size)
    policy = EspionGreedyPolicy(env.actions(), range(grid_size**2))
    Q = defaultdict(float)
    for i in range(5000):
        s0 = env.init()
        if env.is_t(s0):
            continue
        a0 = policy.get_a(s0)
        while not env.is_t(s0):
            s, r = env.step(a0)
            a = policy.get_a(s)
            # SARSA update of Q(s0, a0) from the transition (s0, a0, r, s, a).
            Q[(s0, a0)] += 0.9 * (r + 0.9 * Q[(s, a)] - Q[(s0, a0)])
            # Make the policy greedy with respect to the freshly updated
            # Q(s0, .) before moving on to the next state.
            mm = [(x, Q[(s0, x)]) for x in env.actions()]
            action = max(mm, key=lambda x: x[1])[0]
            policy.set_max(s0, action)
            s0 = s
            a0 = a

    Pi = {}
    for i in range(grid_size**2):
        Pi[i] = policy.get_m(i)
    for t in env.get_t():
        Pi[t] = 'ter'

    env.render(Pi)
from sklearn.linear_model import SGDRegressor


class Sarsa(object):
    # SARSA with linear function approximation: Q(s, a) is represented by an
    # SGDRegressor over one-hot state/action features instead of a table.
    def __init__(self, size=4):
        self.grid_size = size
        self.env = Env(self.grid_size)
        self.a_id = dict([(a, i) for i, a in enumerate(self.env.actions())])
        self.policy = EspionGreedyPolicy(self.env.actions(),
                                         range(self.grid_size**2))

    def get_f(self, s, a):
        # One-hot feature vector: the first grid_size**2 entries encode the
        # state, the last 4 entries encode the action.
        f = [0] * (self.grid_size**2 + 4)
        f[s] = 1
        f[self.grid_size**2 + self.a_id[a]] = 1
        return f

    def sarsa(self):
        policy = self.policy
        Q = SGDRegressor()
        # Prime the regressor with one sample so predict()/partial_fit() work.
        f = self.get_f(1, 'left')
        Q.fit([f], [1])
        for i in range(500):
            s0 = self.env.init()
            if self.env.is_t(s0):
                continue
            a0 = policy.get_a(s0)
            while not self.env.is_t(s0):
                s, r = self.env.step(a0)
                a = policy.get_a(s)
                f0 = self.get_f(s0, a0)
                f = self.get_f(s, a)
                # Semi-gradient SARSA target for (s0, a0): step size 0.9,
                # discount factor 0.9.
                target = Q.predict([f0])[0] + 0.9 * (
                    r + 0.9 * Q.predict([f])[0] - Q.predict([f0])[0])
                Q.partial_fit([f0], [target])
                # Greedy policy improvement in s0 under the approximate Q.
                mm = [(x, Q.predict([self.get_f(s0, x)])[0])
                      for x in self.env.actions()]
                action = max(mm, key=lambda x: x[1])[0]
                policy.set_max(s0, action)
                s0 = s
                a0 = a

        Pi = {}
        for i in range(self.grid_size**2):
            Pi[i] = policy.get_m(i)
        for t in self.env.get_t():
            Pi[t] = 'ter'

        self.env.render(Pi)
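
A typical way to run the examples above, assuming Env and the policy classes are importable from the same project:

if __name__ == '__main__':
    on_mc()                 # Example #1: on-policy Monte Carlo control
    off_mc()                # Example #2: off-policy Monte Carlo control
    sarsa()                 # Example #3: tabular SARSA
    Sarsa(size=4).sarsa()   # SARSA with linear function approximation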