def example_6_6():
    fig, ax = plt.subplots()
    fig.suptitle(f'Example 6.6 (Averaged over {EX_6_6_N_SEEDS} seeds)')
    ax.set_xlabel('Episodes')
    ax.set_ylabel(f'(Average of last {EX_6_6_N_AVG}) sum of rewards during episodes')
    ax.set_yticks(EX_6_6_YTICKS)
    ax.set_ylim(bottom=min(EX_6_6_YTICKS))
    n_ep = EX_6_6_N_EPS
    env = TheCliff()
    qlearning_alg = QLearning(env, step_size=EX_6_5_STEP_SIZE, gamma=UNDISCOUNTED, eps=EX_6_5_EPS)
    sarsa_alg = Sarsa(env, step_size=EX_6_5_STEP_SIZE, gamma=UNDISCOUNTED, eps=EX_6_5_EPS)
    qlearning_rew = np.zeros(n_ep)
    sarsa_rew = np.zeros(n_ep)
    for seed in range(EX_6_6_N_SEEDS):
        print(f"seed={seed}")
        qlearning_alg.seed(seed)
        qlearning_rew += qlearning_alg.q_learning(n_ep)
        sarsa_alg.seed(seed)
        sarsa_rew += sarsa_alg.on_policy_td_control(n_ep, rews=True)
    plt.plot(smooth_rewards(qlearning_rew / EX_6_6_N_SEEDS, EX_6_6_N_AVG), color='r', label='Q learning')
    plt.plot(smooth_rewards(sarsa_rew / EX_6_6_N_SEEDS, EX_6_6_N_AVG), color='b', label='Sarsa')
    plt.legend()
    plt.savefig('example6.6.png')
    plt.show()
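
# smooth_rewards is used above but not shown in this excerpt. Below is a minimal
# sketch of what it might look like, assuming (as the y-label suggests) it averages
# each episode's return over the previous EX_6_6_N_AVG episodes; the name and exact
# behaviour are assumptions, not necessarily this project's actual implementation.
def smooth_rewards(rewards, n_avg):
    # Running mean over the last `n_avg` entries, with shorter windows at the start.
    smoothed = np.empty_like(rewards, dtype=float)
    for i in range(len(rewards)):
        lo = max(0, i - n_avg + 1)
        smoothed[i] = rewards[lo:i + 1].mean()
    return smoothed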
def plot_sarsa(ax, n_ep, label=None, diags=False, stay=False, stoch=False, seed=0):
    env = WindyGridworld(diags, stay, stoch)
    alg = Sarsa(env, step_size=EX_6_5_STEP_SIZE, gamma=UNDISCOUNTED, eps=EX_6_5_EPS)
    alg.seed(seed)
    kwargs = {"label": label} if label else {}
    # Plot on the axes that were passed in rather than the implicit current axes.
    ax.plot(alg.on_policy_td_control(n_ep), **kwargs)
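
# One way plot_sarsa might be driven to compare the windy gridworld variants from
# the book (standard four moves, King's moves, King's moves plus "stay", stochastic
# wind). The function name, figure setup, episode count, and labels below are
# illustrative assumptions, not the repository's actual driver code.
def compare_windy_variants(n_ep=200):
    fig, ax = plt.subplots()
    fig.suptitle('Sarsa on variants of the windy gridworld')
    plot_sarsa(ax, n_ep, label='4 moves')
    plot_sarsa(ax, n_ep, label="King's moves", diags=True)
    plot_sarsa(ax, n_ep, label="King's moves + stay", diags=True, stay=True)
    plot_sarsa(ax, n_ep, label='stochastic wind', diags=True, stay=True, stoch=True)
    ax.legend()
    plt.show()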
# Randomly locate the food on the barn.
amount_food = randint(max_size // 2, 2 * max_size)
food = []
while len(food) < amount_food:
    # Add a new piece of food.
    food.append((randint(0, max_size - 1), randint(0, max_size - 1)))
    # Ensure uniqueness.
    food = list(set(food))
# Start the algorithm.
sarsa = Sarsa(BarnState((0, 0), food, max_size), epsilon=epsilon, alpha=alpha, gamma=gamma)
sarsa.seed(int(100 * time.time()))
# Keep track of how much the Q-values move on each iteration.
track = []
for it in range(1, max_iters + 1):
    if it % 10 == 0:
        print("Scenario %d: %d/%d\r" % (n, it, max_iters), end="")
        sys.stdout.flush()
    history, corrections = sarsa.iterate()
    track.append(numpy.sqrt(sum(c * c for c in corrections)))
    # Select "nice" iteration counts (1, 2, 4, 8, 10, 20, 40, 80, 100, ...) at which
    # to evaluate the current policy and create a picture.
    if (it % 10 ** int(log10(it)) == 0) and (it // 10 ** int(log10(it)) in [1, 2, 4, 8]):
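
# sarsa.iterate() is not shown in this excerpt. A minimal sketch of the update it
# presumably applies on each transition, assuming the standard tabular Sarsa rule
# Q(s,a) += alpha * (r + gamma * Q(s',a') - Q(s,a)) and a Q-table stored as a dict;
# the function name and signature here are assumptions, not this project's API.
def sarsa_update(Q, s, a, r, s_next, a_next, alpha, gamma):
    # TD correction for the (s, a, r, s', a') transition; its magnitude is what the
    # `corrections` list above would accumulate.
    correction = alpha * (r + gamma * Q[(s_next, a_next)] - Q[(s, a)])
    Q[(s, a)] += correction
    return correction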