Exemplo n.º 1
0
import gym
import Agent
import random
import numpy as np


def RandomActionDiscrete(ActionProp):
    """Sample a discrete action and return it with its one-hot encoding.

    Args:
        ActionProp: Sequence of per-action probabilities; forwarded to
            ``np.random.choice`` as the ``p`` argument.

    Returns:
        A ``(index, one_hot)`` tuple: the sampled action index, and a float
        array of length ``len(ActionProp)`` that holds 1 at that index and
        0 everywhere else.
    """
    candidates = np.arange(len(ActionProp))
    chosen = np.random.choice(candidates, p=ActionProp)
    # Mark the sampled position by comparing every index with the draw.
    one_hot = (candidates == chosen).astype(float)
    return chosen, one_hot


# Create the CartPole-v1 environment and the agent that will act in it.
# NOTE(review): the arguments of ACPGAg(2, 4) are presumably the number of
# actions and the observation size -- confirm against the Agent module.
env = gym.make("CartPole-v1")
Ag = Agent.ACPGAg(2, 4)

EpisodeTime = 200  # upper bound of the per-episode step loop (range(EpisodeTime))
CurrentEpisode = 0  # number of episodes started so far
DoneTimes = 0  # counter reported every 50 episodes; its increment is not visible in this excerpt
while True:
    # Each iteration of this loop is one episode: reset the environment and
    # clear the per-episode buffers before stepping.
    Observation = env.reset()
    # Per-episode buffers -- presumably observations, rewards and actions;
    # how they are filled is not visible in this excerpt.
    O_array = []
    R_array = []
    A_array = []
    AccReward = 0
    CurrentEpisode += 1
    # Every 50th episode: print the DoneTimes counter, then restart it.
    if CurrentEpisode % 50 == 0:
        print(DoneTimes)
        DoneTimes = 0
    for t in range(EpisodeTime):
Exemplo n.º 2
0
import random
import numpy as np

def RandomActionDiscrete(ActionProp):
    """Sample a discrete action and return it with its one-hot encoding.

    A single uniform draw in [0, 1) is compared against the running
    (cumulative) sum of the probabilities, so that action ``i`` is selected
    with probability ``ActionProp[i]``.

    Args:
        ActionProp: Sequence of per-action probabilities, expected to sum
            to 1.

    Returns:
        A ``(index, one_hot)`` tuple: the sampled action index, and a float
        array of length ``len(ActionProp)`` that holds 1 at that index and
        0 everywhere else.
    """
    ActionSize = len(ActionProp)
    ActionOut = np.zeros(ActionSize)
    rand = random.uniform(0, 1)
    # Fall back to the last action if rounding leaves the accumulated total
    # marginally below the draw.
    Chosen = ActionSize - 1
    # The draw must be tested against the cumulative probability: comparing
    # it with each entry on its own skews the distribution (later actions
    # would only be reachable when their probability exceeds all earlier
    # ones, and the remainder would pile up on the last action).
    Cumulative = 0.0
    for ActionIndex, Prob in enumerate(ActionProp):
        Cumulative += Prob
        if rand < Cumulative:
            Chosen = ActionIndex
            break
    ActionOut[Chosen] = 1
    return Chosen, ActionOut

# Create the Acrobot-v1 environment and the agent that will act in it.
# NOTE(review): the arguments of ACPGAg(3, 6) are presumably the number of
# actions and the observation size -- confirm against the Agent module.
env = gym.make("Acrobot-v1")
Ag = Agent.ACPGAg(3, 6)

CurrentEpisode = 0  # number of episodes started so far
while True:
    # Each iteration of this outer loop is one episode: reset the
    # environment and the per-episode bookkeeping before stepping.
    Observation = env.reset()
    # Per-episode buffers -- presumably observations, rewards and actions;
    # how they are filled is not visible in this excerpt.
    O_array = []
    R_array = []
    A_array = []
    CurrentEpisode += 1
    rememberTimeStep = 0
    StepTime = 0  # steps taken so far in the current episode
    score = 0
    # Inner loop: one iteration per environment step; its exit condition is
    # not visible in this excerpt.
    while True:
        StepTime += 1
        # Render only while running every 100th episode.
        if CurrentEpisode%100 == 0:
            env.render()