def createExperimentInstance():
    """Assemble a ProcessExperiment for the continuous MountainCar task.

    Discretizes the (position, velocity) observation space and maps the
    continuous force action onto a fixed set of levels, then wires the
    Gym task, value table and agent together.

    Returns:
        ProcessExperiment wrapping the configured Experiment.
    """
    raw_env = gym.make('MountainCarContinuous-v0')

    # Observation digitizers: 16 position states x 4 velocity states.
    position_bins = Digitizer.buildBins(-1.2, 0.6, 16)
    velocity_bins = Digitizer.buildBins(-0.07, 0.07, 4)
    # Continuous force in [-1, 1] de-digitized from 5 discrete levels.
    force_dedigitizer = Digitizer.build(-1.0, 1.0, 5, True)

    # print("Cart position bins:", position_bins)
    # print("Cart velocity bins:", velocity_bins)
    # print("Cart force bins:", force_dedigitizer.bins, force_dedigitizer.possibleValues())

    state_digitizer = ArrayDigitizer([position_bins, velocity_bins])
    env_transform = EnvTransformation(state_digitizer, force_dedigitizer)

    task = GymTask.createTask(raw_env)
    env = task.env
    env.setTransformation(env_transform)
    # env.setCumulativeRewardMode()

    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    ## alpha -- learning rate (preference of new information)
    ## gamma -- discount factor (importance of future reward)

    # Value table over every discretized (state, action) pair, zeroed.
    table = ActionValueTable(state_digitizer.states, force_dedigitizer.states)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )

    agent = createAgent(table)
    experiment = Experiment(task, agent)
    return ProcessExperiment(experiment, doSingleExperiment)
def createExperimentInstance():
    """Assemble a ProcessExperiment for the discrete MountainCar task.

    Discretizes the (position, velocity) observation into a 16x16 grid;
    the environment's own discrete action set is used unchanged.

    Returns:
        ProcessExperiment wrapping the configured Experiment.
    """
    raw_env = gym.make('MountainCar-v0')

    # Observation digitizers: 16 position states x 16 velocity states.
    position_bins = Digitizer.buildBins(-1.2, 0.6, 16)
    velocity_bins = Digitizer.buildBins(-0.07, 0.07, 16)
    # print("Cart position bins:", position_bins)
    # print("Cart velocity bins:", velocity_bins)

    state_digitizer = ArrayDigitizer([position_bins, velocity_bins])
    env_transform = EnvTransformation(state_digitizer)

    task = GymTask.createTask(raw_env)
    env = task.env
    env.setTransformation(env_transform)
    # env.setCumulativeRewardMode()

    # Value table over all discretized states and the env's native
    # discrete actions, zero-initialized.
    table = ActionValueTable(state_digitizer.states, env.numActions)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )

    agent = createAgent(table)
    experiment = Experiment(task, agent)
    return ProcessExperiment(experiment, ExperimentIteration())
## Gym observation: [position, velocity, pole angle, pole velocity] ## position: (-2.5, 2.5) ## velocity (-inf, inf) ## pole angle (-41.8, 41.8) ## pole vel: (-inf, inf) ## Reward: ## A reward of +1 is provided for every timestep that the pole remains upright. gymRawEnv = gym.make('CartPole-v1') # gymRawEnv = gym.make('CartPole-v0') ## env.tags['wrapper_config.TimeLimit.max_episode_steps'] = 500 cartPositionGroup = Digitizer.buildBins( -2.4, 2.4, 6) ## terminates when outside range (-2.4, 2.4) cartVelocityGroup = Digitizer.buildBins(-1.0, 1.0, 3) poleAngleGroup = Digitizer.buildBins( -12.0, 12.0, 2) ## terminates when outside range (-12, 12) poleVelocityGroup = Digitizer.buildBins(-4.0, 4.0, 4) print("Cart position bins:", cartPositionGroup) print("Cart velocity bins:", cartVelocityGroup) print("Pole angle bins:", poleAngleGroup) print("Pole velocity bins:", poleVelocityGroup) observationDigitizer = ArrayDigitizer( [cartPositionGroup, cartVelocityGroup, poleAngleGroup, poleVelocityGroup]) transformation = EnvTransformation(observationDigitizer) task = GymTask.createTask(gymRawEnv)
## ============================================================================= ## Gym expected action: ## 0 -- left ## 1 -- neutral ## 2 -- right ## Gym observation: [position, velocity] ## position: (-1.2, 0.6) ## velocity (-0.07, 0.07) gymRawEnv = gym.make('MountainCar-v0') cartPositionGroup = Digitizer.buildBins(-1.2, 0.6, 16) cartVelocityGroup = Digitizer.buildBins(-0.07, 0.07, 16) # print("Cart position bins:", cartPositionGroup) # print("Cart velocity bins:", cartVelocityGroup) observationDigitizer = ArrayDigitizer([cartPositionGroup, cartVelocityGroup]) transformation = EnvTransformation(observationDigitizer) task = GymTask.createTask(gymRawEnv) env = task.env env.setTransformation(transformation) ## env.setCumulativeRewardMode() # create value table and initialize with ones table = ActionValueTable(observationDigitizer.states, env.numActions)
def test_buildBins_bins04_edges(self):
    """With edge inclusion, 4 states over [0, 12] give boundaries 0, 6, 12."""
    expected = [0.0, 6.0, 12.0]
    npt.assert_equal(Digitizer.buildBins(0.0, 12.0, 4, True), expected)
def test_buildBins_bins03(self):
    """Without edges, 3 states over [0, 12] give interior boundaries 4, 8."""
    expected = [4.0, 8.0]
    npt.assert_equal(Digitizer.buildBins(0.0, 12.0, 3), expected)
def test_buildBins_bins02(self):
    """Without edges, 2 states over [0, 12] give the single midpoint boundary."""
    expected = [6.0]
    npt.assert_equal(Digitizer.buildBins(0.0, 12.0, 2), expected)