def createExperimentInstance():
    gymRawEnv = gym.make('MountainCar-v0')

    cartPositionGroup = Digitizer.buildBins(-1.2, 0.6, 16)
    cartVelocityGroup = Digitizer.buildBins(-0.07, 0.07, 16)

    # print("Cart position bins:", cartPositionGroup)
    # print("Cart velocity bins:", cartVelocityGroup)

    observationDigitizer = ArrayDigitizer([cartPositionGroup, cartVelocityGroup])
    transformation = EnvTransformation(observationDigitizer)

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    # env.setCumulativeRewardMode()

    # create value table and initialize with zeros
    table = ActionValueTable(observationDigitizer.states, env.numActions)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )

    agent = createAgent(table)

    experiment = Experiment(task, agent)
    experiment = ProcessExperiment(experiment, ExperimentIteration())
    return experiment
def createExperimentInstance():
    gymRawEnv = gym.make('MountainCarContinuous-v0')

    cartPositionGroup = Digitizer.buildBins(-1.2, 0.6, 16)
    cartVelocityGroup = Digitizer.buildBins(-0.07, 0.07, 4)
    actionDedigitizer = Digitizer.build(-1.0, 1.0, 5, True)

    # print("Cart position bins:", cartPositionGroup)
    # print("Cart velocity bins:", cartVelocityGroup)
    # print("Cart force bins:", actionDedigitizer.bins, actionDedigitizer.possibleValues())

    observationDigitizer = ArrayDigitizer([cartPositionGroup, cartVelocityGroup])
    transformation = EnvTransformation(observationDigitizer, actionDedigitizer)

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    # env.setCumulativeRewardMode()

    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    ##   alpha -- learning rate (preference of new information)
    ##   gamma -- discount factor (importance of future reward)

    # create value table and initialize with zeros
    table = ActionValueTable(observationDigitizer.states, actionDedigitizer.states)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )

    agent = createAgent(table)

    experiment = Experiment(task, agent)
    experiment = ProcessExperiment(experiment, doSingleExperiment)
    return experiment
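## createAgent() itself is defined elsewhere in the project. A minimal sketch of what it
## might look like, assuming the PyBrain-style API that ActionValueTable and the
## SARSA()/Q()/QLambda() comment point to; the concrete learner choice and the
## alpha/gamma values below are placeholders, not the project's actual settings.

from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners.valuebased import Q   # alternatives: SARSA, QLambda

def createAgent(table):
    ## alpha -- learning rate (preference of new information)
    ## gamma -- discount factor (importance of future reward)
    learner = Q(alpha=0.5, gamma=0.99)
    return LearningAgent(table, learner)

## With the 5-state action de-digitizer above, the agent picks a discrete action index
## 0..4 and the transformation maps it back to a force in {-1.0, -0.5, 0.0, 0.5, 1.0}.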
## Gym observation: [position, velocity, pole angle, pole velocity]
##      position:      (-2.5, 2.5)
##      velocity:      (-inf, inf)
##      pole angle:    (-41.8, 41.8)
##      pole velocity: (-inf, inf)
## Reward:
##      A reward of +1 is provided for every timestep that the pole remains upright.

gymRawEnv = gym.make('CartPole-v1')
# gymRawEnv = gym.make('CartPole-v0')
## env.tags['wrapper_config.TimeLimit.max_episode_steps'] = 500

cartPositionGroup = Digitizer.buildBins(-2.4, 2.4, 6)      ## terminates when outside range (-2.4, 2.4)
cartVelocityGroup = Digitizer.buildBins(-1.0, 1.0, 3)
poleAngleGroup    = Digitizer.buildBins(-12.0, 12.0, 2)    ## terminates when outside range (-12, 12)
poleVelocityGroup = Digitizer.buildBins(-4.0, 4.0, 4)

print("Cart position bins:", cartPositionGroup)
print("Cart velocity bins:", cartVelocityGroup)
print("Pole angle bins:", poleAngleGroup)
print("Pole velocity bins:", poleVelocityGroup)

observationDigitizer = ArrayDigitizer([cartPositionGroup, cartVelocityGroup, poleAngleGroup, poleVelocityGroup])
transformation = EnvTransformation(observationDigitizer)

task = GymTask.createTask(gymRawEnv)
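## How the four continuous readings become a single table index: a minimal sketch of the
## digitisation step, assuming np.digitize-style binning per dimension (consistent with the
## Digitizer unit tests) and a row-major packing of the per-dimension indexes. The packing
## order is an assumption, and combinedStateIndex/interiorBins are hypothetical helpers,
## not part of the project's ArrayDigitizer API.

import numpy as np

def interiorBins(low, high, statesNum):
    ## interior boundaries of an even split, mirroring Digitizer.buildBins(low, high, statesNum)
    return np.linspace(low, high, statesNum + 1)[1:-1]

def combinedStateIndex(observation, dimensionBins):
    ## digitize each component, then fold the per-dimension indexes into one integer
    index = 0
    for value, bins in zip(observation, dimensionBins):
        statesNum = len(bins) + 1                      ## N boundaries -> N+1 states
        index = index * statesNum + int(np.digitize(value, bins))
    return index

dimensionBins = [interiorBins(-2.4, 2.4, 6),      ## cart position
                 interiorBins(-1.0, 1.0, 3),      ## cart velocity
                 interiorBins(-12.0, 12.0, 2),    ## pole angle
                 interiorBins(-4.0, 4.0, 4)]      ## pole velocity

## a sample observation maps to one of 6*3*2*4 = 144 combined states
print(combinedStateIndex([0.1, 0.2, 1.0, -0.5], dimensionBins))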
def test_values_noEdges_5(self):
    digitizer = Digitizer.build(-1.0, 1.0, 5, False)
    npt.assert_array_almost_equal(digitizer.values, [-0.6, -0.3, 0.0, 0.3, 0.6], 3)
## =============================================================================

## Gym expected action:
##      0 -- left
##      1 -- neutral
##      2 -- right
## Gym observation: [position, velocity]
##      position: (-1.2, 0.6)
##      velocity: (-0.07, 0.07)

gymRawEnv = gym.make('MountainCar-v0')

cartPositionGroup = Digitizer.buildBins(-1.2, 0.6, 16)
cartVelocityGroup = Digitizer.buildBins(-0.07, 0.07, 16)

# print("Cart position bins:", cartPositionGroup)
# print("Cart velocity bins:", cartVelocityGroup)

observationDigitizer = ArrayDigitizer([cartPositionGroup, cartVelocityGroup])
transformation = EnvTransformation(observationDigitizer)

task = GymTask.createTask(gymRawEnv)
env = task.env
env.setTransformation(transformation)
## env.setCumulativeRewardMode()

# create value table and initialize with ones
table = ActionValueTable(observationDigitizer.states, env.numActions)
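## A worked example of the binning above (a sketch: it assumes np.digitize-style binning,
## which matches the Digitizer unit tests, and that the value table spans the
## 16 * 16 = 256 combined observation states by the 3 discrete MountainCar actions):
import numpy as np
positionBin = int(np.digitize(-0.5, cartPositionGroup))   ## a reading near the valley bottom -> bin 6 of 16
velocityBin = int(np.digitize(0.01, cartVelocityGroup))   ## a small positive velocity -> bin 9 of 16
print("position bin:", positionBin, "velocity bin:", velocityBin)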
def test_value_states(self):
    digitizer = Digitizer([0.0, 2.0])
    value = digitizer.value(digitizer.states - 1)
    npt.assert_array_almost_equal(value, 2.0, 2)
def test_values_edges_5(self):
    digitizer = Digitizer.build(-1.0, 1.0, 5, True)
    npt.assert_array_almost_equal(digitizer.values, [-1.0, -0.5, 0.0, 0.5, 1.0], 3)
def test_value_badInput_positive(self):
    digitizer = Digitizer([0.0, 2.0])
    self.assertRaises(ValueError, digitizer.value, 3)
def test_value_2(self):
    digitizer = Digitizer([0.0, 2.0])
    value = digitizer.value(2)
    npt.assert_array_almost_equal(value, 2.0, 2)
def test_numstates(self):
    digitizer = Digitizer([0.0])
    states = digitizer.numstates()
    self.assertEqual(states, 2)
def test_state(self):
    digitizer = Digitizer([0.0])
    indexes = digitizer.digitize([-1.0, 1.0])
    npt.assert_equal(indexes, [0, 1])
def test_build_edge_5(self):
    digitizer = Digitizer.build(0.0, 10.0, 5, True)
    self.assertEqual(digitizer.states, 5)
    npt.assert_array_almost_equal(digitizer.bins, [0., 3.333, 6.667, 10.], 3)
    npt.assert_array_almost_equal(digitizer.values, [0., 2.5, 5., 7.5, 10.], 3)
def test_build_noEdge_5(self):
    digitizer = Digitizer.build(0.0, 10.0, 5, False)
    self.assertEqual(digitizer.states, 5)
    npt.assert_array_almost_equal(digitizer.bins, [2., 4., 6., 8.], 3)
    npt.assert_array_almost_equal(digitizer.values, [2., 3.5, 5., 6.5, 8.], 3)
def test_buildBins_bins04_edges(self):
    bins = Digitizer.buildBins(0.0, 12.0, 4, True)
    npt.assert_equal(bins, [0.0, 6.0, 12.0])
def test_buildBins_bins03(self):
    bins = Digitizer.buildBins(0.0, 12.0, 3)
    npt.assert_equal(bins, [4.0, 8.0])
def test_buildBins_bins02(self):
    bins = Digitizer.buildBins(0.0, 12.0, 2)
    npt.assert_equal(bins, [6.0])
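## The tests above pin down the Digitizer behaviour well enough to sketch a minimal
## reference implementation. MiniDigitizer below is a hypothetical stand-in written
## against those tests, not the project's actual class; it assumes np.digitize-style
## binning and evenly spaced representative values between the outermost boundaries.

import numpy as np

class MiniDigitizer:

    def __init__(self, bins):
        self.bins = np.asarray(bins, dtype=float)
        ## N boundaries split the value range into N+1 states
        self.states = len(self.bins) + 1
        ## representative value for each state, evenly spread over the boundary span
        self.values = np.linspace(self.bins[0], self.bins[-1], self.states)

    def numstates(self):
        return self.states

    def digitize(self, data):
        ## index of the state each sample falls into
        return np.digitize(data, self.bins)

    def value(self, state):
        ## representative value of a state index, e.g. for de-digitizing actions
        if state < 0 or state >= self.states:
            raise ValueError("state index out of range: %s" % state)
        return self.values[state]

    @staticmethod
    def buildBins(minValue, maxValue, binsNum, edges=False):
        if edges:
            ## boundaries include the range limits themselves
            return np.linspace(minValue, maxValue, binsNum - 1)
        ## boundaries are the interior points of an even split of the range
        return np.linspace(minValue, maxValue, binsNum + 1)[1:-1]

    @staticmethod
    def build(minValue, maxValue, statesNum, edges=False):
        return MiniDigitizer(MiniDigitizer.buildBins(minValue, maxValue, statesNum, edges))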