def createExperimentInstance():
    """Assemble the experiment for the discrete Gym 'MountainCar-v0' environment.

    The two observation components are discretised with 16-state bin sets,
    the resulting digitizer is installed on the task's environment through an
    EnvTransformation, and an agent is created around a zero-initialised
    ActionValueTable.

    Returns:
        The Experiment(task, agent) wrapped in a ProcessExperiment together
        with a fresh ExperimentIteration instance.
    """
    rawEnvironment = gym.make('MountainCar-v0')

    # One bin set per observation component: position first, then velocity.
    positionBins = Digitizer.buildBins(-1.2, 0.6, 16)
    velocityBins = Digitizer.buildBins(-0.07, 0.07, 16)
    stateDigitizer = ArrayDigitizer([positionBins, velocityBins])
    envTransformation = EnvTransformation(stateDigitizer)

    gymTask = GymTask.createTask(rawEnvironment)
    wrappedEnv = gymTask.env
    wrappedEnv.setTransformation(envTransformation)
    # Optional, disabled by the original author:
    #     wrappedEnv.setCumulativeRewardMode()

    # Action-value table sized (digitized states x environment actions),
    # with every entry starting at zero.
    valueTable = ActionValueTable(stateDigitizer.states, wrappedEnv.numActions)
    valueTable.initialize(0.0)
    # Alternative start, disabled by the original author:
    #     valueTable.initialize(np.random.rand(valueTable.paramdim))

    learningAgent = createAgent(valueTable)
    return ProcessExperiment(Experiment(gymTask, learningAgent),
                             ExperimentIteration())
示例#2
0
def createExperimentInstance():
    """Assemble the experiment for Gym's 'MountainCarContinuous-v0' environment.

    Observations are discretised with 16 position states and 4 velocity
    states. A 5-state Digitizer over [-1.0, 1.0] is handed to the
    EnvTransformation as its second argument (presumably to translate
    discrete action indices back into continuous force values -- confirm
    against EnvTransformation). The agent is built around a zero-initialised
    ActionValueTable.

    Returns:
        The Experiment(task, agent) wrapped in a ProcessExperiment that is
        given ``doSingleExperiment`` as its second argument.
    """
    rawEnvironment = gym.make('MountainCarContinuous-v0')

    # Observation bins (position, velocity) and the action-side digitizer.
    positionBins = Digitizer.buildBins(-1.2, 0.6, 16)
    velocityBins = Digitizer.buildBins(-0.07, 0.07, 4)
    forceDigitizer = Digitizer.build(-1.0, 1.0, 5, True)

    stateDigitizer = ArrayDigitizer([positionBins, velocityBins])
    envTransformation = EnvTransformation(stateDigitizer, forceDigitizer)

    gymTask = GymTask.createTask(rawEnvironment)
    gymTask.env.setTransformation(envTransformation)
    # Optional, disabled by the original author:
    #     gymTask.env.setCumulativeRewardMode()

    # Author's notes on the agent (presumably configured inside createAgent):
    #   learner choices -- SARSA(), Q() or QLambda()
    #   alpha -- learning rate (preference of new information)
    #   gamma -- discount factor (importance of future reward)

    # Action-value table sized (digitized states x digitized actions),
    # with every entry starting at zero.
    valueTable = ActionValueTable(stateDigitizer.states, forceDigitizer.states)
    valueTable.initialize(0.0)
    # Alternative start, disabled by the original author:
    #     valueTable.initialize(np.random.rand(valueTable.paramdim))

    learningAgent = createAgent(valueTable)
    return ProcessExperiment(Experiment(gymTask, learningAgent),
                             doSingleExperiment)
示例#3
0
## CartPole setup: build one bin set per observation component, combine them
## into an ArrayDigitizer / EnvTransformation, and create the task from the raw
## Gym environment.
##
## Gym observation: [position, velocity, pole angle, pole velocity]
##        position: (-2.5, 2.5)
##        velocity: (-inf, inf)
##        pole angle: (-41.8, 41.8)
##        pole velocity: (-inf, inf)
## NOTE(review): the ranges above are the original author's notes; nothing in
## this script checks them -- confirm values and units against the Gym docs.

## Reward:
##        A reward of +1 is provided for every timestep that the pole remains upright.

gymRawEnv = gym.make('CartPole-v1')
# gymRawEnv = gym.make('CartPole-v0')

## env.tags['wrapper_config.TimeLimit.max_episode_steps'] = 500

## Bin sets, one per observation component. The arguments appear to be
## (lower bound, upper bound, number of states) -- TODO confirm against
## Digitizer.buildBins.
cartPositionGroup = Digitizer.buildBins(
    -2.4, 2.4, 6)  ## terminates when outside range (-2.4, 2.4)
cartVelocityGroup = Digitizer.buildBins(-1.0, 1.0, 3)
poleAngleGroup = Digitizer.buildBins(
    -12.0, 12.0, 2)  ## terminates when outside range (-12, 12)
poleVelocityGroup = Digitizer.buildBins(-4.0, 4.0, 4)

## Show the generated bins for inspection.
print("Cart position bins:", cartPositionGroup)
print("Cart velocity bins:", cartVelocityGroup)
print("Pole angle bins:", poleAngleGroup)
print("Pole velocity bins:", poleVelocityGroup)

## Bin sets are listed in the same order as the observation components above.
observationDigitizer = ArrayDigitizer(
    [cartPositionGroup, cartVelocityGroup, poleAngleGroup, poleVelocityGroup])
transformation = EnvTransformation(observationDigitizer)

## Wrap the raw Gym environment in a task.
task = GymTask.createTask(gymRawEnv)
示例#4
0
 def test_values_noEdges_5(self):
     """A 5-state digitizer over [-1, 1] built with the fourth argument False
     (no edges, per the test name) exposes values spanning -0.6 .. 0.6."""
     expectedValues = [-0.6, -0.3, 0.0, 0.3, 0.6]
     subject = Digitizer.build(-1.0, 1.0, 5, False)
     npt.assert_array_almost_equal(subject.values, expectedValues, 3)
示例#5
0

## =============================================================================

## MountainCar setup: discretise the two observation components, install the
## transformation on the task's environment and create the action-value table.

## Gym expected action:
##   0 -- left
##   1 -- neutral
##   2 -- right

## Gym observation: [position, velocity]
##        position: (-1.2, 0.6)
##        velocity: (-0.07, 0.07)

gymRawEnv = gym.make('MountainCar-v0')

# Bin sets for position and velocity, using the ranges noted above with
# 16 as the third argument (presumably the number of states -- TODO confirm).
cartPositionGroup = Digitizer.buildBins(-1.2, 0.6, 16)
cartVelocityGroup = Digitizer.buildBins(-0.07, 0.07, 16)

# print("Cart position bins:", cartPositionGroup)
# print("Cart velocity bins:", cartVelocityGroup)

# Bin sets are listed in the same order as the observation components.
observationDigitizer = ArrayDigitizer([cartPositionGroup, cartVelocityGroup])
transformation = EnvTransformation(observationDigitizer)

# Wrap the raw Gym environment in a task and install the transformation on
# the task's environment.
task = GymTask.createTask(gymRawEnv)
env = task.env
env.setTransformation(transformation)
## env.setCumulativeRewardMode()

# create the action-value table from the digitizer's state count and the
# environment's action count (its initialisation is not part of this excerpt)
table = ActionValueTable(observationDigitizer.states, env.numActions)
示例#6
0
 def test_value_states(self):
     """The highest state index (``states - 1``) of a digitizer with bins
     [0.0, 2.0] maps to the value 2.0."""
     subject = Digitizer([0.0, 2.0])
     topStateValue = subject.value(subject.states - 1)
     npt.assert_array_almost_equal(topStateValue, 2.0, 2)
示例#7
0
 def test_values_edges_5(self):
     """A 5-state digitizer over [-1, 1] built with the fourth argument True
     (edges, per the test name) exposes values spanning the full range."""
     expectedValues = [-1.0, -0.5, 0.0, 0.5, 1.0]
     subject = Digitizer.build(-1.0, 1.0, 5, True)
     npt.assert_array_almost_equal(subject.values, expectedValues, 3)
示例#8
0
 def test_value_badInput_positive(self):
     """Asking for the value of state index 3 on a digitizer with bins
     [0.0, 2.0] must raise ValueError."""
     subject = Digitizer([0.0, 2.0])
     with self.assertRaises(ValueError):
         subject.value(3)
示例#9
0
 def test_value_2(self):
     """State index 2 of a digitizer with bins [0.0, 2.0] maps to 2.0."""
     subject = Digitizer([0.0, 2.0])
     npt.assert_array_almost_equal(subject.value(2), 2.0, 2)
示例#10
0
 def test_numstates(self):
     """A digitizer with the single bin [0.0] reports two states."""
     stateCount = Digitizer([0.0]).numstates()
     self.assertEqual(stateCount, 2)
示例#11
0
 def test_state(self):
     """With the single bin [0.0], -1.0 digitizes to index 0 and 1.0 to
     index 1."""
     samples = [-1.0, 1.0]
     expectedIndexes = [0, 1]
     subject = Digitizer([0.0])
     npt.assert_equal(subject.digitize(samples), expectedIndexes)
示例#12
0
 def test_build_edge_5(self):
     """Digitizer.build(0, 10, 5, True) yields 5 states, four bins that
     include both range ends, and values evenly spread over the range."""
     expectedBins = [0., 3.333, 6.667, 10.]
     expectedValues = [0., 2.5, 5., 7.5, 10.]
     subject = Digitizer.build(0.0, 10.0, 5, True)
     self.assertEqual(subject.states, 5)
     npt.assert_array_almost_equal(subject.bins, expectedBins, 3)
     npt.assert_array_almost_equal(subject.values, expectedValues, 3)
示例#13
0
 def test_build_noEdge_5(self):
     """Digitizer.build(0, 10, 5, False) yields 5 states, four interior bins
     and values running from the first bin to the last."""
     expectedBins = [2., 4., 6., 8.]
     expectedValues = [2., 3.5, 5., 6.5, 8.]
     subject = Digitizer.build(0.0, 10.0, 5, False)
     self.assertEqual(subject.states, 5)
     npt.assert_array_almost_equal(subject.bins, expectedBins, 3)
     npt.assert_array_almost_equal(subject.values, expectedValues, 3)
示例#14
0
 def test_buildBins_bins04_edges(self):
     """buildBins(0, 12, 4, True) returns the two range ends plus the
     midpoint."""
     expectedBins = [0.0, 6.0, 12.0]
     npt.assert_equal(Digitizer.buildBins(0.0, 12.0, 4, True), expectedBins)
示例#15
0
 def test_buildBins_bins03(self):
     """buildBins(0, 12, 3) returns the two interior boundaries 4 and 8."""
     expectedBins = [4.0, 8.0]
     npt.assert_equal(Digitizer.buildBins(0.0, 12.0, 3), expectedBins)
示例#16
0
 def test_buildBins_bins02(self):
     """buildBins(0, 12, 2) returns the single midpoint boundary 6."""
     expectedBins = [6.0]
     npt.assert_equal(Digitizer.buildBins(0.0, 12.0, 2), expectedBins)