Exemplo n.º 1
0
def createExperimentInstance():
    """Assemble the discretized MountainCarContinuous-v0 experiment.

    Creates the gym environment, attaches an observation/action
    transformation to the task's environment, builds a zero-initialized
    action-value table sized by the digitizers' ``states`` attributes, and
    wraps the resulting ``Experiment`` in a ``ProcessExperiment`` that is
    given ``doSingleExperiment``.

    Returns:
        The ``ProcessExperiment`` wrapping the configured ``Experiment``.
    """
    rawEnvironment = gym.make('MountainCarContinuous-v0')

    # Observation side: position over [-1.2, 0.6] and velocity over
    # [-0.07, 0.07]; the third argument is presumably the bin count.
    positionBins = Digitizer.buildBins(-1.2, 0.6, 16)
    velocityBins = Digitizer.buildBins(-0.07, 0.07, 4)
    # Action side: force over [-1.0, 1.0] built with 5 and the last flag True.
    forceDigitizer = Digitizer.build(-1.0, 1.0, 5, True)

    # Debugging aid: print positionBins, velocityBins, forceDigitizer.bins
    # and forceDigitizer.possibleValues() here to inspect the discretization.

    stateDigitizer = ArrayDigitizer([positionBins, velocityBins])
    envTransformation = EnvTransformation(stateDigitizer, forceDigitizer)

    task = GymTask.createTask(rawEnvironment)
    # The transformation is installed on the task's own environment object.
    # Optional alternative left disabled: task.env.setCumulativeRewardMode()
    task.env.setTransformation(envTransformation)

    # The agent combines a controller and a learner (SARSA(), Q() or
    # QLambda() are the learner options):
    #   alpha -- learning rate (preference of new information)
    #   gamma -- discount factor (importance of future reward)

    # Value table starts at zero for every state/action pair.
    # A random start would be: table.initialize(np.random.rand(table.paramdim))
    valueTable = ActionValueTable(stateDigitizer.states, forceDigitizer.states)
    valueTable.initialize(0.0)
    agent = createAgent(valueTable)

    return ProcessExperiment(Experiment(task, agent), doSingleExperiment)
Exemplo n.º 2
0
 def test_values_noEdges_5(self):
     """Values of Digitizer.build(-1.0, 1.0, 5, False) match to 3 decimals."""
     expectedValues = [-0.6, -0.3, 0.0, 0.3, 0.6]
     noEdgeDigitizer = Digitizer.build(-1.0, 1.0, 5, False)
     npt.assert_array_almost_equal(
         noEdgeDigitizer.values, expectedValues, decimal=3)
Exemplo n.º 3
0
 def test_build_edge_5(self):
     """Digitizer.build(0.0, 10.0, 5, True) yields 5 states with the expected bins and values."""
     expectedBins = [0., 3.333, 6.667, 10.]
     expectedValues = [0., 2.5, 5., 7.5, 10.]

     edgeDigitizer = Digitizer.build(0.0, 10.0, 5, True)

     self.assertEqual(edgeDigitizer.states, 5)
     # Both arrays are compared to 3 decimal places.
     npt.assert_array_almost_equal(
         edgeDigitizer.bins, expectedBins, decimal=3)
     npt.assert_array_almost_equal(
         edgeDigitizer.values, expectedValues, decimal=3)
Exemplo n.º 4
0
 def test_values_edges_5(self):
     """Values of Digitizer.build(-1.0, 1.0, 5, True) match to 3 decimals."""
     expectedValues = [-1.0, -0.5, 0.0, 0.5, 1.0]
     edgeDigitizer = Digitizer.build(-1.0, 1.0, 5, True)
     npt.assert_array_almost_equal(
         edgeDigitizer.values, expectedValues, decimal=3)
Exemplo n.º 5
0
 def test_build_noEdge_5(self):
     """Digitizer.build(0.0, 10.0, 5, False) yields 5 states with the expected bins and values."""
     expectedBins = [2., 4., 6., 8.]
     expectedValues = [2., 3.5, 5., 6.5, 8.]

     noEdgeDigitizer = Digitizer.build(0.0, 10.0, 5, False)

     self.assertEqual(noEdgeDigitizer.states, 5)
     # Both arrays are compared to 3 decimal places.
     npt.assert_array_almost_equal(
         noEdgeDigitizer.bins, expectedBins, decimal=3)
     npt.assert_array_almost_equal(
         noEdgeDigitizer.values, expectedValues, decimal=3)