Exemplo n.º 1
0
def test_physicality_hanging():
    """
    Test that energy does not spontaneously enter system

    The pendulum starts at rest at 179.6 degrees from vertical-up (almost
    hanging straight down) and only the no-force action is applied, so the
    angular rate must stay small and the total energy must stay constant
    from one step to the next.  Finally checks that cart positions of
    +/-2.5 are reported as terminal.
    """
    NO_FORCE = 1  # action index of the zero-force action

    domain = FiniteCartPoleBalanceModern()
    domain.force_noise_max = 0  # no stochasticity in applied force
    domain.ANGLE_LIMITS = [-np.pi, np.pi]  # We actually want to test hanging

    # Positive angle (right), pendulum hanging (almost) straight down.
    # Per the checks below: index 0 is the angle, index 1 the angular rate
    # and index 2 the cart position.
    s = np.array([179.6 * np.pi / 180.0, 0.0, -2.0, 0.0])
    domain.state = s.copy()

    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(NO_FORCE)
        # NOTE: the angle itself is not checked; an earlier, disabled check
        # compared the angle (stored in radians) against a value in degrees.
        assert np.abs(domain.state[1]) <= 0.1  # angular rate stays small
        # no energy should enter or leave system under no force action
        energy_before = _cartPoleEnergy(domain, s)
        energy_after = _cartPoleEnergy(domain, domain.state)
        assert np.abs(energy_before - energy_after) < 0.01
        # Snapshot rather than alias the live state, so the next comparison
        # is always "previous step vs. current step" even if the domain
        # were to update its state array in place.
        s = domain.state.copy()

    # Ensure that running out of x bounds causes experiment to terminate
    assert domain.isTerminal(s=np.array([0.0, 0.0, 2.5, 0.0]))
    assert domain.isTerminal(s=np.array([0.0, 0.0, -2.5, 0.0]))
Exemplo n.º 2
0
def test_physicality_hanging():
    """
    Test that energy does not spontaneously enter system

    The pendulum starts at rest at 179.6 degrees from vertical-up (almost
    hanging straight down) and only the no-force action is applied, so the
    angular rate must stay small and the total energy must stay constant
    from one step to the next.  Finally checks that cart positions of
    +/-2.5 are reported as terminal.
    """
    NO_FORCE = 1  # action index of the zero-force action

    domain = FiniteCartPoleBalanceModern()
    domain.force_noise_max = 0  # no stochasticity in applied force
    domain.ANGLE_LIMITS = [-np.pi, np.pi]  # We actually want to test hanging

    # Positive angle (right), pendulum hanging (almost) straight down.
    # Per the checks below: index 0 is the angle, index 1 the angular rate
    # and index 2 the cart position.
    s = np.array([179.6 * np.pi / 180.0, 0.0, -2.0, 0.0])
    domain.state = s.copy()

    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(NO_FORCE)
        # NOTE: the angle itself is not checked; an earlier, disabled check
        # compared the angle (stored in radians) against a value in degrees.
        assert np.abs(domain.state[1]) <= 0.1  # angular rate stays small
        # no energy should enter or leave system under no force action
        energy_before = _cartPoleEnergy(domain, s)
        energy_after = _cartPoleEnergy(domain, domain.state)
        assert np.abs(energy_before - energy_after) < 0.01
        # Snapshot rather than alias the live state, so the next comparison
        # is always "previous step vs. current step" even if the domain
        # were to update its state array in place.
        s = domain.state.copy()

    # Ensure that running out of x bounds causes experiment to terminate
    assert domain.isTerminal(s=np.array([0.0, 0.0, 2.5, 0.0]))
    assert domain.isTerminal(s=np.array([0.0, 0.0, -2.5, 0.0]))
Exemplo n.º 3
0
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=2120,
                    initial_learn_rate=.26,
                    lambda_=0.9,
                    resolution=8,
                    num_rbfs=4958):
    """
    Build an Experiment that runs Q_LEARNING with an eGreedy policy over an
    RBF representation of FiniteCartPoleBalanceModern.

    @param exp_id: experiment id handed to Experiment; also used as the
        seed of the RBF representation.
    @param path: output path template handed to Experiment.
    @param boyan_N0: forwarded to Q_LEARNING (learning rate decay mode is
        "boyan").
    @param initial_learn_rate: forwarded to Q_LEARNING.
    @param lambda_: forwarded to Q_LEARNING.
    @param resolution: used as both resolution_min and resolution_max of
        the RBF representation.
    @param num_rbfs: number of RBFs; converted with int() before use.

    @return: the configured Experiment instance.
    """
    cartpole = FiniteCartPoleBalanceModern()

    # Minimum and maximum resolution are pinned to the same value.
    features = RBF(
        cartpole,
        num_rbfs=int(num_rbfs),
        resolution_max=resolution,
        resolution_min=resolution,
        const_feature=False,
        normalize=True,
        seed=exp_id,
    )
    behaviour = eGreedy(features, epsilon=0.1)
    learner = Q_LEARNING(
        behaviour,
        features,
        discount_factor=cartpole.discount_factor,
        lambda_=lambda_,
        initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan",
        boyan_N0=boyan_N0,
    )

    # Keyword arguments for Experiment, gathered in one place.
    settings = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 30000,
        "num_policy_checks": 20,
        "checks_per_policy": 10,
        "domain": cartpole,
        "agent": learner,
    }
    return Experiment(**settings)
Exemplo n.º 4
0
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=.21,
                    boyan_N0=200.,
                    initial_learn_rate=.1,
                    kernel_resolution=13.14):
    """
    Build an Experiment that runs Q_LEARNING with a greedy (epsilon = 0)
    eGreedy policy over a KernelizediFDD representation of
    FiniteCartPoleBalanceModern.

    @param exp_id: experiment id handed to Experiment.
    @param path: output path template handed to Experiment.
    @param discover_threshold: forwarded to KernelizediFDD.
    @param boyan_N0: forwarded to Q_LEARNING (learning rate decay mode is
        "boyan").
    @param initial_learn_rate: forwarded to Q_LEARNING.
    @param kernel_resolution: divisor applied to the extent of every state
        dimension to obtain the kernel width.

    @return: the configured Experiment instance.
    """
    cartpole = FiniteCartPoleBalanceModern()

    # Kernel width per state dimension: (upper limit - lower limit) divided
    # by the requested resolution.
    limits = cartpole.statespace_limits
    kernel_width = old_div((limits[:, 1] - limits[:, 0]), kernel_resolution)

    features = KernelizediFDD(
        cartpole,
        sparsify=1,
        kernel=gaussian_kernel,
        kernel_args=[kernel_width],
        active_threshold=0.01,
        discover_threshold=discover_threshold,
        normalization=True,
        max_active_base_feat=10,
        max_base_feat_sim=0.5,
    )
    behaviour = eGreedy(features, epsilon=0.)
    learner = Q_LEARNING(
        behaviour,
        features,
        discount_factor=cartpole.discount_factor,
        lambda_=0.9,
        initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan",
        boyan_N0=boyan_N0,
    )

    # Keyword arguments for Experiment, gathered in one place.
    settings = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 100000,
        "num_policy_checks": 20,
        "checks_per_policy": 1,
        "domain": cartpole,
        "agent": learner,
    }
    return Experiment(**settings)
Exemplo n.º 5
0
def test_physicality():
    """
    Test coordinate system [vertical up is 0]
        1) gravity acts in proper direction based on origin
        2) force actions behave as expected in that frame

    Each scenario runs 5 steps and requires the angle and the angular
    velocity (state indices 0 and 1) to move strictly in the expected
    direction on every step.  The two no-force scenarios additionally
    require the energy to stay constant between consecutive steps.
    """
    LEFT_FORCE = 0
    NO_FORCE = 1
    RIGHT_FORCE = 2
    domain = FiniteCartPoleBalanceModern()
    domain.force_noise_max = 0  # no stochasticity in applied force

    # Integrator setting (presumably 4th-order Runge-Kutta -- confirm
    # against the domain implementation).
    domain.int_type = 'rk4'

    # The reference state `s` is always a private copy: if it shared its
    # buffer with domain.state, an in-place update inside the domain would
    # make the strict comparisons fail and the energy check vacuous.

    # Slightly positive angle, just right of vertical up: with no force
    # applied the pendulum must keep falling to the right.
    s = np.array([10.0 * np.pi / 180.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()

    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(NO_FORCE)
        # angle and angular velocity increase
        assert np.all(domain.state[0:2] > s[0:2])
        # no energy should enter or leave system under no force action
        assert np.abs(
            _cartPoleEnergy(domain, s) -
            _cartPoleEnergy(domain, domain.state)) < 0.01
        s = domain.state.copy()

    # Slightly negative angle, just left of vertical up: with no force
    # applied the pendulum must keep falling to the left.
    s = np.array([-10.0 * np.pi / 180.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()

    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(NO_FORCE)
        # angle and angular velocity decrease
        assert np.all(domain.state[0:2] < s[0:2])
        # no energy should enter or leave system under no force action
        assert np.abs(
            _cartPoleEnergy(domain, s) -
            _cartPoleEnergy(domain, domain.state)) < 0.01
        s = domain.state.copy()

    # Start vertical, ensure that force increases angular velocity in
    # the expected direction.
    # Negative force on cart, yielding positive rotation
    s = np.array([0.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()

    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(LEFT_FORCE)
        # angle and angular velocity increase
        assert np.all(domain.state[0:2] > s[0:2])
        s = domain.state.copy()

    # Positive force on cart, yielding negative rotation
    s = np.array([0.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()

    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(RIGHT_FORCE)
        # angle and angular velocity decrease
        assert np.all(domain.state[0:2] < s[0:2])
        s = domain.state.copy()
Exemplo n.º 6
0
def test_physicality():
    """
    Test coordinate system [vertical up is 0]
        1) gravity acts in proper direction based on origin
        2) force actions behave as expected in that frame

    Each scenario runs 5 steps and requires the angle and the angular
    velocity (state indices 0 and 1) to move strictly in the expected
    direction on every step.  The two no-force scenarios additionally
    require the energy to stay constant between consecutive steps.
    """
    LEFT_FORCE = 0
    NO_FORCE = 1
    RIGHT_FORCE = 2
    domain = FiniteCartPoleBalanceModern()
    domain.force_noise_max = 0  # no stochasticity in applied force

    # Integrator setting (presumably 4th-order Runge-Kutta -- confirm
    # against the domain implementation).
    domain.int_type = 'rk4'

    # The reference state `s` is always a private copy: if it shared its
    # buffer with domain.state, an in-place update inside the domain would
    # make the strict comparisons fail and the energy check vacuous.

    # Slightly positive angle, just right of vertical up: with no force
    # applied the pendulum must keep falling to the right.
    s = np.array([10.0 * np.pi / 180.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()

    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(NO_FORCE)
        # angle and angular velocity increase
        assert np.all(domain.state[0:2] > s[0:2])
        # no energy should enter or leave system under no force action
        assert np.abs(
            _cartPoleEnergy(domain, s) -
            _cartPoleEnergy(domain, domain.state)) < 0.01
        s = domain.state.copy()

    # Slightly negative angle, just left of vertical up: with no force
    # applied the pendulum must keep falling to the left.
    s = np.array([-10.0 * np.pi / 180.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()

    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(NO_FORCE)
        # angle and angular velocity decrease
        assert np.all(domain.state[0:2] < s[0:2])
        # no energy should enter or leave system under no force action
        assert np.abs(
            _cartPoleEnergy(domain, s) -
            _cartPoleEnergy(domain, domain.state)) < 0.01
        s = domain.state.copy()

    # Start vertical, ensure that force increases angular velocity in
    # the expected direction.
    # Negative force on cart, yielding positive rotation
    s = np.array([0.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()

    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(LEFT_FORCE)
        # angle and angular velocity increase
        assert np.all(domain.state[0:2] > s[0:2])
        s = domain.state.copy()

    # Positive force on cart, yielding negative rotation
    s = np.array([0.0, 0.0, 0.0, 0.0])
    domain.state = s.copy()

    for _ in range(5):  # do for 5 steps and ensure works
        domain.step(RIGHT_FORCE)
        # angle and angular velocity decrease
        assert np.all(domain.state[0:2] < s[0:2])
        s = domain.state.copy()