Exemplo n.º 1
0
    def test_out_of_bounds_num_action(self):
        """Constructing the basis with 0 or -1 actions must raise ValueError."""
        for invalid_count in (0, -1):
            with self.assertRaises(ValueError):
                RadialBasisFunction(self.means, self.gamma, invalid_count)
Exemplo n.º 2
0
    def setUp(self):
        """Create the fixture shared by the tests.

        Stores three 3-element means (all -1, all 0, all +1), gamma = 1,
        two actions, a basis built from those values and an all-zero state.
        """
        shape = (3, )
        self.means = [-np.ones(shape), np.zeros(shape), np.ones(shape)]
        self.gamma = 1
        self.num_actions = 2
        self.basis = RadialBasisFunction(
            self.means, self.gamma, self.num_actions)
        self.state = np.zeros(shape)
Exemplo n.º 3
0
def start_landing(init_position, target_position, weights):
    """Drive the quadcopter towards ``target_position`` while updating the GUI.

    A policy built from a single randomly placed radial basis function and
    the supplied ``weights`` picks an action each iteration. The loop ends
    when the distance to the target drops to the threshold (0.01) or below,
    or when more than 15 minutes have elapsed.

    Args:
        init_position: Position used only to compute the initial distance
            error that gates entry into the loop.
        target_position: Position the distance error is measured against.
        weights: Weights handed to ``QuadcopterPolicy``.

    Returns:
        None. Each new position is printed and pushed to the GUI.
    """
    quad_domain = QuadcopterDomain()
    num_actions = quad_domain.num_actions()

    # One basis-function centre, sampled uniformly from [0, 1) in 6 dimensions.
    mean_bf = [np.random.uniform(0, 1, size=(6, ))]

    basis_func = RadialBasisFunction(mean_bf, 0.5, num_actions)
    quad_policy = QuadcopterPolicy(basis_func, weights=weights)

    gui_object = GUI(quad_domain.quad_dict)

    threshold_err = 0.01
    distance_err = distance(target_position, init_position)
    # NOTE(review): the domain is reset to a hard-coded state rather than one
    # derived from init_position -- confirm this is intended.
    quad_domain.reset(np.array([1, 0, 4, 0, 0, 0]))

    time_limit = 15 * 60  # seconds (15 min)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # monotonic replacement for measuring elapsed time.
    time_elapsed = 0
    start_time = time.perf_counter()
    while distance_err > threshold_err and time_elapsed <= time_limit:
        action = quad_policy.best_action(quad_domain.current_state())
        quad_domain.apply_action(action)
        new_position = quad_domain.quad.get_position(quad_domain.key)
        distance_err = distance(target_position, new_position)
        print(new_position)

        # Mirror the simulated pose into the GUI model before redrawing.
        gui_object.quads['q1']['position'] = [
            new_position[0], new_position[1], new_position[2]
        ]
        gui_object.quads['q1'][
            'orientation'] = quad_domain.quad.get_orientation(quad_domain.key)
        gui_object.update()

        time_elapsed = time.perf_counter() - start_time
Exemplo n.º 4
0
            quad_domain.set_target_state(target_list[c % 7])
            c += 1

    return sample_data



if __name__ == "__main__":
    # Script entry point: collect samples with an initial policy, learn an
    # improved policy with LSPI, and pickle the learned weights.

    quad_domain = QuadcopterDomain()
    num_actions = quad_domain.num_actions()

    # One basis-function centre, sampled uniformly from [0, 1) in 6 dimensions.
    mean_bf = [np.random.uniform(0, 1, size=(6, ))]
    basis_func = RadialBasisFunction(mean_bf, 0.5, num_actions)
    quad_policy = QuadcopterPolicy(basis_func)

    sample_data = collect_samples(quad_domain, quad_policy)
    print(sample_data[0])
    solver = LSTDQSolver()

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # monotonic replacement for timing the learning step.
    start = time.perf_counter()
    new_policy = lspi.learn(sample_data, quad_policy, solver)
    print('Done!', (time.perf_counter() - start))

    # Persist the learned weights to weights.pickle.
    with open('weights.pickle', 'wb') as weights_file:
        pickle.dump(new_policy.weights, weights_file)
    

Exemplo n.º 5
0
 def test_out_of_bounds_gamma(self):
     """Constructing the basis with gamma = 0 must raise ValueError."""
     invalid_gamma = 0
     self.assertRaises(ValueError, RadialBasisFunction, self.means,
                       invalid_gamma, self.num_actions)
Exemplo n.º 6
0
 def test_mismatched_mean_shapes(self):
     """Means of differing lengths (3, 2, 3) must raise ValueError."""
     ragged_means = [np.zeros((3, )), -np.ones((2, )), np.ones((3, ))]
     with self.assertRaises(ValueError):
         RadialBasisFunction(ragged_means, self.gamma, self.num_actions)
Exemplo n.º 7
0
 def test_empty_means_list(self):
     """Constructing the basis with an empty means list must raise ValueError."""
     no_means = []
     self.assertRaises(ValueError, RadialBasisFunction, no_means,
                       self.gamma, self.num_actions)
Exemplo n.º 8
0
class TestRadialBasisFunction(TestCase):
    """Unit tests for ``RadialBasisFunction``: construction, setters, evaluation."""

    def setUp(self):
        """Create the fixture shared by the tests.

        Stores three 3-element means (all -1, all 0, all +1), gamma = 1,
        two actions, a basis built from those values and an all-zero state.
        """
        shape = (3, )
        self.means = [-np.ones(shape), np.zeros(shape), np.ones(shape)]
        self.gamma = 1
        self.num_actions = 2
        self.basis = RadialBasisFunction(
            self.means, self.gamma, self.num_actions)
        self.state = np.zeros(shape)

    def test_specify_means(self):
        """The basis exposes the means it was constructed with."""
        pairs = zip(self.basis.means, self.means)
        for actual, expected in pairs:
            np.testing.assert_array_almost_equal(actual, expected)

    def test_empty_means_list(self):
        """An empty means list must raise ValueError."""
        no_means = []
        with self.assertRaises(ValueError):
            RadialBasisFunction(no_means, self.gamma, self.num_actions)

    def test_mismatched_mean_shapes(self):
        """Means of differing lengths (3, 2, 3) must raise ValueError."""
        ragged_means = [np.zeros((3, )), -np.ones((2, )), np.ones((3, ))]
        with self.assertRaises(ValueError):
            RadialBasisFunction(ragged_means, self.gamma, self.num_actions)

    def test_specify_gamma(self):
        """The basis exposes the gamma it was constructed with."""
        self.assertAlmostEqual(self.gamma, self.basis.gamma)

    def test_out_of_bounds_gamma(self):
        """A gamma of 0 must raise ValueError."""
        invalid_gamma = 0
        with self.assertRaises(ValueError):
            RadialBasisFunction(self.means, invalid_gamma, self.num_actions)

    def test_specify_actions(self):
        """The basis exposes the number of actions it was constructed with."""
        self.assertEqual(self.basis.num_actions, self.num_actions)

    def test_num_actions_setter(self):
        """Assigning ``num_actions`` updates the stored value."""
        updated_count = 10
        self.basis.num_actions = updated_count

        self.assertEqual(self.basis.num_actions, updated_count)

    def test_num_actions_setter_invalid_value(self):
        """Assigning 0 to ``num_actions`` must raise ValueError."""
        with self.assertRaises(ValueError):
            self.basis.num_actions = 0

    def test_out_of_bounds_num_action(self):
        """Constructing the basis with 0 or -1 actions must raise ValueError."""
        for invalid_count in (0, -1):
            with self.assertRaises(ValueError):
                RadialBasisFunction(self.means, self.gamma, invalid_count)

    def test_size(self):
        """``size()`` is 8 for the fixture (three means, two actions)."""
        expected_size = 8
        self.assertEqual(self.basis.size(), expected_size)

    def test_evaluate(self):
        """Evaluating the zero state for action 0 yields the expected features.

        The result has 8 entries; only the first four are non-zero
        (presumably the slots belonging to action 0 -- confirm against the
        ``RadialBasisFunction`` implementation).
        """
        expected_phi = np.array([1., 0.0498, 1., 0.0498, 0., 0., 0., 0.])

        phi = self.basis.evaluate(self.state, 0)

        self.assertEqual(phi.shape, (8, ))
        # Compare to 4 decimal places, matching the precision of 0.0498.
        np.testing.assert_array_almost_equal(phi, expected_phi, 4)

    def test_evaluate_out_of_bounds_action(self):
        """Action indices 2 and -1 must raise IndexError on evaluation."""
        for invalid_action in (2, -1):
            with self.assertRaises(IndexError):
                self.basis.evaluate(self.state, invalid_action)

    def test_evaluate_incorrect_state_dimensions(self):
        """A 2-element state (means have 3 elements) must raise ValueError."""
        short_state = np.zeros((2, ))
        with self.assertRaises(ValueError):
            self.basis.evaluate(short_state, 0)