def test_leaf_to_pomdp(self):
    """Check the POMDP produced for a task whose root is a single leaf.

    States, actions, observations, start distribution and the T, O and R
    arrays are compared against hand-checked expected values.
    """
    converter = HTMToPOMDP(2., 8., 5., ['A1'], end_reward=50.)
    leaf = LeafCombination(CollaborativeAction('bottom left', 'A1'))
    pomdp = converter.task_to_pomdp(HierarchicalTask(root=leaf))

    self.assertEqual(pomdp.states, ['before-bottom-left', 'end'])
    self.assertEqual(pomdp.actions, ['get-A1', 'ask-A1'])
    self.assertEqual(pomdp.observations, ['none', 'yes', 'no', 'error'])
    np.testing.assert_array_equal(pomdp.start, np.array([1, 0.]))

    # checked manually:
    expected_T = np.array([
        # get
        [[0., 1.],
         [1., 0.]],
        # ask
        [[1., 0.],
         [1., 0.]],
    ])
    np.testing.assert_allclose(expected_T, pomdp.T)

    expected_O = np.array([
        # get
        [[0., 0., 0., 1.],
         [.9, 0., 0., .1]],
        # ask
        [[0., 1., 0., 0.],
         [0., 0., 1., 0.]],
    ])
    np.testing.assert_array_equal(expected_O, pomdp.O)

    # The three-dimensional reward table is repeated along a trailing axis
    # of length 4 to reach the (2, 2, 2, 4) shape compared below.
    base_rewards = np.array([
        [[-5., -8.], [50., 50.]],
        [[-2., -2.], [50., 50.]],
    ])
    expected_R = np.broadcast_to(base_rewards[..., None], (2, 2, 2, 4))
    np.testing.assert_array_equal(expected_R, pomdp.R)
def test_alt_to_pomdp(self):
    """Check the POMDP produced for an alternative between two leaves.

    States, actions, observations, start distribution and the T, O and R
    arrays are compared against hand-checked expected values.
    """
    # No probability of failure or human saying no here
    alternatives = AlternativeCombination([
        LeafCombination(CollaborativeAction('Bottom left', 'A1')),
        LeafCombination(CollaborativeAction('Top left', 'A2')),
    ], name='Do all')
    task = HierarchicalTask(root=alternatives)
    converter = HTMToPOMDP(2., 8., 5., ['A1', 'A2'], end_reward=50.)
    pomdp = converter.task_to_pomdp(task)

    self.assertEqual(pomdp.states,
                     ['before-bottom-left', 'before-top-left', 'end'])
    self.assertEqual(pomdp.actions,
                     ['get-A1', 'get-A2', 'ask-A1', 'ask-A2'])
    self.assertEqual(pomdp.observations, ['none', 'yes', 'no', 'error'])
    np.testing.assert_array_equal(pomdp.start, np.array([.5, .5, 0]))

    # checked manually:
    expected_T = np.array([
        # get A1
        [[0., 0., 1.],
         [0., 1., 0.],
         [.5, .5, 0.]],
        # get A2
        [[1., 0., 0.],
         [0., 0., 1.],
         [.5, .5, 0.]],
        # ask A1
        [[1., 0., 0.],
         [0., 1., 0.],
         [.5, .5, 0.]],
        # ask A2
        [[1., 0., 0.],
         [0., 1., 0.],
         [.5, .5, 0.]],
    ])
    np.testing.assert_allclose(expected_T, pomdp.T, atol=1.e-4)

    expected_O = np.array([
        # get A1
        [[0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [.9, 0., 0., .1]],
        # get A2
        [[0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [.9, 0., 0., .1]],
        # ask A1
        [[0., 1., 0., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.]],
        # ask A2
        [[0., 0., 1., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.]],
    ])
    np.testing.assert_array_equal(expected_O, pomdp.O)

    # The three-dimensional reward table is repeated along a trailing axis
    # of length 4 to reach the (4, 3, 3, 4) shape compared below.
    base_rewards = np.array([
        # get A1
        [[-5, -5, -8], [-5] * 3, [50, 50, 50]],
        # get A2
        [[-5.] * 3, [-5, -5, -8], [50, 50, 50]],
        # ask A1
        [[-2.] * 3, [-2.] * 3, [50, 50, 50]],
        # ask A2
        [[-2.] * 3, [-2.] * 3, [50, 50, 50]],
    ])
    expected_R = np.broadcast_to(base_rewards[..., None], (4, 3, 3, 4))
    np.testing.assert_array_equal(expected_R, pomdp.R)
def test_end_reward_on_seq(self):
    """Check the end reward on a two-step sequential task.

    Every entry of ``R[wait, end, end, :]`` must equal the ``end_reward``
    value (13.) given to the converter.
    """
    converter = HTMToPOMDP(1., 2., 1., 1., end_reward=13.)
    first_step = LeafCombination(
        CollaborativeAction('Do a', (3., 2., 5.),
                            fail_probability=0., no_probability=0.))
    second_step = LeafCombination(
        CollaborativeAction('Do b', (2., 3., 4.),
                            fail_probability=0., no_probability=0.))
    sequence = SequentialCombination([first_step, second_step],
                                     name='Do all')
    pomdp = converter.task_to_pomdp(HierarchicalTask(root=sequence))

    # The converter's indices are read only after the conversion has run.
    end_slice = pomdp.R[converter.wait, converter.end, converter.end, :]
    self.assertTrue((end_slice == 13.).all())
# Build a hierarchical "Mount chair" task and print it as indented JSON.
#
# `json` is imported here because the final statement calls `json.dumps`;
# without the import the script fails with a NameError at that call.
import json

from task_models.task import (HierarchicalTask, AbstractAction,
                              SequentialCombination, ParallelCombination,
                              LeafCombination)

# First step of the chair sequence.
take_base = LeafCombination(AbstractAction('Take base'))

# One "take then attach" sequence per leg, for legs 0 to 3.
mount_leg_combinations = [
    SequentialCombination(
        [LeafCombination(AbstractAction('Take leg {}'.format(i))),
         LeafCombination(AbstractAction('Attach leg {}'.format(i)))
         ],
        name='Mount leg {}'.format(i))
    for i in range(4)
]

# Frame is taken (leaf created with highlighted=True) and then attached.
mount_frame = SequentialCombination(
    [LeafCombination(AbstractAction('Take frame'), highlighted=True),
     LeafCombination(AbstractAction('Attach frame'))
     ],
    name='Mount frame')

# Whole task: base, then the leg sub-tasks grouped in a
# ParallelCombination, then the frame.
chair_task = HierarchicalTask(
    root=SequentialCombination(
        [take_base,
         ParallelCombination(mount_leg_combinations, name='Mount legs'),
         mount_frame,
         ],
        name='Mount chair')
)

# Dump the task's dictionary form as JSON with a two-space indent.
print(json.dumps(chair_task.as_dictionary(), indent=2))
def _learner_to_htm(self):
    """Build a HierarchicalTask from the JSON text returned by ``get()``.

    The ``text`` attribute of the object returned by ``get()`` is parsed
    as JSON; its ``'nodes'`` entry is passed to ``build_htm_recursively``
    and the result is wrapped in a ``HierarchicalTask``.
    """
    payload = json.loads(get().text)
    root = build_htm_recursively(payload['nodes'])
    return HierarchicalTask(root)
def test_end_reward(self):
    """Check the end reward on a single-leaf task.

    Every entry of ``R[wait, end, end, :]`` must equal the ``end_reward``
    value (13.) given to the converter.
    """
    converter = HTMToPOMDP(1., 2., 1., 1., end_reward=13.)
    leaf = LeafCombination(CollaborativeAction('Do it', (3., 2., 5.)))
    pomdp = converter.task_to_pomdp(HierarchicalTask(root=leaf))

    # The converter's indices are read only after the conversion has run.
    end_slice = pomdp.R[converter.wait, converter.end, converter.end, :]
    self.assertTrue((end_slice == 13.).all())
def test_alt_to_pomdp(self):
    """Check the POMDP produced for an alternative between two leaves.

    Uses the converter stored in ``self.h2p`` (set up outside this
    method). States, actions, observations, start distribution and the
    T, O and R arrays are compared against hand-checked expected values.
    """
    # No probability of failure or human saying no here
    option_a = LeafCombination(
        CollaborativeAction('Do a', (3., 2., 5.),
                            fail_probability=0., no_probability=0.))
    option_b = LeafCombination(
        CollaborativeAction('Do b', (2., 3., 4.),
                            fail_probability=0., no_probability=0.))
    task = HierarchicalTask(
        root=AlternativeCombination([option_a, option_b], name='Do either'))
    pomdp = self.h2p.task_to_pomdp(task)

    self.assertEqual(pomdp.states, [
        'init-do-a', 'H-do-a', 'R-do-a',
        'init-do-b', 'H-do-b', 'R-do-b',
        'end',
    ])
    self.assertEqual(pomdp.actions, [
        'wait',
        'phy-do-a',
        'com-ask-intention-do-a',
        'com-tell-intention-do-a',
        'com-ask-finished-do-a',
        'phy-do-b',
        'com-ask-intention-do-b',
        'com-tell-intention-do-b',
        'com-ask-finished-do-b',
    ])
    self.assertEqual(pomdp.observations, ['none', 'yes', 'no', 'error'])
    np.testing.assert_array_equal(pomdp.start,
                                  np.array([.5, 0, 0, .5, 0, 0, 0]))

    # checked manually:
    expected_T = np.array([
        # Wait
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.71653, 0., 0., 0., 0., 0.28347],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.60653, 0., 0.39347],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Physical a
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.18888, 0., 0., 0., 0., 0.81112],
         [0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.08208, 0., 0.91792],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Ask intention a
        [[0., 1., 0., 0., 0., 0., 0.],
         [0., 0.51342, 0., 0., 0., 0., 0.48658],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.36788, 0., 0.63212],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Tell intention a
        [[0., 0., 1., 0., 0., 0., 0.],
         [0., 0.71653, 0., 0., 0., 0., 0.28347],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.60653, 0., 0.39347],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Ask finished a
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.51342, 0., 0., 0., 0., 0.48658],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.36788, 0., 0.63212],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Physical b
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.2636, 0., 0., 0., 0., 0.7364],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.13534, 0., 0.86466],
         [0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Ask intention b
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.51342, 0., 0., 0., 0., 0.48658],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0.36788, 0., 0.63212],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Tell intention b
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.71653, 0., 0., 0., 0., 0.28347],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0.60653, 0., 0.39347],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Ask finished b
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.51342, 0., 0., 0., 0., 0.48658],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.36788, 0., 0.63212],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
    ])
    np.testing.assert_allclose(expected_T, pomdp.T, atol=1.e-4)

    # Every row of the expected observation matrix is one-hot over
    # ['none', 'yes', 'no', 'error'], so the rows are named once here.
    none = [1., 0., 0., 0.]
    yes = [0., 1., 0., 0.]
    no = [0., 0., 1., 0.]
    error = [0., 0., 0., 1.]
    expected_O = np.array([
        # Wait
        [none] * 7,
        # Physical a
        [error] * 6 + [none],
        # Ask intention a
        [none, yes, no, none, none, none, none],
        # Tell intention a
        [none] * 7,
        # Ask finished a
        [no] * 6 + [yes],
        # Physical b
        [error] * 6 + [none],
        # Ask intention b
        [none, none, none, none, yes, no, none],
        # Tell intention b
        [none] * 7,
        # Ask finished b
        [no] * 6 + [yes],
    ])
    np.testing.assert_array_equal(expected_O, pomdp.O)

    # The two-dimensional table is repeated along two trailing axes to
    # reach the (9, 7, 7, 4) shape; the result is negated before comparing.
    cost_table = np.array([
        [1] * 6 + [0],
        [6, 6, 3, 6, 6, 6, 1],
        [3] * 6 + [1],
        [2] * 6 + [1],
        [3] * 6 + [1],
        [5, 5, 5, 5, 5, 4, 1],
        [3] * 6 + [1],
        [2] * 6 + [1],
        [3] * 6 + [1],
    ])
    expected_R = -np.broadcast_to(cost_table[:, :, None, None],
                                  (9, 7, 7, 4))
    np.testing.assert_array_equal(expected_R, pomdp.R)
def test_leaf_to_pomdp(self):
    """Check the POMDP produced for a task whose root is a single leaf.

    Uses the converter stored in ``self.h2p`` (set up outside this
    method). States, actions, observations, start distribution and the
    T, O and R arrays are compared against hand-checked expected values.
    """
    # No probability of failure or human saying no here
    action = CollaborativeAction(
        'Do it', (3., 2., 5.), fail_probability=0., no_probability=0.)
    task = HierarchicalTask(root=LeafCombination(action))
    pomdp = self.h2p.task_to_pomdp(task)

    self.assertEqual(pomdp.states,
                     ['init-do-it', 'H-do-it', 'R-do-it', 'end'])
    self.assertEqual(pomdp.actions, [
        'wait',
        'phy-do-it',
        'com-ask-intention-do-it',
        'com-tell-intention-do-it',
        'com-ask-finished-do-it',
    ])
    self.assertEqual(pomdp.observations, ['none', 'yes', 'no', 'error'])
    np.testing.assert_array_equal(pomdp.start, np.array([1, 0., 0., 0.]))

    # checked manually:
    expected_T = np.array([
        # Wait
        [[1., 0., 0., 0.],
         [0., 0.71653131, 0., 0.28346869],
         [0., 0., 1., 0.],
         [0., 0., 0., 1.]],
        # Act
        [[1., 0., 0., 0.],
         [0., 0.1888756, 0., 0.8111244],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.]],
        # Ask intention
        [[0., 1., 0., 0.],
         [0., 0.51341712, 0., 0.48658288],
         [0., 0., 1., 0.],
         [0., 0., 0., 1.]],
        # Tell intention
        [[0., 0., 1., 0.],
         [0., 0.71653131, 0., 0.28346869],
         [0., 0., 1., 0.],
         [0., 0., 0., 1.]],
        # Ask finished
        [[1., 0., 0., 0.],
         [0., 0.51341712, 0., 0.48658288],
         [0., 0., 1., 0.],
         [0., 0., 0., 1.]],
    ])
    np.testing.assert_allclose(expected_T, pomdp.T)

    expected_O = np.array([
        # Wait
        [[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.]],
        # Act
        [[0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [1., 0., 0., 0.]],
        # Ask intention
        [[1., 0., 0., 0.],    # not possible in T
         [0., 1., 0., 0.],
         [0., 0., 1., 0.],    # robot has told its int. to act
         [1., 0., 0., 0.]],   # human acts while robot ask again
                              # TODO: change to Yes?
        # Tell intention
        [[1., 0., 0., 0.],    # not possible in T
         [1., 0., 0., 0.],    # TODO: No?
         [1., 0., 0., 0.],    # TODO: maybe H answers to R tell
         [1., 0., 0., 0.]],   # TODO: answer?
        # Ask finished
        [[0., 0., 1., 0.],    # not started
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 1., 0., 0.]],
    ])
    np.testing.assert_array_equal(expected_O, pomdp.O)

    # The two-dimensional table is repeated along two trailing axes to
    # reach the (5, 4, 4, 4) shape; the result is negated before comparing.
    cost_table = np.array([
        [1, 1, 1, 0],
        [6, 6, 3, 1],
        [3, 3, 3, 1],
        [2, 2, 2, 1],
        [3, 3, 3, 1],
    ])
    expected_R = -np.broadcast_to(cost_table[:, :, None, None],
                                  (5, 4, 4, 4))
    np.testing.assert_array_equal(expected_R, pomdp.R)
], name='Mount right leg'), ], name='Mount legs') release_central = LeafCombination( CollaborativeAction('Release central frame', (INF, 1., 1.), no_probability=.1)) mount_top = SequentialCombination([ LeafCombination(CollaborativeAction('Get top', (INF, 20., 30.))), LeafCombination( CollaborativeAction('Snap top', (5., INF, INF), fail_probability=.1)) ], name='Mount top') chair_task = HierarchicalTask(root=SequentialCombination( [mount_central, mount_legs, release_central, mount_top], name='Mount chair')) ## Convert the task into a POMDP h2p = HTMToPOMDP(T_WAIT, T_COMM, C_INTR, end_reward=R_END, loop=LOOP, reward_state=False, subtask_reward=R_SUBTASK) p = h2p.task_to_pomdp(chair_task) #p.discount = .99 gp = p.solve(method='grid', n_iterations=500, verbose=True)
# Build a hierarchical "Mount chair" task and print it as indented JSON.
import json

from task_models.task import (HierarchicalTask, AbstractAction,
                              SequentialCombination, ParallelCombination,
                              LeafCombination)


def _leaf(label, **options):
    # Wrap a named AbstractAction in a LeafCombination.
    return LeafCombination(AbstractAction(label), **options)


def _mount_leg(index):
    # "Take then attach" sequence for the leg with the given index.
    steps = [
        _leaf('Take leg {}'.format(index)),
        _leaf('Attach leg {}'.format(index)),
    ]
    return SequentialCombination(steps, name='Mount leg {}'.format(index))


take_base = _leaf('Take base')

# One sub-task per leg, for legs 0 to 3.
mount_leg_combinations = [_mount_leg(i) for i in range(4)]

mount_frame = SequentialCombination(
    [_leaf('Take frame', highlighted=True), _leaf('Attach frame')],
    name='Mount frame')

# Whole task: base, then the leg sub-tasks grouped in a
# ParallelCombination, then the frame.
_mount_legs = ParallelCombination(mount_leg_combinations, name='Mount legs')
chair_task = HierarchicalTask(
    root=SequentialCombination([take_base, _mount_legs, mount_frame],
                               name='Mount chair'))

# Dump the task's dictionary form as JSON with a two-space indent.
print(json.dumps(chair_task.as_dictionary(), indent=2))
def task_from_orders(self, orders, names=None):
    """Return a HierarchicalTask rooted at the combination built from *orders*.

    *orders* and *names* are forwarded unchanged to
    ``self.alternative_combination_from_orders``; its result becomes the
    ``root`` of the returned task.
    """
    root = self.alternative_combination_from_orders(orders, names=names)
    return HierarchicalTask(root=root)