Exemplo n.º 1
0
 def test_leaf_to_pomdp(self):
     # Converts a task made of a single leaf ('bottom left', 'A1') and
     # compares the labels, the start distribution and the T, O and R
     # arrays of the resulting POMDP with hand-checked values.
     converter = HTMToPOMDP(2., 8., 5., ['A1'], end_reward=50.)
     leaf = LeafCombination(CollaborativeAction('bottom left', 'A1'))
     pomdp = converter.task_to_pomdp(HierarchicalTask(root=leaf))

     # Labels and initial distribution.
     self.assertEqual(pomdp.states, ['before-bottom-left', 'end'])
     self.assertEqual(pomdp.actions, ['get-A1', 'ask-A1'])
     self.assertEqual(pomdp.observations, ['none', 'yes', 'no', 'error'])
     np.testing.assert_array_equal(pomdp.start, np.array([1, 0.]))

     # The expected arrays below were checked manually; their first axis
     # follows the action order (get, ask).
     expected_T = np.array([
         [[0., 1.], [1., 0.]],  # get
         [[1., 0.], [1., 0.]],  # ask
     ])
     np.testing.assert_allclose(expected_T, pomdp.T)

     expected_O = np.array([
         [[0., 0., 0., 1.], [.9, 0., 0., .1]],  # get
         [[0., 1., 0., 0.], [0., 0., 1., 0.]],  # ask
     ])
     np.testing.assert_array_equal(expected_O, pomdp.O)

     # The 3-D reward values are repeated along a trailing axis of length 4
     # (presumably one entry per observation -- confirm against the model).
     reward_values = np.array([
         [[-5., -8.], [50., 50.]],
         [[-2., -2.], [50., 50.]],
     ])
     expected_R = np.broadcast_to(reward_values[..., None], (2, 2, 2, 4))
     np.testing.assert_array_equal(expected_R, pomdp.R)
Exemplo n.º 2
0
 def test_alt_to_pomdp(self):
     # Converts an alternative between two leaves (objects A1 and A2) and
     # compares every component of the resulting POMDP with hand-checked
     # values.
     # Author's note: no probability of failure or human saying no here.
     options = [
         LeafCombination(CollaborativeAction('Bottom left', 'A1')),
         LeafCombination(CollaborativeAction('Top left', 'A2')),
     ]
     task = HierarchicalTask(
         root=AlternativeCombination(options, name='Do all'))
     converter = HTMToPOMDP(2., 8., 5., ['A1', 'A2'], end_reward=50.)
     pomdp = converter.task_to_pomdp(task)

     # Labels and initial distribution.
     self.assertEqual(pomdp.states,
                      ['before-bottom-left', 'before-top-left', 'end'])
     self.assertEqual(pomdp.actions,
                      ['get-A1', 'get-A2', 'ask-A1', 'ask-A2'])
     self.assertEqual(pomdp.observations, ['none', 'yes', 'no', 'error'])
     np.testing.assert_array_equal(pomdp.start, np.array([.5, .5, 0]))

     # The expected arrays below were checked manually; their first axis
     # follows the action order (get A1, get A2, ask A1, ask A2).
     expected_T = np.array([
         [[0., 0., 1.], [0., 1., 0.], [.5, .5, 0.]],  # get A1
         [[1., 0., 0.], [0., 0., 1.], [.5, .5, 0.]],  # get A2
         [[1., 0., 0.], [0., 1., 0.], [.5, .5, 0.]],  # ask A1
         [[1., 0., 0.], [0., 1., 0.], [.5, .5, 0.]],  # ask A2
     ])
     np.testing.assert_allclose(expected_T, pomdp.T, atol=1.e-4)

     expected_O = np.array([
         # get A1
         [[0., 0., 0., 1.], [0., 0., 0., 1.], [.9, 0., 0., .1]],
         # get A2
         [[0., 0., 0., 1.], [0., 0., 0., 1.], [.9, 0., 0., .1]],
         # ask A1
         [[0., 1., 0., 0.], [0., 0., 1., 0.], [0., 0., 1., 0.]],
         # ask A2
         [[0., 0., 1., 0.], [0., 1., 0., 0.], [0., 0., 1., 0.]],
     ])
     np.testing.assert_array_equal(expected_O, pomdp.O)

     # The 3-D reward values are repeated along a trailing axis of length 4.
     reward_values = np.array([
         # get A1
         [[-5., -5., -8.], [-5., -5., -5.], [50., 50., 50.]],
         # get A2
         [[-5., -5., -5.], [-5., -5., -8.], [50., 50., 50.]],
         # ask A1
         [[-2., -2., -2.], [-2., -2., -2.], [50., 50., 50.]],
         # ask A2
         [[-2., -2., -2.], [-2., -2., -2.], [50., 50., 50.]],
     ])
     expected_R = np.broadcast_to(reward_values[..., None], (4, 3, 3, 4))
     np.testing.assert_array_equal(expected_R, pomdp.R)
Exemplo n.º 3
0
 def test_end_reward_on_seq(self):
     # For a two-step sequence, every reward entry for the wait action from
     # the end state to the end state must equal the configured end_reward.
     converter = HTMToPOMDP(1., 2., 1., 1., end_reward=13.)
     steps = [
         LeafCombination(
             CollaborativeAction('Do a', (3., 2., 5.),
                                 fail_probability=0.,
                                 no_probability=0.)),
         LeafCombination(
             CollaborativeAction('Do b', (2., 3., 4.),
                                 fail_probability=0.,
                                 no_probability=0.)),
     ]
     task = HierarchicalTask(
         root=SequentialCombination(steps, name='Do all'))
     pomdp = converter.task_to_pomdp(task)
     end_rewards = pomdp.R[converter.wait, converter.end, converter.end, :]
     self.assertTrue((end_rewards == 13.).all())
Exemplo n.º 4
0
# Builds a hierarchical "Mount chair" task out of abstract actions and prints
# its dictionary representation as indented JSON.
import json  # needed by the json.dumps call at the end of this script

from task_models.task import (HierarchicalTask, AbstractAction,
                              SequentialCombination, ParallelCombination,
                              LeafCombination)


# Single leaf step executed first in the root sequence.
take_base = LeafCombination(AbstractAction('Take base'))

# One "take then attach" sequence per leg, for legs numbered 0 to 3.
mount_leg_combinations = [
    SequentialCombination(
        [LeafCombination(AbstractAction('Take leg {}'.format(i))),
         LeafCombination(AbstractAction('Attach leg {}'.format(i)))
         ],
        name='Mount leg {}'.format(i))
    for i in range(4)
    ]

# Frame sub-task; its first leaf is created with highlighted=True.
mount_frame = SequentialCombination(
    [LeafCombination(AbstractAction('Take frame'), highlighted=True),
     LeafCombination(AbstractAction('Attach frame'))
     ],
    name='Mount frame')

# Root: take the base, then the leg sub-tasks grouped in a
# ParallelCombination, then the frame.
chair_task = HierarchicalTask(
    root=SequentialCombination(
        [take_base,
         ParallelCombination(mount_leg_combinations, name='Mount legs'),
         mount_frame,
         ],
        name='Mount chair')
    )

print(json.dumps(chair_task.as_dictionary(), indent=2))
Exemplo n.º 5
0
 def _learner_to_htm(self):
     """Build a HierarchicalTask from the JSON document returned by ``get()``.

     The ``text`` attribute of the object returned by ``get()`` is parsed as
     JSON, and its ``'nodes'`` entry is handed to ``build_htm_recursively``
     to produce the root passed to ``HierarchicalTask``.
     """
     payload = json.loads(get().text)
     root = build_htm_recursively(payload['nodes'])
     return HierarchicalTask(root)
Exemplo n.º 6
0
 def test_end_reward(self):
     # For a single-leaf task, every reward entry for the wait action from
     # the end state to the end state must equal the configured end_reward.
     converter = HTMToPOMDP(1., 2., 1., 1., end_reward=13.)
     action = CollaborativeAction('Do it', (3., 2., 5.))
     task = HierarchicalTask(root=LeafCombination(action))
     pomdp = converter.task_to_pomdp(task)
     end_rewards = pomdp.R[converter.wait, converter.end, converter.end, :]
     self.assertTrue((end_rewards == 13.).all())
Exemplo n.º 7
0
 def test_alt_to_pomdp(self):
     # Converts an alternative between two leaves ('Do a' / 'Do b') with
     # self.h2p (created outside this method) and compares the labels, the
     # start distribution and the T, O and R arrays of the resulting POMDP
     # with hand-checked values.
     # No probability of failure or human saying no here
     task = HierarchicalTask(root=AlternativeCombination([
         LeafCombination(
             CollaborativeAction('Do a', (3., 2., 5.),
                                 fail_probability=0.,
                                 no_probability=0.)),
         LeafCombination(
             CollaborativeAction('Do b', (2., 3., 4.),
                                 fail_probability=0.,
                                 no_probability=0.)),
     ],
                                                         name='Do either'))
     p = self.h2p.task_to_pomdp(task)
     # Three states per leaf (init / H / R) followed by a single end state.
     self.assertEqual(p.states, [
         'init-do-a', 'H-do-a', 'R-do-a', 'init-do-b', 'H-do-b', 'R-do-b',
         'end'
     ])
     # One shared 'wait' action, then four actions per leaf.
     self.assertEqual(p.actions, [
         'wait', 'phy-do-a', 'com-ask-intention-do-a',
         'com-tell-intention-do-a', 'com-ask-finished-do-a', 'phy-do-b',
         'com-ask-intention-do-b', 'com-tell-intention-do-b',
         'com-ask-finished-do-b'
     ])
     self.assertEqual(p.observations, ['none', 'yes', 'no', 'error'])
     # The start distribution is split evenly between the two init states.
     np.testing.assert_array_equal(p.start,
                                   np.array([.5, 0, 0, .5, 0, 0, 0]))
     # checked manually:
     # Expected T: one 7x7 matrix per action, in the order of p.actions.
     T = np.array([
         # Wait
         [[1., 0., 0., 0., 0., 0., 0.],
          [0., 0.71653, 0., 0., 0., 0., 0.28347],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.60653, 0., 0.39347],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Physical a
         [[1., 0., 0., 0., 0., 0., 0.],
          [0., 0.18888, 0., 0., 0., 0., 0.81112],
          [0., 0., 0., 0., 0., 0., 1.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.08208, 0., 0.91792],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Ask intention a
         [[0., 1., 0., 0., 0., 0., 0.],
          [0., 0.51342, 0., 0., 0., 0., 0.48658],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.36788, 0., 0.63212],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Tell intention a
         [[0., 0., 1., 0., 0., 0., 0.],
          [0., 0.71653, 0., 0., 0., 0., 0.28347],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.60653, 0., 0.39347],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Ask finished a
         [[1., 0., 0., 0., 0., 0., 0.],
          [0., 0.51342, 0., 0., 0., 0., 0.48658],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.36788, 0., 0.63212],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Physical b
         [[1., 0., 0., 0., 0., 0.,
           0.], [0., 0.2636, 0., 0., 0., 0., 0.7364],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.13534, 0., 0.86466],
          [0., 0., 0., 0., 0., 0., 1.], [0., 0., 0., 0., 0., 0., 1.]],
         # Ask intention b
         [[1., 0., 0., 0., 0., 0., 0.],
          [0., 0.51342, 0., 0., 0., 0., 0.48658],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 0., 1., 0., 0.],
          [0., 0., 0., 0., 0.36788, 0., 0.63212],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Tell intention b
         [[1., 0., 0., 0., 0., 0., 0.],
          [0., 0.71653, 0., 0., 0., 0., 0.28347],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 1., 0.],
          [0., 0., 0., 0., 0.60653, 0., 0.39347],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Ask finished b
         [[1., 0., 0., 0., 0., 0., 0.],
          [0., 0.51342, 0., 0., 0., 0., 0.48658],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.36788, 0., 0.63212],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
     ])
     # The literals above are rounded to 5 decimals, hence the absolute
     # tolerance.
     np.testing.assert_allclose(T, p.T, atol=1.e-4)
     # Expected O: one 7x4 matrix per action, in the order of p.actions;
     # columns follow p.observations.
     O = np.array([
         # Wait
         [[1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.]],
         # Physical a
         [[0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.],
          [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.],
          [1., 0., 0., 0.]],
         # Ask intention a
         [[1., 0., 0., 0.], [0., 1., 0., 0.], [0., 0., 1., 0.],
          [1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.]],
         # Tell intention a
         [[1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.]],
         # Ask finished a
         [[0., 0., 1., 0.], [0., 0., 1., 0.], [0., 0., 1., 0.],
          [0., 0., 1., 0.], [0., 0., 1., 0.], [0., 0., 1., 0.],
          [0., 1., 0., 0.]],
         # Physical b
         [[0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.],
          [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.],
          [1., 0., 0., 0.]],
         # Ask intention b
         [[1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.], [0., 1., 0., 0.], [0., 0., 1., 0.],
          [1., 0., 0., 0.]],
         # Tell intention b
         [[1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.]],
         # Ask finished b
         [[0., 0., 1., 0.], [0., 0., 1., 0.], [0., 0., 1., 0.],
          [0., 0., 1., 0.], [0., 0., 1., 0.], [0., 0., 1., 0.],
          [0., 1., 0., 0.]],
     ])
     np.testing.assert_array_equal(O, p.O)
     # Expected R: a 9x7 table (one row per action, in the order of
     # p.actions), expanded with two trailing axes, broadcast to
     # (9, 7, 7, 4) and negated by the leading minus sign.
     R = -np.broadcast_to(
         np.array([[1] * 6 + [0], [6, 6, 3, 6, 6, 6, 1],
                   [3] * 6 + [1], [2] * 6 + [1], [3] * 6 + [1],
                   [5, 5, 5, 5, 5, 4, 1], [3] * 6 + [1], [2] * 6 + [1],
                   [3] * 6 + [1]])[:, :, None, None], (9, 7, 7, 4))
     np.testing.assert_array_equal(R, p.R)
Exemplo n.º 8
0
 def test_leaf_to_pomdp(self):
     # Converts a single-leaf task ('Do it') with self.h2p (created outside
     # this method) and compares the labels, the start distribution and the
     # T, O and R arrays of the resulting POMDP with hand-checked values.
     # Author's note: no probability of failure or human saying no here.
     action = CollaborativeAction('Do it', (3., 2., 5.),
                                  fail_probability=0.,
                                  no_probability=0.)
     task = HierarchicalTask(root=LeafCombination(action))
     pomdp = self.h2p.task_to_pomdp(task)

     # Labels and initial distribution.
     self.assertEqual(pomdp.states,
                      ['init-do-it', 'H-do-it', 'R-do-it', 'end'])
     self.assertEqual(pomdp.actions, [
         'wait',
         'phy-do-it',
         'com-ask-intention-do-it',
         'com-tell-intention-do-it',
         'com-ask-finished-do-it',
     ])
     self.assertEqual(pomdp.observations, ['none', 'yes', 'no', 'error'])
     np.testing.assert_array_equal(pomdp.start, np.array([1, 0., 0., 0.]))

     # The expected arrays below were checked manually; their first axis
     # follows the order of pomdp.actions.
     expected_T = np.array([
         # Wait
         [[1., 0., 0., 0.], [0., 0.71653131, 0., 0.28346869],
          [0., 0., 1., 0.], [0., 0., 0., 1.]],
         # Act
         [[1., 0., 0., 0.], [0., 0.1888756, 0., 0.8111244],
          [0., 0., 0., 1.], [0., 0., 0., 1.]],
         # Ask intention
         [[0., 1., 0., 0.], [0., 0.51341712, 0., 0.48658288],
          [0., 0., 1., 0.], [0., 0., 0., 1.]],
         # Tell intention
         [[0., 0., 1., 0.], [0., 0.71653131, 0., 0.28346869],
          [0., 0., 1., 0.], [0., 0., 0., 1.]],
         # Ask finished
         [[1., 0., 0., 0.], [0., 0.51341712, 0., 0.48658288],
          [0., 0., 1., 0.], [0., 0., 0., 1.]],
     ])
     np.testing.assert_allclose(expected_T, pomdp.T)

     expected_O = np.array([
         # Wait
         [
             [1., 0., 0., 0.],
             [1., 0., 0., 0.],
             [1., 0., 0., 0.],
             [1., 0., 0., 0.],
         ],
         # Act
         [
             [0., 0., 0., 1.],
             [0., 0., 0., 1.],
             [0., 0., 0., 1.],
             [1., 0., 0., 0.],
         ],
         # Ask intention
         [
             [1., 0., 0., 0.],  # not possible in T
             [0., 1., 0., 0.],
             [0., 0., 1., 0.],  # robot has told its intention to act
             # human acts while robot asks again (TODO: change to Yes?)
             [1., 0., 0., 0.],
         ],
         # Tell intention
         [
             [1., 0., 0., 0.],  # not possible in T
             [1., 0., 0., 0.],  # TODO: No?
             [1., 0., 0., 0.],  # TODO: maybe H answers to R tell
             [1., 0., 0., 0.],  # TODO: answer?
         ],
         # Ask finished
         [
             [0., 0., 1., 0.],  # not started
             [0., 0., 1., 0.],
             [0., 0., 1., 0.],
             [0., 1., 0., 0.],
         ],
     ])
     np.testing.assert_array_equal(expected_O, pomdp.O)

     # 5x4 table (one row per action) expanded with two trailing axes,
     # broadcast to (5, 4, 4, 4) and negated.
     cost_table = np.array([
         [1, 1, 1, 0],
         [6, 6, 3, 1],
         [3, 3, 3, 1],
         [2, 2, 2, 1],
         [3, 3, 3, 1],
     ])
     expected_R = -np.broadcast_to(cost_table[:, :, None, None],
                                   (5, 4, 4, 4))
     np.testing.assert_array_equal(expected_R, pomdp.R)
Exemplo n.º 9
0
    ],
                          name='Mount right leg'),
],
                                   name='Mount legs')
# Leaf step for releasing the central frame, created with no_probability=.1.
# NOTE(review): the meaning of the 3-tuple passed to CollaborativeAction and
# of the INF entries is not visible in this excerpt -- confirm against
# CollaborativeAction before changing these values.
release_central = LeafCombination(
    CollaborativeAction('Release central frame', (INF, 1., 1.),
                        no_probability=.1))
# Two-step sequence: get the top, then snap it (the latter created with
# fail_probability=.1).
mount_top = SequentialCombination([
    LeafCombination(CollaborativeAction('Get top', (INF, 20., 30.))),
    LeafCombination(
        CollaborativeAction('Snap top', (5., INF, INF), fail_probability=.1))
],
                                  name='Mount top')

# Root task: the four sub-tasks in sequence. mount_central and mount_legs are
# defined earlier in the script, outside this excerpt.
chair_task = HierarchicalTask(root=SequentialCombination(
    [mount_central, mount_legs, release_central, mount_top],
    name='Mount chair'))

## Convert the task into a POMDP

# The converter is configured from module-level constants (T_WAIT, T_COMM,
# C_INTR, R_END, LOOP, R_SUBTASK) defined outside this excerpt.
h2p = HTMToPOMDP(T_WAIT,
                 T_COMM,
                 C_INTR,
                 end_reward=R_END,
                 loop=LOOP,
                 reward_state=False,
                 subtask_reward=R_SUBTASK)
p = h2p.task_to_pomdp(chair_task)
#p.discount = .99

# Solve the model with the 'grid' method for 500 iterations, verbosely.
gp = p.solve(method='grid', n_iterations=500, verbose=True)
Exemplo n.º 10
0
import json

from task_models.task import (HierarchicalTask, AbstractAction,
                              SequentialCombination, ParallelCombination,
                              LeafCombination)

# Assemble the "Mount chair" task from abstract actions and print its
# dictionary representation as indented JSON.


def _mount_leg(index):
    """Return the take-then-attach sequence for the leg numbered *index*."""
    take = LeafCombination(AbstractAction('Take leg {}'.format(index)))
    attach = LeafCombination(AbstractAction('Attach leg {}'.format(index)))
    return SequentialCombination([take, attach],
                                 name='Mount leg {}'.format(index))


# Single leaf step executed first in the root sequence.
take_base = LeafCombination(AbstractAction('Take base'))

# One sub-task per leg, for legs numbered 0 to 3.
mount_leg_combinations = [_mount_leg(i) for i in range(4)]

# Frame sub-task; its first leaf is created with highlighted=True.
mount_frame = SequentialCombination(
    [
        LeafCombination(AbstractAction('Take frame'), highlighted=True),
        LeafCombination(AbstractAction('Attach frame')),
    ],
    name='Mount frame',
)

# Root: take the base, then the leg sub-tasks grouped in a
# ParallelCombination, then the frame.
chair_task = HierarchicalTask(
    root=SequentialCombination(
        [
            take_base,
            ParallelCombination(mount_leg_combinations, name='Mount legs'),
            mount_frame,
        ],
        name='Mount chair',
    )
)

print(json.dumps(chair_task.as_dictionary(), indent=2))
Exemplo n.º 11
0
 def task_from_orders(self, orders, names=None):
     """Wrap the alternative combination built from *orders* in a task.

     *orders* and *names* are forwarded unchanged to
     ``self.alternative_combination_from_orders``; its result becomes the
     root of the returned ``HierarchicalTask``.
     """
     root = self.alternative_combination_from_orders(orders, names=names)
     return HierarchicalTask(root=root)