Exemplo n.º 1
0
 def setUp(self):
     """Build an alternative between two leaf actions and convert it.

     The resulting ``_NodeToPOMDP`` object is stored in ``self.n2p``.
     """
     branches = [
         LeafCombination(
             CollaborativeAction('Do a', (3., 2., 5.), human_probability=.3)),
         LeafCombination(
             CollaborativeAction('Do b', (2., 3., 4.), human_probability=.7)),
     ]
     either = AlternativeCombination(branches, name='Do all')
     self.n2p = _NodeToPOMDP.from_node(either, 2., 1.)
Exemplo n.º 2
0
 def setUp(self):
     """Create the leaf nodes, a two-step sequential task and its POMDP.

     ``p_changed_by_human`` and ``p_change_preference`` are both set to
     zero on the model stored in ``self.p``.
     """
     leg = 'leg-1'
     self.bt = LeafCombination(BringTop())
     self.af = LeafCombination(AssembleFoot(leg))
     self.atj = LeafCombination(AssembleTopJoint(leg))
     self.alt = LeafCombination(AssembleLegToTop(leg))
     # Only the first two leaves are part of the task itself.
     self.htm = SequentialCombination([self.bt, self.af])
     model = SupportivePOMDP(self.htm)
     self.p = model
     model.p_changed_by_human = 0.
     model.p_change_preference = 0.
Exemplo n.º 3
0
 def setUp(self):
     """Create four collaborative actions and wrap each one in a leaf.

     The leaves are exposed as ``self.l1`` .. ``self.l4``.
     """
     specs = (
         ('Do a', (3., 2., 5.)),
         ('Do b', (2., 3., 4.)),
         ('Do c', (2., 3., 4.)),
         ('Do d', (3., 2., 5.)),
     )
     # All actions are created first, then all leaves, in declaration order.
     actions = [CollaborativeAction(label, params) for label, params in specs]
     leaves = [LeafCombination(action) for action in actions]
     self.l1, self.l2, self.l3, self.l4 = leaves
Exemplo n.º 4
0
 def test_is_correct(self):
     """Convert a parallel node of three leaves into an alternative.

     The result must be an ``AlternativeCombination`` with six children,
     the first one a ``SequentialCombination``, each with three children.
     """
     leaves = [
         LeafCombination(AbstractAction('a')),
         LeafCombination(AbstractAction('b')),
         LeafCombination(AbstractAction('c')),
     ]
     parallel = ParallelCombination(leaves)
     alt = parallel.to_alternative()
     self.assertIsInstance(alt, AlternativeCombination)
     self.assertEqual(len(alt.children), 6)
     self.assertIsInstance(alt.children[0], SequentialCombination)
     child_sizes = [len(child.children) for child in alt.children]
     self.assertTrue(all(size == 3 for size in child_sizes))
Exemplo n.º 5
0
 def test_alt_to_pomdp(self):
     """Check the POMDP built from an alternative between two leaf actions.

     Compares the state, action and observation names, the start
     distribution and the T, O and R arrays of the converted task against
     hand-checked expected values.
     """
     # No probability of failure or human saying no here
     task = HierarchicalTask(root=AlternativeCombination([
         LeafCombination(CollaborativeAction('Bottom left', 'A1')),
         LeafCombination(CollaborativeAction('Top left', 'A2')),
     ],
                                                         name='Do all'))
     # NOTE(review): the meaning of the three positional costs is not visible
     # here; the expected rewards below use -2, -8 and -5 -- confirm against
     # the HTMToPOMDP signature.
     h2p = HTMToPOMDP(2., 8., 5., ['A1', 'A2'], end_reward=50.)
     p = h2p.task_to_pomdp(task)
     self.assertEqual(p.states,
                      ['before-bottom-left', 'before-top-left', 'end'])
     self.assertEqual(p.actions, ['get-A1', 'get-A2', 'ask-A1', 'ask-A2'])
     self.assertEqual(p.observations, ['none', 'yes', 'no', 'error'])
     # Both alternatives are equally likely at start; 'end' has no mass.
     np.testing.assert_array_equal(p.start, np.array([.5, .5, 0]))
     # checked manually:
     # One 3x3 block per action, in the order of p.actions (presumably
     # indexed [state, next state] -- confirm).
     T = np.array([
         # get A1
         [[0., 0., 1.], [0., 1., 0.], [.5, .5, 0.]],
         # get A2
         [[1., 0., 0.], [0., 0., 1.], [.5, .5, 0.]],
         # ask A1
         [[1., 0., 0.], [0., 1., 0.], [.5, .5, 0.]],
         # ask A2
         [[1., 0., 0.], [0., 1., 0.], [.5, .5, 0.]],
     ])
     np.testing.assert_allclose(T, p.T, atol=1.e-4)
     # One 3x4 block per action; the last axis follows p.observations.
     O = np.array([
         # get A1
         [[0., 0., 0., 1.], [0., 0., 0., 1.], [.9, 0., 0., .1]],
         # get A2
         [[0., 0., 0., 1.], [0., 0., 0., 1.], [.9, 0., 0., .1]],
         # ask A1
         [[0., 1., 0., 0.], [0., 0., 1., 0.], [0., 0., 1., 0.]],
         # ask A2
         [[0., 0., 1., 0.], [0., 1., 0., 0.], [0., 0., 1., 0.]],
     ])
     np.testing.assert_array_equal(O, p.O)
     # The (4, 3, 3) reward table gets a trailing axis and is broadcast to
     # (4, 3, 3, 4), i.e. the expected reward is the same along the last axis.
     R = np.broadcast_to(
         np.array([
             # get A1
             [[-5, -5, -8], [-5] * 3, [50, 50, 50]],
             # get A2
             [[-5.] * 3, [-5, -5, -8], [50, 50, 50]],
             # ask A1
             [[-2.] * 3, [-2.] * 3, [50, 50, 50]],
             # ask A2
             [[-2.] * 3, [-2.] * 3, [50, 50, 50]],
         ])[..., None],
         (4, 3, 3, 4))
     np.testing.assert_array_equal(R, p.R)
Exemplo n.º 6
0
 def test_end_reward_on_seq(self):
     """Waiting in the end state of a two-step sequence yields 13.

     The reward slice ``R[wait, end, end, :]`` of the converted task must
     equal the ``end_reward`` given to the converter.
     """
     def certain_leaf(label, params):
         # Leaf whose action can neither fail nor be refused.
         return LeafCombination(
             CollaborativeAction(label, params,
                                 fail_probability=0.,
                                 no_probability=0.))

     h2p = HTMToPOMDP(1., 2., 1., 1., end_reward=13.)
     steps = [
         certain_leaf('Do a', (3., 2., 5.)),
         certain_leaf('Do b', (2., 3., 4.)),
     ]
     root = SequentialCombination(steps, name='Do all')
     pomdp = h2p.task_to_pomdp(HierarchicalTask(root=root))
     end_rewards = pomdp.R[h2p.wait, h2p.end, h2p.end, :]
     self.assertTrue((end_rewards == 13.).all())
Exemplo n.º 7
0
 def test_leaf_to_pomdp(self):
     """Check the POMDP generated from a task made of a single leaf.

     Verifies the state, action and observation names, the start
     distribution and the T, O and R arrays against hand-checked values.
     """
     converter = HTMToPOMDP(2., 8., 5., ['A1'], end_reward=50.)
     leaf = LeafCombination(CollaborativeAction('bottom left', 'A1'))
     pomdp = converter.task_to_pomdp(HierarchicalTask(root=leaf))

     self.assertEqual(pomdp.states, ['before-bottom-left', 'end'])
     self.assertEqual(pomdp.actions, ['get-A1', 'ask-A1'])
     self.assertEqual(pomdp.observations, ['none', 'yes', 'no', 'error'])
     np.testing.assert_array_equal(pomdp.start, np.array([1, 0.]))

     # checked manually:
     expected_T = np.array([
         [[0., 1.], [1., 0.]],  # get
         [[1., 0.], [1., 0.]],  # ask
     ])
     np.testing.assert_allclose(expected_T, pomdp.T)

     expected_O = np.array([
         [[0., 0., 0., 1.], [.9, 0., 0., .1]],  # get
         [[0., 1., 0., 0.], [0., 0., 1., 0.]],  # ask
     ])
     np.testing.assert_array_equal(expected_O, pomdp.O)

     # Same reward along the last axis: add one and broadcast.
     reward_table = np.array([
         [[-5., -8.], [50., 50.]],
         [[-2., -2.], [50., 50.]],
     ])
     expected_R = np.broadcast_to(reward_table[..., None], (2, 2, 2, 4))
     np.testing.assert_array_equal(expected_R, pomdp.R)
Exemplo n.º 8
0
 def test_with_no_probability(self):
     """Check row ``T[3, 3, :]`` after ``update_T`` with no_probability=.2.

     The expected row puts .8 on index 4 and .2 on index 5.
     """
     action = CollaborativeAction('Do it', (3., 2., 5.), no_probability=.2)
     converter = _NodeToPOMDP.from_node(LeafCombination(action), 2., 1.)
     transitions = np.zeros((7, 8, 8))
     converter.update_T(transitions, 0, 2, 3, [6, 7], [.2, .8],
                        list(range(7)))
     expected_row = np.zeros((8))
     expected_row[4:6] = [.8, .2]
     np.testing.assert_allclose(transitions[3, 3, :], expected_row)
Exemplo n.º 9
0
 def test_with_failure_probability(self):
     """Check row ``T[2, 5, :]`` after ``update_T`` with fail_probability=.2.

     The expected row puts .2 on index 5 and splits the remaining .8 over
     indices 6 and 7 with weights .2 and .8.
     """
     action = CollaborativeAction('Do it', (3., 2., 5.), fail_probability=.2)
     converter = _NodeToPOMDP.from_node(LeafCombination(action), 2., 1.)
     transitions = np.zeros((7, 8, 8))
     converter.update_T(transitions, 0, 2, 3, [6, 7], [.2, .8],
                        list(range(7)))
     expected_row = np.zeros((8))
     expected_row[5:8] = [.2, .8 * .2, .8 * .8]
     np.testing.assert_allclose(transitions[2, 5, :], expected_row)
Exemplo n.º 10
0
 def sequential_combination_from_order(self, order, name=None):
     """Return a ``SequentialCombination`` with one leaf per item of *order*.

     Each leaf wraps a ``CollaborativeAction`` whose first argument is
     ``str(item)``, suffixed with ``'-' + name`` when *name* is given, and
     whose second argument is ``str(item)``. *name* is also passed on as the
     name of the returned combination.
     """
     leaves = []
     for item in order:
         if name is None:
             label = str(item) + ''
         else:
             label = str(item) + ('-' + name)
         action = CollaborativeAction(label, str(item))
         leaves.append(LeafCombination(action))
     return SequentialCombination(leaves, name=name)
Exemplo n.º 11
0
        rospy.loginfo("Got human message: '%s'", ans)
        ans = ans.lower()
        if 'yes' in ans:
            return self.model.observations[self.model.O_YES]
        if 'no' in ans or "don't" in ans:
            return self.model.observations[self.model.O_NO]
        else:
            self.say("I didn't get what you meant.", sync=False)
            return self.model.observations[self.model.O_NONE]


# Problem definition
leg_i = 'leg-{}'.format


def _leg_subtask(i):
    """Return the two-step sequence (assemble leg, attach to top) for leg *i*.

    ``bring_top`` is only requested for the first leg (``i == 0``).
    """
    return SequentialCombination([
        LeafCombination(AssembleLeg(leg_i(i))),
        LeafCombination(AssembleLegToTop(leg_i(i), bring_top=(i == 0))),
    ])


# One sub-sequence per leg, executed one after the other.
htm = SequentialCombination([_leg_subtask(i) for i in range(4)])

p = SupportivePOMDP(htm)
# TODO put as default
p.r_subtask = 0.
p.r_preference = 20.
p.cost_hold = 3.
p.cost_get = 20.
try:
    pol = AsyncPOMCPPolicyRunner(p,
                                 iterations=ITERATIONS,
                                 horizon=NHTMHorizon.generator(p, n=HORIZON),
                                 exploration=EXPLORATION,
Exemplo n.º 12
0
# NOTE(review): cost constants; their use is outside this excerpt -- the
# names suggest an interruption cost and an error cost, confirm with callers.
C_INTR = 1.
C_ERR = 5.
# Large value used as an entry of the action tuples in the task definition.
INF = 100.

## Tested scenarios:
# NOTE(review): scenarios 1 and 2 carry the same description although they
# differ in R_END and LOOP -- confirm what distinguishes them.
# 1. with full sequence of sequential actions
R_END = 0.1
LOOP = False
# 2. with full sequence of sequential actions
# R_END = 100
# LOOP = True
R_SUBTASK = None

## Define the task
# Two leaves executed in order: get the central frame, then start holding it.
mount_central = SequentialCombination([
    LeafCombination(CollaborativeAction('Get central frame', (INF, 20., 30.))),
    LeafCombination(
        CollaborativeAction('Start Hold central frame', (3., 10., INF)))
],
                                      name='Mount central frame')
#mount_legs = ParallelCombination([
mount_legs = SequentialCombination([
    SequentialCombination([
        LeafCombination(CollaborativeAction('Get left leg', (INF, 20., 30.))),
        LeafCombination(
            CollaborativeAction('Snap left leg', (5., INF, INF),
                                fail_probability=.1)),
    ],
                          name='Mount left leg'),
    SequentialCombination([
        LeafCombination(CollaborativeAction('Get right leg', (INF, 20., 30.))),
Exemplo n.º 13
0
 def setUp(self):
     """Create a two-leaf sequential task, its POMDP and a horizon object.

     The horizon is an ``NHTMHorizon`` built from the model with argument 1.
     """
     bring_top = LeafCombination(BringTop())
     foot = LeafCombination(AssembleFoot('leg-1'))
     task = SequentialCombination([bring_top, foot])
     model = SupportivePOMDP(task)
     self.bt, self.af = bring_top, foot
     self.htm = task
     self.model = model
     self.h = NHTMHorizon(model, 1)
Exemplo n.º 14
0
    def _task_def(self):
        """Build the two task models and store them on the instance.

        Twelve leaves (gather parts / assemble, for four legs, the seat and
        the back) are paired into six sequences. The leg sequences and the
        seat/back sequences are each grouped in a parallel node.
        ``self.main_task`` ('TaskA') runs legs then rest; ``self.tf_task``
        ('TaskB') runs rest then legs.
        """
        def make_leaf(action_name, probs_key):
            # Leaf wrapping one PredAction parameterised from the instance.
            return LeafCombination(
                PredAction(action_name, self.NUM_FEATS,
                           self.OBS_PROBS[probs_key]))

        def make_task(children, task_name):
            # Both tasks share every argument except root order and name.
            return HierarchicalTaskHMMSuppRD(
                root=SequentialCombination(children, name='complete'),
                name=task_name,
                num_feats_action=self.NUM_FEATS,
                feats=self.FEAT,
                supp_bhvs=self.SUPP_BHVS,
                obj_presence=self.OBJ_PRESENCE,
                obj_count_idx=self.OBJ_COUNT_IDX,
                main_obj=self.MAIN_OBJ)

        # (action name, key into OBS_PROBS), listed as gather/assemble pairs.
        leaf_specs = (
            ('gatherparts_leg_1', 'gp_l1_probs'),
            ('assemble_leg_1', 'ass_l1_probs'),
            ('gatherparts_leg_2', 'gp_l2_probs'),
            ('assemble_leg_2', 'ass_l2_probs'),
            ('gatherparts_leg_3', 'gp_l3_probs'),
            ('assemble_leg_3', 'ass_l3_probs'),
            ('gatherparts_leg_4', 'gp_l4_probs'),
            ('assemble_leg_4', 'ass_l4_probs'),
            ('gatherparts_seat', 'gp_s_probs'),
            ('assemble_seat', 'ass_s_probs'),
            ('gatherparts_back', 'gp_b_probs'),
            ('assemble_back', 'ass_b_probs'),
        )
        leaves = [make_leaf(action_name, probs_key)
                  for action_name, probs_key in leaf_specs]

        sequence_names = ('finish_leg1', 'finish_leg2', 'finish_leg3',
                          'finish_leg4', 'finish_seat', 'finish_back')
        # Even positions hold the gather leaves, odd positions the assemble
        # leaves of the same part.
        pairs = zip(leaves[0::2], leaves[1::2])
        f_l1, f_l2, f_l3, f_l4, f_s, f_b = [
            SequentialCombination([gather, assemble], name=sequence_name)
            for (gather, assemble), sequence_name in zip(pairs,
                                                         sequence_names)
        ]

        f_legs = ParallelCombination([f_l1, f_l2, f_l3, f_l4],
                                     name='finish_legs')
        f_rest = ParallelCombination([f_b, f_s], name='finish_rest')

        main_task = make_task([f_legs, f_rest], 'TaskA')
        tf_task = make_task([f_rest, f_legs], 'TaskB')

        self.main_task = main_task
        self.tf_task = tf_task
Exemplo n.º 15
0
 def setUp(self):
     """Convert a single leaf action and keep the result in ``self.l2p``."""
     action = CollaborativeAction('Do it', (3., 2., 5.), human_probability=.3)
     self.l2p = _NodeToPOMDP.from_node(LeafCombination(action), 2., 1.)
Exemplo n.º 16
0
import json

from task_models.task import (HierarchicalTask, AbstractAction,
                              SequentialCombination, ParallelCombination,
                              LeafCombination)

def _leaf(label, **options):
    """Return a ``LeafCombination`` wrapping an ``AbstractAction`` *label*."""
    return LeafCombination(AbstractAction(label), **options)


take_base = _leaf('Take base')

# One "take then attach" sequence per leg.
mount_leg_combinations = [
    SequentialCombination(
        [
            _leaf('Take leg {}'.format(i)),
            _leaf('Attach leg {}'.format(i)),
        ],
        name='Mount leg {}'.format(i),
    )
    for i in range(4)
]

mount_frame = SequentialCombination(
    [
        _leaf('Take frame', highlighted=True),
        _leaf('Attach frame'),
    ],
    name='Mount frame',
)

# Base first, then the legs in parallel, then the frame.
chair_task = HierarchicalTask(
    root=SequentialCombination(
        [
            take_base,
            ParallelCombination(mount_leg_combinations, name='Mount legs'),
            mount_frame,
        ],
        name='Mount chair',
    )
)

print(json.dumps(chair_task.as_dictionary(), indent=2))
Exemplo n.º 17
0
 def test_end_reward(self):
     """Waiting in the end state of a single-leaf task yields 13.

     The reward slice ``R[wait, end, end, :]`` of the converted task must
     equal the ``end_reward`` given to the converter.
     """
     h2p = HTMToPOMDP(1., 2., 1., 1., end_reward=13.)
     leaf = LeafCombination(CollaborativeAction('Do it', (3., 2., 5.)))
     pomdp = h2p.task_to_pomdp(HierarchicalTask(root=leaf))
     end_rewards = pomdp.R[h2p.wait, h2p.end, h2p.end, :]
     self.assertTrue((end_rewards == 13.).all())
Exemplo n.º 18
0
 def test_alt_to_pomdp(self):
     """Check the POMDP built from an alternative between 'Do a' and 'Do b'.

     Compares the state, action and observation names, the start
     distribution and the T, O and R arrays produced by ``self.h2p``
     against hand-checked expected values.
     """
     # No probability of failure or human saying no here
     task = HierarchicalTask(root=AlternativeCombination([
         LeafCombination(
             CollaborativeAction('Do a', (3., 2., 5.),
                                 fail_probability=0.,
                                 no_probability=0.)),
         LeafCombination(
             CollaborativeAction('Do b', (2., 3., 4.),
                                 fail_probability=0.,
                                 no_probability=0.)),
     ],
                                                         name='Do either'))
     p = self.h2p.task_to_pomdp(task)
     # Three states per leaf (init, H, R) followed by the shared 'end' state.
     self.assertEqual(p.states, [
         'init-do-a', 'H-do-a', 'R-do-a', 'init-do-b', 'H-do-b', 'R-do-b',
         'end'
     ])
     # 'wait' plus four actions per leaf.
     self.assertEqual(p.actions, [
         'wait', 'phy-do-a', 'com-ask-intention-do-a',
         'com-tell-intention-do-a', 'com-ask-finished-do-a', 'phy-do-b',
         'com-ask-intention-do-b', 'com-tell-intention-do-b',
         'com-ask-finished-do-b'
     ])
     self.assertEqual(p.observations, ['none', 'yes', 'no', 'error'])
     # The start mass is split evenly between the two 'init' states.
     np.testing.assert_array_equal(p.start,
                                   np.array([.5, 0, 0, .5, 0, 0, 0]))
     # checked manually:
     # One 7x7 block per action, in the order of p.actions (presumably
     # indexed [state, next state] -- confirm).
     T = np.array([
         # Wait
         [[1., 0., 0., 0., 0., 0., 0.],
          [0., 0.71653, 0., 0., 0., 0., 0.28347],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.60653, 0., 0.39347],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Physical a
         [[1., 0., 0., 0., 0., 0., 0.],
          [0., 0.18888, 0., 0., 0., 0., 0.81112],
          [0., 0., 0., 0., 0., 0., 1.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.08208, 0., 0.91792],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Ask intention a
         [[0., 1., 0., 0., 0., 0., 0.],
          [0., 0.51342, 0., 0., 0., 0., 0.48658],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.36788, 0., 0.63212],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Tell intention a
         [[0., 0., 1., 0., 0., 0., 0.],
          [0., 0.71653, 0., 0., 0., 0., 0.28347],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.60653, 0., 0.39347],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Ask finished a
         [[1., 0., 0., 0., 0., 0., 0.],
          [0., 0.51342, 0., 0., 0., 0., 0.48658],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.36788, 0., 0.63212],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Physical b
         [[1., 0., 0., 0., 0., 0.,
           0.], [0., 0.2636, 0., 0., 0., 0., 0.7364],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.13534, 0., 0.86466],
          [0., 0., 0., 0., 0., 0., 1.], [0., 0., 0., 0., 0., 0., 1.]],
         # Ask intention b
         [[1., 0., 0., 0., 0., 0., 0.],
          [0., 0.51342, 0., 0., 0., 0., 0.48658],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 0., 1., 0., 0.],
          [0., 0., 0., 0., 0.36788, 0., 0.63212],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Tell intention b
         [[1., 0., 0., 0., 0., 0., 0.],
          [0., 0.71653, 0., 0., 0., 0., 0.28347],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 0., 0., 1., 0.],
          [0., 0., 0., 0., 0.60653, 0., 0.39347],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
         # Ask finished b
         [[1., 0., 0., 0., 0., 0., 0.],
          [0., 0.51342, 0., 0., 0., 0., 0.48658],
          [0., 0., 1., 0., 0., 0., 0.], [0., 0., 0., 1., 0., 0., 0.],
          [0., 0., 0., 0., 0.36788, 0., 0.63212],
          [0., 0., 0., 0., 0., 1., 0.], [0., 0., 0., 0., 0., 0., 1.]],
     ])
     # Expected values are rounded to five decimals, hence the tolerance.
     np.testing.assert_allclose(T, p.T, atol=1.e-4)
     # One 7x4 block per action; the last axis follows p.observations.
     O = np.array([
         # Wait
         [[1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.]],
         # Physical a
         [[0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.],
          [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.],
          [1., 0., 0., 0.]],
         # Ask intention a
         [[1., 0., 0., 0.], [0., 1., 0., 0.], [0., 0., 1., 0.],
          [1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.]],
         # Tell intention a
         [[1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.]],
         # Ask finished a
         [[0., 0., 1., 0.], [0., 0., 1., 0.], [0., 0., 1., 0.],
          [0., 0., 1., 0.], [0., 0., 1., 0.], [0., 0., 1., 0.],
          [0., 1., 0., 0.]],
         # Physical b
         [[0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.],
          [0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.],
          [1., 0., 0., 0.]],
         # Ask intention b
         [[1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.], [0., 1., 0., 0.], [0., 0., 1., 0.],
          [1., 0., 0., 0.]],
         # Tell intention b
         [[1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.]],
         # Ask finished b
         [[0., 0., 1., 0.], [0., 0., 1., 0.], [0., 0., 1., 0.],
          [0., 0., 1., 0.], [0., 0., 1., 0.], [0., 0., 1., 0.],
          [0., 1., 0., 0.]],
     ])
     np.testing.assert_array_equal(O, p.O)
     # A 9x7 table (one row per action, one column per state, presumably in
     # p.actions / p.states order) is broadcast over the two trailing axes
     # and negated, so the listed values are costs.
     R = -np.broadcast_to(
         np.array([[1] * 6 + [0], [6, 6, 3, 6, 6, 6, 1],
                   [3] * 6 + [1], [2] * 6 + [1], [3] * 6 + [1],
                   [5, 5, 5, 5, 5, 4, 1], [3] * 6 + [1], [2] * 6 + [1],
                   [3] * 6 + [1]])[:, :, None, None], (9, 7, 7, 4))
     np.testing.assert_array_equal(R, p.R)
Exemplo n.º 19
0
 def test_leaf_to_pomdp(self):
     """Check the POMDP built from a task made of the single leaf 'Do it'.

     Compares the state, action and observation names, the start
     distribution and the T, O and R arrays produced by ``self.h2p``
     against hand-checked expected values.
     """
     # No probability of failure or human saying no here
     task = HierarchicalTask(root=LeafCombination(
         CollaborativeAction(
             'Do it', (3., 2.,
                       5.), fail_probability=0., no_probability=0.)))
     p = self.h2p.task_to_pomdp(task)
     self.assertEqual(p.states, ['init-do-it', 'H-do-it', 'R-do-it', 'end'])
     self.assertEqual(p.actions, [
         'wait', 'phy-do-it', 'com-ask-intention-do-it',
         'com-tell-intention-do-it', 'com-ask-finished-do-it'
     ])
     self.assertEqual(p.observations, ['none', 'yes', 'no', 'error'])
     # All the start mass is on the first state.
     np.testing.assert_array_equal(p.start, np.array([1, 0., 0., 0.]))
     # checked manually:
     # One 4x4 block per action, in the order of p.actions (presumably
     # indexed [state, next state] -- confirm).
     T = np.array([
         [[1., 0., 0., 0.], [0., 0.71653131, 0., 0.28346869],
          [0., 0., 1., 0.], [0., 0., 0., 1.]],
         [[1., 0., 0., 0.], [0., 0.1888756, 0., 0.8111244],
          [0., 0., 0., 1.], [0., 0., 0., 1.]],
         [[0., 1., 0., 0.], [0., 0.51341712, 0., 0.48658288],
          [0., 0., 1., 0.], [0., 0., 0., 1.]],
         [[0., 0., 1., 0.], [0., 0.71653131, 0., 0.28346869],
          [0., 0., 1., 0.], [0., 0., 0., 1.]],
         [[1., 0., 0., 0.], [0., 0.51341712, 0., 0.48658288],
          [0., 0., 1., 0.], [0., 0., 0., 1.]],
     ])
     np.testing.assert_allclose(T, p.T)
     # One 4x4 block per action; the last axis follows p.observations.
     O = np.array([
         # Wait
         [[1., 0., 0., 0.], [1., 0., 0., 0.], [1., 0., 0., 0.],
          [1., 0., 0., 0.]],
         # Act
         [[0., 0., 0., 1.], [0., 0., 0., 1.], [0., 0., 0., 1.],
          [1., 0., 0., 0.]],
         # Ask intention
         [
             [1., 0., 0., 0.],  # not possible in T
             [0., 1., 0., 0.],
             [0., 0., 1., 0.],  # robot has told its int. to act
             [1., 0., 0., 0.]
         ],  # human acts while robot ask again
         #                    # TODO: change to Yes?
         # Tell intention
         [
             [1., 0., 0., 0.],  # not possible in T
             [1., 0., 0., 0.],  # TODO: No?
             [1., 0., 0., 0.],  # TODO: maybe H answers to R tell
             [1., 0., 0., 0.]
         ],  # TODO: answer?
         # Ask finished
         [
             [0., 0., 1., 0.],  # not started
             [0., 0., 1., 0.],
             [0., 0., 1., 0.],
             [0., 1., 0., 0.]
         ],
     ])
     np.testing.assert_array_equal(O, p.O)
     # A 5x4 table (one row per action, one column per state, presumably in
     # p.actions / p.states order) is broadcast over the two trailing axes
     # and negated, so the listed values are costs.
     R = -np.broadcast_to(
         np.array([
             [1, 1, 1, 0],
             [6, 6, 3, 1],
             [3, 3, 3, 1],
             [2, 2, 2, 1],
             [3, 3, 3, 1],
         ])[:, :, None, None], (5, 4, 4, 4))
     np.testing.assert_array_equal(R, p.R)