def setUp(self):
    """Prepare ``self.n2p`` from a two-leaf sequential task.

    Two collaborative-action leaves ('Do a' followed by 'Do b') are chained
    in a sequential combination named 'Do all'.  That node is handed to
    ``_NodeToPOMDP.from_node`` together with the positional values 2. and 1.
    """
    action_a = CollaborativeAction('Do a', (3., 2., 5.),
                                   human_probability=.3)
    leaf_a = LeafCombination(action_a)
    action_b = CollaborativeAction('Do b', (2., 3., 4.),
                                   human_probability=.7)
    leaf_b = LeafCombination(action_b)
    root = SequentialCombination([leaf_a, leaf_b], name='Do all')
    self.n2p = _NodeToPOMDP.from_node(root, 2., 1.)
def setUp(self):
    """Create the leaf nodes, the default HTM and the model under test.

    The fixture exposes four leaves (``bt``, ``af``, ``atj``, ``alt``), a
    sequential task ``htm`` made of ``bt`` then ``af``, and a
    ``SupportivePOMDP`` ``p`` built from it whose ``p_changed_by_human`` and
    ``p_change_preference`` are both forced to 0.
    """
    leg = 'leg-1'
    self.bt = LeafCombination(BringTop())
    self.af = LeafCombination(AssembleFoot(leg))
    self.atj = LeafCombination(AssembleTopJoint(leg))
    self.alt = LeafCombination(AssembleLegToTop(leg))
    self.htm = SequentialCombination([self.bt, self.af])
    model = SupportivePOMDP(self.htm)
    # Zero both probabilities on the shared fixture model.
    model.p_changed_by_human = 0.
    model.p_change_preference = 0.
    self.p = model
def test_mixed(self):
    """Convert a sequence containing an alternative and check the DAG.

    The task is ``l1``, then either ``l2`` or ``l3``, then ``l4``.  The
    resulting DAG must list the four leaves in that order, branch from
    node 0 to nodes 1 and 2, join both on node 3, and start at node 0.
    """
    b_or_c = AlternativeCombination([self.l2, self.l3], name='Do b or c')
    root = SequentialCombination([self.l1, b_or_c, self.l4],
                                 name='Do a b|c d')
    dag = _HTMToDAG(root)

    expected_nodes = [self.l1, self.l2, self.l3, self.l4]
    expected_succs = [[1, 2], [3], [3], []]
    expected_init = [0]

    self.assertEqual(dag.nodes, expected_nodes)
    self.assertEqual(dag.succs, expected_succs)
    self.assertEqual(dag.init, expected_init)
def test_sample_transition_hold(self):
    """Check rewards, observations and HTM progress for hold actions.

    A local model ``p`` is built on the sequence ``alt`` -> ``af`` with
    ``p_fail`` set to 0, and transitions are sampled with ``random=False``
    under two preference settings.  The last two scenarios use the fixture
    model ``self.p`` instead, so their expected reward is expressed with
    ``self.p.cost_hold`` (the cost of the model that produced the
    transition) rather than the local model's cost.
    """
    htm = SequentialCombination([self.alt, self.af])
    p = SupportivePOMDP(htm)
    p.p_fail = 0.
    _s = p._int_to_state()
    _s.set_object(p.objects.index('screws'), 1)
    _s.set_object(p.objects.index('screwdriver'), 1)

    # Preference for holding
    _s.set_preference(0, 1)
    # - Wait
    s, o, r = p.sample_transition(p.A_WAIT, _s.to_int(), random=False)
    self.assertEqual(p._int_to_state(s).htm, 1)
    self.assertEqual(o, p.O_NONE)
    self.assertEqual(r, 10.)
    # - Hold
    s, o, r = p.sample_transition(p.A_HOLD_V, _s.to_int(), random=False)
    self.assertEqual(p._int_to_state(s).htm, 1)
    self.assertEqual(o, p.O_NONE)
    self.assertEqual(r, 10. - p.cost_hold + 10.)
    # - Wrong Hold
    s, o, r = p.sample_transition(p.A_HOLD_H, _s.to_int(), random=False)
    self.assertEqual(p._int_to_state(s).htm, 0)
    self.assertEqual(o, p.O_FAIL)
    self.assertEqual(r, -p.cost_hold)

    # No preference for holding
    _s.set_preference(0, 0)
    # - Wait
    s, o, r = p.sample_transition(p.A_WAIT, _s.to_int(), random=False)
    self.assertEqual(p._int_to_state(s).htm, 1)
    self.assertEqual(o, p.O_NONE)
    self.assertEqual(r, 10.)
    # - Hold
    s, o, r = p.sample_transition(p.A_HOLD_V, _s.to_int(), random=False)
    self.assertEqual(p._int_to_state(s).htm, 0)
    self.assertEqual(o, p.O_FAIL)
    self.assertEqual(r, -p.cost_hold)

    # Not required in task (Bring-top is first node in self.p)
    _s = self.p._int_to_state()
    _s.set_object(self.p.objects.index('top'), 1)
    _s.set_preference(0, 1)
    s, o, r = self.p.sample_transition(self.p.A_HOLD_V, _s.to_int(),
                                       random=False)
    # The transition comes from self.p, so compare against its own cost.
    self.assertEqual(r, -self.p.cost_hold)

    # Does not apply on final node
    _s = self.p._int_to_state()
    _s.htm = self.p.htm_final
    _s.set_preference(0, 1)
    s, o, r = self.p.sample_transition(self.p.A_HOLD_V, _s.to_int(),
                                       random=False)
    self.assertEqual(r, -self.p.cost_hold)
def test_seq_to_pomdp(self):
    """Check the POMDP produced for a two-step sequential task.

    The task is 'Bottom left' (object 'A1') followed by 'Top left'
    (object 'A2').  The generated state, action and observation names, the
    start distribution, and the T, O and R arrays are compared against
    hand-checked expectations.  The first axis of each expected array
    follows the order of ``p.actions``.
    """
    # No probability of failure or human saying no here
    leaves = [
        LeafCombination(CollaborativeAction('Bottom left', 'A1')),
        LeafCombination(CollaborativeAction('Top left', 'A2')),
    ]
    task = HierarchicalTask(
        root=SequentialCombination(leaves, name='Do all'))
    converter = HTMToPOMDP(2., 8., 5., ['A2', 'A1'], end_reward=50.)
    pomdp = converter.task_to_pomdp(task)

    expected_states = ['before-bottom-left', 'before-top-left', 'end']
    expected_actions = ['get-A2', 'get-A1', 'ask-A2', 'ask-A1']
    expected_observations = ['none', 'yes', 'no', 'error']
    self.assertEqual(pomdp.states, expected_states)
    self.assertEqual(pomdp.actions, expected_actions)
    self.assertEqual(pomdp.observations, expected_observations)
    np.testing.assert_array_equal(pomdp.start, np.array([1, 0, 0]))

    # checked manually:
    expected_T = np.array([
        # get A2
        [[1., 0., 0.],
         [0., 0., 1.],
         [1., 0., 0.]],
        # get A1
        [[0., 1., 0.],
         [0., 1., 0.],
         [1., 0., 0.]],
        # ask A2
        [[1., 0., 0.],
         [0., 1., 0.],
         [1., 0., 0.]],
        # ask A1
        [[1., 0., 0.],
         [0., 1., 0.],
         [1., 0., 0.]],
    ])
    np.testing.assert_allclose(expected_T, pomdp.T, atol=1.e-4)

    expected_O = np.array([
        # get A2
        [[0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [.9, 0., 0., .1]],
        # get A1
        [[0., 0., 0., 1.],
         [.9, 0., 0., .1],
         [0., 0., 0., 1.]],
        # ask A2
        [[0., 0., 1., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.]],
        # ask A1
        [[0., 1., 0., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.]],
    ])
    np.testing.assert_array_equal(expected_O, pomdp.O)

    # Rewards are written per (action, state, next state) and then repeated
    # along a trailing axis of length 4.
    rewards = np.array([
        # get A2
        [[-5.] * 3, [-5, -5, -8], [50, 50, 50]],
        # get A1
        [[-5, -8, -5], [-5] * 3, [50, 50, 50]],
        # ask A2
        [[-2.] * 3, [-2.] * 3, [50, 50, 50]],
        # ask A1
        [[-2.] * 3, [-2.] * 3, [50, 50, 50]],
    ])
    expected_R = np.broadcast_to(rewards[..., None], (4, 3, 3, 4))
    np.testing.assert_array_equal(expected_R, pomdp.R)
def test_end_reward_on_seq(self):
    """Check that waiting in the end state yields the end reward.

    A converter with ``end_reward=13.`` is applied to the two-step task
    'Do a' -> 'Do b' (no failure, no refusal).  Every entry of
    ``R[wait, end, end, :]`` must equal 13.
    """
    converter = HTMToPOMDP(1., 2., 1., 1., end_reward=13.)
    step_specs = (
        ('Do a', (3., 2., 5.)),
        ('Do b', (2., 3., 4.)),
    )
    leaves = [
        LeafCombination(
            CollaborativeAction(label, durations, fail_probability=0.,
                                no_probability=0.))
        for label, durations in step_specs
    ]
    task = HierarchicalTask(
        root=SequentialCombination(leaves, name='Do all'))
    pomdp = converter.task_to_pomdp(task)

    end_rewards = pomdp.R[converter.wait, converter.end, converter.end, :]
    self.assertTrue((end_rewards == 13.).all())
rospy.loginfo("Got human message: '%s'", ans) ans = ans.lower() if 'yes' in ans: return self.model.observations[self.model.O_YES] if 'no' in ans or "don't" in ans: return self.model.observations[self.model.O_NO] else: self.say("I didn't get what you meant.", sync=False) return self.model.observations[self.model.O_NONE] # Problem definition leg_i = 'leg-{}'.format htm = SequentialCombination([ SequentialCombination([ LeafCombination(AssembleLeg(leg_i(i))), LeafCombination(AssembleLegToTop(leg_i(i), bring_top=(i == 0))) ]) for i in range(4) ]) p = SupportivePOMDP(htm) # TODO put as default p.r_subtask = 0. p.r_preference = 20. p.cost_hold = 3. p.cost_get = 20. try: pol = AsyncPOMCPPolicyRunner(p, iterations=ITERATIONS, horizon=NHTMHorizon.generator(p, n=HORIZON), exploration=EXPLORATION, relative_exploration=RELATIVE_EXPLO,
def setUp(self):
    """Create a two-step task, its model and the horizon under test.

    The task chains ``bt`` (bring top) and ``af`` (assemble foot of
    'leg-1'); ``model`` is the ``SupportivePOMDP`` built from it and ``h``
    is an ``NHTMHorizon`` created from that model with the argument 1.
    """
    bring_top = LeafCombination(BringTop())
    assemble_foot = LeafCombination(AssembleFoot('leg-1'))
    self.bt = bring_top
    self.af = assemble_foot
    self.htm = SequentialCombination([bring_top, assemble_foot])
    self.model = SupportivePOMDP(self.htm)
    self.h = NHTMHorizon(self.model, 1)
def test_on_sequence(self):
    """Convert a plain three-leaf sequence and check the DAG.

    The DAG must keep the leaves in order, link each node to the next one
    only, leave the last node without successors, and start at node 0.
    """
    leaves = [self.l1, self.l2, self.l3]
    dag = _HTMToDAG(SequentialCombination(leaves, name='Do all'))

    self.assertEqual(dag.nodes, [self.l1, self.l2, self.l3])
    self.assertEqual(dag.succs, [[1], [2], []])
    self.assertEqual(dag.init, [0])
def test_seq_to_pomdp(self):
    """Check the POMDP produced by ``self.h2p`` for a two-step sequence.

    The task is 'Do a' followed by 'Do b', both with zero failure and
    zero refusal probability.  The test compares the generated state,
    action and observation names, the start distribution, and the T, O
    and R arrays against hand-checked expectations.  In each expected
    array the first axis follows the order of ``p.actions`` (9 actions)
    and the second axis the order of ``p.states`` (7 states).
    """
    # No probability of failure or human saying no here
    task = HierarchicalTask(root=SequentialCombination([
        LeafCombination(
            CollaborativeAction('Do a', (3., 2., 5.), fail_probability=0.,
                                no_probability=0.)),
        LeafCombination(
            CollaborativeAction('Do b', (2., 3., 4.), fail_probability=0.,
                                no_probability=0.)),
        ], name='Do all'))
    p = self.h2p.task_to_pomdp(task)
    # Three states per leaf plus a single final state.
    self.assertEqual(p.states, [
        'init-do-a', 'H-do-a', 'R-do-a', 'init-do-b', 'H-do-b', 'R-do-b',
        'end'
        ])
    # One global 'wait' action plus four actions per leaf.
    self.assertEqual(p.actions, [
        'wait', 'phy-do-a', 'com-ask-intention-do-a',
        'com-tell-intention-do-a', 'com-ask-finished-do-a', 'phy-do-b',
        'com-ask-intention-do-b', 'com-tell-intention-do-b',
        'com-ask-finished-do-b'
        ])
    self.assertEqual(p.observations, ['none', 'yes', 'no', 'error'])
    np.testing.assert_array_equal(p.start, np.array([1, 0, 0, 0, 0, 0, 0]))
    # Expected transitions, one 7x7 matrix per action.
    # checked manually:
    T = np.array([
        # Wait
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.71653, 0., 0.28347, 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.60653, 0., 0.39347],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Physical a
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.18888, 0., 0.81112, 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.08208, 0., 0.91792],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Ask intention a
        [[0., 1., 0., 0., 0., 0., 0.],
         [0., 0.51342, 0., 0.48658, 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.36788, 0., 0.63212],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Tell intention a
        [[0., 0., 1., 0., 0., 0., 0.],
         [0., 0.71653, 0., 0.28347, 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.60653, 0., 0.39347],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Ask finished a
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.51342, 0., 0.48658, 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.36788, 0., 0.63212],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Physical b
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.2636, 0., 0.7364, 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.13534, 0., 0.86466],
         [0., 0., 0., 0., 0., 0., 1.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Ask intention b
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.51342, 0., 0.48658, 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0.36788, 0., 0.63212],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Tell intention b
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.71653, 0., 0.28347, 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0.60653, 0., 0.39347],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        # Ask finished b
        [[1., 0., 0., 0., 0., 0., 0.],
         [0., 0.51342, 0., 0.48658, 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0.36788, 0., 0.63212],
         [0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1.]],
        ])
    # The literals above are rounded to 5 decimals, hence the tolerance.
    np.testing.assert_allclose(T, p.T, atol=1.e-4)
    # Expected observations, one 7x4 matrix per action; columns follow
    # p.observations ('none', 'yes', 'no', 'error').
    O = np.array([
        # Wait
        [[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.]],
        # Physical a
        [[0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [1., 0., 0., 0.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.]],
        # Ask intention a
        [[1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.]],
        # Tell intention a
        [[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.]],
        # Ask finished a
        [[0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 1., 0., 0.],
         [0., 1., 0., 0.],
         [0., 1., 0., 0.],
         [0., 1., 0., 0.]],
        # Physical b
        [[0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [0., 0., 0., 1.],
         [1., 0., 0., 0.]],
        # Ask intention b
        [[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 1., 0.],
         [1., 0., 0., 0.]],
        # Tell intention b
        [[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.]],
        # Ask finished b
        [[0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 0., 1., 0.],
         [0., 1., 0., 0.]],
        ])
    np.testing.assert_array_equal(O, p.O)
    # Expected rewards are the negation of a 9x7 table (one row per
    # action, one column per state), repeated along two trailing axes to
    # reach shape (9, 7, 7, 4).
    R = -np.broadcast_to(
        np.array([[1] * 6 + [0],
                  [6, 6, 3, 6, 6, 6, 1],
                  [3] * 6 + [1],
                  [2] * 6 + [1],
                  [3] * 6 + [1],
                  [5, 5, 5, 5, 5, 4, 1],
                  [3] * 6 + [1],
                  [2] * 6 + [1],
                  [3] * 6 + [1]])[:, :, None, None],
        (9, 7, 7, 4))
    np.testing.assert_array_equal(R, p.R)
def _task_def(self):
    """Build the two hierarchical task models and store them on ``self``.

    Each of the six parts (four legs, seat, back) gets a "gather parts"
    leaf and an "assemble" leaf chained in a sequential combination.  The
    four leg sequences are grouped in the parallel combination
    'finish_legs'; back and seat are grouped in 'finish_rest'.

    ``self.main_task`` ('TaskA') runs the legs group before the rest group;
    ``self.tf_task`` ('TaskB') runs them in the opposite order.  Both are
    created with the same feature/object settings read from ``self``.
    """
    # (gather action, gather probs key, assemble action, assemble probs key,
    #  sequence name) for every part, in construction order.
    part_specs = (
        ('gatherparts_leg_1', 'gp_l1_probs',
         'assemble_leg_1', 'ass_l1_probs', 'finish_leg1'),
        ('gatherparts_leg_2', 'gp_l2_probs',
         'assemble_leg_2', 'ass_l2_probs', 'finish_leg2'),
        ('gatherparts_leg_3', 'gp_l3_probs',
         'assemble_leg_3', 'ass_l3_probs', 'finish_leg3'),
        ('gatherparts_leg_4', 'gp_l4_probs',
         'assemble_leg_4', 'ass_l4_probs', 'finish_leg4'),
        ('gatherparts_seat', 'gp_s_probs',
         'assemble_seat', 'ass_s_probs', 'finish_seat'),
        ('gatherparts_back', 'gp_b_probs',
         'assemble_back', 'ass_b_probs', 'finish_back'),
    )

    def make_leaf(action_name, probs_key):
        # Wrap one predicted action in a leaf node.
        return LeafCombination(
            PredAction(action_name, self.NUM_FEATS,
                       self.OBS_PROBS[probs_key]))

    def make_task(children, task_name):
        # Both tasks share every setting except child order and name.
        return HierarchicalTaskHMMSuppRD(
            root=SequentialCombination(children, name='complete'),
            name=task_name,
            num_feats_action=self.NUM_FEATS,
            feats=self.FEAT,
            supp_bhvs=self.SUPP_BHVS,
            obj_presence=self.OBJ_PRESENCE,
            obj_count_idx=self.OBJ_COUNT_IDX,
            main_obj=self.MAIN_OBJ)

    # All leaves are created first, then all sequences.
    leaf_pairs = [
        (make_leaf(gather_name, gather_key),
         make_leaf(assemble_name, assemble_key))
        for gather_name, gather_key, assemble_name, assemble_key, _
        in part_specs
    ]
    sequences = [
        SequentialCombination([gather, assemble], name=spec[-1])
        for (gather, assemble), spec in zip(leaf_pairs, part_specs)
    ]
    leg_1, leg_2, leg_3, leg_4, seat, back = sequences

    legs_group = ParallelCombination([leg_1, leg_2, leg_3, leg_4],
                                     name='finish_legs')
    rest_group = ParallelCombination([back, seat], name='finish_rest')

    task_a = make_task([legs_group, rest_group], 'TaskA')
    task_b = make_task([rest_group, legs_group], 'TaskB')
    self.main_task = task_a
    self.tf_task = task_b
C_ERR = 5. INF = 100. ## Tested scenarios: # 1. with full sequence of sequential actions R_END = 0.1 LOOP = False # 2. with full sequence of sequential actions # R_END = 100 # LOOP = True R_SUBTASK = None ## Define the task mount_central = SequentialCombination([ LeafCombination(CollaborativeAction('Get central frame', (INF, 20., 30.))), LeafCombination( CollaborativeAction('Start Hold central frame', (3., 10., INF))) ], name='Mount central frame') #mount_legs = ParallelCombination([ mount_legs = SequentialCombination([ SequentialCombination([ LeafCombination(CollaborativeAction('Get left leg', (INF, 20., 30.))), LeafCombination( CollaborativeAction('Snap left leg', (5., INF, INF), fail_probability=.1)), ], name='Mount left leg'), SequentialCombination([ LeafCombination(CollaborativeAction('Get right leg', (INF, 20., 30.))), LeafCombination( CollaborativeAction('Snap right leg', (5., INF, INF),
import json

from task_models.task import (HierarchicalTask, AbstractAction,
                              SequentialCombination, ParallelCombination,
                              LeafCombination)


def _mount_leg(i):
    # One "take then attach" sequence for leg number i.
    steps = [
        LeafCombination(AbstractAction('Take leg {}'.format(i))),
        LeafCombination(AbstractAction('Attach leg {}'.format(i))),
    ]
    return SequentialCombination(steps, name='Mount leg {}'.format(i))


# Chair assembly: take the base, mount the four legs (as a parallel
# combination), then mount the frame.
take_base = LeafCombination(AbstractAction('Take base'))

mount_leg_combinations = [_mount_leg(i) for i in range(4)]

mount_frame = SequentialCombination(
    [
        LeafCombination(AbstractAction('Take frame'), highlighted=True),
        LeafCombination(AbstractAction('Attach frame')),
    ],
    name='Mount frame')

chair_task = HierarchicalTask(
    root=SequentialCombination(
        [
            take_base,
            ParallelCombination(mount_leg_combinations, name='Mount legs'),
            mount_frame,
        ],
        name='Mount chair'))

# Dump the task as indented JSON on standard output.
print(json.dumps(chair_task.as_dictionary(), indent=2))
def sequential_combination_from_order(self, order, name=None):
    """Return a sequential combination with one leaf per item of `order`.

    Each leaf wraps a ``CollaborativeAction`` whose first argument is
    ``str(item)``, followed by ``'-' + name`` when `name` is not None, and
    whose second argument is ``str(item)``.  `name` is also used as the
    name of the returned ``SequentialCombination``.
    """
    def label_for(item):
        # The suffix is only added when a name was provided.
        if name is None:
            return str(item) + ''
        return str(item) + ('-' + name)

    leaves = []
    for item in order:
        action = CollaborativeAction(label_for(item), str(item))
        leaves.append(LeafCombination(action))
    return SequentialCombination(leaves, name=name)