def main():
    """Print the atomic propositions / LTL task, then translate a formula
    to a complete Buchi automaton with Spot and display it.

    NOTE(review): the original contained an early ``return`` after the Spot
    translation, leaving a whole MDP + value-iteration section as
    unreachable dead code; that dead code has been removed (no behavior
    change, it could never execute).
    """
    ap_map = {'a': (2, 2), 'b': (6, 3), 'c': (5, 3), 'd': (4, 2)}
    print('Atomic propositions, ', ap_map)  # FIX: typo "Automic"
    ltlformula = 'F (b & Fa)'
    print('LTL Formula, ', ltlformula)

    # Setup MDP, Agents.
    # Translate a (separate, hard-coded) formula to a complete Buchi automaton.
    print('translating')  # FIX: typo "translatinggg"
    a = spot.translate('(a U b) & GFc & GFd', 'BA', 'complete')
    a.show("v" "")  # adjacent literals concatenate to "v" (verbose display)
    return
def get_l1_policy(start_room=None, goal_room=None, mdp=None, starting_items=None,
                  goal_items=None, actions=None, doors=None, rooms=None):
    """Solve a FourRoomL1MDP with value iteration and return its policy.

    Args:
        start_room, goal_room, starting_items, goal_items, actions, doors,
        rooms: forwarded to the FourRoomL1MDP constructor; ignored when
        ``mdp`` is supplied.
        mdp: an existing MDP to solve instead of constructing one.

    Returns:
        defaultdict: maps each state along the planned trajectory to the
        action chosen there.
    """
    if mdp is None:
        mdp = FourRoomL1MDP(start_room, goal_room, starting_items=starting_items,
                            goal_items=goal_items, actions=actions, doors=doors,
                            rooms=rooms)

    vi = ValueIteration(mdp)
    vi.run_vi()

    policy = defaultdict()
    action_seq, state_seq = vi.plan(mdp.init_state)

    # FIX: Python-3 print() calls — the original used Python-2 print
    # statements, a SyntaxError under Python 3, which the rest of this
    # file targets.
    print('Plan for {}:'.format(mdp))
    for i in range(len(action_seq)):
        print("\tpi[{}] -> {}".format(state_seq[i], action_seq[i]))
        policy[state_seq[i]] = action_seq[i]
    return policy
def main():
    """Plan on an LTL grid-world MDP, print the plan, dump visited states to
    a TSV file, and hand off to the visualiser.

    Side effects: writes one state per line to a hard-coded desktop path.
    """
    ap_map = {'a': (2, 2), 'b': (6, 3), 'c': (5, 3), 'd': (4, 2)}
    ltlformula = 'F (b & Fa)'

    # Setup MDP, Agents.
    mdp = LTLGridWorldMDP(ltltask=ltlformula, ap_map=ap_map, width=6, height=6,
                          goal_locs=[(6, 6)], slip_prob=0.2)
    mdp.automata.subproblem_flag = 0
    mdp.automata.subproblem_stay = 1
    mdp.automata.subproblem_goal = 0

    value_iter = ValueIteration(mdp, sample_rate=5)
    value_iter.run_vi()  # Value Iteration.

    action_seq, state_seq = value_iter.plan(mdp.get_init_state())
    print("Plan for", mdp)
    for i in range(len(action_seq)):
        print("\t", action_seq[i], state_seq[i])
    print(ltlformula)

    # FIX: context manager so the file is closed even if a write raises
    # (original used open()/close() with no try/finally).
    # NOTE(review): user-specific hard-coded path — consider parameterizing.
    with open('/Users/romapatel/Desktop/actions.tsv', 'w+') as f:
        for item in state_seq:
            f.write(str(item) + '\n')

    model = None
    ltl_visualiser(model)
def main():
    """Solve a 6x6 slippery grid world with value iteration and print the plan."""
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=6, height=6, goal_locs=[(6, 6)], slip_prob=0.2)

    planner = ValueIteration(mdp, sample_rate=5)
    planner.run_vi()  # Value Iteration.

    actions, states = planner.plan(mdp.get_init_state())
    print("Plan for", mdp)
    for step, act in enumerate(actions):
        print("\t", act, states[step])
def get_l1_policy(domain):
    """Run value iteration on ``domain`` and return the state -> action
    policy along the planned trajectory."""
    planner = ValueIteration(domain, sample_rate=1)
    planner.run_vi()

    actions, states = planner.plan(domain.init_state)
    print('Plan for {}:'.format(domain))

    policy = defaultdict()
    for step, act in enumerate(actions):
        print("\tpi[{}] -> {}\n".format(states[step], act))
        policy[states[step]] = act
    return policy
def get_l1_policy(start_room=None, goal_room=None, mdp=None):
    """Build a CubeL1MDP (unless one is supplied), solve it with value
    iteration, and return the resulting state -> action policy."""
    if mdp is None:
        mdp = CubeL1MDP(start_room, goal_room)

    planner = ValueIteration(mdp)
    planner.run_vi()

    actions, states = planner.plan(mdp.init_state)
    print('Plan for {}:'.format(mdp))

    policy = defaultdict()
    for step, act in enumerate(actions):
        print("\tpi[{}] -> {}".format(states[step], act))
        policy[states[step]] = act
    return policy
def main():
    """Plan on a 6x6 slippery grid world with both value iteration and MCTS,
    printing only the MCTS plan."""
    # Setup MDP, Agents.
    mdp = GridWorldMDP(width=6, height=6, goal_locs=[(6, 6)], slip_prob=0.2)
    value_iter = ValueIteration(mdp, sample_rate=5)
    mcts = MCTS(mdp, num_rollouts_per_step=50)

    # NOTE(review): run_vi() is never called here (a call was left commented
    # out in the original), so this plan comes from an un-converged value
    # function, and its result is never used. Call kept for side effects;
    # results renamed to mark them unused.
    _vi_actions, _vi_states = value_iter.plan(mdp.get_init_state())

    mcts_action_seq, mcts_state_seq = mcts.plan(mdp.get_init_state())

    print("Plan for", mdp)
    for i in range(len(mcts_action_seq)):
        print("\t", mcts_action_seq[i], mcts_state_seq[i])
def get_l1_policy(oomdp=None):
    """Solve the L1 taxi OOMDP (constructing a default one if needed) with
    value iteration and return its state -> action policy."""
    if oomdp is None:
        oomdp = TaxiL1OOMDP()

    planner = ValueIteration(oomdp, sample_rate=1)
    planner.run_vi()

    actions, states = planner.plan(oomdp.init_state)
    print('Plan for {}:'.format(oomdp))

    policy = defaultdict()
    for step, act in enumerate(actions):
        print("\tpi[{}] -> {}\n".format(states[step], act))
        policy[states[step]] = act
    return policy
def main():
    """Solve an LTL-constrained 6x6 slippery grid world with value iteration
    and print the resulting plan."""
    ap_map = {'a': (2, 2), 'b': (6, 3), 'c': (5, 3), 'd': (4, 2)}
    ltlformula = 'F (b & Fa)'

    # Setup MDP, Agents.
    mdp = LTLGridWorldMDP(ltltask=ltlformula, ap_map=ap_map, width=6, height=6,
                          goal_locs=[(6, 6)], slip_prob=0.2)
    mdp.automata.subproblem_flag = 0
    mdp.automata.subproblem_stay = 1
    mdp.automata.subproblem_goal = 0

    planner = ValueIteration(mdp, sample_rate=5)
    planner.run_vi()  # Value Iteration.

    actions, states = planner.plan(mdp.get_init_state())
    print("Plan for", mdp)
    for step, act in enumerate(actions):
        print("\t", act, states[step])
def get_l1_policy(start_room=None, goal_room=None, mdp=None):
    """Solve a FourRoomL1MDP with value iteration and return its policy.

    If ``mdp`` is None, constructs a FourRoomL1MDP that starts in room 2
    with the light off and whose goal is room 2 with the light on.

    Returns:
        defaultdict: maps each state along the planned trajectory to the
        action chosen there.
    """
    if mdp is None:
        # room 2, light off = 0, light on = 1
        mdp = FourRoomL1MDP(start_room, goal_room, starting_items=[2, 0],
                            goal_items=[2, 1])

    vi = ValueIteration(mdp)
    vi.run_vi()

    policy = defaultdict()
    action_seq, state_seq = vi.plan(mdp.init_state)

    # FIX: Python-3 print() calls — the original used Python-2 print
    # statements, a SyntaxError under Python 3, which the rest of this
    # file targets.
    print('Plan for {}:'.format(mdp))
    for i in range(len(action_seq)):
        print("\tpi[{}] -> {}".format(state_seq[i], action_seq[i]))
        policy[state_seq[i]] = action_seq[i]
    return policy
def run_plain_pMDP(init_loc, ltl_formula, cube_env, ap_maps, verbose=False):
    """Solve an LTL task on a "plain" (non-hierarchical) room-cube MDP.

    Args:
        init_loc: initial location, forwarded to RoomCubePlainMDP.
        ltl_formula: the LTL task specification.
        cube_env: environment description; wrapped in a one-element list
            for RoomCubePlainMDP's ``env_file`` argument.
        ap_maps: atomic-proposition mapping used by the formula.
        verbose: if True, print the step-by-step plan.

    Returns:
        Tuple of (computing_time, plan_length, flag_success, state_seq,
        action_seq, num_backups), where flag_success is 1 on success,
        0 when the final state is not accepting, and -1 when planning
        produced no steps.
    """
    start_time = time.time()
    mdp = RoomCubePlainMDP(init_loc=init_loc, ltl_formula=ltl_formula,
                           env_file=[cube_env], ap_maps=ap_maps)

    value_iter = ValueIteration(mdp, sample_rate=1, max_iterations=50)
    value_iter.run_vi()  # Value Iteration

    action_seq, state_seq = value_iter.plan(mdp.get_init_state())
    # Wall-clock time covering MDP construction, VI, and plan extraction.
    computing_time = time.time() - start_time

    # Print
    if verbose:
        print("=====================================================")
        print("Plain: Plan for ", ltl_formula)
        for i in range(len(action_seq)):
            room_number, floor_number = mdp._get_abstract_number(state_seq[i])
            print("\t {} in room {} on the floor {}, {}".format(
                state_seq[i], room_number, floor_number, action_seq[i]))
        # Final state printed separately: it has no corresponding action.
        room_number, floor_number = mdp._get_abstract_number(state_seq[-1])
        print("\t {} in room {} on the floor {}".format(
            state_seq[-1], room_number, floor_number))

    # success? A trajectory of length <= 1 means no plan was found (-1);
    # otherwise success iff the automaton component (.q) of the final
    # product state is accepting.
    if len(state_seq) <= 1:
        flag_success = -1
    else:
        if mdp.automata.aut_spot.state_is_accepting(state_seq[-1].q):
            flag_success = 1
        else:
            flag_success = 0

    return computing_time, len(
        action_seq
    ), flag_success, state_seq, action_seq, value_iter.get_num_backups_in_recent_run(
    )
#!/usr/bin/env python
'''
NOTE: Incomplete. Planning infrastructure in development.

Solves a 6x6 grid world with value iteration and prints the plan.
'''

# Other imports.
import srl_example_setup
from simple_rl.tasks import GridWorldMDP
from simple_rl.planning import ValueIteration, MCTS

# Setup MDP, Agents.
mdp = GridWorldMDP(width=6, height=6, goal_locs=[(6, 6)])
vi = ValueIteration(mdp)
vi.run_vi()

action_seq, state_seq = vi.plan(mdp.get_init_state())
# FIX: Python-3 print() call — the original Python-2 print statement is a
# SyntaxError under Python 3.
for i in range(len(action_seq)):
    print(action_seq[i], state_seq[i])
# ltl_formula = 'F (a&b)' ap_maps = {'a': [1, 'state', 7], 'b': [2, 'state', 3]} #'c': [1, 'state', 3], 'd': [0, 'state', (6, 1, 1)], # 'e': [2, 'state', 1], # 'f': [2, 'state', 3], 'g': [0, 'state', (3, 4, 1)]} start_time = time.time() mdp = RoomCubePlainMDP(ltl_formula=ltl_formula, env_file=[cube_env1], ap_maps=ap_maps) value_iter = ValueIteration(mdp, sample_rate=1) value_iter.run_vi() # Value Iteration action_seq, state_seq = value_iter.plan(mdp.get_init_state()) computing_time = time.time() - start_time # print print("Plan for", mdp) for i in range(len(action_seq)): room_number, floor_number = mdp._get_abstract_number(state_seq[i]) print("\t {} in room {} on the floor {}, {}".format( state_seq[i], room_number, floor_number, action_seq[i])) room_number, floor_number = mdp._get_abstract_number(state_seq[-1]) print("\t {} in room {} on the floor {}".format(state_seq[-1], room_number, floor_number)) print("Summary")