from src.automata.ldba import LDBA # an example automaton for "iron then wood then work_bench" or # "F (iron & XF (wood & XF (work_bench)))" # only the automaton "step" function and the "accepting_sets" attribute need to be specified # "accepting_sets" for Generalised Büchi Accepting (more details here minecraft_6 = LDBA(accepting_sets=[[3]]) # "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1") def step(self, label): # state 0 if self.automaton_state == 0: if 'iron' in label: self.automaton_state = 1 else: self.automaton_state = 0 # state 1 elif self.automaton_state == 1: if 'wood' in label: self.automaton_state = 2 else: self.automaton_state = 1 # state 2 elif self.automaton_state == 2: if 'work_bench' in label: self.automaton_state = 3 else: self.automaton_state = 2 # state 3 elif self.automaton_state == 3:
from src.automata.ldba import LDBA

# Example automaton for "visiting goal1 and goal2 infinitely often",
# i.e. the LTL formula "GF goal1 & GF goal2".
# Only the automaton "step" function and the "accepting_sets" attribute
# need to be specified ("accepting_sets" is the Generalised Büchi
# acceptance condition).
surveillance = LDBA(accepting_sets=[[0]])


def step(self, label):
    """Take one automaton transition on ``label`` and return the new state.

    ``label`` only needs to support the ``in`` operator for the strings
    'goal1' and 'goal2'. The un-accepting sink state "-1" is never
    produced by this automaton.
    """
    current = self.automaton_state
    if current in (0, 1):
        # states 0 and 1 have identical outgoing edges: wait for goal1
        self.automaton_state = 2 if 'goal1' in label else 1
    elif current == 2:
        # goal1 already seen: wait for goal2, then return to accepting state 0
        self.automaton_state = 0 if 'goal2' in label else 2
    # any other state is left untouched; the (possibly updated) state is returned
    return self.automaton_state
from src.automata.ldba import LDBA # an example automaton for "goal1 or goal2 while avoiding unsafe" or "(FG goal1 | FG goal2) & G !unsafe" # automaton image is available in "./assets" or "" # only the automaton "step" function and the "accepting_sets" attribute need to be specified # "accepting_sets" for Generalised Büchi Accepting (more details here goal1_or_goal2 = LDBA(accepting_sets=[[1, 2]]) # "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1") def step(self, label): # state 0 if self.automaton_state == 0: if 'epsilon_1' in label: self.automaton_state = 1 elif 'epsilon_2' in label: self.automaton_state = 2 elif 'unsafe' in label: self.automaton_state = -1 # un-accepting sink state else: self.automaton_state = 0 # state 1 elif self.automaton_state == 1: if 'goal1' in label and 'unsafe' not in label: self.automaton_state = 1 else: self.automaton_state = -1 # un-accepting sink state # state 2 elif self.automaton_state == 2: if 'goal2' in label and 'unsafe' not in label: self.automaton_state = 2
from src.automata.ldba import LDBA # an example automaton for "wood then iron then work_bench then gold" or # "F (wood & XF (iron & XF (work_bench & XF gold)))" # only the automaton "step" function and the "accepting_sets" attribute need to be specified # "accepting_sets" for Generalised Büchi Accepting (more details here minecraft_7 = LDBA(accepting_sets=[[4]]) # "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1") def step(self, label): # state 0 if self.automaton_state == 0: if 'wood' in label: self.automaton_state = 1 else: self.automaton_state = 0 # state 1 elif self.automaton_state == 1: if 'iron' in label: self.automaton_state = 2 else: self.automaton_state = 1 # state 2 elif self.automaton_state == 2: if 'work_bench' in label: self.automaton_state = 3 else: self.automaton_state = 2 # state 3 elif self.automaton_state == 3:
from src.automata.ldba import LDBA

# an example automaton for "goal1 while avoiding unsafe" or "F goal1 & G !unsafe"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting
mars_rover_1_and_3 = LDBA(accepting_sets=[[1]])


# "step" function for the automaton transitions
# (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    """Advance the automaton by one transition and return the new state.

    :param label: labels of the current MDP state; must support the ``in``
        operator for the strings 'goal1' and 'unsafe'.
    :return: the updated ``self.automaton_state`` (0, 1 or the sink -1).
    """
    # state 0: goal1 not reached yet
    if self.automaton_state == 0:
        if 'goal1' in label and 'unsafe' not in label:
            self.automaton_state = 1
        elif 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 0
    # state 1: goal1 reached (accepting); keep enforcing "G !unsafe"
    elif self.automaton_state == 1:
        if 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            # stay in the accepting state; the previous target "2" is not a
            # state of this automaton (no transitions, not in accepting_sets)
            self.automaton_state = 1
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
# (this assigns the bound method on the LDBA class itself)
LDBA.step = step.__get__(mars_rover_1_and_3, LDBA)
from src.automata.ldba import LDBA

# Example automaton for "grass then tool_shed",
# i.e. the LTL formula "F (grass & XF (tool_shed))".
# Only the automaton "step" function and the "accepting_sets" attribute
# need to be specified ("accepting_sets" is the Generalised Büchi
# acceptance condition).
minecraft_2 = LDBA(accepting_sets=[[2]])


def step(self, label):
    """Take one automaton transition on ``label`` and return the new state.

    ``label`` only needs to support the ``in`` operator for the strings
    'grass' and 'tool_shed'. The un-accepting sink state "-1" is never
    produced by this automaton.
    """
    current = self.automaton_state
    if current == 0:
        # waiting for grass
        self.automaton_state = 1 if 'grass' in label else 0
    elif current == 1:
        # grass collected, waiting for the tool shed
        self.automaton_state = 2 if 'tool_shed' in label else 1
    elif current == 2:
        # accepting state: self-loop regardless of the label
        self.automaton_state = 2
    return self.automaton_state


# rebind the step function (assigned on the LDBA class, bound to minecraft_2)
LDBA.step = step.__get__(minecraft_2, LDBA)
from src.automata.ldba import LDBA # an example automaton for "(food1 then food2) or (food2 then food1) while avoiding ghost" or # "(F (food1 & F food2) || F (food2 & F food1)) & G !ghost" # only the automaton "step" function and the "accepting_sets" attribute need to be specified # "accepting_sets" for Generalised Büchi Accepting (more details here pacman_foods = LDBA(accepting_sets=[[1], [2], [3]]) # "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1") def step(self, label): # state 0 if self.automaton_state == 0: if label is not None and 'food1' in label and 'ghost' not in label: self.automaton_state = 1 elif label is not None and 'food2' in label and 'ghost' not in label: self.automaton_state = 2 elif label is not None and 'ghost' in label: self.automaton_state = -1 # un-accepting sink state else: self.automaton_state = 0 # state 1 elif self.automaton_state == 1: if label is not None and 'food2' in label and 'ghost' not in label: self.automaton_state = 3 elif label is not None and 'ghost' in label: self.automaton_state = -1 # un-accepting sink state else: self.automaton_state = 1 # state 2 elif self.automaton_state == 2:
from src.automata.ldba import LDBA # an example automaton for "goal1 then goal2 while avoiding unsafe" or "F (goal1 & XF (goal2)) & G !unsafe" # automaton image is available in "./assets" or "" # only the automaton "step" function and the "accepting_sets" attribute need to be specified # "accepting_sets" for Generalised Büchi Accepting (more details here goal1_then_goal2 = LDBA(accepting_sets=[[2]]) # "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1") def step(self, label): # state 0 if self.automaton_state == 0: if 'goal1' in label and 'unsafe' not in label: self.automaton_state = 1 elif 'unsafe' in label: self.automaton_state = -1 # un-accepting sink state else: self.automaton_state = 0 # state 1 elif self.automaton_state == 1: if 'goal2' in label and 'unsafe' not in label: self.automaton_state = 2 elif 'unsafe' in label: self.automaton_state = -1 # un-accepting sink state else: self.automaton_state = 1 # state 2 elif self.automaton_state == 2: if 'unsafe' in label: self.automaton_state = -1 # un-accepting sink state
def _plot_test_path(test_path, results_sub_path):
    """Overlay ``test_path`` on the current figure and save it as 'tested_policy.png'."""
    path_x, path_y = np.array(test_path).T
    plt.scatter(path_y, path_x, c='lime', edgecolors='teal')
    # highlight and annotate the initial state
    plt.scatter(path_y[0], path_x[0], c='red', edgecolors='black')
    plt.annotate('s_0', (path_y[0], path_x[0]), fontsize=15, xytext=(20, 20), textcoords="offset points",
                 va="center", ha="left", bbox=dict(boxstyle="round", fc="w"), arrowprops=dict(arrowstyle="->"))
    plt.title('This policy is synthesised by the trained agent')
    plt.savefig(os.path.join(results_sub_path, 'tested_policy.png'), bbox_inches="tight")


def train(
        MDP,
        LDBA,
        algorithm='ql',
        episode_num=2500,
        iteration_num_max=4000,
        discount_factor=0.95,
        learning_rate=0.9,
        nfq_replay_buffer_size=100,
        ddpg_replay_buffer_size=50000,
        decaying_learning_rate=False,
        epsilon=0.1,
        save_dir='./results',
        test=True,
        average_window=-1,
):
    """Train an LCRL agent, plot its convergence and optionally test the learned policy.

    :param MDP: environment object handed to ``LCRL``.
    :param LDBA: automaton object handed to ``LCRL``.
    :param algorithm: 'ql', 'nfq' or 'ddpg'; anything else raises ``NotImplementedError``.
    :param episode_num: number of episodes passed to the training routine.
    :param iteration_num_max: per-episode iteration cap (also used as the cap in testing).
    :param discount_factor: passed to ``LCRL``.
    :param learning_rate: passed to ``LCRL``.
    :param nfq_replay_buffer_size: passed to ``train_nfq``.
    :param ddpg_replay_buffer_size: passed to ``train_ddpg``.
    :param decaying_learning_rate: passed to ``LCRL``.
    :param epsilon: passed to ``LCRL``.
    :param save_dir: directory for the results, resolved against the current working
        directory; a time-stamped sub-directory is created inside it.
    :param test: if true, run 100 test episodes with the learned policy.
    :param average_window: moving-average window for the convergence plot;
        ``-1`` means 3% of ``episode_num``; a non-positive window disables the average.
    :return: the ``LCRL`` learning task.
    :raises NotImplementedError: for an unknown ``algorithm``.
    """
    from datetime import datetime  # local import, like the other lazy imports below

    learning_task = LCRL(MDP, LDBA, discount_factor, learning_rate, decaying_learning_rate, epsilon)

    # imports are deferred so that only the selected algorithm's dependencies are loaded
    if algorithm == 'ql':
        learning_task.train_ql(episode_num, iteration_num_max)
        import dill
        from src.environments.mars_rover_discrete_action import MarsRover
    elif algorithm == 'nfq':
        learning_task.train_nfq(episode_num, iteration_num_max, nfq_replay_buffer_size)
        import dill
        from src.environments.mars_rover_discrete_action import MarsRover
    elif algorithm == 'ddpg':
        learning_task.train_ddpg(episode_num, iteration_num_max, ddpg_replay_buffer_size)
        import dill
        import tensorflow as tf
        from src.environments.mars_rover_continuous_action import MarsRover
    else:
        raise NotImplementedError('New learning algorithms will be added to soon.')

    if average_window == -1:
        average_window = int(0.03 * episode_num)

    # convergence plot of the value function at the initial state
    plt.plot(learning_task.q_at_initial_state, c="royalblue")
    plt.xlabel('Episode Number')
    plt.ylabel('Value Function at The Initial State')
    plt.grid(True)
    if average_window > 0:
        avg = np.convolve(learning_task.q_at_initial_state,
                          np.ones((average_window,)) / average_window, mode='valid')
        plt.plot(avg, c='darkblue')

    # saving the results
    # abspath gives the same result as the old `cwd + save_dir[2:]` for './x' paths,
    # and also handles paths without the './' prefix
    results_path = os.path.abspath(save_dir)
    dt_string = datetime.now().strftime("%d.%m.%Y_%H.%M.%S")
    results_sub_path = os.path.join(results_path, dt_string)
    os.makedirs(results_sub_path, exist_ok=True)
    plt.savefig(os.path.join(results_sub_path, 'convergence.png'))

    if test:
        print('testing...')
        number_of_tests = 100
        number_of_successes = 0
        for _ in range(number_of_tests):
            learning_task.MDP.reset()
            learning_task.LDBA.reset()
            # check if MDP current_state is a list or ndarray:
            ndarray = isinstance(learning_task.MDP.current_state, np.ndarray)
            if ndarray:
                test_path = [learning_task.MDP.current_state.tolist()]
            else:
                test_path = [learning_task.MDP.current_state]
            iteration_num = 0
            # an episode ends when every accepting set was visited, the iteration
            # cap is hit, or the automaton falls into the sink state -1
            while learning_task.LDBA.accepting_frontier_set and iteration_num < iteration_num_max \
                    and learning_task.LDBA.automaton_state != -1:
                iteration_num += 1
                # product state = MDP state + automaton state
                if ndarray:
                    if algorithm == "nfq":
                        current_state = MDP.current_state.tolist() + [LDBA.automaton_state]
                    if algorithm == "ddpg":
                        current_state = MDP.current_state.tolist() + [LDBA.automaton_state]
                        prev_state = np.array(current_state[0:2].copy())
                else:
                    current_state = learning_task.MDP.current_state + [learning_task.LDBA.automaton_state]

                if learning_task.epsilon_transitions_exists:
                    product_MDP_action_space = learning_task.action_space_augmentation()
                else:
                    product_MDP_action_space = MDP.action_space

                if not algorithm == "ddpg":
                    # greedy action selection over the (augmented) discrete action space
                    if (not ndarray) and (str(current_state) in learning_task.Q):
                        Qs = [learning_task.Q[str(current_state)][candidate]
                              for candidate in product_MDP_action_space]
                    elif ndarray:
                        Qs = [learning_task.Q[current_state[-1]].predict(
                                  [MDP.current_state.tolist() + [action_index]])
                              for action_index in range(len(product_MDP_action_space))]
                    else:
                        # state never visited in training: fall back to the first action
                        Qs = [0]
                    # ties are broken uniformly at random
                    maxQ_action_index = random.choice(np.where(np.asarray(Qs) == np.max(Qs))[0])
                    maxQ_action = product_MDP_action_space[maxQ_action_index]
                    # check if an epsilon-transition is taken
                    epsilon_transition_taken = bool(
                        learning_task.epsilon_transitions_exists
                        and maxQ_action_index > len(learning_task.MDP.action_space) - 1
                    )
                    if epsilon_transition_taken:
                        # epsilon-transitions move the automaton only; the MDP stays put
                        next_MDP_state = learning_task.MDP.current_state if not ndarray \
                            else learning_task.MDP.current_state.tolist()
                        next_automaton_state = learning_task.LDBA.step(maxQ_action)
                    else:
                        next_MDP_state = learning_task.MDP.step(maxQ_action)
                        next_automaton_state = learning_task.LDBA.step(
                            learning_task.MDP.state_label(next_MDP_state))
                        if ndarray:
                            next_MDP_state = next_MDP_state.tolist()
                else:
                    # action space bounds
                    lower_bound = -1
                    upper_bound = 1
                    tf_prev_state = tf.expand_dims(tf.convert_to_tensor(prev_state), 0)
                    sampled_actions = tf.squeeze(learning_task.Q[current_state[-1]](tf_prev_state))
                    sampled_actions = sampled_actions.numpy()
                    legal_action = np.clip(sampled_actions, lower_bound, upper_bound)
                    action = np.squeeze(legal_action)
                    if learning_task.epsilon_transitions_exists and \
                            LDBA.automaton_state in LDBA.epsilon_transitions and \
                            random.random() > 0.5:
                        epsilon_action = random.choice(product_MDP_action_space[2:])
                        action = [np.squeeze(
                            int(epsilon_action[-1]) + learning_task.upper_bound
                        )]
                        epsilon_transition_taken = True
                    else:
                        epsilon_transition_taken = False
                    # product MDP modification
                    if epsilon_transition_taken:
                        next_MDP_state = MDP.current_state.tolist()
                        next_automaton_state = LDBA.step(epsilon_action)
                    else:
                        next_MDP_state = MDP.step(action).tolist()
                        next_automaton_state = LDBA.step(MDP.state_label(next_MDP_state))
                    # product MDP: synchronise the automaton with MDP
                    current_state = next_MDP_state + [next_automaton_state]

                test_path.append(next_MDP_state)
                if not epsilon_transition_taken:
                    learning_task.LDBA.accepting_frontier_function(next_automaton_state)
                if not learning_task.LDBA.accepting_frontier_set:
                    number_of_successes += 1
        print('success rate in testing: ' + str(100 * number_of_successes / number_of_tests) + '%')

    if isinstance(MDP, SlipperyGrid) and test:
        # plt.plot(learning_task.path_length, c='royalblue')
        # plt.xlabel('Episode Number')
        # plt.ylabel('Agent Traversed Distance from The Initial State')
        # plt.grid(True)
        # if average_window > 0:
        #     avg = np.convolve(learning_task.path_length, np.ones((average_window,)) / average_window, mode='valid')
        #     plt.plot(avg, c='darkblue')
        # plt.savefig(os.path.join(results_sub_path, 'traversed distance in the grid.png'))

        # one colour per distinct label, with integer-centred colour bins
        distinct_labels = np.unique(learning_task.MDP.labels)
        labels_dic = {}
        bounds = [-0.9]
        cmap = plt.get_cmap('gist_rainbow')
        for label_indx, label in enumerate(distinct_labels):
            labels_dic[label] = label_indx
            bounds.append(bounds[-1] + 1)
        color_map = cmap(np.linspace(0, 1, len(distinct_labels)))
        cmap = colors.ListedColormap(color_map)
        norm = colors.BoundaryNorm(bounds, cmap.N)
        labels_value = np.zeros([learning_task.MDP.shape[0], learning_task.MDP.shape[1]])
        for i in range(learning_task.MDP.shape[0]):
            for j in range(learning_task.MDP.shape[1]):
                labels_value[i][j] = labels_dic[learning_task.MDP.state_label([i, j])]
        patches = [mpatches.Patch(color=color_map[i], label=list(distinct_labels)[i])
                   for i in range(len(distinct_labels))]
        plt.imshow(labels_value, interpolation='nearest', cmap=cmap, norm=norm)
        plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        # the path plotted is the one from the last test episode
        _plot_test_path(test_path, results_sub_path)
        is_gif = input(
            'Would you like to create a gif for the the control policy? '
            'If so, type in "y", otherwise, type in "n". ')
        if is_gif == 'y' or is_gif == 'Y':
            animate(learning_task.MDP, test_path, results_sub_path, labels_value, cmap, norm, patches)
        print('\n---------------------------------\n')
        print('The results have been saved here:\n')
        print(results_sub_path)
        return learning_task

    if isinstance(MDP, MarsRover) and test:
        plt.imshow(MDP.background)
        _plot_test_path(test_path, results_sub_path)
        # NOTE(review): no gif prompt here. The previous code called animate() with
        # labels_value/cmap/norm/patches, which are only assigned in the SlipperyGrid
        # branch above, so answering "y" could only raise NameError.
        print('\n---------------------------------\n')
        print('The results have been saved here:\n')
        print(results_sub_path)
        return learning_task

    if algorithm == 'ql':
        with open(os.path.join(results_sub_path, 'learned_model.pkl'), 'wb') as learning_file:
            dill.dump(learning_task, learning_file)
        if test:
            with open(os.path.join(results_sub_path, 'test_results.pkl'), 'wb') as test_file:
                dill.dump(test_path, test_file)
        print('In order to load the learning results use the following command in Python console:')
        print('import dill')
        print("learned_model = dill.load(open('" + os.path.join(results_sub_path, 'learned_model.pkl') + "', 'rb'))")
        if test:
            print("tested_trace = dill.load(open('" + os.path.join(results_sub_path, 'test_results.pkl') + "', 'rb'))")
        print('\n---------------------------------\n')
        if learning_task.early_interruption == 0:
            print("Training finished successfully!")
        else:
            print("Training results have been saved successfully! [Note: training was interrupted by user]")
        return learning_task
    # TODO: change the save method and add nfq & ddpg
    return learning_task
from src.automata.ldba import LDBA

# Example automaton for "goal1", i.e. the LTL formula "F goal1".
# Only the automaton "step" function and the "accepting_sets" attribute
# need to be specified ("accepting_sets" is the Generalised Büchi
# acceptance condition).
slp_easy = LDBA(accepting_sets=[[1]])


def step(self, label):
    """Take one automaton transition on ``label`` and return the new state.

    ``label`` only needs to support the ``in`` operator for the string
    'goal1'. The un-accepting sink state "-1" is never produced by this
    automaton.
    """
    current = self.automaton_state
    if current == 0:
        # waiting for goal1
        self.automaton_state = 1 if 'goal1' in label else 0
    elif current == 1:
        # accepting state: self-loop regardless of the label
        self.automaton_state = 1
    return self.automaton_state


# rebind the step function (assigned on the LDBA class, bound to slp_easy)
LDBA.step = step.__get__(slp_easy, LDBA)

# Finally, does the LDBA contain an epsilon-transition? If so, then for each
# state with an outgoing epsilon-transition define a different epsilon, e.g.
#   <LDBA_object>.epsilon_transitions = {0: ['epsilon_0'], 4: ['epsilon_1']}
# where "0" and "4" are automaton states.
from src.automata.ldba import LDBA

# an example automaton for "goal1 while avoiding unsafe" or "F goal1 & G !unsafe"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting
frozenlake_reach_avoid = LDBA(accepting_sets=[[1]])


# "step" function for the automaton transitions
# (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    """Advance the automaton by one transition and return the new state.

    :param label: labels of the current MDP state; must support the ``in``
        operator for the strings 'goal1' and 'unsafe'.
    :return: the updated ``self.automaton_state`` (0, 1 or the sink -1).
    """
    # state 0: goal1 not reached yet
    if self.automaton_state == 0:
        if 'goal1' in label and 'unsafe' not in label:
            self.automaton_state = 1
        elif 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 0
    # state 1: goal1 reached (accepting); keep enforcing "G !unsafe"
    elif self.automaton_state == 1:
        if 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            # stay in the accepting state; the previous target "2" is not a
            # state of this automaton (no transitions, not in accepting_sets)
            self.automaton_state = 1
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
# (this assigns the bound method on the LDBA class itself)
LDBA.step = step.__get__(frozenlake_reach_avoid, LDBA)
from src.automata.ldba import LDBA # an example automaton for "goal1 then goal2 then goal3 then goal4" or # "F (goal1 & XF (goal2 & XF (goal3 & XF goal4)))" # only the automaton "step" function and the "accepting_sets" attribute need to be specified # "accepting_sets" for Generalised Büchi Accepting (more details here slp_hard = LDBA(accepting_sets=[[4]]) # "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1") def step(self, label): # state 0 if self.automaton_state == 0: if 'goal1' in label: self.automaton_state = 1 else: self.automaton_state = 0 # state 1 elif self.automaton_state == 1: if 'goal2' in label: self.automaton_state = 2 else: self.automaton_state = 1 # state 2 elif self.automaton_state == 2: if 'goal3' in label: self.automaton_state = 3 else: self.automaton_state = 2 # state 3 elif self.automaton_state == 3:
from src.automata.ldba import LDBA

# an example automaton for "goal2 then goal1 while avoiding unsafe" or "F (goal2 & XF (goal1)) & G !unsafe"
# NOTE(review): the transitions below only wait for 'goal1' (they never read
# 'goal2'), i.e. they encode "F goal1 & G !unsafe" -- confirm which
# specification is intended before relying on the description above.
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting
mars_rover_2_and_4 = LDBA(accepting_sets=[[1]])


# "step" function for the automaton transitions
# (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    """Advance the automaton by one transition and return the new state.

    :param label: labels of the current MDP state; must support the ``in``
        operator for the strings 'goal1' and 'unsafe'.
    :return: the updated ``self.automaton_state`` (0, 1 or the sink -1).
    """
    # state 0: goal1 not reached yet
    if self.automaton_state == 0:
        if 'goal1' in label and 'unsafe' not in label:
            self.automaton_state = 1
        elif 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 0
    # state 1: goal1 reached (accepting); keep enforcing "G !unsafe"
    elif self.automaton_state == 1:
        if 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            # stay in the accepting state; the previous target "2" is not a
            # state of this automaton (no transitions, not in accepting_sets)
            self.automaton_state = 1
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
# (this assigns the bound method on the LDBA class itself)
LDBA.step = step.__get__(mars_rover_2_and_4, LDBA)