def __init__(self):
    """Prepare the search tree, goal, tuning constants and environment.

    States are 9-element arrays; the first three entries are used as the
    x/y/z position throughout the planner (presumably followed by
    velocity and acceleration -- confirm against the steer function).
    """
    # Tuning constants for the RRT* search.
    self.max_iter = 1000
    self.r_search = 2
    self.dt_des = 1 / 10.0  # this is the mpc controller's delta time

    # RRT* steer function order and the min-snap flavour to use.
    self.steer_order = 1
    self.optimization_type = 'constrained'

    # Sampling box for RRT*, one [low, high] pair per axis.
    self.x_range = [0, 13]
    self.y_range = [-2, 4]
    self.z_range = [0, 6]

    # Goal state the planner tries to reach.
    self.state_goal = np.array([12, 3, 2, 0, 0, 0, 0, 0, 0])

    # The graph starts with a single root node stored under key 0.
    root_state = np.array([0, 0, 2, 0, 0, 0, 0, 0, 0])
    root_node = dict(parent=None, state=root_state, cost=0, path=None,
                     free=True)
    self.G = {0: root_node}

    # World model used for collision and window queries.
    self.env = Enviroment()
def play_games(num_games, agent_one, agent_two, draw_module=None):
    """Play ``num_games`` matches between two agents and print statistics.

    After every game the running average duration and step count are
    printed; the win totals of both rockets are printed at the end.

    Args:
        num_games: number of games to play.
        agent_one: agent controlling rocket one.
        agent_two: agent controlling rocket two.
        draw_module: optional object forwarded to ``Enviroment``.
    """
    env = Enviroment(draw_module)
    RocketOne_wins = RocketTwo_wins = 0
    total_time = total_steps = 0

    for i in range(num_games):
        state = env.reset()
        started_at = time.time()
        agent_one.history = [0, 0, 0, 0, 0, 0]
        agent_two.history = [0, 0, 0, 0, 0, 0]

        game_over = False
        while game_over == False:
            if agent_one.input:
                if agent_two.input:
                    # Both agents take input: agent one's call yields the
                    # actions for both rockets.
                    actions_one, actions_two = agent_one.choose_actions(
                        state)
                    pygame.event.clear()
                else:
                    actions_one, _ = agent_one.choose_actions(state)
                    pygame.event.clear()
                    actions_two = agent_two.choose_actions(state)
            elif agent_two.input:
                actions_one = agent_one.choose_actions(state)
                _, actions_two = agent_two.choose_actions(state)
                pygame.event.clear()
            else:
                actions_one = agent_one.choose_actions(state)
                actions_two = agent_two.choose_actions(state)

            outcome = env.next_step(actions_one, actions_two)
            step_count, (game_over, rocket_one_won), state, _ = outcome

        total_time += time.time() - started_at
        print(f"avg_time: {total_time / (i+1)}")
        total_steps += step_count
        print(f"avg_steps_count: {total_steps / (i+1)}")

        if rocket_one_won:
            RocketOne_wins += 1
        else:
            RocketTwo_wins += 1

    print(f"Rocket one wins: {RocketOne_wins}")
    print(f"Rocket two wins: {RocketTwo_wins}")
def __init__(self, ALPHA, BETA, dataset, cycles, ant_numbers, init_pheromone,
             pheromone_constant, min_pheromone, evaporation_rate, seed):
    """Store the colony parameters and build the graph environment.

    Args:
        ALPHA, BETA: weights stored for later use by the ants.
        dataset: problem data handed to ``Enviroment``.
        cycles: number of cycles to run.
        ant_numbers: number of ants per cycle.
        init_pheromone: initial pheromone, handed to ``Enviroment``.
        pheromone_constant: numerator of the per-edge pheromone deposit.
        min_pheromone: pheromone lower bound, handed to ``Enviroment``.
        evaporation_rate: rate used when pheromones are updated.
        seed: random seed stored for the ants.
    """
    # Colony / search parameters.
    self.ALPHA, self.BETA = ALPHA, BETA
    self.ant_numbers = ant_numbers
    self.cycles = cycles
    self.pheromone_constant = pheromone_constant
    self.evaporation_rate = evaporation_rate
    self.seed = seed

    # Initialize the environment and cache the data it exposes.
    environment = Enviroment(dataset, init_pheromone, min_pheromone)
    self.enviroment = environment
    self.time_of_executions = environment.getTimeOfExecutions()
    self.node_names = environment.getNodeNames()
    self.graph_edges = environment.getEdges()
def main():
    """Run ten episodes of the agent on a fixed 3x4 grid, printing rewards."""
    layout = [
        [0, 0, 0, 1],
        [0, 9, 0, -1],
        [0, 0, 0, 0],
    ]
    env = Enviroment(layout)
    agent = Agent(env)

    for i in range(10):
        observation = env.reset()
        total_reward = 0
        while True:
            chosen = agent.policy(observation)
            observation, reward, done = env.step(chosen)
            total_reward += reward
            if done:
                break

        print("Episode {}: Agent gets {} reaward".format(i, total_reward))
def generate_enviroment():
    """Build an ``Enviroment`` with randomly drawn parameters.

    Draws the grid size (8-15 per side), dirt and obstacle percentages
    (5-10 each), a kid count between 3% and 7% of the cells (converted by
    ``_percent_to_number``) and a value ``t`` in 80-150.

    Returns:
        The ``Enviroment`` constructed from those values.
    """
    # The draws happen in this order so a seeded run stays reproducible.
    rows = random.randint(8, 15)
    columns = random.randint(8, 15)
    total_cells = rows * columns

    dirty_percent = random.randint(5, 10)
    obstacle_percent = random.randint(5, 10)

    fewest_kids = _percent_to_number(3, total_cells)
    most_kids = _percent_to_number(7, total_cells)
    kids = random.randint(fewest_kids, most_kids)

    t = random.randint(80, 150)

    return Enviroment(rows, columns, dirty_percent, obstacle_percent, kids, 0,
                      t)
def main():
    """Train the agent, or replay a restored network when ``--test`` is set.

    Both modes run ``NUM_EPISODES`` episodes. ``env.step`` returns
    ``(state, reward, episode_end)``; an episode lasts until
    ``episode_end`` becomes true.
    """
    args = parser.parse()
    env = Enviroment(args.environment, args.display)
    agent = Agent(env_name=args.environment,
                  num_actions=env.gym_env.action_space.n)

    if args.test:
        # TEST MODE
        agent.restore_network()
        for _ in range(NUM_EPISODES):
            state = env.reset()
            episode_end = False
            while not episode_end:
                action = agent.select_action_test(state)
                # Fix: the end-of-episode flag used to be discarded here, so
                # the first test episode never terminated.
                state, _, episode_end = env.step(action)
    else:
        # TRAIN MODE
        for _ in range(NUM_EPISODES):
            state = env.reset()
            episode_end = False
            while not episode_end:
                action = agent.select_action(state)
                state, reward, episode_end = env.step(action)
                # NOTE(review): ``state`` is already the post-step state
                # here; confirm that is what ``agent.set`` expects.
                agent.set(state, action, reward, episode_end)
class TrajectoryPlanner(object):
    """Overall trajectory planner: RRT* search plus min-snap smoothing.

    The search tree ``self.G`` maps a node key to a dict with the entries
    ``parent`` (key of the parent node, or None), ``state`` (9-element
    array whose first three entries are the x/y/z position), ``cost``
    (cost of the edge from the parent), ``path`` (states sampled along
    that edge, one column per sample) and ``free`` (when true,
    ``min_snap_trajectory`` re-optimises the edge; otherwise it reuses
    ``path`` verbatim).
    """

    def __init__(self):
        """Create the root node, goal, tuning constants and environment."""
        #Initiate Graph
        self.G = {
            0: {
                'parent': None,
                'state': np.array([0, 0, 2, 0, 0, 0, 0, 0, 0]),
                'cost': 0,
                'path': None,
                'free': True
            }
        }
        #define Goal state
        self.state_goal = np.array([12, 3, 2, 0, 0, 0, 0, 0, 0])
        #Define some global parameters
        self.r_search = 2
        self.max_iter = 1000
        self.dt_des = 1 / 10.0  #this is the mpc controller's delta time
        #Select RRT* steer function order
        self.steer_order = 1
        self.optimization_type = 'constrained'
        #define search range for RRT*
        self.x_range = [0, 13]
        self.y_range = [-2, 4]
        self.z_range = [0, 6]
        #Create environment
        self.env = Enviroment()

    def rrt_plan(self):
        """Run RRT* with parabolic sampling in the vicinity of narrow windows.

        Returns:
            list of node dicts, starting with the goal node and following
            parent links back to the root. If no collision-free connection
            to the goal exists, the list holds only the goal node (parent
            None, infinite cost, no path).
        """
        #Iterate to get converges state
        for it in range(1, self.max_iter):
            if it % 100 == 0:
                print('iteration = ', it)

            #Random sample random point
            state_rand = np.zeros(9)
            state_rand[0] = np.random.uniform(self.x_range[0],
                                              self.x_range[1])
            state_rand[1] = np.random.uniform(self.y_range[0],
                                              self.y_range[1])
            state_rand[2] = np.random.uniform(self.z_range[0],
                                              self.z_range[1])
            #sample velocity
            angle_rand = np.random.uniform(-np.pi / 2., np.pi / 2.)
            state_rand[3:5] = np.array(
                [np.cos(angle_rand), np.sin(angle_rand)])

            #randomly sample a window every 10 iterations, else random rest of space
            if it % 10 == 0:
                win = np.random.choice(self.env.windows)
                found_path, state_rand, next_node = self.env.sample_parabolas(
                    win, it, self.dt_des)
                in_win = True
                if not found_path:
                    continue
            else:
                #check if in window
                in_win, win = self.env.check_in_window(state_rand[0:3])
                if in_win:
                    found_path, state_rand, next_node = (
                        self.env.sample_parabolas(win, it, self.dt_des))
                    if not found_path:
                        continue

            #look for nearby nodes
            Near_nodes, _, _ = nearby_nodes(state_rand, self.G,
                                            self.r_search)

            #Connect to best node
            min_cost = float('inf')
            best_node = None
            for key in Near_nodes.keys():
                node = self.G[key]
                stage_cost, X = steer(node['state'], state_rand, self.dt_des,
                                      self.steer_order)
                if self.env.check_path_collision(X):
                    continue
                total_cost = stage_cost + get_total_cost(self.G, key)
                if total_cost < min_cost:
                    min_cost = total_cost
                    best_node = key
                    best_path = X
                    best_cost = stage_cost

            #Continue if node is not found
            if best_node is None:
                continue

            #Wire new node; 'cost' holds the edge cost, not the total
            self.G[it] = {
                'parent': best_node,
                'state': state_rand,
                'cost': best_cost,
                'path': best_path,
                'free': True
            }

            #Wire next node if in window (stored under the negated key)
            if in_win:
                self.G[-it] = next_node

            #rewire close nodes to reduce cost
            for key in Near_nodes.keys():
                node = self.G[key]
                stage_cost, X = steer(state_rand, node['state'], self.dt_des,
                                      self.steer_order)
                if self.env.check_path_collision(X):
                    continue
                total_cost = stage_cost + get_total_cost(self.G, it)
                # Fix: compare against the neighbour's total cost-to-come.
                # node['cost'] is only its last edge cost, so the old test
                # compared a total against a single edge.
                if total_cost < get_total_cost(self.G, key):
                    self.G[key]['parent'] = it
                    self.G[key]['cost'] = stage_cost
                    self.G[key]['path'] = X

        #find best node to connect to goal
        goal_cost = float('inf')
        goal_parent = None
        # Fix: start from None so an unreachable goal is not wired with a
        # path left over from the sampling loop (or an unbound name).
        goal_path = None
        Near_nodes, _, _ = nearby_nodes(self.state_goal, self.G,
                                        self.r_search)
        for key in Near_nodes.keys():
            node = self.G[key]
            stage_cost, X = steer(node['state'], self.state_goal, self.dt_des,
                                  self.steer_order)
            if self.env.check_path_collision(X):
                continue
            total_cost = stage_cost + get_total_cost(self.G, key)
            if total_cost < goal_cost:
                goal_cost = total_cost
                goal_parent = key
                goal_path = X

        #wire goal state (its 'cost' is the total cost, unlike other nodes)
        self.G['goal'] = {
            'parent': goal_parent,
            'state': self.state_goal,
            'cost': goal_cost,
            'path': goal_path,
            'free': True
        }

        #generate best path by walking parent links back to the root
        best_path = [self.G['goal']]
        parent = goal_parent
        while parent is not None:
            best_path.append(self.G[parent])
            parent = self.G[parent]['parent']

        return best_path

    def plot_path(self, best_path, traj):
        """Plot the search graph, the node path and the final trajectory.

        Args:
            best_path: list of nodes as returned by ``rrt_plan``.
            traj: trajectory matrix whose rows 0-2 are x, y and z.
        """
        #Plotting Results:
        fig = plt.figure()
        ax = plt.axes(projection='3d')
        plt.title('Graph')
        for obs in self.env.obs_locs:
            circle = plt.Circle((obs[0], obs[1]), self.env.obs_rad, color='r')
            ax.add_artist(circle)
        for key in self.G.keys():
            pos = self.G[key]['state'][0:3]
            ax.plot([pos[0]], [pos[1]], [pos[2]], 'ro')
            parent_key = self.G[key]['parent']
            if parent_key is not None:
                parent_pos = self.G[parent_key]['state'][0:3]
                ax.plot([pos[0], parent_pos[0]], [pos[1], parent_pos[1]],
                        [pos[2], parent_pos[2]], 'b')
        plt.xlim(self.x_range)
        plt.ylim(self.y_range)

        #plot the shortest path (the last node is the root and has no path)
        fig = plt.figure()
        ax = plt.axes(projection='3d')
        for i in range(len(best_path) - 1):
            x = best_path[i]['path'][0, :]
            y = best_path[i]['path'][1, :]
            z = best_path[i]['path'][2, :]
            ax.plot(x, y, z, 'b')
        ax.plot(traj[0, :], traj[1, :], traj[2, :], 'b.')
        plt.title('Overall Trajectory 3D')

        fig, ax = plt.subplots()
        for obs in self.env.obs_locs:
            circle = plt.Circle((obs[0], obs[1]), self.env.obs_rad, color='r')
            ax.add_artist(circle)
        ax.plot(traj[0, :], traj[1, :], 'b.')
        plt.title('Overall Trajectory 2D')

        #additional plots
        # plt.figure()
        # plt.title('x-pos')
        # plt.plot(traj[0,:])
        # plt.figure()
        # plt.title('y-pos')
        # plt.plot(traj[1,:])
        # plt.figure()
        # plt.title('x-vel')
        # plt.plot(traj[3,:])
        # plt.figure()
        # plt.title('y-vel')
        # plt.plot(traj[4,:])
        # plt.figure()
        # plt.title('x-acc')
        # plt.plot(traj[6,:])
        # plt.figure()
        # plt.title('y-acc')
        # plt.plot(traj[7,:])

        plt.show()

    def lazy_states_contraction(self, best_path):
        """Prune lazy states from the path.

        A node is dropped whenever its two neighbours in the list can be
        connected directly without collision. The list is modified in
        place.

        Args:
            best_path: list of nodes forming the best path (goal first).

        Returns:
            best_path: the same list object, pruned.
        """
        curr_idx = 0
        mid_idx = 1
        next_idx = 2
        while next_idx < len(best_path):
            node1 = best_path[curr_idx]
            node2 = best_path[next_idx]
            # Steer from the node further down the list towards node1; the
            # result becomes node1's incoming path if it is collision free.
            _, X = steer(node2['state'], node1['state'], self.dt_des,
                         self.steer_order)
            if self.env.check_path_collision(X):
                # Cannot skip the middle node: slide the window one step.
                curr_idx += 1
                mid_idx = curr_idx + 1
                next_idx = curr_idx + 2
                continue
            # Indices stay put: after the pop, next_idx already points at
            # the following node.
            best_path.pop(mid_idx)
            best_path[curr_idx]['path'] = X

        return best_path

    def _solve_min_snap(self, state_init, state_final, int_points):
        """Call the min-snap solver selected by ``optimization_type``."""
        if self.optimization_type == 'constrained':
            return min_snap_constrained(state_init, state_final, int_points,
                                        self.dt_des)
        if self.optimization_type == 'unconstrained':
            # Resolves to the module-level function, not this class's method.
            return min_snap_trajectory(state_init, state_final, int_points,
                                       self.dt_des)
        raise Exception('optimization_type not defined')

    def min_snap_trajectory(self, best_path):
        """Generate the minimum snap trajectory along ``best_path``.

        Runs of consecutive 'free' nodes are replaced by one min-snap
        segment; non-free nodes contribute their stored path unchanged.
        When a segment collides, intermediate points taken from the
        original node paths are added (2 to 9 subdivisions) and the
        segment is solved again.

        Args:
            best_path: list of nodes forming the best path (goal first).

        Returns:
            traj: a 9xN matrix forming the min snap trajectory, or None
                when ``best_path`` has fewer than two nodes.
            solution_found: False if a segment still collides after the
                last refinement, True otherwise.
            s: total distance of trajectory, or None when ``traj`` is None.

        Raises:
            Exception: if ``optimization_type`` is neither 'constrained'
                nor 'unconstrained'.
        """
        print('Generating minimum snap trajectory')
        traj = None
        i = 0
        solution_found = True
        while i < (len(best_path) - 1):
            if best_path[i]['free']:
                state_final = best_path[i]['state']
                int_nodes = []
                # Collect the free nodes that follow. The segment starts
                # at the first non-free node or at the end of the list.
                for j in range(i + 1, len(best_path)):
                    is_last = j + 1 == len(best_path)
                    if best_path[j]['free'] and not is_last:
                        int_nodes.append(best_path[j])
                        continue
                    state_init = best_path[j]['state']
                    break
                n_int = len(int_nodes)

                # Fix: hand the waypoints over from state_init towards
                # state_final, the order the refinement loop below uses.
                # The list is goal-first, so the collected nodes are
                # reversed here.
                int_points = [
                    node['state'][0:3] for node in reversed(int_nodes)
                ]
                _, X = self._solve_min_snap(state_init, state_final,
                                            int_points)

                #Check min snap trajectory collision
                div = 2
                while self.env.check_path_collision(X) and div < 10:
                    print('Collision Detected, adding midpoints')
                    #add intermediate points
                    int_points = []
                    fractions = [k / float(div) for k in range(1, div)]
                    for node in reversed(int_nodes):
                        node_path = node['path']
                        for factor in fractions:
                            int_idx = int(node_path.shape[1] * factor)
                            int_points.append(node_path[0:3, int_idx])
                        int_points.append(node['state'][0:3])
                    last_path = best_path[i]['path']
                    for factor in fractions:
                        int_idx = int(last_path.shape[1] * factor)
                        int_points.append(last_path[0:3, int_idx])
                    #recalculate path using intermediate points
                    _, X = self._solve_min_snap(state_init, state_final,
                                                int_points)
                    div += 1

                # Fix: report failure only when the last refinement still
                # collides; reaching div == 10 alone says nothing about it.
                if div == 10 and self.env.check_path_collision(X):
                    solution_found = False

                i += 1 + n_int
            else:
                # Non-free node: reuse its stored path as is.
                X = best_path[i]['path']
                i += 1

            # best_path runs goal-first, so later segments are prepended.
            if traj is None:
                traj = X
            else:
                traj = np.concatenate((X, traj), axis=1)

        #Calculate total path length
        if traj is not None:
            steps = np.diff(traj[0:3, :], axis=1)
            s = np.sum(np.sqrt(np.sum(steps**2, axis=0)))
        else:
            s = None

        return traj, solution_found, s

    # NOTE(review): only the signature and docstring of publish_path are
    # visible in this view; its body is left untouched.
    def publish_path(self, traj):
        """Function for publishing path to ROS"""
processes.append( Process(target=simulate_one_game_wins, args=(i, envs[i], agents_one[i], agents_two[i], return_vals))) for i in range(len(processes)): processes[i].start() for i in range(len(processes)): processes[i].join() result = sum(return_vals.values()) / len(processes) print(f"sum: {result}") return result, envs = [Enviroment() for i in range(6)] agents_one = [None for i in range(6)] agents_two = [Stable_defensive_agent(2) for i in range(6)] funcs = [None for i in range(6)] env1 = Enviroment() env2 = Enviroment() env3 = Enviroment() agent1_two = Stable_defensive_agent(2) agent2_two = Stable_defensive_agent(2) agent3_two = Stable_defensive_agent(2) semi_result = [0, 0, 0, 0, 0, 0, 0] pset = gp.PrimitiveSetTyped("main", [int, int, int, int, int], ActionPlanEnum) pset.addPrimitive(if_then_else, [Bool, ActionPlanEnum, ActionPlanEnum], ActionPlanEnum)
class ACO():
    """
    Class responsible for managing the entire process.

    It creates the graph environment, sends all the ants through it and
    updates the pheromones after every cycle. The results are reported by
    ``releaseTheAnts``, which prints the overall statistics and writes the
    per-cycle times to a JSON file; nothing is returned.
    """

    def __init__(self, ALPHA, BETA, dataset, cycles, ant_numbers,
                 init_pheromone, pheromone_constant, min_pheromone,
                 evaporation_rate, seed):
        """Store the colony parameters and build the environment.

        Args:
            ALPHA, BETA: weights forwarded to every ``Ant``.
            dataset: problem data handed to ``Enviroment``.
            cycles: number of cycles to run.
            ant_numbers: number of ants released per cycle.
            init_pheromone: initial pheromone, handed to ``Enviroment``.
            pheromone_constant: numerator of the per-edge deposit
                (``pheromone_constant / path_time``).
            min_pheromone: pheromone lower bound, handed to ``Enviroment``.
            evaporation_rate: rate passed to ``updatePheromone``.
            seed: base random seed forwarded to every ``Ant``.
        """
        self.ALPHA = ALPHA
        self.ant_numbers = ant_numbers
        self.BETA = BETA
        self.cycles = cycles
        self.pheromone_constant = pheromone_constant
        self.evaporation_rate = evaporation_rate
        self.seed = seed
        #Initialize the Enviroment and set data
        self.enviroment = Enviroment(dataset, init_pheromone, min_pheromone)
        self.time_of_executions = self.enviroment.getTimeOfExecutions()
        self.node_names = self.enviroment.getNodeNames()
        self.graph_edges = self.enviroment.getEdges()

    def releaseTheAnts(self):
        """
        Create and run all ants through the environment and update the
        pheromones after each cycle.

        Side effects:
            - Writes "ACO_cycles_results.json" with the time results of
              all cycles: {cycle : [Fastest, Mean, Longest], ...}
            - Prints the mean, standard deviation and best time over all
              walks.
        """
        results_control = {}
        all_times = []
        for cycle_number in range(self.cycles):
            this_cycle_times = []
            #Get the updated graph:
            this_cycle_Graph = self.enviroment.getGraph()
            #Create dict with each edge as a key and all values as zeros,
            # so it can sum all edges contribution along this cycle:
            this_cycle_edges_contributions = dict.fromkeys(
                self.graph_edges, 0)
            for ant_number in range(self.ant_numbers):
                #Create Ant, make it walk through the graph and calculate
                # makespan time for that walk
                ant = Ant(this_cycle_Graph,
                          self.node_names,
                          self.ALPHA,
                          self.BETA,
                          self.seed,
                          extended_seed=ant_number)
                ant_path = ant.walk()
                path_time = self.enviroment.calculateMakespanTime(ant_path)
                #Recording the pheromone contribution for each edge of this
                # walk: shorter walks deposit more
                deposit = self.pheromone_constant / path_time
                for edge in ant_path:
                    this_cycle_edges_contributions[edge] += deposit
                #Recording cycle values:
                this_cycle_times.append(path_time)
                all_times.append(path_time)
            #Update pheromone on edges of the graph
            self.enviroment.updatePheromone(self.evaporation_rate,
                                            this_cycle_edges_contributions)
            #save recorded values
            results_control[cycle_number] = [
                min(this_cycle_times),
                mean(this_cycle_times),
                max(this_cycle_times)
            ]

        #generating file with fitness through cycles; the context manager
        # closes the file, which the previous bare open() never did
        with open("ACO_cycles_results.json", 'w') as results_file:
            json.dump(results_control, results_file)

        # stdev needs at least two samples; report 0.0 for a single walk
        # instead of failing after the whole run has finished.
        spread = stdev(all_times) if len(all_times) > 1 else 0.0

        #Print results:
        print("---------------------------------------------------")
        print("Mean: ", mean(all_times))
        print("Standard deviation: ", spread)
        print("BEST PATH TIME: ", min(all_times), " seconds")
        print("---------------------------------------------------")
from enviroment import Enviroment
from agents import RandomAgent, InteractiveAgent

# Ask which kind of game to run: '1' starts an interactive game, any other
# answer starts a random play.
op = input("Press 1 to start a game, press 2 for a random play\n")
a = (InteractiveAgent(Enviroment())
     if op == '1' else RandomAgent(Enviroment(), True))
a.game()
from tensorflow.keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.callbacks import FileLogger, ModelIntervalCheckpoint
from enviroment import Enviroment

# Command line: --mode selects train or test, --weights an optional path.
parser = argparse.ArgumentParser()
parser.add_argument('--mode', choices=['train', 'test'], default='train')
parser.add_argument('--weights', type=str, default=None)
args = parser.parse_args()

# Get the environment and extract the number of actions.
env = Enviroment()
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Next, we build our model. The layer stack follows the layout described by
# Mnih et al. (2015), here applied to a (1, 4, 4) input. Layers are created
# and added in the listed order.
model = Sequential()
for _layer in (
        Permute((2, 3, 1), input_shape=(1, 4, 4)),
        Convolution2D(4, (2, 2), strides=(4, 4)),
        Activation('relu'),
        Convolution2D(64, (1, 1), strides=(2, 2)),
        Activation('relu'),
        Convolution2D(64, (1, 1), strides=(1, 1)),
        Activation('relu'),
        Flatten(),
        Dense(512),
):
    model.add(_layer)
from enviroment import Enviroment
from animal import Animal
import random as rnd
from mdp_function import compute_policies
import matplotlib.pyplot as plt

# Simulation constants.
INIT_FOOD_PROB = 0.2  # probability passed to the initial generate_food call
REGEN_FOOD_PROB = 0.3  # presumably the food regrowth probability -- not used in the visible part
UPDATE_FOOD_TIME = 1  # presumably the food update interval -- not used in the visible part
SQUARE_EDGE = 40  # argument passed to the Enviroment constructor
PRINT_ENV = True

# Build the environment and compute the policies shared by all animals.
env = Enviroment(SQUARE_EDGE)
Animal.POLICY_ARRAY = compute_policies()

# Start with a single animal at a random position inside the environment.
animals = [
    Animal([rnd.randint(0, env.dim - 1),
            rnd.randint(0, env.dim - 1)], env, 2)
]
env.generate_food(INIT_FOOD_PROB)
env.print_map()

# starting with only 1 animal, execute the simulation
# every animal takes a decision, then it's executed
# actions: 1 - move, 2 - sensing, 3 - reproduce
population_array = []
for iteration in range(1, 300):
    print("ITERATION: " + str(iteration))
    updated_animals = []
    population_dimension = 0
    # NOTE(review): the body of this loop lies outside the visible chunk.
    for animal in animals: