def __init__(self):

        #Initialize the graph
        self.G = {
            0: {
                'parent': None,
                'state': np.array([0, 0, 2, 0, 0, 0, 0, 0, 0]),
                'cost': 0,
                'path': None,
                'free': True
            }
        }

        #define Goal state
        self.state_goal = np.array([12, 3, 2, 0, 0, 0, 0, 0, 0])

        #Define some global parameters
        self.r_search = 2
        self.max_iter = 1000
        self.dt_des = 1 / 10.0  #time step used by the MPC controller

        #Select RRT* steer function order
        self.steer_order = 1
        self.optimization_type = 'constrained'

        #define search range for RRT*
        self.x_range = [0, 13]
        self.y_range = [-2, 4]
        self.z_range = [0, 6]

        #Create environment
        self.env = Enviroment()
Example #2
import time

import pygame

from enviroment import Enviroment


def play_games(num_games, agent_one, agent_two, draw_module=None):
    env = Enviroment(draw_module)
    RocketOne_wins = 0
    RocketTwo_wins = 0
    total_time = 0
    total_steps = 0

    for i in range(num_games):
        game_over = False
        state = env.reset()
        start = time.time()
        agent_one.history = [0, 0, 0, 0, 0, 0]
        agent_two.history = [0, 0, 0, 0, 0, 0]
        while not game_over:
            if agent_one.input and agent_two.input:
                actions_one, actions_two = agent_one.choose_actions(state)
                pygame.event.clear()
            elif agent_one.input:
                actions_one, _ = agent_one.choose_actions(state)
                pygame.event.clear()
                actions_two = agent_two.choose_actions(state)
            elif agent_two.input:
                actions_one = agent_one.choose_actions(state)
                _, actions_two = agent_two.choose_actions(state)
                pygame.event.clear()
            else:
                actions_one = agent_one.choose_actions(state)
                actions_two = agent_two.choose_actions(state)

            step_count, (game_over, rocket_one_won), state, _ = env.next_step(
                actions_one, actions_two)

        end = time.time()
        total_time = total_time + (end - start)

        #print(agent_one.history)
        #print(agent_two.history)

        print(f"avg_time: {total_time / (i+1)}")
        total_steps = total_steps + step_count
        print(f"avg_steps_count: {total_steps / (i+1)}")

        if rocket_one_won:
            RocketOne_wins = RocketOne_wins + 1
        else:
            RocketTwo_wins = RocketTwo_wins + 1

        print(f"Rocket one wins: {RocketOne_wins}")
        print(f"Rocket two wins: {RocketTwo_wins}")
Example #3
    def __init__(self, ALPHA, BETA, dataset, cycles, ant_numbers,
                 init_pheromone, pheromone_constant, min_pheromone,
                 evaporation_rate, seed):
        self.ALPHA = ALPHA
        self.ant_numbers = ant_numbers
        self.BETA = BETA
        self.cycles = cycles
        self.pheromone_constant = pheromone_constant
        self.evaporation_rate = evaporation_rate
        self.seed = seed

        #Initialize the Enviroment and set data
        self.enviroment = Enviroment(dataset, init_pheromone, min_pheromone)
        self.time_of_executions = self.enviroment.getTimeOfExecutions()
        self.node_names = self.enviroment.getNodeNames()
        self.graph_edges = self.enviroment.getEdges()


def main():
    grid = [[0, 0, 0, 1], [0, 9, 0, -1], [0, 0, 0, 0]]

    env = Enviroment(grid)
    agent = Agent(env)

    for i in range(10):
        state = env.reset()
        total_reward = 0
        done = False

        while not done:
            action = agent.policy(state)
            next_state, reward, done = env.step(action)
            total_reward += reward
            state = next_state

        print("Episode {}: Agent gets {} reward".format(i, total_reward))


def generate_enviroment():
    rows, columns = random.randint(8, 15), random.randint(8, 15)
    total_cells = rows * columns
    dirty_percent = random.randint(5, 10)
    obstacle_percent = random.randint(5, 10)
    kids = random.randint(_percent_to_number(3, total_cells),
                          _percent_to_number(7, total_cells))
    t = random.randint(80, 150)
    return Enviroment(rows, columns, dirty_percent, obstacle_percent, kids, 0,
                      t)
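The helper _percent_to_number is used above but not shown in this snippet; a minimal sketch of what it presumably computes (the rounding choice is an assumption):

def _percent_to_number(percent, total):
    # Turn a percentage of `total` into an absolute integer count.
    return round(total * percent / 100)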
Example #6
def main():
    args = parser.parse_args()

    env = Enviroment(args.environment, args.display)
    agent = Agent(env_name=args.environment, num_actions=env.gym_env.action_space.n)

    if args.test: #TEST MODE
        agent.restore_network()
        for episode in range(NUM_EPISODES):
            state = env.reset()
            episode_end = False

            while not episode_end:
                action = agent.select_action_test(state)
                state, _, _ = env.step(action)

    else: #TRAIN MODE
        for episode in range(NUM_EPISODES):
            state = env.reset()
            episode_end = False

            while not episode_end:
                action = agent.select_action(state)
                state, reward, episode_end = env.step(action)
                agent.set(state, action, reward, episode_end)


import numpy as np
import matplotlib.pyplot as plt


class TrajectoryPlanner(object):
    #Define a class for overall trajectory planner
    def __init__(self):

        #Initialize the graph
        self.G = {
            0: {
                'parent': None,
                'state': np.array([0, 0, 2, 0, 0, 0, 0, 0, 0]),
                'cost': 0,
                'path': None,
                'free': True
            }
        }

        #define Goal state
        self.state_goal = np.array([12, 3, 2, 0, 0, 0, 0, 0, 0])

        #Define some global parameters
        self.r_search = 2
        self.max_iter = 1000
        self.dt_des = 1 / 10.0  #time step used by the MPC controller

        #Select RRT* steer function order
        self.steer_order = 1
        self.optimization_type = 'constrained'

        #define search range for RRT*
        self.x_range = [0, 13]
        self.y_range = [-2, 4]
        self.z_range = [0, 6]

        #Create environment
        self.env = Enviroment()

    def rrt_plan(self):
        """Function that Runs RRT* with parabolic sampling in the vicinity
        of narrow windows

        Returns:
            best path to the goal
        """

        #Iterate up to the maximum number of iterations
        for it in range(1, self.max_iter):
            if it % 100 == 0:
                print('iteration = ', it)

            #Sample a random point within the search range
            state_rand = np.zeros(9)
            state_rand[0] = np.random.uniform(self.x_range[0], self.x_range[1])
            state_rand[1] = np.random.uniform(self.y_range[0], self.y_range[1])
            state_rand[2] = np.random.uniform(self.z_range[0], self.z_range[1])
            #sample velocity
            angle_rand = np.random.uniform(-np.pi / 2., np.pi / 2.)
            state_rand[3:5] = np.array(
                [np.cos(angle_rand), np.sin(angle_rand)])

            #sample a window every 10 iterations, otherwise sample the rest of the space at random
            if it % 10 == 0:
                win = np.random.choice(self.env.windows)
                #state_rand, next_node = win.generate_parabolic_nodes(it, self.dt_des)
                found_path, state_rand, next_node = self.env.sample_parabolas(
                    win, it, self.dt_des)
                in_win = True
                if not found_path:
                    continue
            else:
                #check if in window
                in_win, win = self.env.check_in_window(state_rand[0:3])
                if in_win:
                    #state_rand, next_node = win.generate_parabolic_nodes(it, self.dt_des)
                    found_path, state_rand, next_node = self.env.sample_parabolas(
                        win, it, self.dt_des)
                    if not found_path:
                        continue

            #look for nearby nodes
            Near_nodes, Nearest_node, key_Nearest_node = nearby_nodes(
                state_rand, self.G, self.r_search)

            #Connect to best node
            min_cost = float('inf')
            best_node = None
            for key in Near_nodes.keys():
                node = self.G[key]

                stage_cost, X = steer(node['state'], state_rand, self.dt_des,
                                      self.steer_order)

                if self.env.check_path_collision(X):
                    continue

                total_cost = stage_cost + get_total_cost(self.G, key)

                if total_cost < min_cost:
                    min_cost = total_cost
                    best_node = key
                    best_path = X
                    best_cost = stage_cost

            #Continue if node is not found
            if best_node is None:
                continue

            #Wire new node
            self.G[it] = {
                'parent': best_node,
                'state': state_rand,
                'cost': best_cost,
                'path': best_path,
                'free': True
            }

            #Wire next node if in window
            if in_win:
                self.G[-it] = next_node

            #rewire close nodes to reduce cost
            for key in Near_nodes.keys():
                node = self.G[key]

                stage_cost, X = steer(state_rand, node['state'], self.dt_des,
                                      self.steer_order)

                if self.env.check_path_collision(X):
                    continue

                total_cost = stage_cost + get_total_cost(self.G, it)

                if total_cost < node['cost']:
                    self.G[key]['parent'] = it
                    self.G[key]['cost'] = stage_cost
                    self.G[key]['path'] = X

        #find best node to connect to goal
        min_cost = float('inf')
        best_node = None
        Near_nodes, Nearest_node, key_Nearest_node = nearby_nodes(
            self.state_goal, self.G, self.r_search)
        for key in Near_nodes.keys():
            node = self.G[key]

            stage_cost, X = steer(node['state'], self.state_goal, self.dt_des,
                                  self.steer_order)

            if self.env.check_path_collision(X):
                continue

            total_cost = stage_cost + get_total_cost(self.G, key)

            if total_cost < min_cost:
                min_cost = total_cost
                best_node = key
                best_path = X

        #wire goal state
        self.G['goal'] = {
            'parent': best_node,
            'state': self.state_goal,
            'cost': min_cost,
            'path': best_path,
            'free': True
        }

        #generate best path
        best_path = [self.G['goal']]
        parent = best_node
        while parent is not None:
            best_path.append(self.G[parent])
            parent = self.G[parent]['parent']

        return best_path

    def plot_path(self, best_path, traj):
        """Function for plotting the results
        """

        #Plotting Results:
        fig = plt.figure()
        ax = plt.axes(projection='3d')
        plt.title('Graph')
        for obs in self.env.obs_locs:
            circle = plt.Circle((obs[0], obs[1]), self.env.obs_rad, color='r')
            ax.add_artist(circle)
        for key in self.G.keys():
            pos = self.G[key]['state'][0:3]
            ax.plot([pos[0]], [pos[1]], [pos[2]], 'ro')
            parent_key = self.G[key]['parent']
            if parent_key is not None:
                parent_pos = self.G[parent_key]['state'][0:3]
                ax.plot([pos[0], parent_pos[0]], [pos[1], parent_pos[1]],
                        [pos[2], parent_pos[2]], 'b')

        plt.xlim(self.x_range)
        plt.ylim(self.y_range)

        #plot the shortest path
        fig = plt.figure()
        ax = plt.axes(projection='3d')

        for i in range(len(best_path) - 1):
            x = best_path[i]['path'][0, :]
            y = best_path[i]['path'][1, :]
            z = best_path[i]['path'][2, :]
            ax.plot(x, y, z, 'b')

        ax.plot(traj[0, :], traj[1, :], traj[2, :], 'b.')
        plt.title('Overall Trajectory 3D')

        fig, ax = plt.subplots()
        for obs in self.env.obs_locs:
            circle = plt.Circle((obs[0], obs[1]), self.env.obs_rad, color='r')
            ax.add_artist(circle)
        ax.plot(traj[0, :], traj[1, :], 'b.')
        plt.title('Overall Trajectory 2D')

        #additional plots
        # plt.figure()
        # plt.title('x-pos')
        # plt.plot(traj[0,:])
        # plt.figure()
        # plt.title('y-pos')
        # plt.plot(traj[1,:])
        # plt.figure()
        # plt.title('x-vel')
        # plt.plot(traj[3,:])
        # plt.figure()
        # plt.title('y-vel')
        # plt.plot(traj[4,:])
        # plt.figure()
        # plt.title('x-acc')
        # plt.plot(traj[6,:])
        # plt.figure()
        # plt.title('y-acc')
        # plt.plot(traj[7,:])

        plt.show()

    def lazy_states_contraction(self, best_path):
        """Implementation of lazy states contraction algorithm, prunes 
        the path by removing any lazy states

        Arg's: 
            best_path: list of nodes forming the best path
        Returns:
            best_path: pruned best_path

        """
        #lazy states contraction
        curr_idx = 0
        mid_idx = 1
        next_idx = 2
        while next_idx < len(best_path):
            node1 = best_path[curr_idx]
            node2 = best_path[next_idx]

            _, X = steer(node2['state'], node1['state'], self.dt_des,
                         self.steer_order)

            if self.env.check_path_collision(X):
                curr_idx += 1
                mid_idx = curr_idx + 1
                next_idx = curr_idx + 2
                continue

            best_path.pop(mid_idx)
            best_path[curr_idx]['path'] = X

        return best_path

    def min_snap_trajectory(self, best_path):
        """Function that generates the minimum snap trajectory

        Args:
            best_path: list of nodes forming the best path

        Returns:
            traj: a 9xN matrix forming the min snap trajectory
            solution_found: True if a solution was found, False otherwise
            s: total distance of trajectory
        """
        print('Generating minimum snap trajectory')
        traj = None
        i = 0
        solution_found = True
        while i < (len(best_path) - 1):

            #if node[i] is a free node, regenerate this segment with a min snap trajectory
            if best_path[i]['free']:
                state_final = best_path[i]['state']
                int_points = []
                int_nodes = []
                for j in range(i + 1, len(best_path)):
                    if best_path[j]['free']:
                        if j + 1 == len(best_path):
                            state_init = best_path[j]['state']
                            break
                        int_points.append(best_path[j]['state'][0:3])
                        int_nodes.append(best_path[j])
                        continue
                    else:
                        state_init = best_path[j]['state']
                        break
                n_int = len(int_points)

                if self.optimization_type == 'constrained':
                    s, X = min_snap_constrained(state_init, state_final,
                                                int_points, self.dt_des)
                elif self.optimization_type == 'unconstrained':
                    s, X = min_snap_trajectory(state_init, state_final,
                                               int_points, self.dt_des)
                else:
                    raise Exception('optimization_type not defined')

                #Check min snap trajectory collision
                div = 2
                while self.env.check_path_collision(X) and div < 10:
                    print('Collision Detected, adding midpoints')

                    #add intermediate points
                    int_points = []
                    N = len(int_nodes)
                    for j in range(N - 1, -1, -1):
                        for k in range(1, div):
                            factor = k / float(div)
                            int_idx = int(int_nodes[j]['path'].shape[1] *
                                          factor)
                            p_mid = int_nodes[j]['path'][0:3, int_idx]
                            int_points.append(p_mid)
                        int_points.append(int_nodes[j]['state'][0:3])

                    for k in range(1, div):
                        factor = k / float(div)
                        int_idx = int(best_path[i]['path'].shape[1] * factor)
                        p_mid = best_path[i]['path'][0:3, int_idx]
                        int_points.append(p_mid)

                    #recalculate path using intermediate points
                    if self.optimization_type == 'constrained':
                        s, X = min_snap_constrained(state_init, state_final,
                                                    int_points, self.dt_des)
                    elif self.optimization_type == 'unconstrained':
                        s, X = min_snap_trajectory(state_init, state_final,
                                                   int_points, self.dt_des)
                    else:
                        raise Exception('optimization_type not defined')

                    div += 1

                if div == 10:
                    solution_found = False

                i += 1 + n_int
                if traj is None:
                    traj = X
                else:
                    traj = np.concatenate((X, traj), axis=1)

            else:
                X = best_path[i]['path']
                i += 1
                if traj is None:
                    traj = X
                else:
                    traj = np.concatenate((X, traj), axis=1)

        #Calculate total path length
        if traj is not None:
            N = traj.shape[1]
            x = traj[0, :]
            y = traj[1, :]
            z = traj[2, :]

            dx = x[1:N] - x[0:N - 1]
            dy = y[1:N] - y[0:N - 1]
            dz = z[1:N] - z[0:N - 1]

            ds2 = dx**2 + dy**2 + dz**2
            ds = np.sqrt(ds2)
            s = np.sum(ds)
        else:
            s = None

        return traj, solution_found, s

    # def publish_path(self, traj):
    #     """Function for publishing path to ROS"""
Example #8
        processes.append(
            Process(target=simulate_one_game_wins,
                    args=(i, envs[i], agents_one[i], agents_two[i],
                          return_vals)))

    for i in range(len(processes)):
        processes[i].start()
    for i in range(len(processes)):
        processes[i].join()
    result = sum(return_vals.values()) / len(processes)
    print(f"sum: {result}")
    return result,
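
# simulate_one_game_wins is referenced above but not shown. Below is a hedged
# sketch of such a worker, reusing the Enviroment.next_step interface from the
# play_games example earlier; every detail is an assumption.
def simulate_one_game_wins(idx, env, agent_one, agent_two, return_vals):
    state = env.reset()
    game_over = False
    while not game_over:
        actions_one = agent_one.choose_actions(state)
        actions_two = agent_two.choose_actions(state)
        _, (game_over, rocket_one_won), state, _ = env.next_step(
            actions_one, actions_two)
    # record 1 for a rocket-one win so the caller can average a win rate
    return_vals[idx] = 1 if rocket_one_won else 0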


envs = [Enviroment() for i in range(6)]
agents_one = [None for i in range(6)]
agents_two = [Stable_defensive_agent(2) for i in range(6)]
funcs = [None for i in range(6)]

env1 = Enviroment()
env2 = Enviroment()
env3 = Enviroment()
agent1_two = Stable_defensive_agent(2)
agent2_two = Stable_defensive_agent(2)
agent3_two = Stable_defensive_agent(2)
semi_result = [0, 0, 0, 0, 0, 0, 0]

pset = gp.PrimitiveSetTyped("main", [int, int, int, int, int], ActionPlanEnum)
pset.addPrimitive(if_then_else, [Bool, ActionPlanEnum, ActionPlanEnum],
                  ActionPlanEnum)
Example #9
import json
from statistics import mean, stdev

from enviroment import Enviroment


class ACO():
    """
    Class responsible to manage the
    entire proccess. 
    It creates and executes the graph enviroment
    all the ants, updates the pheromones and 
    at the registers the best solution found.

    returns:
        Best Makespam time of critical path.
        Sequence Job/Machine for this path.
    """
    def __init__(self, ALPHA, BETA, dataset, cycles, ant_numbers,
                 init_pheromone, pheromone_constant, min_pheromone,
                 evaporation_rate, seed):
        self.ALPHA = ALPHA
        self.ant_numbers = ant_numbers
        self.BETA = BETA
        self.cycles = cycles
        self.pheromone_constant = pheromone_constant
        self.evaporation_rate = evaporation_rate
        self.seed = seed

        #Initialize the Enviroment and set data
        self.enviroment = Enviroment(dataset, init_pheromone, min_pheromone)
        self.time_of_executions = self.enviroment.getTimeOfExecutions()
        self.node_names = self.enviroment.getNodeNames()
        self.graph_edges = self.enviroment.getEdges()

    def releaseTheAnts(self):
        """
        Method responsible to create
        and execute all ants through
        the enviroment and update
        the pheromones.

        returns:
            - Print the best time.
            - Generate a file with the 
                time results of all cycles
                with this structure:
                {cycle : [Fastest, Mean, Longest], ...}
        """
        results_control = {}
        all_times = []
        fastest_path = []
        for cycle_number in range(self.cycles):
            this_cycle_times = []
            #Get the updated graph:
            this_cycle_Graph = self.enviroment.getGraph()
            #Create dict with each edge as a key and all values as zeros,
            #  so it can sum all edges contribution along this cycle:
            this_cycle_edges_contributions = dict.fromkeys(self.graph_edges, 0)

            for ant_number in range(self.ant_numbers):
                #Create Ant, make it walk through the graph and calculate makespan time for that walk
                ant = Ant(this_cycle_Graph,
                          self.node_names,
                          self.ALPHA,
                          self.BETA,
                          self.seed,
                          extended_seed=ant_number)
                ant_path = ant.walk()
                path_time = self.enviroment.calculateMakespanTime(ant_path)
                #Recording the pheromone contribution for each edge of this walk
                for edge in ant_path:
                    this_cycle_edges_contributions[
                        edge] += self.pheromone_constant / path_time
                #Recording cycle values:
                this_cycle_times.append(path_time)
                all_times.append(path_time)

            #Update pheromone on edges of the graph
            self.enviroment.updatePheromone(self.evaporation_rate,
                                            this_cycle_edges_contributions)

            #save recorded values
            results_control.update({
                cycle_number: [
                    min(this_cycle_times),
                    mean(this_cycle_times),
                    max(this_cycle_times)
                ]
            })

        #generating file with fitness through cycles
        with open("ACO_cycles_results.json", 'w') as results_file:
            json.dump(results_control, results_file)
        #Print results:
        print("---------------------------------------------------")
        print("Mean: ", mean(all_times))
        print("Standard deviation: ", stdev(all_times))
        print("BEST PATH TIME: ", min(all_times), " seconds")
        print("---------------------------------------------------")
Example #10
from enviroment import Enviroment
from agents import RandomAgent, InteractiveAgent

op = input("Press 1 to start a game, press 2 for a random play\n")
if op == '1':
    a = InteractiveAgent(Enviroment())
else:
    a = RandomAgent(Enviroment(), True)
a.game()
Example #11
import argparse

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Convolution2D, Permute
from tensorflow.keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

from enviroment import Enviroment

parser = argparse.ArgumentParser()
parser.add_argument('--mode', choices=['train', 'test'], default='train')
parser.add_argument('--weights', type=str, default=None)
args = parser.parse_args()

# Get the environment and extract the number of actions.
env = Enviroment()
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Next, we build our model. We use the same model that was described by Mnih et al. (2015).
model = Sequential()
model.add(Permute((2, 3, 1), input_shape=(1, 4, 4)))
model.add(Convolution2D(4, (2, 2), strides=(4, 4)))
model.add(Activation('relu'))
model.add(Convolution2D(64, (1, 1), strides=(2, 2)))
model.add(Activation('relu'))
model.add(Convolution2D(64, (1, 1), strides=(1, 1)))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(512))
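
# The snippet above stops before the model's output layer and before the
# imported keras-rl pieces are wired together. Below is a hedged sketch of the
# typical keras-rl DQN setup; the layer sizes, hyperparameters and file names
# are assumptions, not the original author's values.
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

memory = SequentialMemory(limit=50000, window_length=1)
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                              value_min=.1, value_test=.05, nb_steps=10000)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, policy=policy,
               nb_steps_warmup=500, target_model_update=1e-2)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

if args.mode == 'train':
    callbacks = [ModelIntervalCheckpoint('dqn_checkpoint.h5f', interval=10000),
                 FileLogger('dqn_log.json', interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=50000, log_interval=10000)
    dqn.save_weights('dqn_weights.h5f', overwrite=True)
elif args.mode == 'test':
    dqn.load_weights(args.weights if args.weights else 'dqn_weights.h5f')
    dqn.test(env, nb_episodes=10, visualize=False)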
Example #12
from enviroment import Enviroment
from animal import Animal
import random as rnd
from mdp_function import compute_policies
import matplotlib.pyplot as plt

INIT_FOOD_PROB = 0.2
REGEN_FOOD_PROB = 0.3
UPDATE_FOOD_TIME = 1
SQUARE_EDGE = 40
PRINT_ENV = True

env = Enviroment(SQUARE_EDGE)
Animal.POLICY_ARRAY = compute_policies()
animals = [
    Animal([rnd.randint(0, env.dim - 1),
            rnd.randint(0, env.dim - 1)], env, 2)
]
env.generate_food(INIT_FOOD_PROB)
env.print_map()

# starting with only 1 animal, execute the simulation
# every animal takes a decision, then it's executed
# actions: 1 - move, 2 - sensing, 3 - reproduce

population_array = []
for iteration in range(1, 300):
    print("ITERATION: " + str(iteration))
    updated_animals = []
    population_dimension = 0
    for animal in animals: