def evaluate_bounds_are_of_type(self, bounds, type_id):
    """Count how many tiles on the perimeter of `bounds` match `type_id`."""
    total = 0
    position = Vector(0, 0)

    # Bottom horizontal bound
    position.y = bounds.y_min
    for x in range(bounds.x_min, bounds.x_max):
        position.x = x
        total += self.evaluate_tile_type(position, type_id)

    # Top horizontal bound
    position.y = bounds.y_max - 1
    for x in range(bounds.x_min, bounds.x_max):
        position.x = x
        total += self.evaluate_tile_type(position, type_id)

    # Left vertical bound (corners are already counted by the horizontal bounds)
    position.x = bounds.x_min
    for y in range(bounds.y_min + 1, bounds.y_max - 1):
        position.y = y
        total += self.evaluate_tile_type(position, type_id)

    # Right vertical bound (corners are already counted by the horizontal bounds)
    position.x = bounds.x_max - 1
    for y in range(bounds.y_min + 1, bounds.y_max - 1):
        position.y = y
        total += self.evaluate_tile_type(position, type_id)

    return total
def __init__(self,
             initial_state: tuple = (0, 0),
             default_reward: tuple = (-1, -1),
             seed: int = 0,
             n_transition: float = 0.95,
             diagonals: int = 9,
             action_space: gym.spaces = None):
    """
    :param initial_state: Initial state where the agent starts.
    :param default_reward: (objective 1, objective 2)
    :param seed: Seed used for np.random.RandomState method.
    :param n_transition: Probability of performing the indicated action; if it is
        1, the indicated action is always performed. (The original value is about
        0.6.)
    :param diagonals: Number of diagonals used to build this environment (allows
        experimenting with an identical environment, but considering only the
        first k diagonals). By default 9 (all of them).
    """

    # Mesh shape; the original full-size environment is (10, 10).
    mesh_shape = (min(max(diagonals + 1, 1), 10), min(max(diagonals + 1, 1), 10))

    # States that lie on the requested diagonal.
    diagonals_states = set(zip(range(0, diagonals + 1, 1), range(diagonals, -1, -1)))

    # Generate final states with their rewards.
    finals = {
        state: (Vector(state) + 1) * 10 for state in diagonals_states
    }

    # Pareto-optimal vectors.
    PyramidMDP.pareto_optimal = {
        Vector(state) + 1 for state in diagonals_states
    }

    # Obstacles: states above the diagonal that are not final states.
    obstacles = frozenset(
        (x, y_above)
        for x, y in finals.keys()
        for y_above in range(y, diagonals + 1)
        if (x, y_above) not in finals
    )

    # Default reward (objective_1, objective_2)
    default_reward = Vector(default_reward)

    # Transition probability.
    assert 0 <= n_transition <= 1.
    self.n_transition = n_transition

    super().__init__(mesh_shape=mesh_shape,
                     initial_state=initial_state,
                     default_reward=default_reward,
                     finals=finals,
                     obstacles=obstacles,
                     seed=seed,
                     action_space=action_space)
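# For intuition, a worked example of the construction above with diagonals=2
# (a sketch; the values follow directly from the comprehensions, assuming Vector
# supports element-wise arithmetic as used above):
#
#   diagonals_states = {(0, 2), (1, 1), (2, 0)}
#   finals           = {(0, 2): Vector((10, 30)),
#                       (1, 1): Vector((20, 20)),
#                       (2, 0): Vector((30, 10))}
#   pareto_optimal   = {Vector((1, 3)), Vector((2, 2)), Vector((3, 1))}
#   obstacles        = {(1, 2), (2, 1), (2, 2)}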
def __init__(self,
             initial_state: tuple = ((0, 0), False),
             default_reward: tuple = (0, 0),
             seed: int = 0,
             action_space: gym.spaces = None):
    """
    :param initial_state: Initial state where the agent starts.
    :param default_reward: (objective 1, objective 2)
    :param seed: Seed used for np.random.RandomState method.
    :param action_space: Specific action space.
    """

    # List of all treasures and their rewards.
    finals = {
        (8, 0): Vector([1, 9]),
        (8, 2): Vector([3, 9]),
        (8, 4): Vector([5, 9]),
        (8, 6): Vector([7, 9]),
        (8, 8): Vector([9, 9]),
        (0, 8): Vector([9, 1]),
        (2, 8): Vector([9, 3]),
        (4, 8): Vector([9, 5]),
        (6, 8): Vector([9, 7]),
    }

    # Define mesh shape
    mesh_shape = (9, 9)

    # Set obstacles
    obstacles = frozenset({(2, 2), (2, 3), (3, 2)})

    # Default reward plus time (objective 1, objective 2, time)
    default_reward += (-1,)
    default_reward = Vector(default_reward)

    # Build the observation space (position (x, y), bonus)
    observation_space = gym.spaces.Tuple(
        (
            gym.spaces.Tuple(
                (gym.spaces.Discrete(mesh_shape[0]),
                 gym.spaces.Discrete(mesh_shape[1]))
            ),
            spaces.Boolean()
        )
    )

    super().__init__(mesh_shape=mesh_shape,
                     default_reward=default_reward,
                     initial_state=initial_state,
                     finals=finals,
                     obstacles=obstacles,
                     observation_space=observation_space,
                     seed=seed,
                     action_space=action_space)

    # Pit marks that return the agent to the start location.
    self.pits = {
        (7, 1), (7, 3), (7, 5), (1, 7), (3, 7), (5, 7)
    }

    # X2 bonus positions.
    self.bonus = [
        (3, 3)
    ]
def evaluate_tiles_in_bounds_are_of_type(self, bounds, type_id):
    """Count how many tiles inside the full area of `bounds` match `type_id`."""
    total = 0
    position = Vector(0, 0)

    for x in range(bounds.x_min, bounds.x_max):
        position.x = x
        for y in range(bounds.y_min, bounds.y_max):
            position.y = y
            total += self.evaluate_tile_type(position, type_id)

    return total
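# A small usage sketch contrasting the two counters above: a candidate region is
# fully enclosed by walls when every perimeter tile is a wall. This assumes the
# Bounds(x_min, y_min, x_max, y_max) constructor used elsewhere in this module,
# and that evaluate_tile_type returns 1 on a match and 0 otherwise (an
# assumption, not confirmed here). `region_is_walled` is a hypothetical helper.
def region_is_walled(self, bounds):
    width = bounds.x_max - bounds.x_min
    height = bounds.y_max - bounds.y_min

    # Number of tiles on the perimeter of the rectangle.
    perimeter_tiles = 2 * width + 2 * (height - 2)

    matches = self.evaluate_bounds_are_of_type(bounds, TILE_TYPES["WALL"])
    return matches == perimeter_tiles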
def draft_b_lp(columns: int):
    # Create environment
    environment = DeepSeaTreasureRightDownStochastic(columns=columns)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.000001)

    # Create instance of AgentB
    agent = AgentB(environment=environment, limited_precision=True)

    agent.simulate()
def is_room(self, start_position, area_bounds):
    found_floor = False
    current_position = Vector(start_position.x, start_position.y + 1)

    # Scan downward until the first non-wall tile to find the vertical extent.
    while current_position.y <= area_bounds.y_max and not found_floor:
        if not self.is_tile_of_type(current_position, TILE_TYPES["WALL"]):
            found_floor = True
            current_position.y -= 1
        else:
            current_position.y += 1
    room_bounds_y = current_position.y
    next_position = room_bounds_y + 1

    if not found_floor or room_bounds_y == start_position.y:
        return False, next_position, None

    # Scan rightward until the first non-wall tile to find the horizontal extent.
    found_floor = False
    current_position.x += 1
    while current_position.x <= area_bounds.x_max and not found_floor:
        if not self.is_tile_of_type(current_position, TILE_TYPES["WALL"]):
            found_floor = True
            current_position.x -= 1
        else:
            current_position.x += 1
    room_bounds_x = current_position.x

    if not found_floor or room_bounds_x == start_position.x:
        return False, next_position, None

    # Verify the far vertical edge (x = room_bounds_x) is solid wall.
    for y in range(start_position.y, room_bounds_y + 1):
        current_position.y = y
        if not self.is_tile_of_type(current_position, TILE_TYPES["WALL"]):
            return False, next_position, None

    # Verify the top horizontal edge (y = start_position.y) is solid wall.
    current_position.y = start_position.y
    for x in range(start_position.x, room_bounds_x + 1):
        current_position.x = x
        if not self.is_tile_of_type(current_position, TILE_TYPES["WALL"]):
            return False, next_position, None

    # Reject degenerate rooms only one tile wide or tall.
    if abs(start_position.x - room_bounds_x) == 1 or \
            abs(start_position.y - room_bounds_y) == 1:
        return False, next_position, None

    room_bounds = Bounds(start_position.x, start_position.y,
                         room_bounds_x, room_bounds_y)

    return True, next_position, room_bounds
def main():
    # Get trained agent
    print('Training agent...')
    agent: AgentBN = get_trained_agent()

    # Set initial state
    initial_state = ((2, 4), (0, 0), False)

    # Initial vectors
    v_s_0 = agent.v[initial_state]
    vectors = Vector.m3_max(set(v_s_0))

    # Show information
    print('Vectors obtained after m3_max algorithm: ')
    print(vectors, end='\n\n')

    # Define a tolerance
    decimal_precision = 0.0000001

    # Simulation
    simulation = dict()

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=decimal_precision)

    print('Evaluating recovered policies...')

    # For each vector
    for vector in vectors:
        # Specify objective vector
        objective_vector = vector.copy()

        print('Recovering policy for objective vector: {}...'.format(
            objective_vector))

        # Recover the policy for this objective vector.
        policy = agent.recover_policy(initial_state=initial_state,
                                      objective_vector=objective_vector,
                                      iterations_limit=agent.total_sweeps)

        print('Evaluating recovered policy...', end='\n\n')

        # Evaluate until convergence within `decimal_precision` tolerance.
        policy_evaluated = agent.evaluate_policy(policy=policy,
                                                 tolerance=decimal_precision)

        # Save the policy and its evaluation.
        simulation.update({objective_vector: (policy, policy_evaluated)})

    print(simulation)
def get_trained_agent() -> AgentBN:
    # Environment
    environment = ResourceGatheringEpisodic()

    # Agent
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    agent.train(graph_type=GraphType.SWEEP, limit=10)

    return agent
def load():
    # agent_10 = AgentBN.load(
    #     'bn/models/rge_1583857516_0.01.bin'
    # )
    # sorted_v_s_0_10 = sorted(agent_10.v[((2, 4), (0, 0))], key=lambda k: k[0])

    # agent_15 = AgentBN.load(
    #     'bn/models/rge_1583857532_0.01.bin'
    # )
    # sorted_v_s_0_15 = sorted(agent_15.v[((2, 4), (0, 0))], key=lambda k: k[0])

    agent_30: AgentBN = AgentBN.load('bn/models/rge_1583857678_0.01.bin')
    v_s_0_30 = agent_30.v[((2, 4), (0, 0))]
    v_s_0_30_nd = Vector.m3_max(set(v_s_0_30))

    # agent_10625 = AgentBN.load(
    #     filename='bn/models/rge_1583924116_0.01.bin'
    # )
    # sorted_v_s_0_10625 = sorted(agent_10625.v[((2, 4), (0, 0))], key=lambda k: k[0])
def __init__(self,
             initial_state: tuple = ((0, 0), False),
             default_reward: tuple = (0, 0),
             seed: int = 0):
    """
    :param initial_state: Initial state where the agent starts.
    :param default_reward: (objective 1, objective 2)
    :param seed: Seed used for np.random.RandomState method.
    """

    # Create a bag action space
    action_space = Bag([])
    action_space.seed(seed)

    super().__init__(seed=seed,
                     initial_state=initial_state,
                     default_reward=default_reward,
                     action_space=action_space)

    # Set obstacles
    self.obstacles = frozenset({(2, 2)})

    # Pits are final states in this variant: convert the parent's pits into
    # finals, then clear the pit list.
    self.finals.update({state: Vector([-50, -50]) for state in self.pits})
    self.pits = list()
def __init__(self,
             initial_state: tuple = (5, 2),
             default_reward: tuple = (0, -1),
             seed: int = 0,
             action_space: gym.spaces = None):
    """
    :param initial_state: Initial state where the agent starts.
    :param default_reward: (mission_success, radiation)
    :param seed: Seed used for np.random.RandomState method.
    """

    # List of all treasures and their rewards.
    finals = {}
    finals.update({(0, i): 20 for i in range(5)})
    finals.update({(9, i): 10 for i in range(3)})
    finals.update({(12, i): 30 for i in range(5)})

    obstacles = frozenset()
    mesh_shape = (13, 5)
    default_reward = Vector(default_reward)

    super().__init__(mesh_shape=mesh_shape,
                     seed=seed,
                     initial_state=initial_state,
                     default_reward=default_reward,
                     finals=finals,
                     obstacles=obstacles,
                     action_space=action_space)

    self.asteroids = {
        (5, 0), (4, 1), (6, 1), (3, 2), (7, 2), (4, 3), (6, 3), (5, 4)
    }

    # Radiation states (if the agent is on any of these, it receives a -11
    # penalization).
    self.radiations = set()
    self.radiations = self.radiations.union({(1, i) for i in range(5)})
    self.radiations = self.radiations.union({(10, i) for i in range(5)})
    self.radiations = self.radiations.union({(11, i) for i in range(5)})
def find_rooms(self, bounds):
    room_areas = list()
    position = Vector(0, 0)

    # Evaluate the area for rooms
    next_position = 0
    for x in range(bounds.x_min, bounds.x_max):
        position.x = x
        for y in range(bounds.y_min, bounds.y_max):
            position.y = y
            if self.is_tile_of_type(position, TILE_TYPES["WALL"]):
                # Found a wall tile: check whether a room starts here.
                result, next_position, room_bounds = self.is_room(position, bounds)
                if result:
                    room_areas.append(room_bounds)

    self.rooms = room_areas
    return room_areas
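# A minimal usage sketch for find_rooms, assuming the
# Bounds(x_min, y_min, x_max, y_max) constructor used in is_room above.
# `generator` and the 20x15 map size are hypothetical illustration values.
#
# rooms = generator.find_rooms(Bounds(0, 0, 20, 15))
# for room in rooms:
#     print(room.x_min, room.y_min, room.x_max, room.y_max)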
def train_from_zero():
    # Define variables
    limit = int(3e6)
    epsilon = 0.4
    max_steps = 1000
    alpha = 0.1
    gamma = 1
    graph_type = GraphType.EPISODES
    columns_list = range(1, 6)
    decimals = [0.01, 0.05]

    for decimal_precision in decimals:
        # Set vector decimal precision
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        for columns in columns_list:
            # Environment
            environment = DeepSeaTreasureRightDownStochastic(columns=columns)

            # Create agent
            agent = AgentMPQ(environment=environment,
                             hv_reference=environment.hv_reference,
                             epsilon=epsilon,
                             alpha=alpha,
                             gamma=gamma,
                             max_steps=max_steps)

            # Time train
            t0 = time.time()

            # Show number of columns
            print('# of columns: {}'.format(columns))

            # Agent training
            agent.train(graph_type=graph_type, limit=limit)

            # Calc total time
            total_time = time.time() - t0

            prepare_for_dumps(agent, columns, decimal_precision, graph_type,
                              limit, total_time)
def test_transition_reward(self):
    # In this environment, the reward does not depend on the initial state...
    state = self.environment.observation_space.sample()

    # ...nor on the action.
    action = self.environment.action_space.sample()

    # Asteroid states
    for asteroid_state in self.environment.asteroids:
        self.assertEqual(
            Vector((-100, -1)),
            self.environment.transition_reward(state=state,
                                               action=action,
                                               next_state=asteroid_state))

    # Radiation states
    for radiation_state in self.environment.radiations:
        self.assertEqual(
            Vector((0, -11)),
            self.environment.transition_reward(state=state,
                                               action=action,
                                               next_state=radiation_state))

    # Final states
    for final_state, final_reward in self.environment.finals.items():
        self.assertEqual(
            Vector((final_reward, -1)),
            self.environment.transition_reward(state=state,
                                               action=action,
                                               next_state=final_state))

    # All remaining states yield the default reward.
    simple_states = self.environment.states() - set(
        self.environment.finals.keys()).union(
            self.environment.radiations).union(self.environment.asteroids)

    for simple_state in simple_states:
        self.assertEqual(
            Vector((0, -1)),
            self.environment.transition_reward(state=state,
                                               action=action,
                                               next_state=simple_state))
def setUp(self):
    # An observation space
    observation_space = gym.spaces.Discrete(7)

    # Default reward
    default_reward = Vector([1, 2, 1])

    # Set initial seed to 0 for testing.
    self.environment = Environment(observation_space=observation_space,
                                   default_reward=default_reward,
                                   seed=0)
def __init__(self,
             initial_state: tuple = (0, 0),
             default_reward: tuple = (0,),
             seed: int = 0,
             columns: int = 0,
             action_space: gym.spaces = None):
    """
    :param initial_state: Initial state where the agent starts.
    :param default_reward: (treasure_value, )
    :param seed: Seed used for np.random.RandomState method.
    """

    # Mesh shape of the original full-size environment.
    original_mesh_shape = (10, 11)

    # Clamp the number of columns to the valid range.
    if columns < 1 or columns > original_mesh_shape[0]:
        columns = original_mesh_shape[0]

    # List of all treasures and their rewards.
    finals = {
        (0, 1): 5,
        (1, 2): 80,
        (2, 3): 120,
        (3, 4): 140,
        (4, 4): 145,
        (5, 4): 150,
        (6, 7): 163,
        (7, 7): 166,
        (8, 9): 173,
        (9, 10): 175,
    }

    # Keep only the final states within the requested columns.
    finals = dict(filter(lambda x: x[0][0] < columns, finals.items()))

    obstacles = frozenset()
    obstacles = obstacles.union([(0, y) for y in range(2, 11)])
    obstacles = obstacles.union([(1, y) for y in range(3, 11)])
    obstacles = obstacles.union([(2, y) for y in range(4, 11)])
    obstacles = obstacles.union([(3, y) for y in range(5, 11)])
    obstacles = obstacles.union([(4, y) for y in range(5, 11)])
    obstacles = obstacles.union([(5, y) for y in range(5, 11)])
    obstacles = obstacles.union([(6, y) for y in range(8, 11)])
    obstacles = obstacles.union([(7, y) for y in range(8, 11)])
    obstacles = obstacles.union([(8, y) for y in range(10, 11)])

    # Keep only the obstacles within the requested columns.
    obstacles = frozenset(filter(lambda x: x[0] < columns, obstacles))

    # Resize mesh_shape
    mesh_shape = (columns, 11)

    # Default reward plus time (time_inverted, treasure_value, water_pressure)
    default_reward = (-1,) + default_reward + (0,)
    default_reward = Vector(default_reward)

    super().__init__(mesh_shape=mesh_shape,
                     seed=seed,
                     default_reward=default_reward,
                     initial_state=initial_state,
                     finals=finals,
                     obstacles=obstacles,
                     action_space=action_space)
def test_transition_reward(self):
    # In this environment, the reward does not depend on the initial state...
    state = self.environment.observation_space.sample()

    # ...nor on the action.
    action = self.environment.action_space.sample()

    # An intermediate state
    self.assertEqual(
        self.environment.transition_reward(state=state,
                                           action=action,
                                           next_state=(1, 1)),
        Vector((-1, 0, -2)))

    # A final state
    self.assertEqual(
        self.environment.transition_reward(state=state,
                                           action=action,
                                           next_state=(1, 2)),
        Vector([-1, 80, -3]))
def main():
    # Get trained agent
    agent: AgentBN = get_trained_agent()

    # Set initial state
    initial_state = ((2, 4), (0, 0))

    # agent: AgentBN = AgentBN.load(
    #     filename='bn/models/rg_1584437328_0.005.bin'
    # )

    v_s_0 = agent.v[initial_state]
    vectors = Vector.m3_max(set(v_s_0))

    # Simulation
    simulation = dict()

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=0.0000001)

    for vector in vectors:
        # Recreate the objective vector.
        # objective_vector = IndexVector(
        #     index=vector, vector=trained_agent.v[initial_state][vector]
        # )
        objective_vector = vector.copy()

        # Recover the policy for this objective vector.
        policy = agent.recover_policy(initial_state=initial_state,
                                      objective_vector=objective_vector,
                                      iterations_limit=agent.total_sweeps)

        policy_evaluated = agent.evaluate_policy(policy=policy,
                                                 tolerance=0.0000001)

        simulation.update({objective_vector: (policy, policy_evaluated)})

    print(simulation)
def train_agent() -> Agent:
    # Environment
    # environment = DeepSeaTreasureRightDownStochastic(columns=3)
    # environment = DeepSeaTreasureRightDown(columns=3)
    # environment = PyramidMDPNoBounces(diagonals=3, n_transition=0.95)
    # environment = DeepSeaTreasure()
    environment = ResourceGathering()

    # Agent
    # agent = AgentMPQ(
    #     environment=environment, hv_reference=environment.hv_reference,
    #     alpha=0.1, epsilon=0.4, max_steps=1000
    # )
    # agent = AgentMPQ(environment=environment,
    #                  hv_reference=environment.hv_reference, alpha=0.01)
    agent = AgentBN(environment=environment, gamma=.9)

    # Vector precision
    Vector.set_decimal_precision(decimal_precision=0.01)

    # Train agent
    # agent.train(graph_type=GraphType.SWEEP, tolerance=0.00001)
    agent.train(graph_type=GraphType.SWEEP, limit=13)

    return agent
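# A short follow-up sketch: extract the non-dominated value vectors for the
# ResourceGathering start state ((2, 4), (0, 0)) from the trained agent,
# mirroring the recovery flow used in the other scripts here (illustrative
# only, not an additional entry point).
#
# agent = train_agent()
# non_dominated = Vector.m3_max(set(agent.v[((2, 4), (0, 0))]))
# print(non_dominated)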
def setUp(self):
    # Mesh shape
    mesh_shape = (7, 7)

    # Default reward
    default_reward = Vector([1, 2, 1])

    # Obstacles
    obstacles = frozenset({
        (0, 0), (1, 1)
    })

    # Set initial seed to 0 for testing.
    self.environment = EnvMesh(mesh_shape=mesh_shape,
                               default_reward=default_reward,
                               seed=0,
                               obstacles=obstacles)
def __init__(self,
             initial_state: tuple = ((2, 4), (0, 0)),
             default_reward: tuple = (0, 0, 0),
             seed: int = 0,
             p_attack: float = 0.1,
             mesh_shape: tuple = (5, 5),
             gold_positions: frozenset = frozenset({(2, 0)}),
             gem_positions: frozenset = frozenset({(4, 1)}),
             observation_space: gym.spaces = None):
    """
    :param initial_state: Initial state where the agent starts.
    :param default_reward: (enemy_attack, gold, gems)
    :param seed: Seed used for np.random.RandomState method.
    :param p_attack: Probability that an enemy attacks when the agent is in an
        enemy position.
    """

    default_reward = Vector(default_reward)

    if observation_space is None:
        # Build the observation space (position(x, y), quantity(gold, gems))
        observation_space = gym.spaces.Tuple(
            (gym.spaces.Tuple((gym.spaces.Discrete(mesh_shape[0]),
                               gym.spaces.Discrete(mesh_shape[1]))),
             gym.spaces.Tuple(
                 (gym.spaces.Discrete(2), gym.spaces.Discrete(2)))))

    # Define final states
    finals = frozenset()

    # Super constructor call.
    super().__init__(mesh_shape=mesh_shape,
                     seed=seed,
                     initial_state=initial_state,
                     default_reward=default_reward,
                     observation_space=observation_space,
                     finals=finals)

    # Positions where there is gold.
    self.gold_positions = gold_positions

    # Positions where there is a gem.
    self.gem_positions = gem_positions

    # Positions where there are enemies.
    self.enemies_positions = {(3, 0), (2, 1)}
    self.p_attack = p_attack

    self.home_position = (2, 4)
    self.checkpoints_states = self._checkpoints_states()
def train_from_file():
    # Models path
    models_path = 'mpq/models/dstrds_1579869395_1.0_4.bin'
    agent: AgentMPQ = u_models.binary_load(
        path=dumps_path.joinpath(models_path))

    # Data path
    data_path = dumps_path.joinpath(
        'mpq/train_data/dstrds_1579869395_1.0_4.yml')
    data_file = data_path.open(mode='r', encoding='UTF-8')

    # Load YAML from file
    data = yaml.load(data_file, Loader=yaml.FullLoader)

    # Extract relevant data for training
    before_training_execution = float(data['time'])
    decimal_precision = float(data['agent']['decimal_precision'])
    graph_type = GraphType.from_string(data['training']['graph_type'])
    limit = int(data['training']['limit'])
    columns = int(data['environment']['columns'])

    # Set decimal precision
    Vector.set_decimal_precision(decimal_precision=decimal_precision)

    # Time train
    t0 = time.time()

    # Agent training
    agent.train(graph_type=graph_type, limit=limit)

    # Calc total time, including the time spent before this execution.
    total_time = (time.time() - t0) + before_training_execution

    prepare_for_dumps(agent, columns, decimal_precision, graph_type, limit,
                      total_time)
class SpaceExploration(EnvMesh):
    # Possible actions
    _actions = {'UP': 0, 'UP RIGHT': 1, 'RIGHT': 2, 'DOWN RIGHT': 3,
                'DOWN': 4, 'DOWN LEFT': 5, 'LEFT': 6, 'UP LEFT': 7}

    # Experiments common hypervolume reference
    hv_reference = Vector([-100, -150])

    def __init__(self,
                 initial_state: tuple = (5, 2),
                 default_reward: tuple = (0, -1),
                 seed: int = 0,
                 action_space: gym.spaces = None):
        """
        :param initial_state: Initial state where the agent starts.
        :param default_reward: (mission_success, radiation)
        :param seed: Seed used for np.random.RandomState method.
        """

        # List of all treasures and their rewards.
        finals = {}
        finals.update({(0, i): 20 for i in range(5)})
        finals.update({(9, i): 10 for i in range(3)})
        finals.update({(12, i): 30 for i in range(5)})

        obstacles = frozenset()
        mesh_shape = (13, 5)
        default_reward = Vector(default_reward)

        super().__init__(mesh_shape=mesh_shape,
                         seed=seed,
                         initial_state=initial_state,
                         default_reward=default_reward,
                         finals=finals,
                         obstacles=obstacles,
                         action_space=action_space)

        self.asteroids = {
            (5, 0), (4, 1), (6, 1), (3, 2), (7, 2), (4, 3), (6, 3), (5, 4)
        }

        # Radiation states (if the agent is on any of these, it receives a -11
        # penalization).
        self.radiations = set()
        self.radiations = self.radiations.union({(1, i) for i in range(5)})
        self.radiations = self.radiations.union({(10, i) for i in range(5)})
        self.radiations = self.radiations.union({(11, i) for i in range(5)})

    def step(self, action: int) -> (tuple, Vector, bool, dict):
        """
        Given an action, do a step
        :param action:
        :return: (position, (mission_success, radiation), final, extra)
        """

        # Initialize reward as vector
        reward = self.default_reward.copy()

        # Update previous state
        self.current_state = self.next_state(action=action)

        # If the ship crashes into an asteroid, the ship is destroyed; otherwise,
        # the mission-success reward is taken from the final states.
        reward[0] = -100 if self.current_state in self.asteroids else self.finals.get(
            self.current_state, self.default_reward[0]
        )

        # If the agent is in a radiation position, the penalty is -11; otherwise,
        # it is the default radiation reward.
        reward[1] = -11 if self.current_state in self.radiations else self.default_reward[1]

        # Check if it is final
        final = self.is_final(self.current_state)

        # Set extra info
        info = {}

        return self.current_state, reward, final, info

    def next_position(self, action: int, position: tuple) -> (tuple, bool):
        """
        Given an action and a position, return the next position reached.
        :param action:
        :param position:
        :return:
        """

        # Get my position
        x, y = position

        # Get observation spaces
        observation_space_x, observation_space_y = self.observation_space.spaces

        # Do movement in a cyclic mesh
        if action == self.actions['UP']:
            y = ue.move_up(y=y, limit=observation_space_y.n)
        elif action == self.actions['RIGHT']:
            x = ue.move_right(x=x, limit=observation_space_x.n)
        elif action == self.actions['DOWN']:
            y = ue.move_down(y=y, limit=observation_space_y.n)
        elif action == self.actions['LEFT']:
            x = ue.move_left(x=x, limit=observation_space_x.n)
        elif action == self.actions['UP RIGHT']:
            y = ue.move_up(y=y, limit=observation_space_y.n)
            x = ue.move_right(x=x, limit=observation_space_x.n)
        elif action == self.actions['DOWN RIGHT']:
            y = ue.move_down(y=y, limit=observation_space_y.n)
            x = ue.move_right(x=x, limit=observation_space_x.n)
        elif action == self.actions['DOWN LEFT']:
            y = ue.move_down(y=y, limit=observation_space_y.n)
            x = ue.move_left(x=x, limit=observation_space_x.n)
        elif action == self.actions['UP LEFT']:
            y = ue.move_up(y=y, limit=observation_space_y.n)
            x = ue.move_left(x=x, limit=observation_space_x.n)

        # Set next position
        next_position = x, y

        return next_position, True

    def next_state(self, action: int, state: tuple = None) -> tuple:
        """
        Calculate the next position given the current position and the action;
        movement in this environment is 8-neighbor.
        :param state: If a position is given, do the action from that position.
        :param action: from action_space
        :return:
        """

        # Get my position
        position = state if state else self.current_state

        next_position, is_valid = self.next_position(action=action,
                                                     position=position)

        if not self.observation_space.contains(next_position) or not is_valid:
            next_position = position

        # Return (x, y) position
        return next_position

    def is_final(self, state: tuple = None) -> bool:
        """
        Is final if the agent crashes into an asteroid or is on a final position.
        :param state:
        :return:
        """

        # Check if the agent crashes into an asteroid
        crash = state in self.asteroids

        # Check if the agent is in a final position
        final = state in self.finals.keys()

        return crash or final

    def transition_reward(self, state: tuple, action: int, next_state: tuple) -> Vector:
        """
        Return the reward for reaching `next_state` from `state` using `action`.
        :param state: initial position
        :param action: action to do
        :param next_state: next position reached
        :return:
        """

        # Initialize reward as vector
        reward = self.default_reward.copy()

        # If the ship crashes into an asteroid, the ship is destroyed; otherwise,
        # the mission-success reward is taken from the final states.
        reward[0] = -100 if next_state in self.asteroids else self.finals.get(
            next_state, reward[0]
        )

        # If the agent is in a radiation position, the penalty is -11; otherwise,
        # it is the default radiation reward.
        reward[1] = -11 if next_state in self.radiations else reward[1]

        return reward
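# A minimal usage sketch for SpaceExploration, using the gym-style step API
# defined above. It assumes a reset() inherited from EnvMesh (an assumption:
# reset is not shown in this class); demo_space_exploration_rollout is a
# hypothetical helper for illustration.
def demo_space_exploration_rollout():
    environment = SpaceExploration()
    state = environment.reset()
    final = False
    while not final:
        # Take a random action and observe the (mission_success, radiation) reward.
        action = environment.action_space.sample()
        state, reward, final, _ = environment.step(action)
        print(state, reward)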
config = configparser.ConfigParser()
config.read(config_file)

# TODO: add error handling
# TODO: move all of this to the new class
start_point = Point(
    float(config['START POINT']['x']),
    float(config['START POINT']['y']),
    float(config['START POINT']['z']),
)
dimensions = Vector(
    float(config['SCENE DIMENSIONS']['dx']),
    float(config['SCENE DIMENSIONS']['dy']),
    float(config['SCENE DIMENSIONS']['dz']),
)
stl_file = config['OTHERS']['STL file path']
condition = float(config['OTHERS']['minimum volume'])
result_file_path = config['OTHERS']['result file path']

stl = STL(stl_file)

print('> Generate octree...')
root = Node(start_point, dimensions)
get_grid(root, condition=condition, object=stl)

### NP.ARRAY ###
# arr = array([], dtype=float)
def __init__(self,
             initial_state: tuple = (0, 0),
             default_reward: tuple = (0, ),
             columns: int = 10,
             seed: int = 0,
             action_space: gym.spaces = None):
    """
    :param initial_state: Initial state where the agent starts.
    :param default_reward: (time_inverted, treasure_value)
    :param columns: Number of columns used to build this environment (allows
        experimenting with an identical environment, but considering only the
        first k columns). By default 10 (all of them).
    :param seed: Seed used for np.random.RandomState method.
    :param action_space: Specific action space
    """

    # Mesh shape of the original full-size environment.
    original_mesh_shape = (10, 11)

    # Clamp the number of columns to the valid range.
    if columns < 1 or columns > original_mesh_shape[0]:
        columns = original_mesh_shape[0]

    # Dictionary with final states as keys, and treasure amounts as values.
    finals = {
        (0, 1): 1,
        (1, 2): 2,
        (2, 3): 3,
        (3, 4): 5,
        (4, 4): 8,
        (5, 4): 16,
        (6, 7): 24,
        (7, 7): 50,
        (8, 9): 74,
        (9, 10): 124,
    }

    # Keep only the final states within the requested columns.
    finals = dict(filter(lambda x: x[0][0] < columns, finals.items()))

    # Obstacle states.
    obstacles = frozenset()
    obstacles = obstacles.union([(0, y) for y in range(2, 11)])
    obstacles = obstacles.union([(1, y) for y in range(3, 11)])
    obstacles = obstacles.union([(2, y) for y in range(4, 11)])
    obstacles = obstacles.union([(3, y) for y in range(5, 11)])
    obstacles = obstacles.union([(4, y) for y in range(5, 11)])
    obstacles = obstacles.union([(5, y) for y in range(5, 11)])
    obstacles = obstacles.union([(6, y) for y in range(8, 11)])
    obstacles = obstacles.union([(7, y) for y in range(8, 11)])
    obstacles = obstacles.union([(8, y) for y in range(10, 11)])

    # Keep only the obstacles within the requested columns.
    obstacles = frozenset(filter(lambda x: x[0] < columns, obstacles))

    # Subspace of the environment to be considered
    mesh_shape = (columns, 11)

    # Default reward plus time (time_inverted, treasure_value)
    default_reward = (-1, ) + default_reward
    default_reward = Vector(default_reward)

    super().__init__(mesh_shape=mesh_shape,
                     initial_state=initial_state,
                     default_reward=default_reward,
                     finals=finals,
                     obstacles=obstacles,
                     seed=seed,
                     action_space=action_space)
class DeepSeaTreasure(EnvMesh):
    # Possible actions
    _actions = {'UP': 0, 'RIGHT': 1, 'DOWN': 2, 'LEFT': 3}

    # Pareto optimal policy vector-values
    pareto_optimal = [
        Vector([-1, 1]), Vector([-3, 2]), Vector([-5, 3]), Vector([-7, 5]),
        Vector([-8, 8]), Vector([-9, 16]), Vector([-13, 24]), Vector([-14, 50]),
        Vector([-17, 74]), Vector([-19, 124])
    ]

    # Experiments common hypervolume reference
    hv_reference = Vector((-25, 0))

    def __init__(self,
                 initial_state: tuple = (0, 0),
                 default_reward: tuple = (0, ),
                 columns: int = 10,
                 seed: int = 0,
                 action_space: gym.spaces = None):
        """
        :param initial_state: Initial state where the agent starts.
        :param default_reward: (time_inverted, treasure_value)
        :param columns: Number of columns used to build this environment (allows
            experimenting with an identical environment, but considering only the
            first k columns). By default 10 (all of them).
        :param seed: Seed used for np.random.RandomState method.
        :param action_space: Specific action space
        """

        # Mesh shape of the original full-size environment.
        original_mesh_shape = (10, 11)

        # Clamp the number of columns to the valid range.
        if columns < 1 or columns > original_mesh_shape[0]:
            columns = original_mesh_shape[0]

        # Dictionary with final states as keys, and treasure amounts as values.
        finals = {
            (0, 1): 1,
            (1, 2): 2,
            (2, 3): 3,
            (3, 4): 5,
            (4, 4): 8,
            (5, 4): 16,
            (6, 7): 24,
            (7, 7): 50,
            (8, 9): 74,
            (9, 10): 124,
        }

        # Keep only the final states within the requested columns.
        finals = dict(filter(lambda x: x[0][0] < columns, finals.items()))

        # Obstacle states.
        obstacles = frozenset()
        obstacles = obstacles.union([(0, y) for y in range(2, 11)])
        obstacles = obstacles.union([(1, y) for y in range(3, 11)])
        obstacles = obstacles.union([(2, y) for y in range(4, 11)])
        obstacles = obstacles.union([(3, y) for y in range(5, 11)])
        obstacles = obstacles.union([(4, y) for y in range(5, 11)])
        obstacles = obstacles.union([(5, y) for y in range(5, 11)])
        obstacles = obstacles.union([(6, y) for y in range(8, 11)])
        obstacles = obstacles.union([(7, y) for y in range(8, 11)])
        obstacles = obstacles.union([(8, y) for y in range(10, 11)])

        # Keep only the obstacles within the requested columns.
        obstacles = frozenset(filter(lambda x: x[0] < columns, obstacles))

        # Subspace of the environment to be considered
        mesh_shape = (columns, 11)

        # Default reward plus time (time_inverted, treasure_value)
        default_reward = (-1, ) + default_reward
        default_reward = Vector(default_reward)

        super().__init__(mesh_shape=mesh_shape,
                         initial_state=initial_state,
                         default_reward=default_reward,
                         finals=finals,
                         obstacles=obstacles,
                         seed=seed,
                         action_space=action_space)

    def step(self, action: int) -> (tuple, Vector, bool, dict):
        """
        Given an action, do a step
        :param action:
        :return: (position, (time_inverted, treasure_value), final, extra)
        """

        # Initialize reward as vector
        reward = self.default_reward.copy()

        # Update current position
        self.current_state = self.next_state(action=action)

        # Get treasure value
        reward[1] = self.finals.get(self.current_state, self.default_reward[1])

        # Set extra info
        info = {}

        # Check if it is final
        final = self.is_final(self.current_state)

        return self.current_state, reward, final, info

    def transition_reward(self, state: tuple, action: int, next_state: tuple) -> Vector:
        """
        Given a state, an action and a next state, return the corresponding
        reward.
        :param state:
        :param action:
        :param next_state:
        :return:
        """

        # Default reward
        reward = self.default_reward.copy()

        # Get treasure reward
        reward[1] = self.finals.get(next_state, self.default_reward[1])

        return reward
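# A minimal usage sketch for DeepSeaTreasure, using the gym-style step API
# defined above. It assumes a reset() inherited from EnvMesh (an assumption:
# reset is not shown in this class); demo_deep_sea_rollout is a hypothetical
# helper for illustration.
def demo_deep_sea_rollout():
    environment = DeepSeaTreasure(columns=3)
    state = environment.reset()
    final = False
    while not final:
        # Take a random action; reward is (time_inverted, treasure_value).
        action = environment.action_space.sample()
        state, reward, final, _ = environment.step(action)
        print(state, reward)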
clock = pygame.time.Clock()
font = pygame.font.SysFont(None, 25)

WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLUE = (0, 0, 255)

FRAME_RATE = 60
WIDTH = 1000
HEIGHT = 800
FRICTION = 0
ELASTICITY = 1.0
GRAVITY = Vector(0, 0)
BALL_SIZE = 50
INITIAL_VELOCITY_SCALAR = 5

GAME_DISPLAY = pygame.display.set_mode((WIDTH, HEIGHT))
GAME_DISPLAY.fill(BLACK)

mouse_pos = None
modify_type = None
modify_up = False
modify_down = False

balls = []


def update_balls(balls):
    updated_balls = []
def draft_w():
    tolerance = 0.00001

    for decimal_precision in [0.05, 0.005, 0.001]:
        # Create environment
        # environment = ResourceGatheringEpisodicSimplified()
        # environment = ResourceGatheringSimplified()
        environment = ResourceGatheringEpisodic()

        # Create agent
        agent_w = AgentW(environment=environment, convergence_graph=True,
                         gamma=.9)

        # Time train
        t0 = time.time()

        # Set the number of decimals allowed
        Vector.set_decimal_precision(decimal_precision=decimal_precision)

        agent_w.train(graph_type=GraphType.SWEEP, limit=1)

        # Calc total time
        total_time = time.time() - t0

        # Convert to plain lists of vectors
        vectors = {
            key: [vector.tolist() for vector in vectors]
            for key, vectors in agent_w.v.items()
        }

        # Prepare full data to dump
        data = {
            'time': '{}s'.format(total_time),
            'memory': {
                'v_s_0': len(agent_w.v[environment.initial_state]),
                'full': sum(len(vectors) for vectors in agent_w.v.values())
            },
            'vectors': vectors
        }

        # Configuration of environment
        environment_info = vars(environment).copy()
        environment_info.pop('_action_space', None)
        environment_info.pop('np_random', None)

        # Configuration of agent
        agent_info = {
            'gamma': agent_w.gamma,
            'initial_q_value': agent_w.initial_q_value,
            'initial_seed': agent_w.initial_seed,
            'interval_to_get_data': agent_w.interval_to_get_data,
            'max_steps': agent_w.max_iterations,
            'total_sweeps': agent_w.total_sweeps,
            'tolerance': tolerance
        }

        # Extra data
        data.update({'environment': environment_info})
        data.update({'agent': agent_info})

        # Dump partial execution
        dumps(data=data, environment=environment)
class MoPuddleWorld(EnvMesh):
    # Possible actions
    _actions = {'UP': 0, 'RIGHT': 1, 'DOWN': 2, 'LEFT': 3}

    # Experiments common hypervolume reference
    hv_reference = Vector([-50, -150])

    def __init__(self,
                 default_reward: tuple = (10, 0),
                 penalize_non_goal: float = -1,
                 seed: int = 0,
                 final_state: tuple = (19, 0),
                 action_space: gym.spaces = None):
        """
        :param default_reward: (non_goal_reached, puddle_penalize)
        :param penalize_non_goal: Penalization received while the agent has not
            reached a final position.
        :param seed: Initial seed. The same is used for _action_space,
            observation_space, and the random number generator.
        :param final_state: This environment has only one final position.
        """

        self.final_state = final_state
        mesh_shape = (20, 20)
        default_reward = VectorDecimal(default_reward)

        super().__init__(mesh_shape=mesh_shape,
                         seed=seed,
                         default_reward=default_reward,
                         action_space=action_space)

        self.puddles = frozenset()
        self.puddles = self.puddles.union(
            [(x, y) for x in range(0, 11) for y in range(3, 7)])
        self.puddles = self.puddles.union(
            [(x, y) for x in range(6, 10) for y in range(2, 14)])
        self.penalize_non_goal = penalize_non_goal

        self.current_state = self.reset()

        # Get free spaces
        self.free_spaces = set(self.states() - self.puddles)

    def step(self, action: int) -> (tuple, VectorDecimal, bool, dict):
        """
        Given an action, do a step
        :param action:
        :return: (position, (non_goal_reached, puddle_penalize), final, extra)
        """

        # Initialize reward as vector
        reward = self.default_reward.copy()

        # Update previous position
        self.current_state = self.next_state(action=action)

        # Check if the agent has reached the goal
        final = self.is_final(self.current_state)

        # Set final reward
        if not final:
            reward[0] = self.penalize_non_goal

        # If the current position is in a puddle
        if self.current_state in self.puddles:
            # Set penalization per distance
            reward[1] = self.calc_puddle_penalization(state=self.current_state)

        # Set extra info
        info = {}

        return self.current_state, reward, final, info

    def calc_puddle_penalization(self, state: tuple) -> float:
        """
        Return a float that represents a penalization: the lowest Manhattan
        distance between the given state and the nearest free (non-puddle)
        space.
        :param state:
        :return:
        """

        # Minimum distance found! (Use the given state, not self.current_state,
        # and do not shadow the `state` parameter with the loop variable.)
        min_distance = min(
            cityblock(state, free_space) for free_space in self.free_spaces)

        # Set penalization per distance
        return -min_distance

    def reset(self) -> tuple:
        """
        Get a random non-goal position as the current position.
        :return:
        """

        # Reset to initial seed
        self.seed(seed=self.initial_seed)

        random_space = None

        while random_space is None or random_space == self.final_state:
            random_space = self.observation_space.sample()

        self.current_state = random_space
        return self.current_state

    def is_final(self, state: tuple = None) -> bool:
        """
        Is final if the agent is on the final position.
        :param state:
        :return:
        """
        return state == self.final_state

    def transition_reward(self, state: tuple, action: int, next_state: tuple) -> Vector:
        """
        Return the reward for reaching `next_state` from `state` using `action`.
        :param state: initial position
        :param action: action to do
        :param next_state: next position reached
        :return:
        """

        # Initialize reward as vector
        reward = self.default_reward.copy()

        # Check if the agent has reached the goal
        final = self.is_final(next_state)

        # Set final reward
        if not final:
            reward[0] = self.penalize_non_goal

        # If the next position is in a puddle
        if next_state in self.puddles:
            # Minimum distance found!
            min_distance = min(
                cityblock(next_state, free_space)
                for free_space in self.free_spaces)

            # Set penalization per distance
            reward[1] = -min_distance

        return reward

    def states(self) -> set:
        """
        Return all possible states of this environment.
        :return:
        """

        # Unpack spaces
        x_position, y_position = self.observation_space.spaces

        return set(
            (x, y) for x in range(x_position.n) for y in range(y_position.n)
        ).difference({self.final_state})
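# A minimal usage sketch for MoPuddleWorld, using the reset() and step()
# defined above; demo_puddle_world_rollout is a hypothetical helper for
# illustration (the random rollout may take many steps before reaching the
# single goal state).
def demo_puddle_world_rollout():
    environment = MoPuddleWorld()
    state = environment.reset()
    final = False
    while not final:
        # Take a random action; reward is (non_goal_reached, puddle_penalize).
        action = environment.action_space.sample()
        state, reward, final, _ = environment.step(action)
        print(state, reward)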
    },
    'W_{0.01}': {
        'color': 'c',
        'marker': 'state'
    },
    'W_{0.005}': {
        'color': 'b',
        'marker': 'd'
    },
    'W_{0.001}': {
        'color': 'k',
        'marker': 'o'
    }
}

vector_reference = Vector((-25, 0))


def pareto_graph(data: dict):
    # Columns
    columns = list(data.keys())[0]

    # Prepare the Pareto data dump file
    pareto_file = Path(__file__).parent.joinpath(
        'article/output/pareto_{}.m'.format(columns))

    # If any parent directory doesn't exist, create it.
    pareto_file.parent.mkdir(parents=True, exist_ok=True)

    data = data[columns]