def run_env_once(phenotype):
    # The original referenced an undefined `env`; the single vectorized
    # environment created here is used throughout instead.
    env = SubprocVecEnv([make_env(env_name, envs_size)])

    test_organism([phenotype], env, render=True)

    # Visualize().update(phenotype)

    feedforward_highest = FeedforwardCUDA([phenotype])
    states = env.reset()

    done = False
    distance = 0.0
    last_distance = 0.0
    distance_stagnation = 0

    while not done:
        actions = feedforward_highest.update(np.array([states]))
        states, reward, done, info = env.step(actions[0])

        distance += np.around(states[2], decimals=2)

        # End the run if the agent has not gained distance for 100 frames
        if distance <= last_distance:
            distance_stagnation += 1
        else:
            distance_stagnation = 0

        if distance_stagnation >= 100:
            done = True

        last_distance = distance

        env.render()

    env.close()
def run_env_once(phenotype, env):
    feedforward_highest = FeedforwardCUDA()
    states = env.reset()

    done = False
    last_distance = 0.0
    distance_stagnation = 0
    final_reward = 0.0

    while not done:
        actions = feedforward_highest.update([phenotype], np.array([states]))
        states, reward, done, info = env.step(actions[0])

        pos = info["pos"]
        final_reward += reward

        # End the run if the agent has not advanced for 100 frames
        if pos <= last_distance:
            distance_stagnation += 1
        else:
            distance_stagnation = 0

        if distance_stagnation >= 100:
            done = True

        last_distance = pos

        env.render()

    print("Final rewards: {}".format(final_reward))
def __init__(self):
    print("Creating envs...")
    self.envs = SubprocVecEnv([make_env(env_name, seed) for seed in range(envs_size)])
    self.num_of_envs = envs_size
    self.feedforward = FeedforwardCUDA()
    print("Done.")
def run_env_once(phenotype, env):
    feedforward_highest = FeedforwardCUDA()
    states = env.reset()

    done = False
    distance = 0.0
    last_distance = 0.0
    distance_stagnation = 0

    # image = env.render(mode='rgb_array')
    # images = []
    # activations = []

    while not done:
        actions = feedforward_highest.update([phenotype], np.array([states]))
        # print(actions)
        states, reward, done, info = env.step(actions[0])

        distance += np.around(states[2], decimals=2)

        if distance <= last_distance:
            distance_stagnation += 1
        else:
            distance_stagnation = 0

        if distance_stagnation >= 100:
            done = True

        last_distance = distance

        # activations.append(feedforward_highest.mem[0])
        # images.append(env.render(mode='rgb_array'))
        env.render()
def test_different_input():
    G = nx.DiGraph()
    G.add_nodes_from([
        (10, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (-1.0, -1.0)}),
        (20, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (0.0, -1.0)}),
        (30, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (40, {"activation": np.tanh, "type": NeuronType.HIDDEN}),
        (60, {"activation": np.tanh, "type": NeuronType.OUTPUT, "pos": (-1.0, 1.0)}),
        (70, {"activation": np.tanh, "type": NeuronType.OUTPUT, "pos": (1.0, 1.0)}),
    ])

    G.add_weighted_edges_from([
        (10, 40, 0.7),
        (20, 40, 0.4),
        (40, 60, 0.1),
        (40, 70, 0.25),
    ])

    phenotype = Phenotype(G, 0)

    inputs = np.array([0.5, 0.2, 0.7])

    feedforward_highest = FeedforwardCUDA()
    result = feedforward_highest.update([phenotype], [inputs])[0]

    answers = [0.0, 0.0]
    hidden_node = math.tanh(math.tanh(inputs[0]) * 0.7 + math.tanh(inputs[1]) * 0.4)
    answers[0] = math.tanh(hidden_node * 0.1)
    answers[1] = math.tanh(hidden_node * 0.25)

    np.testing.assert_array_almost_equal(result, answers)
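# Reader aid, not part of the original test suite: the expected values in these
# tests encode a layered tanh feedforward -- inputs are squashed with tanh, then
# every downstream node applies tanh to the weighted sum of its incoming values.
# A minimal NumPy sketch of that reference computation, using the weights of
# test_different_input above (3 inputs -> 1 hidden -> 2 outputs):
import numpy as np


def reference_forward(inputs, w_in_hidden, w_hidden_out):
    # Squash the raw inputs, propagate through the hidden layer, then the outputs.
    hidden = np.tanh(np.tanh(inputs) @ w_in_hidden)
    return np.tanh(hidden @ w_hidden_out)


w1 = np.array([[0.7], [0.4], [0.0]])  # edges 10->40 and 20->40; input 30 is unconnected
w2 = np.array([[0.1, 0.25]])          # edges 40->60 and 40->70
print(reference_forward(np.array([0.5, 0.2, 0.7]), w1, w2))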
def test_single_edges():
    G = nx.DiGraph()
    G.add_nodes_from([
        (10, {"activation": np.tanh, "type": NeuronType.INPUT, "bias": 0.0, "pos": (-1.0, -1.0)}),
        (20, {"activation": np.tanh, "type": NeuronType.INPUT, "bias": 0.0, "pos": (0.0, -1.0)}),
        (30, {"activation": np.tanh, "type": NeuronType.INPUT, "bias": 0.0, "pos": (1.0, -1.0)}),
        (60, {"activation": np.tanh, "type": NeuronType.OUTPUT, "bias": 0.0, "pos": (-1.0, 1.0)}),
        (70, {"activation": np.tanh, "type": NeuronType.OUTPUT, "bias": 0.0, "pos": (1.0, 1.0)}),
    ])

    G.add_weighted_edges_from([
        (20, 60, 0.4),
        (30, 70, 0.1),
    ])

    phenotype = Phenotype(G, 0)

    inputs = np.array([0.5, 0.5, 0.5])

    feedforward_highest = FeedforwardCUDA()
    result = feedforward_highest.update([phenotype], [inputs])[0]

    norm_inputs = np.tanh(inputs)
    # Edge 20 -> 60 carries input index 1 and edge 30 -> 70 carries input index 2
    # (all inputs are equal here, so the values coincide).
    answers = np.tanh([norm_inputs[1] * 0.4, norm_inputs[2] * 0.1])

    np.testing.assert_array_almost_equal(result, answers)
def test_hidden_nodes():
    G = nx.DiGraph()
    G.add_nodes_from([
        (10, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (-1.0, -1.0)}),
        (20, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (0.0, -1.0)}),
        (30, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (40, {"activation": np.tanh, "type": NeuronType.HIDDEN}),
        (60, {"activation": np.tanh, "type": NeuronType.OUTPUT, "pos": (-1.0, 1.0)}),
        (70, {"activation": np.tanh, "type": NeuronType.OUTPUT, "pos": (1.0, 1.0)}),
    ])

    G.add_weighted_edges_from([
        (20, 40, 0.4),
        (40, 60, 0.1),
    ])

    phenotype = Phenotype(G, 0)

    inputs = np.array([1, 1, 1])

    feedforward_highest = FeedforwardCUDA()
    result = feedforward_highest.update([phenotype], [inputs])[0]

    norm_inputs = np.tanh(inputs)
    hidden = math.tanh(norm_inputs[1] * 0.4)
    answers = np.tanh([hidden * 0.1, 0.0])

    np.testing.assert_array_almost_equal(result, answers, decimal=4)
class TestOrganism(Evaluation):
    def __init__(self):
        print("Creating envs...")
        self.envs = SubprocVecEnv([make_env(env_name, seed) for seed in range(envs_size)])
        self.num_of_envs = envs_size
        self.feedforward = FeedforwardCUDA()
        print("Done.")

    def evaluate(self, phenotypes: List[Phenotype]) -> Tuple[np.ndarray, np.ndarray]:
        states = self.envs.reset()

        num_of_runs = 3

        fitnesses = np.zeros(len(self.envs.remotes), dtype=np.float64)

        done = False
        done_tracker = np.zeros(len(self.envs.remotes), dtype=np.int32)

        # Negative when there are fewer phenotypes than envs.
        diff = len(phenotypes) - len(self.envs.remotes)
        if diff < 0:
            # Mark the surplus env slots as already finished.
            done_tracker[diff:] = num_of_runs

        while not done:
            actions = self.feedforward.update(phenotypes, states[:len(phenotypes)])
            actions = np.pad(actions, ((0, abs(diff)), (0, 0)), 'constant')

            states, rewards, dones, info = self.envs.step(np.argmax(actions, axis=1))

            fitnesses[done_tracker < num_of_runs] += rewards[done_tracker < num_of_runs]

            # Finish run if the robot fell
            envs_run_done = dones == True
            done_tracker[envs_run_done] += dones[envs_run_done]

            done = all(r >= num_of_runs for r in done_tracker)

            # Reset the done envs
            for i in np.where(dones == True)[0]:
                remote = self.envs.remotes[i]
                remote.send(('reset', None))
                # If we don't receive, the remote will not reset properly
                reset_obs = remote.recv()[0]
                states[i] = reset_obs

            # self.envs.render()

        final_fitnesses = []
        fitnesses_t = fitnesses.T
        for i in range(fitnesses_t.shape[0]):
            fitness = fitnesses_t[i]
            mean = np.sum(fitness) / num_of_runs
            final_fitnesses.append(mean)

        return (np.array(final_fitnesses[:len(phenotypes)]), np.zeros((len(phenotypes), 0)))
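# Usage sketch, not part of the original script: drive the evaluator above with a
# population produced elsewhere by the algorithm. `phenotypes` (a list of Phenotype
# objects whose inputs and outputs match the environment) is assumed to exist;
# TestOrganism, env_name and envs_size come from this module.
def evaluate_population(phenotypes):
    evaluator = TestOrganism()
    fitnesses, behaviors = evaluator.evaluate(phenotypes)
    print("Mean fitness: {}".format(np.mean(fitnesses)))
    return fitnesses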
def run_env_once(phenotype, env):
    feedforward_highest = FeedforwardCUDA()
    states = env.reset()

    done = False
    final_reward = 0.0

    while not done:
        actions = feedforward_highest.update([phenotype], np.array([states]))
        states, reward, done, info = env.step(np.argmax(actions[0]))

        final_reward += reward

        env.render()

    print("Final rewards: {}".format(final_reward))
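# Usage sketch, not part of the original script: replay a phenotype in a single,
# non-vectorized environment. Assumes `gym` is importable, `env_name` matches the
# training environment, and `best_phenotype` (hypothetical name) was produced by
# the evolutionary run.
import gym


def replay_best(best_phenotype):
    env = gym.make(env_name)
    run_env_once(best_phenotype, env)
    env.close()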
def test_multiple_edges():
    G = nx.DiGraph()
    G.add_nodes_from([
        (1, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (-1.0, -1.0)}),
        (2, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (0.0, -1.0)}),
        (3, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (6, {"activation": np.tanh, "type": NeuronType.OUTPUT, "pos": (-1.0, 1.0)}),
        (7, {"activation": np.tanh, "type": NeuronType.OUTPUT, "pos": (1.0, 1.0)}),
    ])

    G.add_weighted_edges_from([(2, 6, 0.4), (3, 6, 0.1)])

    phenotype = Phenotype(G, 0)

    inputs = np.array([1, 1, 1])

    feedforward_highest = FeedforwardCUDA()
    result = feedforward_highest.update([phenotype], [inputs])[0]

    norm_inputs = np.tanh(inputs)
    answer = norm_inputs[1] * 0.4 + norm_inputs[2] * 0.1
    answers = np.tanh([answer, 0.0])

    np.testing.assert_array_almost_equal(result, answers)
def test_organism(phenotypes, envs, render=False):
    feedforward = FeedforwardCUDA(phenotypes)

    observations = envs.reset()

    obs_32 = np.float32(observations)
    actions = feedforward.update(obs_32)

    fitnesses = np.zeros(len(envs.remotes), dtype=np.float64)

    done = False
    done_tracker = np.array([False for _ in range(len(envs.remotes))])

    diff = len(phenotypes) - len(envs.remotes)
    if diff < 0:
        done_tracker[diff:] = True

    distances = np.zeros(len(envs.remotes))
    last_distances = np.zeros(len(envs.remotes))
    stagnations = np.zeros(len(envs.remotes))

    all_states = []

    max_steps = 50
    steps = max_steps

    while not done:
        # Pad the action batch along the phenotype axis so every env receives an action.
        actions = np.pad(actions, ((0, abs(diff)), (0, 0)), 'constant')
        states, rewards, dones, info = envs.step(actions)

        # if render:
        #     envs.remotes[0].send(('render', None))
        #     envs.remotes[0].recv()

        actions = feedforward.update(states)

        fitnesses[done_tracker == False] += np.around(rewards[done_tracker == False], decimals=4)
        # fitnesses[done_tracker == False] = np.around(rewards[done_tracker == False], decimals=2)

        envs_done = dones == True
        done_tracker[envs_done] = dones[envs_done]
        envs_running = len([d for d in done_tracker if d == False])

        # print("\r" + " " * 100, end='', flush=True)
        # print("\rEnvs running: {}/{}".format(envs_running, len(phenotypes)), end='')

        done = envs_running == 0

        # Mark envs as done when their distance has not changed for 100 steps.
        distances += np.around(states.T[2], decimals=2)
        stagnations += distances == last_distances
        done_tracker[stagnations >= 100] = True
        last_distances = distances.copy()  # copy: the += above mutates `distances` in place

        # Sample a subset of the observation every max_steps frames as the behavior.
        if steps == max_steps:
            steps = 0
            all_states.append(states[:, [0, 4, 6, 8, 9, 11, 13]])

        steps += 1

    all_states = np.array(all_states)

    flattened_states = []
    for row_i in range(all_states.shape[1]):
        flattened_states.append(all_states[:, row_i].flatten())

    flattened_states = pad_matrix(np.array(flattened_states), behavior_matrix_size)

    return (fitnesses, flattened_states)
def test_custom():
    G = nx.DiGraph()
    G.add_nodes_from([
        (1, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (-1.0, -1.0)}),
        (2, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (3, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (4, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (5, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (6, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (7, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (8, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (9, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (10, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        # (11, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        # (12, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (13, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (14, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (15, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (16, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (17, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (18, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (19, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (20, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (21, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (22, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (23, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (24, {"activation": np.tanh, "type": NeuronType.INPUT, "pos": (1.0, -1.0)}),
        (25, {"activation": np.tanh, "type": NeuronType.HIDDEN, "pos": (1.0, -1.0)}),
        (26, {"activation": np.tanh, "type": NeuronType.OUTPUT, "pos": (-1.0, 1.0)}),
        (27, {"activation": np.tanh, "type": NeuronType.OUTPUT, "pos": (-1.0, 1.0)}),
        (28, {"activation": np.tanh, "type": NeuronType.OUTPUT, "pos": (-1.0, 1.0)}),
        (29, {"activation": np.tanh, "type": NeuronType.OUTPUT, "pos": (-1.0, 1.0)}),
    ])

    # Note: node 11 is commented out above, so add_weighted_edges_from creates it
    # implicitly, without any node attributes.
    G.add_weighted_edges_from([
        (10, 25, 1.0),
        (11, 25, 1.0),
        (25, 27, 1.0),
    ])

    phenotype = Phenotype(G, 0)

    inputs = np.array([
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0.89, 0.996, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ])

    feedforward_highest = FeedforwardCUDA()
    result = feedforward_highest.update([phenotype], [inputs])[0]

    print(result)
    print(feedforward_highest.mem[0])
def test_multiple_phenotypes():
    G1 = nx.DiGraph()
    G1.add_nodes_from([
        (0, {"activation": np.tanh, "type": NeuronType.INPUT, "bias": 0.0, "pos": (-1.0, -1.0)}),
        (-1, {"activation": np.tanh, "type": NeuronType.INPUT, "bias": 0.0, "pos": (0.0, -1.0)}),
        (6, {"activation": np.tanh, "type": NeuronType.HIDDEN, "bias": 0.0, "pos": (-1.0, 0.0)}),
        (10, {"activation": np.tanh, "type": NeuronType.HIDDEN, "bias": 0.0, "pos": (0.0, 0.0)}),
        (3, {"activation": np.tanh, "type": NeuronType.HIDDEN, "bias": 0.0, "pos": (1.0, 0.0)}),
        (-2, {"activation": np.tanh, "type": NeuronType.OUTPUT, "bias": 0.0, "pos": (-1.0, 1.0)}),
    ])
    G1.add_weighted_edges_from([
        (0, -2, 0.15286614111378544),
        (-1, -2, 0.8345164457693686),
        (0, 3, 0.15286614111378544),
        (3, -2, 1.0),
        (0, 6, 0.15286614111378544),
        (6, 3, 1.0),
        (6, 10, 1.0),
        (10, 3, 1.0),
    ])
    phenotype1 = Phenotype(G1, 0)

    G2 = nx.DiGraph()
    G2.add_nodes_from([
        (0, {"activation": np.tanh, "type": NeuronType.INPUT, "bias": 0.0, "pos": (-1.0, -1.0)}),
        (-1, {"activation": np.tanh, "type": NeuronType.INPUT, "bias": 0.0, "pos": (0.0, -1.0)}),
        (6, {"activation": np.tanh, "type": NeuronType.HIDDEN, "bias": 0.0, "pos": (-1.0, 0.0)}),
        (3, {"activation": np.tanh, "type": NeuronType.HIDDEN, "bias": 0.0, "pos": (1.0, 0.0)}),
        (-2, {"activation": np.tanh, "type": NeuronType.OUTPUT, "bias": 0.0, "pos": (-1.0, 1.0)}),
    ])
    G2.add_weighted_edges_from([
        (0, -2, 0.15286614111378544),
        (-1, -2, 0.8345164457693686),
        (0, 3, 0.15286614111378544),
        (3, -2, 1.0),
        (0, 6, 0.15286614111378544),
        (6, 3, 1.0),
    ])
    phenotype2 = Phenotype(G2, 0)

    inputs = np.array([1, 1])

    feedforward_highest = FeedforwardCUDA()
    result_one = feedforward_highest.update([phenotype1], [inputs])
    result_both = feedforward_highest.update([phenotype1, phenotype2], [inputs, inputs])

    assert result_one[0] == result_both[0]
class TestOrganism(Evaluation):
    def __init__(self):
        print("Creating envs...")
        self.envs = SubprocVecEnv([make_env(env_name, seed) for seed in range(envs_size)])
        self.num_of_envs = envs_size
        self.feedforward = FeedforwardCUDA()
        print("Done.")

    def evaluate(self, phenotypes: List[Phenotype]) -> Tuple[np.ndarray, np.ndarray]:
        states = self.envs.reset()

        num_of_runs = 1

        fitnesses = np.zeros(len(self.envs.remotes), dtype=np.float64)

        done = False
        done_tracker = np.zeros(len(self.envs.remotes), dtype=np.int32)

        diff = len(phenotypes) - len(self.envs.remotes)
        if diff < 0:
            done_tracker[diff:] = num_of_runs

        # distances = np.zeros(len(self.envs.remotes))
        last_distances = np.zeros(len(self.envs.remotes))
        stagnations = np.zeros(len(self.envs.remotes))

        while not done:
            actions = self.feedforward.update(phenotypes, states[:len(phenotypes)])
            actions = np.pad(actions, ((0, abs(diff)), (0, 0)), 'constant')

            states, rewards, dones, info = self.envs.step(actions)

            pos = np.round(np.array([i['pos'] for i in info]), 2)

            # Only keep track of rewards for the right run
            # padded_rewards = np.pad(np.asmatrix(rewards), [(0, num_of_runs - 1), (0, 0)], mode='constant')
            # rolled_rewards = np.array(
            #     [np.roll(padded_rewards[:, i], done_tracker[i]) for i in range(len(done_tracker))]).T
            # fitnesses += rolled_rewards
            fitnesses[done_tracker < num_of_runs] += rewards[done_tracker < num_of_runs]

            # Finish run if it has not moved for a certain amount of frames
            stagnated_distances = pos == last_distances
            stagnations += stagnated_distances

            stopped_moving = stagnations >= 100
            dones[stopped_moving == True] = stopped_moving[stopped_moving == True]

            # Reset stagnations
            stagnations[stopped_moving == True] = 0

            last_distances = pos

            # Finish run if the robot fell
            envs_run_done = dones == True
            done_tracker[envs_run_done] += dones[envs_run_done]

            done = all(r >= num_of_runs for r in done_tracker)

            # Reset the done envs
            for i in np.where(dones == True)[0]:
                remote = self.envs.remotes[i]
                remote.send(('reset', None))
                # If we don't receive, the remote will not reset properly
                reset_obs = remote.recv()[0]
                states[i] = reset_obs

            # print(done_tracker)
            # print(done)

            # self.envs.render()

        final_fitnesses = []
        fitnesses_t = fitnesses.T
        for i in range(fitnesses_t.shape[0]):
            fitness = fitnesses_t[i]
            mean = np.sum(fitness) / num_of_runs
            final_fitnesses.append(mean)

        return (np.array(final_fitnesses[:len(phenotypes)]), np.zeros((len(phenotypes), 0)))
class TestOrganism(Evaluation):
    def __init__(self):
        print("Creating envs...")
        self.envs = SubprocVecEnv([make_env(env_name, seed) for seed in range(envs_size)])
        self.num_of_envs = envs_size
        self.feedforward = FeedforwardCUDA()
        print("Done.")

    def evaluate(self, phenotypes: List[Phenotype]) -> Tuple[np.ndarray, np.ndarray]:
        states = self.envs.reset()

        num_of_runs = 1

        states_32 = np.float32(states)
        actions = self.feedforward.update(phenotypes, states_32)

        fitnesses = np.zeros((num_of_runs, len(self.envs.remotes)), dtype=np.float64)
        all_states = np.zeros((num_of_runs, len(self.envs.remotes), behavior_dimensions))
        state_indexes = np.zeros((num_of_runs, len(self.envs.remotes)), dtype=np.int32)

        done = False
        done_tracker = np.zeros(len(self.envs.remotes), dtype=np.int32)

        diff = len(phenotypes) - len(self.envs.remotes)
        if diff < 0:
            done_tracker[diff:] = num_of_runs

        last_distances = np.zeros(len(self.envs.remotes))
        stagnations = np.zeros(len(self.envs.remotes))

        max_steps = 10
        steps = max_steps

        while not done:
            states_32 = np.float32(states)
            actions = self.feedforward.update(phenotypes, states_32[:len(phenotypes)])
            actions = np.pad(actions, ((0, abs(diff)), (0, 0)), 'constant')

            states, rewards, dones, info = self.envs.step(actions)

            pos = np.round(np.array([i['pos'] for i in info]), 2)

            # Only keep track of rewards for the right run
            padded_rewards = np.pad(np.asmatrix(rewards), [(0, num_of_runs - 1), (0, 0)], mode='constant')
            rolled_rewards = np.array([
                np.roll(padded_rewards[:, i], done_tracker[i])
                for i in range(len(done_tracker))
            ]).T
            fitnesses += rolled_rewards

            # Finish run if it has not moved for a certain amount of frames
            stagnated_distances = pos == last_distances
            stagnations += stagnated_distances

            stopped_moving = stagnations >= 100
            dones[stopped_moving == True] = stopped_moving[stopped_moving == True]

            # Reset stagnations
            stagnations[stopped_moving == True] = 0

            last_distances = pos

            # Finish run if the robot fell
            envs_run_done = dones == True
            done_tracker[envs_run_done] += dones[envs_run_done]

            done = all(r >= num_of_runs for r in done_tracker)

            # Reset the done envs
            for i in np.where(dones == True)[0]:
                remote = self.envs.remotes[i]
                remote.send(('reset', None))
                # If we don't receive, the remote will not reset properly
                reset_obs = remote.recv()[0]
                states[i] = reset_obs

            # Every max_steps frames, append the relevant features to each env's behavior vector
            if steps == max_steps:
                steps = 0
                relevant_states = states[:, :features_dimensions]
                for i in range(done_tracker.size):
                    for row in range(relevant_states.shape[0]):
                        level = done_tracker[i]
                        if level >= num_of_runs:
                            continue

                        s = relevant_states[row]
                        start = state_indexes[level][row]
                        end = start + len(s)
                        all_states[level][row][start:end] = s
                        # print(all_states[level][row].shape)
                        # print(all_states[level][row])
                        state_indexes[level][row] = end % behavior_dimensions

            steps += 1

        final_fitnesses = []
        final_states = []
        fitnesses_t = fitnesses.T
        for i in range(fitnesses_t.shape[0]):
            fitness = fitnesses_t[i]
            index = np.argmax(fitness)

            states = all_states[index]

            final_fitnesses.append(fitness[index])
            # Keep env i's behavior from its best run (index selects the run, i the env).
            final_states.append(states[i])

        return (np.array(final_fitnesses[:len(phenotypes)]),
                np.array(final_states[:len(phenotypes)]))