def eval_genomes(genomes, config):
    """Assign a fitness to every genome from one episode of at most 200 steps.

    For each ``(genome_id, genome)`` pair a network is built with
    ``helper.create_net`` and driven against ``env`` (both resolved from the
    enclosing module scope).  The fitness is the summed step reward plus the
    largest absolute value seen in ``observation[0]`` and in
    ``observation[1]`` after each step (presumably position and speed --
    confirm against the environment in use).

    Only a single episode is run per genome; nothing is averaged.  The genome
    id and the final fitness are printed for every genome.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; each ``genome``
            gets its ``fitness`` attribute overwritten.
        config: passed through unchanged to ``helper.create_net``.
    """
    for genome_id, genome in genomes:
        # Reset first so a failure part-way through leaves a defined fitness.
        genome.fitness = 0
        network = helper.create_net(genome, config)
        episode_return = 0
        state = env.reset()
        peak_first = 0
        peak_second = 0
        print(genome_id)

        for _step in range(200):
            # Network outputs are handed to the environment unmodified.
            state, reward, finished, _info = env.step(network.activate(state))
            episode_return += reward
            peak_first = max(peak_first, abs(state[0]))
            peak_second = max(peak_second, abs(state[1]))
            if finished:
                break

        genome.fitness = episode_return + peak_first + peak_second
        print(genome.fitness)
def eval_genomes(genomes, config):
    """Assign a fitness to every genome from one rendered episode (<= 800 steps).

    Each genome's network (built by ``helper.create_net``) is fed the raw
    observation from the module-level ``env`` and its output is used directly
    as the action.  Fitness is the summed step reward.

    A stall rule ends the episode early: every step on which
    ``abs(observation[2]) < 0.001`` increments a counter, and once that
    counter reaches 50 the step's reward is replaced by -100 and the episode
    stops.  The counter is never reset, so it counts stalled steps
    cumulatively over the episode rather than consecutively.

    Author's notes carried over from the original: the environment asks for
    1600 steps to "solve" (most genomes are expected to fail anyway); the
    network has 24 inputs (14 features + 10 lidar) and 4 outputs in [-1, 1],
    one per joint.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is set on each.
        config: forwarded to ``helper.create_net``.
    """
    step_budget = 800
    stall_threshold = 0.001
    stall_limit = 50
    stall_penalty = -100

    for genome_id, genome in genomes:
        network = helper.create_net(genome, config)
        episode_return = 0
        state = env.reset()
        stalled_steps = 0

        for _step in range(step_budget):
            env.render()
            state, reward, finished, _info = env.step(network.activate(state))

            if abs(state[2]) < stall_threshold:
                stalled_steps += 1
            if stalled_steps >= stall_limit:
                # Too many stalled steps: punish and abandon this episode.
                reward, finished = stall_penalty, True

            episode_return += reward
            if finished:
                break

        genome.fitness = episode_return
def eval_genomes(genomes, config):
    """Assign a fitness to every genome from one rendered episode (<= 1000 steps).

    The network built by ``helper.create_net`` is activated on the current
    observation from the module-level ``env``; the index of its largest output
    (first one on ties) is sent to ``env.step`` as the action.  Fitness is the
    summed step reward of that single episode -- nothing is averaged.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is overwritten on each.
        config: forwarded to ``helper.create_net``.
    """
    for genome_id, genome in genomes:
        # Reset first so a failure part-way through leaves a defined fitness.
        genome.fitness = 0
        network = helper.create_net(genome, config)
        episode_return = 0
        state = env.reset()

        for _step in range(1000):
            env.render()
            outputs = network.activate(state)
            # Arg-max over the outputs selects the discrete action.
            chosen = max(enumerate(outputs), key=lambda pair: pair[1])[0]
            state, reward, finished, _info = env.step(chosen)
            episode_return += reward
            if finished:
                break

        genome.fitness = episode_return
def eval_genomes(genomes, config):
    """Assign each genome the negated sum of squared errors on the test set.

    Every sample in the module-level ``x_test`` is run through the genome's
    network (built by ``helper.create_net``) and compared component-wise with
    the matching entry of ``y_test``.  A perfect network scores 0; larger
    errors give more negative fitness.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is set on each.
        config: forwarded to ``helper.create_net``.
    """

    def squared_error(predicted, expected):
        # Pairs are truncated to the shorter sequence, as zip() does.
        return sum((p - e) ** 2 for p, e in zip(predicted, expected))

    for genome_id, genome in genomes:
        network = helper.create_net(genome, config)
        error_total = 0
        for sample, target in zip(x_test, y_test):
            error_total += squared_error(network.activate(sample), target)
        genome.fitness = -error_total
def eval_genomes(genomes, config):
    """Assign a fitness to every genome from one episode of at most 200 steps.

    The observation from the module-level ``env`` is unpacked into exactly
    three components.  The third one is passed through
    ``helper.scale(-8, 8, -1, 1, ...)`` (presumably a linear remap from
    [-8, 8] to [-1, 1] -- confirm in ``helper``) before the network sees it.
    The network's first output is wrapped in a one-element list and used as
    the action.  Fitness is the summed step reward.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is set on each.
        config: forwarded to ``helper.create_net``.
    """
    for genome_id, genome in genomes:
        network = helper.create_net(genome, config)
        episode_return = 0
        state = env.reset()

        for _step in range(200):
            cos_angle, sin_angle, angular_rate = state
            inputs = [
                cos_angle,
                sin_angle,
                helper.scale(-8, 8, -1, 1, angular_rate),
            ]
            first_output = network.activate(inputs)[0]
            state, reward, finished, _info = env.step([first_output])
            episode_return += reward
            if finished:
                break

        genome.fitness = episode_return
def eval_genomes(genomes, config):
    """Score each genome on the module-level ``x_test`` / ``y_test`` samples.

    For every sample the network output and the target are each reduced to
    the index of their largest entry (first one on ties).  The per-sample
    score is::

        hit_bonus - mean_squared_error

    where ``hit_bonus`` is ``1 - (target_peak - predicted_peak) ** 2`` when
    the two indices agree and ``0`` otherwise, and ``mean_squared_error`` is
    the squared difference over paired components divided by the number of
    network outputs.  The genome's fitness is the sum of the per-sample
    scores.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is set on each.
        config: forwarded to ``helper.create_net``.
    """
    for genome_id, genome in genomes:
        network = helper.create_net(genome, config)
        score = 0

        for sample, target in zip(x_test, y_test):
            output = network.activate(sample)
            predicted_idx = max(range(len(output)), key=lambda k: output[k])
            target_idx = max(range(len(target)), key=lambda k: target[k])

            # Reward a correct class by how close the winning output is to
            # the target's winning value; a wrong class earns nothing here.
            if predicted_idx == target_idx:
                hit_bonus = 1 - (target[target_idx] - output[predicted_idx]) ** 2
            else:
                hit_bonus = 0

            squared_gap = sum((o - t) ** 2 for o, t in zip(output, target))
            mean_squared_error = squared_gap / len(output)

            score += hit_bonus - mean_squared_error

        genome.fitness = score
def eval_genomes(genomes, config):
    """Assign a fitness to every genome from one rendered episode (<= 800 steps).

    Each genome's network (built by ``helper.create_net``) is activated on the
    raw observation from the module-level ``env`` and its output list is
    passed to ``env.step`` unchanged.  Fitness is the summed step reward.

    Author's notes carried over from the original: the environment asks for
    1600 steps to "solve" (most genomes are expected to fail anyway); the
    network has 24 inputs (14 features + 10 lidar) and 4 outputs in [-1, 1],
    one per joint.

    NOTE(review): the original also remarked that the outputs are standard
    sigmoid values in [0, 1] needing a remap to [-1, 1], but that remap was
    commented out -- confirm the activation range configured for the network.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is set on each.
        config: forwarded to ``helper.create_net``.
    """
    for genome_id, genome in genomes:
        network = helper.create_net(genome, config)
        episode_return = 0
        state = env.reset()

        for _step in range(800):
            env.render()
            joint_commands = network.activate(state)
            state, reward, finished, _info = env.step(joint_commands)
            episode_return += reward
            if finished:
                break

        genome.fitness = episode_return
def eval_genomes(genomes, config):
    """Assign a shaped fitness to every genome from one episode (<= 200 steps).

    Each step, ``observation[0]`` and ``observation[1]`` from the module-level
    ``env`` are passed through ``helper.scale`` with the bounds
    ``(-1.2, 0.6) -> (0, 1)`` and ``(-0.07, 0.07) -> (-1, 1)`` respectively
    (presumably a linear remap -- confirm in ``helper``).  The index of the
    network's largest output (first one on ties) is sent to ``env.step`` as
    the action.

    Fitness for the single episode is::

        total_reward + max_pos + max_speed + (max_speed - min_speed)

    where ``max_pos`` is the largest ``observation[0]`` seen after a step
    (floored at 0), and ``max_speed`` / ``min_speed`` are the largest and
    smallest ``abs(observation[1])`` seen after a step.

    Fix relative to the previous version: ``min_speed`` used to start at 0
    and was only ever ``min``-ed against non-negative values, so it could
    never change and the speed-range term silently collapsed to
    ``max_speed``.  It now starts at ``+inf`` so the minimum is really
    tracked.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is overwritten on each.
        config: forwarded to ``helper.create_net``.
    """
    for genome_id, genome in genomes:
        # Reset first so a failure part-way through leaves a defined fitness.
        genome.fitness = 0
        net = helper.create_net(genome, config)
        total_reward = 0
        observation = env.reset()
        max_pos = 0
        max_speed = 0
        # Seed with +inf: abs() is never negative, so a 0 seed would stick.
        min_speed = float("inf")

        for _step in range(200):
            position_scaled = helper.scale(-1.2, 0.6, 0, 1, observation[0])
            velocity_scaled = helper.scale(-0.07, 0.07, -1, 1, observation[1])
            actions = net.activate([position_scaled, velocity_scaled])
            # Arg-max over the outputs selects the discrete action.
            action = max(enumerate(actions), key=lambda i_s: i_s[1])[0]

            observation, reward, done, info = env.step(action)
            total_reward += reward

            speed = abs(observation[1])
            max_pos = max(max_pos, observation[0])
            max_speed = max(max_speed, speed)
            min_speed = min(min_speed, speed)
            if done:
                break

        # The loop always runs at least once, so min_speed is finite here.
        genome.fitness = (
            total_reward + max_pos + max_speed + (max_speed - min_speed)
        )
def eval_genomes(genomes, config):
    """Assign a fitness to every genome from one episode of at most 1000 steps.

    The network built by ``helper.create_net`` must return exactly two
    outputs per activation (the unpacking fails otherwise); they are passed,
    in order and unmodified, to the module-level ``env`` as a two-element
    action.  Fitness is the summed step reward of that single episode --
    nothing is averaged.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs; ``genome.fitness``
            is overwritten on each.
        config: forwarded to ``helper.create_net``.
    """
    for genome_id, genome in genomes:
        # Reset first so a failure part-way through leaves a defined fitness.
        genome.fitness = 0
        network = helper.create_net(genome, config)
        episode_return = 0
        state = env.reset()

        for _step in range(1000):
            # Presumably main-engine and left/right-engine commands, going by
            # the original names (eng_m, eng_lr) -- confirm against the env.
            first_cmd, second_cmd = network.activate(state)
            state, reward, finished, _info = env.step([first_cmd, second_cmd])
            episode_return += reward
            if finished:
                break

        genome.fitness = episode_return
def eval_genomes(genomes, config): for genome_id, genome in genomes: total_reward = 0 net = helper.create_net(genome, config)