def test_discrete_obs(self):
    env = cartpole.CartPole()
    # Run every agent on the discrete-observation environment
    # and record its final reward.
    for i in range(self.num_agents):
        env.prepare_env()
        while not env.is_done():
            obs = env.get_obs()
            action = self.agent[i].predict(obs)
            env.step(action)
        self.results[i] = env.get_reward()
    # Every agent should have earned a positive reward.
    for result in self.results:
        self.assertGreater(result, 0)
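# The test above assumes a fixture that provides `self.num_agents`,
# `self.agent` (a list of agents), and `self.results`. A minimal setUp
# sketch under those assumptions; the agent count of 3 is hypothetical:
def setUp(self):
    self.num_agents = 3
    self.agent = [agent.Agent() for _ in range(self.num_agents)]
    self.results = [0] * self.num_agents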
def main():
    # Load the configuration
    cfg = config.Config('ga_cfg.json')
    cfg.update()

    # Create the environment
    env = cartpole.CartPole(img_mode=True, img_size=(100, 150),
                            num_prev_states=cfg.NUM_PREVIOUS_USING_STATES)

    # Generate the starting population
    for i in range(cfg.NUM_START_POPULATION):
        nn = network.generate_model(env.tensor_shape)
        nn.save(NN_NAME_TMPL.format(i))
        utils.clear_session()

    # Load the first neural network model
    nn = models.load_model(NN_NAME_TMPL.format(0))

    # Collect information about the network layers
    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(utils.Weights(nn.layers[i]))

    # Maximum reward over all generations
    max_reward = 0

    # The main loop over generations
    for gen_idx in range(cfg.NUM_GENERATION):
        print('#### GENERATION {} ####'.format(gen_idx))

        # Re-read the configuration so it can be tuned between generations
        cfg.update()

        # In the first generation there are no parents yet,
        # so no children are generated
        if gen_idx == 0:
            num_networks = cfg.NUM_START_POPULATION
        # Otherwise generate children from the parent networks
        else:
            num_tasks = cfg.NUM_PARENT_NETWORKS * cfg.CHILDREN_PER_PARENT
            for net_idx in range(cfg.NUM_PARENT_NETWORKS):
                for child_idx in range(cfg.CHILDREN_PER_PARENT):
                    partner_idx = geneticalg.get_partner(
                        net_idx, cfg.NUM_PARENT_NETWORKS)
                    nn_parent1 = models.load_model(
                        NN_NAME_TMPL.format(net_idx))
                    nn_parent2 = models.load_model(
                        NN_NAME_TMPL.format(partner_idx))
                    child_model = geneticalg.generate_child(
                        nn_parent1, nn_parent2, env.tensor_shape,
                        layers_info, cfg)
                    safe_idx = (cfg.NUM_PARENT_NETWORKS + child_idx
                                + net_idx * cfg.CHILDREN_PER_PARENT)
                    child_model.save(NN_NAME_TMPL.format(safe_idx))
                    print('Generating: {}%\r'.format(
                        (safe_idx - cfg.NUM_PARENT_NETWORKS)
                        / num_tasks * 100), end='')
                    utils.clear_session()
                utils.clear_session()
            print('')
            num_networks = (cfg.NUM_PARENT_NETWORKS
                            + cfg.CHILDREN_PER_PARENT
                            * cfg.NUM_PARENT_NETWORKS)

        # Rewards for the current generation
        gen_rewards = [0 for i in range(num_networks)]

        # Evaluate each neural network
        for network_idx in range(num_networks):
            current_nn = models.load_model(NN_NAME_TMPL.format(network_idx))

            # Rewards over several runs of the current network
            nn_rewards = np.array([])

            # Average over several independent runs
            for start_id in range(cfg.NUM_STARTS_FOR_AVRG):
                env.prepare_env()
                while not env.is_done():
                    obs = env.get_obs()
                    predict = current_nn.predict(obs)
                    action = 0 if predict[0][0] < 0.5 else 1
                    env.step(action)
                nn_rewards = np.append(nn_rewards, env.get_reward())

            # Store the average reward of the current network
            gen_rewards[network_idx] = int(np.mean(nn_rewards))

            # Update and save the best reward and network over all generations
            if max_reward < gen_rewards[network_idx]:
                max_reward = gen_rewards[network_idx]
                with open('max_reward.txt', 'w') as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')

            print('Network {}: {}'.format(network_idx,
                                          gen_rewards[network_idx]))
            utils.clear_session()

        # Report the results of the current generation
        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(gen_rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)

        # Select the best neural networks for the next generation
        nnetworks = geneticalg.selection(num_networks, gen_rewards,
                                         cfg.NUM_PARENT_NETWORKS,
                                         cfg.RANDOM_SELECTED_NETWORKS,
                                         cfg.NEW_GENERATED_RANDOM_NETWORK,
                                         env.tensor_shape)
        utils.clear_session()
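# `NN_NAME_TMPL` and `geneticalg.get_partner` are used above but defined
# elsewhere in the repo. A minimal sketch of what they could look like;
# the template matches the 'nn<i>.h5' file names used by the older script
# below, and the uniform random partner choice is an assumption:
import random

NN_NAME_TMPL = 'nn{}.h5'

def get_partner(net_idx, num_parents):
    # Pick a random parent index different from the current one.
    if num_parents < 2:
        return net_idx
    partner_idx = random.randrange(num_parents)
    while partner_idx == net_idx:
        partner_idx = random.randrange(num_parents)
    return partner_idx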
def __init__(self):
    self.agent = agent.Agent()
    self.env = cartpole.CartPole()
    self.episodes_num = 10_000_000
    self.steps_num = self.env.fps * 10
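# A sketch of how this trainer might drive its episode loop; the `run`
# method name is hypothetical, while the env/agent calls follow the API
# used elsewhere in this repo. `steps_num` (fps * 10) caps each episode
# at roughly ten seconds of frames:
def run(self):
    for episode in range(self.episodes_num):
        self.env.prepare_env()
        for step in range(self.steps_num):
            if self.env.is_done():
                break
            obs = self.env.get_obs()
            self.env.step(self.agent.predict(obs))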
def main():
    global NUM_PARENT_NETWORKS
    global CHILDREN_PER_PARENT
    global NUM_MUTATION_WEIGHTS
    global MUTATION_FACTOR

    env = cartpole.CartPole(img_mode=True, img_size=(25, 25))

    # Generate and save the starting parent networks
    for i in range(NUM_PARENT_NETWORKS):
        nn = generate_model(env.tensor_shape)
        nn.save('nn' + str(i) + '.h5')
        K.clear_session()
        gc.collect()
    K.clear_session()
    gc.collect()
    # nnetworks = [generate_model(img_tensor.shape)
    #              for i in range(NUM_PARENT_NETWORKS)]

    nn = models.load_model('nn0.h5')
    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(Weights(nn.layers[i]))

    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))

        # Re-read the tunable hyperparameters between generations
        with open('GAConfig.txt') as cfg:
            NUM_PARENT_NETWORKS = int(cfg.readline())
            CHILDREN_PER_PARENT = int(cfg.readline())
            NUM_MUTATION_WEIGHTS = int(cfg.readline())
            MUTATION_FACTOR = np.float32(float(cfg.readline()))
        print(NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT,
              NUM_MUTATION_WEIGHTS, MUTATION_FACTOR)

        num_tasks = NUM_PARENT_NETWORKS * CHILDREN_PER_PARENT
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, NUM_PARENT_NETWORKS)
                nn_parent1 = models.load_model('nn' + str(net_idx) + '.h5')
                nn_parent2 = models.load_model('nn' + str(partner_idx) + '.h5')
                child_model = generate_child(nn_parent1, nn_parent2,
                                             env.tensor_shape, layers_info)
                safe_idx = (NUM_PARENT_NETWORKS
                            + net_idx * CHILDREN_PER_PARENT + child_idx)
                child_model.save('nn' + str(safe_idx) + '.h5')
                print('Generating: {}%\r'.format(
                    int(float(net_idx * CHILDREN_PER_PARENT + child_idx)
                        / num_tasks * 100)), end='')
                K.clear_session()
                gc.collect()
            K.clear_session()
            gc.collect()
            # nnetworks.append(child_model)
        print('')

        num_networks = NUM_PARENT_NETWORKS + CHILDREN_PER_PARENT * NUM_PARENT_NETWORKS
        rewards = [0 for i in range(num_networks)]
        for network_idx in range(num_networks):
            current_nn = models.load_model('nn' + str(network_idx) + '.h5')
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                env.prepare_env()
                while not env.is_done():
                    obs = env.get_obs()
                    predict = current_nn.predict(obs)
                    action = 0 if predict[0][0] < 0.5 else 1
                    env.step(action)
                run_results = np.append(run_results, env.get_reward())
            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open('max_reward.txt', 'w') as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))
            K.clear_session()
            gc.collect()

        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)

        nnetworks = selection(num_networks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS,
                              NEW_GENERATED_RANDOM_NETWORK, env.tensor_shape)
        # for i in range(len(nnetworks)):
        #     nnetworks[i].save('tmp' + str(i) + '.h5')
        # nnetworks.clear()
        K.clear_session()
        gc.collect()
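# `generate_child` is the crossover/mutation step used above. A minimal
# sketch under the assumption of per-weight uniform crossover followed by
# additive mutation of NUM_MUTATION_WEIGHTS randomly chosen weights; the
# name `generate_child_sketch` and the operator details are hypothetical
# and the repo's actual implementation may differ:
def generate_child_sketch(parent1, parent2, tensor_shape):
    child = generate_model(tensor_shape)
    for layer_idx in range(len(child.layers)):
        w1 = parent1.layers[layer_idx].get_weights()
        w2 = parent2.layers[layer_idx].get_weights()
        mixed = []
        for a, b in zip(w1, w2):
            mask = np.random.rand(*a.shape) < 0.5  # uniform crossover mask
            mixed.append(np.where(mask, a, b))
        child.layers[layer_idx].set_weights(mixed)
    # Mutation: nudge a few random weights by up to MUTATION_FACTOR
    for _ in range(NUM_MUTATION_WEIGHTS):
        layer = child.layers[np.random.randint(len(child.layers))]
        weights = layer.get_weights()
        if not weights:
            continue  # e.g. pooling/flatten layers have no weights
        flat = weights[0].ravel()  # view into the kernel array
        flat[np.random.randint(flat.size)] += np.random.uniform(
            -MUTATION_FACTOR, MUTATION_FACTOR)
        layer.set_weights(weights)
    return child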
def main():
    # Load the configuration
    cfg = config.Config('ga_cfg.json')
    cfg.update()

    # Create the environment (vector observations, no image mode)
    env = cartpole.CartPole(img_mode=False)

    # Generate the starting parent architectures
    archs = list()
    for i in range(cfg.NUM_PARENT_ARCHITECTURES):
        arch = geneticalg.generate_start_architectures(env.tensor_shape, cfg)
        archs.append(arch)

    for arch_gen_id in range(cfg.NUM_ARCHITECTURES_GENERATIONS):
        print('ARCH_GENERATION: {}'.format(arch_gen_id))

        # Cross the parent architectures to produce child architectures
        for arch_idx in range(cfg.NUM_PARENT_ARCHITECTURES):
            for child_arch_idx in range(cfg.NUM_CHILD_ARCHITECTURES):
                partner_idx = geneticalg.get_partner(
                    arch_idx, cfg.NUM_PARENT_ARCHITECTURES)
                arch_parent1 = archs[arch_idx]
                arch_parent2 = archs[partner_idx]
                child_arch = geneticalg.generate_child_architecture(
                    arch_parent1, arch_parent2, env.tensor_shape, cfg)
                archs.append(child_arch)
        print('ALL_ARCHS: {}'.format(archs))

        arch_rewards = list()
        for curr_arch in archs:
            print('CURRENT_ARCH: {}'.format(curr_arch))

            # Generate the starting population for this architecture
            for i in range(cfg.NUM_START_POPULATION):
                nn = network.generate_model_from_list(curr_arch,
                                                      env.tensor_shape)
                nn.save(NN_NAME_TMPL.format(i))
                utils.clear_session()

            # Load the first neural network model
            nn = models.load_model(NN_NAME_TMPL.format(0))

            # Collect information about the network layers
            layers_info = []
            for i in range(len(nn.layers)):
                layers_info.append(utils.Weights(nn.layers[i]))

            # Maximum reward over all generations
            max_reward = 0

            # The main loop over generations
            for gen_idx in range(cfg.NUM_GENERATION):
                print('NN_GENERATION: {}'.format(gen_idx))

                # Re-read the configuration
                cfg.update()

                # In the first generation there are no parents yet,
                # so no children are generated
                if gen_idx == 0:
                    num_networks = cfg.NUM_START_POPULATION
                # Otherwise generate children from the parent networks
                else:
                    num_tasks = (cfg.NUM_PARENT_NETWORKS
                                 * cfg.CHILDREN_PER_PARENT)
                    for net_idx in range(cfg.NUM_PARENT_NETWORKS):
                        for child_idx in range(cfg.CHILDREN_PER_PARENT):
                            partner_idx = geneticalg.get_partner(
                                net_idx, cfg.NUM_PARENT_NETWORKS)
                            nn_parent1 = models.load_model(
                                NN_NAME_TMPL.format(net_idx))
                            nn_parent2 = models.load_model(
                                NN_NAME_TMPL.format(partner_idx))
                            child_model = geneticalg.generate_child_from_arch(
                                nn_parent1, nn_parent2, env.tensor_shape,
                                layers_info, cfg, curr_arch)
                            safe_idx = (cfg.NUM_PARENT_NETWORKS + child_idx
                                        + net_idx * cfg.CHILDREN_PER_PARENT)
                            child_model.save(NN_NAME_TMPL.format(safe_idx))
                            utils.clear_session()
                        utils.clear_session()
                    num_networks = (cfg.NUM_PARENT_NETWORKS
                                    + cfg.CHILDREN_PER_PARENT
                                    * cfg.NUM_PARENT_NETWORKS)

                # Rewards for the current generation
                gen_rewards = [0 for i in range(num_networks)]

                # Evaluate each neural network
                for network_idx in range(num_networks):
                    current_nn = models.load_model(
                        NN_NAME_TMPL.format(network_idx))

                    # Rewards over several runs of the current network
                    nn_rewards = np.array([])

                    # Average over several independent runs
                    for start_id in range(cfg.NUM_STARTS_FOR_AVRG):
                        env.prepare_env()
                        while not env.is_done():
                            obs = env.get_obs()
                            obs = obs.reshape((1, 4))
                            predict = current_nn.predict(obs)
                            action = 0 if predict[0][0] < 0.5 else 1
                            env.step(action)
                        nn_rewards = np.append(nn_rewards, env.get_reward())

                    # Store the average reward of the current network
                    gen_rewards[network_idx] = int(np.mean(nn_rewards))

                    # Track the best reward over all generations
                    if max_reward < gen_rewards[network_idx]:
                        max_reward = gen_rewards[network_idx]
                        # with open("max_reward.txt", "w") as f:
                        #     f.writelines(
                        #         ['MAX REWARD COMMON: {}'.format(max_reward)])
                        # current_nn.save('best_network.h5')
                    utils.clear_session()

                print(max(gen_rewards))
                # Stop early once this architecture solves CartPole
                # (reward above 199)
                if max(gen_rewards) > 199:
                    break
                # Selection of the best neural networks
                nnetworks = geneticalg.selection(
                    num_networks, gen_rewards, cfg.NUM_PARENT_NETWORKS,
                    cfg.RANDOM_SELECTED_NETWORKS,
                    cfg.NEW_GENERATED_RANDOM_NETWORK, env.tensor_shape)
                utils.clear_session()

            # Score the architecture: solving in fewer generations and
            # using fewer neurons both increase the reward
            arch_reward = (cfg.NUM_GENERATION * alpha
                           + (max(cfg.RANGE_LAYERS)
                              * max(cfg.RANGE_NEURONS) * betta)
                           - (gen_idx + 1) * alpha
                           - sum(curr_arch) * betta)
            print('ARCH_REWARD: {}'.format(arch_reward))
            arch_rewards.append(arch_reward)

        # Keep the best-scoring architectures as the next parents
        new_archs = list()
        for _ in range(cfg.NUM_PARENT_ARCHITECTURES):
            best_arch_idx = arch_rewards.index(max(arch_rewards))
            new_archs.append(archs[best_arch_idx])
            arch_rewards[best_arch_idx] = -1  # mark as taken
        archs = new_archs[:]
        del new_archs
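# The architecture score above rewards solving CartPole in few generations
# with a small network. A worked example with hypothetical values
# alpha = 10, betta = 1, NUM_GENERATION = 20, max(RANGE_LAYERS) = 3,
# max(RANGE_NEURONS) = 64, and an architecture [32, 16] that solved the
# task in its fifth generation (gen_idx == 4):
alpha, betta = 10, 1
max_budget = 20 * alpha + 3 * 64 * betta   # 200 + 192 = 392
arch_reward = max_budget - (4 + 1) * alpha - sum([32, 16]) * betta
print(arch_reward)                         # 392 - 50 - 48 = 294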