def test_discrete_obs(self):
    env = cartpole.CartPole()
    # Run every agent on the discrete-observation environment
    # and record its final reward.
    for i in range(self.num_agents):
        env.prepare_env()
        while not env.is_done():
            obs = env.get_obs()
            action = self.agent[i].predict(obs)
            env.step(action)
        self.results[i] = env.get_reward()
    # Every agent should have earned a positive reward.
    for result in self.results:
        self.assertGreater(result, 0)
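# The test above assumes a fixture that provides `self.num_agents`,
# `self.agent` (a list of agents), and `self.results`. A minimal setUp
# sketch under those assumptions; the agent count of 3 is hypothetical:
def setUp(self):
    self.num_agents = 3
    self.agent = [agent.Agent() for _ in range(self.num_agents)]
    self.results = [0] * self.num_agents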
def main():
    # Load the configuration
    cfg = config.Config('ga_cfg.json')
    cfg.update()

    # Create the environment
    env = cartpole.CartPole(img_mode=True, img_size=(100, 150),
                            num_prev_states=cfg.NUM_PREVIOUS_USING_STATES)

    # Generate the starting population
    for i in range(cfg.NUM_START_POPULATION):
        nn = network.generate_model(env.tensor_shape)
        nn.save(NN_NAME_TMPL.format(i))
        utils.clear_session()

    # Load the first neural network model
    nn = models.load_model(NN_NAME_TMPL.format(0))

    # Collect information about the network layers
    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(utils.Weights(nn.layers[i]))

    # Maximum reward over all generations
    max_reward = 0

    # The main loop over generations
    for gen_idx in range(cfg.NUM_GENERATION):
        print('#### GENERATION {} ####'.format(gen_idx))

        # Re-read the configuration so it can be tuned between generations
        cfg.update()

        # In the first generation there are no parents yet,
        # so no children are generated
        if gen_idx == 0:
            num_networks = cfg.NUM_START_POPULATION
        # Otherwise generate children from the parent networks
        else:
            num_tasks = cfg.NUM_PARENT_NETWORKS * cfg.CHILDREN_PER_PARENT
            for net_idx in range(cfg.NUM_PARENT_NETWORKS):
                for child_idx in range(cfg.CHILDREN_PER_PARENT):
                    partner_idx = geneticalg.get_partner(
                        net_idx, cfg.NUM_PARENT_NETWORKS)
                    nn_parent1 = models.load_model(
                        NN_NAME_TMPL.format(net_idx))
                    nn_parent2 = models.load_model(
                        NN_NAME_TMPL.format(partner_idx))
                    child_model = geneticalg.generate_child(
                        nn_parent1, nn_parent2, env.tensor_shape,
                        layers_info, cfg)
                    safe_idx = (cfg.NUM_PARENT_NETWORKS + child_idx
                                + net_idx * cfg.CHILDREN_PER_PARENT)
                    child_model.save(NN_NAME_TMPL.format(safe_idx))
                    print('Generating: {}%\r'.format(
                        (safe_idx - cfg.NUM_PARENT_NETWORKS)
                        / num_tasks * 100), end='')
                    utils.clear_session()
                utils.clear_session()
            print('')
            num_networks = (cfg.NUM_PARENT_NETWORKS
                            + cfg.CHILDREN_PER_PARENT
                            * cfg.NUM_PARENT_NETWORKS)

        # Rewards for the current generation
        gen_rewards = [0 for i in range(num_networks)]

        # Evaluate each neural network
        for network_idx in range(num_networks):
            current_nn = models.load_model(NN_NAME_TMPL.format(network_idx))

            # Rewards over several runs of the current network
            nn_rewards = np.array([])

            # Average over several independent runs
            for start_id in range(cfg.NUM_STARTS_FOR_AVRG):
                env.prepare_env()
                while not env.is_done():
                    obs = env.get_obs()
                    predict = current_nn.predict(obs)
                    action = 0 if predict[0][0] < 0.5 else 1
                    env.step(action)
                nn_rewards = np.append(nn_rewards, env.get_reward())

            # Store the average reward of the current network
            gen_rewards[network_idx] = int(np.mean(nn_rewards))

            # Update and save the best reward and network over all generations
            if max_reward < gen_rewards[network_idx]:
                max_reward = gen_rewards[network_idx]
                with open('max_reward.txt', 'w') as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')

            print('Network {}: {}'.format(network_idx,
                                          gen_rewards[network_idx]))
            utils.clear_session()

        # Report the results of the current generation
        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(gen_rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)

        # Select the best neural networks for the next generation
        nnetworks = geneticalg.selection(num_networks, gen_rewards,
                                         cfg.NUM_PARENT_NETWORKS,
                                         cfg.RANDOM_SELECTED_NETWORKS,
                                         cfg.NEW_GENERATED_RANDOM_NETWORK,
                                         env.tensor_shape)
        utils.clear_session()
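# `NN_NAME_TMPL` and `geneticalg.get_partner` are used above but defined
# elsewhere in the repo. A minimal sketch of what they could look like;
# the template matches the 'nn<i>.h5' file names used by the older script
# below, and the uniform random partner choice is an assumption:
import random

NN_NAME_TMPL = 'nn{}.h5'

def get_partner(net_idx, num_parents):
    # Pick a random parent index different from the current one.
    if num_parents < 2:
        return net_idx
    partner_idx = random.randrange(num_parents)
    while partner_idx == net_idx:
        partner_idx = random.randrange(num_parents)
    return partner_idx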
def __init__(self):
    self.agent = agent.Agent()
    self.env = cartpole.CartPole()
    self.episodes_num = 10_000_000
    self.steps_num = self.env.fps * 10
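# A sketch of how this trainer might drive its episode loop; the `run`
# method name is hypothetical, while the env/agent calls follow the API
# used elsewhere in this repo. `steps_num` (fps * 10) caps each episode
# at roughly ten seconds of frames:
def run(self):
    for episode in range(self.episodes_num):
        self.env.prepare_env()
        for step in range(self.steps_num):
            if self.env.is_done():
                break
            obs = self.env.get_obs()
            self.env.step(self.agent.predict(obs))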
def main():
    global NUM_PARENT_NETWORKS
    global CHILDREN_PER_PARENT
    global NUM_MUTATION_WEIGHTS
    global MUTATION_FACTOR

    env = cartpole.CartPole(img_mode=True, img_size=(25, 25))

    # Generate and save the starting parent networks
    for i in range(NUM_PARENT_NETWORKS):
        nn = generate_model(env.tensor_shape)
        nn.save('nn' + str(i) + '.h5')
        K.clear_session()
        gc.collect()
    K.clear_session()
    gc.collect()
    # nnetworks = [generate_model(img_tensor.shape)
    #              for i in range(NUM_PARENT_NETWORKS)]

    nn = models.load_model('nn0.h5')
    layers_info = []
    for i in range(len(nn.layers)):
        layers_info.append(Weights(nn.layers[i]))

    max_reward = 0
    for gen_idx in range(NUM_GENERATION):
        print('Generation {}'.format(gen_idx))

        # Re-read the tunable hyperparameters between generations
        with open('GAConfig.txt') as cfg:
            NUM_PARENT_NETWORKS = int(cfg.readline())
            CHILDREN_PER_PARENT = int(cfg.readline())
            NUM_MUTATION_WEIGHTS = int(cfg.readline())
            MUTATION_FACTOR = np.float32(float(cfg.readline()))
        print(NUM_PARENT_NETWORKS, CHILDREN_PER_PARENT,
              NUM_MUTATION_WEIGHTS, MUTATION_FACTOR)

        num_tasks = NUM_PARENT_NETWORKS * CHILDREN_PER_PARENT
        for net_idx in range(NUM_PARENT_NETWORKS):
            for child_idx in range(CHILDREN_PER_PARENT):
                partner_idx = get_partner_idx(net_idx, NUM_PARENT_NETWORKS)
                nn_parent1 = models.load_model('nn' + str(net_idx) + '.h5')
                nn_parent2 = models.load_model('nn' + str(partner_idx) + '.h5')
                child_model = generate_child(nn_parent1, nn_parent2,
                                             env.tensor_shape, layers_info)
                safe_idx = (NUM_PARENT_NETWORKS
                            + net_idx * CHILDREN_PER_PARENT + child_idx)
                child_model.save('nn' + str(safe_idx) + '.h5')
                print('Generating: {}%\r'.format(
                    int(float(net_idx * CHILDREN_PER_PARENT + child_idx)
                        / num_tasks * 100)), end='')
                K.clear_session()
                gc.collect()
            K.clear_session()
            gc.collect()
            # nnetworks.append(child_model)
        print('')

        num_networks = NUM_PARENT_NETWORKS + CHILDREN_PER_PARENT * NUM_PARENT_NETWORKS
        rewards = [0 for i in range(num_networks)]
        for network_idx in range(num_networks):
            current_nn = models.load_model('nn' + str(network_idx) + '.h5')
            run_results = np.array([])
            for start_id in range(NUM_STARTS_FOR_AVRG):
                env.prepare_env()
                while not env.is_done():
                    obs = env.get_obs()
                    predict = current_nn.predict(obs)
                    action = 0 if predict[0][0] < 0.5 else 1
                    env.step(action)
                run_results = np.append(run_results, env.get_reward())
            rewards[network_idx] = int(np.mean(run_results))
            if max_reward < max(rewards):
                max_reward = max(rewards)
                with open('max_reward.txt', 'w') as f:
                    f.writelines(['MAX REWARD COMMON: {}'.format(max_reward)])
                current_nn.save('best_network.h5')
            print('Network {}: {}'.format(network_idx, rewards[network_idx]))
            K.clear_session()
            gc.collect()

        print('-' * 40)
        print('MAX REWARD CURRENT: {}'.format(max(rewards)))
        print('MAX REWARD COMMON: {}'.format(max_reward))
        print('-' * 40)

        nnetworks = selection(num_networks, rewards, NUM_PARENT_NETWORKS,
                              RANDOM_SELECTED_NETWORKS,
                              NEW_GENERATED_RANDOM_NETWORK, env.tensor_shape)
        # for i in range(len(nnetworks)):
        #     nnetworks[i].save('tmp' + str(i) + '.h5')
        # nnetworks.clear()
        K.clear_session()
        gc.collect()
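# `generate_child` is the crossover/mutation step used above. A minimal
# sketch under the assumption of per-weight uniform crossover followed by
# additive mutation of NUM_MUTATION_WEIGHTS randomly chosen weights; the
# name `generate_child_sketch` and the operator details are hypothetical
# and the repo's actual implementation may differ:
def generate_child_sketch(parent1, parent2, tensor_shape):
    child = generate_model(tensor_shape)
    for layer_idx in range(len(child.layers)):
        w1 = parent1.layers[layer_idx].get_weights()
        w2 = parent2.layers[layer_idx].get_weights()
        mixed = []
        for a, b in zip(w1, w2):
            mask = np.random.rand(*a.shape) < 0.5  # uniform crossover mask
            mixed.append(np.where(mask, a, b))
        child.layers[layer_idx].set_weights(mixed)
    # Mutation: nudge a few random weights by up to MUTATION_FACTOR
    for _ in range(NUM_MUTATION_WEIGHTS):
        layer = child.layers[np.random.randint(len(child.layers))]
        weights = layer.get_weights()
        if not weights:
            continue  # e.g. pooling/flatten layers have no weights
        flat = weights[0].ravel()  # view into the kernel array
        flat[np.random.randint(flat.size)] += np.random.uniform(
            -MUTATION_FACTOR, MUTATION_FACTOR)
        layer.set_weights(weights)
    return child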
def main():
    # Load the configuration
    cfg = config.Config('ga_cfg.json')
    cfg.update()

    # Create the environment (vector observations, no image mode)
    env = cartpole.CartPole(img_mode=False)

    # Generate the starting parent architectures
    archs = list()
    for i in range(cfg.NUM_PARENT_ARCHITECTURES):
        arch = geneticalg.generate_start_architectures(env.tensor_shape, cfg)
        archs.append(arch)

    for arch_gen_id in range(cfg.NUM_ARCHITECTURES_GENERATIONS):
        print('ARCH_GENERATION: {}'.format(arch_gen_id))

        # Cross the parent architectures to produce child architectures
        for arch_idx in range(cfg.NUM_PARENT_ARCHITECTURES):
            for child_arch_idx in range(cfg.NUM_CHILD_ARCHITECTURES):
                partner_idx = geneticalg.get_partner(
                    arch_idx, cfg.NUM_PARENT_ARCHITECTURES)
                arch_parent1 = archs[arch_idx]
                arch_parent2 = archs[partner_idx]
                child_arch = geneticalg.generate_child_architecture(
                    arch_parent1, arch_parent2, env.tensor_shape, cfg)
                archs.append(child_arch)
        print('ALL_ARCHS: {}'.format(archs))

        arch_rewards = list()
        for curr_arch in archs:
            print('CURRENT_ARCH: {}'.format(curr_arch))

            # Generate the starting population for this architecture
            for i in range(cfg.NUM_START_POPULATION):
                nn = network.generate_model_from_list(curr_arch,
                                                      env.tensor_shape)
                nn.save(NN_NAME_TMPL.format(i))
                utils.clear_session()

            # Load the first neural network model
            nn = models.load_model(NN_NAME_TMPL.format(0))

            # Collect information about the network layers
            layers_info = []
            for i in range(len(nn.layers)):
                layers_info.append(utils.Weights(nn.layers[i]))

            # Maximum reward over all generations
            max_reward = 0

            # The main loop over generations
            for gen_idx in range(cfg.NUM_GENERATION):
                print('NN_GENERATION: {}'.format(gen_idx))

                # Re-read the configuration
                cfg.update()

                # In the first generation there are no parents yet,
                # so no children are generated
                if gen_idx == 0:
                    num_networks = cfg.NUM_START_POPULATION
                # Otherwise generate children from the parent networks
                else:
                    num_tasks = (cfg.NUM_PARENT_NETWORKS
                                 * cfg.CHILDREN_PER_PARENT)
                    for net_idx in range(cfg.NUM_PARENT_NETWORKS):
                        for child_idx in range(cfg.CHILDREN_PER_PARENT):
                            partner_idx = geneticalg.get_partner(
                                net_idx, cfg.NUM_PARENT_NETWORKS)
                            nn_parent1 = models.load_model(
                                NN_NAME_TMPL.format(net_idx))
                            nn_parent2 = models.load_model(
                                NN_NAME_TMPL.format(partner_idx))
                            child_model = geneticalg.generate_child_from_arch(
                                nn_parent1, nn_parent2, env.tensor_shape,
                                layers_info, cfg, curr_arch)
                            safe_idx = (cfg.NUM_PARENT_NETWORKS + child_idx
                                        + net_idx * cfg.CHILDREN_PER_PARENT)
                            child_model.save(NN_NAME_TMPL.format(safe_idx))
                            utils.clear_session()
                        utils.clear_session()
                    num_networks = (cfg.NUM_PARENT_NETWORKS
                                    + cfg.CHILDREN_PER_PARENT
                                    * cfg.NUM_PARENT_NETWORKS)

                # Rewards for the current generation
                gen_rewards = [0 for i in range(num_networks)]

                # Evaluate each neural network
                for network_idx in range(num_networks):
                    current_nn = models.load_model(
                        NN_NAME_TMPL.format(network_idx))

                    # Rewards over several runs of the current network
                    nn_rewards = np.array([])

                    # Average over several independent runs
                    for start_id in range(cfg.NUM_STARTS_FOR_AVRG):
                        env.prepare_env()
                        while not env.is_done():
                            obs = env.get_obs()
                            obs = obs.reshape((1, 4))
                            predict = current_nn.predict(obs)
                            action = 0 if predict[0][0] < 0.5 else 1
                            env.step(action)
                        nn_rewards = np.append(nn_rewards, env.get_reward())

                    # Store the average reward of the current network
                    gen_rewards[network_idx] = int(np.mean(nn_rewards))

                    # Track the best reward over all generations
                    if max_reward < gen_rewards[network_idx]:
                        max_reward = gen_rewards[network_idx]
                        # with open("max_reward.txt", "w") as f:
                        #     f.writelines(
                        #         ['MAX REWARD COMMON: {}'.format(max_reward)])
                        # current_nn.save('best_network.h5')
                    utils.clear_session()

                print(max(gen_rewards))
                # Stop early once this architecture solves CartPole
                # (reward above 199)
                if max(gen_rewards) > 199:
                    break
                # Selection of the best neural networks
                nnetworks = geneticalg.selection(
                    num_networks, gen_rewards, cfg.NUM_PARENT_NETWORKS,
                    cfg.RANDOM_SELECTED_NETWORKS,
                    cfg.NEW_GENERATED_RANDOM_NETWORK, env.tensor_shape)
                utils.clear_session()

            # Score the architecture: solving in fewer generations and
            # using fewer neurons both increase the reward
            arch_reward = (cfg.NUM_GENERATION * alpha
                           + (max(cfg.RANGE_LAYERS)
                              * max(cfg.RANGE_NEURONS) * betta)
                           - (gen_idx + 1) * alpha
                           - sum(curr_arch) * betta)
            print('ARCH_REWARD: {}'.format(arch_reward))
            arch_rewards.append(arch_reward)

        # Keep the best-scoring architectures as the next parents
        new_archs = list()
        for _ in range(cfg.NUM_PARENT_ARCHITECTURES):
            best_arch_idx = arch_rewards.index(max(arch_rewards))
            new_archs.append(archs[best_arch_idx])
            arch_rewards[best_arch_idx] = -1  # mark as taken
        archs = new_archs[:]
        del new_archs
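# The architecture score above rewards solving CartPole in few generations
# with a small network. A worked example with hypothetical values
# alpha = 10, betta = 1, NUM_GENERATION = 20, max(RANGE_LAYERS) = 3,
# max(RANGE_NEURONS) = 64, and an architecture [32, 16] that solved the
# task in its fifth generation (gen_idx == 4):
alpha, betta = 10, 1
max_budget = 20 * alpha + 3 * 64 * betta   # 200 + 192 = 392
arch_reward = max_budget - (4 + 1) * alpha - sum([32, 16]) * betta
print(arch_reward)                         # 392 - 50 - 48 = 294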