Exemplo n.º 1
0
    def __init__(
        self,
        encoding_strategy,
        num_actors,
        episode_length,
        percentage,
        training=False,
        weight_file_name='ppo_fitness_shaping_ranked',
        learning_rate=1e-5,
        discount_factor=0.99,
        variance_bias_factor=0.98,
        num_hidden_layers=1,
        num_neurons=128,
        batch_size=32,
        clipping_value=0.2,
        num_training_epochs=4,
        dim_elimination_max_pooling=False,
        entropy_factor=0.1,
        entropy_factor_decay=0.05,
        min_entropy_factor=0.01,
        value_loss_factor=0.5,
        prefer_higher_fitness=True,
    ):
        """
        Store the selection settings, build the actor-critic network and
        delegate the remaining configuration to the base-class constructor.

        :param encoding_strategy: Provides ``num_channels()`` (first network argument);
                                  also forwarded to the base class
        :param percentage: The percentage of fittest individuals to be selected from the population
        :param prefer_higher_fitness: Whether a higher or lower fitness
                                      is preferred during selection
        :param num_hidden_layers: Forwarded to ``CombinedActorCriticNetwork``
        :param num_neurons: Forwarded to ``CombinedActorCriticNetwork``
        :param dim_elimination_max_pooling: Forwarded to ``CombinedActorCriticNetwork``

        All other parameters are handed to the base-class constructor unchanged;
        ``finite_environment`` is always passed as ``True``.
        """

        # Instance attributes are set before the base class is initialised.
        self.percentage = percentage
        self.prefer_higher_fitness = prefer_higher_fitness

        input_channels = encoding_strategy.num_channels()
        network_options = {
            'eliminate_length_dimension': True,
            'eliminate_population_dimension': False,
            'dim_elimination_max_pooling': dim_elimination_max_pooling,
            'num_hidden_layers': num_hidden_layers,
            'num_neurons': num_neurons,
        }
        # Second positional argument is fixed at 2 (presumably the number of
        # output channels -- confirm against CombinedActorCriticNetwork).
        actor_critic = CombinedActorCriticNetwork(input_channels, 2,
                                                  **network_options)
        # The object returned by .cuda() is what the base class receives.
        actor_critic = actor_critic.cuda()

        base_options = {
            'training': training,
            'learning_rate': learning_rate,
            'num_actors': num_actors,
            'episode_length': episode_length,
            'discount_factor': discount_factor,
            'variance_bias_factor': variance_bias_factor,
            'batch_size': batch_size,
            'clipping_value': clipping_value,
            'num_training_epochs': num_training_epochs,
            'finite_environment': True,
            'entropy_factor': entropy_factor,
            'entropy_factor_decay': entropy_factor_decay,
            'min_entropy_factor': min_entropy_factor,
            'value_loss_factor': value_loss_factor,
        }
        super().__init__(actor_critic, encoding_strategy, weight_file_name,
                         **base_options)
    def __init__(self,
                 encoding_strategy,
                 num_actors,
                 episode_length,
                 num_pairs,
                 training=False,
                 weight_file_name='ppo_fitness_shaping_tournament',
                 learning_rate=1e-5,
                 discount_factor=0.99,
                 variance_bias_factor=0.98,
                 num_hidden_layers=1,
                 num_neurons=128,
                 batch_size=32,
                 clipping_value=0.2,
                 num_training_epochs=4,
                 dim_elimination_max_pooling=False,
                 entropy_factor=0.1,
                 entropy_factor_decay=0.05,
                 min_entropy_factor=0.01,
                 value_loss_factor=0.5):
        """
        Create the tournament pairing strategy, build the actor-critic network
        and delegate the remaining configuration to the base-class constructor.

        :param encoding_strategy: Provides ``num_channels()`` (first network argument);
                                  also forwarded to the base class
        :param num_pairs: Number of parent pairs that have to be generated
        :param num_hidden_layers: Forwarded to ``CombinedActorCriticNetwork``
        :param num_neurons: Forwarded to ``CombinedActorCriticNetwork``
        :param dim_elimination_max_pooling: Forwarded to ``CombinedActorCriticNetwork``

        All other parameters are handed to the base-class constructor unchanged;
        ``finite_environment`` is always passed as ``True``.
        """

        # The pairing strategy is constructed from num_pairs, which is also
        # kept on the instance.
        pairing = TournamentParentPairingStrategy(num_pairs)
        self.tournament_strategy = pairing
        self.num_pairs = num_pairs

        network_options = dict(
            eliminate_length_dimension=True,
            eliminate_population_dimension=False,
            dim_elimination_max_pooling=dim_elimination_max_pooling,
            num_hidden_layers=num_hidden_layers,
            num_neurons=num_neurons,
        )
        # Second positional argument is fixed at 2 (presumably the number of
        # output channels -- confirm against CombinedActorCriticNetwork).
        actor_critic = CombinedActorCriticNetwork(
            encoding_strategy.num_channels(), 2, **network_options).cuda()

        base_options = dict(
            training=training,
            learning_rate=learning_rate,
            num_actors=num_actors,
            episode_length=episode_length,
            discount_factor=discount_factor,
            variance_bias_factor=variance_bias_factor,
            batch_size=batch_size,
            clipping_value=clipping_value,
            num_training_epochs=num_training_epochs,
            finite_environment=True,
            entropy_factor=entropy_factor,
            entropy_factor_decay=entropy_factor_decay,
            min_entropy_factor=min_entropy_factor,
            value_loss_factor=value_loss_factor,
        )
        super().__init__(actor_critic, encoding_strategy, weight_file_name,
                         **base_options)
Exemplo n.º 3
0
    def __init__(self,
                 encoding_strategy,
                 num_actors,
                 episode_length,
                 training=False,
                 weight_file_name='ppo_mutate_onestep_individual_learning_rates',
                 learning_rate=1e-5,
                 discount_factor=0.99,
                 variance_bias_factor=0.98,
                 num_hidden_layers=1,
                 num_neurons=128,
                 batch_size=32,
                 clipping_value=0.2,
                 num_training_epochs=4,
                 dim_elimination_max_pooling=False,
                 entropy_factor=0.1,
                 entropy_factor_decay=0.05,
                 min_entropy_factor=0.01,
                 value_loss_factor=0.5,
                 minimum_step_size=1e-5):
        """
        Store the step-size floor, build the actor-critic network and delegate
        the remaining configuration to the base-class constructor.

        :param encoding_strategy: Provides ``num_channels()`` (first network argument);
                                  also forwarded to the base class
        :param minimum_step_size: The lower limit for step sizes
        :param num_hidden_layers: Forwarded to ``CombinedActorCriticNetwork``
        :param num_neurons: Forwarded to ``CombinedActorCriticNetwork``
        :param dim_elimination_max_pooling: Forwarded to ``CombinedActorCriticNetwork``

        All other parameters are handed to the base-class constructor unchanged;
        ``finite_environment`` is always passed as ``True``.
        """

        self.minimum_step_size = minimum_step_size

        # The network is always created with two output channels.
        output_channel_count = 2
        input_channel_count = encoding_strategy.num_channels()

        network_options = {
            'eliminate_length_dimension': True,
            'dim_elimination_max_pooling': dim_elimination_max_pooling,
            'num_hidden_layers': num_hidden_layers,
            'num_neurons': num_neurons,
        }
        actor_critic = CombinedActorCriticNetwork(input_channel_count,
                                                  output_channel_count,
                                                  **network_options)
        # The object returned by .cuda() is what the base class receives.
        actor_critic = actor_critic.cuda()

        base_options = {
            'training': training,
            'learning_rate': learning_rate,
            'num_actors': num_actors,
            'episode_length': episode_length,
            'discount_factor': discount_factor,
            'variance_bias_factor': variance_bias_factor,
            'batch_size': batch_size,
            'clipping_value': clipping_value,
            'num_training_epochs': num_training_epochs,
            'finite_environment': True,
            'entropy_factor': entropy_factor,
            'entropy_factor_decay': entropy_factor_decay,
            'min_entropy_factor': min_entropy_factor,
            'value_loss_factor': value_loss_factor,
        }
        super().__init__(actor_critic, encoding_strategy, weight_file_name,
                         **base_options)
Exemplo n.º 4
0
    def __init__(
        self,
        encoding_strategy,
        num_actors,
        episode_length,
        training=False,
        weight_file_name='ppo_population_subset',
        learning_rate=1e-3,
        min_subset_size=0,
        discount_factor=0.99,
        variance_bias_factor=0.98,
        num_hidden_layers=1,
        num_neurons=128,
        batch_size=32,
        clipping_value=0.2,
        num_training_epochs=4,
        dim_elimination_max_pooling=False,
        entropy_factor=0.1,
        entropy_factor_decay=0.05,
        min_entropy_factor=0.01,
        value_loss_factor=0.5,
    ):
        """
        Store the minimum subset size, build the actor-critic network and
        delegate the remaining configuration to the base-class constructor.

        :param encoding_strategy: Provides ``num_channels()`` (first network argument);
                                  also forwarded to the base class
        :param min_subset_size: The minimum number of individuals that has to be selected.
                                If too few are selected, min_subset_size random individuals are returned
        :param num_hidden_layers: Forwarded to ``CombinedActorCriticNetwork``
        :param num_neurons: Forwarded to ``CombinedActorCriticNetwork``
        :param dim_elimination_max_pooling: Forwarded to ``CombinedActorCriticNetwork``

        All other parameters are handed to the base-class constructor unchanged.
        """

        self.min_subset_size = min_subset_size

        # The network is always created with a single output channel.
        output_channel_count = 1

        network_options = dict(
            eliminate_length_dimension=True,
            dim_elimination_max_pooling=dim_elimination_max_pooling,
            num_hidden_layers=num_hidden_layers,
            num_neurons=num_neurons,
        )
        actor_critic = CombinedActorCriticNetwork(
            encoding_strategy.num_channels(),
            output_channel_count,
            **network_options,
        ).cuda()

        # finite_environment is not passed here, so the base-class default applies.
        base_options = dict(
            training=training,
            learning_rate=learning_rate,
            num_actors=num_actors,
            episode_length=episode_length,
            discount_factor=discount_factor,
            variance_bias_factor=variance_bias_factor,
            batch_size=batch_size,
            clipping_value=clipping_value,
            num_training_epochs=num_training_epochs,
            entropy_factor=entropy_factor,
            entropy_factor_decay=entropy_factor_decay,
            min_entropy_factor=min_entropy_factor,
            value_loss_factor=value_loss_factor,
        )
        super().__init__(
            actor_critic,
            encoding_strategy,
            weight_file_name,
            **base_options,
        )
Exemplo n.º 5
0
    def __init__(
        self,
        encoding_strategy,
        population_size,
        num_actors,
        episode_length,
        training=False,
        weight_file_name='ppo_survivor_selection',
        learning_rate=1e-5,
        discount_factor=0.99,
        variance_bias_factor=0.98,
        num_hidden_layers=1,
        num_neurons=128,
        batch_size=32,
        clipping_value=0.2,
        num_training_epochs=4,
        dim_elimination_max_pooling=False,
        entropy_factor=0.1,
        entropy_factor_decay=0.05,
        min_entropy_factor=0.01,
        value_loss_factor=0.5,
    ):
        """
        Store the population size, build the actor-critic network and delegate
        the remaining configuration to the base-class constructor.

        :param encoding_strategy: Provides ``num_channels()`` (first network argument);
                                  also forwarded to the base class
        :param population_size: Kept on the instance as ``self.population_size``
        :param num_hidden_layers: Forwarded to ``CombinedActorCriticNetwork``
        :param num_neurons: Forwarded to ``CombinedActorCriticNetwork``
        :param dim_elimination_max_pooling: Forwarded to ``CombinedActorCriticNetwork``

        All other parameters are handed to the base-class constructor unchanged;
        ``finite_environment`` is always passed as ``True``.
        """

        self.population_size = population_size

        input_channels = encoding_strategy.num_channels()
        network_options = {
            'eliminate_length_dimension': True,
            'dim_elimination_max_pooling': dim_elimination_max_pooling,
            'num_hidden_layers': num_hidden_layers,
            'num_neurons': num_neurons,
        }
        # Second positional argument is fixed at 2 (presumably the number of
        # output channels -- confirm against CombinedActorCriticNetwork).
        actor_critic = CombinedActorCriticNetwork(
            input_channels, 2, **network_options
        ).cuda()

        base_options = {
            'training': training,
            'learning_rate': learning_rate,
            'num_actors': num_actors,
            'episode_length': episode_length,
            'discount_factor': discount_factor,
            'variance_bias_factor': variance_bias_factor,
            'batch_size': batch_size,
            'clipping_value': clipping_value,
            'num_training_epochs': num_training_epochs,
            'finite_environment': True,
            'entropy_factor': entropy_factor,
            'entropy_factor_decay': entropy_factor_decay,
            'min_entropy_factor': min_entropy_factor,
            'value_loss_factor': value_loss_factor,
        }
        super().__init__(
            actor_critic, encoding_strategy, weight_file_name, **base_options
        )
Exemplo n.º 6
0
    def __init__(self,
                 encoding_strategy,
                 genome_creation_strategy,
                 num_actors,
                 episode_length,
                 training=False,
                 weight_file_name='ppo_individual_crossover_operator_selection',
                 learning_rate=1e-5,
                 discount_factor=0.99,
                 variance_bias_factor=0.98,
                 num_hidden_layers=1,
                 num_neurons=128,
                 batch_size=32,
                 clipping_value=0.2,
                 num_training_epochs=4,
                 dim_elimination_max_pooling=False,
                 entropy_factor=0.1,
                 entropy_factor_decay=0.05,
                 min_entropy_factor=0.01,
                 value_loss_factor=0.5,
                 crossover_rate=0.9):
        """
        Instantiate the available crossover operators, build the actor-critic
        network with one output per operator and delegate the remaining
        configuration to the base-class constructor.

        :param encoding_strategy: Provides ``num_channels()``; the network input size is
                                  twice that value. Also forwarded to the base class
        :param genome_creation_strategy: Passed to every crossover operator
        :param crossover_rate: The probability, with which crossover is performed, instead of copying the fittest parent
        :param num_hidden_layers: Forwarded to ``CombinedActorCriticNetwork``
        :param num_neurons: Forwarded to ``CombinedActorCriticNetwork``
        :param dim_elimination_max_pooling: Forwarded to ``CombinedActorCriticNetwork``

        All other parameters are handed to the base-class constructor unchanged;
        ``finite_environment`` is always passed as ``True``.
        """

        # Operator order matters: the list length sets the network's second
        # argument, and the operators are instantiated in exactly this order.
        operator_types = (
            OnePointCrossoverStrategy,
            TwoPointCrossoverStrategy,
            LinearCrossoverStrategy,
            CyclicCrossoverStrategy,
            PositionCrossoverStrategy,
            OrderCrossoverStrategy,
            PartiallyMappedCrossoverStrategy,
        )
        self.operators = [
            operator_type(crossover_rate, genome_creation_strategy)
            for operator_type in operator_types
        ]

        input_channels = 2 * encoding_strategy.num_channels()
        operator_count = len(self.operators)

        network_options = dict(
            eliminate_length_dimension=True,
            eliminate_population_dimension=False,
            dim_elimination_max_pooling=dim_elimination_max_pooling,
            num_hidden_layers=num_hidden_layers,
            num_neurons=num_neurons,
        )
        actor_critic = CombinedActorCriticNetwork(input_channels,
                                                  operator_count,
                                                  **network_options)
        # The object returned by .cuda() is what the base class receives.
        actor_critic = actor_critic.cuda()

        base_options = dict(
            training=training,
            learning_rate=learning_rate,
            num_actors=num_actors,
            episode_length=episode_length,
            discount_factor=discount_factor,
            variance_bias_factor=variance_bias_factor,
            batch_size=batch_size,
            clipping_value=clipping_value,
            num_training_epochs=num_training_epochs,
            finite_environment=True,
            entropy_factor=entropy_factor,
            entropy_factor_decay=entropy_factor_decay,
            min_entropy_factor=min_entropy_factor,
            value_loss_factor=value_loss_factor,
        )
        super().__init__(actor_critic, encoding_strategy, weight_file_name,
                         **base_options)