示例#1
0
def run():
    """Run the EDO algorithm once for each of the seeds 0 to 4.

    Returns a pair: the list of population histories (one per seed) and a
    single data frame holding every fitness history, with a ``seed`` column
    recording which run each row came from.
    """

    population_runs = []
    fitness_runs = []

    for seed in range(5):
        # The families are rebuilt for every seed rather than shared
        # between runs.
        families = [
            edo.Family(distribution)
            for distribution in (RadiusUniform, AngleUniform)
        ]

        optimiser = edo.DataOptimiser(
            fitness,
            size=100,
            row_limits=[50, 100],
            col_limits=[(1, 1), (1, 1)],
            families=families,
            max_iter=30,
            best_prop=0.1,
            maximise=True,
        )
        population_history, fitness_history = optimiser.run(random_state=seed)

        # Tag the rows so the runs can be told apart after concatenation.
        fitness_history["seed"] = seed

        population_runs.append(population_history)
        fitness_runs.append(fitness_history)

    combined_fitness = pd.concat(fitness_runs)
    return population_runs, combined_fitness
示例#2
0
def run_circle_example():
    """ Run a scaled-down circle example for the seeds 0, 1 and 2.

    Returns one data frame containing the fitness history of every run, with
    a ``seed`` column identifying the run each row belongs to. """

    seeded_histories = []

    for seed in range(3):
        # Fresh families are created for each seeded run.
        families = [
            edo.Family(distribution)
            for distribution in (RadiusUniform, AngleUniform)
        ]

        optimiser = edo.DataOptimiser(
            fitness=circle_fitness,
            size=10,
            row_limits=[5, 10],
            col_limits=[(1, 1), (1, 1)],
            families=families,
            max_iter=3,
            best_prop=0.1,
            mutation_prob=0.01,
            maximise=True,
        )

        # Only the fitness history is needed; the populations are discarded.
        fitness_history = optimiser.run(random_state=seed)[1]
        fitness_history["seed"] = seed
        seeded_histories.append(fitness_history)

    return pd.concat(seeded_histories)
示例#3
0
def test_run_parallel(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """ Check that running the EA with `processes=4` gives valid histories. """

    families = [edo.Family(distribution) for distribution in distributions]

    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    pop_history, fit_history = optimiser.run(processes=4, random_state=size)

    expected_columns = ["fitness", "generation", "individual"]
    expected_generations = list(range(max_iter + 1))
    expected_individuals = list(range(size))

    # The fitness history is an in-memory frame with one row per individual
    # per generation.
    assert isinstance(fit_history, pd.DataFrame)
    assert all(fit_history.columns == expected_columns)
    assert all(fit_history.dtypes == [float, int, int])
    assert list(fit_history["generation"].unique()) == expected_generations
    assert list(fit_history["individual"].unique()) == expected_individuals
    assert len(fit_history) % size == 0

    for generation in pop_history:
        assert len(generation) == size

        for individual in generation:
            dataframe, metadata = individual

            assert isinstance(individual, Individual)
            assert isinstance(metadata, list)
            assert isinstance(dataframe, pd.DataFrame)
            assert len(metadata) == len(dataframe.columns)

            # Every column's distribution must come from one of the
            # families handed to the optimiser.
            for pdf in metadata:
                assert any(pdf.family is family for family in families)
示例#4
0
def test_init(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """ Check that a new `DataOptimiser` stores its arguments and starts with
    empty run-time state. """

    families = [edo.Family(distribution) for distribution in distributions]

    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    # Constructor arguments should be available as attributes.
    assert optimiser.fitness is trivial_fitness

    stored_parameters = (
        ("size", size),
        ("row_limits", row_limits),
        ("col_limits", col_limits),
        ("families", families),
        ("weights", weights),
        ("max_iter", max_iter),
        ("best_prop", best_prop),
        ("lucky_prop", lucky_prop),
        ("crossover_prob", crossover_prob),
        ("mutation_prob", mutation_prob),
        ("shrinkage", shrinkage),
    )
    for attribute, expected in stored_parameters:
        assert getattr(optimiser, attribute) == expected

    assert optimiser.maximise is maximise

    # No run has happened yet, so the run-time attributes are still blank.
    assert optimiser.converged is False
    assert optimiser.generation == 0
    assert optimiser.population is None
    assert optimiser.pop_fitness is None
    assert optimiser.pop_history == []
    assert optimiser.fit_history.equals(pd.DataFrame())
示例#5
0
def test_update_fit_history(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """ Check that `_update_fit_history` appends the current fitnesses. """

    families = [edo.Family(distribution) for distribution in distributions]

    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    optimiser.random_state = np.random.RandomState(size)
    optimiser._initialise_run(4)

    individuals = list(range(size))

    # First update: a single generation's worth of rows.
    optimiser._update_fit_history()
    history = optimiser.fit_history
    assert history.shape == (size, 3)
    assert list(history.columns) == ["fitness", "generation", "individual"]
    assert list(history["fitness"].values) == optimiser.pop_fitness
    assert list(history["generation"].unique()) == [0]
    assert list(history["individual"]) == individuals

    # Second update after bumping the generation counter: the population is
    # unchanged, so the same fitnesses are recorded again under generation 1.
    optimiser.generation += 1
    optimiser._update_fit_history()
    history = optimiser.fit_history
    assert history.shape == (2 * size, 3)
    assert list(history["fitness"].values) == optimiser.pop_fitness * 2
    assert list(history["generation"].unique()) == [0, 1]
    assert list(history["individual"]) == individuals * 2
示例#6
0
def run_sample_example():
    """ Run the sample example used in debugging the random leak.

    The run uses four processes and a `RandomState` seeded with 0; only the
    fitness history is returned. """

    optimiser = edo.DataOptimiser(
        sample_fitness,
        size=100,
        row_limits=[50, 100],
        col_limits=[1, 3],
        families=[edo.Family(Uniform)],
        max_iter=10,
    )

    seeded_state = np.random.RandomState(0)
    histories = optimiser.run(processes=4, random_state=seeded_state)

    # `run` gives back (population history, fitness history).
    _, fitness_history = histories
    return fitness_history
示例#7
0
def test_get_fitness(row_limits, col_limits, weights, seed):
    """Build one individual, evaluate its fitness, and check that the result
    is a float that is also stored on the individual."""

    families = [
        edo.Family(distribution)
        for distribution in (Normal, Poisson, Uniform)
    ]
    state = np.random.RandomState(seed)

    individual = create_individual(
        row_limits, col_limits, families, weights, state
    )

    # `get_fitness` hands back a deferred result that must be computed.
    delayed_fitness = get_fitness(individual, trivial_fitness)
    fitness_value = delayed_fitness.compute()

    assert isinstance(fitness_value, float)
    assert individual.fitness == fitness_value
示例#8
0
def test_update_subtypes(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """ Check that `_update_subtypes` leaves each family holding exactly the
    subtypes found among the parents. """

    families = [edo.Family(distribution) for distribution in distributions]

    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    optimiser.random_state = np.random.RandomState(size)
    optimiser._initialise_run(4)

    # Use the first fifth of the population (at least one) as the parents.
    n_parents = max(int(size / 5), 1)
    parents = optimiser.population[:n_parents]
    expected_subtypes = optimiser._get_current_subtypes(parents)

    optimiser._update_subtypes(parents)

    # Read back what each family now records after the update.
    recorded_subtypes = {}
    for family in expected_subtypes:
        recorded_subtypes[family] = list(family.subtypes.keys())

    assert expected_subtypes == recorded_subtypes
示例#9
0
def test_get_fit_history(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """ Test that the DataOptimiser can get the fitness history on disk.

    A single generation is written under `.testcache`, read back with
    `_get_fit_history`, and compared against the optimiser's in-memory
    fitness. The cache directory is removed even when an assertion fails. """

    import shutil

    families = [edo.Family(dist) for dist in distributions]

    do = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    do.random_state = np.random.RandomState(size)
    do._initialise_run(4)

    root = ".testcache"
    try:
        do._write_generation(root=root)

        fit_history = _get_fit_history(root)
        assert isinstance(fit_history, dd.DataFrame)
        assert list(fit_history.columns) == [
            "fitness",
            "generation",
            "individual",
        ]
        assert list(fit_history["fitness"].compute()) == do.pop_fitness
        assert list(fit_history["generation"].unique().compute()) == [0]
        assert list(fit_history["individual"].compute()) == list(range(size))
    finally:
        # Clean up unconditionally so a failed assertion cannot leave stale
        # files behind for later test runs; `ignore_errors` mirrors the
        # non-raising behaviour of the previous `rm -r` shell call.
        shutil.rmtree(root, ignore_errors=True)
示例#10
0
def test_get_population_fitness_serial(size, row_limits, col_limits, weights):
    """Build an initial population and evaluate its fitness without passing
    any process count. Check that there is one float per individual and that
    each value is also stored on its individual."""

    families = [
        edo.Family(distribution)
        for distribution in (Normal, Poisson, Uniform)
    ]

    # One independently seeded random state per individual.
    random_states = {}
    for index in range(size):
        random_states[index] = np.random.RandomState(index)

    population = create_initial_population(
        row_limits, col_limits, families, weights, random_states
    )

    fitnesses = get_population_fitness(population, trivial_fitness)
    assert len(fitnesses) == size

    for individual, fitness_value in zip(population, fitnesses):
        assert isinstance(fitness_value, float)
        assert individual.fitness == fitness_value
示例#11
0
def test_get_next_generation(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """ Check that `_get_next_generation` leaves a full, valid population. """

    families = [edo.Family(distribution) for distribution in distributions]

    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    optimiser.random_state = np.random.RandomState(size)
    optimiser._initialise_run(None)
    optimiser._get_next_generation(None)

    population = optimiser.population
    assert isinstance(population, list)
    assert len(population) == len(optimiser.pop_fitness)
    assert len(population) == size

    # Each member should be an `Individual` paired with a float fitness.
    for member, member_fitness in zip(population, optimiser.pop_fitness):
        assert isinstance(member, Individual)
        assert isinstance(member_fitness, float)
示例#12
0
def test_update_pop_history(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """ Check that `_update_pop_history` records the current population. """

    families = [edo.Family(distribution) for distribution in distributions]

    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    optimiser.random_state = np.random.RandomState(size)
    optimiser._initialise_run(4)
    optimiser._update_pop_history()

    # Exactly one generation should have been recorded, at full size.
    assert len(optimiser.pop_history) == 1
    assert len(optimiser.pop_history[0]) == size

    # The recorded individuals must match the live population position by
    # position.
    for position, current in enumerate(optimiser.population):
        recorded = optimiser.pop_history[0][position]
        assert recorded.dataframe.equals(current.dataframe)
        assert recorded.metadata == current.metadata
示例#13
0
def test_dwindle(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """ Check that calling `dwindle` leaves `mutation_prob` untouched. """

    families = [edo.Family(distribution) for distribution in distributions]

    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    # With the value supplied at construction time.
    optimiser.dwindle()
    assert optimiser.mutation_prob == mutation_prob

    # With an arbitrary, non-numeric placeholder value.
    placeholder = "foo"
    optimiser.mutation_prob = placeholder
    optimiser.dwindle()
    assert optimiser.mutation_prob == "foo"
示例#14
0
def test_run_not_reproducible_without_seed(size, distributions, maximise):
    """Check that two unseeded runs of the EA with identical parameters do
    not produce identical populations throughout."""

    row_limits = [10, 30]
    col_limits = [2, 5]
    max_iter = 5
    families = [edo.Family(distribution) for distribution in distributions]

    def unseeded_pop_history():
        """ Build a new optimiser with a random fitness and run it with four
        processes and no seed, returning only the population history. """

        optimiser = DataOptimiser(
            lambda ind: np.random.random(),
            size,
            row_limits,
            col_limits,
            families,
            max_iter=max_iter,
            maximise=maximise,
        )
        pop_history, _ = optimiser.run(processes=4)
        return pop_history

    pop_history_one = unseeded_pop_history()
    pop_history_two = unseeded_pop_history()

    # Compare individuals pairwise across the two runs; at least one pair
    # must differ.
    checks = [
        ind_one.dataframe.equals(ind_two.dataframe)
        for gen_one, gen_two in zip(pop_history_one, pop_history_two)
        for ind_one, ind_two in zip(gen_one, gen_two)
    ]

    assert not all(checks)
示例#15
0
def test_run_is_reproducible(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Check that two runs of the EA with the same parameters and seed give
    the same population and fitness histories, whether `processes` is `None`
    or 4."""

    def seeded_run(processes):
        """ Build fresh families and a new optimiser, then run it seeded
        with `size` using the given `processes` value. """

        families = [edo.Family(dist) for dist in distributions]
        optimiser = DataOptimiser(
            trivial_fitness,
            size,
            row_limits,
            col_limits,
            families,
            weights,
            max_iter,
            best_prop,
            lucky_prop,
            crossover_prob,
            mutation_prob,
            shrinkage,
            maximise,
        )
        return optimiser.run(processes=processes, random_state=size)

    pop_history_one, fit_history_one = seeded_run(None)
    pop_history_two, fit_history_two = seeded_run(4)

    assert fit_history_one.equals(fit_history_two)

    # Individuals must agree pairwise, generation by generation.
    for first_gen, second_gen in zip(pop_history_one, pop_history_two):
        for first_ind, second_ind in zip(first_gen, second_gen):
            assert first_ind.dataframe.equals(second_ind.dataframe)
示例#16
0
def test_run_on_disk_parallel(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """ Test that the EA can be run with histories on disk and in parallel.

    The run writes under `.testcache_parallel`; that directory is removed
    once all checks have finished, including when an assertion fails. """

    import shutil

    families = [edo.Family(dist) for dist in distributions]

    do = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    root = ".testcache_parallel"
    try:
        pop_history, fit_history = do.run(root=root,
                                          processes=4,
                                          random_state=size)

        assert isinstance(fit_history, dd.DataFrame)
        assert list(fit_history.columns) == [
            "fitness",
            "generation",
            "individual",
        ]
        assert list(fit_history.dtypes) == [float, int, int]
        assert list(fit_history["generation"].unique().compute()) == list(
            range(max_iter + 1))
        assert list(fit_history["individual"].unique().compute()) == list(
            range(size))

        for generation in pop_history:
            assert len(generation) == size

            for individual in generation:
                dataframe, metadata = individual

                assert isinstance(individual, Individual)
                assert isinstance(metadata, list)
                assert isinstance(dataframe, dd.DataFrame)
                assert len(metadata) == len(dataframe.columns)

                # Families are compared through their distribution class
                # rather than by object identity.
                for pdf in metadata:
                    assert (sum(pdf.family.distribution is family.distribution
                                for family in families) == 1)
    finally:
        # Remove the on-disk histories only after every check has run, and do
        # so even on failure, so stale data never leaks into a later run.
        # `ignore_errors` mirrors the non-raising `rm -r` shell call.
        shutil.rmtree(root, ignore_errors=True)
示例#17
0
def test_get_pop_history(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """ Test that the DataOptimiser can get the population history on disk.

    A single generation is written under `.testcache`, read back with
    `_get_pop_history`, and compared against the in-memory population. The
    cache directory is removed even when an assertion fails. """

    import shutil

    families = [edo.Family(dist) for dist in distributions]

    do = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    do.random_state = np.random.RandomState(size)
    do._initialise_run(4)

    root = ".testcache"
    try:
        do._write_generation(root=root)

        pop_history = _get_pop_history(root, 1, distributions)
        assert isinstance(pop_history, list)
        for generation in pop_history:

            assert isinstance(generation, list)
            for i, individual in enumerate(generation):

                pop_ind = do.population[i]
                assert isinstance(individual, Individual)
                assert isinstance(individual.dataframe, dd.DataFrame)
                assert isinstance(individual.metadata, list)

                # The on-disk frame is lazy, so compute it before comparing
                # values with the in-memory frame.
                assert np.allclose(pop_ind.dataframe.values,
                                   individual.dataframe.values.compute())

                for ind_meta, pop_ind_meta in zip(individual.metadata,
                                                  pop_ind.metadata):
                    assert ind_meta.family.name == pop_ind_meta.family.name
                    assert (ind_meta.family.distribution is
                            pop_ind_meta.family.distribution)
                    assert ind_meta.to_dict() == pop_ind_meta.to_dict()
    finally:
        # Clean up unconditionally so a failed assertion cannot leave stale
        # files behind for later test runs; `ignore_errors` mirrors the
        # non-raising behaviour of the previous `rm -r` shell call.
        shutil.rmtree(root, ignore_errors=True)
示例#18
0
def test_write_generation(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Test that the DataOptimiser can write a generation and its fitness to
    file.

    The generation is written under `.testcache`; the fitness CSV and each
    individual's `main.csv` / `main.meta` are then read back and compared
    with the in-memory state. The cache directory is removed even when an
    assertion fails."""

    import shutil

    families = [edo.Family(dist) for dist in distributions]

    do = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    do.random_state = np.random.RandomState(size)
    do._initialise_run(4)

    root = ".testcache"
    try:
        do._write_generation(root=root)
        path = Path(root)

        assert (path / "fitness.csv").exists()
        fit = pd.read_csv(path / "fitness.csv")
        assert list(fit.columns) == ["fitness", "generation", "individual"]
        assert list(fit.dtypes) == [float, int, int]
        assert list(fit["generation"].unique()) == [0]
        assert list(fit["individual"]) == list(range(size))
        assert np.allclose(fit["fitness"].values, do.pop_fitness)

        # Individuals of generation 0 live under `<root>/0/<index>/`.
        path /= "0"
        for i, ind in enumerate(do.population):
            ind_path = path / str(i)
            assert (ind_path / "main.csv").exists()
            assert (ind_path / "main.meta").exists()

            df = pd.read_csv(ind_path / "main.csv")
            # NOTE(review): FullLoader is acceptable here only because the
            # file was written by this test just above.
            with open(ind_path / "main.meta", "r") as meta_file:
                meta = yaml.load(meta_file, Loader=yaml.FullLoader)

            assert np.allclose(df.values, ind.dataframe.values)
            assert meta == [m.to_dict() for m in ind.metadata]
    finally:
        # Clean up unconditionally so a failed assertion cannot leave stale
        # files behind for later test runs; `ignore_errors` mirrors the
        # non-raising behaviour of the previous `rm -r` shell call.
        shutil.rmtree(root, ignore_errors=True)