def test_run_parallel(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Check that a run with four processes yields well-formed histories.

    The fitness history must be a pandas data frame with the expected
    columns, dtypes, generations and individuals. Every generation in the
    population history must hold ``size`` individuals whose metadata lines
    up with their data frame columns.
    """
    families = [edo.Family(distribution) for distribution in distributions]
    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    pop_history, fit_history = optimiser.run(processes=4, random_state=size)

    expected_generations = list(range(max_iter + 1))
    expected_individuals = list(range(size))

    # Shape and content of the fitness history.
    assert isinstance(fit_history, pd.DataFrame)
    assert all(fit_history.columns == ["fitness", "generation", "individual"])
    assert all(fit_history.dtypes == [float, int, int])
    assert list(fit_history["generation"].unique()) == expected_generations
    assert list(fit_history["individual"].unique()) == expected_individuals
    assert len(fit_history) % size == 0

    # Every recorded individual unpacks into a data frame and its metadata.
    for population in pop_history:
        assert len(population) == size

        for member in population:
            frame, meta = member

            assert isinstance(member, Individual)
            assert isinstance(meta, list)
            assert isinstance(frame, pd.DataFrame)
            assert len(meta) == len(frame.columns)

            # Each column's family must be one of the family objects that
            # were handed to the optimiser (checked by identity).
            for column_pdf in meta:
                assert any(column_pdf.family is family for family in families)
def test_update_fit_history(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Check that ``_update_fit_history`` appends one block per generation.

    The history is updated once for generation 0 and once more after the
    generation counter is bumped; after each update the shape, columns and
    contents of ``fit_history`` are compared with the optimiser's state.
    """
    families = [edo.Family(distribution) for distribution in distributions]
    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )
    optimiser.random_state = np.random.RandomState(size)
    optimiser._initialise_run(4)

    individuals = list(range(size))

    # First update: a single generation's worth of rows.
    optimiser._update_fit_history()
    history = optimiser.fit_history

    assert history.shape == (size, 3)
    assert list(history.columns) == ["fitness", "generation", "individual"]
    assert list(history["fitness"].values) == optimiser.pop_fitness
    assert list(history["generation"].unique()) == [0]
    assert list(history["individual"]) == individuals

    # Second update: the population is unchanged, so the same fitness values
    # are recorded again under the next generation number.
    optimiser.generation += 1
    optimiser._update_fit_history()
    history = optimiser.fit_history

    assert history.shape == (size * 2, 3)
    assert list(history["fitness"].values) == optimiser.pop_fitness * 2
    assert list(history["generation"].unique()) == [0, 1]
    assert list(history["individual"]) == individuals * 2
def test_initialise_run(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Check that ``_initialise_run`` creates a population and its fitness.

    After initialisation the population must be a list of ``size``
    ``Individual`` objects, with a float fitness recorded for each.
    """
    families = [edo.Family(distribution) for distribution in distributions]
    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )
    optimiser.random_state = np.random.RandomState(size)
    optimiser._initialise_run(4)

    population = optimiser.population
    scores = optimiser.pop_fitness

    assert isinstance(population, list)
    assert len(population) == len(scores)
    assert len(scores) == size

    for member, score in zip(population, scores):
        assert isinstance(member, Individual)
        assert isinstance(score, float)
def test_init(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Check that a new ``DataOptimiser`` stores its arguments and defaults.

    Constructor arguments must be exposed as attributes of the same name.
    The run-state attributes must start out as not converged, generation 0,
    no population, and empty histories.
    """
    families = [edo.Family(distribution) for distribution in distributions]
    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    # Attributes compared by equality, in constructor order.
    stored_by_value = (
        ("size", size),
        ("row_limits", row_limits),
        ("col_limits", col_limits),
        ("families", families),
        ("weights", weights),
        ("max_iter", max_iter),
        ("best_prop", best_prop),
        ("lucky_prop", lucky_prop),
        ("crossover_prob", crossover_prob),
        ("mutation_prob", mutation_prob),
        ("shrinkage", shrinkage),
    )

    assert optimiser.fitness is trivial_fitness
    for attribute, expected in stored_by_value:
        assert getattr(optimiser, attribute) == expected
    assert optimiser.maximise is maximise

    # State that only changes once a run has started.
    assert optimiser.converged is False
    assert optimiser.generation == 0
    assert optimiser.population is None
    assert optimiser.pop_fitness is None
    assert optimiser.pop_history == []
    assert optimiser.fit_history.equals(pd.DataFrame())
def test_update_subtypes(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Check that ``_update_subtypes`` leaves only the parents' subtypes.

    A leading slice of the initial population acts as the parents. After the
    update, the subtype keys held by each family must equal what
    ``_get_current_subtypes`` reported for those parents.
    """
    families = [edo.Family(distribution) for distribution in distributions]
    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )
    optimiser.random_state = np.random.RandomState(size)
    optimiser._initialise_run(4)

    # Use roughly a fifth of the population as parents, but at least one.
    n_parents = max(int(size / 5), 1)
    parents = optimiser.population[:n_parents]

    expected_subtypes = optimiser._get_current_subtypes(parents)
    optimiser._update_subtypes(parents)

    observed_subtypes = {}
    for family in expected_subtypes:
        observed_subtypes[family] = list(family.subtypes.keys())

    assert expected_subtypes == observed_subtypes
def test_get_fit_history(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Test that the DataOptimiser can get the fitness history on disk.

    One generation is written under a cache directory and read back with
    ``_get_fit_history``. The result is compared with the optimiser's
    in-memory fitness values. The cache directory is removed afterwards,
    whether or not the checks pass.
    """
    import shutil  # local import: only needed for the cleanup below

    cache = ".testcache"

    families = [edo.Family(dist) for dist in distributions]
    do = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )
    do.random_state = np.random.RandomState(size)
    do._initialise_run(4)

    # Clean up in ``finally`` so a failing write or assertion cannot leave
    # stale files behind for later examples or other tests using this path.
    try:
        do._write_generation(root=cache)
        fit_history = _get_fit_history(cache)

        assert isinstance(fit_history, dd.DataFrame)
        assert list(fit_history.columns) == [
            "fitness",
            "generation",
            "individual",
        ]
        assert list(fit_history["fitness"].compute()) == do.pop_fitness
        assert list(fit_history["generation"].unique().compute()) == [0]
        assert list(fit_history["individual"].compute()) == list(range(size))
    finally:
        # ``ignore_errors`` keeps the original best-effort behaviour when the
        # directory was never created.
        shutil.rmtree(cache, ignore_errors=True)
def test_update_pop_history(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Check that ``_update_pop_history`` records the current population.

    After one update the history must contain a single generation of
    ``size`` entries. Each entry's data frame and metadata must match the
    individual at the same position in the population.
    """
    families = [edo.Family(distribution) for distribution in distributions]
    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )
    optimiser.random_state = np.random.RandomState(size)
    optimiser._initialise_run(4)
    optimiser._update_pop_history()

    assert len(optimiser.pop_history) == 1
    assert len(optimiser.pop_history[0]) == size

    recorded_generation = optimiser.pop_history[0]
    for position, current in enumerate(optimiser.population):
        recorded = recorded_generation[position]
        assert recorded.dataframe.equals(current.dataframe)
        assert recorded.metadata == current.metadata
def test_dwindle(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Check that the default ``dwindle`` leaves ``mutation_prob`` untouched.

    The method is called once with the constructor's value and once after
    the attribute has been replaced by a placeholder string; in both cases
    the attribute must be unchanged afterwards.
    """
    families = [edo.Family(distribution) for distribution in distributions]
    optimiser = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    optimiser.dwindle()
    assert optimiser.mutation_prob == mutation_prob

    # An arbitrary non-numeric value must survive a call as well.
    optimiser.mutation_prob = "foo"
    optimiser.dwindle()
    assert optimiser.mutation_prob == "foo"
def test_run_not_reproducible_without_seed(size, distributions, maximise):
    """Check that two unseeded runs do not produce identical populations.

    Both optimisers share the same parameters and families and use a random
    fitness function. At least one pair of corresponding individuals across
    the two population histories must have differing data frames.
    """
    row_limits = [10, 30]
    col_limits = [2, 5]
    max_iter = 5
    families = [edo.Family(distribution) for distribution in distributions]

    first = DataOptimiser(
        lambda ind: np.random.random(),
        size,
        row_limits,
        col_limits,
        families,
        max_iter=max_iter,
        maximise=maximise,
    )
    first_pops, _ = first.run(processes=4)

    second = DataOptimiser(
        lambda ind: np.random.random(),
        size,
        row_limits,
        col_limits,
        families,
        max_iter=max_iter,
        maximise=maximise,
    )
    second_pops, _ = second.run(processes=4)

    # One flag per pair of corresponding individuals: True when they match.
    identical = [
        left.dataframe.equals(right.dataframe)
        for first_gen, second_gen in zip(first_pops, second_pops)
        for left, right in zip(first_gen, second_gen)
    ]

    assert not all(identical)
def test_run_is_reproducible(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Check that two runs with the same seed give identical histories.

    The first run uses ``processes=None`` and the second ``processes=4``;
    both are seeded with ``size``. Their fitness histories must be equal and
    corresponding individuals must have equal data frames.
    """
    # Each optimiser gets its own freshly built families.
    first_families = [edo.Family(distribution) for distribution in distributions]
    first = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        first_families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )
    first_pops, first_fits = first.run(processes=None, random_state=size)

    second_families = [edo.Family(distribution) for distribution in distributions]
    second = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        second_families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )
    second_pops, second_fits = second.run(processes=4, random_state=size)

    assert first_fits.equals(second_fits)

    for first_gen, second_gen in zip(first_pops, second_pops):
        for left, right in zip(first_gen, second_gen):
            assert left.dataframe.equals(right.dataframe)
def test_run_on_disk_parallel(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Test that the EA can be run with histories on disk and in parallel.

    The run writes under a cache directory and must return dask-backed
    histories. The fitness history is checked for its columns, dtypes,
    generations and individuals, and every recorded individual for its
    types and metadata. The cache directory is removed afterwards, whether
    or not the checks pass.
    """
    import shutil  # local import: only needed for the cleanup below

    cache = ".testcache_parallel"

    families = [edo.Family(dist) for dist in distributions]
    do = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )

    # Clean up in ``finally`` so a failing run or assertion cannot leave the
    # cache behind for later examples of this test.
    try:
        pop_history, fit_history = do.run(
            root=cache, processes=4, random_state=size
        )

        assert isinstance(fit_history, dd.DataFrame)
        assert list(fit_history.columns) == [
            "fitness",
            "generation",
            "individual",
        ]
        assert list(fit_history.dtypes) == [float, int, int]
        assert list(fit_history["generation"].unique().compute()) == list(
            range(max_iter + 1)
        )
        assert list(fit_history["individual"].unique().compute()) == list(
            range(size)
        )

        for generation in pop_history:
            assert len(generation) == size

            for individual in generation:
                dataframe, metadata = individual

                assert isinstance(individual, Individual)
                assert isinstance(metadata, list)
                assert isinstance(dataframe, dd.DataFrame)
                assert len(metadata) == len(dataframe.columns)

                # Families are matched through their distribution class
                # here rather than by identity of the family object.
                for pdf in metadata:
                    assert (
                        sum(
                            pdf.family.distribution is family.distribution
                            for family in families
                        )
                        == 1
                    )
    finally:
        # ``ignore_errors`` keeps the original best-effort behaviour when the
        # directory was never created.
        shutil.rmtree(cache, ignore_errors=True)
def test_get_pop_history(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Test that the DataOptimiser can get the population history on disk.

    One generation is written under a cache directory and read back with
    ``_get_pop_history``. Each recovered individual is compared with the
    in-memory individual at the same position. The cache directory is
    removed afterwards, whether or not the checks pass.
    """
    import shutil  # local import: only needed for the cleanup below

    cache = ".testcache"

    families = [edo.Family(dist) for dist in distributions]
    do = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )
    do.random_state = np.random.RandomState(size)
    do._initialise_run(4)

    # Clean up in ``finally`` so a failing write or assertion cannot leave
    # stale files behind for later examples or other tests using this path.
    try:
        do._write_generation(root=cache)
        pop_history = _get_pop_history(cache, 1, distributions)

        assert isinstance(pop_history, list)

        for generation in pop_history:
            assert isinstance(generation, list)

            for i, individual in enumerate(generation):
                pop_ind = do.population[i]

                assert isinstance(individual, Individual)
                assert isinstance(individual.dataframe, dd.DataFrame)
                assert isinstance(individual.metadata, list)

                # Values are compared with a tolerance, not exact equality.
                assert np.allclose(
                    pop_ind.dataframe.values,
                    individual.dataframe.values.compute(),
                )

                for ind_meta, pop_ind_meta in zip(
                    individual.metadata, pop_ind.metadata
                ):
                    assert ind_meta.family.name == pop_ind_meta.family.name
                    assert (
                        ind_meta.family.distribution
                        is pop_ind_meta.family.distribution
                    )
                    assert ind_meta.to_dict() == pop_ind_meta.to_dict()
    finally:
        # ``ignore_errors`` keeps the original best-effort behaviour when the
        # directory was never created.
        shutil.rmtree(cache, ignore_errors=True)
def test_write_generation(
    size,
    row_limits,
    col_limits,
    distributions,
    weights,
    max_iter,
    best_prop,
    lucky_prop,
    crossover_prob,
    mutation_prob,
    shrinkage,
    maximise,
):
    """Test that the DataOptimiser can write a generation and its fitness to
    file.

    The test checks ``fitness.csv`` in the cache root, then the ``main.csv``
    and ``main.meta`` files of every individual under the ``0`` generation
    directory. The cache directory is removed afterwards, whether or not
    the checks pass.
    """
    import shutil  # local import: only needed for the cleanup below

    cache = ".testcache"

    families = [edo.Family(dist) for dist in distributions]
    do = DataOptimiser(
        trivial_fitness,
        size,
        row_limits,
        col_limits,
        families,
        weights,
        max_iter,
        best_prop,
        lucky_prop,
        crossover_prob,
        mutation_prob,
        shrinkage,
        maximise,
    )
    do.random_state = np.random.RandomState(size)
    # NOTE(review): the argument is presumably the number of processes, which
    # would not match a "single core" description - confirm against
    # ``_initialise_run``.
    do._initialise_run(4)

    # Clean up in ``finally`` so a failing write or assertion cannot leave
    # stale files behind for later examples or other tests using this path.
    try:
        do._write_generation(root=cache)
        path = Path(cache)

        assert (path / "fitness.csv").exists()
        fit = pd.read_csv(path / "fitness.csv")

        assert list(fit.columns) == ["fitness", "generation", "individual"]
        assert list(fit.dtypes) == [float, int, int]
        assert list(fit["generation"].unique()) == [0]
        assert list(fit["individual"]) == list(range(size))
        assert np.allclose(fit["fitness"].values, do.pop_fitness)

        # Individuals are stored in one directory per position under the
        # generation's directory.
        path /= "0"
        for i, ind in enumerate(do.population):
            ind_path = path / str(i)
            assert (ind_path / "main.csv").exists()
            assert (ind_path / "main.meta").exists()

            df = pd.read_csv(ind_path / "main.csv")
            # The file read here was written by this test, not supplied by
            # an external source.
            with open(ind_path / "main.meta", "r") as meta_file:
                meta = yaml.load(meta_file, Loader=yaml.FullLoader)

            assert np.allclose(df.values, ind.dataframe.values)
            assert meta == [m.to_dict() for m in ind.metadata]
    finally:
        # ``ignore_errors`` keeps the original best-effort behaviour when the
        # directory was never created.
        shutil.rmtree(cache, ignore_errors=True)