# NOTE(review): recovered from a collapsed single line; statements reformatted
# only — no code tokens changed except the annotation fix noted below.

# Average total reward on validation episodes required to count as success.
success_threshold: float = 30.0
# Self-play curriculum experiment on the high-reward environment, reusing the
# second curriculum DDDQN agent.
experiment_high_reward_selfplay: TicTacToeExperiment = TicTacToeExperiment("experiment_high_reward",
                                                                           success_threshold,
                                                                           environment_selfplay_high_reward,
                                                                           dddqn_curriculum_agent_second,
                                                                           interface_high_reward_selfplay)
# Define experiments data
testing_episodes: int = 100
test_cycles: int = 10
training_episodes: int = 1000
validation_episodes: int = 100
max_training_episodes: int = 25000
episode_length_max: int = 20
# Run curriculum experiments for low reward
# NOTE(review): annotation was `[]` (an empty-list literal, not a type);
# corrected to `list`. Presumably a list of saved metagraph checkpoint paths —
# verify against run_experiment's return value.
saved_metagraph_paths: list = run_experiment(experiment_low_reward_fixed,
                                             training_episodes, max_training_episodes, episode_length_max,
                                             validation_episodes, testing_episodes, test_cycles,
                                             render_during_training, render_during_validation, render_during_test,
                                             workspace_path, __file__, logger,
                                             None, experiment_iterations_number)
# Resume the self-play experiment from each checkpoint saved by the fixed run.
# NOTE(review): render-during-test is hard-coded to True here, unlike the run
# above which passes render_during_test — confirm this asymmetry is intended.
for metagraph_path in saved_metagraph_paths: run_experiment(experiment_low_reward_selfplay,
                                                            training_episodes, max_training_episodes, episode_length_max,
                                                            validation_episodes, testing_episodes, test_cycles,
                                                            render_during_training, render_during_validation, True,
                                                            workspace_path, __file__, logger, metagraph_path)
# Run curriculum experiments for high reward
saved_metagraph_paths: list = run_experiment(experiment_high_reward_fixed,
                                             training_episodes, max_training_episodes, episode_length_max,
                                             validation_episodes, testing_episodes, test_cycles,
                                             render_during_training, render_during_validation, render_during_test,
                                             workspace_path, __file__, logger,
                                             None, experiment_iterations_number)
# NOTE(review): the body of this loop continues beyond this chunk.
for metagraph_path in saved_metagraph_paths:
# --- Experiment configuration -------------------------------------------------
# Episode counts per phase and overall training limits.
testing_episodes: int = 100
test_cycles: int = 10
training_episodes: int = 100
validation_episodes: int = 100
max_training_episodes: int = 35000
episode_length_max: int = 100
# Keep one plotted sample out of every ten collected.
plot_sample_density: int = 10

# Human-readable hyperparameter summary passed to the experiment runner.
intro: str = ("Data:\n"
              "\nVanilla Policy Gradient with GAE buffer"
              "\nThree dense layer with 4096 neurons each using xavier initialization"
              "\nLearning rate policy: 0.0003"
              "\nLearning rate advantage: 0.0001"
              "\nDiscount factor: 0.99"
              "\nValue steps per update: 10"
              "\nLambda parameter: 0.95"
              "\nUpdates per training volley: 2"
              "\nSuccess threshold: 0.35 average total reward on the validation set episodes"
              "\nEpisodic: yes"
              "\nEpisode length: 100"
              "\nMax allowed steps for episode: 100"
              "\nSeed states range [0, 0]"
              "\nAcceptance value: none"
              "\nThreshold value: 0.2\n")

# Launch the experiment with the configuration above.
run_experiment(experiment,
               training_episodes, max_training_episodes, episode_length_max,
               validation_episodes, testing_episodes, test_cycles,
               render_during_training, render_during_validation, render_during_test,
               workspace_path, __file__, logger,
               None, experiment_iterations_number,
               intro, plot_sample_density)
# NOTE(review): recovered from a collapsed single line; statements reformatted
# only — no code tokens changed. This chunk begins inside a constructor call
# opened before it (the epsilon-greedy high-reward experiment) and ends inside
# a run_experiment call that continues past it.
environment_high_reward, dddqn_epsilon_greedy_agent, interface_high_reward)
# Boltzmann-exploration experiment on the high-reward environment.
experiment_boltzmann_high_reward: TicTacToeExperiment = TicTacToeExperiment("b_experiment_high_reward",
                                                                            success_threshold,
                                                                            environment_high_reward,
                                                                            dddqn_boltzmann_agent,
                                                                            interface_high_reward)
# Define experiments data
testing_episodes: int = 100
test_cycles: int = 10
training_episodes: int = 1000
validation_episodes: int = 100
max_training_episodes: int = 50000
episode_length_max: int = 20
# Run epsilon greedy experiment for low reward
run_experiment(experiment_egreedy_low_reward,
               training_episodes, max_training_episodes, episode_length_max,
               validation_episodes, testing_episodes, test_cycles,
               render_during_training, render_during_validation, render_during_test,
               workspace_path, __file__, logger,
               None, experiment_iterations_number)
# Run boltzmann experiment for low reward
run_experiment(experiment_boltzmann_low_reward,
               training_episodes, max_training_episodes, episode_length_max,
               validation_episodes, testing_episodes, test_cycles,
               render_during_training, render_during_validation, render_during_test,
               workspace_path, __file__, logger,
               None, experiment_iterations_number)
# Run epsilon greedy experiment for high reward
run_experiment(experiment_egreedy_high_reward,
               training_episodes, max_training_episodes, episode_length_max,
               validation_episodes, testing_episodes, test_cycles,
               render_during_training, render_during_validation, render_during_test,
               workspace_path, __file__, logger, None,