population_size = 128
total_tournaments = 100000
save_freq = 1000


def mutate(length, mutation_rate, mutation_sigma):
    """Create an additive mutation vector of size ``length``.

    (Not used; kept in case partial mutations are wanted.)

    Each entry is selected independently with probability
    ``mutation_rate``. Selected entries receive zero-mean Gaussian noise
    scaled by ``mutation_sigma``; all other entries are 0.

    Args:
        length: Size of the vector (anything accepted as a NumPy ``size``).
        mutation_rate: Per-entry probability of being mutated. Values
            <= 0 mutate nothing, values >= 1 mutate every entry.
        mutation_sigma: Standard deviation of the added noise.

    Returns:
        Array of ``length`` floats to be added to a parameter vector.
    """
    # A uniform draw compared against the rate gives the exact probability
    # and is defined for rate == 0 and rate > 1, where the former
    # randint(int(1 / rate)) formulation raised.
    mask = np.random.random_sample(size=length) < mutation_rate
    noise = np.random.normal(size=length) * mutation_sigma
    return mask * noise


# Log results
from library import util

# NOTE(review): the two positional arguments look like the default log
# directory and default model path -- confirm against util.get_args.
args = util.get_args('ga_selfplay_obs', '../zoo/ga/_sp/ga.json')
LOGDIR = args.logdir
# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(LOGDIR, exist_ok=True)

BEST_MODEL_PATH = args.modelpath
with open(BEST_MODEL_PATH) as f:
    data = json.load(f)
# The parameter vector is the first element of the loaded JSON document.
model_params = np.array(data[0])

# Create two instances of a feed forward policy we may need later.
policy_left = Model(mlp.games['slimevolleylite'])
policy_right = Model(mlp.games['slimevolleylite'])
param_count = policy_left.param_count
print("Number of parameters of the neural net policy:", param_count)  # 273 for slimevolleylite
save_freq = 1000


def mutate(length, mutation_rate, mutation_sigma):
    """Create an additive mutation vector of size ``length``.

    (Not used; kept in case partial mutations are wanted.)

    Each entry is selected independently with probability
    ``mutation_rate``. Selected entries receive zero-mean Gaussian noise
    scaled by ``mutation_sigma``; all other entries are 0.

    Args:
        length: Size of the vector (anything accepted as a NumPy ``size``).
        mutation_rate: Per-entry probability of being mutated. Values
            <= 0 mutate nothing, values >= 1 mutate every entry.
        mutation_sigma: Standard deviation of the added noise.

    Returns:
        Array of ``length`` floats to be added to a parameter vector.
    """
    # A uniform draw compared against the rate gives the exact probability
    # and is defined for rate == 0 and rate > 1, where the former
    # randint(int(1 / rate)) formulation raised.
    mask = np.random.random_sample(size=length) < mutation_rate
    noise = np.random.normal(size=length) * mutation_sigma
    return mask * noise


# Log results
from library import util

# NOTE(review): the two positional arguments look like the default log
# directory and default model path -- confirm against util.get_args.
args = util.get_args('ga_selfplay_actionbit', '../zoo/ga/_sp/ga.json')
LOGDIR = args.logdir
# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(LOGDIR, exist_ok=True)

BEST_MODEL_PATH = args.modelpath
with open(BEST_MODEL_PATH) as f:
    data = json.load(f)
# The parameter vector is the first element of the loaded JSON document.
model_params = np.array(data[0])

# Create two instances of a feed forward policy we may need later.
policy_left = Model(mlp.games['slimevolleylite'])
policy_right = Model(mlp.games['slimevolleylite'])
param_count = policy_left.param_count
print("Number of parameters of the neural net policy:", param_count)  # 273 for slimevolleylite
save_freq = 1000


def mutate(length, mutation_rate, mutation_sigma):
    """Create an additive mutation vector of size ``length``.

    (Not used; kept in case partial mutations are wanted.)

    Each entry is selected independently with probability
    ``mutation_rate``. Selected entries receive zero-mean Gaussian noise
    scaled by ``mutation_sigma``; all other entries are 0.

    Args:
        length: Size of the vector (anything accepted as a NumPy ``size``).
        mutation_rate: Per-entry probability of being mutated. Values
            <= 0 mutate nothing, values >= 1 mutate every entry.
        mutation_sigma: Standard deviation of the added noise.

    Returns:
        Array of ``length`` floats to be added to a parameter vector.
    """
    # A uniform draw compared against the rate gives the exact probability
    # and is defined for rate == 0 and rate > 1, where the former
    # randint(int(1 / rate)) formulation raised.
    mask = np.random.random_sample(size=length) < mutation_rate
    noise = np.random.normal(size=length) * mutation_sigma
    return mask * noise


# Log results
from library import util

# NOTE(review): the two positional arguments look like the default log
# directory and default model path -- confirm against util.get_args.
args = util.get_args('ga_selfplay_constant', '../zoo/ga/_sp/ga.json')
LOGDIR = args.logdir
# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(LOGDIR, exist_ok=True)

BEST_MODEL_PATH = args.modelpath
with open(BEST_MODEL_PATH) as f:
    data = json.load(f)
# The parameter vector is the first element of the loaded JSON document.
model_params = np.array(data[0])

# Create two instances of a feed forward policy we may need later.
policy_left = Model(mlp.games['slimevolleylite'])
policy_right = Model(mlp.games['slimevolleylite'])
param_count = policy_left.param_count
print("Number of parameters of the neural net policy:", param_count)  # 273 for slimevolleylite
from mpi4py import MPI
from stable_baselines.common import set_global_seeds
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import bench, logger, PPO1
from stable_baselines.common.callbacks import EvalCallback

NUM_TIMESTEPS = int(2e6)
SEED = 831
EVAL_FREQ = 200000
EVAL_EPISODES = 1000

# Log results and get trained model location
from library import util

args = util.get_args(logdir='ppo_mpi_spike', modelpath='../zoo/ppo/best_model.zip')
LOGDIR = args.logdir
# When this script is launched as several MPI processes, more than one of
# them may reach this point at once; exist_ok makes directory creation safe
# where exists() followed by makedirs() could raise FileExistsError.
os.makedirs(LOGDIR, exist_ok=True)
PPO_PATH = args.modelpath

from library import wrapper


def make_env(seed):
    """Build a seeded SlimeVolley-v0 environment wrapped in SpikeWrapper.

    Args:
        seed: Value passed to the environment's ``seed`` method.

    Returns:
        The wrapped, seeded environment.
    """
    # Add spike wrapper to environment
    env = wrapper.SpikeWrapper(gym.make("SlimeVolley-v0"))
    env.seed(seed)
    return env
from stable_baselines.common import set_global_seeds
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import bench, logger, PPO1
from stable_baselines.common.callbacks import EvalCallback
from library.Observation_Wrapper import ObservationWrapper

NUM_TIMESTEPS = int(2e6)
SEED = 831
EVAL_FREQ = 200000
EVAL_EPISODES = 1000

# Log results
from library import util

# NOTE(review): the two positional arguments look like the default log
# directory and default model path -- confirm against util.get_args.
args = util.get_args('ppo_extended_obs', '../zoo/ppo/best_model.zip')
LOGDIR = args.logdir
# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(LOGDIR, exist_ok=True)

BEST_MODEL_PATH = args.modelpath
# Fail fast with a specific, readable error. FileNotFoundError is still an
# Exception subclass, so existing `except Exception` handlers keep working.
if not os.path.exists(BEST_MODEL_PATH):
    raise FileNotFoundError('File does not exist: {}'.format(BEST_MODEL_PATH))


def make_env(seed):
    """Build a seeded SlimeVolley-v0 environment wrapped in ObservationWrapper.

    Args:
        seed: Value passed to the environment's ``seed`` method.

    Returns:
        The wrapped, seeded environment.
    """
    env = ObservationWrapper(gym.make("SlimeVolley-v0"))
    env.seed(seed)
    return env
from library.Observation_Wrapper import ObservationWrapperHalf

# Settings
SEED = 17
NUM_TIMESTEPS = int(2e6)
EVAL_FREQ = int(1e5)
EVAL_EPISODES = int(1e2)
BEST_THRESHOLD = 0.5  # must achieve a mean score above this to replace prev best self

RENDER_MODE = False  # set this to false if you plan on running for full 1000 trials.

# Log results
from library import util

# NOTE(review): the two positional arguments look like the default log
# directory and default model path -- confirm against util.get_args.
args = util.get_args('ppo_selfplay_extended_obs_half', '../zoo/ppo_sp/history_00000144.zip')
LOGDIR = args.logdir
# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(LOGDIR, exist_ok=True)

BEST_MODEL_PATH = args.modelpath
# Fail fast with a specific, readable error. FileNotFoundError is still an
# Exception subclass, so existing `except Exception` handlers keep working.
if not os.path.exists(BEST_MODEL_PATH):
    raise FileNotFoundError('File does not exist: {}'.format(BEST_MODEL_PATH))


class SlimeVolleySelfPlayEnv(ObservationWrapperHalf):
    # wrapper over the normal single player env, but loads the best self play model
    def __init__(self):
        """Wrap a fresh SlimeVolleyEnv and register this object as its policy."""
        super(SlimeVolleySelfPlayEnv, self).__init__(slimevolleygym.SlimeVolleyEnv())
        # NOTE(review): presumably the env queries `policy` for the
        # opponent's actions, making this object its own opponent -- confirm.
        self.policy = self
save_freq = 1000 def mutate(length, mutation_rate, mutation_sigma): # (not used, in case I wanted to do partial mutations) # create an additive mutation vector of some size mask = np.random.randint(int(1 / mutation_rate), size=length) mask = 1 - np.minimum(mask, 1) noise = np.random.normal(size=length) * mutation_sigma return mask * noise # Log results and get trained model location from library import util args = util.get_args(logdir='ga_selfplay_spike', modelpath='../zoo/ga_sp/ga.json') LOGDIR = args.logdir if not os.path.exists(LOGDIR): os.makedirs(LOGDIR) GA_PATH = args.modelpath # Create two instances of a feed forward policy we may need later. policy_left = Model(mlp.games['slimevolleylite']) policy_right = Model(mlp.games['slimevolleylite']) param_count = policy_left.param_count print("Number of parameters of the neural net policy:", param_count) # 273 for slimevolleylite # store our population here population = np.random.normal(
from library.action_wrapper import ConstantNoiseActionWrapper

# Settings
SEED = 17
NUM_TIMESTEPS = int(2e6)
EVAL_FREQ = int(1e5)
EVAL_EPISODES = int(1e2)
BEST_THRESHOLD = 0.5  # must achieve a mean score above this to replace prev best self

RENDER_MODE = False  # set this to false if you plan on running for full 1000 trials.

# Log results
from library import util

# NOTE(review): the two positional arguments look like the default log
# directory and default model path -- confirm against util.get_args.
args = util.get_args('ppo_selfplay_constant', '../zoo/ppo_sp/history_00000144.zip')
LOGDIR = args.logdir
# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(LOGDIR, exist_ok=True)

BEST_MODEL_PATH = args.modelpath
# Fail fast with a specific, readable error. FileNotFoundError is still an
# Exception subclass, so existing `except Exception` handlers keep working.
if not os.path.exists(BEST_MODEL_PATH):
    raise FileNotFoundError('File does not exist: {}'.format(BEST_MODEL_PATH))


class SlimeVolleySelfPlayEnv(ConstantNoiseActionWrapper):
    # wrapper over the normal single player env, but loads the best self play model
    def __init__(self):
        """Wrap a fresh SlimeVolleyEnv and register this object as its policy."""
        super(SlimeVolleySelfPlayEnv, self).__init__(slimevolleygym.SlimeVolleyEnv())
        # NOTE(review): presumably the env queries `policy` for the
        # opponent's actions, making this object its own opponent -- confirm.
        self.policy = self
from shutil import copyfile  # keep track of generations

# Settings
SEED = 17
NUM_TIMESTEPS = int(2e6)
EVAL_FREQ = int(1e5)
EVAL_EPISODES = int(1e2)
BEST_THRESHOLD = 0.5  # must achieve a mean score above this to replace prev best self

RENDER_MODE = False  # set this to false if you plan on running for full 1000 trials.

# Log results
from library import util
from library import wrapper

args = util.get_args(logdir='ppo_selfplay_spike', modelpath='../zoo/ppo_sp/history_00000144.zip')
LOGDIR = args.logdir
# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(LOGDIR, exist_ok=True)

BEST_MODEL_PATH = args.modelpath
# Fail fast with a specific, readable error. FileNotFoundError is still an
# Exception subclass, so existing `except Exception` handlers keep working.
if not os.path.exists(BEST_MODEL_PATH):
    raise FileNotFoundError('File does not exist: {}'.format(BEST_MODEL_PATH))


class SlimeVolleySelfPlayEnv(wrapper.SpikeWrapper):
    # wrapper over the normal single player env, but loads the best self play model
    def __init__(self):
        """Wrap a fresh SlimeVolleyEnv and register this object as its policy."""
        super(SlimeVolleySelfPlayEnv, self).__init__(slimevolleygym.SlimeVolleyEnv())
        # NOTE(review): presumably the env queries `policy` for the
        # opponent's actions, making this object its own opponent -- confirm.
        self.policy = self
from stable_baselines.common import set_global_seeds
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import bench, logger, PPO1
from stable_baselines.common.callbacks import EvalCallback
from library.action_wrapper import ConstantNoiseActionWrapper

NUM_TIMESTEPS = int(2e6)
SEED = 831
EVAL_FREQ = 200000
EVAL_EPISODES = 1000

# Log results
from library import util

# NOTE(review): the two positional arguments look like the default log
# directory and default model path -- confirm against util.get_args.
args = util.get_args('ppo_constant', '../zoo/ppo/best_model.zip')
LOGDIR = args.logdir
# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(LOGDIR, exist_ok=True)

BEST_MODEL_PATH = args.modelpath
# Fail fast with a specific, readable error. FileNotFoundError is still an
# Exception subclass, so existing `except Exception` handlers keep working.
if not os.path.exists(BEST_MODEL_PATH):
    raise FileNotFoundError('File does not exist: {}'.format(BEST_MODEL_PATH))


def make_env(seed):
    """Build a seeded SlimeVolley-v0 env wrapped in ConstantNoiseActionWrapper.

    Args:
        seed: Value passed to the environment's ``seed`` method.

    Returns:
        The wrapped, seeded environment.
    """
    env = ConstantNoiseActionWrapper(gym.make("SlimeVolley-v0"))
    env.seed(seed)
    return env