def main(job_id, input_params):
    import sys
    import logging

    import gym

    gym.undo_logger_setup()
    logger = logging.getLogger()
    logger.setLevel(logging.ERROR)

    datasets = ["CartPole-v0", "Acrobot-v0", "MountainCar-v0", "Pendulum-v0"]

    params = LeeaParams()
    params.parent_fitness_decay = input_params["parent_fitness_decay"][0]
    params.mutation_power_decay = 0.99
    params.sexual_reproduction_proportion = 0.5
    params.population_size = input_params["population_size"][0]
    params.starting_mutation_power = input_params["starting_mutation_power"][0]
    params.mutation_power = params.starting_mutation_power
    params.mutation_rate = input_params["mutation_rate"][0]
    params.selection_proportion = input_params["selection_proportion"][0]

    max_evaluations = 5000
    architecture = "simple"
    network_size = 20

    cum_reward = 0
    for env_name in datasets:
        step_limit = gym.envs.registry.spec(env_name).timestep_limit
        build_network = net_configuration(architecture, network_size, env_name)
        reward = train_network(env_name, step_limit, max_evaluations, build_network, params)
        if env_name == "Pendulum-v0":
            reward /= 10
        print(reward, reward / 100)
        cum_reward += reward
    return -cum_reward
def build_env(env_id):
    gym.undo_logger_setup()
    env = gym.make(env_id)
    if env_id.endswith('NoFrameskip-v4'):
        env = wrap_deepmind(env)
    return env
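A minimal usage sketch for build_env above; the environment id is just an example, and wrap_deepmind is assumed to already be imported from the project's Atari wrappers:

# Hypothetical usage of build_env; 'PongNoFrameskip-v4' is an example id.
env = build_env('PongNoFrameskip-v4')  # ids ending in NoFrameskip-v4 get the DeepMind wrappers
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())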
def setup(exp, single_threaded):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util

    config = Config(**exp['config'])

    # from util.MultiStateEnv import MultiStateEnv
    #
    # env = MultiStateEnv(game='SonicTheHedgehog-Genesis',
    #                     states=['GreenHillZone.Act1', 'GreenHillZone.Act2', 'GreenHillZone.Act3',
    #                             'LabyrinthZone.Act1', 'LabyrinthZone.Act2', 'LabyrinthZone.Act3',
    #                             'MarbleZone.Act1', 'MarbleZone.Act2', 'MarbleZone.Act3',
    #                             'ScrapBrainZone.Act1', 'ScrapBrainZone.Act2',
    #                             'SpringYardZone.Act1', 'SpringYardZone.Act2', 'SpringYardZone.Act3',
    #                             'StarLightZone.Act1', 'StarLightZone.Act2', 'StarLightZone.Act3'])
    #
    # from gym.wrappers.time_limit import TimeLimit
    # env = TimeLimit(env, max_episode_steps=env.spec.max_episode_steps,
    #                 max_episode_seconds=env.spec.max_episode_seconds)

    env = gym.make(exp['env_id'])
    if exp['env_id'].endswith('NoFrameskip-v4'):
        from .atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return config, env, sess, policy
def setup_env(exp):
    import gym
    gym.undo_logger_setup()
    config = Config(**exp['config'])
    env = gym.make(exp['env_id'])
    if exp['policy']['type'] == "ESAtariPolicy":
        from .atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)
    return config, env
def testA2C():
    render = False
    filename = 'testA2C.h5'
    resume = False
    # resume = True
    # render = True
    gym.undo_logger_setup()  # Stop gym logging
    actionSpace = [2, 3]
    agent = A2C_OneGame(2, 1024, actionSpace, filename, resume=resume)
    game = Game('Pong-v0', agent, render=render, logfile='test.log')
    game.play()
def test():
    render = False
    filename = 'test.h5'
    resume = False
    # filename = 'pong_gym_keras_mlp_full_batch.h5'
    # resume = True
    # render = True
    gym.undo_logger_setup()  # Stop gym logging
    agent = KarpathyPolicyPong(filename, resume=resume)
    game = Game('Pong-v0', agent, render=render, logfile='test.log')
    game.play()
def test():
    gym.undo_logger_setup()
    # Get the environment.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    model_dump_filepath = 'pretrained_models/ddpg_{}_weights.h5f'.format(ENV_NAME)
    agent = build_agent(env)
    agent.load_weights(model_dump_filepath)
    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=200)
def setup(exp, single_threaded):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util
    config = Config(**exp['config'])
    env = gym.make(exp['env_id'])
    if exp['env_id'].endswith('NoFrameskip-v4'):
        from .atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return config, env, sess, policy
def load_network(env):
    ENV_NAME = 'Carom-v0'
    gym.undo_logger_setup()
    # Get the environment and extract the number of actions.
    np.random.seed(323)
    env.seed(323)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    # Next, we build a very simple actor model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(16))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('linear'))

    # The critic takes both the action and the observation as input.
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    memory = SequentialMemory(limit=50000, window_length=1)
    agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                      gamma=.99, target_model_update=1e3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    agent.load_weights('ddpg_{}_2balls_final_weights_v4.h5f'.format(ENV_NAME))
    return agent
def setup(exp, single_threaded):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util
    config = Config(**exp['config'])
    env = gym.make(exp['env_id'])
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return config, env, sess, policy
def make_multi_pong(frame_stack=True):
    import gym
    import retro
    from baselines.common.atari_wrappers import FrameStack
    gym.undo_logger_setup()
    game_env = env = retro.make('Pong-Atari2600', players=2)
    env = RetroALEActions(env, game_env.BUTTONS, n_players=2)
    env = NoReward(env)
    env = FrameSkip(env, 4)
    env = ProcessFrame84(env, crop=False)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
def make_multi_pong(frame_stack=True):
    import gym
    import retro
    from baselines.common.atari_wrappers import FrameStack  # needed by the frame_stack branch below
    gym.undo_logger_setup()
    game_env = env = retro.make("Pong-Atari2600", players=2)
    env = RetroALEActions(env, game_env.BUTTONS, n_players=2)
    env = NoReward(env)
    env = FrameSkip(env, 4)
    env = ProcessFrame84(env, crop=False)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
def setup_env(exp):
    import gym
    gym.undo_logger_setup()
    config = Config(**exp['config'])
    if exp['env_id'] == "DeceptivePointEnv-v0":
        # Small hack: make the custom point_env module importable before gym.make.
        import sys
        sys.path.append("../envs")
        import point_env
    env = gym.make(exp['env_id'])
    if exp['policy']['type'] == "ESAtariPolicy":
        from .atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)
    return config, env
def _parallel_worker(parent, env, seed, connection):
    gym.undo_logger_setup()
    env = gym.make(env)
    env.seed(seed)
    connection.send(parent._maybe_discretize(env.reset()))
    try:
        while True:
            action = connection.recv()
            state, reward, done, info = env.step(action)
            if done:
                state = env.reset()
            connection.send((parent._maybe_discretize(state), reward, done, info))
    except KeyboardInterrupt:
        pass
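A sketch of how _parallel_worker might be launched; the multiprocessing plumbing and the agent object below are assumptions, not part of the original snippet. Note that the worker sends the initial observation right after reset, so the parent must recv() once before sending its first action:

import multiprocessing as mp

def start_worker(agent, env_id, seed):
    # Connected pipe endpoints: the worker gets one end, we keep the other.
    parent_conn, worker_conn = mp.Pipe()
    proc = mp.Process(target=_parallel_worker,
                      args=(agent, env_id, seed, worker_conn), daemon=True)
    proc.start()
    first_state = parent_conn.recv()  # initial observation sent by the worker
    return proc, parent_conn, first_state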
def main(self):
    gym.undo_logger_setup()
    self.stats.start()
    self.dynamic_adjustment.start()

    if Config.PLAY_MODE:
        for trainer in self.trainers:
            trainer.enabled = False

    learning_rate_multiplier = (Config.LEARNING_RATE_END - Config.LEARNING_RATE_START) / Config.ANNEALING_EPISODE_COUNT
    beta_multiplier = (Config.BETA_END - Config.BETA_START) / Config.ANNEALING_EPISODE_COUNT

    while self.stats.episode_count.value < Config.EPISODES:
        step = min(self.stats.episode_count.value, Config.ANNEALING_EPISODE_COUNT - 1)
        self.model.learning_rate = Config.LEARNING_RATE_START + learning_rate_multiplier * step
        self.model.beta = Config.BETA_START + beta_multiplier * step

        # Saving is async - even if we start saving at a given episode, we may save the model at a later episode.
        if Config.SAVE_MODELS and self.stats.should_save_model.value > 0:
            print("Saving GA3C model!")
            self.save_model()
            self.stats.should_save_model.value = 0

        if self.reward_modifier:
            ################################
            #  START REWARD MODIFICATIONS  #
            ################################
            if not self.reward_modifier_q.empty():
                source_id, done, path = self.reward_modifier_q.get()
                rewards = self.reward_modifier.predict_reward(path)
                if done:
                    self.reward_modifier.path_callback(path)
                self.agents[source_id].wait_q.put(rewards)
            ################################
            #   END REWARD MODIFICATIONS   #
            ################################
        time.sleep(0.01)

    self.dynamic_adjustment.exit_flag = True
    while self.agents:
        self.remove_agent()
    while self.predictors:
        self.remove_predictor()
    while self.trainers:
        self.remove_trainer()
def env(self):
    if self._env is None:
        import gym
        gym.undo_logger_setup()  # Get rid of gym logging
        # logger = logging.getLogger()
        # logger.addHandler(logging.StreamHandler(sys.stdout))
        self.pre_init_hook()
        self._env = self._create_gym_env(self.OPEN_AI_GYM_ENV_NAME)
        if self.runner.run_mode == 'run' and not self.runner.render:
            # We only record video in "run" mode (not training mode).
            self._env = monitor.UniMonitor(self._env)
    return self._env
def getEnv(visualize=False):
    if ENV_TAG == 'OSIM':
        e = env(difficulty=0, visualize=visualize, rewardMode=15, rewardScale=1.,
                action_repetition=3, pel_min=0.6)
    elif ENV_TAG in ('GYM', 'ROBOSCHOOL'):
        gym.undo_logger_setup()
        e = gym.make('RoboschoolInvertedPendulum-v1')
        # e.seed(0)
        if visualize:
            e = gv(e)
    else:
        raise RuntimeError('No valid environment selected!')
    return e
def test_smoke(env_id):
    """Check that environments start up without errors and that we can
    extract rewards and observations."""
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)
    env = gym.make(env_id)
    env = wrappers.Unvectorize(env)
    if os.environ.get('FORCE_LATEST_UNIVERSE_DOCKER_RUNTIMES'):
        # Used to test universe-envs in CI
        configure_with_latest_docker_runtime_tag(env)
    else:
        env.configure(remotes=1)
    env.reset()
    _rollout(env, timestep_limit=60 * 30)  # Check a rollout
def setup(exp, single_threaded):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util
    config = Config(**exp['config'])
    # env = gym.make(exp['env_id'])
    # if exp['env_id'].endswith('NoFrameskip-v4'):
    from .atari_wrappers import wrap_deepmind
    game_states = pd.read_csv("train_small.csv").values.tolist()
    env = wrap_deepmind('SonicTheHedgehog-Genesis', 'LabyrinthZone.Act1', game_states=game_states)
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return config, env, sess, policy
def make_mario_env(crop=True, frame_stack=True, clip_rewards=False):
    assert clip_rewards is False
    import gym
    import retro
    from baselines.common.atari_wrappers import FrameStack
    gym.undo_logger_setup()
    env = retro.make('SuperMarioBros-Nes', 'Level1-1')
    buttons = env.BUTTONS
    env = MarioXReward(env)
    env = FrameSkip(env, 4)
    env = ProcessFrame84(env, crop=crop)
    if frame_stack:
        env = FrameStack(env, 4)
    env = LimitedDiscreteActions(env, buttons)
    return env
def train_cartpole_nnet():
    # from test import CartPoleContEnv
    ENV_NAME = 'CartPole-v0'
    gym.undo_logger_setup()

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))

    # Finally, we configure and compile our agent. You can use every built-in
    # Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=60000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=100, target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! Visualizing training slows it
    # down quite a lot. You can always safely abort training prematurely
    # using Ctrl + C.
    dqn.fit(env, nb_steps=60000, visualize=False, verbose=2)

    # Get the trained model weights.
    weights = model.get_weights()

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
    return weights
def train_module(env, agent):
    """Training via chainerrl's built-in modules."""
    import logging
    import sys
    import gym
    gym.undo_logger_setup()  # Turn off gym's default logger settings
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='')

    chainerrl.experiments.train_agent_with_evaluation(
        agent, env,
        steps=10000,            # Run the agent for 10000 steps in total
        eval_n_runs=5,          # Run 5 evaluation episodes per evaluation phase
        max_episode_len=200,    # Maximum number of steps per episode
        eval_frequency=1000,    # Evaluate every 1000 steps
        outdir='agent/result')  # Save everything to the 'agent/result' directory
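A hedged sketch of constructing an env and agent to pass into train_module; the names follow the chainerrl quickstart (FCStateQFunctionWithDiscreteAction, ConstantEpsilonGreedy, DoubleDQN), but exact keyword arguments vary across chainerrl versions, so treat this as an assumption-laden example rather than the snippet author's actual setup:

import numpy as np
import gym
import chainer
import chainerrl

env = gym.make('CartPole-v0')
obs_size = env.observation_space.shape[0]
n_actions = env.action_space.n

# Simple fully-connected Q-function over discrete actions.
q_func = chainerrl.q_functions.FCStateQFunctionWithDiscreteAction(
    obs_size, n_actions, n_hidden_channels=50, n_hidden_layers=2)
optimizer = chainer.optimizers.Adam(eps=1e-2)
optimizer.setup(q_func)
explorer = chainerrl.explorers.ConstantEpsilonGreedy(
    epsilon=0.3, random_action_func=env.action_space.sample)
replay_buffer = chainerrl.replay_buffer.ReplayBuffer(capacity=10 ** 5)
agent = chainerrl.agents.DoubleDQN(
    q_func, optimizer, replay_buffer, gamma=0.95,
    explorer=explorer, replay_start_size=500,
    phi=lambda x: x.astype(np.float32, copy=False))  # Chainer expects float32

train_module(env, agent)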
def get_new_env(env_name, cmdl):
    """Configure the training environment and return an instance."""
    import logging
    import gym
    import gym_fast_envs  # noqa
    from gym.wrappers import Monitor

    # Undo the default logger and configure a new one.
    gym.undo_logger_setup()
    logger = logging.getLogger()
    logger.setLevel(logging.WARNING)

    # Configure environment
    outdir = '/tmp/nec/%s-results' % cmdl.label
    env = gym.make(env_name)
    env = Monitor(env, directory=outdir, force=True, video_callable=False)
    env.seed(cmdl.seed)
    return env
def setup(exp, single_threaded):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util
    config = Config(**exp['config'])
    if exp['env_id'] == "DeceptivePointEnv-v0":
        # Small hack: make the custom point_env module importable before gym.make.
        import sys
        sys.path.append("../envs")
        import point_env
    env = gym.make(exp['env_id'])
    if exp['policy']['type'] == "ESAtariPolicy":
        from .atari_wrappers import wrap_deepmind
        env = wrap_deepmind(env)
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return config, env, sess, policy
def setup_env(exp):
    import gym
    gym.undo_logger_setup()
    config = Config(**exp['config'])

    if not isinstance(exp['env_id'], (list,)):
        env = gym.make(exp['env_id'])
        if exp['policy']['type'] == "ESAtariPolicy":
            from .atari_wrappers import wrap_deepmind
            env = wrap_deepmind(env)
        return config, [env]

    envs = []
    for env_id in exp['env_id']:
        env = gym.make(env_id)
        if exp['policy']['type'] == "ESAtariPolicy":
            from .atari_wrappers import wrap_deepmind
            env = wrap_deepmind(env)
        envs.append(env)
    return config, envs
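A sketch of the exp dict this setup_env reads; the keys mirror how the function accesses them, but the concrete values (and whatever fields Config requires) are assumptions:

# Hypothetical experiment descriptions, single-env and multi-env.
exp_single = {
    'config': {},  # fields depend on the Config class
    'env_id': 'PongNoFrameskip-v4',
    'policy': {'type': 'ESAtariPolicy', 'args': {}},
}
exp_multi = dict(exp_single, env_id=['PongNoFrameskip-v4', 'BreakoutNoFrameskip-v4'])

config, envs = setup_env(exp_multi)  # always returns a list of envs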
def setup(exp, single_threaded, snapshot_file=None):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util
    config = Config(**exp['config'])
    env = gym.make(exp['env_id'])
    session = make_session(single_threaded=single_threaded)
    if snapshot_file is None:
        policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args'])
        running_from_snapshot = False
    else:
        print("[master] Initializing agent weights using a snapshot...")
        policy = getattr(policies, exp['policy']['type']).Load(snapshot_file)
        running_from_snapshot = True
    tf_util.initialize()
    return config, env, session, policy, running_from_snapshot
def __init__(self, log):
    """Initialize default configuration."""
    # Some libraries add logging handlers by default without documenting
    # it at all (thanks, GPy), so stop propagation.
    log.propagate = False
    self.log = log
    self.n_jobs = 1
    self.monitor_verbosity = 0
    self._stream_handler = None
    self._file_handler = None
    self._fmt = ('%(process)d - %(asctime)s - %(name)s - %(levelname)s'
                 ' - %(message)s')
    self._formatter = logging.Formatter(self._fmt)
    try:
        import gym
        gym.undo_logger_setup()
    except Exception:
        pass
def test_nice_vnc_semantics_match(spec, matcher, wrapper):
    # Check that when running over VNC or using the raw environment,
    # semantics match exactly.
    gym.undo_logger_setup()
    logging.getLogger().setLevel(logging.INFO)
    spaces.seed(0)

    vnc_env = spec.make()
    if vnc_env.metadata.get('configure.required', False):
        vnc_env.configure(remotes=1)
    vnc_env = wrapper(vnc_env)
    vnc_env = wrappers.Unvectorize(vnc_env)

    env = gym.make(spec._kwargs['gym_core_id'])

    env.seed(0)
    vnc_env.seed(0)

    # Check that reset observations work
    reset(matcher, env, vnc_env, stage='initial reset')

    # Check a full rollout
    rollout(matcher, env, vnc_env, timestep_limit=50, stage='50 steps')

    # Reset to start a new episode
    reset(matcher, env, vnc_env, stage='reset to new episode')

    # Check that a step into the next episode works
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='1 step in new episode')

    # Make sure env can be reseeded
    env.seed(1)
    vnc_env.seed(1)
    reset(matcher, env, vnc_env, 'reseeded reset')
    rollout(matcher, env, vnc_env, timestep_limit=1, stage='reseeded step')
def env_factory(cmdl, mode):
    # Undo the default logger and configure a new one.
    gym.undo_logger_setup()
    logger = logging.getLogger()
    logger.setLevel(logging.WARNING)

    print(clr("[Main] Constructing %s environment." % mode, attrs=['bold']))
    env = gym.make(cmdl.env_name)

    if hasattr(cmdl, 'rescale_dims'):
        state_dims = (cmdl.rescale_dims, cmdl.rescale_dims)
    else:
        state_dims = env.observation_space.shape[0:2]

    env_class, hist_len, cuda = cmdl.env_class, cmdl.hist_len, cmdl.cuda

    if mode == "training":
        env = PreprocessFrames(env, env_class, hist_len, state_dims, cuda)
        if hasattr(cmdl, 'reward_clamp') and cmdl.reward_clamp:
            env = SqueezeRewards(env)
        if hasattr(cmdl, 'done_after_lost_life') and cmdl.done_after_lost_life:
            env = DoneAfterLostLife(env)
        print('-' * 50)
        return env
    elif mode == "evaluation":
        if cmdl.eval_env_name != cmdl.env_name:
            print(clr("[%s] Warning! evaluating on a different env: %s"
                      % ("Main", cmdl.eval_env_name), 'red', attrs=['bold']))
            env = gym.make(cmdl.eval_env_name)
        env = PreprocessFrames(env, env_class, hist_len, state_dims, cuda)
        env = EvaluationMonitor(env, cmdl)
        print('-' * 50)
        return env
def setup(exp, single_threaded):
    import gym
    gym.undo_logger_setup()
    from . import policies, tf_util
    config = Config(**exp['config'])
    if 'env_id' in exp:
        env = gym.make(exp['env_id'])
    elif 'env_target' in exp:
        # Instantiate a custom environment from a "module:ClassName" target.
        env_target = exp['env_target']
        (module_str, cls_str) = env_target.split(":")
        module = importlib.import_module(module_str)
        cls = getattr(module, cls_str)
        env = cls(**exp['env_params'])
    else:
        raise NotImplementedError
    sess = make_session(single_threaded=single_threaded)
    policy = getattr(policies, exp['policy']['type'])(env.observation_space, env.action_space, **exp['policy']['args'])
    tf_util.initialize()
    return config, env, sess, policy
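For the 'env_target' branch above, the string is split on ':' into a module path and a class name, imported, and instantiated with env_params. A hypothetical exp exercising that path (the module, class, and params below are invented for illustration):

exp = {
    'config': {},                                 # fields depend on the Config class
    'env_target': 'my_package.envs:MyCustomEnv',  # assumed "module:Class" target
    'env_params': {'difficulty': 1},              # forwarded as MyCustomEnv(**env_params)
    'policy': {'type': 'MLPPolicy', 'args': {}},
}
config, env, sess, policy = setup(exp, single_threaded=True)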
from copy import deepcopy
import time

import torch
import gym
from tensorboardX import SummaryWriter

from normalized_env import *
# from pybullet_envs.bullet.racecarGymEnv import RacecarGymEnv
# from pybullet_envs.bullet.kukaGymEnv import KukaGymEnv
from evaluator import Evaluator
from ddpg import DDPG
from util import *
from observation_processor import queue
from multi import fastenv
# from llll import Subprocess

gym.undo_logger_setup()

writer = SummaryWriter()


def train(num_iterations, agent, env, evaluate, bullet):
    fenv = fastenv(env, args.action_repeat, args.vis, args.atari)
    window_length = args.window_length
    validate_interval = args.validate_interval
    save_interval = args.save_interval
    max_episode_length = args.max_episode_length // args.action_repeat
    debug = args.debug
    visualize = args.vis
    traintimes = args.traintimes
    output = args.output