def main():
    # remote env
    env = grc.RemoteEnv('tmp/sock')
    # FIXME: DEBUG
    # import retro
    # env = retro.make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1')

    # load the policy
    name = 'learner_global'
    state = process_state(env.reset())
    test_policy = Policy(state.shape, env.action_space.n, name, act_int=False,
                         recover=True, sess=tf.Session(), pull_scope=name)

    # run the env
    lstm_state = test_policy.lstm_init_state
    while True:
        action, _, _, lstm_state = test_policy.act(state, lstm_state, explore=False)
        state, reward, done, _ = env.step(action)
        state = process_state(state)
        if done:
            # preprocess the first observation of the new episode as well
            state = process_state(env.reset())

def make_remote_env(stack=2, scale_rew=True, color=False, exp_type='obs',
                    exp_const=0.002, socket_dir='/tmp'):
    """
    Create an environment with some standard wrappers.
    """
    env = grc.RemoteEnv(socket_dir)
    env = BackupOriginalData(env)
    env = SonicDiscretizer(env)
    env = AllowBacktracking(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env, color)
    if exp_const > 0:
        if exp_type == 'obs':
            env = ObsExplorationReward(env, exp_const, game_specific=False)
        elif exp_type == 'x':
            env = XExplorationReward(env, exp_const, game_specific=False)
    if stack > 1:
        env = FrameStack(env, stack)
    env = ScaledFloatFrame(env)
    env = EpisodeInfo(env)
    return env

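# Hedged usage sketch (not part of the original sources): a minimal random-action
# rollout against an environment built by make_remote_env above. It assumes only
# the standard Gym API (reset/step/action_space) exposed by the wrapped
# gym_remote.client.RemoteEnv; the socket path is illustrative.
def _demo_remote_rollout(socket_dir='/tmp'):
    env = make_remote_env(socket_dir=socket_dir)
    obs = env.reset()
    episode_return = 0.0
    while True:
        # SonicDiscretizer turns the 12-button pad into a small discrete action
        # set, so sampling yields a single integer action index.
        obs, rew, done, info = env.step(env.action_space.sample())
        # rew already includes the RewardScaler scaling and, if enabled, the
        # bonus added by the exploration-reward wrappers.
        episode_return += rew
        if done:
            print('episode return: %.3f' % episode_return)
            obs = env.reset()
            episode_return = 0.0
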
def make_vec_env(extra_wrap_fn=None):
    if 'RETRO_ROOT_DIR' in os.environ:
        subenv_ids, subenvs = build_envs(extra_wrap_fn=extra_wrap_fn)
        return SubprocessVecEnv(zip(subenv_ids, subenvs))
    else:
        return DummyVecEnv([('tmp/sock',
                             lambda: wrap_env(grc.RemoteEnv('tmp/sock'),
                                              extra_wrap_fn=extra_wrap_fn))])

def main():
    """Run JERK on the attached environment."""
    env = grc.RemoteEnv('tmp/sock')
    # env = make(game='SonicTheHedgehog-Genesis',
    #            state='GreenHillZone.Act1',
    #            scenario='scenario.json')
    env = TrackedEnv(env)
    new_ep = True
    solutions = []
    while True:
        if new_ep:
            if (solutions and
                    random.random() < EXPLOIT_BIAS + env.total_steps_ever / TOTAL_TIMESTEPS):
                solutions = sorted(solutions, key=lambda x: np.mean(x[0]))
                best_pair = solutions[-1]
                new_rew = exploit(env, best_pair[1])
                best_pair[0].append(new_rew)
                print('replayed best with reward %f' % new_rew)
                continue
            else:
                env.reset()
                new_ep = False
        rew, new_ep = move(env, 100, False, 0.1)
        env.render()
        if not new_ep and rew <= 50:
            print('backtracking due to low reward: %f' % rew)
            _, new_ep = move(env, 200, left=True)
        if new_ep:
            solutions.append(([max(env.reward_history)], env.best_sequence()))

def main():
    """Run JERK on the attached environment."""
    env = grc.RemoteEnv('tmp/sock')
    env = TrackedEnv(env)
    new_ep = True
    solutions = []
    while True:
        if new_ep:
            if (solutions and
                    random.random() < EXPLOIT_BIAS + env.total_steps_ever / TOTAL_TIMESTEPS):
                solutions = sorted(solutions, key=lambda x: np.mean(x[0]))
                best_pair = solutions[-1]
                new_rew = exploit(env, best_pair[1])
                best_pair[0].append(new_rew)
                print('replayed best with reward %f' % new_rew)
                continue
            else:
                env.reset()
                new_ep = False
        rew, new_ep = move(env, 100)
        if not new_ep and rew <= 0:
            print('backtracking due to negative reward: %f' % rew)
            _, new_ep = move(env, 70, left=True)
        if new_ep:
            solutions.append(([max(env.reward_history)], env.best_sequence()))

def make_sonic_env(
        game,
        state,
        remote_env=False,
        scale_rew=True,
        video_dir="",
        short_life=False,
        backtracking=False,
):
    """
    Create an environment with some standard wrappers.
    """
    if remote_env:
        env = grc.RemoteEnv("tmp/sock")
    else:
        env = make(game=game, state=state, bk2dir=video_dir)
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    # if stack:
    #     env = FrameStack(env, 4)
    if short_life:
        env = ShortLife(env)
    if backtracking:
        env = AllowBacktracking(env)
    return env

def __init__(self, is_remote, game, state, max_timesteps, env_wrapper, do_render, monitor_path):
    if is_remote:
        import gym_remote.client as grc
        self.env = env_wrapper(grc.RemoteEnv('tmp/sock'), float('inf'), do_render, monitor_path)
    else:
        from retro_contest.local import make
        self.env = env_wrapper(make(game, state), max_timesteps, do_render, monitor_path)

def main():
    # Set up a new TrackedEnv that can keep track of total timesteps and store
    # previous best solutions.
    env = grc.RemoteEnv('tmp/sock')
    env = TrackedEnv(env)
    # new_ep keeps track of whether a new episode should be started.
    new_ep = True
    # solutions is an array of successful gameplay sequences as well as the
    # total reward earned from them.
    solutions = []
    while True:
        if new_ep:
            if (solutions and
                    random.random() < EXPLOIT_BIAS + env.total_steps_ever / TOTAL_TIMESTEPS):
                solutions = sorted(solutions, key=lambda x: np.mean(x[0]))
                best_pair = solutions[-1]
                new_rew = exploit(env, best_pair[1])
                best_pair[0].append(new_rew)
                print('replayed best with reward %f' % new_rew)
                continue
            else:
                env.reset()
                new_ep = False
        rew, new_ep = move(env, 100)
        if not new_ep and rew <= 0:
            print('backtracking due to negative reward: %f' % rew)
            _, new_ep = move(env, 70, left=True)
        if new_ep:
            solutions.append(([max(env.reward_history)], env.best_sequence()))

def make_env(stack=True, scale_rew=True, local=False, level_choice=None):
    """
    Create an environment with some standard wrappers.
    """
    print(stack, scale_rew, local)
    if local:
        # Select a random level if running locally
        from retro_contest.local import make
        levels = ['SpringYardZone.Act3',
                  'SpringYardZone.Act2',
                  'GreenHillZone.Act3',
                  'GreenHillZone.Act1',
                  'StarLightZone.Act2',
                  'StarLightZone.Act1',
                  'MarbleZone.Act2',
                  'MarbleZone.Act1',
                  'MarbleZone.Act3',
                  'ScrapBrainZone.Act2',
                  'LabyrinthZone.Act2',
                  'LabyrinthZone.Act1',
                  'LabyrinthZone.Act3']
        if not level_choice:
            level_choice = levels[random.randrange(0, 13, 1)]
        else:
            level_choice = levels[level_choice]
        env = make(game='SonicTheHedgehog-Genesis', state=level_choice)
    else:
        print('connecting to remote environment')
        env = grc.RemoteEnv('tmp/sock')
        print('starting episode')
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env

def get_environment():
    if is_local:
        from retro_contest.local import make
        env = make(game='SonicTheHedgehog-Genesis', state='LabyrinthZone.Act1')
    else:
        import gym_remote.exceptions as gre
        import gym_remote.client as grc
        env = grc.RemoteEnv('tmp/sock')
    return env

def main():
    #env = grc.RemoteEnv('tmp/sock')
    #env_data = get_training_envs()
    if len(sys.argv) < 4:
        print('Usage: python jerk_agent_train.py game state process_count')
        sys.exit()
    game = sys.argv[1]
    state = sys.argv[2]
    process_count = sys.argv[3]
    sockets = []
    for index in range(int(process_count)):
        #game, state = random.choice(env_data)
        # retro-contest-remote run -s tmp/sock -m monitor -d SonicTheHedgehog-Genesis GreenHillZone.Act1
        state_directory_name = state + '-' + str(index)
        base_dir = './remotes/'
        os.makedirs(base_dir, exist_ok=True)
        os.makedirs(base_dir + state_directory_name, exist_ok=True)
        socket_dir = base_dir + "{}/sock".format(state_directory_name)
        os.makedirs(socket_dir, exist_ok=True)
        monitor_dir = base_dir + "{}/monitor".format(state_directory_name)
        os.makedirs(monitor_dir, exist_ok=True)
        subprocess.Popen([
            "retro-contest-remote", "run", "-s", socket_dir, '-m', monitor_dir,
            '-d', game, state
        ], stdout=subprocess.PIPE)
        print('launched {} ({})'.format(state, index))
        sockets.append(socket_dir)
        #envs.append(lambda: make_env(socket_dir=state + '/sock'))
    print('remote processes launched')
    #env = lambda: make_training_env('SonicTheHedgehog-Genesis', 'GreenHillZone.Act1', stack=True, scale_rew=True)
    #env = MultigameEnvWrapper
    #load_path = '/root/compo/trained_on_images_nature_cnn.joblib'
    #load_path = './saved_weights.joblib'
    #logger.configure(dir='./logs', format_strs=['stdout', 'tensorboard'])
    print('training...')
    global_solutions = []
    agents = []
    for socket_dir in sockets:
        env = grc.RemoteEnv(socket_dir)
        agent = JerkAgent(env, global_solutions)
        agents.append(agent)
    for agent in agents:
        agent.start()
    print('Created {} agents'.format(len(agents)))
    for agent in agents:
        agent.join()

def make_env(stack=True, scale_rew=True, frame_wrapper=WarpFrame, reward_type=None):
    """
    Create an environment with some standard wrappers.
    """
    env = grc.RemoteEnv('tmp/sock')
    return wrap_env(env, stack, scale_rew, frame_wrapper, reward_type)

def get_environment(environment):
    """
    Return a local or remote environment as requested.
    """
    if environment in ['aws', 'local']:
        from retro_contest.local import make
        env = make(game='SonicTheHedgehog-Genesis', state='LabyrinthZone.Act1')
    else:
        import gym_remote.exceptions as gre
        import gym_remote.client as grc
        env = grc.RemoteEnv('tmp/sock')
    return env

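# Hedged usage sketch (not part of the original sources): both get_environment
# variants above import gym_remote.exceptions as gre without using it. The
# contest's agent boilerplate typically wraps the entry point so that remote-side
# errors (for example, the evaluation time limit being reached) end the agent
# cleanly. main() here is a stand-in for whichever entry point the agent defines.
if __name__ == '__main__':
    import gym_remote.exceptions as gre
    try:
        main()  # stand-in for the agent's actual entry point
    except gre.GymRemoteError as exc:
        print('exception', exc)
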
def make_env(extra_wrap_fn=None):
    if 'RETRO_RECORD' in os.environ:
        from retro_contest.local import make
        game = os.environ['RETRO_GAME']
        state = os.environ['RETRO_STATE']
        env_id = game + "-" + state
        env = make(game=game, state=state, bk2dir=os.environ['RETRO_RECORD'])
    else:
        env_id = 'tmp/sock'
        env = grc.RemoteEnv('tmp/sock')
    return env_id, wrap_env(env)

def create_env(env_name, env_state, contest=False, human=False):
    if human:
        env = SonicDiscretizer(
            retro.make(env_name, env_state, scenario="contest",
                       use_restricted_actions=retro.ACTIONS_FILTERED))
    elif not contest:
        env = SonicDiscretizer(make(env_name, env_state))
    else:
        env = SonicDiscretizer(grc.RemoteEnv('tmp/sock'))
    return env

def main():
    # Connect to remote environment (from simple-agent.py)
    print('connecting to remote environment')
    env = grc.RemoteEnv('tmp/sock')
    print('starting episode')
    env.reset()
    # This bot is just a kludge that tries to remember things if it moves forward,
    # and tries to remember the highest-reward sequence of actions.
    #obs = env.reset()
    c = 0
    lenActions = len(env.action_space.sample())
    jerkQueue = np.reshape(env.action_space.sample(), (1, lenActions))
    myActions = jerkQueue
    oldX = 0.
    bestReward = 0.
    rewAcc = 0.
    numRuns = 0.
    avgReward = 0.
    while True:
        if c < len(jerkQueue):
            # Replay the best remembered action sequence first.
            act = jerkQueue[c, ...]
        else:
            act = env.action_space.sample()
            #if (np.random.random()>0.05):
            act[7] = 1  # force the RIGHT button (index 7) on sampled actions
        obs, reward, done, info = env.step(act)
        rewAcc += reward
        if reward > 0:
            # Remember the action when it was rewarded.
            myActions = np.append(myActions, np.reshape(act, (1, lenActions)), axis=0)
        c += 1
        if done:
            print('episode complete')
            if rewAcc > bestReward:
                bestReward = rewAcc
                jerkQueue = myActions
                #print('new best reward: %.3f based on a %i action JERK queue' % (bestReward, len(jerkQueue)))
            avgReward = 0.95 * avgReward + 0.05 * rewAcc
            #print('avg. reward: %.3f, acc. reward: %.3f, best reward: %.3f, current JERK queue %i vs actions length %i' % (avgReward, rewAcc, bestReward, len(jerkQueue), len(myActions)))
            # Attenuate the best reward (to diminish the effect of lucky runs).
            bestReward *= 0.95  #* bestReward
            env.reset()
            rewAcc = 0.
            numRuns += 1
            oldX = 0.
            c = 0
            myActions = np.reshape(env.action_space.sample(), (1, lenActions))

def make_env(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    env = grc.RemoteEnv('tmp/sock')
    env = CustomSonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env

def main():
    print('connecting to remote environment')
    env = grc.RemoteEnv('tmp/sock')
    print('starting episode')
    env.reset()
    while True:
        action = env.action_space.sample()
        # Always hold RIGHT (button index 7 in the 12-button Genesis layout).
        action[7] = 1
        _, _, done, _ = env.step(action)
        if done:
            print('episode complete')
            env.reset()

def make_remote_env(stack=True, scale_rew=True, socket_dir='/tmp'):
    """
    Create an environment with some standard wrappers.
    """
    env = grc.RemoteEnv(socket_dir)
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    env = EpisodeInfo(env)
    return env

def make_env(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    env = grc.RemoteEnv('tmp/sock')
    # env = make(game='SonicTheHedgehog-Genesis', state='LabyrinthZone.Act1')
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env

def make_batched_env(extra_wrap_fn=None):
    if 'RETRO_ROOT_DIR' in os.environ:
        subenv_ids, subenvs = build_envs(extra_wrap_fn=extra_wrap_fn)
        env = batched_gym_env(subenvs, sync=False)
        #env = BatchedGymEnv([[subenv() for subenv in subenvs]])
        env.env_ids = subenv_ids
        return env
    else:
        env = BatchedGymEnv([[
            wrap_env(grc.RemoteEnv('tmp/sock'), extra_wrap_fn=extra_wrap_fn)
        ]])
        env.env_ids = ['tmp/sock']
        return env

def make_env(game, state, stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    #env = AllowBacktracking(make(game=game, state=state))
    env = AllowBacktracking(grc.RemoteEnv('tmp/sock'))
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env

def make_env(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    env = grc.RemoteEnv('tmp/sock')
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    #env = WarpFrame(env)
    # Disabling FrameStack because it currently uses a different Box
    # (Box(0, 255, w, h) compared to Box(0, 1, w, h)).
    #if stack:
    #    env = FrameStack(env, 4)
    env = CustomWarpFrame(env)
    env = NormalizedEnv(env)
    return env

def main():
    print('connecting to remote environment')
    env = grc.RemoteEnv('tmp/sock')
    print('starting episode')
    env.reset()
    # Accumulate reward over the whole episode, not per step.
    rewAcc = 0
    while True:
        action = env.action_space.sample()
        action[7] = 1
        ob, reward, done, _ = env.step(action)
        rewAcc += reward
        if done:
            print('episode complete with reward = %.3f' % rewAcc)
            rewAcc = 0
            env.reset()

def make(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    #env = retro.make(game, state)
    print("Connecting to remote environment")
    env = grc.RemoteEnv('/root/compo/tmp/sock')
    print("Starting preproc on env")
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env

def launch_env(game, state):
    #game, state = random.choice(env_data)
    # retro-contest-remote run -s tmp/sock -m monitor -d SonicTheHedgehog-Genesis GreenHillZone.Act1
    base_dir = './remotes/'
    os.makedirs(base_dir, exist_ok=True)
    os.makedirs(base_dir + state, exist_ok=True)
    socket_dir = base_dir + "{}/sock".format(state)
    os.makedirs(socket_dir, exist_ok=True)
    monitor_dir = base_dir + "{}/monitor".format(state)
    os.makedirs(monitor_dir, exist_ok=True)
    subprocess.Popen([
        "retro-contest-remote", "run", "-s", socket_dir, '-m', monitor_dir,
        '-d', game, state
    ], stdout=subprocess.PIPE)
    return grc.RemoteEnv(socket_dir)

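# Hedged usage sketch (not part of the original sources): drives the raw
# RemoteEnv returned by launch_env above with random actions. The game/state
# names are only illustrative, and it assumes the standard Gym API
# (reset/step/close) plus the retro-contest-remote process started by launch_env.
def _demo_launch_env():
    env = launch_env('SonicTheHedgehog-Genesis', 'GreenHillZone.Act1')
    obs = env.reset()
    for _ in range(1000):
        # Without SonicDiscretizer the action is a length-12 binary button array.
        obs, rew, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    env.close()
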
def make_sonic_test(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    env = grc.RemoteEnv('tmp/sock')
    #env_id = "SonicTheHedgehog-Genesis,GreenHillZone.Act1"
    #game, state = env_id.split(',')
    #env = make(game=game, state=state)
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env

def main():
    """Run JERK on the attached environment."""
    env = grc.RemoteEnv('tmp/sock')
    env = SonicDiscretizer(env)
    env = RewardScaler(env)
    env = WarpFrame(env)
    env = FrameStack(env, 4)
    print('action space')
    print(env.action_space)
    print('observation space')
    print(env.observation_space)
    agent = JerkAgent(env)
    agent.train()
    return

def test():
    agent = CNQAgent(12, AGENT_NAME)
    agent.load_model()
    agent.epsilon = 0.1
    print('connecting to remote environment')
    env = grc.RemoteEnv('tmp/sock')
    # env = make(game='SonicTheHedgehog-Genesis', state='SpringYardZone.Act3')
    print('starting episode')
    state = env.reset()
    reward = 0
    action = np.zeros(12)
    while True:
        state = np.reshape(state, (1, 224, 320, 3))
        action = agent.act(state, action, reward)
        state, reward, done, _ = env.step(action)
        if done:
            print('episode complete')
            # keep the first observation of the new episode as the next state
            state = env.reset()

def main():
    """Run JERK on the attached environment."""
    env = grc.RemoteEnv('tmp/sock')
    #env = launch_env('SonicTheHedgehog-Genesis', 'SpringYardZone.Act1')
    env = SonicDiscretizer(env)
    env = RewardScaler(env)
    env = WarpFrame(env)
    env = FrameStack(env, 4)
    print('action space')
    print(env.action_space)
    print('observation space')
    print(env.observation_space)
    agent = JerkAgent(env)
    agent.train()
    return