def _get_optimizer(self, name):
    from tensorpack.tfutils import optimizer
    from tensorpack.tfutils.gradproc import SummaryGradient, MapGradient
    import tensorpack.tfutils.symbolic_functions as symbf

    # Actor and critic get separate learning rates and separate optimizers.
    init_lr = INIT_LEARNING_RATE_A if name == 'actor' else INIT_LEARNING_RATE_C
    lr = symbf.get_scalar_var('learning_rate/' + name, init_lr, summary=True)
    opt = tf.train.AdamOptimizer(lr)
    logger.info("creating optimizer for {}".format(name))

    # Clip gradients per sub-network; the critic uses a tighter clip than the actor.
    if name == 'critic':
        gradprocs = [
            MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05),
                        regex='^critic/.*')
        ]
    elif name == 'actor':
        gradprocs = [
            MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1),
                        regex='^actor/.*')
        ]
    else:
        raise ValueError("unknown optimizer name: {}".format(name))
    gradprocs.append(SummaryGradient())
    opt = optimizer.apply_grad_processors(opt, gradprocs)
    return opt
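
# Illustrative sketch, not part of the original code: the same tensorpack
# gradient-processor pattern as above, packaged as a standalone helper.
# The helper name and default values are assumptions for demonstration only.
def example_clipped_adam(scope_regex='^actor/.*', clip_norm=0.1, lr=1e-4):
    from tensorpack.tfutils import optimizer
    from tensorpack.tfutils.gradproc import SummaryGradient, MapGradient
    base_opt = tf.train.AdamOptimizer(lr)
    gradprocs = [
        # Clip every gradient whose variable matches the regex, then add summaries.
        MapGradient(lambda grad: tf.clip_by_average_norm(grad, clip_norm),
                    regex=scope_regex),
        SummaryGradient(),
    ]
    return optimizer.apply_grad_processors(base_opt, gradprocs)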
def do_rollout(env, action_function):
    """ Builds a path by running through an environment using a provided function to select actions. """
    obs, rewards, actions, human_obs, distances = [], [], [], [], []
    max_timesteps_per_episode = 10000

    raw_ob = env.reset()
    logger.info('get obs {}'.format(raw_ob.shape))
    # The last element of the raw observation is the distance travelled;
    # everything before it is the state passed to the action function.
    ob = raw_ob[:-1]
    distance = raw_ob[-1]

    # Primary environment loop
    for i in range(max_timesteps_per_episode):
        action = action_function(env, ob)
        obs.append(ob)
        # Record the action reported back by the environment after stepping.
        raw_ob, action, reward, done = env.step((action, 0., [0., 0.], [0., 0.]))
        ob = raw_ob[:-1]
        distance = raw_ob[-1]
        # logger.info('agent {} running at distance {}'.format(env._agentIdent, distance))
        actions.append(action)
        rgb = env._cur_screen
        # logger.info("[{:04d}: step".format(env._agentIdent))
        rewards.append(reward)
        human_obs.append(rgb)
        distances.append(distance)
        if done:
            break

    # Build path dictionary
    path = {
        "obs": np.array(obs),
        "original_rewards": np.array(rewards),
        "actions": np.array(actions),
        "human_obs": np.array(human_obs),
        "distances": np.array(distances),
    }
    return path
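
# Illustrative sketch, not part of the original code: running a single rollout
# with a trivial random policy. The 2-dimensional action shape and the
# `make_env` constructor argument are assumptions for demonstration only.
def _example_random_rollout(make_env):
    def random_action(env, ob):
        # Sample a random action vector; the real policy maps ob -> action.
        return np.random.uniform(-1.0, 1.0, size=2)

    path = do_rollout(make_env(), random_action)
    return path["obs"].shape, path["original_rewards"].sum()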
def write_segment_to_video(segment, fname):
    # logger.info("write_segment_to_video = {}".format(fname))
    os.makedirs(osp.dirname(fname), exist_ok=True)
    frames = segment["human_obs"]
    logger.info("frames = {}".format(frames.shape))
    # for i in range(int(env.fps * 0.2)):
    #     frames.append(frames[-1])
    export_video(frames, fname, fps=24)
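
# Illustrative sketch, not part of the original code: writing a dummy segment
# to an mp4 to sanity-check the export path. The frame count, frame size, and
# output location below are assumptions.
def _example_write_dummy_segment(out_path='/tmp/rl_teacher_media/example.mp4'):
    dummy_segment = {"human_obs": np.zeros((48, 64, 64, 3), dtype=np.uint8)}
    write_segment_to_video(dummy_segment, out_path)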
def convert_segment_to_media_url(self, comparison_uuid, side, segment):
    tmp_media_dir = '/tmp/rl_teacher_media'
    logger.info('processing : {}'.format(comparison_uuid))
    media_id = "%s-%s.mp4" % (comparison_uuid, side)
    local_path = osp.join(tmp_media_dir, media_id)
    # Render and upload in a worker so building the comparison does not block.
    self._upload_workers.apply_async(_write_and_upload_video, (local_path, segment))
    local_ip = get_ip()
    media_url = "http://%s:5000/%s" % (local_ip, media_id)
    return media_url
def make_observaton(self, raw_obs):
    if self.vision is False:
        names = ['focus', 'speedX', 'speedY', 'speedZ', 'angle', 'damage',
                 'opponents', 'rpm', 'track', 'trackPos', 'wheelSpinVel',
                 'distFromStart']
        from drlutils.utils import logger
        logger.info('distance from start {}'.format(raw_obs['distFromStart']))
        logger.info('distRaced {}'.format(raw_obs['distRaced']))
        Observation = col.namedtuple('Observaion', names)
        # Scale raw TORCS sensor readings to roughly unit range.
        return Observation(focus=np.array(raw_obs['focus'], dtype=np.float32) / 200.,
                           speedX=np.array(raw_obs['speedX'], dtype=np.float32) / 300.0,
                           speedY=np.array(raw_obs['speedY'], dtype=np.float32) / 300.0,
                           speedZ=np.array(raw_obs['speedZ'], dtype=np.float32) / 300.0,
                           angle=np.array(raw_obs['angle'], dtype=np.float32) / 3.1416,
                           damage=np.array(raw_obs['damage'], dtype=np.float32),
                           opponents=np.array(raw_obs['opponents'], dtype=np.float32) / 200.,
                           rpm=np.array(raw_obs['rpm'], dtype=np.float32) / 10000,
                           track=np.array(raw_obs['track'], dtype=np.float32) / 200.,
                           trackPos=np.array(raw_obs['trackPos'], dtype=np.float32) / 1.,
                           wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=np.float32),
                           distFromStart=np.array(raw_obs['distFromStart'], dtype=np.float32))
    else:
        # Vision mode is not used in this setup.
        assert 0, 'vision mode is not supported here'
        names = ['focus', 'speedX', 'speedY', 'speedZ', 'angle',
                 'opponents', 'rpm', 'track', 'trackPos', 'wheelSpinVel', 'img']
        Observation = col.namedtuple('Observaion', names)
        # Get RGB from observation
        image_rgb = self.obs_vision_to_image_rgb(raw_obs['img'])
        return Observation(focus=np.array(raw_obs['focus'], dtype=np.float32) / 200.,
                           speedX=np.array(raw_obs['speedX'], dtype=np.float32) / self.default_speed,
                           speedY=np.array(raw_obs['speedY'], dtype=np.float32) / self.default_speed,
                           speedZ=np.array(raw_obs['speedZ'], dtype=np.float32) / self.default_speed,
                           angle=np.array(raw_obs['angle'], dtype=np.float32) / 3.1416,
                           opponents=np.array(raw_obs['opponents'], dtype=np.float32) / 200.,
                           rpm=np.array(raw_obs['rpm'], dtype=np.float32),
                           track=np.array(raw_obs['track'], dtype=np.float32) / 200.,
                           trackPos=np.array(raw_obs['trackPos'], dtype=np.float32) / 1.,
                           wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=np.float32),
                           img=image_rgb)
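
# Illustrative sketch, not part of the original code: flattening the Observation
# namedtuple into a single state vector, as an agent might consume it. The field
# selection and the wheel-spin scaling are assumptions, not the project's actual
# state layout.
def example_obs_to_state(obs):
    return np.hstack([obs.angle, obs.track, obs.trackPos,
                      obs.speedX, obs.speedY, obs.speedZ,
                      np.asarray(obs.wheelSpinVel) / 100.0, obs.rpm])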
            data_io,
            # PeriodicTrigger(Evaluator(
            #     EVAL_EPISODE, ['state'], ['policy'], get_player),
            #     every_k_epochs=3),
        ] + evaluators,
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )


if __name__ == '__main__':
    if args['train']:
        from drlutils.utils import logger
        logger.info("Begin train task")
        from ad_cur.autodrive.agent.pool import AgentPool, AgentPoolFake
        clsPool = AgentPoolFake if args['--fake_agent'] else AgentPool

        from drlutils.evaluator.evaluator import EvaluatorBase

        class Evaluator(EvaluatorBase):
            def _init(self):
                super(Evaluator, self)._init()

        if args['--gpu']:
            os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
                sorted(list(set(args['--gpu'].split(',')))))
        # os.system('killall -9 torcs-bin')
        dirname = '/tmp/torcs/trainlog'
        from tensorpack.utils import logger