def get_player(viz=False, train=False, dumpdir=None):
    """Build the wrapped game player.

    Args:
        viz: unused here (kept for a uniform call signature).
        train: if True, use the pc_method-enabled env (when PC_METHOD is
            set) and skip the evaluation-only wrappers.
        dumpdir: directory for the env's monitor dumps, or None.

    Returns:
        The fully wrapped player object.
    """
    # The pc_method option (presumably pseudo-count exploration — confirm)
    # is only passed to the env while training.
    if PC_METHOD and train:
        pl = GymEnv(ENV_NAME, dumpdir=dumpdir, pc_method=PC_METHOD)
    else:
        pl = GymEnv(ENV_NAME, dumpdir=dumpdir)

    def resize(img):
        return cv2.resize(img, IMAGE_SIZE)

    def grey(img):
        # Greyscale -> resize -> re-add a trailing channel axis (H, W, 1).
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = resize(img)
        img = img[:, :, np.newaxis]
        return img

    pl = MapPlayerState(pl, grey)
    #show_images(pl.current_state())

    global NUM_ACTIONS
    NUM_ACTIONS = pl.get_action_space().num_actions()

    # NOTE(review): source formatting was collapsed; history + anti-stuck are
    # assumed evaluation-only and the length limit unconditional — confirm.
    if not train:
        pl = HistoryFramePlayer(pl, FRAME_HISTORY)
        pl = PreventStuckPlayer(pl, 30, 1)
    pl = LimitLengthPlayer(pl, 40000)
    return pl
def get_player(viz=False, train=False, dumpdir=None):
    """Build the wrapped player; training enables all pseudo-count options.

    Args:
        viz: unused here (kept for a uniform call signature).
        train: if True and PC_METHOD is set, pass the PC_* exploration
            options through to the env.
        dumpdir: directory for the env's monitor dumps, or None.

    Returns:
        The fully wrapped player object.
    """
    #TODO: (Next Plan) idea1: use CNN features for our density model.
    #TODO: idea1.5: clear the counter at some intermediate points.
    #TODO: (on EXP now) idea2: time-increasing pseudo reward. If the pseudo
    #      reward is below a threshold (e.g. 0.01) for most states, increase it.
    #TODO: (on EXP now) Do not decrease the explore factor after several
    #      epochs. Experiments show too little exploration afterwards, but
    #      the scores remain high.
    #TODO: (Read more papers) idea2.5: intuition from people — exploration
    #      and exploitation modes. Remember the good rewards and switch to
    #      exploitation mode; explore other possibilities.
    #TODO: (Done) Evaluate with policy probability.
    if PC_METHOD and train:
        # All PC_* knobs go to the env only during training.
        pl = GymEnv(ENV_NAME, dumpdir=dumpdir, pc_method=PC_METHOD,
                    pc_mult=PC_MULT, pc_thre=PC_THRE, pc_time=PC_TIME,
                    feature=FEATURE, pc_action=PC_ACTION,
                    pc_downsample_value=PC_DOWNSAMPLE_VALUE,
                    pc_clean=PC_CLEAN, UCB1=UCB1)
    else:
        pl = GymEnv(ENV_NAME, dumpdir=dumpdir)

    def resize(img):
        return cv2.resize(img, IMAGE_SIZE)

    def grey(img):
        # Greyscale -> resize -> re-add a trailing channel axis (H, W, 1).
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = resize(img)
        img = img[:, :, np.newaxis]
        return img

    pl = MapPlayerState(pl, grey)

    global NUM_ACTIONS
    NUM_ACTIONS = pl.get_action_space().num_actions()

    # History stacking applies in both modes here; the anti-stuck wrapper
    # has been disabled.
    pl = HistoryFramePlayer(pl, FRAME_HISTORY)
    #if not train:
    #    pl = PreventStuckPlayer(pl, 30, 1)
    pl = LimitLengthPlayer(pl, 40000)
    return pl
def get_player(viz=False, train=False, dumpdir=None):
    """Build the wrapped player, configured from EXPERIMENT_MODEL.

    Args:
        viz: unused here (kept for a uniform call signature).
        train: if True, skip the evaluation-only anti-stuck wrapper.
        dumpdir: directory for the env's monitor dumps, or None.

    Returns:
        The fully wrapped player object.
    """
    pl = GymEnv(ENV_NAME, dumpdir=dumpdir)

    # Propagate the experiment id when the underlying gym env supports it.
    # Fixed: compare against None with `is not None`, not `!=` (PEP 8).
    if EXPERIMENT_ID is not None and hasattr(pl.gymenv, "set_experiment_id"):
        pl.gymenv.set_experiment_id(EXPERIMENT_ID)

    pl = MapPlayerState(pl, EXPERIMENT_MODEL.get_screen_processor())

    global NUM_ACTIONS
    NUM_ACTIONS = pl.get_action_space().num_actions()

    # Use the model's history processor when it provides one.
    if hasattr(EXPERIMENT_MODEL, "get_history_processor"):
        pl = HistoryFramePlayer(pl, FRAME_HISTORY,
                                EXPERIMENT_MODEL.get_history_processor())
    else:
        pl = HistoryFramePlayer(pl, FRAME_HISTORY)

    if not train:
        # Evaluation-only anti-stuck wrapper.
        pl = PreventStuckPlayer(pl, 30, 1)
    pl = LimitLengthPlayer(pl, 40000)
    return pl
def get_player(viz=False, train=False, dumpdir=None):
    """Build the player with object-sensitive input preprocessing.

    OBJECT_METHOD selects how template-matched object images are combined
    with (or substituted for) the raw frames.  Each branch also sets the
    globals FRAME_HISTORY and IMAGE_SHAPE3 so the model definition can read
    the resulting input shape.

    Args:
        viz: unused here (kept for a uniform call signature).
        train: if True, skip the evaluation-only wrappers.
        dumpdir: directory for the env's monitor dumps, or None.

    Returns:
        The fully wrapped player object.
    """
    pl = GymEnv(ENV_NAME, dumpdir=dumpdir)

    global NUM_ACTIONS
    global IMAGE_SHAPE3
    global FRAME_HISTORY
    NUM_ACTIONS = pl.get_action_space().num_actions()

    def resize(img):
        return cv2.resize(img, IMAGE_SIZE)

    def grey(img):
        # Greyscale -> resize -> (H, W, 1), scaled into [0, 1].
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = resize(img)
        img = img[:, :, np.newaxis] / 255.0
        return img

    if OBJECT_METHOD == 'swap_input_combine':  #1
        # Swap the input with the combined object image.
        FRAME_HISTORY = 4
        IMAGE_SHAPE3 = IMAGE_SIZE + (FRAME_HISTORY,)
        pl = ObjectSensitivePlayer(pl, TEMPLATE_MATCHER, OBJECT_METHOD, resize)
        #pl = HistoryFramePlayer(pl, FRAME_HISTORY)
        #show_images(pl.current_state())
    if OBJECT_METHOD == 'add_input_combine':  #2
        # Add the combined object image to the input, for each history frame.
        FRAME_HISTORY = 4
        IMAGE_SHAPE3 = IMAGE_SIZE + (FRAME_HISTORY * 2,)
        pl = MapPlayerState(pl, grey)
        pl = ObjectSensitivePlayer(pl, TEMPLATE_MATCHER, OBJECT_METHOD, resize)
        #pl = HistoryFramePlayer(pl, FRAME_HISTORY)
        #show_images(pl.current_state())
    if OBJECT_METHOD == 'add_input_separate':  #3
        # For the current state, add one image per object:
        # (obj1_his1, obj2_his1, cur_his1, obj1_his2, ...).
        # Each image is greyscale, resized to 84 * 84.
        FRAME_HISTORY = 4
        IMAGE_SHAPE3 = IMAGE_SIZE + (FRAME_HISTORY * (len(TEMPLATE_MATCHER.index2obj) + 1),)
        pl = MapPlayerState(pl, grey)
        pl = ObjectSensitivePlayer(pl, TEMPLATE_MATCHER, OBJECT_METHOD, resize)
        #show_images(pl.current_state())
    if OBJECT_METHOD == 'swap_input_separate':  #4
        # Swap the input images with per-object images:
        # (obj1_his1, obj2_his1, obj1_his2, obj2_his2, ...).
        # TODO: If we need to add walls — interestingly there is no wall
        # info here.
        FRAME_HISTORY = 4
        IMAGE_SHAPE3 = IMAGE_SIZE + (FRAME_HISTORY * len(TEMPLATE_MATCHER.index2obj),)
        pl = ObjectSensitivePlayer(pl, TEMPLATE_MATCHER, OBJECT_METHOD, resize)

    # NOTE(review): source formatting was collapsed; history + anti-stuck are
    # assumed evaluation-only and the length limit unconditional — confirm.
    if not train:
        pl = HistoryFramePlayer(pl, FRAME_HISTORY)
        pl = PreventStuckPlayer(pl, 30, 1)
        #show_images(pl.current_state())
    pl = LimitLengthPlayer(pl, 40000)
    #show_images(pl.current_state())
    #exit()
    return pl
def get_player(dumpdir=None):
    """Create a history-stacked player over a non-auto-restarting env.

    Observations are preprocessed by the module-level `grey` and `resize`
    helpers, in that order.  Also refreshes the global NUM_ACTIONS.
    """
    player = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)

    # Apply the shared preprocessing steps in order: greyscale, then resize.
    for mapper in (grey, resize):
        player = MapPlayerState(player, mapper)

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()

    #show_images(player.current_state())
    return HistoryFramePlayer(player, FRAME_HISTORY)
def get_player(dumpdir=None):
    """Create a history-stacked player whose frames are only resized.

    Also refreshes the global NUM_ACTIONS from the env's action space.
    """
    env = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)

    def shrink(frame):
        # cv2.resize takes (width, height), hence the reversed IMAGE_SIZE.
        return cv2.resize(frame, IMAGE_SIZE[::-1])

    player = MapPlayerState(env, shrink)

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()

    return HistoryFramePlayer(player, FRAME_HISTORY)
def get_player(viz=False, train=False, dumpdir=None):
    """Build the player; evaluation adds history + anti-stuck wrappers.

    Args:
        viz: unused here (kept for a uniform call signature).
        train: if True, skip the evaluation-only wrappers.
        dumpdir: directory for the env's monitor dumps, or None.

    Returns:
        The fully wrapped player object.
    """
    pl = GymEnv(ENV_NAME, dumpdir=dumpdir)

    def func(img):
        # cv2.resize takes (width, height), hence the reversed IMAGE_SIZE.
        return cv2.resize(img, IMAGE_SIZE[::-1])

    pl = MapPlayerState(pl, func)

    global NUM_ACTIONS
    NUM_ACTIONS = pl.get_action_space().num_actions()

    # NOTE(review): source formatting was collapsed; the length limit is
    # assumed to apply in both train and eval modes — confirm.
    if not train:
        pl = HistoryFramePlayer(pl, FRAME_HISTORY)
        pl = PreventStuckPlayer(pl, 30, 1)
    pl = LimitLengthPlayer(pl, 40000)
    return pl
def get_player(viz=False, train=False, dumpdir=None):
    """Create a player emitting full-resolution greyscale frames.

    No frame-history stacking or anti-stuck wrapper here; only the episode
    length limit is applied.  Also refreshes the global NUM_ACTIONS.
    """
    player = GymEnv(ENV_NAME, dumpdir=dumpdir)

    def to_grey(frame):
        # Collapse BGR to one grey channel; resizing stays disabled.
        #frame = cv2.resize(frame, IMAGE_SIZE[::])
        return cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    player = MapPlayerState(player, to_grey)

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()

    return LimitLengthPlayer(player, 40000)
def get_player(viz=False, train=False, dumpdir=None): pl = GymEnv(ENV_NAME, dumpdir=dumpdir) #def func(img): # return cv2.resize(img, IMAGE_SIZE[::-1]) #pl = MapPlayerState(pl, func) global NUM_ACTIONS NUM_ACTIONS = pl.get_action_space().num_actions() if not train: pass pl = HistoryFramePlayer(pl, FRAME_HISTORY) #pl = PreventStuckPlayer(pl, 30, 1) #TODO: I think we don't need this in freeway. Is any bug in this code? didn't see repeated actions. pl = LimitLengthPlayer(pl, 40000) return pl
def get_player(viz=False, train=False, dumpdir=None, videofile=None, length=120):
    """Build the wrapped player plus the raw gym env (for recording).

    Args:
        viz: unused here (kept for a uniform call signature).
        train: if True, skip the evaluation-only anti-stuck wrapper.
        dumpdir: directory for the env's monitor dumps, or None.
        videofile: target passed to the env's `record` option.
        length: recording length passed to the env.

    Returns:
        Tuple (wrapped player, underlying GymEnv); the raw env is returned
        so the caller can drive recording directly.
    """
    # Note: env name comes from the module-level `args.env` here, not ENV_NAME.
    gpl = GymEnv(args.env, dumpdir=dumpdir, record=videofile, length=length)

    def func(img):
        # cv2.resize takes (width, height), hence the reversed IMAGE_SIZE.
        return cv2.resize(img, IMAGE_SIZE[::-1])

    pl = MapPlayerState(gpl, func)

    global NUM_ACTIONS
    NUM_ACTIONS = pl.get_action_space().num_actions()

    pl = HistoryFramePlayer(pl, FRAME_HISTORY)
    # NOTE(review): collapsed source; the length limit is assumed
    # unconditional while anti-stuck is evaluation-only — confirm.
    if not train:
        pl = PreventStuckPlayer(pl, 30, 1)
    pl = LimitLengthPlayer(pl, 40000)
    return pl, gpl
def get_player(viz=False, train=False, dumpdir=None):
    """Build the player with a shorter (30000-step) episode limit.

    Args:
        viz: unused here (kept for a uniform call signature).
        train: if True, skip the evaluation-only history wrapper.
        dumpdir: directory for the env's monitor dumps, or None.

    Returns:
        The fully wrapped player object.
    """
    pl = GymEnv(ENV_NAME, dumpdir=dumpdir)

    def func(img):
        # cv2.resize takes (width, height), hence the reversed IMAGE_SIZE.
        return cv2.resize(img, IMAGE_SIZE[::-1])  #TODO: Do we really need to resize here? Check the original paper.

    pl = MapPlayerState(pl, func)

    global NUM_ACTIONS
    NUM_ACTIONS = pl.get_action_space().num_actions()

    if not train:
        # When testing
        pl = HistoryFramePlayer(pl, FRAME_HISTORY)
        #pl = PreventStuckPlayer(pl, 30, 1)  #TODO: Need to know the start button. Is it different for each game?
    pl = LimitLengthPlayer(pl, 30000)  # 500s
    return pl
def get_player(dumpdir=None):
    """Create a history-stacked player emitting resized greyscale frames.

    The env does not auto-restart.  Also refreshes the global NUM_ACTIONS.
    """
    env = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)

    def preprocess(frame):
        # Greyscale -> resize -> re-add a trailing channel axis (H, W, 1).
        grey_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        grey_frame = cv2.resize(grey_frame, IMAGE_SIZE)
        return grey_frame[:, :, np.newaxis]

    player = MapPlayerState(env, preprocess)

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()

    return HistoryFramePlayer(player, FRAME_HISTORY)
def get_player(dumpdir=None, train=False):
    """Build the wrapped player, configured from EXPERIMENT_MODEL.

    Args:
        dumpdir: directory for the env's monitor dumps, or None.
        train: if True, skip the evaluation-only anti-stuck wrapper.
            Fixed: `train` was referenced below but never defined (the old
            signature was `(dumpdir=None)`), so every call raised NameError
            at `if not train`; it is now a keyword parameter defaulting to
            False, which preserves the old wrapper chain for existing callers.

    Returns:
        The fully wrapped player object.
    """
    pl = GymEnv(ENV_NAME, dumpdir=dumpdir, force=True)
    pl = MapPlayerState(pl, EXPERIMENT_MODEL.get_screen_processor())

    global NUM_ACTIONS
    NUM_ACTIONS = pl.get_action_space().num_actions()

    # Use the model's history processor when it provides one.
    if hasattr(EXPERIMENT_MODEL, "get_history_processor"):
        pl = HistoryFramePlayer(pl, FRAME_HISTORY,
                                EXPERIMENT_MODEL.get_history_processor())
    else:
        pl = HistoryFramePlayer(pl, FRAME_HISTORY)

    if not train:
        # Evaluation-only anti-stuck wrapper.
        pl = PreventStuckPlayer(pl, 30, 1)
    pl = LimitLengthPlayer(pl, 40000)
    return pl
def get_player(viz=False, train=False, dumpdir=None): pl = GymEnv(ENV_NAME, dumpdir=dumpdir) #TODO: Preprocessing goes here def func(img): img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #img = img[:,:,0] #img = cv2.resize(img, IMAGE_SIZE[::]) return img pl = MapPlayerState(pl, func) global NUM_ACTIONS NUM_ACTIONS = pl.get_action_space().num_actions() if not train: pass #pl = HistoryFramePlayer(pl, FRAME_HISTORY) #pl = PreventStuckPlayer(pl, 30, 1) pl = LimitLengthPlayer(pl, 40000) return pl