def get_player(viz=False, train=False, dumpdir=None):
    """Build the player pipeline for ENV_NAME using EXPERIMENT_MODEL's processors.

    Args:
        viz: unused here; kept for interface compatibility with callers.
        train: when False, wrap the player to break out of stuck states.
        dumpdir: optional directory for gym monitor output.

    Returns:
        The fully wrapped player, capped at 40000 steps per episode.
    """
    pl = GymEnv(ENV_NAME, dumpdir=dumpdir)
    # Propagate the experiment id to the underlying gym env when supported.
    if EXPERIMENT_ID is not None and hasattr(pl.gymenv, "set_experiment_id"):
        pl.gymenv.set_experiment_id(EXPERIMENT_ID)

    pl = MapPlayerState(pl, EXPERIMENT_MODEL.get_screen_processor())

    global NUM_ACTIONS
    NUM_ACTIONS = pl.get_action_space().num_actions()

    # Use the model's history processor if it defines one.
    if hasattr(EXPERIMENT_MODEL, "get_history_processor"):
        pl = HistoryFramePlayer(pl, FRAME_HISTORY,
                                EXPERIMENT_MODEL.get_history_processor())
    else:
        pl = HistoryFramePlayer(pl, FRAME_HISTORY)
    if not train:
        pl = PreventStuckPlayer(pl, 30, 1)
    pl = LimitLengthPlayer(pl, 40000)
    return pl
Exemplo n.º 2
0
def get_player(viz=False, train=False, dumpdir=None):
    """Create the wrapped game player; enable pseudo-count rewards when training."""
    if PC_METHOD and train:
        env = GymEnv(ENV_NAME, dumpdir=dumpdir, pc_method=PC_METHOD)
    else:
        env = GymEnv(ENV_NAME, dumpdir=dumpdir)

    def to_grey(frame):
        # Convert to grayscale, resize, and restore a trailing channel axis.
        grey_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        grey_frame = cv2.resize(grey_frame, IMAGE_SIZE)
        return grey_frame[:, :, np.newaxis]

    player = MapPlayerState(env, to_grey)

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()

    if not train:
        # Evaluation only: stack frame history and escape stuck states.
        player = HistoryFramePlayer(player, FRAME_HISTORY)
        player = PreventStuckPlayer(player, 30, 1)
    return LimitLengthPlayer(player, 40000)
Exemplo n.º 3
0
def get_player(viz=False, train=False, dumpdir=None):
    """Build the player; training mode enables pseudo-count exploration options.

    Research notes carried over from the original author:
      - idea1 (next plan): use CNN features for the density model.
      - idea1.5: clear the counter at some intermediate points.
      - idea2 (in experiments): grow the pseudo reward over time when it stays
        below a threshold (e.g. 0.01) for most states.
      - (in experiments) do not decay the exploration factor after several
        epochs; experiments showed insufficient exploration afterwards,
        although scores held up well.
      - idea2.5 (read more papers): alternate exploration/exploitation modes,
        remembering good rewards before switching to exploitation.
      - (done) evaluate with policy probability.
    """
    if PC_METHOD and train:
        env = GymEnv(ENV_NAME, dumpdir=dumpdir, pc_method=PC_METHOD,
                     pc_mult=PC_MULT, pc_thre=PC_THRE, pc_time=PC_TIME,
                     feature=FEATURE, pc_action=PC_ACTION,
                     pc_downsample_value=PC_DOWNSAMPLE_VALUE,
                     pc_clean=PC_CLEAN, UCB1=UCB1)
    else:
        env = GymEnv(ENV_NAME, dumpdir=dumpdir)

    def to_grey(frame):
        # Grayscale + resize, keeping a single trailing channel axis.
        out = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        out = cv2.resize(out, IMAGE_SIZE)
        return out[:, :, np.newaxis]

    player = MapPlayerState(env, to_grey)

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()

    # History stacking is applied for both training and evaluation here;
    # PreventStuckPlayer was deliberately disabled by the original author.
    player = HistoryFramePlayer(player, FRAME_HISTORY)
    return LimitLengthPlayer(player, 40000)
Exemplo n.º 4
0
def get_player(viz=False, train=False, dumpdir=None):
    """Build an object-sensitive player; the wrapping depends on OBJECT_METHOD.

    Side effects: updates the module globals NUM_ACTIONS, IMAGE_SHAPE3 and
    FRAME_HISTORY according to the selected object method.
    """
    env = GymEnv(ENV_NAME, dumpdir=dumpdir)

    global NUM_ACTIONS
    global IMAGE_SHAPE3
    global FRAME_HISTORY
    NUM_ACTIONS = env.get_action_space().num_actions()

    def shrink(frame):
        return cv2.resize(frame, IMAGE_SIZE)

    def to_grey(frame):
        # Grayscale, resize, scale to [0, 1], keep a trailing channel axis.
        out = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        out = shrink(out)
        return out[:, :, np.newaxis] / 255.0

    # The OBJECT_METHOD values are mutually exclusive, so an elif chain is
    # equivalent to the original run of independent if-statements.
    pl = env
    if OBJECT_METHOD == 'swap_input_combine':  # variant 1
        # Replace the input with the combined object image.
        FRAME_HISTORY = 4
        IMAGE_SHAPE3 = IMAGE_SIZE + (FRAME_HISTORY,)
        pl = ObjectSensitivePlayer(pl, TEMPLATE_MATCHER, OBJECT_METHOD, shrink)
    elif OBJECT_METHOD == 'add_input_combine':  # variant 2
        # Augment each history frame with the combined object image.
        FRAME_HISTORY = 4
        IMAGE_SHAPE3 = IMAGE_SIZE + (FRAME_HISTORY * 2,)
        pl = MapPlayerState(pl, to_grey)
        pl = ObjectSensitivePlayer(pl, TEMPLATE_MATCHER, OBJECT_METHOD, shrink)
    elif OBJECT_METHOD == 'add_input_separate':  # variant 3
        # Current state augmented with per-object images:
        # (obj1_his1, obj2_his1, cur_his1, obj1_his2, ...),
        # each a grey-scale image resized via `shrink`.
        FRAME_HISTORY = 4
        IMAGE_SHAPE3 = IMAGE_SIZE + (
            FRAME_HISTORY * (len(TEMPLATE_MATCHER.index2obj) + 1),)
        pl = MapPlayerState(pl, to_grey)
        pl = ObjectSensitivePlayer(pl, TEMPLATE_MATCHER, OBJECT_METHOD, shrink)
    elif OBJECT_METHOD == 'swap_input_separate':  # variant 4
        # Replace the input with per-object images only:
        # (obj1_his1, obj2_his1, obj1_his2, obj2_his2, ...).
        # Note from the original author: no wall information is present here.
        FRAME_HISTORY = 4
        IMAGE_SHAPE3 = IMAGE_SIZE + (
            FRAME_HISTORY * len(TEMPLATE_MATCHER.index2obj),)
        pl = ObjectSensitivePlayer(pl, TEMPLATE_MATCHER, OBJECT_METHOD, shrink)

    if not train:
        # Evaluation only: stack history frames and escape stuck states.
        pl = HistoryFramePlayer(pl, FRAME_HISTORY)
        pl = PreventStuckPlayer(pl, 30, 1)

    return LimitLengthPlayer(pl, 40000)
Exemplo n.º 5
0
def get_player(dumpdir=None):
    """Assemble the evaluation player; the env does not auto-restart."""
    player = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)
    # Preprocess: module-level `grey` first, then module-level `resize`.
    player = MapPlayerState(player, grey)
    player = MapPlayerState(player, resize)

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()

    return HistoryFramePlayer(player, FRAME_HISTORY)
Exemplo n.º 6
0
def get_player(dumpdir=None):
    """Build a player that only resizes frames (no colour conversion)."""
    player = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)

    def shrink(frame):
        # IMAGE_SIZE is reversed before the cv2.resize call — presumably it is
        # stored (height, width) while cv2 expects (width, height); confirm.
        return cv2.resize(frame, IMAGE_SIZE[::-1])

    player = MapPlayerState(player, shrink)

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()

    return HistoryFramePlayer(player, FRAME_HISTORY)
Exemplo n.º 7
0
def get_player(viz=False, train=False, dumpdir=None):
    """Resize-only player; extra wrappers are added when evaluating."""
    player = GymEnv(ENV_NAME, dumpdir=dumpdir)
    # IMAGE_SIZE is reversed here — presumably (h, w) storage versus
    # cv2.resize's (w, h) argument order; confirm against IMAGE_SIZE's def.
    player = MapPlayerState(player, lambda frame: cv2.resize(frame, IMAGE_SIZE[::-1]))

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()

    if not train:
        # Evaluation only: stack frame history and escape stuck states.
        player = HistoryFramePlayer(player, FRAME_HISTORY)
        player = PreventStuckPlayer(player, 30, 1)
    return LimitLengthPlayer(player, 40000)
Exemplo n.º 8
0
def get_player(viz=False, train=False, dumpdir=None):
    """Grayscale player with no history stacking or stuck protection."""
    player = GymEnv(ENV_NAME, dumpdir=dumpdir)

    def to_grey(frame):
        # Full-resolution grayscale; resizing was disabled by the original author.
        return cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    player = MapPlayerState(player, to_grey)

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()
    return LimitLengthPlayer(player, 40000)
Exemplo n.º 9
0
def get_player(viz=False, train=False, dumpdir=None):
    """Build a raw-frame player; history frames are stacked only for evaluation.

    Args:
        viz: unused; kept for interface compatibility with callers.
        train: when False, stack FRAME_HISTORY frames for evaluation.
        dumpdir: optional directory for gym monitor output.

    Returns:
        The wrapped player, capped at 40000 steps per episode.
    """
    pl = GymEnv(ENV_NAME, dumpdir=dumpdir)

    global NUM_ACTIONS
    NUM_ACTIONS = pl.get_action_space().num_actions()
    if not train:
        pl = HistoryFramePlayer(pl, FRAME_HISTORY)
        # PreventStuckPlayer is deliberately omitted: the original author noted
        # no repeated-action problem was observed for this game (Freeway).
    pl = LimitLengthPlayer(pl, 40000)
    return pl
Exemplo n.º 10
0
def get_player(viz=False, train=False, dumpdir=None):
    """Resized-frame player; evaluation adds frame-history stacking."""
    player = GymEnv(ENV_NAME, dumpdir=dumpdir)

    def shrink(frame):
        #TODO: Do we really need to resize here? Check the original paper.
        return cv2.resize(frame, IMAGE_SIZE[::-1])

    player = MapPlayerState(player, shrink)

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()

    if not train:  # When testing
        player = HistoryFramePlayer(player, FRAME_HISTORY)
        #TODO: PreventStuckPlayer needs the start button; it may differ per game.
    return LimitLengthPlayer(player, 30000)  # 500s
Exemplo n.º 11
0
def get_player(dumpdir=None):
    """Grayscale, resized player for evaluation; env does not auto-restart."""
    env = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)

    def to_grey(frame):
        # Grayscale + resize, restoring a single trailing channel axis.
        out = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        out = cv2.resize(out, IMAGE_SIZE)
        return out[:, :, np.newaxis]

    player = MapPlayerState(env, to_grey)

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()

    return HistoryFramePlayer(player, FRAME_HISTORY)
Exemplo n.º 12
0
def get_player(dumpdir=None, train=False):
    """Build the player from EXPERIMENT_MODEL's processors.

    Args:
        dumpdir: optional directory for gym monitor output; force=True lets
            the monitor overwrite any existing dump.
        train: when False, add PreventStuckPlayer. This parameter is new but
            backward-compatible: the original body referenced an undefined
            name `train` (the signature lacked it), raising NameError at
            call time.

    Returns:
        The wrapped player, capped at 40000 steps per episode.
    """
    pl = GymEnv(ENV_NAME, dumpdir=dumpdir, force=True)

    pl = MapPlayerState(pl, EXPERIMENT_MODEL.get_screen_processor())

    global NUM_ACTIONS
    NUM_ACTIONS = pl.get_action_space().num_actions()

    # Use the model's history processor when it provides one.
    if hasattr(EXPERIMENT_MODEL, "get_history_processor"):
        pl = HistoryFramePlayer(pl, FRAME_HISTORY,
                                EXPERIMENT_MODEL.get_history_processor())
    else:
        pl = HistoryFramePlayer(pl, FRAME_HISTORY)
    if not train:
        pl = PreventStuckPlayer(pl, 30, 1)
    pl = LimitLengthPlayer(pl, 40000)
    return pl
Exemplo n.º 13
0
def get_player(viz=False, train=False, dumpdir=None):
    """Grayscale-only player; history/stuck wrappers are currently disabled."""
    player = GymEnv(ENV_NAME, dumpdir=dumpdir)

    #TODO: Preprocessing goes here
    def to_grey(frame):
        # Grayscale conversion only; resizing was disabled by the original author.
        return cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    player = MapPlayerState(player, to_grey)

    global NUM_ACTIONS
    NUM_ACTIONS = player.get_action_space().num_actions()
    if not train:
        # History stacking and stuck prevention are intentionally disabled.
        pass
    return LimitLengthPlayer(player, 40000)