Пример #1
0
class Ball(RawEnvironment):
    '''
    A fake environment wrapping ``Screen`` that exposes a reduced action set and, while the ball is
    approaching, predicts where the ball will end up by rolling a private copy of the screen forward.

    Incoming actions 1 and 2 are forwarded to the screen as 2 and 3; the action recorded in the
    returned factor state is mapped back to the reduced numbering (0, 1, 2).
    '''
    def __init__(self):
        self.num_actions = 4
        self.itr = 0
        self.save_path = ""
        self.screen = Screen()
        # Scratch copy used for look-ahead simulation so the real screen is never advanced by it.
        self.internal_screen = copy.deepcopy(self.screen)
        # Last predicted ball location (with jitter); stays None until a prediction has been made.
        self.objective_location = None

    @staticmethod
    def _remap_action(factor_state):
        '''Rewrite the screen's action id stored in factor_state["Action"][1] to the reduced numbering, in place.'''
        # NOTE(review): assumes factor_state["Action"] supports item assignment (e.g. a list) -- TODO confirm
        if factor_state["Action"][1] < 2:
            factor_state["Action"][1] = 0
        elif factor_state["Action"][1] == 2:
            factor_state["Action"][1] = 1
        elif factor_state["Action"][1] == 3:
            factor_state["Action"][1] = 2

    def step(self, action):
        '''
        Advance the wrapped screen by one step.

        action: reduced action id; 1 and 2 are translated to the screen's 2 and 3, other values pass through.
        Returns (raw_state, factor_state, done) as produced by ``Screen.step``, with the action entry
        of factor_state remapped to the reduced numbering.
        '''
        if action == 1:
            action = 2
        elif action == 2:
            action = 3
        _, factor_state = self.screen.getState()
        ball = factor_state["Ball"][0]
        ball_vel = self.screen.ball.vel
        # Original intent: when the ball is too far or moving up we don't care where it is.
        # TODO: follow the ball in that case
        if not (ball_vel[0] < 0 or ball[0] > 60):
            # Simulate on a fresh copy with action [0] until the ball's first coordinate reaches 71,
            # then remember its second coordinate, perturbed by -1, 0 or +1.
            self.internal_screen = copy.deepcopy(self.screen)
            while self.internal_screen.ball.pos[0] < 71:
                self.internal_screen.step([0])
            self.objective_location = self.internal_screen.ball.pos[1] + np.random.choice([-1, 0, 1])

        raw_state, factor_state, done = self.screen.step(action)
        self._remap_action(factor_state)
        return raw_state, factor_state, done

    def getState(self):
        '''Return (raw_state, factor_state) from the wrapped screen with the action entry remapped.'''
        raw_state, factor_state = self.screen.getState()
        self._remap_action(factor_state)
        return raw_state, factor_state
Пример #2
0
class Paddle(RawEnvironment):
    '''
    A fake environment that pretends that the paddle partition has been solved, gives three actions that
    produce desired behavior.

    Incoming actions 1 and 2 are forwarded to the wrapped ``Screen`` as 2 and 3; the action recorded in
    the returned factor state is mapped back to 0, 1 or 2.
    '''
    def __init__(self, frameskip=1):
        self.num_actions = 3
        self.itr = 0
        self.save_path = ""
        self.screen = Screen(frameskip=frameskip)
        self.reward = 0
        # Same object as the screen's episode_rewards (assigned by reference, not copied).
        self.episode_rewards = self.screen.episode_rewards

    def set_save(self, itr, save_dir, recycle, all_dir=""):
        '''
        Record the saving configuration on this environment and mirror it onto the wrapped screen.

        itr, save_dir, recycle: stored on both this object and ``self.screen``.
        all_dir: stored on this object only.
        '''
        self.save_path = save_dir
        self.itr = itr
        self.recycle = recycle
        self.screen.save_path = save_dir
        self.screen.itr = itr
        self.screen.recycle = recycle
        self.all_dir = all_dir

        # Best effort: any OSError from directory creation (including "already exists") is ignored.
        try:
            os.makedirs(save_dir)
        except OSError:
            pass

    @staticmethod
    def _remap_action(factor_state):
        '''
        Replace factor_state["Action"] with a tuple whose second entry is the reduced action id.

        The screen's action id is read from factor_state["Action"][1][0]: values below 2 become 0,
        2 becomes 1 and 3 becomes 2. Any other value leaves the entry untouched.
        '''
        action_entry = factor_state["Action"]
        screen_action = action_entry[1][0]
        if screen_action < 2:
            factor_state["Action"] = (action_entry[0], 0)
        elif screen_action == 2:
            factor_state["Action"] = (action_entry[0], 1)
        elif screen_action == 3:
            factor_state["Action"] = (action_entry[0], 2)

    def step(self, action):
        '''
        Advance the wrapped screen by one rendered step.

        action: reduced action id; cloned before modification so the caller's object is not changed.
        Returns (raw_state, factor_state, done) with the action entry of factor_state remapped.
        '''
        # TODO: action is tensor, might not be safe assumption
        action = action.clone()
        if action == 1:
            action[0] = 2
        elif action == 2:
            action[0] = 3
        raw_state, factor_state, done = self.screen.step(action, render=True)
        self.reward = self.screen.reward
        self._remap_action(factor_state)
        return raw_state, factor_state, done

    def getState(self):
        '''Return (raw_state, factor_state) from the wrapped screen with the action entry remapped.'''
        raw_state, factor_state = self.screen.getState()
        self._remap_action(factor_state)
        return raw_state, factor_state
Пример #3
0
class FocusEnvironment(RawEnvironment):
    '''
    An environment wrapping ``Screen`` whose factor state is produced by a focus model run on the
    raw state, instead of being taken from the screen directly. Only the "Action" entry is copied
    from the screen's own factor state.

    Every call to ``step`` appends one line describing the factor state to ``focus_dumps.txt``
    inside the configured save path.
    '''
    def __init__(self, focus_model):
        self.num_actions = 4
        self.itr = 0
        self.save_path = ""
        self.screen = Screen()
        self.focus_model = focus_model
        # Most recent model-derived factor state; None until step() or getState() has computed one.
        self.factor_state = None
        self.reward = 0
        # self.focus_model.cuda()

    def set_save(self, itr, save_dir, recycle):
        '''Record the saving configuration on this environment and mirror it onto the wrapped screen.'''
        self.save_path = save_dir
        self.itr = itr
        self.recycle = recycle
        self.screen.save_path = save_dir
        self.screen.itr = itr
        self.screen.recycle = recycle
        # Best effort: any OSError from directory creation (including "already exists") is ignored.
        try:
            os.makedirs(save_dir)
        except OSError:
            pass

    def _compute_factor_state(self, raw_state, raw_factor_state):
        '''Run the focus model on raw_state, build the factor state, cache it on self and return it.'''
        factor_state = self.focus_model.forward(
            pytorch_model.wrap(raw_state, cuda=False).unsqueeze(0).unsqueeze(0),
            ret_numpy=True)
        for key in factor_state.keys():
            # presumably 84 is the frame side length, turning normalized outputs into pixels -- TODO confirm
            factor_state[key] *= 84
            factor_state[key] = (np.squeeze(factor_state[key]), (1.0, ))
        factor_state['Action'] = raw_factor_state['Action']
        self.factor_state = factor_state
        return factor_state

    def step(self, action):
        '''
        Advance the wrapped screen by one rendered step and recompute the factor state.

        Returns (raw_state, factor_state, done), where factor_state comes from the focus model.
        Also writes one tab-separated line of ``key:values`` entries to ``focus_dumps.txt``.
        '''
        # TODO: action is tensor, might not be safe assumption
        raw_state, raw_factor_state, done = self.screen.step(action,
                                                             render=True)
        self.reward = self.screen.reward
        factor_state = self._compute_factor_state(raw_state, raw_factor_state)

        # Truncate (or create) the dump file while the screen's itr is 0, append afterwards.
        mode = 'a' if self.screen.itr != 0 else 'w'
        # The context manager closes the handle on every step instead of leaking it.
        with open(os.path.join(self.save_path, "focus_dumps.txt"), mode) as object_dumps:
            for key in factor_state.keys():
                object_dumps.write(
                    key + ":" + " ".join([str(fs) for fs in factor_state[key]]) +
                    "\t")  # TODO: attributes are limited to single floats
            object_dumps.write(
                "\n")  # TODO: recycling does not stop object dumping
        return raw_state, factor_state, done

    def getState(self):
        '''
        Return (raw_state, factor_state) without advancing the screen.

        The factor state is the one cached by the last computation; it is computed from the current
        raw state only if nothing has been cached yet.
        '''
        raw_state, raw_factor_state = self.screen.getState()
        if self.factor_state is None:
            self._compute_factor_state(raw_state, raw_factor_state)
        return raw_state, self.factor_state