Example #1
 def __init__(self, frameskip=1):
     self.num_actions = 3
     self.itr = 0
     self.save_path = ""
     self.screen = Screen(frameskip=frameskip)
     self.reward = 0
     self.episode_rewards = self.screen.episode_rewards
Example #2
 def __init__(self, focus_model):
     self.num_actions = 4
     self.itr = 0
     self.save_path = ""
     self.screen = Screen()
     self.focus_model = focus_model
     self.factor_state = None
     self.reward = 0
Example #3
class Paddle(RawEnvironment):
    '''
    A fake environment that pretends that the paddle partition has been solved; it provides three actions that
    produce the desired behavior.
    '''
    def __init__(self, frameskip=1):
        self.num_actions = 3
        self.itr = 0
        self.save_path = ""
        self.screen = Screen(frameskip=frameskip)
        self.reward = 0
        self.episode_rewards = self.screen.episode_rewards

    def set_save(self, itr, save_dir, recycle, all_dir=""):
        self.save_path = save_dir
        self.itr = itr
        self.recycle = recycle
        self.screen.save_path = save_dir
        self.screen.itr = itr
        self.screen.recycle = recycle
        self.all_dir = all_dir

        try:
            os.makedirs(save_dir)
        except OSError:
            pass

    def step(self, action):
        # TODO: action is a tensor, which might not be a safe assumption
        action = action.clone()
        if action == 1:
            action[0] = 2
        elif action == 2:
            action[0] = 3
        raw_state, factor_state, done = self.screen.step(action, render=True)
        self.reward = self.screen.reward
        if factor_state["Action"][1][0] < 2:
            factor_state["Action"] = (factor_state["Action"][0], 0)
        elif factor_state["Action"][1][0] == 2:
            factor_state["Action"] = (factor_state["Action"][0], 1)
        elif factor_state["Action"][1][0] == 3:
            factor_state["Action"] = (factor_state["Action"][0], 2)
        return raw_state, factor_state, done

    def getState(self):
        raw_state, factor_state = self.screen.getState()
        if factor_state["Action"][1][0] < 2:
            factor_state["Action"] = (factor_state["Action"][0], 0)
        elif factor_state["Action"][1][0] == 2:
            factor_state["Action"] = (factor_state["Action"][0], 1)
        elif factor_state["Action"][1][0] == 3:
            factor_state["Action"] = (factor_state["Action"][0], 2)
        return raw_state, factor_state
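
A minimal rollout sketch for this wrapper, assuming Paddle and Screen come from the same SelfBreakout module; the random-action loop and the torch usage below are illustrative additions, not part of the original example:

import torch

# Hypothetical driver loop: Paddle.step clones and indexes the action,
# so it expects a tensor-like action of length 1.
env = Paddle(frameskip=2)
raw_state, factor_state = env.getState()
for _ in range(1000):
    action = torch.randint(env.num_actions, (1,))  # random policy, for illustration only
    raw_state, factor_state, done = env.step(action)
    if done:
        print("episode finished, last reward:", env.reward)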
Example #4
class Ball(RawEnvironment):
    '''
    A fake environment that pretends that the paddle partition has been solved; it provides three actions that
    produce the desired behavior.
    '''
    def __init__(self):
        self.num_actions = 4
        self.itr = 0
        self.save_path = ""
        self.screen = Screen()
        self.internal_screen = copy.deepcopy(self.screen)

    def step(self, action):
        if action == 1:
            action = 2
        elif action == 2:
            action = 3
        raw_state, factor_state = self.screen.getState()
        ball = factor_state["Ball"][0]
        ball_vel = self.screen.ball.vel
        if ball_vel[0] < 0 or ball[0] > 60: # ball is too far away or moving up, so we don't care where it is
            pass  # TODO: follow the ball
        else:
            self.internal_screen = copy.deepcopy(self.screen)
            while self.internal_screen.ball.pos[0] < 71:
                self.internal_screen.step([0])
            self.objective_location = self.internal_screen.ball.pos[1] + np.random.choice([-1, 0, 1])

        paddle = factor_state["Paddle"][0]
        raw_state, factor_state, done = self.screen.step(action)
        if factor_state["Action"][1] < 2:
            factor_state["Action"][1] = 0
        elif factor_state["Action"][1] == 2:
            factor_state["Action"][1] = 1
        elif factor_state["Action"][1] == 3:
            factor_state["Action"][1] = 2

    def getState(self):
        raw_state, factor_state = self.screen.getState()
        if factor_state["Action"][1] < 2:
            factor_state["Action"][1] = 0
        elif factor_state["Action"][1] == 2:
            factor_state["Action"][1] = 1
        elif factor_state["Action"][1] == 3:
            factor_state["Action"][1] = 2
Example #5
 def __init__(self):
     self.num_actions = 4
     self.itr = 0
     self.save_path = ""
     self.screen = Screen()
     self.internal_screen = copy.deepcopy(self.screen)
Example #6
    model.add_model('Ball', ball_model, ['Paddle'],
                    augment_pt=f)  #,augment_pt=util.JumpFiltering(2, 0.05))
    ####
    if args.true_environment:
        model = None
    print(args.true_environment, args.env)
    if args.env == 'SelfPusher':
        if args.true_environment:
            true_environment = Pushing(pushgripper=True,
                                       frameskip=args.frameskip)
        else:
            true_environment = None  # TODO: implement
    elif args.env == 'SelfBreakout':
        if args.true_environment:
            true_environment = Screen(frameskip=args.frameskip)
        else:
            true_environment = FocusEnvironment(model,
                                                display=args.display_focus)
    elif args.env.find('Atari') != -1:
        true_environment = FocusAtariEnvironment(model,
                                                 args.env[len("Atari"):],
                                                 args.seed, 0, args.save_dir)
    dataset_path = args.record_rollouts
    changepoint_path = args.changepoint_dir
    option_chain = OptionChain(true_environment, args.changepoint_dir,
                               args.train_edge, args)
    reward_paths = glob.glob(os.path.join(option_chain.save_dir, "*rwd.pkl"))
    print(reward_paths)
    reward_paths.sort(key=lambda x: int(x.split("__")[2]))
    # train-edge
    # state-forms
    # state-names
    # Example usage:
    # python paddle_bounce.py --model-form tab --optimizer-form TabQ --record-rollouts "data/action/" --train-edge "Paddle->Ball" --num-stack 1 --train --num-iters 100000 --save-dir data/paddleballtest --state-forms prox --state-names Paddle --base-node Paddle --changepoint-dir data/paddlegraph --factor 8 --greedy-epsilon .2 --lr .01 --normalize --behavior-policy egq --gamma .99 > out.txt
    # python paddle_bounce.py --model-form fourier --optimizer-form SARSA --record-rollouts "data/action/" --train-edge "Paddle->Ball" --num-stack 2 --train --num-iters 100000 --save-dir data/paddleballpg --state-forms xprox --state-names Paddle --base-node Paddle --changepoint-dir data/paddlegraphpg --factor 10 --num-layers 1 --greedy-epsilon .1 --lr .001 --normalize --behavior-policy egq --save-dir data/xstates/ --optim base > out.txt
    # python dopamine_paddle.py --record-rollouts data/integrationpaddle --changepoint-dir data/dopegraph --model-form rainbow --true-environment --train-edge "Action->Reward" --state-forms raw --state-names Action --num-steps 5 --num-stack 4 --num-iters 2000000 --log-interval 200 --save-dir ../datasets/caleb_data/dopamine/rainbow/ --optim base > baselines/rainbow.txt
    # python dopamine_paddle.py --record-rollouts data/extragripper --changepoint-dir data/dopepushgraph --model-form rainbow --true-environment --train-edge "Action->Reward" --state-forms raw --state-names Action --num-steps 5 --num-stack 4 --num-iters 10000000 --log-interval 200 --save-dir ../datasets/caleb_data/dopamine/rainbowpushing/ --optim base --env SelfPusher > pushingrainbow.txt
    # python dopamine_paddle.py --record-rollouts data/extragripper --changepoint-dir data/dopepushgraph --model-form rainbow --true-environment --train-edge "Action->Reward" --state-forms bounds bounds bounds prox prox --state-names Gripper Block Target Gripper__Block Block__Target --num-steps 5 --num-stack 1 --num-iters 10000000 --log-interval 200 --save-dir ../datasets/caleb_data/dopamine/rainbowpushing/ --optim base --env SelfPusher --gpu 3 --frameskip 3 --normalize --reward-form rawdist > pushingrainbowstate.txt
    args = get_args()
    # true_environment = Paddle()
    # true_environment = PaddleNoBlocks()
    if args.env == "SelfPusher":
        true_environment = Pushing(True, frameskip=args.frameskip)
    else:
        true_environment = Screen()
    dataset_path = args.record_rollouts
    changepoint_path = args.changepoint_dir
    option_chain = OptionChain(true_environment, args.changepoint_dir,
                               args.train_edge, args)
    if args.reward_form == 'rawdist' and args.env == 'SelfPusher':
        true_environment.use_distance_reward()
        args.reward_form = 'raw'

    head, tail = get_edge(args.train_edge)

    reward_classes = [BlockReward(args)]

    if args.reward_form == 'x':
        reward_classes = [Xreward(args)]
    elif args.reward_form.find('move_dirall') != -1:
Example #8
 def __init__(self):
     self.num_actions = 3
     self.itr = 0
     self.save_path = ""
     self.screen = Screen()
     self.reward = 0
Example #9
class FocusEnvironment(RawEnvironment):
    '''
    A fake environment that pretends that the paddle partition has been solved; it provides three actions that
    produce the desired behavior.
    '''
    def __init__(self, focus_model):
        self.num_actions = 4
        self.itr = 0
        self.save_path = ""
        self.screen = Screen()
        self.focus_model = focus_model
        self.factor_state = None
        self.reward = 0
        # self.focus_model.cuda()

    def set_save(self, itr, save_dir, recycle):
        self.save_path = save_dir
        self.itr = itr
        self.recycle = recycle
        self.screen.save_path = save_dir
        self.screen.itr = itr
        self.screen.recycle = recycle
        try:
            os.makedirs(save_dir)
        except OSError:
            pass

    def step(self, action):
        # TODO: action is a tensor, which might not be a safe assumption
        t = time.time()
        raw_state, raw_factor_state, done = self.screen.step(action,
                                                             render=True)
        self.reward = self.screen.reward
        factor_state = self.focus_model.forward(
            pytorch_model.wrap(raw_state, cuda=False).unsqueeze(0).unsqueeze(0),
            ret_numpy=True)
        for key in factor_state.keys():
            factor_state[key] *= 84
            factor_state[key] = (np.squeeze(factor_state[key]), (1.0, ))
        factor_state['Action'] = raw_factor_state['Action']
        self.factor_state = factor_state
        if self.screen.itr != 0:
            object_dumps = open(
                os.path.join(self.save_path, "focus_dumps.txt"), 'a')
        else:
            object_dumps = open(os.path.join(self.save_path,
                                             "focus_dumps.txt"),
                                'w')  # create file if it does not exist
        for key in factor_state.keys():
            object_dumps.write(
                key + ":" + " ".join([str(fs) for fs in factor_state[key]]) +
                "\t")  # TODO: attributes are limited to single floats
        object_dumps.write(
            "\n")  # TODO: recycling does not stop object dumping
        # print("elapsed ", time.time() - t)
        return raw_state, factor_state, done

    def getState(self):
        raw_state, raw_factor_state = self.screen.getState()
        if self.factor_state is None:
            factor_state = self.focus_model.forward(
                pytorch_model.wrap(raw_state, cuda=False).unsqueeze(0).unsqueeze(0),
                ret_numpy=True)
            for key in factor_state.keys():
                factor_state[key] *= 84
                factor_state[key] = (np.squeeze(factor_state[key]), (1.0, ))
            factor_state['Action'] = raw_factor_state['Action']
            self.factor_state = factor_state
        factor_state = self.factor_state
        return raw_state, factor_state
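
A minimal sketch of driving this wrapper, assuming a trained focus model is restored elsewhere; load_focus_model, the file paths, and the integer action format are hypothetical placeholders, not confirmed by the original code:

# The focus model predicts object positions from the raw frame; the wrapper
# rescales them to the 84x84 screen and logs them to focus_dumps.txt.
focus_model = load_focus_model("data/focus_net.pt")  # hypothetical loader
env = FocusEnvironment(focus_model)
env.set_save(0, "data/focus_rollout", recycle=-1)
raw_state, factor_state = env.getState()
for _ in range(10):
    raw_state, factor_state, done = env.step(0)  # action format depends on Screen.step; an int is assumed here
    print({key: factor_state[key][0] for key in factor_state})  # object name -> predicted position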