Example #1
class Base(Wrapper):
    def __init__(self, env, args):
        super(Base, self).__init__(env)
        self.args = args
        self.gamma = float(args['--gamma'])
        self.init()

    def init(self):
        self.queue = CompetenceQueue()

    def step(self, exp):
        exp['s1'] = self.env.step(exp['a'])[0]
        exp = self.eval_exp(exp)
        return exp

    def end_episode(self, trajectory):
        # accumulate the discounted return of the episode and record it in the competence queue
        R = 0
        for exp in reversed(trajectory):
            R = R * self.gamma + exp['r']
        self.queue.append(R)

    def eval_exp(self, exp):
        # hook for subclasses: fill in the reward/terminal fields of the transition
        pass

    def reset(self):
        state = self.env.reset()
        return state

    # def shape(self, r, term):
    #     b = (self.gamma - 1) * self.opt_init
    #     r += b
    #     if term:
    #         c = -self.gamma * self.opt_init
    #         r += c
    #     return r
    #
    # def unshape(self, r, term):
    #     b = (self.gamma - 1) * self.opt_init
    #     r -= b
    #     if term:
    #         c = -self.gamma * self.opt_init
    #         r -= c
    #     return r

    def get_stats(self):
        stats = {}
        stats['agentR'] = float("{0:.3f}".format(self.queue.R[-1]))
        return stats

    @property
    def state_dim(self):
        return self.env.observation_space.shape

    @property
    def action_dim(self):
        return self.env.action_space.shape
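
These examples only show CompetenceQueue being used, never defined. For reference, a minimal sketch of a queue that would satisfy the calls made throughout this page (append, R, CP, points, update_CP, size, and the window/maxlen arguments) might look like the following; the actual class in the source project may compute competence progress differently.

from collections import deque

class CompetenceQueueSketch:
    """Hypothetical stand-in for CompetenceQueue, inferred from its usage only."""

    def __init__(self, window=10, maxlen=20):
        self.window = window
        self.points = deque(maxlen=maxlen)  # raw samples: returns, or (coordinate, outcome) pairs
        self.R = deque(maxlen=maxlen)       # scalar history, read back as queue.R[-1] above
        self.CP = 0.0                       # competence progress estimate

    def append(self, point):
        self.points.append(point)
        value = point if isinstance(point, (int, float)) else point[-1]
        self.R.append(value)
        self.update_CP()

    def update_CP(self):
        # competence progress: recent average minus older average over a sliding window
        vals = list(self.R)
        if len(vals) >= 2 * self.window:
            recent = sum(vals[-self.window:]) / self.window
            older = sum(vals[-2 * self.window:-self.window]) / self.window
            self.CP = recent - older

    @property
    def size(self):
        return len(self.points)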
Example #2
    def __init__(self, low=np.array([-np.inf]), high=np.array([np.inf]), window=10, maxlen=20, dtype='float32'):
        super(Region, self).__init__(low=low, high=high, dtype=dtype)
        self.queue = CompetenceQueue(window=window, maxlen=maxlen)
        self.dim_split = None
        self.val_split = None
        self.line = None
        self.freq = 0
        # self.max_CP = 0
        # self.min_CP = 0
        self.sum_CP = 0
Example #3
    def init(self):
        self.queues = [CompetenceQueue() for _ in self.goals]
        self.steps = [0 for _ in self.goals]
        self.interests = [0 for _ in self.goals]
        self.dones = [0 for _ in self.goals]
        self.explorations = [
            LinearSchedule(schedule_timesteps=int(5000),
                           initial_p=1.0,
                           final_p=.5) for _ in self.goals
        ]
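
The schedule built in Example #3 matches the LinearSchedule API from OpenAI Baselines (baselines.common.schedules): it interpolates from initial_p to final_p over schedule_timesteps and then stays at final_p. Assuming that is the class in use, the per-goal exploration rate would be read back with .value(t), roughly as follows.

from baselines.common.schedules import LinearSchedule  # assumption: OpenAI Baselines implementation

sched = LinearSchedule(schedule_timesteps=5000, initial_p=1.0, final_p=0.5)
print(sched.value(0))      # 1.0 at the start of training
print(sched.value(2500))   # 0.75 halfway through the schedule
print(sched.value(10000))  # clamped at 0.5 once the schedule is exhausted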
Example #4
    def __init__(self, env, args):
        super(PlayroomGM, self).__init__(env)

        self.gamma = float(args['--gamma'])
        self.eps = float(args['--eps'])
        self.demo_f = [int(f) for f in args['--demo'].split(',')]

        self.feat = np.array([int(f) for f in args['--features'].split(',')])
        self.N = self.feat.shape[0]
        # one selection vector per chosen feature: a 1 at that feature's index, rows normalized
        vs = np.zeros(shape=(self.N, self.state_dim[0]))
        vs[np.arange(self.N), self.feat] = 1
        self.vs = vs / np.sum(vs, axis=1, keepdims=True)
        self.R = 100
        self.idx = -1
        self.v = np.zeros(shape=(self.state_dim[0], 1))
        self.g = np.ones(shape=(self.state_dim[0]))
        self.queues = [CompetenceQueue() for _ in range(self.N)]
        self.names = ['s0', 'r0', 'a', 's1', 'r1', 'g', 'v', 'o', 'u']
        self.buffer = ReplayBuffer(limit=int(1e5), names=self.names, N=self.N)
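
In Example #4, vs holds one selection vector per chosen feature: row i has a 1 at index feat[i] and rows are normalized to sum to 1 (trivially one-hot here). A worked instance with --features 4,7,10 and an assumed 12-dimensional state:

import numpy as np

feat = np.array([4, 7, 10])
N = feat.shape[0]
vs = np.zeros(shape=(N, 12))        # 12 is illustrative; the real size is state_dim[0]
vs[np.arange(N), feat] = 1
vs = vs / np.sum(vs, axis=1, keepdims=True)
# vs[0] picks out state dimension 4, vs[1] dimension 7, vs[2] dimension 10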
Example #5
    def __init__(self, env, args):
        super(Playroom3GM, self).__init__(env)

        self.gamma = float(args['--gamma'])
        self.theta1 = float(args['--theta1'])
        self.theta2 = float(args['--theta2'])
        self.selfImit = bool(int(args['--selfImit']))
        self.tutorTask = args['--tutorTask']

        self.tasks = [o.name for o in self.env.objects]
        self.Ntasks = len(self.tasks)
        self.tasks_feat = [[4], [7], [10]]
        self.mask = None
        self.task = None
        self.goal = None
        self.queues = [CompetenceQueue() for _ in self.tasks]
        self.envsteps = [0 for _ in self.tasks]
        self.trainsteps = [0 for _ in self.tasks]
        self.offpolicyness = [0 for _ in self.tasks]
        self.termstates = [0 for _ in self.tasks]
        self.attempts = [0 for _ in self.tasks]
        # self.foreval = [False for _ in self.tasks]
        self.update_interests()

        self.state_low = self.env.low
        self.state_high = self.env.high
        self.init_state = np.array(self.env.initstate)
        self.r_done = 100
        self.r_notdone = 0
        self.terminal = True
        self.minQ = self.r_notdone / (1 - self.gamma)
        self.maxQ = self.r_done if self.terminal else self.r_done / (
            1 - self.gamma)

        self.names = ['s0', 'a', 's1', 'r', 't', 'g', 'm', 'pa', 'mcr', 'task']
        self.buffer = MultiTaskReplayBuffer(limit=int(1e6),
                                            Ntasks=self.Ntasks,
                                            names=self.names)
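
The minQ/maxQ bounds in Example #5 (and #6 below) are the geometric-series limits of the per-step rewards: never succeeding yields r_notdone every step, while succeeding yields r_done once if episodes terminate on success, or every step if they do not. A quick check with gamma = 0.99 as an illustrative value (the real one comes from --gamma):

gamma, r_done, r_notdone, terminal = 0.99, 100, 0, True
minQ = r_notdone / (1 - gamma)                       # 0.0 when r_notdone is 0
maxQ = r_done if terminal else r_done / (1 - gamma)  # 100 here; 10000 in the non-terminal case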
Example #6
    def __init__(self, env, args):
        super(Manipulator, self).__init__(env)

        self.gamma = float(args['--gamma'])
        self.theta1 = float(args['--theta1'])
        self.theta2 = float(args['--theta2'])
        self.selfImit = bool(int(args['--selfImit']))
        self.tutorTask = args['--tutorTask']

        self.tasks_feat = [[i] for i in range(12)]
        self.Ntasks = len(self.tasks_feat)
        self.mask = None
        self.task = None
        self.goal = None
        self.queues = [CompetenceQueue() for _ in self.tasks_feat]
        self.envsteps = [0 for _ in self.tasks_feat]
        self.trainsteps = [0 for _ in self.tasks_feat]
        self.offpolicyness = [0 for _ in self.tasks_feat]
        self.termstates = [0 for _ in self.tasks_feat]
        self.attempts = [0 for _ in self.tasks_feat]
        # self.foreval = [False for _ in self.tasks_feat]
        self.update_interests()

        self.state_low = self.env.observation_space.low
        self.state_high = self.env.observation_space.high

        self.r_done = 100
        self.r_notdone = 0
        self.terminal = True
        self.minQ = self.r_notdone / (1 - self.gamma)
        self.maxQ = self.r_done if self.terminal else self.r_done / (
            1 - self.gamma)

        self.names = ['s0', 'a', 's1', 'r', 't', 'g', 'm', 'pa', 'mcr', 'task']
        self.buffer = MultiTaskReplayBuffer(limit=int(1e6),
                                            Ntasks=self.Ntasks,
                                            names=self.names)
Example #7
    def init(self):
        self.queues = [CompetenceQueue() for _ in self.goals]
        self.steps = [0 for _ in self.goals]
        self.replays = [0 for _ in self.goals]
        self.update_interests()
Example #8
    def init(self):
        self.queue = CompetenceQueue()
Example #9
import numpy as np
from gym.spaces import Box  # assumed: Region extends a gym Box space
# CompetenceQueue is provided by the surrounding project (module not shown here)

class Region(Box):

    def __init__(self, low=np.array([-np.inf]), high=np.array([np.inf]), window=10, maxlen=20, dtype='float32'):
        super(Region, self).__init__(low=low, high=high, dtype=dtype)
        self.queue = CompetenceQueue(window=window, maxlen=maxlen)
        self.dim_split = None
        self.val_split = None
        self.line = None
        self.freq = 0
        # self.max_CP = 0
        # self.min_CP = 0
        self.sum_CP = 0

    def sample(self):
        return np.random.uniform(low=self.low, high=self.high)

    def contains(self, x):
        shape_ok = (x.shape == self.low.shape)
        low_ok = (x >= self.low).all()
        high_ok = (x <= self.high).all()
        return shape_ok and low_ok and high_ok

    def split(self, dim, split_val):
        low_right = np.copy(self.low)
        low_right[dim] = split_val
        high_left = np.copy(self.high)
        high_left[dim] = split_val
        left = Region(self.low, high_left)
        right = Region(low_right, self.high)

        left.queue.CP = self.queue.CP
        right.queue.CP = self.queue.CP
        # left.queue.competence = self.queue.competence
        # right.queue.competence = self.queue.competence
        for point in self.queue.points:
            if left.contains(point[0]):
                left.queue.points.append(point)
            else:
                right.queue.points.append(point)
        left.queue.update_CP()
        right.queue.update_CP()
        score = self.eval_split(left, right)
        return left, right, score

    def add(self, point):
        self.queue.append(point)

    def eval_split(self, left, right):
        # split quality: |CP difference| between children, weighted by both children's sizes
        return left.size * right.size * np.sqrt((right.CP - left.CP)**2)

    def best_split(self, dims, n_split, split_min):
        best = 0
        best_left, best_right = Region(), Region()
        for dim in dims:
            sub_regions = np.linspace(self.low[dim], self.high[dim], n_split+2)
            for split_val in sub_regions[1:-1]:
                left, right, score = self.split(dim, split_val)
                if score > best and score > split_min:
                    best_left = left
                    best_right = right
                    self.val_split = split_val
                    self.dim_split = dim
                    best = score
        return best_left, best_right

    def compute_line(self):
        if not self.is_leaf:
            if self.dim_split == 0:
                line1_xs = 2 * [self.val_split]
                line1_ys = [self.low[1], self.high[1]]
            else:
                line1_ys = 2 * [self.val_split]
                line1_xs = [self.low[0], self.high[0]]
            self.line = [line1_xs, line1_ys]

    @property
    def is_leaf(self):
        return (self.dim_split is None and self.is_init)

    @property
    def is_init(self):
        return (not np.isinf(self.high[0]))

    @property
    def CP(self):
        return self.queue.CP

    @property
    def size(self):
        return self.queue.size

    @property
    def area(self):
        return (self.high[0] - self.low[0]) * (self.high[1] - self.low[1])
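
A short driver for the Region class above, under the assumption that CompetenceQueue behaves like the sketch shown after Example #1 (points are (coordinate, outcome) pairs and CP is refreshed by update_CP); the values are illustrative, not taken from the source project.

import numpy as np

root = Region(low=np.array([0.0, 0.0]), high=np.array([1.0, 1.0]))
for _ in range(100):
    p = root.sample()
    outcome = float(p[0] > 0.5)          # toy competence signal
    root.add((p, outcome))
left, right = root.best_split(dims=[0, 1], n_split=10, split_min=0.0)
if root.dim_split is not None:
    root.compute_line()                  # stores the split segment for plotting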
Example #10
    def __init__(self, space, theta):
        self.theta = theta
        self.tasks = space
        self.task_queues = [CompetenceQueue() for _ in self.tasks]
        self.task_freqs = [0] * len(self.tasks)
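
Example #10 keeps one CompetenceQueue per task plus a mixing parameter theta. Nothing on this page shows how tasks are then chosen, but a common pattern for this kind of object (a guess here, not the source's method) is epsilon-greedy sampling on competence progress:

import numpy as np

def sample_task(task_queues, theta):
    """Hypothetical selector: with probability theta pick a task uniformly,
    otherwise pick proportionally to each queue's |CP|."""
    n = len(task_queues)
    if np.random.random() < theta:
        return np.random.randint(n)
    cps = np.array([abs(q.CP) for q in task_queues], dtype=float)
    if cps.sum() == 0.0:
        return np.random.randint(n)
    return np.random.choice(n, p=cps / cps.sum())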