Example #1
    def observations(
        self,
        x,
        theta=None
    ):  # apply observation gain and noise to get the observation of state x
        '''
        Take in state x and return the observation of x.
        '''
        if theta is not None:
            self.pro_gains, self.pro_noise_stds, self.obs_gains, self.obs_noise_stds, self.goal_radius = torch.split(
                theta.view(-1), 2)

        # observations of velocity and angular velocity have gain and noise; position is not observed
        on = torch.distributions.Normal(
            0,
            denorm_parameter(
                torch.sigmoid(self.obs_noise_stds),
                self.std_range[-2:])).sample()  # on is observation noise
        vel, ang_vel = torch.split(x.view(-1),
                                   1)[-2:]  # flatten the 1x5 state and take the last two entries

        ovel = denorm_parameter(
            torch.sigmoid(self.obs_gains[0]),
            self.gains_range[-2:]) * vel + on[0]  # observe velocity
        oang_vel = denorm_parameter(torch.sigmoid(self.obs_gains[1]),
                                    self.gains_range[-2:]) * ang_vel + on[1]
        ox = torch.stack((ovel, oang_vel))  # observed x
        return ox
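These snippets rely on a denorm_parameter helper that is not shown here. A minimal sketch, assuming it linearly maps a sigmoid-squashed value in (0, 1) onto a [low, high] parameter range; the project's real helper may differ:

import torch

def denorm_parameter(p, prange):
    # p is in (0, 1), e.g. torch.sigmoid(raw_param); prange = (low, high)
    low, high = prange[0], prange[1]
    return low + p * (high - low)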
Example #2
    def x_step(self, x, a, dt, box, pro_gains,
               pro_noise_stds):  # advance to the next state
        '''
        Originally in the firefly task's forward(); returns the next state x.
        Open question: pro_gains and pro_noise_stds are applied even when
        computing the new x. Is that correct?
        '''
        # dynamics: return the updated state in the format of the x tuple
        px, py, ang, vel, ang_vel = torch.split(x.view(-1), 1)

        a_v = a[0]  # action for velocity
        a_w = a[1]  # action for angular velocity

        w = torch.distributions.Normal(
            0,
            denorm_parameter(torch.sigmoid(pro_noise_stds),
                             self.std_range[:2])).sample()  # process noise on vel and ang_vel

        vel = 0.0 * vel + denorm_parameter(torch.sigmoid(
            pro_gains[0]), self.gains_range[:2]) * a_v + w[
                0]  # discard the previous velocity: new vel = gain * action + noise
        ang_vel = 0.0 * ang_vel + denorm_parameter(torch.sigmoid(
            pro_gains[1]), self.gains_range[:2]) * a_w + w[1]
        ang = ang + ang_vel * dt
        ang = range_angle(ang)  # wrap the angle into the [-pi, pi] range

        px = px + vel * torch.cos(ang) * dt  # new position x and y
        py = py + vel * torch.sin(ang) * dt
        px = torch.clamp(px, -box,
                         box)  # clamp the location to stay inside the arena
        py = torch.clamp(py, -box, box)
        next_x = torch.stack((px, py, ang, vel, ang_vel))

        return next_x.view(1, -1)
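A hypothetical rollout using x_step, assuming env is a reset instance of the environment class these methods belong to (dt, box, and the gains are taken from its attributes):

x = torch.zeros(1, 5)         # [px, py, ang, vel, ang_vel], start at the origin
a = torch.tensor([1.0, 0.0])  # full forward action, no turning
for _ in range(10):
    x = env.x_step(x, a, env.dt, env.box, env.pro_gains, env.pro_noise_stds)
print(x)                      # noisy forward motion, clamped inside the arena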
Example #3
    def forward(self, action, theta=None):

        # unpack theta
        if theta is not None:
            self.pro_gains, self.pro_noise_stds, self.obs_gains, self.obs_noise_stds, self.goal_radius = torch.split(
                theta.view(-1), 2)

        # true next state; the xy position decides whether the target is reached (whether to stop is decided separately).
        next_x = self.x_step(self.x, action, self.dt, self.box, self.pro_gains,
                             self.pro_noise_stds)
        pos = next_x.view(-1)[:2]
        reached_target = (torch.norm(pos) <= denorm_parameter(
            torch.sigmoid(self.goal_radius), self.goal_radius_range)
                          )  # is within ring
        self.x = next_x

        # o t+1
        # check the noise representation
        on = torch.distributions.Normal(
            0,
            denorm_parameter(
                torch.sigmoid(self.obs_noise_stds),
                self.std_range[-2:])).sample()  # on is observation noise
        vel, ang_vel = torch.split(self.x.view(-1),
                                   1)[-2:]  # flatten the 1x5 state and take the last two entries
        ovel = denorm_parameter(torch.sigmoid(self.obs_gains[0]),
                                self.gains_range[-2:]) * vel + on[
                                    0]  # observed velocity, has gain and noise
        oang_vel = denorm_parameter(torch.sigmoid(self.obs_gains[1]),
                                    self.gains_range[-2:]) * ang_vel + on[
                                        1]  # same for angular velocity
        next_ox = torch.stack((ovel, oang_vel))  # observed x t+1
        self.o = next_ox

        # b t+1
        # belief step forward: Kalman filter update with the new observation
        next_b, info = self.belief_step(self.b, next_ox, action, self.box)
        self.b = next_b
        # belief next state; info['stop']=terminal. reward depends only on belief
        # note: in-place update here; check

        # reshape b to give to policy
        self.belief = self.Breshape(
            b=next_b, time=self.time,
            theta=self.theta)  # state used in policy is different from belief

        # reward
        episode = 1  # sb keeps its own episode count; will discard this later
        finetuning = 0  # not doing finetuning
        reward = return_reward(episode, info, reached_target, next_b,
                               self.goal_radius, self.REWARD, finetuning)

        # original return names
        self.time = self.time + 1
        self.stop = (reached_target
                     and info['stop']) or self.time > self.episode_len

        return self.belief, self.stop
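Because forward() accepts an optional theta, the episode can be replayed under a candidate parameter vector, e.g. for the inverse (parameter-inference) setting that the InverseFuncs references suggest. A sketch under that assumption; env, policy, and candidate_theta are illustrative names:

candidate_theta = torch.rand(9, requires_grad=True)  # 2+2+2+2+1 parameters
belief = env.reset()
for _ in range(env.episode_len):
    action = policy(belief)
    belief, stop = env.forward(action, theta=candidate_theta)
    if stop:
        break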
Example #4
    def Breshape(self, **kwargs):  # reshape the belief, ready for policy
        '''
        reshape belief for policy
        '''
        argin = dict(kwargs)  # expected keys: b, time, theta

        try:
            pro_gains, pro_noise_stds, obs_gains, obs_noise_stds, goal_radius = InverseFuncs.unpack_theta(
                argin['theta'])  # unpack the theta
        except KeyError:
            pro_gains, pro_noise_stds, obs_gains, obs_noise_stds, goal_radius = InverseFuncs.unpack_theta(
                self.theta)

        try:
            time = argin['time']
        except KeyError:
            time = self.time

        try:
            b = argin['b']
        except KeyError:
            b = self.b

        x, P = b  # unpack the belief
        px, py, ang, vel, ang_vel = torch.split(x.view(-1),
                                                1)  # unpack state x
        r = torch.norm(torch.cat([px, py])).view(
            -1)  # r is the distance to the firefly (at the origin)
        rel_ang = ang - torch.atan2(-py, -px).view(-1)  # relative angle
        rel_ang = range_angle(
            rel_ang)  # wrap the relative angle into the [-pi, pi] range
        vecL = vectorLowerCholesky(P)  # take the lower triangle of P
        state = torch.cat([
            r, rel_ang, vel, ang_vel, time, vecL,
            denorm_parameter(torch.sigmoid(pro_gains.view(-1)),
                             self.gains_range[:2]),
            denorm_parameter(torch.sigmoid(pro_noise_stds.view(-1)),
                             self.std_range[:2]),
            denorm_parameter(torch.sigmoid(obs_gains.view(-1)),
                             self.gains_range[-2:]),
            denorm_parameter(torch.sigmoid(obs_noise_stds.view(-1)),
                             self.std_range[-2:]),
            denorm_parameter(torch.sigmoid(torch.ones(1) * goal_radius),
                             self.goal_radius_range)
        ])
        # state = torch.cat([r, rel_ang, vel, ang_vel, time, vecL])  # simpler variant

        return state.view(1, -1)
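The vectorLowerCholesky helper is not shown. A sketch, assuming it flattens the lower-triangular Cholesky factor of the belief covariance P into a vector so the policy sees the uncertainty compactly; the project's version may differ:

def vectorLowerCholesky(P):
    L = torch.linalg.cholesky(P)  # P = L @ L.T with L lower-triangular
    idx = torch.tril_indices(P.shape[0], P.shape[1])
    return L[idx[0], idx[1]]      # 15 entries for a 5x5 P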
Example #5
    def step(self, action):  # state and action to state, and belief
        '''
        # input:
            # action
        # return:
            # observation, an object
            # reward, float, to be maximized
            # done, bool; when True, reset
            # info, dict, for debugging

        in this case, we give the belief state as the observation used to choose the action,
        and store the real state in self to decide reward and done.

        self.states, self.belief, action, self.noises
        1. states update by action
        2. belief update by action and states with noise, via Kalman filter

        '''
        # x t+1
        # true next state; the xy position decides whether the target is reached (whether to stop is decided separately).
        next_x = self.x_step(self.x, action, self.dt, self.box, self.pro_gains,
                             self.pro_noise_stds)
        self.x = next_x

        # o t+1
        self.o = self.observations(self.x)

        # b t+1
        # belief step forward: Kalman filter update with the new observation
        self.b, info = self.belief_step(self.b, self.o, action, self.box)

        # reshape b to give to policy
        self.belief = self.Breshape(
            b=self.b, time=self.time,
            theta=self.theta)  # state used in policy is different from belief

        # reward
        pos = next_x.view(-1)[:2]
        reached_target = (torch.norm(pos) <= denorm_parameter(
            torch.sigmoid(self.goal_radius), self.goal_radius_range)
                          )  # is within ring
        episode = 1  # sb keeps its own episode count; will discard this later
        finetuning = 0  # not doing finetuning
        reward = return_reward(episode, info, reached_target, self.b,
                               self.goal_radius, self.REWARD, finetuning)

        # original return names
        self.time = self.time + 1
        self.stop = (reached_target
                     and info['stop']) or self.time > self.episode_len

        return self.belief, reward, self.stop, info
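step() follows the usual gym interface, so interaction is the standard loop. A hypothetical example, where agent is any callable mapping a belief to a 2-vector action:

belief = env.reset()
done = False
while not done:
    action = agent(belief)  # e.g. a trained policy
    belief, reward, done, info = env.step(action)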
Example #6
    def reset(self):  # reset env
        '''
        return obs
        two parts to the reset here:
        reset the episode pro_gains, pro_noise_stds, goal radius,
        state x, including [px, py, ang, vel, ang_vel],
        and reset time

        then, reset belief:

        finally, return state
        '''

        # init world state
        # inputs: gains_range, std_range, goal_radius_range
        if self.phi is None:  # default: generate theta from the ranges if no preset phi is available

            if self.pro_gains is None or self.reset_theta:
                self.pro_gains = torch.zeros(2)
                self.pro_gains[0] = torch.zeros(1).uniform_(
                    self.gains_range[0], self.gains_range[1])  #[proc_gain_vel]
                self.pro_gains[1] = torch.zeros(1).uniform_(
                    self.gains_range[2],
                    self.gains_range[3])  # [proc_gain_ang]

            if self.pro_noise_stds is None or self.reset_theta:
                self.pro_noise_stds = torch.zeros(2)
                self.pro_noise_stds[0] = torch.zeros(1).uniform_(
                    self.std_range[0], self.std_range[1])
                self.pro_noise_stds[1] = torch.zeros(1).uniform_(
                    self.std_range[2], self.std_range[3])

            if self.goal_radius is None or self.reset_theta:
                self.max_goal_radius = min(
                    self.max_goal_radius + self.GOAL_RADIUS_STEP,
                    self.goal_radius_range[1])
                self.goal_radius = torch.zeros(1).uniform_(
                    self.goal_radius_range[0], self.max_goal_radius)

            if self.obs_gains is None or self.reset_theta:
                self.obs_gains = torch.zeros(2)
                self.obs_gains[0] = torch.zeros(1).uniform_(
                    self.gains_range[0], self.gains_range[1])  # [obs_gain_vel]
                self.obs_gains[1] = torch.zeros(1).uniform_(
                    self.gains_range[2], self.gains_range[3])  # [obs_gain_ang]
            if self.obs_noise_stds is None or self.reset_theta:
                self.obs_noise_stds = torch.zeros(2)
                self.obs_noise_stds[0] = torch.zeros(1).uniform_(
                    self.std_range[0], self.std_range[1])
                self.obs_noise_stds[1] = torch.zeros(1).uniform_(
                    self.std_range[2], self.std_range[3])

        else:
            # use the preset phi
            self.fetch_phi()

        self.theta = torch.cat([
            self.pro_gains, self.pro_noise_stds, self.obs_gains,
            self.obs_noise_stds, self.goal_radius
        ])

        # print(self.theta)

        self.time = torch.zeros(1)
        self.stop = False
        # min_r = torch.exp(self.goal_radius.item()) # when log
        min_r = (denorm_parameter(torch.sigmoid(self.goal_radius),
                                  self.goal_radius_range)).item()
        r = torch.zeros(1).uniform_(
            min_r, self.box)  # initial distance: between the goal radius and the world edge (self.box is the world size)
        loc_ang = torch.zeros(1).uniform_(
            -pi, pi)  # location angle: determines the initial location
        px = r * torch.cos(loc_ang)
        py = r * torch.sin(loc_ang)
        rel_ang = torch.zeros(1).uniform_(-pi / 4, pi / 4)
        ang = rel_ang + loc_ang + pi  # heading angle of the monkey; pi is added so the monkey initially faces the firefly
        ang = range_angle(ang)
        vel = torch.zeros(1)
        ang_vel = torch.zeros(1)
        self.x = torch.cat([px, py, ang, vel,
                            ang_vel])  # this is state x at t0
        self.o = torch.zeros(2)  # this is observation o at t0
        self.action = torch.zeros(2)  # this will be action a at t0

        self.P = torch.eye(5) * 1e-8  # TODO: derive the size from the state dimension instead of hard-coding
        self.b = self.x, self.P  # belief equals x because nothing has moved yet and there is no noise on x, y, angle
        self.belief = self.Breshape(b=self.b, time=self.time, theta=self.theta)
        # return self.b, self.state, self.obs_gains, self.obs_noise_ln_vars
        # print(self.belief.shape) #1,29
        return self.belief  # this is belief at t0
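InverseFuncs.unpack_theta is not shown; here is a sketch implied by the packing order in reset() and the split sizes used elsewhere (an assumption, not the project's code):

def unpack_theta(theta):
    # theta = [pro_gains(2), pro_noise_stds(2), obs_gains(2), obs_noise_stds(2), goal_radius(1)]
    t = theta.view(-1)
    return t[0:2], t[2:4], t[4:6], t[6:8], t[8:9]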
Example #7
    def belief_step(self, b, ox, a, box):  # advance to the next belief
        I = torch.eye(5)

        # Q matrix: process noise for the transition from xt to xt+1; only applied to v and w
        Q = torch.zeros(5, 5)
        Q[-2:, -2:] = torch.diag((denorm_parameter(
            torch.sigmoid(self.pro_noise_stds),
            self.std_range[:2]))**2)  # variances of vel, ang_vel

        # R matrix: observation noise covariance
        R = torch.diag((denorm_parameter(torch.sigmoid(self.obs_noise_stds),
                                         self.std_range[-2:]))**2)

        # H matrix: maps x into observation space; only applied to v and w
        H = torch.zeros(2, 5)
        H[:, -2:] = torch.diag(
            denorm_parameter(torch.sigmoid(self.obs_gains),
                             self.gains_range[-2:]))

        # Extended Kalman Filter
        pre_bx_, P = b
        bx_ = self.x_step(pre_bx_, a, self.dt, box, self.pro_gains,
                          self.pro_noise_stds)  # predict xt+1 from xt and at
        bx_ = bx_.t()  # make a column vector
        A = self.A(
            bx_)  # Jacobian A of the dynamics, used to propagate the covariance
        P_ = A.mm(P).mm(A.t()) + Q  # predict Pt+1 = A P A^T + Q
        if not is_pos_def(
                P_):  # should be positive definite; if not, print debug info.
            # happens if the noise goes to 0.
            print("P_:", P_)
            print("P:", P)
            print("A:", A)
            APA = A.mm(P).mm(A.t())
            print("APA:", APA)
            print("APA +:", is_pos_def(APA))
        error = ox - self.observations(
            bx_)  # innovation z - h(x), with x the predicted xt+1
        S = H.mm(P_).mm(
            H.t()
        ) + R  # S = H P H^T + R, the innovation covariance
        K = P_.mm(H.t()).mm(torch.inverse(
            S))  # K = P H^T S^-1, the Kalman gain
        bx = bx_ + K.matmul(
            error
        )  # correct the predicted xt+1 with the new observation
        I_KH = I - K.mm(H)
        P = I_KH.mm(
            P_
        )  # correct the covariance of the predicted xt+1 using the observation noise R

        if not is_pos_def(P):
            print("here")
            print("P:", P)
            P = (
                P + P.t()
            ) / 2 + 1e-6 * I  # symmetrize and regularize to avoid numerical issues

        bx = bx.t()  # back to a row vector
        b = bx.view(-1), P  # belief

        # terminal check
        terminal = self._isTerminal(bx, a)  # check whether the monkey has stopped
        return b, {
            'stop': terminal
        }  # the dict is info; a later version will put stop and reached-target together.
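In standard notation, belief_step implements the extended Kalman filter predict and update steps:

\hat{x}_{t+1} = f(x_t, a_t), \qquad P^-_{t+1} = A P_t A^\top + Q

S = H P^-_{t+1} H^\top + R, \qquad K = P^-_{t+1} H^\top S^{-1}

x_{t+1} = \hat{x}_{t+1} + K\big(z_{t+1} - h(\hat{x}_{t+1})\big), \qquad P_{t+1} = (I - KH)\,P^-_{t+1}

One caveat grounded in the code: the innovation is formed as ox - self.observations(bx_), and observations() samples fresh noise, whereas a textbook EKF would use the noiseless predicted observation H bx_ here; this matches the "check the noise representation" note above.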