Example #1
    def Breshape(self, b, time, theta):  # reshape belief into a policy input vector
        pro_gains, pro_noise_ln_vars, obs_gains, obs_noise_ln_vars, goal_radius = theta  # unpack theta
        x, P = b  # unpack the belief (mean, covariance)
        px, py, ang, vel, ang_vel = torch.split(x.view(-1), 1)  # unpack states
        r = torch.norm(torch.cat([px, py])).view(-1)  # relative distance to the firefly
        rel_ang = ang - torch.atan2(-py, -px).view(-1)  # relative angle to the firefly
        rel_ang = range_angle(rel_ang)  # wrap the relative angle into the [-pi, pi] range
        vecL = vectorLowerCholesky(P)  # vectorize the lower-triangular Cholesky factor of P
        state = torch.cat([
            r, rel_ang, vel, ang_vel, time, vecL,
            pro_gains.view(-1),
            pro_noise_ln_vars.view(-1),
            obs_gains.view(-1),
            obs_noise_ln_vars.view(-1),
            torch.ones(1) * goal_radius
        ])  # original (full) state
        #state = torch.cat([r, rel_ang, vel, ang_vel]) #, time, vecL]) #simple

        return state.view(1, -1)
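The helpers range_angle and vectorLowerCholesky are not defined in this excerpt; below is a minimal sketch of what they plausibly do, inferred from how they are used above (the actual definitions in the codebase may differ):

import torch

def range_angle(ang):
    # Wrap an angle into the [-pi, pi] range.
    return torch.remainder(ang + torch.pi, 2 * torch.pi) - torch.pi

def vectorLowerCholesky(P):
    # Flatten the lower-triangular Cholesky factor of a covariance
    # matrix into a 1-D vector (15 entries for a 5x5 P).
    L = torch.linalg.cholesky(P)
    idx = torch.tril_indices(P.shape[0], P.shape[1])
    return L[idx[0], idx[1]]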
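Continuing the sketch above, a minimal usage example of Breshape (the agent object and all parameter values here are hypothetical; shapes are inferred from the code):

x = torch.zeros(1, 5)            # belief mean: px, py, ang, vel, ang_vel
P = torch.eye(5) * 1e-3          # belief covariance
theta = (torch.ones(2),          # pro_gains
         torch.zeros(2),         # pro_noise_ln_vars
         torch.ones(2),          # obs_gains
         torch.zeros(2),         # obs_noise_ln_vars
         torch.tensor(0.2))      # goal_radius
time = torch.zeros(1)
state = agent.Breshape((x, P), time, theta)  # shape (1, 29): 5 scalars + 15 (vecL) + 9 (theta)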
Example #3
        #time.sleep(0.1)  # delay for 0.1 sec
        if info['stop']:
            time.sleep(1)
        # check time limit
        TimeEnd = (
            t + 1 == arg.EPISODE_LEN
        )  # if the monkey hasn't caught the firefly within EPISODE_LEN steps, end the episode
        mask = torch.tensor([1 - float(TimeEnd)])  # mask = 0: episode is over

        # Log one row per step: counters, reward/outcome, action, derived
        # distance and relative angle, policy-state entries, theta, and
        # environment constants.
        data = np.array([[
            tot_t, episode, t, reward,
            reached_target.item(), action[0][0].item(), action[0][1].item(),
            torch.norm(x.view(-1)[0:2]).item(),
            range_angle(
                x.view(-1)[2] -
                torch.atan2(-x.view(-1)[1], -x.view(-1)[0]).view(-1)).item(),
            state[0][0].item(), state[0][1].item(), state[0][2].item(),
            state[0][3].item(), state[0][5].item(), state[0][6].item(),
            state[0][7].item(), state[0][8].item(), state[0][9].item(),
            state[0][10].item(), state[0][11].item(), state[0][12].item(),
            state[0][13].item(), state[0][14].item(), state[0][15].item(),
            state[0][16].item(), state[0][17].item(), state[0][18].item(),
            state[0][19].item(), pro_gains[0].item(), pro_gains[1].item(),
            pro_noise_stds[0].item(), pro_noise_stds[1].item(),
            obs_gains[0].item(), obs_gains[1].item(), obs_noise_stds[0].item(),
            obs_noise_stds[1].item(),
            goal_radius.item(), arg.WORLD_SIZE, DISCOUNT_FACTOR
        ]])

        df1 = pd.DataFrame(data, columns=COLUMNS)
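The mask written above typically zeroes the bootstrap term at episode boundaries when discounted returns are accumulated. A minimal sketch of that convention, assuming hypothetical per-step lists rewards and masks collected over one rollout (not shown in this excerpt):

# Accumulate discounted returns backward over one rollout; masks[i] = 0
# stops value from leaking across episode boundaries.
R = 0.0
returns = []
for r_t, m_t in zip(reversed(rewards), reversed(masks)):
    R = r_t + DISCOUNT_FACTOR * m_t * R
    returns.insert(0, R)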