示例#1
0
def test_wrapper():
    """Play one random-action episode of Pong through ``RewardCountLimit``.

    The environment is wrapped with ``max_reward_count=5``, reset, and then
    stepped and rendered until the wrapped environment reports ``done``.
    """
    env = gym.make('Pong-v0')
    env = gym_wrappers.RewardCountLimit(env, max_reward_count=5)
    try:
        env.reset()
        env.render()
        done = False

        while not done:
            # cma_es.sample is used elsewhere in this file as
            # sample(n, step_size, mean, b, d) -> (samples, z); it cannot be
            # called without arguments and does not yield an action, so the
            # action is drawn from the environment's own action space.
            _, _, done, _ = env.step(env.action_space.sample())
            env.render()
    finally:
        # release the emulator / render window even if a step raises
        env.close()
示例#2
0
def test_atari():
    """Play one random-action episode of Pong through ``AtariPreprocessing``.

    The raw (unwrapped) ``PongNoFrameskip-v4`` environment is wrapped with
    ``terminal_on_life_loss=True``, then stepped and rendered until the
    wrapper reports ``done``.
    """
    env = gym.make('PongNoFrameskip-v4')
    env = w.atari_preprocessing.AtariPreprocessing(env.unwrapped,
                                                   terminal_on_life_loss=True)
    try:
        env.reset()
        env.render()
        done = False

        while not done:
            # cma_es.sample is used elsewhere in this file as
            # sample(n, step_size, mean, b, d) -> (samples, z); it cannot be
            # called without arguments and does not yield an action, so the
            # action is drawn from the environment's own action space.
            _, _, done, _ = env.step(env.action_space.sample())
            env.render()
    finally:
        # release the emulator / render window even if a step raises
        env.close()
示例#3
0
def test_expectation_my_multivariate_norm():
    """Plot the empirical mean norm of ``cma_es.sample`` draws against theory.

    For every dimension N in 1..49, 20 draws are requested from
    ``cma_es.sample`` with step size 1.0, a zero mean and identity ``b`` and
    ``d`` matrices. The average Euclidean norm of the rows of the returned
    ``z`` is plotted next to ``expect_multivariate_norm(N)``.
    """
    n_draws = 20
    dims = range(1, 50)
    analytic = [expect_multivariate_norm(n) for n in dims]

    empirical = []
    for n in dims:
        _, z = cma_es.sample(n_draws, 1.0, torch.zeros(n), torch.eye(n),
                             torch.eye(n))
        # average the per-row norms over the requested number of draws
        empirical.append(
            sum(row.norm().item() for row in z.unbind(0)) / n_draws)

    plt.plot(dims, empirical, label='empirical')
    plt.plot(dims, analytic, label='analytic')
    # 'upper  left' (two spaces) is not a valid matplotlib location string
    plt.legend(loc='upper left')
    plt.show()
示例#4
0
def test_pong():
    """Show raw and preprocessed Pong frames side by side for one episode.

    Each frame returned by the environment is rendered in one viewer, passed
    through ``d.pong_color_prepro`` and rendered again in a second viewer
    titled 'processed'.
    """
    raw_viewer = UniImageViewer()
    processed_viewer = UniImageViewer(title='processed',
                                      screen_resolution=(32, 32))
    env = gym.make('Pong-v0')
    try:
        env.reset()
        done = False

        while not done:
            # cma_es.sample is used elsewhere in this file as
            # sample(n, step_size, mean, b, d) -> (samples, z); it cannot be
            # called without arguments and does not yield an action, so the
            # action is drawn from the environment's own action space.
            frame, _, done, _ = env.step(env.action_space.sample())
            raw_viewer.render(frame)
            frame = d.pong_color_prepro(frame)
            # earlier hand-rolled preprocessing, kept for reference:
            #frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
            #frame = frame[34:168, :]
            #frame = cv2.resize(frame, dsize=(32, 32), interpolation=cv2.INTER_AREA)
            processed_viewer.render(frame)
    finally:
        # release the emulator even if a step or a render raises
        env.close()
示例#5
0
def test_patch():
    """Print transporter keypoints for every frame of one random episode.

    Loads the baseline experiment config, builds the environment named by the
    configured datapack (wrapped in ``RewardCountLimit`` with a limit of 5),
    and feeds each preprocessed frame through ``main.Keypoints``.
    """
    args = config.config(['--config', '../configs/cma_es/exp2/baseline.yaml'])

    torch.manual_seed(0)
    datapack = keypoints.ds.datasets.datasets[args.dataset]
    env = gym.make(datapack.env)
    env = gym_wrappers.RewardCountLimit(env, 5)
    try:
        done = False
        env.reset()
        transporter_net = transporter.make(args, map_device='cpu')
        view = main.Keypoints(transporter_net)

        while not done:
            # cma_es.sample is used elsewhere in this file as
            # sample(n, step_size, mean, b, d) -> (samples, z); it cannot be
            # called without arguments and does not yield an action, so the
            # action is drawn from the environment's own action space.
            frame, _, done, _ = env.step(env.action_space.sample())
            frame = datapack.prepro(frame)
            # unsqueeze(0) adds a leading dimension before the frame is
            # handed to the keypoint view
            batch = datapack.transforms(frame).unsqueeze(0)
            kp = view(batch)
            print(kp)
            env.render()
    finally:
        # release the emulator / render window even if a step raises
        env.close()
示例#6
0
def test_hyperparams():
    """Run nine CMA-ES generations on ``akley`` with the default parameters.

    Strategy parameters (``cc``, ``cs``, ``c1``, ``cmu``, ``damps``) are
    derived from the number of features and ``mueff`` using the formulas
    quoted in the comment block below. Each generation samples a population
    with ``cma_es.sample``, keeps the ``mu`` highest-fitness candidates,
    updates mean, step size and covariance, and calls ``plot_heatmap`` before
    ('sample ') and after ('select') the update.
    """
    objective_f = akley

    features = 2
    step_size = 1.0

    # selection settings
    samples = 4 + floor(3 * log(features))
    mu = samples / 2
    # log(mu + 1/2) - log(1..mu): decreasing, so rank 0 gets the largest weight
    weights = torch.tensor([log(mu + 0.5)]) - torch.linspace(
        start=1, end=mu, steps=floor(mu)).log()
    weights = weights / weights.sum()
    mu = floor(mu)
    mueff = (weights.sum()**2 / (weights**2).sum()).item()

    # reference (MATLAB) formulas the settings below are transcribed from:
    #   cc = (4 + mueff / N) / (N + 4 + 2 * mueff / N);
    #   cs = (mueff + 2) / (N + mueff + 5);
    #   c1 = 2 / ((N + 1.3)^2 + mueff);
    #   cmu = 2 * (mueff - 2 + 1 / mueff) / ((N + 2)^2 + 2 * mueff / 2);
    #   damps = 1 + 2 * max(0, sqrt((mueff - 1) / (N + 1)) - 1) + cs;

    # adaptation settings
    cc = (4 + mueff / features) / (features + 4 + 2 * mueff / features)
    cs = (mueff + 2) / (features + mueff + 5)
    c1 = 2 / ((features + 1.3)**2 + mueff)
    cmu = 2 * (mueff - 2 + 1 / mueff) / ((features + 2)**2 + 2 * mueff / 2)
    damps = 1 + 2 * max(0.0, sqrt((mueff - 1.0) / (features + 1)) - 1) + cs
    chiN = expect_multivariate_norm(features)

    mean = torch.zeros(features)
    b = torch.eye(features)
    d = torch.eye(features)
    c = torch.matmul(b.matmul(d), b.matmul(d).T)

    # evolution paths for the covariance (pc) and the step size (ps)
    pc = torch.zeros(features)
    ps = torch.zeros(features)

    print(
        f'mu: {mu}. mueff: {mueff}, cc : {cc}, cs: {cs}, c1: {c1}, cmu: {cmu}, damps: {damps}, chiN:{chiN}'
    )

    plt.title('weights')
    plt.plot(weights)
    print(weights)
    plt.show()

    # per-generation traces; they are only appended to in this function
    step_size_l = [step_size]
    correlation_l = [1.0]
    ps_l = [ps[0].item()]
    fitness_l = [0]
    plot_freq = 1

    # counteval counts generations (1..9), not individual evaluations
    for counteval in range(1, 10):

        # sample parameters
        s, z = cma_es.sample(samples, step_size, mean, b, d)

        # rank by fitness, highest first, and keep the best mu candidates
        fitness = objective_f(s[:, 0], s[:, 1])
        ranked = sorted(
            ({
                'sample': s[i],
                'z': z[i],
                'fitness': fit.item()
            } for i, fit in enumerate(fitness)),
            key=lambda item: item['fitness'],
            reverse=True)[:mu]
        fitness_l.append(ranked[0]['fitness'])
        z = torch.stack([item['z'] for item in ranked])
        g = torch.stack([item['sample'] for item in ranked])

        if counteval % plot_freq == 0:
            plot_heatmap('sample ',
                         counteval,
                         mean,
                         b,
                         d,
                         samples=s,
                         g=g,
                         chiN=chiN,
                         step_size=step_size)

        # weighted recombination in sample space and in z-space
        mean = (g * weights.unsqueeze(1)).sum(0)
        zmean = (z * weights.unsqueeze(1)).sum(0)

        # step size path; the mueff factor mirrors the pc update below and the
        # reference implementation (it was missing here)
        ps = (1 - cs) * ps + sqrt(cs * (2.0 - cs) * mueff) * b.matmul(zmean)

        correlation = ps.norm() / chiN
        ps_l.append(ps[0].item())
        correlation_l.append(correlation.item())

        # delay the introduction of the rank 1 update
        # The reference exponent is 2 * evaluations / lambda, i.e. twice the
        # generation number; counteval already is the generation number.
        denominator = sqrt(1 - (1 - cs)**(2 * counteval))
        # reference threshold is 1.4 + 2 / (N + 1); the previous
        # 1.4e2 / N + 1 evaluated to 71 and never switched hsig off
        threshold = 1.4 + 2 / (features + 1)
        hsig = 1.0 if correlation / denominator < threshold else 0.0

        step_size = step_size * ((cs / damps) * (correlation - 1.0)).exp()

        step_size_l.append(step_size)

        # a mind bending way to write a exponential smoothed moving average
        # zmean does not contain step size or mean, so allows us to add together
        # updates of different step sizes
        pc = (1 - cc) * pc + hsig * sqrt(
            cc * (2.0 - cc) * mueff) * b.matmul(d).matmul(zmean)
        # which we then combine to make a covariance matrix, from 1 (mean) datapoint!
        # this is why it's called "rank 1" update
        pc_cov = pc.unsqueeze(1).matmul(pc.unsqueeze(1).t())
        # mix back in the old covariance if hsig == 0
        pc_cov = pc_cov + (1 - hsig) * cc * (2 - cc) * c

        # estimate cov for all selected samples (weighted by rank)
        bdz = b.matmul(d).matmul(z.t())
        cmu_cov = torch.matmul(bdz, weights.diag_embed())
        cmu_cov = cmu_cov.matmul(bdz.t())

        c = (1.0 - c1 - cmu) * c + (c1 * pc_cov) + (cmu * cmu_cov)

        # pull out the eigenthings and do the business
        # (torch.symeig has been removed from PyTorch; torch.linalg.eigh
        # returns the same (eigenvalues, eigenvectors) pair)
        eigvals, b = torch.linalg.eigh(c)
        d = eigvals.sqrt().diag_embed()
        if counteval % plot_freq == 0:
            plot_heatmap('select',
                         counteval,
                         mean,
                         b,
                         d,
                         g=g,
                         chiN=chiN,
                         step_size=step_size)
示例#7
0
def test_rank_mu_and_rank_one_update_with_step_size_control():
    """Run eight generations combining rank-mu, rank-one and step-size updates.

    Uses hand-picked ``cs``, ``c1``, ``cmu`` and ``damps`` values instead of
    the reference formulas (kept as comments). Each generation samples with
    ``cma_es.sample``, keeps the ``mu`` highest-fitness candidates under
    ``spike``, updates mean, step size and covariance, calls ``plot_heatmap``
    before and after the update, and sleeps for half a second.
    """
    features = 2
    step_size = 1.0

    # selection settings
    samples = 4 + floor(3 * log(features))
    mu = samples / 2
    # log(mu + 1/2) - log(1..mu): decreasing, so the best-ranked sample gets
    # the largest weight. The previous "+" produced increasing weights, which
    # rewarded the worst of the selected samples the most.
    weights = torch.tensor([log(mu + 0.5)]) - torch.linspace(
        start=1, end=mu, steps=floor(mu)).log()
    weights = weights / weights.sum()
    mu = floor(mu)
    mueff = (weights.sum()**2 / (weights**2).sum()).item()

    # reference (MATLAB) formulas for the strategy parameters:
    #   cc = (4 + mueff / N) / (N + 4 + 2 * mueff / N);
    #   cs = (mueff + 2) / (N + mueff + 5);
    #   c1 = 2 / ((N + 1.3)^2 + mueff);
    #   cmu = 2 * (mueff - 2 + 1 / mueff) / ((N + 2)^2 + 2 * mueff / 2);
    #   damps = 1 + 2 * max(0, sqrt((mueff - 1) / (N + 1)) - 1) + cs;

    # adaptation settings
    #cmu = mueff / features ** 2
    cc = (4 + mueff / features) / (features + 4 + 2 * mueff / features)
    # cs = (mueff + 2) / (features + mueff + 5)
    cs = 0.95
    # c1 = 2 / ((features + 1.3) ** 2 + mueff)
    c1 = 0.3
    # cmu = 2 * (mueff - 2 + 1 / mueff) / ((features + 2)**2 + 2 * mueff / 2)
    cmu = 0.3
    # damps = 1 + 2 * max(0.0, sqrt((mueff - 1.0) / (features + 1)) - 1) + cs
    damps = 1.0
    chiN = expect_multivariate_norm(features)

    print(
        f'cc : {cc}, cs: {cs}, c1: {c1}, cmu: {cmu}, damps: {damps}, chiN:{chiN}'
    )

    plt.title('weights')
    plt.plot(weights)
    plt.show()

    mean = torch.zeros(features)
    b = torch.eye(features)
    d = torch.eye(features)
    c = torch.matmul(b.matmul(d), b.matmul(d).T)

    # evolution paths for the covariance (pc) and the step size (ps)
    pc = torch.zeros(features)
    ps = torch.zeros(features)

    for counteval in range(8):

        # sample parameters
        s, z = cma_es.sample(samples, step_size, mean, b, d)

        # rank by fitness, highest first, and keep the best mu candidates
        fitness = spike(s[:, 0], s[:, 1])
        ranked = sorted(
            ({
                'sample': s[i],
                'z': z[i],
                'fitness': fit.item()
            } for i, fit in enumerate(fitness)),
            key=lambda item: item['fitness'],
            reverse=True)[:mu]
        z = torch.stack([item['z'] for item in ranked])
        g = torch.stack([item['sample'] for item in ranked])
        plot_heatmap('sample ',
                     counteval,
                     mean,
                     b,
                     d,
                     samples=s,
                     g=g,
                     chiN=chiN)

        # weighted recombination in sample space and in z-space
        mean = (g * weights.unsqueeze(1)).sum(0)
        zmean = (z * weights.unsqueeze(1)).sum(0)

        # step size
        ps = (1 - cs) * ps + cs * b.matmul(zmean)
        step_size = step_size * ((cs / damps) * (ps.norm() / chiN - 1.0)).exp()

        # a mind bending way to write a exponential smoothed moving average
        # zmean does not contain step size or mean, so allows us to add together
        # updates of different step sizes
        pc = (1 - cc) * pc + cc * b.matmul(d).matmul(zmean)
        # which we then combine to make a covariance matrix, from 1 (mean) datapoint!
        # this is why it's called "rank 1" update
        cov_pc = pc.unsqueeze(1).matmul(pc.unsqueeze(1).t())

        # estimate cov for all selected samples (weighted by rank)
        bdz = b.matmul(d).matmul(z.t())
        cmu_cov = torch.matmul(bdz, weights.diag_embed())
        cmu_cov = cmu_cov.matmul(bdz.t())

        c = (1.0 - c1 - cmu) * c + c1 * cov_pc + cmu * cmu_cov

        # pull out the eigenthings and do the business
        # (torch.symeig has been removed from PyTorch; torch.linalg.eigh
        # returns the same (eigenvalues, eigenvectors) pair)
        eigvals, b = torch.linalg.eigh(c)
        d = eigvals.sqrt().diag_embed()
        plot_heatmap('select',
                     counteval,
                     mean,
                     b,
                     d,
                     g=g,
                     chiN=chiN,
                     step_size=step_size)
        time.sleep(0.5)
示例#8
0
def test_rank_one_update():
    """Run eight generations using only the rank-one covariance update.

    The step size stays at 1.0. Each generation samples with
    ``cma_es.sample``, keeps the ``mu`` highest-fitness candidates under
    ``spike``, folds the weighted z-space mean into the evolution path ``pc``
    and blends ``pc``'s outer product into the covariance with rate ``c1``.
    ``plot_heatmap`` is called before and after each update.
    """
    features = 2

    step_size = 1.0

    # selection settings
    samples = 4 + floor(3 * log(features))
    mu = samples / 2
    weights = log(mu + 0.5) + torch.linspace(start=1, end=mu,
                                             steps=floor(mu)).log()
    # flip so the largest weight lands on rank 0 (the best sample)
    weights = torch.flip(weights, dims=(0, )) / weights.sum()
    mu = floor(mu)
    mueff = (weights.sum()**2 / (weights**2).sum()).item()

    # reference (MATLAB) formulas for the strategy parameters:
    #   cc = (4 + mueff / N) / (N + 4 + 2 * mueff / N);
    #   cs = (mueff + 2) / (N + mueff + 5);
    #   c1 = 2 / ((N + 1.3)^2 + mueff);
    #   cmu = 2 * (mueff - 2 + 1 / mueff) / ((N + 2)^2 + 2 * mueff / 2);
    #   damps = 1 + 2 * max(0, sqrt((mueff - 1) / (N + 1)) - 1) + cs;

    # adaptation settings (cs, cmu and damps are only reported, not used)
    #cmu = mueff / features ** 2
    cc = (4 + mueff / features) / (features + 4 + 2 * mueff / features)
    cs = (mueff + 2) / (features + mueff + 5)
    # c1 = 2 / ((features + 1.3) ** 2 + mueff)
    c1 = 0.5
    cmu = 2 * (mueff - 2 + 1 / mueff) / ((features + 2)**2 + 2 * mueff / 2)
    damps = 1 + 2 * max(0.0, sqrt((mueff - 1.0) / (features + 1)) - 1) + cs

    print(f'cc : {cc}, cs: {cs}, c1: {c1}, cmu: {cmu}, damps: {damps}')

    plt.title('weights')
    plt.plot(weights)
    plt.show()

    mean = torch.zeros(features)
    b = torch.eye(features)
    d = torch.eye(features)
    c = torch.matmul(b.matmul(d), b.matmul(d).T)

    pc = torch.zeros(features)

    for counteval in range(8):

        # sample parameters
        s, z = cma_es.sample(samples, step_size, mean, b, d)

        # rank by fitness, highest first, and keep the best mu candidates
        fitness = spike(s[:, 0], s[:, 1])
        ranked = sorted(
            ({
                'sample': s[i],
                'z': z[i],
                'fitness': fit.item()
            } for i, fit in enumerate(fitness)),
            key=lambda item: item['fitness'],
            reverse=True)[:mu]
        z = torch.stack([item['z'] for item in ranked])
        g = torch.stack([item['sample'] for item in ranked])
        plot_heatmap('sample ', counteval, mean, b, d, samples=s, g=g)

        # weighted recombination in sample space and in z-space
        mean = (g * weights.unsqueeze(1)).sum(0)
        zmean = (z * weights.unsqueeze(1)).sum(0)

        # a mind bending way to write a exponential smoothed moving average for the variance
        # zmean does not contain step size or mean, so allows us to add together
        # updates of different step sizes
        pc = (1 - cc) * pc + cc * b.matmul(d).matmul(zmean)
        cov_pc = pc.unsqueeze(1).matmul(pc.unsqueeze(1).t())

        # update covariance from smoothed mean in zspace
        c = (1 - c1) * c + c1 * cov_pc

        # rank-mu alternative, disabled in this test:
        # t = b.matmul(d).matmul(z.t())
        # c = torch.matmul(t, weights.diag_embed())
        # c = c.matmul(t.t())
        # c = (1.0 - cmu) * c_prev + cmu * c

        # torch.symeig has been removed from PyTorch; torch.linalg.eigh
        # returns the same (eigenvalues, eigenvectors) pair
        eigvals, b = torch.linalg.eigh(c)
        d = eigvals.sqrt().diag_embed()
        plot_heatmap('select', counteval, mean, b, d, g=g)
        time.sleep(0.5)
示例#9
0
def test_rank_mu_update():
    """Run four generations using only the rank-mu covariance update.

    The step size stays at 1.0. Each generation samples with
    ``cma_es.sample``, keeps the ``mu`` highest-fitness candidates under
    ``spike``, builds a rank-weighted covariance from the selected z vectors
    and blends it into the previous covariance with rate ``cmu``.
    ``plot_heatmap`` is called before and after each update.
    """
    features = 2

    step_size = 1.0

    # selection settings
    samples = 4 + floor(3 * log(features))
    mu = samples / 2
    weights = log(mu + 0.5) + torch.linspace(start=1, end=mu,
                                             steps=floor(mu)).log()
    # flip so the largest weight lands on rank 0 (the best sample)
    weights = torch.flip(weights, dims=(0, )) / weights.sum()
    mu = floor(mu)
    mueff = weights.sum()**2 / (weights**2).sum()

    # adaptation settings
    cmu = mueff / features**2
    print(cmu)
    plt.title('weights')
    plt.plot(weights)
    plt.show()

    mean = torch.zeros(features)
    b = torch.eye(features)
    d = torch.eye(features)
    c = torch.matmul(b.matmul(d), b.matmul(d).T)

    for counteval in range(4):

        # sample parameters
        s, z = cma_es.sample(samples, step_size, mean, b, d)

        # rank by fitness, highest first, and keep the best mu candidates
        fitness = spike(s[:, 0], s[:, 1])
        ranked = sorted(
            ({
                'sample': s[i],
                'z': z[i],
                'fitness': fit.item()
            } for i, fit in enumerate(fitness)),
            key=lambda item: item['fitness'],
            reverse=True)[:mu]
        z = torch.stack([item['z'] for item in ranked])
        g = torch.stack([item['sample'] for item in ranked])
        plot_heatmap('sample ', counteval, mean, b, d, samples=s, g=g)

        mean = (g * weights.unsqueeze(1)).sum(0)

        # estimate weighted covariance in z-space
        t = b.matmul(d).matmul(z.t())
        rank_mu_cov = torch.matmul(t, weights.diag_embed()).matmul(t.t())

        # blend the new estimate into the previous covariance
        c = (1.0 - cmu) * c + cmu * rank_mu_cov
        # torch.symeig has been removed from PyTorch; torch.linalg.eigh
        # returns the same (eigenvalues, eigenvectors) pair
        eigvals, b = torch.linalg.eigh(c)
        d = eigvals.sqrt().diag_embed()
        plot_heatmap('select', counteval, mean, b, d, g=g)
        time.sleep(0.5)
示例#10
0
from torchvision.transforms import functional as TVF

if __name__ == '__main__':
    # Endlessly play random-action episodes and display each frame with the
    # transporter network's keypoints drawn on top.

    args = config.config()

    # inference only: no gradients are needed
    with torch.no_grad():
        v = UniImageViewer()

        datapack = ds.datasets[args.dataset]
        transporter_net = transporter.make(args).to(args.device)

        if args.load is not None:
            transporter_net.load(args.load)

        env = gym.make(datapack.env)

        try:
            while True:
                s = env.reset()
                done = False

                while not done:
                    # cma_es.sample is used elsewhere in this file as
                    # sample(n, step_size, mean, b, d) -> (samples, z); it
                    # cannot be called without arguments and does not yield an
                    # action, so the action is drawn from the environment's
                    # own action space.
                    s, r, done, info = env.step(env.action_space.sample())
                    s = datapack.prepro(s)
                    s_t = datapack.transforms(s).unsqueeze(0).to(args.device)
                    heatmap = transporter_net.keypoint(s_t)
                    kp = KF.spacial_logsoftmax(heatmap)
                    s = TVF.to_tensor(s).unsqueeze(0)
                    # index 0 drops the leading dimension added by unsqueeze
                    s = plot_keypoints_on_image(kp[0], s[0])
                    v.render(s)
                    time.sleep(0.04)
        finally:
            # the loop only ends on an exception or interrupt; release the
            # environment on the way out
            env.close()
示例#11
0
    'scratch': Pos(atari_width * 2 + 80, 400),
}

xpos, ypos = None, None

imageid = 0

# the main application loop
while not glfw.window_should_close(window):
    glfw.poll_events()

    xpos, ypos = glfw.get_cursor_pos(window)
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)

    # take a step in the environment
    image_data, r, done, info = env.step(cma_es.sample())
    glBindTexture(GL_TEXTURE_2D, texture)
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, image_data.shape[1],
                 image_data.shape[0], 0, GL_RGB, GL_UNSIGNED_BYTE, image_data)
    glTexParameter(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)
    glTexParameter(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)
    if done:
        env.reset()

    # render reference screen used for sampling
    glViewport(anchor['source'].x, anchor['source'].y, atari_width,
               atari_height)
    projection = pyrr.matrix44.create_orthogonal_projection_matrix(
        0, atari_width, 0, atari_height, -1000, 1000)
    glUniformMatrix4fv(proj_loc, 1, GL_FALSE, projection)
    glUniformMatrix4fv(model_loc, 1, GL_FALSE, atari_screen1_model)