Exemplo n.º 1
0
def fitness_car_race(w, render: bool = False, steps=1000):
    """Evaluate weight vector `w` on the car-race environment.

    The weights are loaded into the module-level `net` through
    `nn.set_vectorized_weights`, then up to two episodes are played on the
    module-level `env`. The network input is the previous and the current
    observation, each scaled by 1/255, subsampled to every 8th row and column
    of channel index 1, flattened and concatenated.

    Args:
        w: Flat weight vector handed to `nn.set_vectorized_weights`.
        render: When true, `env.render()` is called every step and each
            episode's total reward is printed. A falsy return value from
            `env.render()` aborts the evaluation.
        steps: Currently unused (the episode-length override is disabled);
            kept so the signature matches the other fitness functions.

    Returns:
        The mean total reward over the episodes that were actually played.
    """
    nn.set_vectorized_weights(net, w, outs)

    n = 2
    score = 0
    # Count episodes actually started: an early abort must not be averaged
    # over episodes that never ran.
    played = 0

    for _ in range(n):
        obs = env.reset()
        last_obs = np.array(obs) / 255.0
        played += 1

        # total reward of this episode
        s = 0

        while True:
            close = False

            if render:
                close = not env.render()

            obs = obs / 255.0

            # Two consecutive subsampled frames form the network input.
            features = np.concatenate([
                last_obs[::8, ::8, 1].flatten(),
                obs[::8, ::8, 1].flatten(),
            ])
            res = net.predict(np.expand_dims(features, 0))[0]
            # Rescale the outputs linearly; presumably maps [0, 1] onto
            # [-1, 1] for the continuous action -- confirm against `net`.
            action = res * 2 - 1

            last_obs = obs
            obs, reward, done, _ = env.step(action)

            s += reward

            if done or close:
                break

        score += s

        if render:
            print(s)

        env.close()

        if close:
            break

    return score / played
Exemplo n.º 2
0
def fitness_xor(w: np.ndarray):
    """Score weight vector `w` on the four XOR input pairs.

    Loads `w` into the module-level `net` via `nn.set_vectorized_weights`,
    predicts all four binary input pairs in one batch and compares the first
    output column with the XOR targets (0, 1, 1, 0).

    Returns:
        4 minus the sum of squared errors, so a perfect fit scores 4.
    """
    exponent = 2
    targets = (0, 1, 1, 0)

    nn.set_vectorized_weights(net, w, outs)
    predictions = net.predict(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))

    # Accumulate term by term, in row order, with scalar arithmetic.
    error = 0
    for row, target in enumerate(targets):
        error += np.power(target - predictions[row, 0], exponent)
    return 4 - error
Exemplo n.º 3
0
def fitness_pong(w, render: bool = False, steps=1000):
    """Evaluate weight vector `w` over a single episode of the pong environment.

    Loads `w` into the module-level `net` via `nn.set_vectorized_weights` and
    plays one episode on the module-level `env`. Each observation is divided
    by 256 before being fed to the network, and the index of the largest
    network output is used as the action.

    Args:
        w: Flat weight vector handed to `nn.set_vectorized_weights`.
        render: When true, `env.render()` is called every step and the
            episode's total reward is printed. A falsy return value from
            `env.render()` ends the episode.
        steps: Currently unused (the episode-length override is disabled).

    Returns:
        The total reward collected during the episode.
    """
    nn.set_vectorized_weights(net, w, outs)

    frame = env.reset()
    episode_reward = 0
    finished = False

    while not finished:
        # Rendering happens before the step; a closed window stops the run.
        window_closed = (not env.render()) if render else False

        frame = frame / 256

        outputs = net.predict(np.expand_dims(frame, 0))[0]
        frame, reward, done, _ = env.step(np.argmax(outputs))

        episode_reward += reward
        finished = done or window_closed

    if render:
        print(episode_reward)

    env.close()

    return episode_reward
Exemplo n.º 4
0
def fitness_cartpole(w: np.ndarray, render: bool = False, steps=1000):
    """Evaluate weight vector `w` on the cart-pole environment.

    Loads `w` into the module-level `net` via `nn.set_vectorized_weights` and
    plays 10 episodes on the module-level `env` (only 1 when rendering). The
    raw observation is fed to the network and the index of the largest
    output is used as the action.

    Args:
        w: Flat weight vector handed to `nn.set_vectorized_weights`.
        render: When true, a single episode is played, `env.render()` is
            called every step, the episode reward is printed and the
            environment is closed afterwards. A falsy return value from
            `env.render()` ends the episode.
        steps: Value assigned to `env._max_episode_steps` before each reset.

    Returns:
        The mean total reward per episode.
    """
    nn.set_vectorized_weights(net, w, outs)

    episodes = 1 if render else 10
    total = 0

    for _ in range(episodes):
        env._max_episode_steps = steps
        state = env.reset()

        # reward accumulated in this episode
        episode_reward = 0
        finished = False

        while not finished:
            window_closed = (not env.render()) if render else False

            # pick the action with the strongest network output
            outputs = net.predict(np.expand_dims(state, 0))[0]
            state, reward, done, _ = env.step(np.argmax(outputs))

            episode_reward += reward
            finished = done or window_closed

        total += episode_reward

        if render:
            print(episode_reward)
            env.close()

    return total / episodes
Exemplo n.º 5
0
                             1.0,
                             500,
                             10,
                             min_sigma=1e-3,
                             big_sigma=1e1,
                             wait_iter=100)

    try:
        for i in range(1000):
            scores = []
            pop = e.ask()

            # eval population
            for ind in pop:
                scores.append(fitness_xor(ind))

            e.tell(scores)

            max_score = np.max(scores)

            # if max_score >= 3.9:
            if i == 99:
                ind = pop[np.argmax(scores)]
                print(ind)
                nn.set_vectorized_weights(net, ind, outs)
                out = net.predict(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))
                print(out)
                break

    finally:
        pass