Example #1
	def test_gym_environment(self):
		# Smoke test: construct, reset, step, and close a few Atari environments.
		for name in ['AirRaid-v0', 'Amidar-v0', 'Asteroids-v0']:
			env = GymEnvironment(name)
			assert env.name == name

			env.reset()
			env.step(0)
			env.close()
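
A minimal standalone sketch of the same wrapper, assuming bindsnet and Gym's Atari extras are installed (that the wrapper forwards action_space from the underlying Gym environment is an assumption here):

from bindsnet.environment import GymEnvironment

# Wrap an Atari game, take one random step, and clean up.
env = GymEnvironment('Breakout-v0')
env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()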
			
Example #2
total = 0
rewards = []
avg_rewards = []
lengths = []
avg_lengths = []

i, episode_start = 0, 0
while i < n:
    if render:
        env.render()

    # Select a random action (6 possible).
    a = np.random.choice(6)

    # Step environment with random action.
    obs, reward, done, info = env.step(a)

    total += reward
    rewards.append(reward)

    # Incremental mean over the i + 1 rewards seen so far.
    if i == 0:
        avg_rewards.append(reward)
    else:
        avg = avg_rewards[-1] + (reward - avg_rewards[-1]) / (i + 1)
        avg_rewards.append(avg)

    if i % 100 == 0:
        print(
            "Iteration %d: last reward: %.2f, average reward: %.2f"
            % (i, reward, avg_rewards[-1])
        )

    # Restart the game when an episode ends and record its length.
    if done:
        env.reset()
        lengths.append(i - episode_start + 1)
        avg_lengths.append(float(np.mean(lengths)))
        episode_start = i + 1

    i += 1
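
The incremental update above can be sanity-checked against NumPy's batch mean; a quick sketch with hypothetical data:

import numpy as np

xs = np.random.rand(1000)
avg = xs[0]
for i, x in enumerate(xs[1:], start=1):
    # Same running-mean update as in the loop above.
    avg += (x - avg) / (i + 1)
assert np.isclose(avg, xs.mean())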

Example #3
def main(seed=0, time=50, n_episodes=25, percentile=99.9, plot=False):

    np.random.seed(seed)

    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.manual_seed_all(seed)
    else:
        torch.manual_seed(seed)

    # Gather data greedily (no exploration).
    epsilon = 0

    print()
    print('Loading the trained ANN...')
    print()

    # Load a DQN pre-trained on Breakout.
    ANN = Network()
    ANN.load_state_dict(
        torch.load('../../params/converted_dqn_time_difference_grayscale.pt'))

    environment = GymEnvironment('BreakoutDeterministic-v4')

    f = f'{seed}_{n_episodes}_states.pt'
    if os.path.isfile(os.path.join(params_path, f)):
        print('Loading pre-gathered observation data...')

        states = torch.load(os.path.join(params_path, f))
    else:
        print('Gathering observation data...')
        print()

        episode_rewards = np.zeros(n_episodes)
        noop_counter = 0
        total_t = 0
        states = []

        for i in range(n_episodes):
            obs = environment.reset().to(device)
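            # Initialize the state as a stack of 4 copies of the first frame.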
            state = torch.stack([obs] * 4, dim=2)

            for t in itertools.count():
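                # Collapse the 4-frame stack into a single grayscale image,
                # weighting more recent frames more heavily.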
                encoded = torch.tensor([0.25, 0.5, 0.75, 1]) * state
                encoded = torch.sum(encoded, dim=2)

                states.append(encoded)

                # `policy` (defined elsewhere in the example) maps the ANN's
                # Q-values and epsilon to action probabilities; sample from them.
                q_values = ANN(encoded.view([1, -1]))[0]
                probs, best_action = policy(q_values, epsilon)
                action = np.random.choice(np.arange(len(probs)), p=probs)

                if action == 0:
                    noop_counter += 1
                else:
                    noop_counter = 0

                # Force a random action after 20 consecutive no-ops to keep the game moving.
                if noop_counter >= 20:
                    action = np.random.choice([0, 1, 2, 3])
                    noop_counter = 0

                next_obs, reward, done, _ = environment.step(action)
                next_obs = next_obs.to(device)

                # Motion frame: keep the positive temporal difference and append
                # it to the frame stack, dropping the oldest frame.
                next_state = torch.clamp(next_obs - obs, min=0)
                next_state = torch.cat(
                    (state[:, :, 1:],
                     next_state.view(
                         [next_state.shape[0], next_state.shape[1], 1])),
                    dim=2)

                episode_rewards[i] += reward
                total_t += 1

                if done:
                    print(
                        f'Step {t} ({total_t}) @ Episode {i + 1} / {n_episodes}'
                    )
                    print(f'Episode Reward: {episode_rewards[i]}')

                    break

                state = next_state
                obs = next_obs

        states = torch.stack(states).view(-1, 6400)

        torch.save(states, os.path.join(params_path, f))

    print()
    print(f'Collected {states.size(0)} Atari game frames.')
    print()
    print('Converting ANN to SNN...')

    # Convert the ANN to an SNN, scaling weights by the given percentile
    # of activations measured on the collected states.
    SNN = ann_to_snn(ANN,
                     input_shape=(6400, ),
                     data=states,
                     percentile=percentile)

    # Monitor spikes and voltages in every layer except the input.
    for l in SNN.layers:
        if l != 'Input':
            SNN.add_monitor(Monitor(SNN.layers[l],
                                    state_vars=['s', 'v'],
                                    time=time),
                            name=l)

    spike_ims = None
    spike_axes = None
    inpt_ims = None
    inpt_axes = None

    new_life = True
    total_t = 0
    noop_counter = 0

    print()
    print('Testing SNN on Atari Breakout game...')
    print()

    # Test SNN on Atari Breakout.
    obs = environment.reset().to(device)
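    # Re-initialize the 4-frame state stack from the first test frame.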
    state = torch.stack([obs] * 4, dim=2)
    prev_life = 5
    total_reward = 0

    for t in itertools.count():
        sys.stdout.flush()

        encoded_state = torch.tensor([0.25, 0.5, 0.75, 1]) * state
        encoded_state = torch.sum(encoded_state, dim=2)
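        # Present the same collapsed image at every simulation timestep.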
        encoded_state = encoded_state.view([1, -1]).repeat(time, 1)

        inpts = {'Input': encoded_state}
        SNN.run(inpts=inpts, time=time)

        # Retrieve recorded spikes and voltages from the monitors.
        spikes = {layer: SNN.monitors[layer].get('s') for layer in SNN.monitors}
        voltages = {layer: SNN.monitors[layer].get('v') for layer in SNN.monitors}
        # Act on the output neuron with the largest summed voltage
        # (softmax is monotonic, so this is an argmax over summed voltages).
        action = torch.softmax(voltages['3'].sum(1), 0).argmax()

        if action == 0:
            noop_counter += 1
        else:
            noop_counter = 0

        if noop_counter >= 20:
            action = np.random.choice([0, 1, 2, 3])
            noop_counter = 0

        # Press FIRE (action 1) to relaunch the ball after losing a life.
        if new_life:
            action = 1

        next_obs, reward, done, info = environment.step(action)
        next_obs = next_obs.to(device)

        # A drop in remaining lives means a new life is about to start.
        new_life = prev_life != info["ale.lives"]
        prev_life = info["ale.lives"]

        next_state = torch.clamp(next_obs - obs, min=0)
        next_state = torch.cat(
            (state[:, :, 1:],
             next_state.view([next_state.shape[0], next_state.shape[1], 1])),
            dim=2)

        total_reward += reward
        total_t += 1

        # Reset the SNN's state variables before the next input presentation.
        SNN.reset_()

        if plot:
            # Plot the input image and the per-layer spike trains.
            inpt = encoded_state.view(time, 6400).sum(0).view(80, 80)
            spike_ims, spike_axes = plot_spikes(spikes,
                                                ims=spike_ims,
                                                axes=spike_axes)
            inpt_axes, inpt_ims = plot_input(state,
                                             inpt,
                                             ims=inpt_ims,
                                             axes=inpt_axes)
            plt.pause(1e-8)

        if done:
            print(f'Episode Reward: {total_reward}')
            print()

            break

        state = next_state
        obs = next_obs

    # Append or update this configuration's row in the results CSV.
    model_name = '_'.join(
        [str(x) for x in [seed, time, n_episodes, percentile]])
    columns = ['seed', 'time', 'n_episodes', 'percentile', 'reward']
    data = [[seed, time, n_episodes, percentile, total_reward]]

    path = os.path.join(results_path, 'results.csv')
    if not os.path.isfile(path):
        df = pd.DataFrame(data=data, index=[model_name], columns=columns)
    else:
        df = pd.read_csv(path, index_col=0)

        if model_name not in df.index:
            df = df.append(
                pd.DataFrame(data=data, index=[model_name], columns=columns))
        else:
            df.loc[model_name] = data[0]

    df.to_csv(path, index=True)
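
The saved results can be inspected later; a small sketch assuming the results.csv layout written above:

import pandas as pd

df = pd.read_csv('results.csv', index_col=0)
print(df.sort_values('reward', ascending=False).head())
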
Example #4
def main(seed=0, n_episodes=25, epsilon=0.05):

    np.random.seed(seed)

    if torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.manual_seed_all(seed)
    else:
        torch.manual_seed(seed)

    print()
    print('Loading the trained ANN...')
    print()

    # Load a DQN pre-trained on Breakout.
    ANN = Network()
    ANN.load_state_dict(
        torch.load(
            os.path.join(ROOT_DIR, 'params', 'converted_dqn_time_difference_grayscale.pt')
        )
    )

    environment = GymEnvironment('BreakoutDeterministic-v4')

    print('Gathering observation data...')
    print()

    episode_rewards = np.zeros(n_episodes)
    noop_counter = 0
    total_t = 0
    states = []
    new_life = True
    prev_life = 5

    for i in range(n_episodes):
        obs = environment.reset().to(device)
        state = torch.stack([obs] * 4, dim=2)

        for t in itertools.count():
            encoded = torch.tensor([0.25, 0.5, 0.75, 1]) * state
            encoded = torch.sum(encoded, dim=2)

            states.append(encoded)

            q_values = ANN(encoded.view([1, -1]))[0]
            probs, best_action = policy(q_values, epsilon)
            action = np.random.choice(np.arange(len(probs)), p=probs)

            if action == 0:
                noop_counter += 1
            else:
                noop_counter = 0

            if noop_counter >= 20:
                action = np.random.choice([0, 1, 2, 3])
                noop_counter = 0

            if new_life:
                action = 1

            next_obs, reward, done, info = environment.step(action)
            next_obs = next_obs.to(device)

            # A drop in remaining lives means a new life is about to start.
            new_life = prev_life != info["ale.lives"]
            prev_life = info["ale.lives"]

            next_state = torch.clamp(next_obs - obs, min=0)
            next_state = torch.cat(
                (state[:, :, 1:], next_state.view(
                    [next_state.shape[0], next_state.shape[1], 1]
                )), dim=2
            )

            episode_rewards[i] += reward
            total_t += 1

            if done:
                print(f'Step {t} ({total_t}) @ Episode {i + 1} / {n_episodes}')
                print(f'Episode Reward: {episode_rewards[i]}')

                break

            state = next_state
            obs = next_obs

    model_name = '_'.join([str(x) for x in [seed, n_episodes, epsilon]])
    columns = [
        'seed', 'n_episodes', 'epsilon', 'avg. reward', 'std. reward'
    ]
    data = [[
        seed, n_episodes, epsilon, np.mean(episode_rewards), np.std(episode_rewards)
    ]]

    path = os.path.join(results_path, 'results.csv')
    if not os.path.isfile(path):
        df = pd.DataFrame(data=data, index=[model_name], columns=columns)
    else:
        df = pd.read_csv(path, index_col=0)

        if model_name not in df.index:
            df = df.append(pd.DataFrame(data=data, index=[model_name], columns=columns))
        else:
            df.loc[model_name] = data[0]

    df.to_csv(path, index=True)

    torch.save(episode_rewards, os.path.join(results_path, f'{model_name}_episode_rewards.pt'))