Example #1
import torch

from bindsnet.encoding import bernoulli
from bindsnet.environment import GymEnvironment
from bindsnet.pipeline import EnvironmentPipeline
from bindsnet.pipeline.action import select_softmax


def return_score(network_list, k):
    def run_pipeline(pipeline, episode_count):
        # Sum rewards over all episodes so the caller can average them.
        reward_sum = 0
        for i in range(episode_count):
            total_reward = 0
            pipeline.reset_state_variables()
            is_done = False
            while not is_done:
                result = pipeline.env_step()
                if torch.cuda.is_available():
                    result = (result[0].cuda(), *result[1:])
                pipeline.step(result)

                reward = result[1]
                total_reward += reward
                is_done = result[2]
            print(f"Episode {i} total reward: {total_reward}")
            reward_sum += total_reward
        return reward_sum

    score_list = []
    for i, network in enumerate(network_list):
        score_sum = 0
        if torch.cuda.is_available():
            network = network.to('cuda:0')
        environment = GymEnvironment('BreakoutDeterministic-v4')
        environment.reset()
        # Build pipeline from specified components.
        environment_pipeline = EnvironmentPipeline(
            network,
            environment,
            encoding=bernoulli,
            action_function=select_softmax,
            output="Output Layer",
            time=100,
            history_length=1,
            delta=1,
            plot_interval=1,
        )
        # Disable learning so evaluation does not modify the weights.
        environment_pipeline.network.train(False)

        print("Testing: ")
        score_sum += run_pipeline(environment_pipeline, episode_count=2)
        score_list.append(score_sum / 2)
        torch.cuda.empty_cache()
    with open('Score/' + str(k) + '.txt', 'w') as f:
        f.write(str(score_list))

    return score_list
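
A hedged usage sketch: `return_score` expects a list of candidate networks and an index `k` that names the output file (the `Score/` directory must already exist). `build_breakout_network` below is a hypothetical stand-in for however the candidates are actually constructed:

# Hypothetical helper: builds one candidate Breakout network
# (e.g., a mutated copy inside an evolutionary search loop).
networks = [build_breakout_network() for _ in range(10)]

scores = return_score(networks, k=0)  # per-network averages written to Score/0.txt
print(scores)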
Example #2
# Load the SpaceInvaders environment (this excerpt assumes a prebuilt `network`).
environment = GymEnvironment(
    "SpaceInvaders-v0",
    BernoulliEncoder(time=int(network.dt), dt=network.dt),
    history_length=2,
    delta=4,
)
environment.reset()

# Plotting configuration.
plot_config = {
    "data_step": 1,
    "data_length": 10,
    "reward_eps": 1,
    "reward_window": 10,
    "volts_type": "line"
}

# Build pipeline from specified components.
pipeline = EnvironmentPipeline(
    network,
    environment,
    time=network.dt,
    action_function=select_multinomial,
    output="Z",
    plot_config=plot_config,
    render_interval=5,
)

# Run environment simulation and network training.
pipeline.train()
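
This excerpt references a `network` defined earlier in its source file, with an output population named "Z". A minimal sketch of such a network, with illustrative layer sizes (not from the original; the input size must match the environment's encoded observation, and the "Z" population should divide evenly by the action count for select_multinomial):

from bindsnet.network import Network
from bindsnet.network.nodes import Input, LIFNodes
from bindsnet.network.topology import Connection

network = Network(dt=1.0)

# Illustrative sizes: 80x80 for a downsampled Atari frame, 60 output
# neurons (divisible by SpaceInvaders' 6 actions).
inpt = Input(n=80 * 80, traces=True)
hidden = LIFNodes(n=100, traces=True)
out = LIFNodes(n=60, refrac=0, traces=True)

network.add_layer(inpt, name="X")
network.add_layer(hidden, name="H")
network.add_layer(out, name="Z")

# Connections default to random weights when `w` is not given.
network.add_connection(Connection(source=inpt, target=hidden), source="X", target="H")
network.add_connection(Connection(source=hidden, target=out), source="H", target="Z")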
Example #3
network.add_layer(middle, name="Hidden Layer")
network.add_layer(out, name="Output Layer")
network.add_connection(inpt_middle, source="Input Layer", target="Hidden Layer")
network.add_connection(middle_out, source="Hidden Layer", target="Output Layer")

# Load the Breakout environment.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment.reset()

# Build pipeline from specified components.
environment_pipeline = EnvironmentPipeline(
    network,
    environment,
    encoding=bernoulli,
    action_function=select_softmax,
    output="Output Layer",
    time=100,
    history_length=1,
    delta=1,
    plot_interval=1,
    render_interval=1,
)


def run_pipeline(pipeline, episode_count):
    for i in range(episode_count):
        total_reward = 0
        pipeline.reset_state_variables()
        is_done = False
        while not is_done:
            result = pipeline.env_step()
            pipeline.step(result)

            # Accumulate the reward and stop when the episode ends.
            reward = result[1]
            total_reward += reward
            is_done = result[2]
        print(f"Episode {i} total reward: {total_reward}")
Example #4
    network.add_monitor(spikes[layer], name="%s_spikes" % layer)

    if layer in voltages:
        network.add_monitor(voltages[layer], name="%s_voltages" % layer)

# Load the Breakout environment.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment.reset()

pipeline = EnvironmentPipeline(
    network,
    environment,
    encoding=bernoulli,
    time=1,
    history_length=5,
    delta=10,
    plot_interval=plot_interval,
    print_interval=print_interval,
    render_interval=render_interval,
    action_function=select_multinomial,
    output="R",
)

total = 0
rewards = []
avg_rewards = []
lengths = []
avg_lengths = []

i = 0
try:
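    # The excerpt is cut off inside the try block. A hedged sketch of how it
    # plausibly continues, mirroring the episode loop and the
    # KeyboardInterrupt cleanup from Example #8, using the bookkeeping lists
    # declared above (not part of the original snippet):
    while True:
        result = pipeline.env_step()
        pipeline.step(result)

        reward = result[1]
        total += reward
        if result[2]:  # episode finished
            rewards.append(total)
            avg_rewards.append(sum(rewards) / len(rewards))
            i += 1
            total = 0
            pipeline.reset_state_variables()
except KeyboardInterrupt:
    plt.close("all")
    environment.close()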
Example #5
def main(seed=0, n_neurons=100, n_train=60000, n_test=10000, inhib=100, lr=0.01, lr_decay=1, time=350, dt=1,
         theta_plus=0.05, theta_decay=1e-7, progress_interval=10, update_interval=250, plot=False,
         train=True, gpu=False):

    assert n_train % update_interval == 0 and n_test % update_interval == 0, \
        'No. examples must be divisible by update_interval'

    params = [
        seed, n_neurons, n_train, inhib, lr_decay, time, dt,
        theta_plus, theta_decay, progress_interval, update_interval
    ]

    model_name = '_'.join([str(x) for x in params])

    np.random.seed(seed)

    if gpu:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.manual_seed_all(seed)
    else:
        torch.manual_seed(seed)

    n_examples = n_train if train else n_test
    n_classes = 10

    # Build network.
    if train:
        network = Network(dt=dt)

        input_layer = Input(n=784, traces=True, trace_tc=5e-2)
        network.add_layer(input_layer, name='X')

        output_layer = DiehlAndCookNodes(
            n=n_classes, rest=0, reset=1, thresh=1, decay=1e-2,
            theta_plus=theta_plus, theta_decay=theta_decay, traces=True, trace_tc=5e-2
        )
        network.add_layer(output_layer, name='Y')

        w = torch.rand(784, n_classes)
        input_connection = Connection(
            source=input_layer, target=output_layer, w=w,
            update_rule=MSTDPET, nu=lr, wmin=0, wmax=1,
            norm=78.4, tc_e_trace=0.1
        )
        network.add_connection(input_connection, source='X', target='Y')

    else:
        network = load(os.path.join(params_path, model_name + '.pt'))
        network.connections['X', 'Y'].update_rule = NoOp(
            connection=network.connections['X', 'Y'], nu=network.connections['X', 'Y'].nu
        )
        network.layers['Y'].theta_decay = torch.IntTensor([0])
        network.layers['Y'].theta_plus = torch.IntTensor([0])

    # Load MNIST data.
    environment = MNISTEnvironment(
        dataset=MNIST(root=data_path, download=True), train=train, time=time
    )

    # Create pipeline.
    pipeline = Pipeline(
        network=network, environment=environment, encoding=repeat,
        action_function=select_spiked, output='Y', reward_delay=None
    )

    spikes = {}
    for layer in set(network.layers):
        spikes[layer] = Monitor(network.layers[layer], state_vars=('s',), time=time)
        network.add_monitor(spikes[layer], name='%s_spikes' % layer)

    if train:
        network.add_monitor(Monitor(
            network.connections['X', 'Y'].update_rule, state_vars=('tc_e_trace',), time=time
        ), 'X_Y_e_trace')

    # Train the network.
    if train:
        print('\nBegin training.\n')
    else:
        print('\nBegin test.\n')

    spike_ims = None
    spike_axes = None
    weights_im = None
    elig_axes = None
    elig_ims = None

    start = t()
    for i in range(n_examples):
        if i % progress_interval == 0:
            print(f'Progress: {i} / {n_examples} ({t() - start:.4f} seconds)')
            start = t()

            if i > 0 and train:
                network.connections['X', 'Y'].update_rule.nu[1] *= lr_decay

        # Run the network on the input.
        # print("Example",i,"Results:")
        # for j in range(time):
        #     result = pipeline.env_step()
        #     pipeline.step(result,a_plus=1, a_minus=0)
        # print(result)
        for j in range(time):
            pipeline.train()

        if not train:
            _spikes = {layer: spikes[layer].get('s') for layer in spikes}

        if plot:
            _spikes = {layer: spikes[layer].get('s') for layer in spikes}
            w = network.connections['X', 'Y'].w
            square_weights = get_square_weights(w.view(784, n_classes), 4, 28)

            spike_ims, spike_axes = plot_spikes(_spikes, ims=spike_ims, axes=spike_axes)
            weights_im = plot_weights(square_weights, im=weights_im)
            elig_ims, elig_axes = plot_voltages(
                {'Y': network.monitors['X_Y_e_trace'].get('tc_e_trace').view(-1, time)[1500:2000]},
                plot_type='line', ims=elig_ims, axes=elig_axes
            )

            plt.pause(1e-8)

        pipeline.reset_state_variables()  # Reset state variables.
        network.connections['X', 'Y'].update_rule.tc_e_trace = torch.zeros(784, n_classes)

    print(f'Progress: {n_examples} / {n_examples} ({t() - start:.4f} seconds)')

    if train:
        network.save(os.path.join(params_path, model_name + '.pt'))
        print('\nTraining complete.\n')
    else:
        print('\nTest complete.\n')
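
A minimal invocation sketch, assuming the script's module-level imports (`torch`, etc.) are in place; the argument values are illustrative and chosen to satisfy the divisibility assertion:

if __name__ == '__main__':
    # 60000 and 10000 are both divisible by update_interval=250.
    main(seed=0, n_train=60000, n_test=10000, update_interval=250,
         plot=False, train=True, gpu=torch.cuda.is_available())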
Example #6
                       target="Output Layer")

network.to(device)

# Load the Breakout environment.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment.reset()

# Build pipeline from specified components.
pipeline = EnvironmentPipeline(
    network,
    environment,
    encoding=bernoulli,
    action_function=select_softmax,
    output="Output Layer",
    time=100,
    history_length=1,
    delta=1,
    plot_interval=1,
    render_interval=1,
    device=device,
)

# Run environment simulation for 100 episodes.
for i in range(100):
    total_reward = 0
    pipeline.reset_state_variables()
    is_done = False
    while not is_done:
        result = pipeline.env_step()
        pipeline.step(result)

        # Accumulate the reward and stop when the episode ends.
        reward = result[1]
        total_reward += reward
        is_done = result[2]
    print(f"Episode {i} total reward: {total_reward}")
                       target="Output Layer")

# Load the Breakout environment.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment.reset()

# Build pipeline from specified components.
pipeline = EnvironmentPipeline(
    network,
    environment,
    encoding=poisson,
    encode_factor=50,
    action_function=select_highest,
    percent_of_random_action=0.05,
    random_action_after=5,
    output="Output Layer",
    reset_output_spikes=True,
    time=500,
    overlay_input=4,
    history_length=1,
    plot_interval=plot_interval if plot else None,
    render_interval=render_interval if render else None,
    device=device,
)

# Run environment simulation for number of episodes.
for i in tqdm(range(num_episodes)):
    total_reward = 0
    pipeline.reset_state_variables()
    is_done = False
    pipeline.env.step(1)  # start the episode by firing the ball
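
The excerpt ends just after launching the ball. A hedged sketch of how the episode loop presumably continues, mirroring the stepping pattern used in the other examples (not part of the original snippet):

    while not is_done:
        result = pipeline.env_step()
        pipeline.step(result)

        reward = result[1]
        total_reward += reward
        is_done = result[2]
    print(f"Episode {i} total reward: {total_reward}")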
Example #8
    network.add_monitor(spikes[layer], name="%s_spikes" % layer)

    if layer in voltages:
        network.add_monitor(voltages[layer], name="%s_voltages" % layer)

# Load SpaceInvaders environment.
environment = GymEnvironment(
    "SpaceInvaders-v0",
    BernoulliEncoder(time=1, dt=network.dt),
    history_length=2,
    delta=4,
)
environment.reset()

# Build pipeline from specified components.
pipeline = EnvironmentPipeline(
    network,
    environment,
    action_function=select_multinomial,
    output="R",
    plot_interval=plot_interval,
    print_interval=print_interval,
    render_interval=render_interval,
)

try:
    pipeline.train()
except KeyboardInterrupt:
    plt.close("all")
    environment.close()
Example #9
# Define the environment
environment = GymEnvironment('Riverraid-ram-v0')

# Define observer agent, acting on first spike
observer = RiverraidAgent(environment,
                          dt=1.0,
                          method='first_spike',
                          reward_fn=RiverraidReward)
observer.build_network()

pipeline = EnvironmentPipeline(
    network=observer.network,
    environment=environment,
    action_function=select_action,
    encoding=ram_observation_encoder,
    device=observer.device,
    output="PM",
    time=64,
    num_episodes=5000,
)

# Inspect connection weights before training.
w1 = pipeline.network.connections[("S2", "PM")].w
# plot_weights(w1)
print(w1)

pipeline.train()
print("Training Finished")

# Inspect the same connection weights after training.
w1 = pipeline.network.connections[("S2", "PM")].w
# plot_weights(w1)
print(w1)
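
To quantify how much the connection changed rather than eyeballing the printed tensors, one could snapshot the weights first. A sketch; `clone()` matters because the weight tensor is updated in place:

w_before = pipeline.network.connections[("S2", "PM")].w.clone()
pipeline.train()
w_after = pipeline.network.connections[("S2", "PM")].w

# Magnitude of the weight update accumulated over training.
print(f"Mean absolute weight change: {(w_after - w_before).abs().mean().item():.6f}")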