def return_score(network_list, k, episode_count=2):
    """Evaluate each network on Breakout and record its mean episode reward.

    Every network in ``network_list`` is wrapped in a fresh
    ``EnvironmentPipeline`` over ``BreakoutDeterministic-v4`` with learning
    switched off, played for ``episode_count`` episodes and scored by the
    mean total reward per episode.  The list of scores is written to
    ``Score/<k>.txt`` and returned.

    Args:
        network_list: Iterable of networks to evaluate, in order.
        k: Identifier used to build the output file name.
        episode_count: Episodes played per network.  Defaults to 2, the
            value that used to be hard-coded.

    Returns:
        A list holding one mean reward per network, in input order.

    Raises:
        ValueError: If ``episode_count`` is smaller than 1.
    """
    import os  # local import: only needed to prepare the output directory

    if episode_count < 1:
        raise ValueError("episode_count must be at least 1")

    # Query CUDA once; both the network and the observations follow it so the
    # function also works on CPU-only machines.
    use_cuda = torch.cuda.is_available()

    def run_pipeline(pipeline, episode_count):
        """Play ``episode_count`` episodes and return the summed reward."""
        reward_sum = 0
        for i in range(episode_count):
            total_reward = 0
            pipeline.reset_state_variables()
            is_done = False
            while not is_done:
                result = pipeline.env_step()
                if use_cuda:
                    # Move the observation to the GPU the network lives on.
                    result = (result[0].cuda(), *result[1:])
                pipeline.step(result)
                total_reward += result[1]
                is_done = result[2]
            print(f"Episode {i} total reward:{total_reward}")
            # Accumulate across episodes: the caller averages over all of
            # them, so returning only the last episode would skew the score.
            reward_sum += total_reward
        return reward_sum

    score_list = []
    for network in network_list:
        if use_cuda:
            network = network.to('cuda:0')

        environment = GymEnvironment('BreakoutDeterministic-v4')
        environment.reset()

        # Build pipeline from specified components.
        environment_pipeline = EnvironmentPipeline(
            network,
            environment,
            encoding=bernoulli,
            action_function=select_softmax,
            output="Output Layer",
            time=100,
            history_length=1,
            delta=1,
            plot_interval=1,
        )
        # Evaluation only: the weights must not change while scoring.
        environment_pipeline.network.learning = False

        print("Testing: ")
        score_sum = run_pipeline(environment_pipeline, episode_count=episode_count)
        score_list.append(score_sum / episode_count)
        torch.cuda.empty_cache()

    # Create the target directory up front so a long evaluation is not lost
    # to a missing folder, and let the context manager close the file.
    score_path = 'Score/' + str(k) + '.txt'
    os.makedirs(os.path.dirname(score_path), exist_ok=True)
    with open(score_path, 'w') as f:
        f.write(str(score_list))

    return score_list
# Wrap the SpaceInvaders Gym game. The encoder handed to the environment is
# configured from the network's own time step.
observation_encoder = BernoulliEncoder(time=int(network.dt), dt=network.dt)
environment = GymEnvironment(
    "SpaceInvaders-v0", observation_encoder, history_length=2, delta=4
)
environment.reset()

# Settings forwarded to the pipeline through its ``plot_config`` argument.
plot_config = dict(
    data_step=1,
    data_length=10,
    reward_eps=1,
    reward_window=10,
    volts_type="line",
)

# Tie network and environment together; layer "Z" is named as the output and
# select_multinomial is the action function.
pipeline_options = {
    "time": network.dt,
    "action_function": select_multinomial,
    "output": "Z",
    "plot_config": plot_config,
    "render_interval": 5,
}
pipeline = EnvironmentPipeline(network, environment, **pipeline_options)

# Run environment simulation and network training.
pipeline.train()
# Register the hidden and output layers, then wire
# "Input Layer" -> "Hidden Layer" -> "Output Layer".
# NOTE(review): the "Input Layer" itself is added before this excerpt starts.
network.add_layer(middle, name="Hidden Layer")
network.add_layer(out, name="Output Layer")
network.add_connection(inpt_middle, source="Input Layer", target="Hidden Layer")
network.add_connection(middle_out, source="Hidden Layer", target="Output Layer")

# Load the Breakout environment.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment.reset()

# Build pipeline from specified components.
environment_pipeline = EnvironmentPipeline(
    network,
    environment,
    encoding=bernoulli,
    action_function=select_softmax,
    output="Output Layer",
    time=100,
    history_length=1,
    delta=1,
    plot_interval=1,
    render_interval=1,
)


# Runs `episode_count` episodes on `pipeline`, stepping the environment and
# the pipeline until the episode is flagged as done.
# NOTE(review): this definition is cut off at the end of the visible excerpt;
# the code that updates `total_reward` and `is_done` is presumably in the
# missing part - confirm against the full file before relying on it.
def run_pipeline(pipeline, episode_count):
    for i in range(episode_count):
        total_reward = 0
        pipeline.reset_state_variables()
        is_done = False
        while not is_done:
            result = pipeline.env_step()
            pipeline.step(result)
# NOTE(review): the next two statements use `layer`, so they presumably sit
# inside a loop over layers whose header precedes this excerpt - confirm.
# Each layer gets a spike monitor; a voltage monitor is added only for layers
# that have an entry in `voltages`.
network.add_monitor(spikes[layer], name="%s_spikes" % layer)
if layer in voltages:
    network.add_monitor(voltages[layer], name="%s_voltages" % layer)

# Load the Breakout environment.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment.reset()

# Build the pipeline; layer "R" is named as the output and
# select_multinomial is the action function.
pipeline = EnvironmentPipeline(
    network,
    environment,
    encoding=bernoulli,
    time=1,
    history=5,
    delta=10,
    plot_interval=plot_interval,
    print_interval=print_interval,
    render_interval=render_interval,
    action_function=select_multinomial,
    output="R",
)

# Bookkeeping initialised before the run starts.
total = 0
rewards = []
avg_rewards = []
lengths = []
avg_lengths = []

i = 0
# NOTE(review): the body of this `try` lies beyond the visible excerpt.
try:
# NOTE(review): the indentation of this function was lost in the available
# source. The nesting below is a reconstruction; the spots where a different
# nesting would change behaviour are flagged individually.
def main(seed=0, n_neurons=100, n_train=60000, n_test=10000, inhib=100, lr=0.01, lr_decay=1, time=350, dt=1,
         theta_plus=0.05, theta_decay=1e-7, progress_interval=10, update_interval=250, plot=False, train=True,
         gpu=False):
    """Train or evaluate a two-layer spiking network on MNIST via a ``Pipeline``.

    In training mode a new ``Network`` is built: a 784-unit ``Input`` layer
    'X' joined to a ``DiehlAndCookNodes`` layer 'Y' of 10 units by a
    ``Connection`` with random initial weights and the ``MSTDPET`` update
    rule.  The network is saved under ``params_path`` when the run ends.  In
    test mode the saved network with the matching parameter-derived file name
    is loaded, its X->Y update rule is replaced by ``NoOp`` and the 'Y'
    layer's ``theta_decay`` / ``theta_plus`` are set to zero.

    Args:
        seed: Seeds NumPy and, depending on ``gpu``, either the CUDA
            generators or the default torch generator.
        n_neurons: Only used in the saved-model file name in this function.
        n_train: Number of examples processed when ``train`` is true.
        n_test: Number of examples processed when ``train`` is false.
        inhib: Only used in the saved-model file name in this function.
        lr: Passed as ``nu`` to the X->Y ``Connection``.
        lr_decay: Factor multiplied into ``update_rule.nu[1]`` during
            training.
        time: Passed to the environment and the monitors; also the number of
            ``pipeline.train()`` calls made per example.
        dt: Passed to ``Network``.
        theta_plus: Passed to ``DiehlAndCookNodes``.
        theta_decay: Passed to ``DiehlAndCookNodes``.
        progress_interval: A progress line is printed every this many
            examples.
        update_interval: Only used in the divisibility check and the file
            name in this function.
        plot: When true, spikes, weights and the monitored trace are plotted
            after each example.
        train: Selects training (build and save) or test (load) mode.
        gpu: When true, makes ``torch.cuda.FloatTensor`` the default tensor
            type and seeds the CUDA generators.

    Raises:
        AssertionError: If ``n_train`` or ``n_test`` is not divisible by
            ``update_interval``.
    """
    assert n_train % update_interval == 0 and n_test % update_interval == 0, \
        'No. examples must be divisible by update_interval'

    # The saved-model file name is derived from these values; note that `lr`
    # and `n_test` are not part of it.
    params = [
        seed, n_neurons, n_train, inhib, lr_decay, time, dt, theta_plus, theta_decay, progress_interval,
        update_interval
    ]

    model_name = '_'.join([str(x) for x in params])

    np.random.seed(seed)

    # Only one of the two torch seeding calls runs; in the GPU branch
    # `torch.manual_seed` is not called.
    if gpu:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.manual_seed_all(seed)
    else:
        torch.manual_seed(seed)

    n_examples = n_train if train else n_test
    n_classes = 10

    # Build network.
    if train:
        network = Network(dt=dt)

        input_layer = Input(n=784, traces=True, trace_tc=5e-2)
        network.add_layer(input_layer, name='X')

        output_layer = DiehlAndCookNodes(
            n=n_classes, rest=0, reset=1, thresh=1, decay=1e-2,
            theta_plus=theta_plus, theta_decay=theta_decay, traces=True, trace_tc=5e-2
        )
        network.add_layer(output_layer, name='Y')

        # Random initial weights, one column per output unit.
        w = torch.rand(784, n_classes)
        input_connection = Connection(
            source=input_layer, target=output_layer, w=w,
            update_rule=MSTDPET, nu=lr, wmin=0, wmax=1,
            norm=78.4, tc_e_trace=0.1
        )
        network.add_connection(input_connection, source='X', target='Y')

    else:
        # Test mode: reload the saved network and swap in NoOp as the update
        # rule; theta_decay / theta_plus of layer 'Y' are zeroed.
        network = load(os.path.join(params_path, model_name + '.pt'))
        network.connections['X', 'Y'].update_rule = NoOp(
            connection=network.connections['X', 'Y'], nu=network.connections['X', 'Y'].nu
        )
        network.layers['Y'].theta_decay = torch.IntTensor([0])
        network.layers['Y'].theta_plus = torch.IntTensor([0])

    # Load MNIST data.
    environment = MNISTEnvironment(
        dataset=MNIST(root=data_path, download=True), train=train, time=time
    )

    # Create pipeline.
    pipeline = Pipeline(
        network=network, environment=environment, encoding=repeat,
        action_function=select_spiked, output='Y', reward_delay=None
    )

    # One spike monitor per layer, keyed by layer name.
    spikes = {}
    for layer in set(network.layers):
        spikes[layer] = Monitor(network.layers[layer], state_vars=('s',), time=time)
        network.add_monitor(spikes[layer], name='%s_spikes' % layer)

    # NOTE(review): placed after the loop above; the original nesting is not
    # recoverable. The monitor is registered in training mode only.
    if train:
        network.add_monitor(Monitor(
            network.connections['X', 'Y'].update_rule, state_vars=('tc_e_trace',), time=time
        ), 'X_Y_e_trace')

    # Train the network.
    if train:
        print('\nBegin training.\n')
    else:
        print('\nBegin test.\n')

    # Plot handles, reused across iterations once created.
    spike_ims = None
    spike_axes = None
    weights_im = None
    elig_axes = None
    elig_ims = None

    start = t()
    for i in range(n_examples):
        if i % progress_interval == 0:
            print(f'Progress: {i} / {n_examples} ({t() - start:.4f} seconds)')
            start = t()

        # NOTE(review): shown at loop level (decay applied on every example
        # after the first); it may originally have been nested under the
        # progress check above - confirm against the unmangled file.
        if i > 0 and train:
            network.connections['X', 'Y'].update_rule.nu[1] *= lr_decay

        # Run the network on the input.
        # print("Example",i,"Results:")
        # for j in range(time):
        #     result = pipeline.env_step()
        #     pipeline.step(result,a_plus=1, a_minus=0)
        #     print(result)

        for j in range(time):
            pipeline.train()

        # This value is not read again unless `plot` is set, and the plot
        # branch recomputes it.
        if not train:
            _spikes = {layer: spikes[layer].get('s') for layer in spikes}

        if plot:
            _spikes = {layer: spikes[layer].get('s') for layer in spikes}
            w = network.connections['X', 'Y'].w
            square_weights = get_square_weights(w.view(784, n_classes), 4, 28)

            spike_ims, spike_axes = plot_spikes(_spikes, ims=spike_ims, axes=spike_axes)
            weights_im = plot_weights(square_weights, im=weights_im)
            # NOTE(review): the monitor above records 'tc_e_trace' but
            # 'e_trace' is requested here, and the 'X_Y_e_trace' monitor only
            # exists in training mode - confirm both before plotting in test
            # mode.
            elig_ims, elig_axes = plot_voltages(
                {'Y': network.monitors['X_Y_e_trace'].get('e_trace').view(-1, time)[1500:2000]},
                plot_type='line', ims=elig_ims, axes=elig_axes
            )

            plt.pause(1e-8)

        pipeline.reset_state_variables()  # Reset state variables.
        # Overwrites the update rule's `tc_e_trace` attribute with zeros.
        network.connections['X', 'Y'].update_rule.tc_e_trace = torch.zeros(784, n_classes)

    print(f'Progress: {n_examples} / {n_examples} ({t() - start:.4f} seconds)')

    if train:
        network.save(os.path.join(params_path, model_name + '.pt'))
        print('\nTraining complete.\n')
    else:
        print('\nTest complete.\n')
# NOTE(review): this excerpt begins in the middle of a call; the line below is
# the tail of a statement whose start is not visible here.
target="Output Layer")
# Move the network to the configured device; the same `device` is passed to
# the pipeline below.
network.to(device)

# Load the Breakout environment.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment.reset()

# Build pipeline from specified components.
pipeline = EnvironmentPipeline(
    network,
    environment,
    encoding=bernoulli,
    action_function=select_softmax,
    output="Output Layer",
    time=100,
    history_length=1,
    delta=1,
    plot_interval=1,
    render_interval=1,
    device=device,
)

# Run environment simulation for 100 episodes.
# NOTE(review): the loop body is cut off at the end of the excerpt; the code
# that updates `total_reward` and `is_done` is presumably in the missing part.
for i in range(100):
    total_reward = 0
    pipeline.reset_state_variables()
    is_done = False
    while not is_done:
        result = pipeline.env_step()
        pipeline.step(result)
# NOTE(review): this excerpt begins in the middle of a call; the line below is
# the tail of a statement whose start is not visible here.
target="Output Layer")

# Load the Breakout environment.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment.reset()

# Build pipeline from specified components.
# Plotting and rendering are disabled (interval None) unless `plot` / `render`
# are set.
pipeline = EnvironmentPipeline(
    network,
    environment,
    encoding=poisson,
    encode_factor=50,
    action_function=select_highest,
    percent_of_random_action=0.05,
    random_action_after=5,
    output="Output Layer",
    reset_output_spikes=True,
    time=500,
    overlay_input=4,
    history_length=1,
    plot_interval=plot_interval if plot else None,
    render_interval=render_interval if render else None,
    device=device,
)

# Run environment simulation for number of episodes.
# NOTE(review): the loop body is cut off at the end of the excerpt.
for i in tqdm(range(num_episodes)):
    total_reward = 0
    pipeline.reset_state_variables()
    is_done = False
    pipeline.env.step(1)  # Start each episode by firing the ball.
# NOTE(review): the next two statements use `layer`, so they presumably sit
# inside a loop over layers whose header precedes this excerpt - confirm.
# Each layer gets a spike monitor; a voltage monitor is added only for layers
# that have an entry in `voltages`.
network.add_monitor(spikes[layer], name="%s_spikes" % layer)
if layer in voltages:
    network.add_monitor(voltages[layer], name="%s_voltages" % layer)

# Load SpaceInvaders environment.
environment = GymEnvironment(
    "SpaceInvaders-v0",
    BernoulliEncoder(time=1, dt=network.dt),
    history_length=2,
    delta=4,
)
environment.reset()

# Build pipeline from specified components.
pipeline = EnvironmentPipeline(
    network,
    environment,
    action_function=select_multinomial,
    output="R",
    plot_interval=plot_interval,
    print_interval=print_interval,
    render_interval=render_interval,
)

# Train until finished; on Ctrl-C close the figures and the environment.
# NOTE(review): `environment.close()` is shown inside the handler; it may
# originally have followed the try/except at top level - confirm.
try:
    pipeline.train()
except KeyboardInterrupt:
    plt.close("all")
    environment.close()
# Gym environment the agent plays in.
environment = GymEnvironment('Riverraid-ram-v0')

# Observer agent configured with the 'first_spike' method; it builds its own
# network, which the pipeline then drives.
observer = RiverraidAgent(
    environment,
    dt=1.0,
    method='first_spike',
    reward_fn=RiverraidReward,
)
observer.build_network()

pipeline = EnvironmentPipeline(
    network=observer.network,
    environment=environment,
    action_function=select_action,
    encoding=ram_observation_encoder,
    device=observer.device,
    output="PM",
    time=64,
    num_episodes=5000,
)

# Weights of the S2 -> PM connection, printed before training.
s2_pm_connection = pipeline.network.connections["S2", "PM"]
w1 = s2_pm_connection.w
print(w1)

pipeline.train()
print("Training Finished")

# The same `w1` reference captured before training is printed again; it is
# deliberately not re-read from the connection here.
print(w1)