def set_stdp_calib(synram, calib):
    """Write the per-synapse STDP calibration bits from `calib` into the synapse array."""
    for row in range(pydls.Synapse_driver.num_drivers):
        for col in range(pydls.Neuron_index.num_neurons):
            synapse = synram.get(pydls.Synapse_row(row), pydls.Synapse_column(col))
            synapse.config(calib[row, col])
            synram.set(pydls.Synapse_row(row), pydls.Synapse_column(col), synapse)
def make_synapse_array(synram):
    """Read the synapse array back into a numpy array of shape (rows, cols, 2)
    holding (weight, address) per synapse."""
    num_rows = pydls.Synapse_driver.num_drivers
    num_cols = pydls.Neuron_index.num_neurons
    synapse_array = numpy.zeros((num_rows, num_cols, 2), dtype=numpy.uint8)
    for row, col in itertools.product(range(num_rows), range(num_cols)):
        synapse = synram.get(pydls.Synapse_row(row), pydls.Synapse_column(col))
        synapse_array[row, col, 0] = synapse.weight()
        synapse_array[row, col, 1] = synapse.address()
    return synapse_array
def set_fixed_indegree(synram, weight, degree, address=0):
    """Give every neuron (column) `degree` randomly chosen presynaptic rows
    with the given weight and address."""
    nonzero_synapse = pydls.Synapse()
    nonzero_synapse.address(address)
    nonzero_synapse.weight(weight)
    for col in range(pydls.Neuron_index.num_neurons):
        perm = numpy.random.permutation(pydls.Neuron_index.num_neurons)[:degree]
        for row in perm:
            synram.set(pydls.Synapse_row(row), pydls.Synapse_column(col), nonzero_synapse)
def set_syndrv_inhibitory(syndrv, indexes):
    """Switch the synapse drivers at the given row indexes from excitatory to inhibitory."""
    for index in indexes:
        syndrv.senx(pydls.Synapse_row(index), False)
        syndrv.seni(pydls.Synapse_row(index), True)
def set_correlation_switches(synram, config):
    """Write the given correlation switch configuration into every column of synapse row 33."""
    switch = pydls.Synapse()
    switch.config(config)
    for col in range(pydls.Neuron_index.num_neurons):
        synram.set(pydls.Synapse_row(33), pydls.Synapse_column(col), switch)
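# Hypothetical usage sketch for the helpers above (the `chip` object and the
# parameter values are assumptions, not fixed by this module): populate the
# synapse array with a fixed in-degree, mark the upper rows as inhibitory
# drivers, and read the configuration back as a numpy array.
#
#   chip = pydls.Chip()
#   set_fixed_indegree(chip.synram, weight=20, degree=16, address=5)
#   set_syndrv_inhibitory(chip.syndrv_config, indexes=range(16, 32))
#   snapshot = make_synapse_array(chip.synram)   # snapshot[row, col] == (weight, address)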
def CreateNetwork(self, weightMatrix=[], exitatory=None, inhibitory=None):
    '''Wire the network'''
    # Network layout:
    # The address of a spike determines which state neuron was firing.
    #   syn address 0 .. (nStates - 1)              -> addresses of state neurons
    #   syn address nStates .. (nStates + nActions) -> addresses of action neurons
    # These make up the recurrent connections; the diagonal elements of the
    # synram contain the recurrent connections.
    # The synram holds the synapses -> access with row and column.

    # The state neurons are connected on the diagonal elements of the synram
    for stateNeuronID in range(self.nStates):
        syn = self.chip.synram.get(dls.Synapse_row(stateNeuronID),
                                   dls.Synapse_column(stateNeuronID))
        syn.weight(63)  # 6-bit maximum weight to ensure spiking
        syn.address(self.recurrentSpikeAddress)
        self.chip.synram.set(dls.Synapse_row(stateNeuronID),
                             dls.Synapse_column(stateNeuronID), syn)

    # The off-diagonal elements connect each state neuron to the next (ring)
    for stateNeuronID in range(self.nStates):
        neuronID = stateNeuronID + 1
        if neuronID == self.nStates:
            neuronID = 0
        syn = self.chip.synram.get(dls.Synapse_row(neuronID),
                                   dls.Synapse_column(stateNeuronID))
        syn.weight(63)  # 6-bit maximum weight to ensure spiking
        syn.address(stateNeuronID)
        self.chip.synram.set(dls.Synapse_row(neuronID),
                             dls.Synapse_column(stateNeuronID), syn)

    # The action neurons are fully connected to the state neurons
    for actionNeuron in range(self.nActions):
        actionNeuronID = self.nStates + actionNeuron
        for stateNeuronID in range(self.nStates):
            syn = self.chip.synram.get(dls.Synapse_row(stateNeuronID),
                                       dls.Synapse_column(actionNeuronID))
            if weightMatrix == []:
                if exitatory is not None:
                    syn.weight(exitatory)  # 6-bit
                else:
                    syn.weight(35)  # initialise the weights somewhere in the middle
            else:
                print str(stateNeuronID) + ' | ' + str(actionNeuronID) + ':' + \
                    str(weightMatrix[stateNeuronID][actionNeuronID])
                syn.weight(weightMatrix[stateNeuronID][actionNeuronID])
            syn.address(self.recurrentSpikeAddress)
            self.chip.synram.set(dls.Synapse_row(stateNeuronID),
                                 dls.Synapse_column(actionNeuronID), syn)
            self.chip.rate_counter.enable(dls.Neuron_index(stateNeuronID), False)
            self.chip.rate_counter.enable(dls.Neuron_index(actionNeuronID), True)

    # Configure the action synapse drivers as inhibitory
    for rest in range(self.nStates, 32):
        self.chip.syndrv_config.senx(dls.Synapse_row(rest), False)
        self.chip.syndrv_config.seni(dls.Synapse_row(rest), True)

    # Configure inhibitory weights for the action neurons.
    # Here they are only inhibitory towards the state neurons.
    for actionNeuron in range(self.nActions):
        actionNeuronID = self.nStates + actionNeuron
        for neuronID in range(self.nStates):
            syn = self.chip.synram.get(dls.Synapse_row(actionNeuronID),
                                       dls.Synapse_column(neuronID))
            syn.weight(63)
            syn.address(self.recurrentSpikeAddress)
            self.chip.synram.set(dls.Synapse_row(actionNeuronID),
                                 dls.Synapse_column(neuronID), syn)
        for neuronID in range(self.nStates, self.nStates + self.nActions):
            syn = self.chip.synram.get(dls.Synapse_row(actionNeuronID),
                                       dls.Synapse_column(neuronID))
            if inhibitory is not None or self.inhibitory is not None:
                if self.inhibitory is None:
                    self.inhibitory = inhibitory
                syn.weight(self.inhibitory)  # 6-bit, somewhere in the middle
            else:
                syn.weight(63)
            syn.address(self.recurrentSpikeAddress)
            self.chip.synram.set(dls.Synapse_row(actionNeuronID),
                                 dls.Synapse_column(neuronID), syn)

    # Set the pulse length to 1
    self.chip.syndrv_config.pulse_length(1)

    # For now, leave the first neuron empty so that external inputs are not
    # mixed up with state neuron spikes.

    # Enable the output config for the state and action neurons
    for neuronID in range(self.nStates + self.nActions):
        neuron = self.chip.neurons.get(dls.Neuron_index(neuronID))
        # neuron.enable_out(True)
        self.chip.neurons.set(dls.Neuron_index(neuronID), neuron)
    self.chip.rate_counter.clear_on_read(True)

    # Think about addressing and timing of 'recurrent' spikes
    self.fpga_conf = dls.Config_reg()
    self.fpga_conf.spike_router_enable = True
    self.router = dls.Spike_router_bypass(self.recurrentSpikeCumulationTime,
                                          self.recurrentSpikeAddress)
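# Illustrative sketch (plain numpy, no hardware access) of the 32x32 weight
# layout that CreateNetwork programs with its default arguments, assuming
# nStates = 5 and nActions = 3; rows are synapse drivers (presynaptic
# addresses), columns are neurons:
#
#   import numpy as np
#   nStates, nActions = 5, 3
#   W = np.zeros((32, 32), dtype=int)
#   for s in range(nStates):
#       W[s, s] = 63                              # state self-connection (diagonal)
#       W[(s + 1) % nStates, s] = 63              # state -> next state (ring)
#       W[s, nStates:nStates + nActions] = 35     # plastic state -> action weights
#   for a in range(nActions):
#       W[nStates + a, :nStates] = 63             # action -> state (inhibitory drivers)
#       W[nStates + a, nStates:nStates + nActions] = 63   # action -> action inhibition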
def EvaluateNetwork(self, maxIteration, verbose=True):
    '''Collect the results and evaluate the network'''
    # Collect the spike train
    spike_train = self.spikes_builder.get_spikes()
    spike_times = []
    spike_address = []
    for spike in spike_train:
        spike_times.append(spike.time)
        spike_address.append(spike.address)

    mailbox_result = self.mailbox_handle.get()
    status = self.status_handle.get()
    if not status.sleep():
        print 'PPU did not finish!'
    if verbose:
        for spike in spike_train:
            print 'Adr ' + str(spike.address) + ' Time: ' + str(spike.time)

    # Print the mailbox content
    if False:
        utils.print_mailbox_string(mailbox_result)
    else:
        utils.print_mailbox(mailbox_result)

    # Read the mailbox and collect the results
    stateOffset = 0x000
    actionOffset = 0x800
    iterationCounterOffset = 0xffc
    states = utils.convertByteListToInt8(
        utils.readRange_mailbox(mailbox_result, stateOffset,
                                stateOffset + maxIteration), False)
    actions = utils.convertByteListToInt8(
        utils.readRange_mailbox(mailbox_result, actionOffset,
                                actionOffset + maxIteration), False)
    iterationCounter = utils.convertByteListToInt(
        utils.readRange_mailbox(mailbox_result, iterationCounterOffset,
                                iterationCounterOffset + 4), False)
    if iterationCounter[0] != 2000:
        print 'Iteration counter was wrong! ' + str(iterationCounter[0])
        raise Exception('Iteration counter was wrong!')

    # Compute the rewards based on the state and action pairs
    rewards = [0]
    for i in range(1, len(states)):
        rewards.append(self.R[actions[i - 1]][states[i - 1]][states[i]])
    if verbose:
        print 'Executed iterations: ' + str(iterationCounter[0])
        print rewards

    # Read back the learned weights and derive the Q-table and greedy policy
    synram = self.synram_handle.get()
    policy = []
    Q_table = []
    for state in range(self.nStates):
        weights = [
            synram.get(dls.Synapse_row(state),
                       dls.Synapse_column(self.nStates + actionNeuron)).weight()
            for actionNeuron in range(self.nActions)
        ]
        Q_table.append(weights)
        maxQIndex = np.argmax(weights)
        policy.append(maxQIndex)

    # Read the full 32x32 weight matrix
    weights = []
    for row in range(32):
        rowStr = ''
        weights.append([
            synram.get(dls.Synapse_row(row), dls.Synapse_column(col)).weight()
            for col in range(32)
        ])
        for col in range(32):
            rowStr += '{:2d}, '.format(
                synram.get(dls.Synapse_row(row), dls.Synapse_column(col)).weight())
        if verbose:
            print rowStr
    weights = np.array(weights)
    if verbose:
        print policy
    return spike_times, spike_address, states, actions, rewards, policy, weights, Q_table
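# Hypothetical call site for EvaluateNetwork (the `agent` name is an assumption;
# maxIteration = 2000 matches the iteration-counter check above):
#
#   (spike_times, spike_address, states, actions,
#    rewards, policy, weights, Q_table) = agent.EvaluateNetwork(2000, verbose=False)
#
# policy[s] is the greedy action for state s (argmax over the learned
# state -> action weights), and Q_table[s][a] is the corresponding 6-bit weight.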
def play_bandit_batch(self, bandit_probabilities, n_pulls, n_runs,
                      hyperparameters, learning_rule, connector):
    n_batch = 1
    mailbox = dls.Mailbox()
    bandit_probabilities_machine = (bandit_probabilities * 2**32).astype(np.int)
    set_env(mailbox, bandit_probabilities_machine, n_pulls, n_runs, n_batch,
            learning_rule)

    n_arms = int(len(bandit_probabilities) / n_runs)
    action_inhibition = int(hyperparameters['action_inhibition'])
    stim_inhibition = int(hyperparameters['stim_inhibition'])

    # Build the weight and address matrices for the synapse array
    weights = np.zeros((32, 32), dtype=np.int)
    addresses = np.zeros((32, 32), dtype=np.int)
    weights[self.stimulate_row, :] = 10
    weights[self.stimulate_row, self.stimulate_row] = 63
    addresses[self.stimulate_row, self.stimulate_row] = connector.recurrent_address
    for i in mapping:
        weights[i, :] = action_inhibition
        weights[i, self.stimulate_row] = stim_inhibition  # same for the state neuron
        weights[i, i] = 0
        addresses[i, :] = connector.recurrent_address
        addresses[self.stimulate_row, i] = connector.recurrent_address
    addresses[self.stimulate_row, :n_arms] = connector.recurrent_address
    synapses.setup_synram(weights, addresses, connector.chip)

    # Pre-builder: write the chip configuration and let it settle
    pre_builder = dls.Dls_program_builder()
    pre_builder.set_time(0)
    pre_builder.set_chip(connector.chip)
    pre_builder.wait_for(1000000)
    pre_builder.halt()
    pre_builder.transfer(connector.connection, 0x0)
    pre_builder.execute(connector.connection, 0x0)
    safe_fetch(pre_builder, connector.connection)

    # Playback memory program
    builder = dls.Dls_program_builder()
    builder.set_synram_config_reg(connector.synram_config_reg)
    builder.set_mailbox(mailbox)
    builder.set_ppu_program(learning_rule.dls_program)
    builder.set_ppu_control_reg(connector.ppu_control_reg_end)
    builder.set_ppu_control_reg(connector.ppu_control_reg_start)
    builder.set_time(0)
    builder.wait_until(self.wait)
    status_handle = builder.get_ppu_status_reg()
    builder.set_ppu_control_reg(connector.ppu_control_reg_end)
    mailbox_handle = builder.get_mailbox()
    synram_handle = builder.get_synram()
    builder.halt()

    # Transfer, execute and copy back the results
    builder.transfer(connector.connection, 0x0)
    builder.execute(connector.connection, 0x0)
    safe_fetch(builder, connector.connection)

    if False:  # debug: print the weight matrix read back from the chip
        synram = synram_handle.get()
        weight_matrix = np.zeros((32, 32), dtype=np.int)
        for row in range(32):
            for col in range(32):
                syn = synram.get(dls.Synapse_row(row), dls.Synapse_column(col))
                weight_matrix[row, col] = syn.weight()
                print('{:2d}'.format(syn.weight()), end=' ')
            print()

    # Collect the recorded spikes as (time, address) pairs
    spike_train = builder.get_spikes()
    spike_n = np.zeros((len(spike_train), 2), np.int)
    for i, spike in enumerate(spike_train):
        spike_n[i, 0] = spike.time
        spike_n[i, 1] = spike.address

    # Check the status register
    status_reg_result = status_handle.get()
    if status_reg_result.sleep() is not True:
        raise BanditException('PPU did not stop')

    # Decode the results from the mailbox
    mailbox_result = mailbox_handle.get()
    a_r = np.zeros((n_runs, n_batch, n_pulls, 2), np.int)
    sampled_probs = np.zeros((n_runs, n_batch, n_arms))
    mailbox_bytes = list(bytes_of_mailbox(mailbox_result))
    all_expected_regrets = []
    for run_index in range(n_runs):
        batch_expected_regrets = []
        for batch_index in range(n_batch):
            for i in range(n_pulls):
                byte = mailbox_bytes[i + n_pulls * (batch_index + run_index * n_batch)]
                action = byte & 0x3f
                if action >= n_arms:
                    if self.logger is not None:
                        self.logger.info(
                            'Wrong Action in Mailbox! Ignoring Current Run...')
                    if False:  # debug: hex dump of the mailbox
                        for i, b in enumerate(mailbox_bytes):
                            if i % 16 == 0:
                                print()
                            print('{:02x}'.format(b), end=' ')
                    raise BanditException('Mailbox has wrong values')
                reward = 1 if (byte & 0x80) != 0 else 0
                a_r[run_index, batch_index, i, :] = reward, action
            for i in range(n_arms):
                a_ind = np.where(a_r[run_index, batch_index, :, 1] == i)[0]
                p = np.mean(a_r[run_index, batch_index, a_ind, 0])
                sampled_probs[run_index, batch_index, i] = p
            p_max = np.max(
                bandit_probabilities[run_index * n_arms:(run_index + 1) * n_arms])
            expected_regret = 0
            for t_action in a_r[run_index, batch_index, :, 1]:
                expected_regret += p_max - bandit_probabilities[
                    run_index * n_arms + t_action]
            batch_expected_regrets.append(expected_regret)
        all_expected_regrets.append(batch_expected_regrets)

    results = dict(a_r=a_r,
                   sampled_probs=sampled_probs,
                   mailbox_bytes=mailbox_bytes,
                   spikes=spike_n)
    all_expected_regrets = np.array(all_expected_regrets)
    return np.mean(all_expected_regrets), results
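# Equivalent vectorized sketch of the mailbox decoding and regret computation
# above for a single (run_index, batch_index) slice; `run_bytes` is an assumed
# np.uint8 array holding that run's mailbox bytes:
#
#   actions = run_bytes & 0x3f            # low 6 bits: chosen arm index
#   rewards = (run_bytes & 0x80) >> 7     # high bit: 1 if the pull was rewarded
#   p_run = bandit_probabilities[run_index * n_arms:(run_index + 1) * n_arms]
#   expected_regret = np.sum(p_run.max() - p_run[actions])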