def pack_data(self, dt, buffer, offset=0):
    """Pack the struct describing the filter into the buffer.

    The data written is a little-endian unsigned 32-bit word containing
    ``self.order`` followed immediately by the interleaved coefficient pairs
    ``(-a[i], b[i])`` for ``i >= 1`` as converted by ``tp.np_to_fix``.

    Parameters
    ----------
    dt : float
        Time step handed to ``cont2discrete`` when discretising the filter.
    buffer : writable buffer (e.g. ``bytearray``)
        Destination for the packed data.
    offset : int
        Byte position in ``buffer`` at which the struct starts.
    """
    # Compute the filter coefficients
    b, a, _ = cont2discrete((self.num, self.den), dt)
    b = b.flatten()

    # Strip out the first values
    # `a` is negated so that it can be used with a multiply-accumulate
    # instruction on chip.
    # The first numerator coefficient is discarded below, so it must be zero.
    assert b[0] == 0.0  # Oops!
    ab = np.vstack((-a[1:], b[1:])).T.flatten()

    # Convert the values to fixpoint and write into a data buffer
    struct.pack_into("<I", buffer, offset, self.order)

    # `tobytes` replaces the `tostring` alias that NumPy has removed.
    coefficients = tp.np_to_fix(ab).tobytes()

    # Write the coefficients directly after the order word.  Both ends of the
    # destination are relative to `offset`; the previous slice end ignored
    # `offset`, which wrote to (and could resize) the wrong part of the buffer
    # whenever a non-zero offset was used.
    struct.pack_into("<{}s".format(len(coefficients)), buffer, offset + 4,
                     coefficients)
def pack_data(self, dt, buffer, offset=0):
    """Pack the struct describing the filter into the buffer.

    The data written is a little-endian unsigned 32-bit word containing
    ``self.order`` followed by ``self.order * 2 * 4`` bytes holding the
    interleaved coefficient pairs ``(-a[i], b[i])`` for ``i >= 1`` as
    converted by ``tp.np_to_fix``.

    Parameters
    ----------
    dt : float
        Time step handed to ``cont2discrete`` when discretising the filter.
    buffer : writable buffer (e.g. ``bytearray``)
        Destination for the packed data.
    offset : int
        Byte position in ``buffer`` at which the struct starts.
    """
    # Compute the filter coefficients
    b, a, _ = cont2discrete((self.num, self.den), dt)
    b = b.flatten()

    # Strip out the first values
    # `a` is negated so that it can be used with a multiply-accumulate
    # instruction on chip.
    # The first numerator coefficient is discarded below, so it must be zero.
    assert b[0] == 0.0  # Oops!
    ab = np.vstack((-a[1:], b[1:])).T.flatten()

    # Convert the values to fixpoint and write into a data buffer.  `tobytes`
    # is used because the equivalent `tostring` alias has been removed from
    # NumPy; the bytes produced are the same.
    coefficients = tp.np_to_fix(ab).tobytes()
    struct.pack_into("<I{}s".format(self.order * 2 * 4), buffer, offset,
                     self.order, coefficients)
def make_vertices(self, output_signals, machine_timestep, filter_region,
                  filter_routing_region):
    """Split the rows of the output transform between cores.

    When there are no signals on the standard output port no cores are
    created.  Otherwise enough `FilterCore` objects are built that no core
    is given more than `self.max_rows` rows of the transform.

    The list of cores is stored in `self.cores` and returned.
    """
    # Without any standard outputs there is nothing to compute.
    if OutputPort.standard not in output_signals:
        self.cores = list()
        return self.cores

    # Transform, keys and slices for the columns handled by this group
    transform, keys, output_slices = get_transforms_and_keys(
        output_signals[OutputPort.standard], self.column_slice
    )
    size_out = transform.shape[0]

    # Number of cores needed: size_out / max_rows, rounded up
    whole, leftover = divmod(size_out, self.max_rows)
    n_cores = whole + (1 if leftover else 0)

    # A single row-partitioned region is shared by all of the cores
    transform_region = regions.MatrixRegion(
        np_to_fix(transform),
        sliced_dimension=regions.MatrixPartitioning.rows
    )

    # One core per slice of the output rows
    cores = list()
    for out_slice in divide_slice(slice(0, size_out), n_cores):
        cores.append(
            FilterCore(self.column_slice, out_slice, transform_region,
                       keys, output_slices, machine_timestep,
                       filter_region, filter_routing_region)
        )
    self.cores = cores

    return self.cores
def make_vertices(self, output_signals, machine_timestep, filter_region,
                  filter_routing_region):
    """Split the rows of the output transform between cores.

    When there are no signals on the standard output port no cores are
    created.  Otherwise enough labelled `FilterCore` objects are built that
    no core is given more than `self.max_rows` rows of the transform.

    The list of cores is stored in `self.cores` and returned.
    """
    # Without any standard outputs there is nothing to compute.
    if OutputPort.standard not in output_signals:
        self.cores = list()
        return self.cores

    # Transform, keys and slices for the columns handled by this group
    transform, keys, output_slices = get_transforms_and_keys(
        output_signals[OutputPort.standard], self.column_slice
    )
    size_out = transform.shape[0]

    # Number of cores needed: size_out / max_rows, rounded up
    whole, leftover = divmod(size_out, self.max_rows)
    n_cores = whole + (1 if leftover else 0)

    # A single row-partitioned region is shared by all of the cores
    transform_region = regions.MatrixRegion(
        np_to_fix(transform),
        sliced_dimension=regions.MatrixPartitioning.rows
    )

    # One core per slice of the output rows
    cores = list()
    for out_slice in divide_slice(slice(0, size_out), n_cores):
        cores.append(
            FilterCore(self._label, self.column_slice, out_slice,
                       transform_region, keys, output_slices,
                       machine_timestep, filter_region,
                       filter_routing_region)
        )
    self.cores = cores

    return self.cores
def test_pack_data(self, num, den, dt, order):
    """Check the bytes written by `LinearFilter.pack_data`.

    The packed data is expected to be the filter order as a little-endian
    unsigned 32-bit word followed by the fixed-point, interleaved
    ``(-denominator, numerator)`` coefficients of the discretised filter
    (excluding the first coefficient of each).
    """
    # Create the filter
    lf = LinearFilter(0, False, num, den)

    # Create a buffer to pack data into
    data = bytearray((order*2 + 1)*4)

    # Pack the parameters
    lf.pack_data(dt, data, 0)

    # Generate what we expect the data to look like
    numd, dend, _ = cont2discrete((num, den), dt)
    numd = numd.flatten()
    exp = list()
    for a, b in zip(dend[1:], numd[1:]):
        exp.append(-a)
        exp.append(b)

    # `tobytes` is used as NumPy has removed the `tostring` alias
    expected_data = tp.np_to_fix(np.array(exp)).tobytes()

    # Check that's what we get
    assert struct.unpack_from("<I", data, 0)[0] == order
    assert data[4:] == expected_data
def test_pack_data(self, num, den, dt, order):
    """Check the bytes written by `LinearFilter.pack_data`.

    The packed data is expected to be the filter order as a little-endian
    unsigned 32-bit word followed by the fixed-point, interleaved
    ``(-denominator, numerator)`` coefficients of the discretised filter
    (excluding the first coefficient of each).
    """
    # Create the filter
    lf = LinearFilter(0, False, num, den)

    # Create a buffer to pack data into
    data = bytearray((order * 2 + 1) * 4)

    # Pack the parameters
    lf.pack_data(dt, data, 0)

    # Generate what we expect the data to look like
    numd, dend, _ = cont2discrete((num, den), dt)
    numd = numd.flatten()
    exp = list()
    for a, b in zip(dend[1:], numd[1:]):
        exp.append(-a)
        exp.append(b)

    # `tobytes` is used as NumPy has removed the `tostring` alias
    expected_data = tp.np_to_fix(np.array(exp)).tobytes()

    # Check that's what we get
    assert struct.unpack_from("<I", data, 0)[0] == order
    assert data[4:] == expected_data
def make_vertices(self, model, n_steps):
    """Build the regions for the ensemble, partition its neurons into
    clusters of co-operating cores and return the netlist specification.
    """
    built = model.params[self.ensemble]
    size_in = self.ensemble.size_in
    by_rows = regions.MatrixPartitioning.rows

    def _row_region(matrix):
        """Convert a matrix to fixed point and wrap it in a region which
        is partitioned by rows.
        """
        return regions.MatrixRegion(tp.np_to_fix(matrix),
                                    sliced_dimension=by_rows)

    region_map = dict()

    # Encoders (these already include the gain)
    scaled_encoders = built.scaled_encoders
    region_map[EnsembleRegions.encoders] = _row_region(scaled_encoders)

    # Bias, with any direct input folded in
    bias = built.bias + np.dot(scaled_encoders, self.direct_input)
    assert bias.ndim == 1
    region_map[EnsembleRegions.bias] = _row_region(bias)

    # Gains
    region_map[EnsembleRegions.gain] = _row_region(built.gain)

    # Filters and routing for the standard and global inhibition inputs
    signals_in = model.get_signals_to_object(self)

    input_filters, input_routing = make_filter_regions(
        signals_in[InputPort.standard], model.dt, True,
        model.keyspaces.filter_routing_tag, width=size_in
    )
    region_map[EnsembleRegions.input_filters] = input_filters
    region_map[EnsembleRegions.input_routing] = input_routing

    inhibition_filters, inhibition_routing = make_filter_regions(
        signals_in[EnsembleInputPort.global_inhibition], model.dt, True,
        model.keyspaces.filter_routing_tag, width=1
    )
    region_map[EnsembleRegions.inhibition_filters] = inhibition_filters
    region_map[EnsembleRegions.inhibition_routing] = inhibition_routing

    # Decoders and keys for the outgoing connections (empty when nothing
    # leaves the standard output port).
    signals_out = model.get_signals_from_object(self)
    if OutputPort.standard not in signals_out:
        decoders = np.array([])
        output_keys = list()
    else:
        decoders, output_keys = get_decoders_and_keys(
            signals_out[OutputPort.standard], True
        )
    size_out = decoders.shape[0]

    region_map[EnsembleRegions.decoders] = _row_region(decoders / model.dt)
    region_map[EnsembleRegions.keys] = regions.KeyspacesRegion(
        output_keys,
        fields=[regions.KeyField({'cluster': 'cluster'})],
        partitioned_by_atom=True
    )

    # Information about groups of co-operating cores
    region_map[EnsembleRegions.population_length] = regions.ListRegion("I")

    # General ensemble parameters
    ensemble_region = EnsembleRegion(model.machine_timestep, size_in)
    region_map[EnsembleRegions.ensemble] = ensemble_region

    # Neuron-type specific parameters
    neuron_type = self.ensemble.neuron_type
    region_map[EnsembleRegions.neuron] = LIFRegion(
        model.dt, neuron_type.tau_rc, neuron_type.tau_ref
    )

    # Profiling: zero samples unless profiling is enabled in the config
    n_profiler_samples = 0
    self.profiled = getconfig(model.config, self.ensemble, "profile",
                              False)
    if self.profiled:
        n_profiler_samples = getconfig(model.config, self.ensemble,
                                       "profile_num_samples")

        # Fall back to a default derived from the number of steps
        if n_profiler_samples is None:
            n_profiler_samples = (
                len(EnsembleSlice.profiler_tag_names) * n_steps * 2
            )

    region_map[EnsembleRegions.profiler] = \
        regions.Profiler(n_profiler_samples)
    ensemble_region.n_profiler_samples = n_profiler_samples

    # Work out what needs recording from the local probes
    for probe in self.local_probes:
        attr = probe.attr
        if attr == "voltage":
            self.record_voltages = True
        elif attr in ("output", "spikes"):
            self.record_spikes = True
        else:
            raise NotImplementedError(
                "Cannot probe {} on Ensembles".format(probe.attr)
            )

    ensemble_region.record_spikes = self.record_spikes
    ensemble_region.record_voltages = self.record_voltages

    # Recording regions are empty unless the matching flag is set
    spike_steps = n_steps if self.record_spikes else 0
    voltage_steps = n_steps if self.record_voltages else 0
    region_map[EnsembleRegions.spikes] = \
        regions.SpikeRecordingRegion(spike_steps)
    region_map[EnsembleRegions.voltages] = \
        regions.VoltageRecordingRegion(voltage_steps)

    # Constraints to partition against; initially assume that 16 cores can
    # be devoted to every problem.
    # 90% of 128MiB
    sdram_constraint = partition.Constraint(128 * 2**20, 0.9)
    # 90% of 16 cores DTCM
    dtcm_constraint = partition.Constraint(16 * 64 * 2**10, 0.9)

    # Cycles available: 200MHz * the machine timestep, i.e., 200 * the
    # machine timestep in microseconds.
    cycles = 200 * model.machine_timestep
    # 80% of 16 cores compute
    cpu_constraint = partition.Constraint(cycles * 16, 0.8)

    def _usage(estimator):
        """Adapt a usage estimator so that it accepts a slice of neurons."""
        def per_slice(vertex_slice):
            n_neurons = vertex_slice.stop - vertex_slice.start
            return estimator(size_in, size_out, n_neurons)
        return per_slice

    partition_constraints = {
        sdram_constraint: _usage(_lif_sdram_usage),
        dtcm_constraint: _usage(_lif_dtcm_usage),
        cpu_constraint: _usage(_lif_cpu_usage),
    }

    # Each slice of neurons becomes a cluster, whose vertices are
    # constrained to share a chip.
    self.clusters = list()
    vertices = list()
    constraints = list()
    neuron_slices = partition.partition(slice(0, self.ensemble.n_neurons),
                                        partition_constraints)
    for sl in neuron_slices:
        cluster = EnsembleCluster(sl, size_in, size_out, region_map)
        self.clusters.append(cluster)

        cluster_vertices = cluster.make_vertices(cycles)
        vertices.extend(cluster_vertices)
        constraints.append(SameChipConstraint(cluster_vertices))

    # Return the vertices and callback methods
    return netlistspec(vertices, self.load_to_machine,
                       after_simulation_function=self.after_simulation,
                       constraints=constraints)
def make_vertices(self, model, n_steps):
    """Make vertices for the filter.

    Unless both `self.n_cores_per_chip` and `self.n_chips` have already been
    set, a number of cores per chip and a number of chips are chosen
    heuristically and stored on the object.  The input space is divided
    between the cores of a chip (the same division is repeated on every
    chip) and the output space is divided between all of the cores on all of
    the chips.

    Returns a `netlistspec` containing the vertices, the load function and
    one `SameChipConstraint` per chip.
    """
    # Get the outgoing transforms and keys
    sigs = model.get_signals_from_object(self)
    if OutputPort.standard in sigs:
        outgoing = sigs[OutputPort.standard]
        transform, output_keys, sigs_pars_slices = \
            get_transforms_and_keys(outgoing)
    else:
        transform = np.array([[]])
        output_keys = list()
        sigs_pars_slices = list()

    size_out = len(output_keys)

    # Calculate how many cores and chips to use.
    if self.n_cores_per_chip is None or self.n_chips is None:
        # The number of cores is largely a function of the input size, we
        # try to ensure that each core is receiving a max of 32 packets per
        # timestep.
        #
        # `n_cores` is the number of cores *per chip* throughout: it is
        # multiplied by `n_chips` to get the total, capped at 16 and stored
        # as `n_cores_per_chip`.  It was previously seeded with
        # `n_chips * n_cores_per_chip`, which counted the chips twice (and
        # could store more than 16 cores per chip) whenever `self.n_chips`
        # had been set to more than one.
        n_cores = int(min(16, np.ceil(self.size_in / 32.0)))

        # The number of chips is now determined by the size in (columns in
        # the transform matrix), the size out (rows in the transform
        # matrix) and the number of cores per chip.
        n_chips = self.n_chips or 1

        while True:
            rows_per_core = int(
                np.ceil(float(size_out) / (n_cores * n_chips)))
            load_per_core = rows_per_core * self.size_in

            # The 8,000 limits the number of columns in each row that we
            # need to process. This is a heuristic.
            if load_per_core <= 8000 or n_chips > 9:
                # The load per core is acceptable or we're using way too
                # many chips
                break

            if n_cores < 16:
                # Increase the number of cores per chip if we can
                n_cores += 1
            else:
                # Otherwise increase the number of chips
                n_chips += 1

        # Store the result
        self.n_cores_per_chip = n_cores
        self.n_chips = n_chips

    # Slice the input space into the given number of subspaces, this is
    # repeated on each chip.
    input_slices = list(
        divide_slice(slice(0, self.size_in), self.n_cores_per_chip))

    # Slice the output space into the given number of subspaces, this is
    # sliced across all of the chips.
    output_slices = divide_slice(slice(0, size_out),
                                 self.n_cores_per_chip * self.n_chips)

    # Construct the output keys and transform regions; the output keys are
    # sliced, and the transform is sliced by rows.
    self.output_keys_region = regions.KeyspacesRegion(
        output_keys, fields=[regions.KeyField({'cluster': 'cluster'})],
        partitioned_by_atom=True)
    self.transform_region = regions.MatrixRegion(
        np_to_fix(transform),
        sliced_dimension=regions.MatrixPartitioning.rows)

    # Construct the system region
    self.system_region = SystemRegion(self.size_in, model.machine_timestep)

    # Get the incoming filters
    incoming = model.get_signals_to_object(self)
    self.filters_region, self.routing_region = make_filter_regions(
        incoming[InputPort.standard], model.dt, True,
        model.keyspaces.filter_routing_tag, width=self.size_in)

    # Make the vertices and constraints
    iter_output_slices = iter(output_slices)
    cons = list()  # List of constraints

    # For each chip that we'll be using
    for _ in range(self.n_chips):
        chip_vertices = list()

        # Each core is given an input slice and an output slice.  The same
        # set of input slices is used per chip, but we iterate through the
        # whole list of output slices.  The shared iterator must remain the
        # second argument to `zip` so that no output slice is consumed and
        # discarded when the input slices run out.
        for in_slice, out_slice in zip(input_slices, iter_output_slices):
            # Determine the amount of SDRAM required (the 24 additional
            # bytes are for the application pointer table).  We also
            # include this core's contribution to a shared SDRAM vector.
            sdram = (24 + 4 * (in_slice.stop - in_slice.start) +
                     self.system_region.sizeof() +
                     self.filters_region.sizeof_padded() +
                     self.routing_region.sizeof_padded() +
                     self.output_keys_region.sizeof_padded(out_slice) +
                     self.transform_region.sizeof_padded(out_slice))

            # Create the vertex and include in the list of vertices
            v = ParallelFilterSlice(in_slice, out_slice,
                                    {Cores: 1, SDRAM: sdram},
                                    sigs_pars_slices)
            chip_vertices.append(v)
            self.vertices.append(v)

        # Create a constraint which will force all of the vertices to exist
        # on the same chip.
        cons.append(SameChipConstraint(chip_vertices))

    # Return the spec
    return netlistspec(self.vertices, self.load_to_machine,
                       constraints=cons)
def before_simulation(self, netlist, simulator, n_steps):
    """Evaluate the function for the coming steps and write the values
    that should be output into the memory of each vertex.
    """
    # Record the run-time in the system region
    self.system_region.n_steps = n_steps

    # Only a single period needs evaluating when a period is given
    max_n = n_steps
    if self.period is not None:
        max_n = min(n_steps, int(np.ceil(self.period / simulator.dt)))

    first_step = simulator.steps
    ts = np.arange(first_step, first_step + max_n) * simulator.dt

    # Evaluate the function: it may be a callable, a process or a constant
    fn = self.function
    if callable(fn):
        values = np.array([fn(t) for t in ts])
    elif isinstance(fn, Process):
        values = fn.run_steps(max_n, d=self.size_out, dt=simulator.dt)
    else:
        values = np.array([fn for _ in ts])

    # Make sure that the values can be sliced however they were produced
    values = npext.array(values, min_dims=2)

    def _connection_output(params, transform):
        """Compute the value arriving at the end of a connection for each
        evaluated time step: apply the pre-slice, then the function (if
        there is one) and finally the transform.
        """
        rows = []
        for sample in values:
            sample = sample[params.pre_slice]
            if params.function is not None:
                sample = np.asarray(params.function(sample), dtype=float)
            rows.append(np.dot(transform, sample.T))
        return np.array(rows).reshape(max_n, -1)

    outputs = [_connection_output(params, transform)
               for params, transform in self.transmission_parameters]

    # Join the outputs into one large matrix which can be dumped to memory
    output_matrix = np.hstack(outputs)
    new_output_region = regions.MatrixRegion(
        np_to_fix(output_matrix),
        sliced_dimension=regions.MatrixPartitioning.columns
    )

    # Write the system region and the new values for every vertex
    for vertex in self.vertices:
        memory = self.vertices_region_memory[vertex]

        system_memory = memory[self.system_region]
        system_memory.seek(0)
        self.system_region.n_steps = max_n
        self.system_region.write_subregion_to_file(system_memory,
                                                   vertex.slice)

        output_memory = memory[self.output_region]
        output_memory.seek(0)
        new_output_region.write_subregion_to_file(output_memory,
                                                  vertex.slice)
def make_vertices(self, model, n_steps):
    """Build the regions for the ensemble, partition its neurons into
    clusters of co-operating cores and return the netlist specification.
    """
    built = model.params[self.ensemble]
    size_in = self.ensemble.size_in
    by_rows = regions.MatrixPartitioning.rows

    def _row_region(matrix):
        """Convert a matrix to fixed point and wrap it in a region which
        is partitioned by rows.
        """
        return regions.MatrixRegion(tp.np_to_fix(matrix),
                                    sliced_dimension=by_rows)

    region_map = dict()

    # Encoders (these already include the gain)
    scaled_encoders = built.scaled_encoders
    region_map[EnsembleRegions.encoders] = _row_region(scaled_encoders)

    # Bias, with any direct input folded in
    bias = built.bias + np.dot(scaled_encoders, self.direct_input)
    assert bias.ndim == 1
    region_map[EnsembleRegions.bias] = _row_region(bias)

    # Gains
    region_map[EnsembleRegions.gain] = _row_region(built.gain)

    # Filters and routing for the standard and global inhibition inputs
    signals_in = model.get_signals_to_object(self)

    input_filters, input_routing = make_filter_regions(
        signals_in[InputPort.standard], model.dt, True,
        model.keyspaces.filter_routing_tag, width=size_in
    )
    region_map[EnsembleRegions.input_filters] = input_filters
    region_map[EnsembleRegions.input_routing] = input_routing

    inhibition_filters, inhibition_routing = make_filter_regions(
        signals_in[EnsembleInputPort.global_inhibition], model.dt, True,
        model.keyspaces.filter_routing_tag, width=1
    )
    region_map[EnsembleRegions.inhibition_filters] = inhibition_filters
    region_map[EnsembleRegions.inhibition_routing] = inhibition_routing

    # Decoders and keys for the outgoing connections (empty when nothing
    # leaves the standard output port).
    signals_out = model.get_signals_from_object(self)
    if OutputPort.standard not in signals_out:
        decoders = np.array([])
        output_keys = list()
    else:
        decoders, output_keys = get_decoders_and_keys(
            signals_out[OutputPort.standard], True
        )
    size_out = decoders.shape[0]

    region_map[EnsembleRegions.decoders] = _row_region(decoders / model.dt)
    region_map[EnsembleRegions.keys] = regions.KeyspacesRegion(
        output_keys,
        fields=[regions.KeyField({'cluster': 'cluster'})],
        partitioned_by_atom=True
    )

    # Information about groups of co-operating cores
    region_map[EnsembleRegions.population_length] = regions.ListRegion("I")

    # General ensemble parameters
    ensemble_region = EnsembleRegion(model.machine_timestep, size_in)
    region_map[EnsembleRegions.ensemble] = ensemble_region

    # Neuron-type specific parameters
    neuron_type = self.ensemble.neuron_type
    region_map[EnsembleRegions.neuron] = LIFRegion(
        model.dt, neuron_type.tau_rc, neuron_type.tau_ref
    )

    # Profiling: zero samples unless profiling is enabled in the config
    n_profiler_samples = 0
    self.profiled = getconfig(model.config, self.ensemble, "profile",
                              False)
    if self.profiled:
        n_profiler_samples = getconfig(model.config, self.ensemble,
                                       "profile_num_samples")

        # Fall back to a default derived from the number of steps
        if n_profiler_samples is None:
            n_profiler_samples = (
                len(EnsembleSlice.profiler_tag_names) * n_steps * 2
            )

    region_map[EnsembleRegions.profiler] = \
        regions.Profiler(n_profiler_samples)
    ensemble_region.n_profiler_samples = n_profiler_samples

    # Work out what needs recording from the local probes
    for probe in self.local_probes:
        attr = probe.attr
        if attr == "voltage":
            self.record_voltages = True
        elif attr in ("output", "spikes"):
            self.record_spikes = True
        else:
            raise NotImplementedError(
                "Cannot probe {} on Ensembles".format(probe.attr)
            )

    ensemble_region.record_spikes = self.record_spikes
    ensemble_region.record_voltages = self.record_voltages

    # Recording regions are empty unless the matching flag is set
    spike_steps = n_steps if self.record_spikes else 0
    voltage_steps = n_steps if self.record_voltages else 0
    region_map[EnsembleRegions.spikes] = \
        regions.SpikeRecordingRegion(spike_steps)
    region_map[EnsembleRegions.voltages] = \
        regions.VoltageRecordingRegion(voltage_steps)

    # Constraints to partition against; initially assume that 16 cores can
    # be devoted to every problem.
    # 90% of 128MiB
    sdram_constraint = partition.Constraint(128 * 2**20, 0.9)
    # 90% of 16 cores DTCM
    dtcm_constraint = partition.Constraint(16 * 64 * 2**10, 0.9)

    # Cycles available: 200MHz * the machine timestep, i.e., 200 * the
    # machine timestep in microseconds.
    cycles = 200 * model.machine_timestep
    # 80% of 16 cores compute
    cpu_constraint = partition.Constraint(cycles * 16, 0.8)

    def _usage(estimator):
        """Adapt a usage estimator so that it accepts a slice of neurons."""
        def per_slice(vertex_slice):
            n_neurons = vertex_slice.stop - vertex_slice.start
            return estimator(size_in, size_out, n_neurons)
        return per_slice

    partition_constraints = {
        sdram_constraint: _usage(_lif_sdram_usage),
        dtcm_constraint: _usage(_lif_dtcm_usage),
        cpu_constraint: _usage(_lif_cpu_usage),
    }

    # Each slice of neurons becomes a cluster, whose vertices are
    # constrained to share a chip.
    self.clusters = list()
    vertices = list()
    constraints = list()
    neuron_slices = partition.partition(slice(0, self.ensemble.n_neurons),
                                        partition_constraints)
    for sl in neuron_slices:
        cluster = EnsembleCluster(sl, size_in, size_out, region_map)
        self.clusters.append(cluster)

        cluster_vertices = cluster.make_vertices(cycles)
        vertices.extend(cluster_vertices)
        constraints.append(SameChipConstraint(cluster_vertices))

    # Return the vertices and callback methods
    return netlistspec(vertices, self.load_to_machine,
                       after_simulation_function=self.after_simulation,
                       constraints=constraints)
def make_vertices(self, model, n_steps):  # TODO remove n_steps
    """Construct the data which can be loaded into the memory of a
    SpiNNaker machine.

    The regions describing the ensemble are built and stored on the object,
    the neurons are partitioned against SDRAM, DTCM and CPU constraints and
    one `VertexSlice` is created per partition.

    Returns a `netlistspec` containing the vertices together with the load
    and after-simulation callbacks.
    """
    # Build encoders, gain and bias regions
    params = model.params[self.ensemble]

    # Convert the encoders combined with the gain to S1615 before creating
    # the region.
    encoders_with_gain = params.scaled_encoders
    self.encoders_region = regions.MatrixRegion(
        tp.np_to_fix(encoders_with_gain),
        sliced_dimension=regions.MatrixPartitioning.rows
    )

    # Combine the direct input with the bias before converting to S1615 and
    # creating the region.
    bias_with_di = params.bias + np.dot(encoders_with_gain,
                                        self.direct_input)
    assert bias_with_di.ndim == 1
    self.bias_region = regions.MatrixRegion(
        tp.np_to_fix(bias_with_di),
        sliced_dimension=regions.MatrixPartitioning.rows
    )

    # Convert the gains to S1615 before creating the region
    self.gain_region = regions.MatrixRegion(
        tp.np_to_fix(params.gain),
        sliced_dimension=regions.MatrixPartitioning.rows
    )

    # Extract all the filters from the incoming connections
    incoming = model.get_signals_connections_to_object(self)

    self.input_filters, self.input_filter_routing = make_filter_regions(
        incoming[InputPort.standard], model.dt, True,
        model.keyspaces.filter_routing_tag, width=self.ensemble.size_in
    )
    self.inhib_filters, self.inhib_filter_routing = make_filter_regions(
        incoming[EnsembleInputPort.global_inhibition], model.dt, True,
        model.keyspaces.filter_routing_tag, width=1
    )
    self.mod_filters, self.mod_filter_routing = make_filter_regions(
        {}, model.dt, True, model.keyspaces.filter_routing_tag
    )

    # Extract all the decoders for the outgoing connections and build the
    # regions for the decoders and the regions for the output keys.
    outgoing = model.get_signals_connections_from_object(self)
    decoders, output_keys = \
        get_decoders_and_keys(model, outgoing[OutputPort.standard], True)
    size_out = decoders.shape[1]

    # TODO: Include learnt decoders
    self.pes_region = PESRegion()

    self.decoders_region = regions.MatrixRegion(
        tp.np_to_fix(decoders / model.dt),
        sliced_dimension=regions.MatrixPartitioning.rows
    )
    self.output_keys_region = regions.KeyspacesRegion(
        output_keys, fields=[regions.KeyField({'cluster': 'cluster'})]
    )

    # Create the recording regions for locally situated probes
    self.spike_region = None
    self.probe_spikes = False
    self.voltage_region = None
    self.probe_voltages = False

    for probe in self.local_probes:
        # For each probe determine which regions and flags should be set
        if probe.attr in ("output", "spikes"):
            # If spikes are being probed then ensure that the flag is set
            # and a region exists.
            if not self.probe_spikes:
                self.spike_region = SpikeRegion(n_steps)
                self.probe_spikes = True
        elif probe.attr == "voltage":
            # If voltages are being probed then ensure that the flag is set
            # and a region exists.
            #
            # This is an equality test as `in ("voltage")` is a substring
            # test against the string "voltage" (the parentheses do not
            # make a tuple) and so also matched attributes such as "volt".
            if not self.probe_voltages:
                self.voltage_region = VoltageRegion(n_steps)
                self.probe_voltages = True

    # If profiling is enabled
    num_profiler_samples = 0
    if getconfig(model.config, self.ensemble, "profile", False):
        # Try and get number of samples from config
        num_profiler_samples = getconfig(model.config, self.ensemble,
                                         "profile_num_samples")

        # If it's not specified, calculate sensible default
        if num_profiler_samples is None:
            num_profiler_samples = \
                len(EnsembleLIF.profiler_tag_names) * n_steps * 2

    # Create profiler region
    self.profiler_region = regions.Profiler(num_profiler_samples)

    # Create the regions list.  The DTCM estimate below relies on the final
    # three entries being the profiler and recording regions.
    self.regions = [
        SystemRegion(self.ensemble.size_in,
                     size_out,
                     model.machine_timestep,
                     self.ensemble.neuron_type.tau_ref,
                     self.ensemble.neuron_type.tau_rc,
                     model.dt,
                     self.probe_spikes,
                     self.probe_voltages,
                     num_profiler_samples
                     ),
        self.bias_region,
        self.encoders_region,
        self.decoders_region,
        self.output_keys_region,
        self.input_filters,
        self.input_filter_routing,
        self.inhib_filters,
        self.inhib_filter_routing,
        self.gain_region,
        self.mod_filters,
        self.mod_filter_routing,
        self.pes_region,
        self.profiler_region,
        self.spike_region,
        self.voltage_region,
    ]

    # Partition the ensemble and get a list of vertices to load to the
    # machine.  We can expect to be DTCM or CPU bound, so the SDRAM bound
    # can be quite lax to allow for lots of data probing.
    # TODO: Include other DTCM usage
    def cpu_usage(sl):
        """Calculate the CPU usage (in cycles) based on the number of
        neurons and the size_in and size_out of the ensemble.

        The equation and coefficients are taken from: "An Efficient
        SpiNNaker Implementation of the NEF", Mundy, Knight, Stewart and
        Furber [IJCNN 2015]
        """
        n_neurons = (sl.stop - sl.start)
        return (245 + 43*self.ensemble.size_in + 100 + 702*size_out +
                188 + 69*n_neurons + 13*n_neurons*self.ensemble.size_in)

    self.vertices = list()
    sdram_constraint = partition.Constraint(8*2**20)  # Max 8MiB
    dtcm_constraint = partition.Constraint(64*2**10, .75)  # 75% of 64KiB
    cpu_constraint = partition.Constraint(200000, .8)  # 80% of 200k cycles
    constraints = {
        sdram_constraint: lambda s: regions.utils.sizeof_regions(
            self.regions, s),
        # **HACK** don't include last three regions in DTCM estimate
        # (profiler and spike recording)
        dtcm_constraint: lambda s: regions.utils.sizeof_regions(
            self.regions[:-3], s) + 5*(s.stop - s.start),
        cpu_constraint: cpu_usage,
    }

    # A different application is loaded when profiling is in use
    app_name = (
        "ensemble_profiled" if num_profiler_samples > 0 else "ensemble"
    )

    for sl in partition.partition(slice(0, self.ensemble.n_neurons),
                                  constraints):
        resources = {
            Cores: 1,
            SDRAM: regions.utils.sizeof_regions(self.regions, sl),
        }
        vsl = VertexSlice(sl, get_application(app_name), resources)
        self.vertices.append(vsl)

    # Return the vertices and callback methods
    return netlistspec(self.vertices, self.load_to_machine,
                       after_simulation_function=self.after_simulation)
def make_vertices(self, model, n_steps):
    """Make vertices for the filter.

    Unless both `self.n_cores_per_chip` and `self.n_chips` have already been
    set, a number of cores per chip and a number of chips are chosen
    heuristically and stored on the object.  The input space is divided
    between the cores of a chip (the same division is repeated on every
    chip) and the output space is divided between all of the cores on all of
    the chips.

    Returns a `netlistspec` containing the vertices, the load function and
    one `SameChipConstraint` per chip.
    """
    # Get the outgoing transforms and keys
    sigs = model.get_signals_from_object(self)
    if OutputPort.standard in sigs:
        outgoing = sigs[OutputPort.standard]
        transform, output_keys, sigs_pars_slices = \
            get_transforms_and_keys(outgoing)
    else:
        transform = np.array([[]])
        output_keys = list()
        sigs_pars_slices = list()

    size_out = len(output_keys)

    # Calculate how many cores and chips to use.
    if self.n_cores_per_chip is None or self.n_chips is None:
        # The number of cores is largely a function of the input size, we
        # try to ensure that each core is receiving a max of 32 packets per
        # timestep.
        #
        # `n_cores` is the number of cores *per chip* throughout: it is
        # multiplied by `n_chips` to get the total, capped at 16 and stored
        # as `n_cores_per_chip`.  It was previously seeded with
        # `n_chips * n_cores_per_chip`, which counted the chips twice (and
        # could store more than 16 cores per chip) whenever `self.n_chips`
        # had been set to more than one.
        n_cores = int(min(16, np.ceil(self.size_in / 32.0)))

        # The number of chips is now determined by the size in (columns in
        # the transform matrix), the size out (rows in the transform
        # matrix) and the number of cores per chip.
        n_chips = self.n_chips or 1

        while True:
            rows_per_core = int(np.ceil(float(size_out) /
                                        (n_cores * n_chips)))
            load_per_core = rows_per_core * self.size_in

            # The 8,000 limits the number of columns in each row that we
            # need to process. This is a heuristic.
            if load_per_core <= 8000 or n_chips > 9:
                # The load per core is acceptable or we're using way too
                # many chips
                break

            if n_cores < 16:
                # Increase the number of cores per chip if we can
                n_cores += 1
            else:
                # Otherwise increase the number of chips
                n_chips += 1

        # Store the result
        self.n_cores_per_chip = n_cores
        self.n_chips = n_chips

    # Slice the input space into the given number of subspaces, this is
    # repeated on each chip.
    input_slices = list(divide_slice(slice(0, self.size_in),
                                     self.n_cores_per_chip))

    # Slice the output space into the given number of subspaces, this is
    # sliced across all of the chips.
    output_slices = divide_slice(slice(0, size_out),
                                 self.n_cores_per_chip * self.n_chips)

    # Construct the output keys and transform regions; the output keys are
    # sliced, and the transform is sliced by rows.
    self.output_keys_region = regions.KeyspacesRegion(
        output_keys,
        fields=[regions.KeyField({'cluster': 'cluster'})],
        partitioned_by_atom=True
    )
    self.transform_region = regions.MatrixRegion(
        np_to_fix(transform),
        sliced_dimension=regions.MatrixPartitioning.rows
    )

    # Construct the system region
    self.system_region = SystemRegion(self.size_in, model.machine_timestep)

    # Get the incoming filters
    incoming = model.get_signals_to_object(self)
    self.filters_region, self.routing_region = make_filter_regions(
        incoming[InputPort.standard], model.dt, True,
        model.keyspaces.filter_routing_tag,
        width=self.size_in
    )

    # Make the vertices and constraints
    iter_output_slices = iter(output_slices)
    cons = list()  # List of constraints

    # For each chip that we'll be using
    for _ in range(self.n_chips):
        chip_vertices = list()

        # Each core is given an input slice and an output slice.  The same
        # set of input slices is used per chip, but we iterate through the
        # whole list of output slices.  The shared iterator must remain the
        # second argument to `zip` so that no output slice is consumed and
        # discarded when the input slices run out.
        for in_slice, out_slice in zip(input_slices, iter_output_slices):
            # Determine the amount of SDRAM required (the 24 additional
            # bytes are for the application pointer table).  We also
            # include this core's contribution to a shared SDRAM vector.
            sdram = (24 + 4*(in_slice.stop - in_slice.start) +
                     self.system_region.sizeof() +
                     self.filters_region.sizeof_padded() +
                     self.routing_region.sizeof_padded() +
                     self.output_keys_region.sizeof_padded(out_slice) +
                     self.transform_region.sizeof_padded(out_slice))

            # Create the vertex and include in the list of vertices
            v = ParallelFilterSlice(in_slice, out_slice,
                                    {Cores: 1, SDRAM: sdram},
                                    sigs_pars_slices)
            chip_vertices.append(v)
            self.vertices.append(v)

        # Create a constraint which will force all of the vertices to exist
        # on the same chip.
        cons.append(SameChipConstraint(chip_vertices))

    # Return the spec
    return netlistspec(self.vertices, self.load_to_machine,
                       constraints=cons)
def before_simulation(self, netlist, simulator, n_steps):
    """Evaluate the function for the coming steps and write the values
    that should be output into the memory of each vertex.
    """
    # Record the run-time in the system region
    self.system_region.n_steps = n_steps

    # Only a single period needs evaluating when a period is given
    max_n = n_steps
    if self.period is not None:
        max_n = min(n_steps, int(np.ceil(self.period / simulator.dt)))

    first_step = simulator.steps
    ts = np.arange(first_step, first_step + max_n) * simulator.dt

    # Evaluate the function: it may be a callable, a process or a constant
    fn = self.function
    if callable(fn):
        values = np.array([fn(t) for t in ts])
    elif isinstance(fn, Process):
        values = fn.run_steps(max_n, d=self.size_out, dt=simulator.dt)
    else:
        values = np.array([fn for _ in ts])

    # Make sure that the values can be sliced however they were produced
    values = npext.array(values, min_dims=2)

    def _connection_output(connection, transform):
        """Compute the value arriving at the end of a connection for each
        evaluated time step: apply the pre-slice, then the function (if
        there is one) and finally the transform.
        """
        rows = []
        for sample in values:
            sample = sample[connection.pre_slice]
            if connection.function is not None:
                sample = np.asarray(connection.function(sample),
                                    dtype=float)
            rows.append(np.dot(transform, sample.T))
        return np.array(rows).reshape(max_n, -1)

    outputs = [_connection_output(connection, transform)
               for connection, transform in self.conns_transforms]

    # Join the outputs into one large matrix which can be dumped to memory
    output_matrix = np.hstack(outputs)
    new_output_region = regions.MatrixRegion(
        np_to_fix(output_matrix),
        sliced_dimension=regions.MatrixPartitioning.columns
    )

    # Write the system region and the new values for every vertex
    for vertex in self.vertices:
        memory = self.vertices_region_memory[vertex]

        system_memory = memory[self.system_region]
        system_memory.seek(0)
        self.system_region.n_steps = max_n
        self.system_region.write_subregion_to_file(system_memory,
                                                   vertex.slice)

        output_memory = memory[self.output_region]
        output_memory.seek(0)
        new_output_region.write_subregion_to_file(output_memory,
                                                  vertex.slice)