def make_vertices(self, model, n_steps):
    """Create the vertices to be simulated on the machine.

    Builds the system, keys and output regions for this object, partitions
    the output dimensions into slices that satisfy the transmit and SDRAM
    constraints, and creates one ``VertexSlice`` per resulting slice.

    Parameters
    ----------
    model :
        Object providing ``machine_timestep`` and
        ``get_signals_from_object``.
    n_steps :
        Passed to the system region and used as the number of rows of the
        (zero-initialised) output matrix.

    Returns
    -------
    netlistspec
        An empty specification when there are no outgoing signals on the
        standard output port; otherwise the vertices together with the
        ``load_to_machine`` and ``before_simulation`` callbacks.
    """
    # Create the system region
    self.system_region = SystemRegion(model.machine_timestep,
                                      self.period is not None,
                                      n_steps)

    # Get all the outgoing signals to determine how big the size out is and
    # to build a list of keys.
    sigs_conns = model.get_signals_from_object(self)

    # Nothing to do when nothing leaves the standard output port.  The
    # membership test also covers a completely empty mapping and avoids
    # indexing a port that is not present.
    if OutputPort.standard not in sigs_conns:
        return netlistspec([])

    keys = list()
    self.transmission_parameters = list()
    for sig, transmission_params in sigs_conns[OutputPort.standard]:
        # Add the keys for this connection
        transform, sig_keys = get_transform_keys(sig, transmission_params)
        keys.extend(sig_keys)
        self.transmission_parameters.append((transmission_params, transform))

    size_out = len(keys)

    # Build the keys region
    self.keys_region = regions.KeyspacesRegion(
        keys,
        [regions.KeyField({"cluster": "cluster"})],
        partitioned_by_atom=True
    )

    # Create the output region
    self.output_region = regions.MatrixRegion(
        np.zeros((n_steps, size_out)),
        sliced_dimension=regions.MatrixPartitioning.columns
    )

    self.regions = [self.system_region, self.keys_region, self.output_region]

    def sdram_usage(sl):
        # Size of all regions when restricted to the slice `sl`; used both
        # as a partitioning constraint and as the vertex resource figure.
        return regions.utils.sizeof_regions(self.regions, sl)

    # Partition by output dimension to create vertices
    transmit_constraint = partition.Constraint(10)
    sdram_constraint = partition.Constraint(8*2**20)  # Max 8MiB
    constraints = {
        transmit_constraint: lambda s: s.stop - s.start,
        sdram_constraint: sdram_usage,
    }
    for sl in partition.partition(slice(0, size_out), constraints):
        # Determine the resources
        resources = {
            Cores: 1,
            SDRAM: sdram_usage(sl),
        }
        vsl = VertexSlice(sl, self._label, get_application("value_source"),
                          resources)
        self.vertices.append(vsl)

    # Return the vertices and callback methods
    return netlistspec(self.vertices, self.load_to_machine,
                       self.before_simulation)
def make_vertices(self, model, n_steps):
    """Make vertices for the filter.

    Works out the outgoing transform and keys, decides (if not already
    fixed) how many cores per chip and how many chips to use, builds the
    regions and then creates one ``ParallelFilterSlice`` per
    (input slice, output slice) pair.  The vertices created for one chip
    are tied together with a ``SameChipConstraint``.

    Parameters
    ----------
    model :
        Object providing the signals to and from this object,
        ``machine_timestep``, ``dt`` and ``keyspaces``.
    n_steps :
        Not used by this method.

    Returns
    -------
    netlistspec
        The vertices, the ``load_to_machine`` callback and the list of
        same-chip constraints.
    """
    # Work out the outgoing transform and keys; fall back to an empty
    # transform when nothing leaves the standard output port.
    outgoing_signals = model.get_signals_from_object(self)
    if OutputPort.standard not in outgoing_signals:
        transform = np.array([[]])
        output_keys = list()
        sigs_pars_slices = list()
    else:
        transform, output_keys, sigs_pars_slices = get_transforms_and_keys(
            outgoing_signals[OutputPort.standard])

    size_out = len(output_keys)

    # Decide how many cores and chips to use unless both are already set.
    if self.n_cores_per_chip is None or self.n_chips is None:
        # The core count is driven mostly by the input size: aim for at
        # most 32 packets per core per timestep, capped at 16 cores.
        initial_cores = int(min(16, np.ceil(self.size_in / 32.0)))

        # The chip count then follows from the size in (transform columns),
        # the size out (transform rows) and the number of cores per chip.
        chips = self.n_chips or 1
        cores = chips * initial_cores

        while True:
            rows_per_core = int(np.ceil(float(size_out) / (cores * chips)))
            load_per_core = rows_per_core * self.size_in

            # Heuristic: 8,000 bounds the per-core work.  Also give up once
            # an excessive number of chips would be needed.
            if load_per_core <= 8000 or chips > 9:
                break

            # Prefer adding cores to a chip; add a chip only when full.
            if cores < 16:
                cores += 1
            else:
                chips += 1

        # Store the result
        self.n_cores_per_chip = cores
        self.n_chips = chips

    # The input space is cut into one slice per core on a chip; the same
    # set of slices is reused on every chip.
    input_slices = list(
        divide_slice(slice(0, self.size_in), self.n_cores_per_chip))

    # The output space is cut into one slice per core across all chips.
    total_cores = self.n_cores_per_chip * self.n_chips
    output_slices = divide_slice(slice(0, size_out), total_cores)

    # Output keys are sliced by atom; the transform is sliced by rows.
    self.output_keys_region = regions.KeyspacesRegion(
        output_keys,
        fields=[regions.KeyField({'cluster': 'cluster'})],
        partitioned_by_atom=True)
    self.transform_region = regions.MatrixRegion(
        np_to_fix(transform),
        sliced_dimension=regions.MatrixPartitioning.rows)

    # Construct the system region
    self.system_region = SystemRegion(self.size_in, model.machine_timestep)

    # Build the filter and routing regions from the incoming signals.
    incoming_signals = model.get_signals_to_object(self)
    self.filters_region, self.routing_region = make_filter_regions(
        incoming_signals[InputPort.standard],
        model.dt,
        True,
        model.keyspaces.filter_routing_tag,
        width=self.size_in)

    # One shared iterator over the output slices: every chip continues
    # where the previous one stopped.
    remaining_outputs = iter(output_slices)
    same_chip_constraints = list()

    for _ in range(self.n_chips):
        vertices_on_chip = list()

        # `input_slices` must come first in the zip so that running out of
        # input slices does not consume an extra output slice.
        for in_slice, out_slice in zip(input_slices, remaining_outputs):
            # This core's contribution to a shared SDRAM vector.
            shared_vector_bytes = 4 * (in_slice.stop - in_slice.start)

            # The 24 additional bytes are for the application pointer table.
            sdram = (24 + shared_vector_bytes +
                     self.system_region.sizeof() +
                     self.filters_region.sizeof_padded() +
                     self.routing_region.sizeof_padded() +
                     self.output_keys_region.sizeof_padded(out_slice) +
                     self.transform_region.sizeof_padded(out_slice))

            resources = {Cores: 1, SDRAM: sdram}
            vertex = ParallelFilterSlice(in_slice, out_slice, resources,
                                         sigs_pars_slices)
            vertices_on_chip.append(vertex)
            self.vertices.append(vertex)

        # Force all of this chip's vertices onto the same chip.
        same_chip_constraints.append(SameChipConstraint(vertices_on_chip))

    # Return the spec
    return netlistspec(self.vertices, self.load_to_machine,
                       constraints=same_chip_constraints)
def __init__(self, label, column_slice, output_slice, transform_region,
             output_keys, output_slices, machine_timestep, filter_region,
             filter_routing_region):
    """Allocate a portion of the overall matrix to a single processing core.

    Parameters
    ----------
    label :
        Label forwarded to the base-class constructor.
    column_slice : :py:class:`slice`
        Columns of the transform matrix managed by the group of vertices of
        which we are a member.
    output_slice : :py:class:`slice`
        Slice of the rows of the transform matrix that will be applied by
        this processing core.  Must have explicit ``start`` and ``stop``
        and a step of ``None`` or 1.
    transform_region : MatrixRegion
        Region stored under ``Regions.transform``.
    output_keys : [BitField, ...]
        Keys transmitted by filter.
    output_slices : [(TransmissionParameters, set), ...]
        Pairs of transmission parameters and sets containing the row
        indices of the transform matrix corresponding to the transmission
        parameters.
    machine_timestep :
        Passed to the system region.
    filter_region :
        Region stored under ``Regions.input_filters``.
    filter_routing_region :
        Region stored under ``Regions.input_routing``.

    Raises
    ------
    AssertionError
        If ``output_slice`` has an open end or a step other than 1.
    """
    # Check that the output slice is safe
    assert (output_slice.start is not None and
            output_slice.stop is not None and
            (output_slice.step is None or output_slice.step == 1))

    # Store information about the slices of the matrix for which we're
    # responsible.
    self.output_slice = output_slice
    self.column_slice = column_slice

    # Store which signal parameter slices we contain: keep the transmission
    # parameters whose rows intersect the rows this core is responsible for.
    out_set = set(range(output_slice.start, output_slice.stop))
    self.transmission_params = {
        transmission_params
        for transmission_params, outs in output_slices
        if out_set & outs
    }

    # Construct the regions
    self.regions = {
        Regions.system: SystemRegion(column_slice, output_slice,
                                     machine_timestep),
        Regions.transform: transform_region,
        Regions.keys: regions.KeyspacesRegion(
            output_keys,
            fields=[regions.KeyField(dict(cluster="cluster"))],
            partitioned_by_atom=True),
        Regions.input_filters: filter_region,
        Regions.input_routing: filter_routing_region,
    }

    # Construct the region arguments
    w = self.column_slice.stop - self.column_slice.start
    self.region_arguments = {
        Regions.transform: Args(vertex_slice=self.output_slice),
        Regions.keys: Args(vertex_slice=self.output_slice),
        Regions.system: Args(),  # No arguments
        Regions.input_filters: Args(filter_width=w),  # Width of our columns
        Regions.input_routing: Args(),  # No arguments
    }

    # Determine the resource requirements and find the correct application
    sdram_usage = regions.utils.sizeof_regions_named(
        self.regions, self.region_arguments)

    # Forward the label we were given.  `self._label` is never assigned by
    # this constructor before this call, so reading it here would ignore
    # the `label` argument.
    super(FilterCore, self).__init__(label=label,
                                     application=get_application("filter"),
                                     resources={
                                         Cores: 1,
                                         SDRAM: sdram_usage
                                     })