def pack_data(self, dt, buffer, offset=0):
        """Pack the struct describing the filter into the buffer."""
        # Compute the filter coefficients
        b, a, _ = cont2discrete((self.num, self.den), dt)
        b = b.flatten()

        # Strip out the first values
        # `a` is negated so that it can be used with a multiply-accumulate
        # instruction on chip.
        assert b[0] == 0.0  # Oops!
        ab = np.vstack((-a[1:], b[1:])).T.flatten()

        # Convert the values to fixed point and write into a data buffer
        struct.pack_into("<I", buffer, offset, self.order)
        buffer[offset + 4:offset + 4 + self.order*8] = \
            tp.np_to_fix(ab).tobytes()
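
A minimal sketch (not from the source) of the coefficient layout the method above produces, driving scipy.signal.cont2discrete directly; the lowpass time constant and timestep are illustrative values:

    import numpy as np
    from scipy.signal import cont2discrete

    tau, dt = 0.005, 0.001            # illustrative time constant and timestep
    num, den = [1.0], [tau, 1.0]      # continuous lowpass 1 / (tau*s + 1)
    b, a, _ = cont2discrete((num, den), dt)
    b = b.flatten()

    assert b[0] == 0.0                # no passthrough term, as asserted above
    ab = np.vstack((-a[1:], b[1:])).T.flatten()
    print(ab)                         # interleaved [-a1, b1, -a2, b2, ...]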
Example #2
    def pack_data(self, dt, buffer, offset=0):
        """Pack the struct describing the filter into the buffer."""
        # Compute the filter coefficients
        b, a, _ = cont2discrete((self.num, self.den), dt)
        b = b.flatten()

        # Strip out the first values
        # `a` is negated so that it can be used with a multiply-accumulate
        # instruction on chip.
        assert b[0] == 0.0  # Oops!
        ab = np.vstack((-a[1:], b[1:])).T.flatten()

        # Convert the values to fixed point and write into a data buffer
        struct.pack_into("<I{}s".format(self.order * 2 * 4), buffer, offset,
                         self.order, tp.np_to_fix(ab).tobytes())
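
For intuition (values illustrative, not from the source): with an order-2 filter the format string above is "<I16s", i.e. a little-endian uint32 holding the order followed by order * 2 * 4 bytes of interleaved fixed-point coefficients:

    import struct

    order = 2
    fmt = "<I{}s".format(order * 2 * 4)   # "<I16s"
    assert struct.calcsize(fmt) == 4 + order * 2 * 4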
Example #3
    def make_vertices(self, output_signals, machine_timestep, filter_region,
                      filter_routing_region):
        """Partition the transform matrix into groups of rows and assign each
        group of rows to a core for computation.

        If the group needs to be split over multiple chips (i.e., the group is
        larger than 17 cores) then partition the matrix such that any used
        chips are used in their entirety.
        """
        if OutputPort.standard not in output_signals:
            self.cores = list()
        else:
            # Get the output transform, keys and slices for this slice of the
            # filter.
            transform, keys, output_slices = \
                get_transforms_and_keys(output_signals[OutputPort.standard],
                                        self.column_slice)

            size_out = transform.shape[0]

            # Build as many vertices as required to keep the number of rows
            # handled by each core at or below max_rows.
            n_cores = (
                (size_out // self.max_rows) +
                (1 if size_out % self.max_rows else 0)
            )

            # Build the transform region for these cores
            transform_region = regions.MatrixRegion(
                np_to_fix(transform),
                sliced_dimension=regions.MatrixPartitioning.rows
            )

            # Build all the vertices
            self.cores = [
                FilterCore(self.column_slice, out_slice,
                           transform_region, keys, output_slices,
                           machine_timestep,
                           filter_region, filter_routing_region) for
                out_slice in divide_slice(slice(0, size_out), n_cores)
            ]

        return self.cores
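
divide_slice is a helper from the surrounding codebase; a hedged sketch of what it is assumed to do here, namely splitting a slice into n roughly equal contiguous sub-slices (the real helper may differ in detail):

    def divide_slice_sketch(full, n):
        """Yield n roughly equal contiguous sub-slices of `full`."""
        length = full.stop - full.start
        chunk = -(-length // n)  # ceiling division
        for start in range(full.start, full.stop, chunk):
            yield slice(start, min(start + chunk, full.stop))

    print(list(divide_slice_sketch(slice(0, 10), 3)))
    # [slice(0, 4), slice(4, 8), slice(8, 10)]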
Example #4
    def make_vertices(self, output_signals, machine_timestep, filter_region,
                      filter_routing_region):
        """Partition the transform matrix into groups of rows and assign each
        group of rows to a core for computation.

        If the group needs to be split over multiple chips (i.e., the group is
        larger than 17 cores) then partition the matrix such that any used
        chips are used in their entirety.
        """
        if OutputPort.standard not in output_signals:
            self.cores = list()
        else:
            # Get the output transform, keys and slices for this slice of the
            # filter.
            transform, keys, output_slices = \
                get_transforms_and_keys(output_signals[OutputPort.standard],
                                        self.column_slice)

            size_out = transform.shape[0]

            # Build as many vertices as required to keep the number of rows
            # handled by each core at or below max_rows.
            n_cores = ((size_out // self.max_rows) +
                       (1 if size_out % self.max_rows else 0))

            # Build the transform region for these cores
            transform_region = regions.MatrixRegion(
                np_to_fix(transform),
                sliced_dimension=regions.MatrixPartitioning.rows)

            # Build all the vertices
            self.cores = [
                FilterCore(self._label, self.column_slice, out_slice,
                           transform_region, keys, output_slices,
                           machine_timestep, filter_region,
                           filter_routing_region)
                for out_slice in divide_slice(slice(0, size_out), n_cores)
            ]

        return self.cores
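
A worked instance of the core-count arithmetic above (the max_rows value is invented for the example): 130 output rows with at most 64 rows per core need three cores, two full and one partial:

    size_out, max_rows = 130, 64
    n_cores = (size_out // max_rows) + (1 if size_out % max_rows else 0)
    assert n_cores == 3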
Example #5
    def test_pack_data(self, num, den, dt, order):
        # Create the filter
        lf = LinearFilter(0, False, num, den)

        # Create a buffer to pack data into
        data = bytearray((order*2 + 1)*4)

        # Pack the parameters
        lf.pack_data(dt, data, 0)

        # Generate what we expect the data to look like
        numd, dend, _ = cont2discrete((num, den), dt)
        numd = numd.flatten()
        exp = list()
        for a, b in zip(dend[1:], numd[1:]):
            exp.append(-a)
            exp.append(b)
        expected_data = tp.np_to_fix(np.array(exp)).tobytes()

        # Check that's what we get
        assert struct.unpack_from("<I", data, 0)[0] == order
        assert data[4:] == expected_data
Example #6
    def test_pack_data(self, num, den, dt, order):
        # Create the filter
        lf = LinearFilter(0, False, num, den)

        # Create a buffer to pack data into
        data = bytearray((order * 2 + 1) * 4)

        # Pack the parameters
        lf.pack_data(dt, data, 0)

        # Generate what we expect the data to look like
        numd, dend, _ = cont2discrete((num, den), dt)
        numd = numd.flatten()
        exp = list()
        for a, b in zip(dend[1:], numd[1:]):
            exp.append(-a)
            exp.append(b)
        expected_data = tp.np_to_fix(np.array(exp)).tobytes()

        # Check that's what we get
        assert struct.unpack_from("<I", data, 0)[0] == order
        assert data[4:] == expected_data
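
The buffer sizing used by both tests follows directly from the packed layout: a 4-byte order word plus order * 2 coefficients of 4 bytes each. A quick check with an illustrative order:

    order = 3
    assert (order * 2 + 1) * 4 == 4 + order * 2 * 4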
Example #7
    def make_vertices(self, model, n_steps):
        """Construct the data which can be loaded into the memory of a
        SpiNNaker machine.
        """
        # Build encoders, gain and bias regions
        params = model.params[self.ensemble]
        ens_regions = dict()

        # Convert the encoders combined with the gain to S1615 before creating
        # the region.
        encoders_with_gain = params.scaled_encoders
        ens_regions[EnsembleRegions.encoders] = regions.MatrixRegion(
            tp.np_to_fix(encoders_with_gain),
            sliced_dimension=regions.MatrixPartitioning.rows)

        # Combine the direct input with the bias before converting to S1615 and
        # creating the region.
        bias_with_di = params.bias + np.dot(encoders_with_gain,
                                            self.direct_input)
        assert bias_with_di.ndim == 1
        ens_regions[EnsembleRegions.bias] = regions.MatrixRegion(
            tp.np_to_fix(bias_with_di),
            sliced_dimension=regions.MatrixPartitioning.rows)

        # Convert the gains to S1615 before creating the region
        ens_regions[EnsembleRegions.gain] = regions.MatrixRegion(
            tp.np_to_fix(params.gain),
            sliced_dimension=regions.MatrixPartitioning.rows)

        # Extract all the filters from the incoming connections
        incoming = model.get_signals_to_object(self)

        (ens_regions[EnsembleRegions.input_filters],
         ens_regions[EnsembleRegions.input_routing]) = make_filter_regions(
            incoming[InputPort.standard], model.dt, True,
            model.keyspaces.filter_routing_tag,
            width=self.ensemble.size_in
        )
        (ens_regions[EnsembleRegions.inhibition_filters],
         ens_regions[EnsembleRegions.inhibition_routing]) = \
            make_filter_regions(
                incoming[EnsembleInputPort.global_inhibition], model.dt, True,
                model.keyspaces.filter_routing_tag, width=1
            )

        # Extract all the decoders for the outgoing connections and build the
        # regions for the decoders and the regions for the output keys.
        outgoing = model.get_signals_from_object(self)
        if OutputPort.standard in outgoing:
            decoders, output_keys = \
                get_decoders_and_keys(outgoing[OutputPort.standard], True)
        else:
            decoders = np.array([])
            output_keys = list()
        size_out = decoders.shape[0]

        ens_regions[EnsembleRegions.decoders] = regions.MatrixRegion(
            tp.np_to_fix(decoders / model.dt),
            sliced_dimension=regions.MatrixPartitioning.rows)
        ens_regions[EnsembleRegions.keys] = regions.KeyspacesRegion(
            output_keys,
            fields=[regions.KeyField({'cluster': 'cluster'})],
            partitioned_by_atom=True
        )

        # The population length region stores information about groups of
        # co-operating cores.
        ens_regions[EnsembleRegions.population_length] = \
            regions.ListRegion("I")

        # The ensemble region contains basic information about the ensemble
        ens_regions[EnsembleRegions.ensemble] = EnsembleRegion(
            model.machine_timestep, self.ensemble.size_in)

        # The neuron region contains information specific to the neuron type
        ens_regions[EnsembleRegions.neuron] = LIFRegion(
            model.dt, self.ensemble.neuron_type.tau_rc,
            self.ensemble.neuron_type.tau_ref
        )

        # Manage profiling
        n_profiler_samples = 0
        self.profiled = getconfig(model.config, self.ensemble, "profile",
                                  False)
        if self.profiled:
            # Try and get number of samples from config
            n_profiler_samples = getconfig(model.config, self.ensemble,
                                           "profile_num_samples")

            # If it's not specified, calculate sensible default
            if n_profiler_samples is None:
                n_profiler_samples = (len(EnsembleSlice.profiler_tag_names) *
                                      n_steps * 2)

        # Create profiler region
        ens_regions[EnsembleRegions.profiler] = regions.Profiler(
            n_profiler_samples)
        ens_regions[EnsembleRegions.ensemble].n_profiler_samples = \
            n_profiler_samples

        # Manage probes
        for probe in self.local_probes:
            if probe.attr in ("output", "spikes"):
                self.record_spikes = True
            elif probe.attr == "voltage":
                self.record_voltages = True
            else:
                raise NotImplementedError(
                    "Cannot probe {} on Ensembles".format(probe.attr)
                )

        # Set the flags
        ens_regions[EnsembleRegions.ensemble].record_spikes = \
            self.record_spikes
        ens_regions[EnsembleRegions.ensemble].record_voltages = \
            self.record_voltages

        # Create the probe recording regions
        ens_regions[EnsembleRegions.spikes] = regions.SpikeRecordingRegion(
            n_steps if self.record_spikes else 0)
        ens_regions[EnsembleRegions.voltages] = regions.VoltageRecordingRegion(
            n_steps if self.record_voltages else 0)

        # Create constraints against which to partition; initially assume that
        # we can devote 16 cores to every problem.
        sdram_constraint = partition.Constraint(128 * 2**20,
                                                0.9)  # 90% of 128MiB
        dtcm_constraint = partition.Constraint(16 * 64 * 2**10,
                                               0.9)  # 90% of 16 cores DTCM

        # The number of cycles available per step is the 200MHz clock rate
        # multiplied by the machine timestep; as the timestep is stored in
        # microseconds this is simply 200 * machine_timestep.
        cycles = 200 * model.machine_timestep
        cpu_constraint = partition.Constraint(cycles * 16,
                                              0.8)  # 80% of 16 cores compute

        # Form the constraints dictionary
        def _make_constraint(f, size_in, size_out, **kwargs):
            """Wrap a usage computation method to work with the partitioner."""
            def f_(vertex_slice):
                # Calculate the number of neurons
                n_neurons = vertex_slice.stop - vertex_slice.start

                # Call the original method
                return f(size_in, size_out, n_neurons, **kwargs)
            return f_

        partition_constraints = {
            sdram_constraint: _make_constraint(_lif_sdram_usage,
                                               self.ensemble.size_in,
                                               size_out),
            dtcm_constraint: _make_constraint(_lif_dtcm_usage,
                                              self.ensemble.size_in, size_out),
            cpu_constraint: _make_constraint(_lif_cpu_usage,
                                             self.ensemble.size_in, size_out),
        }

        # Partition the ensemble to create clusters of co-operating cores
        self.clusters = list()
        vertices = list()
        constraints = list()
        for sl in partition.partition(slice(0, self.ensemble.n_neurons),
                                      partition_constraints):
            # For each slice we create a cluster of co-operating cores.  We
            # instantiate the cluster and then ask it to produce vertices which
            # will be added to the netlist.
            cluster = EnsembleCluster(sl, self.ensemble.size_in, size_out,
                                      ens_regions)
            self.clusters.append(cluster)

            # Get the vertices for the cluster
            cluster_vertices = cluster.make_vertices(cycles)
            vertices.extend(cluster_vertices)

            # Create a constraint which forces these vertices to be present on
            # the same chip
            constraints.append(SameChipConstraint(cluster_vertices))

        # Return the vertices and callback methods
        return netlistspec(vertices, self.load_to_machine,
                           after_simulation_function=self.after_simulation,
                           constraints=constraints)
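
A quick sanity check of the cycle budget used above (timestep value illustrative): a 1000 microsecond machine timestep on a 200MHz core leaves 200,000 cycles per step, 80% of which across 16 cores forms the CPU constraint:

    machine_timestep = 1000             # microseconds
    cycles = 200 * machine_timestep     # 200 cycles per microsecond
    assert cycles == 200000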
Example #8
    def make_vertices(self, model, n_steps):
        """Make vertices for the filter."""
        # Get the outgoing transforms and keys
        sigs = model.get_signals_from_object(self)
        if OutputPort.standard in sigs:
            outgoing = sigs[OutputPort.standard]
            transform, output_keys, sigs_pars_slices = \
                get_transforms_and_keys(outgoing)
        else:
            transform = np.array([[]])
            output_keys = list()
            sigs_pars_slices = list()

        size_out = len(output_keys)

        # Calculate how many cores and chips to use.
        if self.n_cores_per_chip is None or self.n_chips is None:
            # The number of cores is largely a function of the input size; we
            # try to ensure that each core receives at most 32 packets per
            # timestep.
            n_cores = int(min(16, np.ceil(self.size_in / 32.0)))

            # The number of chips is now determined by the size in (columns in
            # the transform matrix), the size out (rows in the transform
            # matrix) and the number of cores per chip.
            n_chips = self.n_chips or 1

            while True:
                rows_per_core = int(
                    np.ceil(float(size_out) / (n_cores * n_chips)))
                load_per_core = rows_per_core * self.size_in

                # The 8,000 cap is a heuristic limit on the number of matrix
                # elements (rows * columns) each core must process per step.
                if load_per_core <= 8000 or n_chips > 9:
                    # The load per core is acceptable or we're using way too
                    # many chips
                    break

                if n_cores < 16:
                    # Increase the number of cores per chip if we can
                    n_cores += 1
                else:
                    # Otherwise increase the number of chips
                    n_chips += 1

            # Store the result
            self.n_cores_per_chip = n_cores
            self.n_chips = n_chips

        # Slice the input space into the given number of subspaces; this is
        # repeated on each chip.
        input_slices = list(
            divide_slice(slice(0, self.size_in), self.n_cores_per_chip))

        # Slice the output space into the given number of subspaces; this is
        # sliced across all of the chips.
        output_slices = divide_slice(slice(0, size_out),
                                     self.n_cores_per_chip * self.n_chips)

        # Construct the output keys and transform regions; the output keys are
        # sliced by atom, and the transform is sliced by rows.
        self.output_keys_region = regions.KeyspacesRegion(
            output_keys,
            fields=[regions.KeyField({'cluster': 'cluster'})],
            partitioned_by_atom=True)
        self.transform_region = regions.MatrixRegion(
            np_to_fix(transform),
            sliced_dimension=regions.MatrixPartitioning.rows)

        # Construct the system region
        self.system_region = SystemRegion(self.size_in, model.machine_timestep)

        # Get the incoming filters
        incoming = model.get_signals_to_object(self)
        self.filters_region, self.routing_region = make_filter_regions(
            incoming[InputPort.standard],
            model.dt,
            True,
            model.keyspaces.filter_routing_tag,
            width=self.size_in)

        # Make the vertices and constraints
        iter_output_slices = iter(output_slices)
        cons = list()  # List of constraints

        # For each chip that we'll be using
        for _ in range(self.n_chips):
            chip_vertices = list()

            # Each core is given an input slice and an output slice.  The same
            # set of input slices is used per chip, but we iterate through the
            # whole list of output slices.
            for in_slice, out_slice in zip(input_slices, iter_output_slices):
                # Determine the amount of SDRAM required (the 24 additional
                # bytes are for the application pointer table).  We also
                # include this core's contribution to a shared SDRAM vector.
                sdram = (24 + 4 * (in_slice.stop - in_slice.start) +
                         self.system_region.sizeof() +
                         self.filters_region.sizeof_padded() +
                         self.routing_region.sizeof_padded() +
                         self.output_keys_region.sizeof_padded(out_slice) +
                         self.transform_region.sizeof_padded(out_slice))

                # Create the vertex and include in the list of vertices
                v = ParallelFilterSlice(in_slice, out_slice, {
                    Cores: 1,
                    SDRAM: sdram
                }, sigs_pars_slices)
                chip_vertices.append(v)
                self.vertices.append(v)

            # Create a constraint which will force all of the vertices onto
            # the same chip.
            cons.append(SameChipConstraint(chip_vertices))

        # Return the spec
        return netlistspec(self.vertices,
                           self.load_to_machine,
                           constraints=cons)
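
The core/chip heuristic above is self-contained enough to probe in isolation; a sketch with the same logic and illustrative sizes (the 8,000-element load cap and the 9-chip escape hatch are taken from the code above):

    import numpy as np

    def allocate_sketch(size_in, size_out, max_load=8000):
        """Return (n_cores_per_chip, n_chips) using the heuristic above."""
        n_cores = int(min(16, np.ceil(size_in / 32.0)))
        n_chips = 1
        while True:
            rows_per_core = int(np.ceil(float(size_out) / (n_cores * n_chips)))
            if rows_per_core * size_in <= max_load or n_chips > 9:
                return n_cores, n_chips
            if n_cores < 16:
                n_cores += 1
            else:
                n_chips += 1

    print(allocate_sketch(512, 512))    # -> (16, 3) for these sizes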
Example #9
    def before_simulation(self, netlist, simulator, n_steps):
        """Generate the values to output for the next set of simulation steps.
        """
        # Write out the system region to deal with the current run-time
        self.system_region.n_steps = n_steps

        # Evaluate the node for this period of time
        if self.period is not None:
            max_n = min(n_steps, int(np.ceil(self.period / simulator.dt)))
        else:
            max_n = n_steps

        ts = np.arange(simulator.steps, simulator.steps + max_n) * simulator.dt
        if callable(self.function):
            values = np.array([self.function(t) for t in ts])
        elif isinstance(self.function, Process):
            values = self.function.run_steps(max_n, d=self.size_out,
                                             dt=simulator.dt)
        else:
            values = np.array([self.function for t in ts])

        # Ensure that the values can be sliced, regardless of how they were
        # generated.
        values = npext.array(values, min_dims=2)

        # Compute the output for each connection
        outputs = []
        for transmission_params, transform in self.transmission_parameters:
            output = []

            # For each f(t) for the next set of simulations we calculate the
            # output at the end of the connection.  To do this we first apply
            # the pre-slice, then the function and finally the transform.
            for v in values:
                # Apply the pre-slice
                v = v[transmission_params.pre_slice]

                # Apply the function on the connection, if there is one.
                if transmission_params.function is not None:
                    v = np.asarray(transmission_params.function(v),
                                   dtype=float)

                output.append(np.dot(transform, v.T))
            outputs.append(np.array(output).reshape(max_n, -1))

        # Combine all of the output values to form a large matrix which we can
        # dump into memory.
        output_matrix = np.hstack(outputs)

        new_output_region = regions.MatrixRegion(
            np_to_fix(output_matrix),
            sliced_dimension=regions.MatrixPartitioning.columns
        )

        # Write the simulation values into memory
        for vertex in self.vertices:
            self.vertices_region_memory[vertex][self.system_region].seek(0)
            self.system_region.n_steps = max_n
            self.system_region.write_subregion_to_file(
                self.vertices_region_memory[vertex][self.system_region],
                vertex.slice
            )

            self.vertices_region_memory[vertex][self.output_region].seek(0)
            new_output_region.write_subregion_to_file(
                self.vertices_region_memory[vertex][self.output_region],
                vertex.slice
            )
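
A worked instance of the period clamp above (constants chosen binary-exact so the division is clean): a node with a 0.5 s period simulated at dt = 0.125 s only ever needs four distinct output values, however long the run:

    import numpy as np

    period, dt, n_steps = 0.5, 0.125, 1000
    max_n = min(n_steps, int(np.ceil(period / dt)))
    assert max_n == 4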
Example #10
    def make_vertices(self, model, n_steps):
        """Construct the data which can be loaded into the memory of a
        SpiNNaker machine.
        """
        # Build encoders, gain and bias regions
        params = model.params[self.ensemble]
        ens_regions = dict()

        # Convert the encoders combined with the gain to S1615 before creating
        # the region.
        encoders_with_gain = params.scaled_encoders
        ens_regions[EnsembleRegions.encoders] = regions.MatrixRegion(
            tp.np_to_fix(encoders_with_gain),
            sliced_dimension=regions.MatrixPartitioning.rows)

        # Combine the direct input with the bias before converting to S1615 and
        # creating the region.
        bias_with_di = params.bias + np.dot(encoders_with_gain,
                                            self.direct_input)
        assert bias_with_di.ndim == 1
        ens_regions[EnsembleRegions.bias] = regions.MatrixRegion(
            tp.np_to_fix(bias_with_di),
            sliced_dimension=regions.MatrixPartitioning.rows)

        # Convert the gains to S1615 before creating the region
        ens_regions[EnsembleRegions.gain] = regions.MatrixRegion(
            tp.np_to_fix(params.gain),
            sliced_dimension=regions.MatrixPartitioning.rows)

        # Extract all the filters from the incoming connections
        incoming = model.get_signals_to_object(self)

        (ens_regions[EnsembleRegions.input_filters],
         ens_regions[EnsembleRegions.input_routing]) = make_filter_regions(
             incoming[InputPort.standard],
             model.dt,
             True,
             model.keyspaces.filter_routing_tag,
             width=self.ensemble.size_in)
        (ens_regions[EnsembleRegions.inhibition_filters],
         ens_regions[EnsembleRegions.inhibition_routing]) = \
            make_filter_regions(
                incoming[EnsembleInputPort.global_inhibition], model.dt, True,
                model.keyspaces.filter_routing_tag, width=1
            )

        # Extract all the decoders for the outgoing connections and build the
        # regions for the decoders and the regions for the output keys.
        outgoing = model.get_signals_from_object(self)
        if OutputPort.standard in outgoing:
            decoders, output_keys = \
                get_decoders_and_keys(outgoing[OutputPort.standard], True)
        else:
            decoders = np.array([])
            output_keys = list()
        size_out = decoders.shape[0]

        ens_regions[EnsembleRegions.decoders] = regions.MatrixRegion(
            tp.np_to_fix(decoders / model.dt),
            sliced_dimension=regions.MatrixPartitioning.rows)
        ens_regions[EnsembleRegions.keys] = regions.KeyspacesRegion(
            output_keys,
            fields=[regions.KeyField({'cluster': 'cluster'})],
            partitioned_by_atom=True)

        # The population length region stores information about groups of
        # co-operating cores.
        ens_regions[EnsembleRegions.population_length] = \
            regions.ListRegion("I")

        # The ensemble region contains basic information about the ensemble
        ens_regions[EnsembleRegions.ensemble] = EnsembleRegion(
            model.machine_timestep, self.ensemble.size_in)

        # The neuron region contains information specific to the neuron type
        ens_regions[EnsembleRegions.neuron] = LIFRegion(
            model.dt, self.ensemble.neuron_type.tau_rc,
            self.ensemble.neuron_type.tau_ref)

        # Manage profiling
        n_profiler_samples = 0
        self.profiled = getconfig(model.config, self.ensemble, "profile",
                                  False)
        if self.profiled:
            # Try and get number of samples from config
            n_profiler_samples = getconfig(model.config, self.ensemble,
                                           "profile_num_samples")

            # If it's not specified, calculate sensible default
            if n_profiler_samples is None:
                n_profiler_samples = (len(EnsembleSlice.profiler_tag_names) *
                                      n_steps * 2)

        # Create profiler region
        ens_regions[EnsembleRegions.profiler] = regions.Profiler(
            n_profiler_samples)
        ens_regions[EnsembleRegions.ensemble].n_profiler_samples = \
            n_profiler_samples

        # Manage probes
        for probe in self.local_probes:
            if probe.attr in ("output", "spikes"):
                self.record_spikes = True
            elif probe.attr == "voltage":
                self.record_voltages = True
            else:
                raise NotImplementedError(
                    "Cannot probe {} on Ensembles".format(probe.attr))

        # Set the flags
        ens_regions[EnsembleRegions.ensemble].record_spikes = \
            self.record_spikes
        ens_regions[EnsembleRegions.ensemble].record_voltages = \
            self.record_voltages

        # Create the probe recording regions
        ens_regions[EnsembleRegions.spikes] = regions.SpikeRecordingRegion(
            n_steps if self.record_spikes else 0)
        ens_regions[EnsembleRegions.voltages] = regions.VoltageRecordingRegion(
            n_steps if self.record_voltages else 0)

        # Create constraints against which to partition; initially assume that
        # we can devote 16 cores to every problem.
        sdram_constraint = partition.Constraint(128 * 2**20,
                                                0.9)  # 90% of 128MiB
        dtcm_constraint = partition.Constraint(16 * 64 * 2**10,
                                               0.9)  # 90% of 16 cores DTCM

        # The number of cycles available per step is the 200MHz clock rate
        # multiplied by the machine timestep; as the timestep is stored in
        # microseconds this is simply 200 * machine_timestep.
        cycles = 200 * model.machine_timestep
        cpu_constraint = partition.Constraint(cycles * 16,
                                              0.8)  # 80% of 16 cores compute

        # Form the constraints dictionary
        def _make_constraint(f, size_in, size_out, **kwargs):
            """Wrap a usage computation method to work with the partitioner."""
            def f_(vertex_slice):
                # Calculate the number of neurons
                n_neurons = vertex_slice.stop - vertex_slice.start

                # Call the original method
                return f(size_in, size_out, n_neurons, **kwargs)

            return f_

        partition_constraints = {
            sdram_constraint:
            _make_constraint(_lif_sdram_usage, self.ensemble.size_in,
                             size_out),
            dtcm_constraint:
            _make_constraint(_lif_dtcm_usage, self.ensemble.size_in, size_out),
            cpu_constraint:
            _make_constraint(_lif_cpu_usage, self.ensemble.size_in, size_out),
        }

        # Partition the ensemble to create clusters of co-operating cores
        self.clusters = list()
        vertices = list()
        constraints = list()
        for sl in partition.partition(slice(0, self.ensemble.n_neurons),
                                      partition_constraints):
            # For each slice we create a cluster of co-operating cores.  We
            # instantiate the cluster and then ask it to produce vertices which
            # will be added to the netlist.
            cluster = EnsembleCluster(sl, self.ensemble.size_in, size_out,
                                      ens_regions)
            self.clusters.append(cluster)

            # Get the vertices for the cluster
            cluster_vertices = cluster.make_vertices(cycles)
            vertices.extend(cluster_vertices)

            # Create a constraint which forces these vertices to be present on
            # the same chip
            constraints.append(SameChipConstraint(cluster_vertices))

        # Return the vertices and callback methods
        return netlistspec(vertices,
                           self.load_to_machine,
                           after_simulation_function=self.after_simulation,
                           constraints=constraints)
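
partition.partition is a helper from the surrounding codebase; a hedged sketch of the idea as used above, repeatedly splitting the neuron slice until every sub-slice fits within a target fraction of each resource limit (the real partitioner is assumed to be more careful):

    def partition_sketch(full, usage, limit, target=0.9):
        """Split `full` into equal chunks until `usage` fits target*limit."""
        n = 1
        while True:
            length = full.stop - full.start
            chunk = -(-length // n)  # ceiling division
            slices = [slice(s, min(s + chunk, full.stop))
                      for s in range(full.start, full.stop, chunk)]
            if all(usage(sl) <= target * limit for sl in slices):
                return slices
            n += 1

    # 100 neurons at 10 bytes of DTCM each against a 300-byte budget:
    print(partition_sketch(slice(0, 100),
                           lambda sl: 10 * (sl.stop - sl.start), 300))
    # -> [slice(0, 25), slice(25, 50), slice(50, 75), slice(75, 100)]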
Example #11
    def make_vertices(self, model, n_steps):  # TODO remove n_steps
        """Construct the data which can be loaded into the memory of a
        SpiNNaker machine.
        """
        # Build encoders, gain and bias regions
        params = model.params[self.ensemble]

        # Convert the encoders combined with the gain to S1615 before creating
        # the region.
        encoders_with_gain = params.scaled_encoders
        self.encoders_region = regions.MatrixRegion(
            tp.np_to_fix(encoders_with_gain),
            sliced_dimension=regions.MatrixPartitioning.rows
        )

        # Combine the direct input with the bias before converting to S1615 and
        # creating the region.
        bias_with_di = params.bias + np.dot(encoders_with_gain,
                                            self.direct_input)
        assert bias_with_di.ndim == 1
        self.bias_region = regions.MatrixRegion(
            tp.np_to_fix(bias_with_di),
            sliced_dimension=regions.MatrixPartitioning.rows
        )

        # Convert the gains to S1615 before creating the region
        self.gain_region = regions.MatrixRegion(
            tp.np_to_fix(params.gain),
            sliced_dimension=regions.MatrixPartitioning.rows
        )

        # Extract all the filters from the incoming connections
        incoming = model.get_signals_connections_to_object(self)

        self.input_filters, self.input_filter_routing = make_filter_regions(
            incoming[InputPort.standard], model.dt, True,
            model.keyspaces.filter_routing_tag, width=self.ensemble.size_in
        )
        self.inhib_filters, self.inhib_filter_routing = make_filter_regions(
            incoming[EnsembleInputPort.global_inhibition], model.dt, True,
            model.keyspaces.filter_routing_tag, width=1
        )
        self.mod_filters, self.mod_filter_routing = make_filter_regions(
            {}, model.dt, True, model.keyspaces.filter_routing_tag
        )

        # Extract all the decoders for the outgoing connections and build the
        # regions for the decoders and the regions for the output keys.
        outgoing = model.get_signals_connections_from_object(self)
        decoders, output_keys = \
            get_decoders_and_keys(model, outgoing[OutputPort.standard], True)
        size_out = decoders.shape[1]

        # TODO: Include learnt decoders
        self.pes_region = PESRegion()

        self.decoders_region = regions.MatrixRegion(
            tp.np_to_fix(decoders / model.dt),
            sliced_dimension=regions.MatrixPartitioning.rows
        )
        self.output_keys_region = regions.KeyspacesRegion(
            output_keys, fields=[regions.KeyField({'cluster': 'cluster'})]
        )

        # Create the recording regions for locally situated probes
        self.spike_region = None
        self.probe_spikes = False
        self.voltage_region = None
        self.probe_voltages = False

        for probe in self.local_probes:
            # For each probe determine which regions and flags should be set
            if probe.attr in ("output", "spikes"):
                # If spikes are being probed then ensure that the flag is set
                # and a region exists.
                if not self.probe_spikes:
                    self.spike_region = SpikeRegion(n_steps)
                    self.probe_spikes = True
            elif probe.attr == "voltage":
                # If voltages are being probed then ensure that the flag is set
                # and a region exists.
                if not self.probe_voltages:
                    self.voltage_region = VoltageRegion(n_steps)
                    self.probe_voltages = True

        # If profiling is enabled
        num_profiler_samples = 0
        if getconfig(model.config, self.ensemble, "profile", False):
            # Try and get number of samples from config
            num_profiler_samples = getconfig(model.config, self.ensemble,
                                             "profile_num_samples")

            # If it's not specified, calculate sensible default
            if num_profiler_samples is None:
                num_profiler_samples =\
                    len(EnsembleLIF.profiler_tag_names) * n_steps * 2

        # Create profiler region
        self.profiler_region = regions.Profiler(num_profiler_samples)

        # Create the regions list
        self.regions = [
            SystemRegion(self.ensemble.size_in,
                         size_out,
                         model.machine_timestep,
                         self.ensemble.neuron_type.tau_ref,
                         self.ensemble.neuron_type.tau_rc,
                         model.dt,
                         self.probe_spikes,
                         self.probe_voltages,
                         num_profiler_samples
                         ),
            self.bias_region,
            self.encoders_region,
            self.decoders_region,
            self.output_keys_region,
            self.input_filters,
            self.input_filter_routing,
            self.inhib_filters,
            self.inhib_filter_routing,
            self.gain_region,
            self.mod_filters,
            self.mod_filter_routing,
            self.pes_region,
            self.profiler_region,
            self.spike_region,
            self.voltage_region,
        ]

        # Partition the ensemble and get a list of vertices to load to the
        # machine.  We can expect to be DTCM or CPU bound, so the SDRAM bound
        # can be quite lax to allow for lots of data probing.
        # TODO: Include other DTCM usage
        def cpu_usage(sl):
            """Calculate the CPU usage (in cycles) based on the number of
            neurons and the size_in and size_out of the ensemble.

            The equation and coefficients are taken from: "An Efficient
            SpiNNaker Implementation of the NEF", Mundy, Knight, Stewart and
            Furber [IJCNN 2015]
            """
            n_neurons = (sl.stop - sl.start)
            return (245 + 43*self.ensemble.size_in + 100 + 702*size_out +
                    188 + 69*n_neurons + 13*n_neurons*self.ensemble.size_in)

        self.vertices = list()
        sdram_constraint = partition.Constraint(8*2**20)  # Max 8MiB
        dtcm_constraint = partition.Constraint(64*2**10, .75)  # 75% of 64KiB
        cpu_constraint = partition.Constraint(200000, .8)  # 80% of 200k cycles
        constraints = {
            sdram_constraint: lambda s: regions.utils.sizeof_regions(
                self.regions, s),
            # **HACK** don't include last three regions in DTCM estimate
            # (profiler, spike and voltage recording)
            dtcm_constraint: lambda s: regions.utils.sizeof_regions(
                self.regions[:-3], s) + 5*(s.stop - s.start),
            cpu_constraint: cpu_usage,
        }
        app_name = (
            "ensemble_profiled" if num_profiler_samples > 0
            else "ensemble"
        )
        for sl in partition.partition(slice(0, self.ensemble.n_neurons),
                                      constraints):
            resources = {
                Cores: 1,
                SDRAM: regions.utils.sizeof_regions(self.regions, sl),
            }
            vsl = VertexSlice(sl, get_application(app_name), resources)
            self.vertices.append(vsl)

        # Return the vertices and callback methods
        return netlistspec(self.vertices, self.load_to_machine,
                           after_simulation_function=self.after_simulation)
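
Plugging illustrative numbers into the IJCNN 2015 cost model above: 100 neurons with size_in = 16 and size_out = 8 cost about 35k cycles, comfortably inside the 160,000-cycle budget (80% of 200k):

    n_neurons, size_in, size_out = 100, 16, 8
    cycles = (245 + 43*size_in + 100 + 702*size_out + 188 +
              69*n_neurons + 13*n_neurons*size_in)
    assert cycles == 34537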
Example #12
    def make_vertices(self, model, n_steps):
        """Make vertices for the filter."""
        # Get the outgoing transforms and keys
        sigs = model.get_signals_from_object(self)
        if OutputPort.standard in sigs:
            outgoing = sigs[OutputPort.standard]
            transform, output_keys, sigs_pars_slices = \
                get_transforms_and_keys(outgoing)
        else:
            transform = np.array([[]])
            output_keys = list()
            sigs_pars_slices = list()

        size_out = len(output_keys)

        # Calculate how many cores and chips to use.
        if self.n_cores_per_chip is None or self.n_chips is None:
            # The number of cores is largely a function of the input size; we
            # try to ensure that each core receives at most 32 packets per
            # timestep.
            n_cores = int(min(16, np.ceil(self.size_in / 32.0)))

            # The number of chips is now determined by the size in (columns in
            # the transform matrix), the size out (rows in the transform
            # matrix) and the number of cores per chip.
            n_chips = self.n_chips or 1

            while True:
                rows_per_core = int(np.ceil(float(size_out) /
                                            (n_cores * n_chips)))
                load_per_core = rows_per_core * self.size_in

                # The 8,000 cap is a heuristic limit on the number of matrix
                # elements (rows * columns) each core must process per step.
                if load_per_core <= 8000 or n_chips > 9:
                    # The load per core is acceptable or we're using way too
                    # many chips
                    break

                if n_cores < 16:
                    # Increase the number of cores per chip if we can
                    n_cores += 1
                else:
                    # Otherwise increase the number of chips
                    n_chips += 1

            # Store the result
            self.n_cores_per_chip = n_cores
            self.n_chips = n_chips

        # Slice the input space into the given number of subspaces; this is
        # repeated on each chip.
        input_slices = list(divide_slice(slice(0, self.size_in),
                                         self.n_cores_per_chip))

        # Slice the output space into the given number of subspaces; this is
        # sliced across all of the chips.
        output_slices = divide_slice(slice(0, size_out),
                                     self.n_cores_per_chip * self.n_chips)

        # Construct the output keys and transform regions; the output keys are
        # sliced by atom, and the transform is sliced by rows.
        self.output_keys_region = regions.KeyspacesRegion(
            output_keys, fields=[regions.KeyField({'cluster': 'cluster'})],
            partitioned_by_atom=True
        )
        self.transform_region = regions.MatrixRegion(
            np_to_fix(transform),
            sliced_dimension=regions.MatrixPartitioning.rows
        )

        # Construct the system region
        self.system_region = SystemRegion(self.size_in, model.machine_timestep)

        # Get the incoming filters
        incoming = model.get_signals_to_object(self)
        self.filters_region, self.routing_region = make_filter_regions(
            incoming[InputPort.standard], model.dt, True,
            model.keyspaces.filter_routing_tag, width=self.size_in
        )

        # Make the vertices and constraints
        iter_output_slices = iter(output_slices)
        cons = list()  # List of constraints

        # For each chip that we'll be using
        for _ in range(self.n_chips):
            chip_vertices = list()

            # Each core is given an input slice and an output slice.  The same
            # set of input slices is used per chip, but we iterate through the
            # whole list of output slices.
            for in_slice, out_slice in zip(input_slices,
                                           iter_output_slices):
                # Determine the amount of SDRAM required (the 24 additional
                # bytes are for the application pointer table).  We also
                # include this core's contribution to a shared SDRAM vector.
                sdram = (24 + 4*(in_slice.stop - in_slice.start) +
                         self.system_region.sizeof() +
                         self.filters_region.sizeof_padded() +
                         self.routing_region.sizeof_padded() +
                         self.output_keys_region.sizeof_padded(out_slice) +
                         self.transform_region.sizeof_padded(out_slice))

                # Create the vertex and include in the list of vertices
                v = ParallelFilterSlice(in_slice, out_slice,
                                        {Cores: 1, SDRAM: sdram},
                                        sigs_pars_slices)
                chip_vertices.append(v)
                self.vertices.append(v)

            # Create a constraint which will force all of the vertices onto
            # the same chip.
            cons.append(SameChipConstraint(chip_vertices))

        # Return the spec
        return netlistspec(self.vertices, self.load_to_machine,
                           constraints=cons)
Example #13
    def before_simulation(self, netlist, simulator, n_steps):
        """Generate the values to output for the next set of simulation steps.
        """
        # Write out the system region to deal with the current run-time
        self.system_region.n_steps = n_steps

        # Evaluate the node for this period of time
        if self.period is not None:
            max_n = min(n_steps, int(np.ceil(self.period / simulator.dt)))
        else:
            max_n = n_steps

        ts = np.arange(simulator.steps, simulator.steps + max_n) * simulator.dt
        if callable(self.function):
            values = np.array([self.function(t) for t in ts])
        elif isinstance(self.function, Process):
            values = self.function.run_steps(max_n, d=self.size_out,
                                             dt=simulator.dt)
        else:
            values = np.array([self.function for t in ts])

        # Ensure that the values can be sliced, regardless of how they were
        # generated.
        values = npext.array(values, min_dims=2)

        # Compute the output for each connection
        outputs = []
        for conn, transform in self.conns_transforms:
            output = []

            # For each f(t) for the next set of simulations we calculate the
            # output at the end of the connection.  To do this we first apply
            # the pre-slice, then the function and finally the transform.
            for v in values:
                # Apply the pre-slice
                v = v[conn.pre_slice]

                # Apply the function on the connection, if there is one.
                if conn.function is not None:
                    v = np.asarray(conn.function(v), dtype=float)

                output.append(np.dot(transform, v.T))
            outputs.append(np.array(output).reshape(max_n, -1))

        # Combine all of the output values to form a large matrix which we can
        # dump into memory.
        output_matrix = np.hstack(outputs)

        new_output_region = regions.MatrixRegion(
            np_to_fix(output_matrix),
            sliced_dimension=regions.MatrixPartitioning.columns
        )

        # Write the simulation values into memory
        for vertex in self.vertices:
            self.vertices_region_memory[vertex][self.system_region].seek(0)
            self.system_region.n_steps = max_n
            self.system_region.write_subregion_to_file(
                self.vertices_region_memory[vertex][self.system_region],
                vertex.slice
            )

            self.vertices_region_memory[vertex][self.output_region].seek(0)
            new_output_region.write_subregion_to_file(
                self.vertices_region_memory[vertex][self.output_region],
                vertex.slice
            )
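
A shape check for the combination step above (sizes illustrative): two connections producing 3- and 2-dimensional outputs over max_n = 4 steps hstack into a (4, 5) matrix, one row per timestep and one column per output dimension:

    import numpy as np

    outputs = [np.zeros((4, 3)), np.zeros((4, 2))]
    assert np.hstack(outputs).shape == (4, 5)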